// @ZBS { // *MODULE_OWNER_NAME zhashtable // } #ifndef ZHASHTABLE_H #define ZHASHTABLE_H // The HashTable class is a generic associative array // that associate keys which are nul-terminate strings with // generic typed buffers of arbitrary length. // // HashTables will automatically rebuild themselves when // they get to dense which can be an expensive operation // therefore it is best if you create them with an // initial size roughly double what you expect will be used. #include // for FILE* used by zHashTableLoad/Save #ifdef ZHASH_THREADSAFE // @ZBSIF extraDefines( 'ZHASH_THREADSAFE' ) // the above is for the perl-parsing of files for dependencies; we don't // want the dependency builder to see these includes if ZHASH_THREADSAFE is not // defined. Thus if your app doesn't need thread-safe hash, we don't create // dependency on thread libraries etc. (tfb) #include "pmutex.h" // @ZBSENDIF #else class PMutex; #endif #ifdef WIN32 typedef __int64 S64; #endif #ifdef __USE_GNU typedef long long S64; #endif struct ZHashRecord { short flags; #define zhNONE (0x00) #define zhDELETED (0x01) #define zhCHANGED (0x02) short type; #define zhBIN (0x01) // binary #define zhSTR (0x02) // nul-terminated string #define zhS32 (0x03) // signed 32 #define zhU32 (0x04) // unsigned 32 #define zhS64 (0x05) // signed int64 #define zhFLT (0x06) // float #define zhDBL (0x07) // double #define zhPTR (0x08) // The record contains a pointer #define zhPFR (0x09) // The pointer is to be freed on delete or overwrite int keyLen; int valLen; char key[1]; // This is really keyLen long // The val follows this after keyLen bytes }; class ZHashTable { friend struct ZHashWalk; protected: char *last; int lastLen; // These store the last return from convert // Note that the use of these return buffers among other parts // of this means that access to these hash tables is NOT thread safe // Be sure to use mutexes if you are accessing a hash from more than one thread int collisions; // Incremented each time the set() function collides. // Useful for profiling the hash function int initialSize; // How big the initial array is int hasAnyChanged; // Has anything in here changed since the last time this was cleared int hashTableSize; // Number of pointers in the hash table int numRecords; // Actual active records in the table ZHashRecord **hashTable; // Array of hash records int threadSafe; PMutex *mutex; // thread-safety is per hashtable, must be enabled with a #define, and even // then must be manually turned on per hashtable with setThreadSafe(). static unsigned int hash( char *key, int keyLen ); // Hashing function. currently using the one from http://burtleburtle.net/bob/hash/doobs.html int internalSet( ZHashRecord **_hashTable, int _hashTableSize, char *key, int keyLen, ZHashRecord *_recPtr ); // Internal method to functionalize rebuilds of tables // Returns 1 if the key is new, 0 if it is replacing an existing public: ZHashTable( int _initialSize=64 ); // For optimal performance make _initialSize twice as big as expected size ZHashTable( const ZHashTable © ); // Copy constructor virtual ~ZHashTable(); void init( int _initSize=64 ); void clear( int _initSize=-1, int freeFirst=1 ); // kill everything, start fresh. Will reset initSize if you request it int size() { return hashTableSize; } // The size of the hashtable including empty records int activeCount() { return numRecords; } // The actual number of active records virtual void resetCallback() { }; // Overrid this if you need to know when the internal hash pointers move around void copyFrom( ZHashTable &table ); // Copies all the items from table. // Does *NOT* clear first, if you want to // start off fresh, call clear() first // NOMENCLATURE // // "put" is used to place into the hashtable and is prefixed and postfixed to denote type // which are are encoded as: // // S = nul terminated string // I = signed integer value encoded as a string // U = unsigned integer value encoded as a string // L = int64 encoded as a string // F = float value encoded as a string // D = double value encoded as a string // b = binary data (there may be embedded nuls) // i = int stored in binary // u = unsigned int stored in binary // l = int64 stored in binary // f = float stored in binary // d = double stored in binary // p = a pointer to a buffer (see special rules below) // // The prefix denotes the type of the hash key. No prefix means "string" // none = string // i = binary int // // The generic "put" and "get" take a key, keyLen, val, and valLen and // they assume that len values of -1 mean that it is a nul-termineated // string on which the len should be calculated // // IMPORTANT NOTE. The functions putI putF and putD are deprecated. You // should now use the puti putf and putd functions because the type is // encoded in the flags and thus you can use the getI getF and getD functions // to do the conversion to string if desired. // // PUT: // When a value is put into the hash, it will overwrite any existing value // which is already associated with that key. If the value is a pointer to a buffer // then it may free that buffer (see putp Special Rules below) // A val ptr of zero indicates delete. // The memory of a records is not necessarily deleted immediately. Instead, // the record is marked as deleted and, if the valLen is large, it may choose // to reallocate the record and free the old one to save memory. // // PUTP SPECIAL RULES: // Sometimes you may wish to associate an arbitrary buffer with the hash table which, // when the key is freed, you wish to free. In this way, the hash table becomes a // kind of "dynamic variable heap". // When a buffer is placed into the hash using putp() you give it a void * val ptr. // Two extra flags are then associated with the record. One indicates that you wish // this ptr to be freed upon deletion and the other indicates that you wish consider // a val ptr of null to indicate "delete" as opposed to a nul pointer. In the case // that you do not set delOnNul, passing a val ptr of 0 will mean that the record key // will still exist in the hash but that a call to get will report success and a 0 value. // // GET: // You may either get by hash key (typical) or by record number (used for iterating, see below) // When getting, you may optionally specify a value to return in the case that the record // does not exist, the default for all types is zero. // // The various get functions do optional typecasting // The uppercase versions do their best to convert to the type requested // The lowercase versions do not. // For example, imagine in the cache there is a string "key" associated with the *string* "1.234" // The getF function will convert the string into a floating point number but getf will return 0.f // // GET ITERATING: // Sometimes you want to traverse every record in a hash table. You may use "getVal and getKey" for this // Example: // for( int i=0; i=hash->size() ) { return 0; } rec = hash->hashTable[i]; if( rec && !(rec->flags&zhDELETED) ) { key = rec->key; val = &rec->key[rec->keyLen]; keyLen = rec->keyLen; valLen = rec->valLen; flags = rec->flags; type = rec->type; } } while( !rec || (rec->flags&zhDELETED) ); return rec == 0 ? 0 : 1; } }; char *zHashTablePack( ZHashTable &hash, unsigned int *size=0 ); // Packs the hash into malloced mem, optionally returns size of malloc int zHashTableUnpack( char *bin, ZHashTable &hash ); // Unpacks from mem to hash void zHashTableSave( ZHashTable &hash, FILE *f ); // save to disk void zHashTableLoad( FILE *f, ZHashTable &hash ); // load from disk #endif