3 Copyright (c) 2003-2014, Troy D. Hanson http://troydhanson.github.com/uthash/
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
10 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
11 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
12 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
13 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
14 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
15 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
16 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
17 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
18 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
19 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 //#define HASH_BLOOM 16
27 #include <string.h> /* memcmp,strlen */
28 #include <stddef.h> /* ptrdiff_t */
29 #include <stdlib.h> /* exit() */
31 /* These macros use decltype or the earlier __typeof GNU extension.
32 As decltype is only available in newer compilers (VS2010 or gcc 4.3+
33 when compiling c++ source) this code uses whatever method is needed
34 or, for VS2008 where neither is available, uses casting workarounds. */
35 #if defined(_MSC_VER) /* MS compiler */
36 #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
37 #define DECLTYPE(x) (decltype(x))
38 #else /* VS2008 or older (or VS2010 in C mode) */
42 #elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__)
45 #else /* GNU, Sun and other compilers */
46 #define DECLTYPE(x) (__typeof(x))
50 #define DECLTYPE_ASSIGN(dst,src) \
52 char **_da_dst = (char**)(&(dst)); \
53 *_da_dst = (char*)(src); \
56 #define DECLTYPE_ASSIGN(dst,src) \
58 (dst) = DECLTYPE(dst)(src); \
/* a number of the hash functions use uint32_t, which isn't defined on pre-VS2010 compilers */
64 #if defined(_MSC_VER) && _MSC_VER >= 1600
66 #elif defined(__WATCOMC__)
69 //typedef unsigned int uint32_t;
70 //typedef unsigned char uint8_t;
#define UTHASH_VERSION 1.9.9 /* uthash release version (informational; not meant to be expanded in code) */
/* default error/allocation hooks. NOTE(review): these are presumably wrapped in
 * #ifndef guards (like uthash_noexpand_fyi below) so users can override them
 * before including uthash — guards not visible in this extract; confirm. */
#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
#define uthash_malloc(sz) malloc(sz) /* malloc fcn */
#define uthash_free(ptr,sz) free(ptr) /* free fcn */
88 #ifndef uthash_noexpand_fyi
89 #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
91 #ifndef uthash_expand_fyi
92 #define uthash_expand_fyi(tbl) /* can be defined to log expands */
95 /* initial number of buckets */
96 #ifndef HASH_INITIAL_NUM_BUCKETS_LOG2
97 #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
#define HASH_INITIAL_NUM_BUCKETS (1<<HASH_INITIAL_NUM_BUCKETS_LOG2) /* initial number of buckets (always a power of two; bucket selection masks with num_bkts-1) */
#define HASH_BKT_CAPACITY_THRESH 10 /* expand when a bucket's chain reaches this length (scaled by expand_mult+1 in HASH_ADD_TO_BKT) */
/* calculate the element whose hash handle address is hhp */
#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) /* hho is the byte offset of the hash handle within the element struct */
105 #define HASH_FIND(hh,head,keyptr,keylen,out) \
109 uint32_t _hf_bkt,_hf_hashv; \
110 HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
111 if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
112 HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
113 keyptr,keylen,out); \
/* bloom filter sizing: HASH_BLOOM is the log2 of the filter size in bits */
#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
/* bytes needed to hold the bit vector. Fully parenthesized: the old form
 * `(A/8) + (A%8 ? 1:0)` misassociated when the macro was used inside a larger
 * arithmetic expression (e.g. `2 * HASH_BLOOM_BYTELEN`). */
#define HASH_BLOOM_BYTELEN ((HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0))
121 #define HASH_BLOOM_MAKE(tbl) \
123 (tbl)->bloom_nbits = HASH_BLOOM; \
124 (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
125 if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
126 memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
127 (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
130 #define HASH_BLOOM_FREE(tbl) \
132 uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
/* set / test one bit of the bloom bit-vector bv (a uint8_t array).
 * `bv` is now parenthesized for macro hygiene: an expression argument such as
 * `base + off` previously expanded as `base + off[(idx)/8]`. */
#define HASH_BLOOM_BITSET(bv,idx) ((bv)[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) ((bv)[(idx)/8] & (1U << ((idx)%8)))
/* record hashv in the bloom filter, keeping only the low bloom_nbits bits of the hash */
#define HASH_BLOOM_ADD(tbl,hashv) \
HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
/* nonzero if hashv may be present (bloom filters give false positives, never false negatives) */
#define HASH_BLOOM_TEST(tbl,hashv) \
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
/* bloom filter disabled: maintenance macros expand to nothing, the membership
 * test always reports "possibly present", and no storage is reserved */
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#define HASH_BLOOM_BYTELEN 0
152 #define HASH_MAKE_TABLE(hh,head) \
154 (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
155 sizeof(UT_hash_table)); \
156 if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
157 memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
158 (head)->hh.tbl->tail = &((head)->hh); \
159 (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
160 (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
161 (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
162 (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
163 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
164 if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
165 memset((head)->hh.tbl->buckets, 0, \
166 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
167 HASH_BLOOM_MAKE((head)->hh.tbl); \
168 (head)->hh.tbl->signature = HASH_SIGNATURE; \
/* add an element whose key is the struct member `fieldname` (keylen_in bytes long) */
#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
174 #define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \
177 HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \
178 if (replaced!=NULL) { \
179 HASH_DELETE(hh,head,replaced); \
181 HASH_ADD(hh,head,fieldname,keylen_in,add); \
184 #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
187 (add)->hh.next = NULL; \
188 (add)->hh.key = (char*)(keyptr); \
189 (add)->hh.keylen = (uint32_t)(keylen_in); \
192 (head)->hh.prev = NULL; \
193 HASH_MAKE_TABLE(hh,head); \
195 (head)->hh.tbl->tail->next = (add); \
196 (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
197 (head)->hh.tbl->tail = &((add)->hh); \
199 (head)->hh.tbl->num_items++; \
200 (add)->hh.tbl = (head)->hh.tbl; \
201 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
202 (add)->hh.hashv, _ha_bkt); \
203 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
204 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
205 HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
206 HASH_FSCK(hh,head); \
209 #define HASH_TO_BKT( hashv, num_bkts, bkt ) \
211 bkt = ((hashv) & ((num_bkts) - 1)); \
214 /* delete "delptr" from the hash table.
215 * "the usual" patch-up process for the app-order doubly-linked-list.
216 * The use of _hd_hh_del below deserves special explanation.
217 * These used to be expressed using (delptr) but that led to a bug
218 * if someone used the same symbol for the head and deletee, like
219 * HASH_DELETE(hh,users,users);
220 * We want that to work, but by changing the head (users) below
221 * we were forfeiting our ability to further refer to the deletee (users)
222 * in the patch-up process. Solution: use scratch space to
223 * copy the deletee pointer, then the latter references are via that
224 * scratch pointer rather than through the repointed (users) symbol.
226 #define HASH_DELETE(hh,head,delptr) \
228 struct UT_hash_handle *_hd_hh_del; \
229 if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
230 uthash_free((head)->hh.tbl->buckets, \
231 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
232 HASH_BLOOM_FREE((head)->hh.tbl); \
233 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
237 _hd_hh_del = &((delptr)->hh); \
238 if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
239 (head)->hh.tbl->tail = \
240 (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
241 (head)->hh.tbl->hho); \
243 if ((delptr)->hh.prev) { \
244 ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
245 (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
247 DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
249 if (_hd_hh_del->next) { \
250 ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
251 (head)->hh.tbl->hho))->prev = \
254 HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
255 HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
256 (head)->hh.tbl->num_items--; \
258 HASH_FSCK(hh,head); \
262 /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
/* find by NUL-terminated string key (length recomputed via strlen on every call) */
#define HASH_FIND_STR(head,findstr,out) \
HASH_FIND(hh,head,findstr,(uint32_t)strlen(findstr),out)
/* add with an in-struct char-array string key. strlen's size_t result is cast
 * to uint32_t to match the stored keylen width, consistent with HASH_FIND_STR
 * and HASH_REPLACE_STR; the `add` argument is parenthesized for macro hygiene. */
#define HASH_ADD_STR(head,strfield,add) \
HASH_ADD(hh,head,strfield[0],(uint32_t)strlen((add)->strfield),add)
/* replace (delete existing match, then add) with an in-struct string key */
#define HASH_REPLACE_STR(head,strfield,add,replaced) \
HASH_REPLACE(hh,head,strfield[0],(uint32_t)strlen(add->strfield),add,replaced)
/* int-keyed convenience wrappers (key is an int member) */
#define HASH_FIND_INT(head,findint,out) \
HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_ADD_INT(head,intfield,add) \
HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_REPLACE_INT(head,intfield,add,replaced) \
HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
/* pointer-keyed wrappers: the key bytes are the pointer value stored in the
 * member, so HASH_FIND_PTR takes the address of a pointer variable */
#define HASH_FIND_PTR(head,findptr,out) \
HASH_FIND(hh,head,findptr,sizeof(void *),out)
#define HASH_ADD_PTR(head,ptrfield,add) \
HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \
HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
/* delete using the default handle name 'hh' */
#define HASH_DEL(head,delptr) \
HASH_DELETE(hh,head,delptr)
284 /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
285 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
288 #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
289 #define HASH_FSCK(hh,head) \
291 struct UT_hash_handle *_thh; \
297 for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
298 uint32_t _bkt_count = 0; \
299 _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
302 if (_prev != (char*)(_thh->hh_prev)) { \
303 HASH_OOPS("invalid hh_prev %p, actual %p\n", \
304 _thh->hh_prev, _prev ); \
307 _prev = (char*)(_thh); \
308 _thh = _thh->hh_next; \
310 _count += _bkt_count; \
311 if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
312 HASH_OOPS("invalid bucket count %u, actual %u\n", \
313 (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
316 if (_count != (head)->hh.tbl->num_items) { \
317 HASH_OOPS("invalid hh item count %u, actual %u\n", \
318 (head)->hh.tbl->num_items, _count ); \
320 /* traverse hh in app order; check next/prev integrity, count */ \
323 _thh = &(head)->hh; \
326 if (_prev !=(char*)(_thh->prev)) { \
327 HASH_OOPS("invalid prev %p, actual %p\n", \
328 _thh->prev, _prev ); \
330 _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
331 _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
332 (head)->hh.tbl->hho) : NULL ); \
334 if (_count != (head)->hh.tbl->num_items) { \
335 HASH_OOPS("invalid app item count %u, actual %u\n", \
336 (head)->hh.tbl->num_items, _count ); \
341 #define HASH_FSCK(hh,head)
344 /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
345 * the descriptor to which this macro is defined for tuning the hash function.
346 * The app can #include <unistd.h> to get the prototype for write(2). */
347 #ifdef HASH_EMIT_KEYS
348 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
350 uint32_t _klen = fieldlen; \
351 write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
352 write(HASH_EMIT_KEYS, keyptr, fieldlen); \
355 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
/* default to the Jenkins hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
360 #define HASH_FCN HASH_FUNCTION
362 #define HASH_FCN HASH_JEN
365 /* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */
366 #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
368 uint32_t _hb_keylen=keylen; \
369 char *_hb_key=(char*)(key); \
371 while (_hb_keylen--) { (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; } \
372 bkt = (hashv) & (num_bkts-1); \
376 /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
377 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
378 #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
381 char *_hs_key=(char*)(key); \
383 for(_sx_i=0; _sx_i < keylen; _sx_i++) \
384 hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
385 bkt = hashv & (num_bkts-1); \
387 /* FNV-1a variation */
388 #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
391 char *_hf_key=(char*)(key); \
392 hashv = 2166136261UL; \
393 for(_fn_i=0; _fn_i < keylen; _fn_i++) { \
394 hashv = hashv ^ _hf_key[_fn_i]; \
395 hashv = hashv * 16777619; \
397 bkt = hashv & (num_bkts-1); \
400 #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
403 char *_ho_key=(char*)(key); \
405 for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
406 hashv += _ho_key[_ho_i]; \
407 hashv += (hashv << 10); \
408 hashv ^= (hashv >> 6); \
410 hashv += (hashv << 3); \
411 hashv ^= (hashv >> 11); \
412 hashv += (hashv << 15); \
413 bkt = hashv & (num_bkts-1); \
416 #define HASH_JEN_MIX(a,b,c) \
418 a -= b; a -= c; a ^= ( c >> 13 ); \
419 b -= c; b -= a; b ^= ( a << 8 ); \
420 c -= a; c -= b; c ^= ( b >> 13 ); \
421 a -= b; a -= c; a ^= ( c >> 12 ); \
422 b -= c; b -= a; b ^= ( a << 16 ); \
423 c -= a; c -= b; c ^= ( b >> 5 ); \
424 a -= b; a -= c; a ^= ( c >> 3 ); \
425 b -= c; b -= a; b ^= ( a << 10 ); \
426 c -= a; c -= b; c ^= ( b >> 15 ); \
429 #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
431 uint32_t _hj_i,_hj_j,_hj_k; \
432 unsigned char *_hj_key=(unsigned char*)(key); \
433 hashv = 0xfeedbeef; \
434 _hj_i = _hj_j = 0x9e3779b9; \
435 _hj_k = (uint32_t)(keylen); \
436 while (_hj_k >= 12) { \
437 _hj_i += (_hj_key[0] + ( (uint32_t)_hj_key[1] << 8 ) \
438 + ( (uint32_t)_hj_key[2] << 16 ) \
439 + ( (uint32_t)_hj_key[3] << 24 ) ); \
440 _hj_j += (_hj_key[4] + ( (uint32_t)_hj_key[5] << 8 ) \
441 + ( (uint32_t)_hj_key[6] << 16 ) \
442 + ( (uint32_t)_hj_key[7] << 24 ) ); \
443 hashv += (_hj_key[8] + ( (uint32_t)_hj_key[9] << 8 ) \
444 + ( (uint32_t)_hj_key[10] << 16 ) \
445 + ( (uint32_t)_hj_key[11] << 24 ) ); \
447 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
454 case 11: hashv += ( (uint32_t)_hj_key[10] << 24 ); \
455 case 10: hashv += ( (uint32_t)_hj_key[9] << 16 ); \
456 case 9: hashv += ( (uint32_t)_hj_key[8] << 8 ); \
457 case 8: _hj_j += ( (uint32_t)_hj_key[7] << 24 ); \
458 case 7: _hj_j += ( (uint32_t)_hj_key[6] << 16 ); \
459 case 6: _hj_j += ( (uint32_t)_hj_key[5] << 8 ); \
460 case 5: _hj_j += _hj_key[4]; \
461 case 4: _hj_i += ( (uint32_t)_hj_key[3] << 24 ); \
462 case 3: _hj_i += ( (uint32_t)_hj_key[2] << 16 ); \
463 case 2: _hj_i += ( (uint32_t)_hj_key[1] << 8 ); \
464 case 1: _hj_i += _hj_key[0]; \
466 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
467 bkt = hashv & (num_bkts-1); \
470 /* The Paul Hsieh hash function */
472 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
473 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
474 #define get16bits(d) (*((const uint16_t *) (d)))
#if !defined (get16bits)
/* portable fallback: assemble the 16-bit value from two single-byte loads in
 * little-endian order; safe for pointers with no alignment guarantee */
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
+(uint32_t)(((const uint8_t *)(d))[0]) )
481 #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
483 unsigned char *_sfh_key=(unsigned char*)(key); \
484 uint32_t _sfh_tmp, _sfh_len = keylen; \
486 int _sfh_rem = _sfh_len & 3; \
488 hashv = 0xcafebabe; \
491 for (;_sfh_len > 0; _sfh_len--) { \
492 hashv += get16bits (_sfh_key); \
493 _sfh_tmp = (uint32_t)(get16bits (_sfh_key+2)) << 11 ^ hashv; \
494 hashv = (hashv << 16) ^ _sfh_tmp; \
495 _sfh_key += 2*sizeof (uint16_t); \
496 hashv += hashv >> 11; \
499 /* Handle end cases */ \
500 switch (_sfh_rem) { \
501 case 3: hashv += get16bits (_sfh_key); \
502 hashv ^= hashv << 16; \
503 hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)] << 18); \
504 hashv += hashv >> 11; \
506 case 2: hashv += get16bits (_sfh_key); \
507 hashv ^= hashv << 11; \
508 hashv += hashv >> 17; \
510 case 1: hashv += *_sfh_key; \
511 hashv ^= hashv << 10; \
512 hashv += hashv >> 1; \
515 /* Force "avalanching" of final 127 bits */ \
516 hashv ^= hashv << 3; \
517 hashv += hashv >> 5; \
518 hashv ^= hashv << 4; \
519 hashv += hashv >> 17; \
520 hashv ^= hashv << 25; \
521 hashv += hashv >> 6; \
522 bkt = hashv & (num_bkts-1); \
525 #ifdef HASH_USING_NO_STRICT_ALIASING
526 /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
527 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
528 * MurmurHash uses the faster approach only on CPU's where we know it's safe.
530 * Note the preprocessor built-in defines can be emitted using:
532 * gcc -m64 -dM -E - < /dev/null (on gcc)
533 * cc -## a.c (where a.c is a simple test file) (Sun Studio)
535 #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
/* direct (possibly unaligned) 32-bit block read — x86/x64 tolerate unaligned
 * loads. Arguments parenthesized for macro hygiene. */
#define MUR_GETBLOCK(p,i) ((p)[(i)])
537 #else /* non intel */
/* alignment tests: the low two bits of the pointer give its byte offset within
 * a 32-bit word. uintptr_t (not uint64_t) is the portable integer type for
 * pointer bit-twiddling — casting a pointer to uint64_t is not guaranteed to
 * round-trip, and `~3UL` truncates the mask on LLP64 targets. */
#define MUR_PLUS0_ALIGNED(p) (((uintptr_t)(p) & 0x3) == 0)
#define MUR_PLUS1_ALIGNED(p) (((uintptr_t)(p) & 0x3) == 1)
#define MUR_PLUS2_ALIGNED(p) (((uintptr_t)(p) & 0x3) == 2)
#define MUR_PLUS3_ALIGNED(p) (((uintptr_t)(p) & 0x3) == 3)
/* WP: round p down to the start of the containing 32-bit word */
#define WP(p) ((uint32_t*)((uintptr_t)(p) & ~(uintptr_t)3))
#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
/* reassemble a 32-bit block that straddles a word boundary from two aligned
 * word reads: MUR_THREE_ONE = 3 bytes from this word + 1 from the next,
 * MUR_TWO_TWO = 2+2, MUR_ONE_THREE = 1+3 (selected by pointer alignment
 * in MUR_GETBLOCK). Big-endian byte selection: */
#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
#else /* assume little endian non-intel */
/* same reassembly with the byte selection mirrored for little-endian layout */
#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
552 #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
553 (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
554 (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) /* rotate 32-bit x left by r bits (callers pass r in 1..31; r==0 or 32 would be UB) */
558 #define MUR_FMIX(_h) \
567 #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
569 const uint8_t *_mur_data = (const uint8_t*)(key); \
570 const int _mur_nblocks = (keylen) / 4; \
571 uint32_t _mur_h1 = 0xf88D5353; \
572 uint32_t _mur_c1 = 0xcc9e2d51; \
573 uint32_t _mur_c2 = 0x1b873593; \
574 uint32_t _mur_k1 = 0; \
575 const uint8_t *_mur_tail; \
576 const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
578 for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
579 _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
580 _mur_k1 *= _mur_c1; \
581 _mur_k1 = MUR_ROTL32(_mur_k1,15); \
582 _mur_k1 *= _mur_c2; \
584 _mur_h1 ^= _mur_k1; \
585 _mur_h1 = MUR_ROTL32(_mur_h1,13); \
586 _mur_h1 = _mur_h1*5+0xe6546b64; \
588 _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
590 switch((keylen) & 3) { \
591 case 3: _mur_k1 ^= _mur_tail[2] << 16; \
592 case 2: _mur_k1 ^= _mur_tail[1] << 8; \
593 case 1: _mur_k1 ^= _mur_tail[0]; \
594 _mur_k1 *= _mur_c1; \
595 _mur_k1 = MUR_ROTL32(_mur_k1,15); \
596 _mur_k1 *= _mur_c2; \
597 _mur_h1 ^= _mur_k1; \
599 _mur_h1 ^= (keylen); \
602 bkt = hashv & (num_bkts-1); \
604 #endif /* HASH_USING_NO_STRICT_ALIASING */
606 /* key comparison function; return 0 if keys equal */
/* key comparison function; returns 0 if keys equal (memcmp semantics).
 * Arguments parenthesized so expression arguments expand safely. */
#define HASH_KEYCMP(a,b,len) memcmp((a),(b),(len))
609 /* iterate over items in a known bucket to find desired item */
610 #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
612 if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
615 if ((out)->hh.keylen == keylen_in) { \
616 if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
618 if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
623 /* add an item to a bucket */
624 #define HASH_ADD_TO_BKT(head,addhh) \
627 (addhh)->hh_next = head.hh_head; \
628 (addhh)->hh_prev = NULL; \
629 if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
630 (head).hh_head=addhh; \
631 if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
632 && (addhh)->tbl->noexpand != 1) { \
633 HASH_EXPAND_BUCKETS((addhh)->tbl); \
637 /* remove an item from a given bucket */
638 #define HASH_DEL_IN_BKT(hh,head,hh_del) \
640 if ((head).hh_head == hh_del) { \
641 (head).hh_head = hh_del->hh_next; \
643 if (hh_del->hh_prev) { \
644 hh_del->hh_prev->hh_next = hh_del->hh_next; \
646 if (hh_del->hh_next) { \
647 hh_del->hh_next->hh_prev = hh_del->hh_prev; \
650 /* Bucket expansion has the effect of doubling the number of buckets
651 * and redistributing the items into the new buckets. Ideally the
652 * items will distribute more or less evenly into the new buckets
653 * (the extent to which this is true is a measure of the quality of
654 * the hash function as it applies to the key domain).
656 * With the items distributed into more buckets, the chain length
657 * (item count) in each bucket is reduced. Thus by expanding buckets
658 * the hash keeps a bound on the chain length. This bounded chain
659 * length is the essence of how a hash provides constant time lookup.
661 * The calculation of tbl->ideal_chain_maxlen below deserves some
662 * explanation. First, keep in mind that we're calculating the ideal
663 * maximum chain length based on the *new* (doubled) bucket count.
664 * In fractions this is just n/b (n=number of items,b=new num buckets).
665 * Since the ideal chain length is an integer, we want to calculate
666 * ceil(n/b). We don't depend on floating point arithmetic in this
667 * hash, so to calculate ceil(n/b) with integers we could write
669 * ceil(n/b) = (n/b) + ((n%b)?1:0)
671 * and in fact a previous version of this hash did just that.
672 * But now we have improved things a bit by recognizing that b is
673 * always a power of two. We keep its base 2 log handy (call it lb),
674 * so now we can write this with a bit shift and logical AND:
676 * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
679 #define HASH_EXPAND_BUCKETS(tbl) \
682 uint32_t _he_bkt_i; \
683 struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
684 UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
685 _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
686 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
687 if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
688 memset(_he_new_buckets, 0, \
689 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
690 tbl->ideal_chain_maxlen = \
691 (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
692 ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
693 tbl->nonideal_items = 0; \
694 for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
696 _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
698 _he_hh_nxt = _he_thh->hh_next; \
699 HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
700 _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
701 if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
702 tbl->nonideal_items++; \
703 _he_newbkt->expand_mult = _he_newbkt->count / \
704 tbl->ideal_chain_maxlen; \
706 _he_thh->hh_prev = NULL; \
707 _he_thh->hh_next = _he_newbkt->hh_head; \
708 if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
710 _he_newbkt->hh_head = _he_thh; \
711 _he_thh = _he_hh_nxt; \
714 uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
715 tbl->num_buckets *= 2; \
716 tbl->log2_num_buckets++; \
717 tbl->buckets = _he_new_buckets; \
718 tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
719 (tbl->ineff_expands+1) : 0; \
720 if (tbl->ineff_expands > 1) { \
722 uthash_noexpand_fyi(tbl); \
724 uthash_expand_fyi(tbl); \
728 /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
729 /* Note that HASH_SORT assumes the hash handle name to be hh.
730 * HASH_SRT was added to allow the hash handle name to be passed in. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) /* sort the app-order list using the default handle name 'hh'; cmpfcn is a strcmp-style comparator on element pointers */
732 #define HASH_SRT(hh,head,cmpfcn) \
735 uint32_t _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
736 struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
740 _hs_list = &((head)->hh); \
741 while (_hs_looping) { \
750 for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
752 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
753 ((void*)((char*)(_hs_q->next) + \
754 (head)->hh.tbl->hho)) : NULL); \
755 if (! (_hs_q) ) break; \
757 _hs_qsize = _hs_insize; \
758 while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
759 if (_hs_psize == 0) { \
761 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
762 ((void*)((char*)(_hs_q->next) + \
763 (head)->hh.tbl->hho)) : NULL); \
765 } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
768 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
769 ((void*)((char*)(_hs_p->next) + \
770 (head)->hh.tbl->hho)) : NULL); \
774 cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
775 DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
779 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
780 ((void*)((char*)(_hs_p->next) + \
781 (head)->hh.tbl->hho)) : NULL); \
786 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
787 ((void*)((char*)(_hs_q->next) + \
788 (head)->hh.tbl->hho)) : NULL); \
792 _hs_tail->next = ((_hs_e) ? \
793 ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
798 _hs_e->prev = ((_hs_tail) ? \
799 ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
806 _hs_tail->next = NULL; \
808 if ( _hs_nmerges <= 1 ) { \
810 (head)->hh.tbl->tail = _hs_tail; \
811 DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
815 HASH_FSCK(hh,head); \
819 /* This function selects items from one hash into another hash.
820 * The end result is that the selected items have dual presence
821 * in both hashes. There is no copy of the items made; rather
822 * they are added into the new hash through a secondary hash
823 * hash handle that must be present in the structure. */
824 #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
826 uint32_t _src_bkt, _dst_bkt; \
827 void *_last_elt=NULL, *_elt; \
828 UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
829 ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
831 for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
832 for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
834 _src_hh = _src_hh->hh_next) { \
835 _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
837 _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
838 _dst_hh->key = _src_hh->key; \
839 _dst_hh->keylen = _src_hh->keylen; \
840 _dst_hh->hashv = _src_hh->hashv; \
841 _dst_hh->prev = _last_elt; \
842 _dst_hh->next = NULL; \
843 if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
845 DECLTYPE_ASSIGN(dst,_elt); \
846 HASH_MAKE_TABLE(hh_dst,dst); \
848 _dst_hh->tbl = (dst)->hh_dst.tbl; \
850 HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
851 HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
852 (dst)->hh_dst.tbl->num_items++; \
854 _last_elt_hh = _dst_hh; \
859 HASH_FSCK(hh_dst,dst); \
862 #define HASH_CLEAR(hh,head) \
865 uthash_free((head)->hh.tbl->buckets, \
866 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
867 HASH_BLOOM_FREE((head)->hh.tbl); \
868 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
/* total bookkeeping bytes used by the hash: one handle per item, the bucket
 * array, the table struct, and the bloom filter (if enabled). A NULL head
 * means an empty hash with no table allocated, so report 0 instead of
 * dereferencing NULL as the previous version did. */
#define HASH_OVERHEAD(hh,head) \
 ((head) ? (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \
 ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \
 (sizeof(UT_hash_table)) + \
 (HASH_BLOOM_BYTELEN))) : (size_t)0)
/* iterate elements in app order: el is the cursor, tmp holds the lookahead so
 * the current element may be deleted during the loop. This variant assigns tmp
 * through a char** pun, avoiding decltype/__typeof.
 * NOTE(review): the two definitions below are presumably the NO_DECLTYPE /
 * DECLTYPE arms of an #ifdef not visible in this extract — confirm. */
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
/* same loop shape, using DECLTYPE to cast the lookahead assignment */
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) /* NULL head (empty hash) counts as 0 */
893 typedef struct UT_hash_bucket {
894 struct UT_hash_handle *hh_head;
897 /* expand_mult is normally set to 0. In this situation, the max chain length
898 * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
899 * the bucket's chain exceeds this length, bucket expansion is triggered).
900 * However, setting expand_mult to a non-zero value delays bucket expansion
901 * (that would be triggered by additions to this particular bucket)
902 * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
903 * (The multiplier is simply expand_mult+1). The whole idea of this
904 * multiplier is to reduce bucket expansions, since they are expensive, in
905 * situations where we know that a particular bucket tends to be overused.
906 * It is better to let its chain length grow to a longer yet-still-bounded
907 * value, than to do an O(n) bucket expansion too often.
909 uint32_t expand_mult;
913 /* random signature used only to find hash tables in external analysis */
914 #define HASH_SIGNATURE 0xa0111fe1
915 #define HASH_BLOOM_SIGNATURE 0xb12220f2
917 typedef struct UT_hash_table {
918 UT_hash_bucket *buckets;
919 uint32_t num_buckets, log2_num_buckets;
921 struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
922 ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
924 /* in an ideal situation (all buckets used equally), no bucket would have
925 * more than ceil(#items/#buckets) items. that's the ideal chain length. */
926 uint32_t ideal_chain_maxlen;
928 /* nonideal_items is the number of items in the hash whose chain position
929 * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
930 * hash distribution; reaching them in a chain traversal takes >ideal steps */
931 uint32_t nonideal_items;
933 /* ineffective expands occur when a bucket doubling was performed, but
934 * afterward, more than half the items in the hash had nonideal chain
935 * positions. If this happens on two consecutive expansions we inhibit any
936 * further expansion, as it's not helping; this happens when the hash
937 * function isn't a good fit for the key domain. When expansion is inhibited
938 * the hash will still work, albeit no longer in constant time. */
939 uint32_t ineff_expands, noexpand;
941 uint32_t signature; /* used only to find hash tables in external analysis */
943 uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
950 typedef struct UT_hash_handle {
951 struct UT_hash_table *tbl;
952 void *prev; /* prev element in app order */
953 void *next; /* next element in app order */
954 struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
955 struct UT_hash_handle *hh_next; /* next hh in bucket order */
956 void *key; /* ptr to enclosing struct's key */
957 uint32_t hashv; /* result of hash-fcn(key) */
958 uint8_t keylen; /* enclosing struct's key len */
962 #endif /* UTHASH_H */