1 module dklib.khash; 2 3 import std.traits : isNumeric; 4 import core.stdc.stdint; // uint32_t, etc. 5 6 /*! 7 @header 8 9 Generic hash table library. 10 */ 11 12 enum AC_VERSION_KHASH_H = "0.2.8"; 13 14 import core.stdc.stdlib; 15 import core.stdc..string; 16 import core.stdc.limits; 17 18 /* compiler specific configuration */ 19 20 alias khint32_t = uint; 21 22 alias khint64_t = ulong; 23 24 alias khint_t = khint32_t; 25 alias khiter_t = khint_t; 26 27 pragma(inline, true) 28 { 29 /// bucket empty? 30 auto __ac_isempty(T)(const(khint32_t)* flag, T i) 31 { 32 return ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2); 33 } 34 35 /// bucket deleted? 36 auto __ac_isdel(T)(const(khint32_t)* flag, T i) 37 { 38 return ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1); 39 } 40 41 /// bucket empty OR deleted? 42 auto __ac_iseither(T)(const(khint32_t)* flag, T i) 43 { 44 return ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3); 45 } 46 47 /// unmark deleted 48 void __ac_set_isdel_false(T)(khint32_t* flag, T i) 49 { 50 flag[i >> 4] &= ~(1uL << ((i & 0xfU) << 1)); 51 } 52 53 /// unmark empty 54 void __ac_set_isempty_false(T)(khint32_t* flag, T i) 55 { 56 flag[i >> 4] &= ~(2uL << ((i & 0xfU) << 1)); 57 } 58 59 /// mark neither empty nor deleted 60 void __ac_set_isboth_false(T)(khint32_t* flag, T i) 61 { 62 flag[i >> 4] &= ~(3uL << ((i & 0xfU) << 1)); 63 } 64 65 /// mark deleted 66 void __ac_set_isdel_true(T)(khint32_t* flag, T i) 67 { 68 flag[i >> 4] |= 1uL << ((i & 0xfU) << 1); 69 } 70 71 auto __ac_fsize(T)(T m) 72 { 73 return ((m) < 16 ? 1 : (m) >> 4); 74 } 75 76 void kroundup32(T)(ref T x) 77 { 78 (--(x), (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, 79 (x) |= (x) >> 8, (x) |= (x) >> 16, ++(x)); 80 } 81 } 82 83 alias kcalloc = calloc; 84 85 alias kmalloc = malloc; 86 87 alias krealloc = realloc; 88 89 alias kfree = free; 90 91 private const double __ac_HASH_UPPER = 0.77; 92 93 /// Straight port of khash's generic C approach 94 template khash(KT, VT, bool kh_is_map = true) 95 { 96 alias __hash_func = kh_hash!KT.kh_hash_func; 97 alias __hash_equal= kh_hash!KT.kh_hash_equal; 98 99 struct kh_t 100 { 101 khint_t n_buckets, size, n_occupied, upper_bound; 102 khint32_t* flags; 103 KT* keys; 104 VT* vals; 105 106 ~this() 107 { 108 kfree(cast(void*) this.keys); 109 kfree(cast(void*) this.flags); 110 kfree(cast(void*) this.vals); 111 } 112 113 /// Lookup by key 114 ref VT opIndex(KT key) 115 { 116 auto x = kh_get(&this, key); 117 return this.vals[x]; 118 } 119 120 /// Assign by key 121 void opIndexAssign(VT val, KT key) 122 { 123 int absent; 124 auto x = khash!(uint, char).kh_put(&this, key, &absent); 125 //khash!(uint, char).kh_value(kh, k) = 10; 126 this.vals[x] = val; 127 } 128 129 /// remove key/value pair 130 void remove(KT key) 131 { 132 auto x = kh_get(&this, key); 133 kh_del(&this, x); 134 } 135 } 136 137 void kh_clear(kh_t* h); 138 int kh_resize(kh_t* h, khint_t new_n_buckets); 139 khint_t kh_put(kh_t* h, KT key, int* ret); 140 void kh_del(kh_t* h, khint_t x); 141 142 kh_t* kh_init() 143 { 144 return cast(kh_t*) kcalloc(1, kh_t.sizeof); 145 } 146 147 void kh_destroy(kh_t* h) 148 { 149 if (h) 150 { 151 kfree(cast(void*) h.keys); 152 kfree(cast(void*) h.flags); 153 kfree(cast(void*) h.vals); 154 kfree(h); 155 } 156 } 157 158 void kh_clear(kh_t* h) 159 { 160 if (h && h.flags) 161 { 162 memset(h.flags, 0xaa, __ac_fsize(h.n_buckets) * khint32_t.sizeof); 163 h.size = h.n_occupied = 0; 164 } 165 } 166 167 khint_t kh_get(const(kh_t)* h, KT key) 168 { 169 if (h.n_buckets) 170 { 171 khint_t k, i, last, mask, step = 0; 172 mask = h.n_buckets - 1; 173 k = __hash_func(key); 174 i = k & mask; 175 last = i; 176 while (!__ac_isempty(h.flags, i) && (__ac_isdel(h.flags, i) || !__hash_equal(h.keys[i], key))) 177 { 178 i = (i + (++step)) & mask; 179 if (i == last) 180 return h.n_buckets; 181 } 182 return __ac_iseither(h.flags, i) ? h.n_buckets : i; 183 } 184 else 185 return 0; 186 } 187 188 int kh_resize(kh_t *h, khint_t new_n_buckets) 189 { 190 /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ 191 khint32_t *new_flags = null; 192 khint_t j = 1; 193 { 194 kroundup32(new_n_buckets); 195 if (new_n_buckets < 4) new_n_buckets = 4; 196 if (h.size >= cast(khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ 197 else { /* hash table size to be changed (shrink or expand); rehash */ 198 new_flags = cast(khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * khint32_t.sizeof); 199 if (!new_flags) return -1; 200 memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * khint32_t.sizeof); 201 if (h.n_buckets < new_n_buckets) { /* expand */ 202 KT *new_keys = cast(KT*)krealloc(cast(void *)h.keys, new_n_buckets * KT.sizeof); 203 if (!new_keys) { kfree(new_flags); return -1; } 204 h.keys = new_keys; 205 if (kh_is_map) { 206 VT *new_vals = cast(VT*)krealloc(cast(void *)h.vals, new_n_buckets * VT.sizeof); 207 if (!new_vals) { kfree(new_flags); return -1; } 208 h.vals = new_vals; 209 } 210 } /* otherwise shrink */ 211 } 212 } 213 if (j) { /* rehashing is needed */ 214 for (j = 0; j != h.n_buckets; ++j) { 215 if (__ac_iseither(h.flags, j) == 0) { 216 KT key = h.keys[j]; 217 VT val; 218 khint_t new_mask; 219 new_mask = new_n_buckets - 1; 220 if (kh_is_map) val = h.vals[j]; 221 __ac_set_isdel_true(h.flags, j); 222 while (1) { /* kick-out process; sort of like in Cuckoo hashing */ 223 khint_t k, i, step = 0; 224 k = __hash_func(key); 225 i = k & new_mask; 226 while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; 227 __ac_set_isempty_false(new_flags, i); 228 if (i < h.n_buckets && __ac_iseither(h.flags, i) == 0) { /* kick out the existing element */ 229 { KT tmp = h.keys[i]; h.keys[i] = key; key = tmp; } 230 if (kh_is_map) { VT tmp = h.vals[i]; h.vals[i] = val; val = tmp; } 231 __ac_set_isdel_true(h.flags, i); /* mark it as deleted in the old hash table */ 232 } else { /* write the element and jump out of the loop */ 233 h.keys[i] = key; 234 if (kh_is_map) h.vals[i] = val; 235 break; 236 } 237 } 238 } 239 } 240 if (h.n_buckets > new_n_buckets) { /* shrink the hash table */ 241 h.keys = cast(KT*)krealloc(cast(void *)h.keys, new_n_buckets * KT.sizeof); 242 if (kh_is_map) h.vals = cast(VT*)krealloc(cast(void *)h.vals, new_n_buckets * VT.sizeof); 243 } 244 kfree(h.flags); /* free the working space */ 245 h.flags = new_flags; 246 h.n_buckets = new_n_buckets; 247 h.n_occupied = h.size; 248 h.upper_bound = cast(khint_t)(h.n_buckets * __ac_HASH_UPPER + 0.5); 249 } 250 return 0; 251 } 252 253 khint_t kh_put(kh_t *h, KT key, int *ret) 254 { 255 khint_t x; 256 if (h.n_occupied >= h.upper_bound) { /* update the hash table */ 257 if (h.n_buckets > (h.size<<1)) { 258 if (kh_resize(h, h.n_buckets - 1) < 0) { /* clear "deleted" elements */ 259 *ret = -1; return h.n_buckets; 260 } 261 } else if (kh_resize(h, h.n_buckets + 1) < 0) { /* expand the hash table */ 262 *ret = -1; return h.n_buckets; 263 } 264 } /* TODO: to implement automatically shrinking; resize() already support shrinking */ 265 { 266 khint_t k, i, site, last, mask = h.n_buckets - 1, step = 0; 267 x = site = h.n_buckets; k = __hash_func(key); i = k & mask; 268 if (__ac_isempty(h.flags, i)) x = i; /* for speed up */ 269 else { 270 last = i; 271 while (!__ac_isempty(h.flags, i) && (__ac_isdel(h.flags, i) || !__hash_equal(h.keys[i], key))) { 272 if (__ac_isdel(h.flags, i)) site = i; 273 i = (i + (++step)) & mask; 274 if (i == last) { x = site; break; } 275 } 276 if (x == h.n_buckets) { 277 if (__ac_isempty(h.flags, i) && site != h.n_buckets) x = site; 278 else x = i; 279 } 280 } 281 } 282 if (__ac_isempty(h.flags, x)) { /* not present at all */ 283 h.keys[x] = key; 284 __ac_set_isboth_false(h.flags, x); 285 ++h.size; ++h.n_occupied; 286 *ret = 1; 287 } else if (__ac_isdel(h.flags, x)) { /* deleted */ 288 h.keys[x] = key; 289 __ac_set_isboth_false(h.flags, x); 290 ++h.size; 291 *ret = 2; 292 } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ 293 return x; 294 } 295 296 void kh_del(kh_t *h, khint_t x) 297 { 298 if (x != h.n_buckets && !__ac_iseither(h.flags, x)) { 299 __ac_set_isdel_true(h.flags, x); 300 --h.size; 301 } 302 } 303 304 auto kh_exists(kh_t* h, khint_t x) 305 { 306 return (!__ac_iseither(h.flags, x)); 307 } 308 309 auto kh_key(kh_t* h, khint_t x) 310 { 311 return h.keys[x]; 312 } 313 314 auto kh_val(kh_t* h, khint_t x) 315 { 316 return h.vals[x]; 317 } 318 319 auto kh_begin(kh_t* h) 320 { 321 return cast(khint_t) 0; 322 } 323 324 auto kh_end(kh_t* h) 325 { 326 return h.n_buckets; 327 } 328 329 auto kh_size(kh_t* h) 330 { 331 return h.size; 332 } 333 334 alias kh_n_buckets = kh_end; 335 336 alias kh_value = kh_val; 337 338 } 339 340 /* --- BEGIN OF HASH FUNCTIONS --- */ 341 342 template kh_hash(T) 343 { 344 auto kh_hash_func(T)(T key) 345 if (is(T == uint) || is(T == uint32_t) || is(T == khint32_t)) 346 { 347 return key; 348 } 349 350 bool kh_hash_equal(T)(T a, T b) 351 if (isNumeric!T) 352 { 353 return (a == b); 354 } 355 356 auto kh_hash_func(T)(T key) 357 if (is(T == ulong) || is(T == uint64_t) || is(T == khint64_t)) 358 { 359 return cast(khint32_t) ((key)>>33^(key)^(key)<<11); 360 } 361 362 khint_t __ac_X31_hash_string(const(char)* s) 363 { 364 khint_t h = cast(khint_t)*s; 365 if (h) for (++s; *s; ++s) h = (h << 5) - h + cast(khint_t)*s; 366 return h; 367 } 368 369 auto kh_hash_func(T)(T* key) 370 if(is(T == char) || is(T == const(char)) || is(T == immutable(char))) 371 { 372 return __ac_X31_hash_string(key); 373 } 374 375 bool kh_hash_equal(T)(T* a, T* b) 376 if(is(T == char) || is(T == const(char)) || is(T == immutable(char))) 377 { 378 return (strcmp(a, b) == 0); 379 } 380 381 auto __ac_Wang_hash(T)(T key) 382 if (is(T == uint) || is(T == uint32_t) || is(T == khint32_t)) 383 { 384 key += ~(key << 15); 385 key ^= (key >> 10); 386 key += (key << 3); 387 key ^= (key >> 6); 388 key += ~(key << 11); 389 key ^= (key >> 16); 390 return key; 391 } 392 393 // TODO 394 alias kh_int_hash_func2 = __ac_Wang_hash; 395 } 396 397 /* --- END OF HASH FUNCTIONS --- */ 398 399 /* Other convenient macros... */ 400 401 /*! 402 @abstract Type of the hash table. 403 @param name Name of the hash table [symbol] 404 */ 405 //#define khash_t(name) kh_##name##_t 406 407 /*! @function 408 @abstract Initiate a hash table. 409 @param name Name of the hash table [symbol] 410 @return Pointer to the hash table [khash_t(name)*] 411 */ 412 //#define kh_init(name) kh_init_##name() 413 414 /*! @function 415 @abstract Destroy a hash table. 416 @param name Name of the hash table [symbol] 417 @param h Pointer to the hash table [khash_t(name)*] 418 */ 419 //#define kh_destroy(name, h) kh_destroy_##name(h) 420 421 /*! @function 422 @abstract Reset a hash table without deallocating memory. 423 @param name Name of the hash table [symbol] 424 @param h Pointer to the hash table [khash_t(name)*] 425 */ 426 //#define kh_clear(name, h) kh_clear_##name(h) 427 428 /*! @function 429 @abstract Resize a hash table. 430 @param name Name of the hash table [symbol] 431 @param h Pointer to the hash table [khash_t(name)*] 432 @param s New size [khint_t] 433 */ 434 //#define kh_resize(name, h, s) kh_resize_##name(h, s) 435 436 /*! @function 437 @abstract Insert a key to the hash table. 438 @param name Name of the hash table [symbol] 439 @param h Pointer to the hash table [khash_t(name)*] 440 @param k Key [type of keys] 441 @param r Extra return code: -1 if the operation failed; 442 0 if the key is present in the hash table; 443 1 if the bucket is empty (never used); 2 if the element in 444 the bucket has been deleted [int*] 445 @return Iterator to the inserted element [khint_t] 446 */ 447 //#define kh_put(name, h, k, r) kh_put_##name(h, k, r) 448 449 /*! @function 450 @abstract Retrieve a key from the hash table. 451 @param name Name of the hash table [symbol] 452 @param h Pointer to the hash table [khash_t(name)*] 453 @param k Key [type of keys] 454 @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] 455 */ 456 //#define kh_get(name, h, k) kh_get_##name(h, k) 457 458 /*! @function 459 @abstract Remove a key from the hash table. 460 @param name Name of the hash table [symbol] 461 @param h Pointer to the hash table [khash_t(name)*] 462 @param k Iterator to the element to be deleted [khint_t] 463 */ 464 //#define kh_del(name, h, k) kh_del_##name(h, k) 465 466 /*! @function 467 @abstract Test whether a bucket contains data. 468 @param h Pointer to the hash table [khash_t(name)*] 469 @param x Iterator to the bucket [khint_t] 470 @return 1 if containing data; 0 otherwise [int] 471 */ 472 //#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) 473 // Moved into template khash(KT, VT) 474 475 /*! @function 476 @abstract Get key given an iterator 477 @param h Pointer to the hash table [khash_t(name)*] 478 @param x Iterator to the bucket [khint_t] 479 @return Key [type of keys] 480 */ 481 //#define kh_key(h, x) ((h)->keys[x]) 482 // Moved into template khash(KT, VT) 483 484 /*! @function 485 @abstract Get value given an iterator 486 @param h Pointer to the hash table [khash_t(name)*] 487 @param x Iterator to the bucket [khint_t] 488 @return Value [type of values] 489 @discussion For hash sets, calling this results in segfault. 490 */ 491 //#define kh_val(h, x) ((h)->vals[x]) 492 // Moved into template khash(KT, VT) 493 494 /*! @function 495 @abstract Alias of kh_val() 496 */ 497 //#define kh_value(h, x) ((h)->vals[x]) 498 // Moved into template khash(KT, VT) 499 500 /*! @function 501 @abstract Get the start iterator 502 @param h Pointer to the hash table [khash_t(name)*] 503 @return The start iterator [khint_t] 504 */ 505 //#define kh_begin(h) (khint_t)(0) 506 // Moved into template khash(KT, VT) 507 508 /*! @function 509 @abstract Get the end iterator 510 @param h Pointer to the hash table [khash_t(name)*] 511 @return The end iterator [khint_t] 512 */ 513 //#define kh_end(h) ((h)->n_buckets) 514 // Moved into template khash(KT, VT) 515 516 /*! @function 517 @abstract Get the number of elements in the hash table 518 @param h Pointer to the hash table [khash_t(name)*] 519 @return Number of elements in the hash table [khint_t] 520 */ 521 //#define kh_size(h) ((h)->size) 522 // Moved into template khash(KT, VT) 523 524 /*! @function 525 @abstract Get the number of buckets in the hash table 526 @param h Pointer to the hash table [khash_t(name)*] 527 @return Number of buckets in the hash table [khint_t] 528 */ 529 //#define kh_n_buckets(h) ((h)->n_buckets) 530 // Moved into template khash(KT, VT) 531 532 /++ foreach: TODO 533 534 /*! @function 535 @abstract Iterate over the entries in the hash table 536 @param h Pointer to the hash table [khash_t(name)*] 537 @param kvar Variable to which key will be assigned 538 @param vvar Variable to which value will be assigned 539 @param code Block of code to execute 540 */ 541 auto kh_foreach(kh_t* h, kvar, vvar, code) 542 { 543 khint_t __i; 544 for (__i = kh_begin(h); __i != kh_end(h); ++__i) { 545 if (!kh_exist(h, __i)) continue; 546 kvar = kh_key(h, __i); 547 vvar = kh_val(h, __i); 548 code; 549 } 550 } 551 552 /*! @function 553 @abstract Iterate over the values in the hash table 554 @param h Pointer to the hash table [khash_t(name)*] 555 @param vvar Variable to which value will be assigned 556 @param code Block of code to execute 557 */ 558 auto kh_foreach_value(kh_t* h, vvar, code) 559 { 560 khint_t __i; 561 for (__i = kh_begin(h); __i != kh_end(h); ++__i) { 562 if (!kh_exist(h, __i)) continue; 563 vvar = kh_val(h, __i); 564 code; 565 } 566 } 567 +/ 568 unittest 569 { 570 import std.stdio : writeln; 571 572 writeln("khash unit test"); 573 574 // auto kh = khash!(uint, char).kh_init(); 575 576 //int absent; 577 //auto k = khash!(uint, char).kh_put(kh, 5, &absent); 578 ////khash!(uint, char).kh_value(kh, k) = 10; 579 //kh.vals[k] = 'J'; 580 581 // (*kh)[5] = 'J'; 582 // writeln("Entry value:", (*kh)[5]); 583 584 // khash!(uint, char).kh_destroy(kh); 585 586 auto kh = khash!(uint, char).kh_t(); 587 kh[5] = 'J'; 588 writeln("Value: ", kh[5]); 589 }