1 module dklib.khash;
2 
3 import std.traits : isNumeric;
4 import core.stdc.stdint;    // uint32_t, etc.
5 
6 /*!
7   @header
8 
9   Generic hash table library.
10  */
11 
12 enum AC_VERSION_KHASH_H = "0.2.8";
13 
14 import core.stdc.stdlib;
15 import core.stdc..string;
16 import core.stdc.limits;
17 
18 /* compiler specific configuration */
19 
20 alias khint32_t = uint;
21 
22 alias khint64_t = ulong;
23 
24 alias khint_t = khint32_t;
25 alias khiter_t = khint_t;
26 
27 pragma(inline, true)
28 {
29     /// bucket empty?
30     auto __ac_isempty(T)(const(khint32_t)* flag, T i)
31     {
32         return ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2);
33     }
34 
35     /// bucket deleted?
36     auto __ac_isdel(T)(const(khint32_t)* flag, T i)
37     {
38         return ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1);
39     }
40 
41     /// bucket empty OR deleted?
42     auto __ac_iseither(T)(const(khint32_t)* flag, T i)
43     {
44         return ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3);
45     }
46 
47     /// unmark deleted
48     void __ac_set_isdel_false(T)(khint32_t* flag, T i)
49     {
50         flag[i >> 4] &= ~(1uL << ((i & 0xfU) << 1));
51     }
52 
53     /// unmark empty
54     void __ac_set_isempty_false(T)(khint32_t* flag, T i)
55     {
56         flag[i >> 4] &= ~(2uL << ((i & 0xfU) << 1));
57     }
58 
59     /// mark neither empty nor deleted
60     void __ac_set_isboth_false(T)(khint32_t* flag, T i)
61     {
62         flag[i >> 4] &= ~(3uL << ((i & 0xfU) << 1));
63     }
64 
65     /// mark deleted
66     void __ac_set_isdel_true(T)(khint32_t* flag, T i)
67     {
68         flag[i >> 4] |= 1uL << ((i & 0xfU) << 1);
69     }
70 
71     auto __ac_fsize(T)(T m)
72     {
73         return ((m) < 16 ? 1 : (m) >> 4);
74     }
75 
76     void kroundup32(T)(ref T x)
77     {
78         (--(x), (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4,
79             (x) |= (x) >> 8, (x) |= (x) >> 16, ++(x));
80     }
81 }
82 
83 alias kcalloc = calloc;
84 
85 alias kmalloc = malloc;
86 
87 alias krealloc = realloc;
88 
89 alias kfree = free;
90 
91 private const double __ac_HASH_UPPER = 0.77;
92 
93 /// Straight port of khash's generic C approach
94 template khash(KT, VT, bool kh_is_map = true)
95 {
96     alias __hash_func = kh_hash!KT.kh_hash_func;
97     alias __hash_equal= kh_hash!KT.kh_hash_equal;
98 
99     struct kh_t
100     {
101         khint_t n_buckets, size, n_occupied, upper_bound;
102         khint32_t* flags;
103         KT* keys;
104         VT* vals;
105 
106         ~this()
107         {
108             kfree(cast(void*) this.keys);
109             kfree(cast(void*) this.flags);
110             kfree(cast(void*) this.vals);
111         }
112         
113         /// Lookup by key
114         ref VT opIndex(KT key)
115         {
116             auto x = kh_get(&this, key);
117             return this.vals[x];
118         }
119 
120         /// Assign by key
121         void opIndexAssign(VT val, KT key)
122         {
123             int absent;
124             auto x = khash!(uint, char).kh_put(&this, key, &absent);
125             //khash!(uint, char).kh_value(kh, k) = 10;
126             this.vals[x] = val;
127         }
128 
129         /// remove key/value pair
130         void remove(KT key)
131         {
132             auto x = kh_get(&this, key);
133             kh_del(&this, x);
134         }
135     }
136   
137     void kh_clear(kh_t* h);
138     int kh_resize(kh_t* h, khint_t new_n_buckets);
139     khint_t kh_put(kh_t* h, KT key, int* ret);
140     void kh_del(kh_t* h, khint_t x);
141   
142     kh_t* kh_init()
143     {
144         return cast(kh_t*) kcalloc(1, kh_t.sizeof);
145     }
146   
147     void kh_destroy(kh_t* h)
148     {
149         if (h)
150         {
151             kfree(cast(void*) h.keys);
152             kfree(cast(void*) h.flags);
153             kfree(cast(void*) h.vals);
154             kfree(h);
155         }
156     }
157   
158     void kh_clear(kh_t* h)
159     {
160       if (h && h.flags)
161       {
162         memset(h.flags, 0xaa, __ac_fsize(h.n_buckets) * khint32_t.sizeof);
163         h.size = h.n_occupied = 0;
164       }
165     }
166   
167     khint_t kh_get(const(kh_t)* h, KT key)
168     {
169         if (h.n_buckets)
170         {
171           khint_t k, i, last, mask, step = 0;
172           mask = h.n_buckets - 1;
173           k = __hash_func(key);
174           i = k & mask;
175           last = i;
176           while (!__ac_isempty(h.flags, i) && (__ac_isdel(h.flags, i) || !__hash_equal(h.keys[i], key)))
177           {
178               i = (i + (++step)) & mask;
179               if (i == last)
180                   return h.n_buckets;
181           }
182           return __ac_iseither(h.flags, i) ? h.n_buckets : i;
183         }
184         else
185             return 0;
186     }
187 
188     int kh_resize(kh_t *h, khint_t new_n_buckets)
189 	{
190         /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */
191 		khint32_t *new_flags = null;
192 		khint_t j = 1;
193 		{
194 			kroundup32(new_n_buckets);
195 			if (new_n_buckets < 4) new_n_buckets = 4;
196 			if (h.size >= cast(khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	/* requested size is too small */
197 			else { /* hash table size to be changed (shrink or expand); rehash */
198 				new_flags = cast(khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * khint32_t.sizeof);
199 				if (!new_flags) return -1;
200 				memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * khint32_t.sizeof);
201 				if (h.n_buckets < new_n_buckets) {	/* expand */
202 					KT *new_keys = cast(KT*)krealloc(cast(void *)h.keys, new_n_buckets * KT.sizeof);
203 					if (!new_keys) { kfree(new_flags); return -1; }
204 					h.keys = new_keys;
205 					if (kh_is_map) {
206 						VT *new_vals = cast(VT*)krealloc(cast(void *)h.vals, new_n_buckets * VT.sizeof);
207 						if (!new_vals) { kfree(new_flags); return -1; }
208 						h.vals = new_vals;
209 					}
210 				} /* otherwise shrink */
211 			}
212 		}
213 		if (j) { /* rehashing is needed */
214 			for (j = 0; j != h.n_buckets; ++j) {
215 				if (__ac_iseither(h.flags, j) == 0) {
216 					KT key = h.keys[j];
217 					VT val;
218 					khint_t new_mask;
219 					new_mask = new_n_buckets - 1;
220 					if (kh_is_map) val = h.vals[j];
221 					__ac_set_isdel_true(h.flags, j);
222 					while (1) { /* kick-out process; sort of like in Cuckoo hashing */
223 						khint_t k, i, step = 0;
224 						k = __hash_func(key);
225 						i = k & new_mask;
226 						while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask;
227 						__ac_set_isempty_false(new_flags, i);
228 						if (i < h.n_buckets && __ac_iseither(h.flags, i) == 0) { /* kick out the existing element */
229 							{ KT tmp = h.keys[i]; h.keys[i] = key; key = tmp; }
230 							if (kh_is_map) { VT tmp = h.vals[i]; h.vals[i] = val; val = tmp; }
231 							__ac_set_isdel_true(h.flags, i); /* mark it as deleted in the old hash table */
232 						} else { /* write the element and jump out of the loop */
233 							h.keys[i] = key;
234 							if (kh_is_map) h.vals[i] = val;
235 							break;
236 						}
237 					}
238 				}
239 			}
240 			if (h.n_buckets > new_n_buckets) { /* shrink the hash table */
241 				h.keys = cast(KT*)krealloc(cast(void *)h.keys, new_n_buckets * KT.sizeof);
242 				if (kh_is_map) h.vals = cast(VT*)krealloc(cast(void *)h.vals, new_n_buckets * VT.sizeof);
243 			}
244 			kfree(h.flags); /* free the working space */
245 			h.flags = new_flags;
246 			h.n_buckets = new_n_buckets;
247 			h.n_occupied = h.size;
248 			h.upper_bound = cast(khint_t)(h.n_buckets * __ac_HASH_UPPER + 0.5);
249 		}
250 		return 0;
251 	}
252 
253 	khint_t kh_put(kh_t *h, KT key, int *ret)
254 	{
255 		khint_t x;
256 		if (h.n_occupied >= h.upper_bound) { /* update the hash table */
257 			if (h.n_buckets > (h.size<<1)) {
258 				if (kh_resize(h, h.n_buckets - 1) < 0) { /* clear "deleted" elements */
259 					*ret = -1; return h.n_buckets;
260 				}
261 			} else if (kh_resize(h, h.n_buckets + 1) < 0) { /* expand the hash table */
262 				*ret = -1; return h.n_buckets;
263 			}
264 		} /* TODO: to implement automatically shrinking; resize() already support shrinking */
265 		{
266 			khint_t k, i, site, last, mask = h.n_buckets - 1, step = 0;
267 			x = site = h.n_buckets; k = __hash_func(key); i = k & mask;
268 			if (__ac_isempty(h.flags, i)) x = i; /* for speed up */
269 			else {
270 				last = i;
271 				while (!__ac_isempty(h.flags, i) && (__ac_isdel(h.flags, i) || !__hash_equal(h.keys[i], key))) {
272 					if (__ac_isdel(h.flags, i)) site = i;
273 					i = (i + (++step)) & mask;
274 					if (i == last) { x = site; break; }
275 				}
276 				if (x == h.n_buckets) {
277 					if (__ac_isempty(h.flags, i) && site != h.n_buckets) x = site;
278 					else x = i;
279 				}
280 			}
281 		}
282 		if (__ac_isempty(h.flags, x)) { /* not present at all */
283 			h.keys[x] = key;
284 			__ac_set_isboth_false(h.flags, x);
285 			++h.size; ++h.n_occupied;
286 			*ret = 1;
287 		} else if (__ac_isdel(h.flags, x)) { /* deleted */
288 			h.keys[x] = key;
289 			__ac_set_isboth_false(h.flags, x);
290 			++h.size;
291 			*ret = 2;
292 		} else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */
293 		return x;
294 	}
295 
296     void kh_del(kh_t *h, khint_t x)
297     {
298         if (x != h.n_buckets && !__ac_iseither(h.flags, x)) {
299             __ac_set_isdel_true(h.flags, x);
300             --h.size;
301         }
302     }
303 
304     auto kh_exists(kh_t* h, khint_t x)
305     {
306         return (!__ac_iseither(h.flags, x));
307     }
308 
309     auto kh_key(kh_t* h, khint_t x)
310     {
311         return h.keys[x];
312     }
313 
314     auto kh_val(kh_t* h, khint_t x)
315     {
316         return h.vals[x];
317     }
318 
319     auto kh_begin(kh_t* h)
320     {
321         return cast(khint_t) 0;
322     }
323 
324     auto kh_end(kh_t* h)
325     {
326         return h.n_buckets;
327     }
328 
329     auto kh_size(kh_t* h)
330     {
331         return h.size;
332     }
333 
334     alias kh_n_buckets = kh_end;
335 
336     alias kh_value = kh_val;
337 
338 }
339 
340 /* --- BEGIN OF HASH FUNCTIONS --- */
341 
342 template kh_hash(T)
343 {
344     auto kh_hash_func(T)(T key)
345     if (is(T == uint) || is(T == uint32_t) || is(T == khint32_t))
346     {
347         return key;
348     }
349 
350     bool kh_hash_equal(T)(T a, T b)
351     if (isNumeric!T)
352     {
353         return (a == b);
354     }
355 
356     auto kh_hash_func(T)(T key)
357     if (is(T == ulong) || is(T == uint64_t) || is(T == khint64_t))
358     {
359         return cast(khint32_t) ((key)>>33^(key)^(key)<<11);
360     }
361 
362     khint_t __ac_X31_hash_string(const(char)* s)
363     {
364         khint_t h = cast(khint_t)*s;
365         if (h) for  (++s; *s; ++s) h = (h << 5) - h + cast(khint_t)*s;
366         return h;
367     }
368     
369     auto kh_hash_func(T)(T* key)
370     if(is(T == char) || is(T == const(char)) || is(T == immutable(char)))
371     {
372         return __ac_X31_hash_string(key);
373     }
374 
375     bool kh_hash_equal(T)(T* a, T* b)
376     if(is(T == char) || is(T == const(char)) || is(T == immutable(char)))
377     {
378         return (strcmp(a, b) == 0);
379     }
380 
381     auto __ac_Wang_hash(T)(T key)
382     if (is(T == uint) || is(T == uint32_t) || is(T == khint32_t))
383     {
384         key += ~(key << 15);
385         key ^=  (key >> 10);
386         key +=  (key << 3);
387         key ^=  (key >> 6);
388         key += ~(key << 11);
389         key ^=  (key >> 16);
390         return key;
391     }
392 
393     // TODO
394     alias kh_int_hash_func2 = __ac_Wang_hash;
395 }
396 
397 /* --- END OF HASH FUNCTIONS --- */
398 
399 /* Other convenient macros... */
400 
401 /*!
402   @abstract Type of the hash table.
403   @param  name  Name of the hash table [symbol]
404  */
405 //#define khash_t(name) kh_##name##_t
406 
407 /*! @function
408   @abstract     Initiate a hash table.
409   @param  name  Name of the hash table [symbol]
410   @return       Pointer to the hash table [khash_t(name)*]
411  */
412 //#define kh_init(name) kh_init_##name()
413 
414 /*! @function
415   @abstract     Destroy a hash table.
416   @param  name  Name of the hash table [symbol]
417   @param  h     Pointer to the hash table [khash_t(name)*]
418  */
419 //#define kh_destroy(name, h) kh_destroy_##name(h)
420 
421 /*! @function
422   @abstract     Reset a hash table without deallocating memory.
423   @param  name  Name of the hash table [symbol]
424   @param  h     Pointer to the hash table [khash_t(name)*]
425  */
426 //#define kh_clear(name, h) kh_clear_##name(h)
427 
428 /*! @function
429   @abstract     Resize a hash table.
430   @param  name  Name of the hash table [symbol]
431   @param  h     Pointer to the hash table [khash_t(name)*]
432   @param  s     New size [khint_t]
433  */
434 //#define kh_resize(name, h, s) kh_resize_##name(h, s)
435 
436 /*! @function
437   @abstract     Insert a key to the hash table.
438   @param  name  Name of the hash table [symbol]
439   @param  h     Pointer to the hash table [khash_t(name)*]
440   @param  k     Key [type of keys]
441   @param  r     Extra return code: -1 if the operation failed;
442                 0 if the key is present in the hash table;
443                 1 if the bucket is empty (never used); 2 if the element in
444 				the bucket has been deleted [int*]
445   @return       Iterator to the inserted element [khint_t]
446  */
447 //#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
448 
449 /*! @function
450   @abstract     Retrieve a key from the hash table.
451   @param  name  Name of the hash table [symbol]
452   @param  h     Pointer to the hash table [khash_t(name)*]
453   @param  k     Key [type of keys]
454   @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
455  */
456 //#define kh_get(name, h, k) kh_get_##name(h, k)
457 
458 /*! @function
459   @abstract     Remove a key from the hash table.
460   @param  name  Name of the hash table [symbol]
461   @param  h     Pointer to the hash table [khash_t(name)*]
462   @param  k     Iterator to the element to be deleted [khint_t]
463  */
464 //#define kh_del(name, h, k) kh_del_##name(h, k)
465 
466 /*! @function
467   @abstract     Test whether a bucket contains data.
468   @param  h     Pointer to the hash table [khash_t(name)*]
469   @param  x     Iterator to the bucket [khint_t]
470   @return       1 if containing data; 0 otherwise [int]
471  */
472 //#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
473 // Moved into template khash(KT, VT)
474 
475 /*! @function
476   @abstract     Get key given an iterator
477   @param  h     Pointer to the hash table [khash_t(name)*]
478   @param  x     Iterator to the bucket [khint_t]
479   @return       Key [type of keys]
480  */
481 //#define kh_key(h, x) ((h)->keys[x])
482 // Moved into template khash(KT, VT)
483 
484 /*! @function
485   @abstract     Get value given an iterator
486   @param  h     Pointer to the hash table [khash_t(name)*]
487   @param  x     Iterator to the bucket [khint_t]
488   @return       Value [type of values]
489   @discussion   For hash sets, calling this results in segfault.
490  */
491 //#define kh_val(h, x) ((h)->vals[x])
492 // Moved into template khash(KT, VT)
493 
494 /*! @function
495   @abstract     Alias of kh_val()
496  */
497 //#define kh_value(h, x) ((h)->vals[x])
498 // Moved into template khash(KT, VT)
499 
500 /*! @function
501   @abstract     Get the start iterator
502   @param  h     Pointer to the hash table [khash_t(name)*]
503   @return       The start iterator [khint_t]
504  */
505 //#define kh_begin(h) (khint_t)(0)
506 // Moved into template khash(KT, VT)
507 
508 /*! @function
509   @abstract     Get the end iterator
510   @param  h     Pointer to the hash table [khash_t(name)*]
511   @return       The end iterator [khint_t]
512  */
513 //#define kh_end(h) ((h)->n_buckets)
514 // Moved into template khash(KT, VT)
515 
516 /*! @function
517   @abstract     Get the number of elements in the hash table
518   @param  h     Pointer to the hash table [khash_t(name)*]
519   @return       Number of elements in the hash table [khint_t]
520  */
521 //#define kh_size(h) ((h)->size)
522 // Moved into template khash(KT, VT)
523 
524 /*! @function
525   @abstract     Get the number of buckets in the hash table
526   @param  h     Pointer to the hash table [khash_t(name)*]
527   @return       Number of buckets in the hash table [khint_t]
528  */
529 //#define kh_n_buckets(h) ((h)->n_buckets)
530 // Moved into template khash(KT, VT)
531 
532 /++ foreach: TODO
533 
534 /*! @function
535   @abstract     Iterate over the entries in the hash table
536   @param  h     Pointer to the hash table [khash_t(name)*]
537   @param  kvar  Variable to which key will be assigned
538   @param  vvar  Variable to which value will be assigned
539   @param  code  Block of code to execute
540  */
541 auto kh_foreach(kh_t* h, kvar, vvar, code)
542 {
543     khint_t __i;
544     for (__i = kh_begin(h); __i != kh_end(h); ++__i) {
545         if (!kh_exist(h, __i)) continue;
546         kvar = kh_key(h, __i);
547         vvar = kh_val(h, __i);
548         code;
549     }
550 }
551 
552 /*! @function
553   @abstract     Iterate over the values in the hash table
554   @param  h     Pointer to the hash table [khash_t(name)*]
555   @param  vvar  Variable to which value will be assigned
556   @param  code  Block of code to execute
557  */
558  auto kh_foreach_value(kh_t* h, vvar, code)
559  {
560      khint_t __i;
561      for (__i = kh_begin(h); __i != kh_end(h); ++__i) {
562          if (!kh_exist(h, __i)) continue;
563          vvar = kh_val(h, __i);
564          code;
565      }
566  }
567 +/
568 unittest
569 {
570     import std.stdio : writeln;
571 
572     writeln("khash unit test");
573 
574 //    auto kh = khash!(uint, char).kh_init();
575 
576     //int absent;
577     //auto k = khash!(uint, char).kh_put(kh, 5, &absent);
578     ////khash!(uint, char).kh_value(kh, k) = 10;
579     //kh.vals[k] = 'J';
580 
581 //    (*kh)[5] = 'J';
582 //    writeln("Entry value:", (*kh)[5]);
583     
584 //    khash!(uint, char).kh_destroy(kh);
585 
586     auto kh = khash!(uint, char).kh_t();
587     kh[5] = 'J';
588     writeln("Value: ", kh[5]);
589 }