Ruby  3.4.0dev (2024-11-05 revision e440268d51fe02b303e3817a7a733a0dac1c5091)
st.c (e440268d51fe02b303e3817a7a733a0dac1c5091)
1 /* This is a public domain general purpose hash table package
2  originally written by Peter Moore @ UCB.
3 
4  The hash table data structures were redesigned and the package was
5  rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6 
7 /* The original package implemented classic bucket-based hash tables
8  with entries doubly linked for an access by their insertion order.
9  To decrease pointer chasing and as a consequence to improve a data
10  locality the current implementation is based on storing entries in
11  an array and using hash tables with open addressing. The current
12  entries are more compact in comparison with the original ones and
13  this also improves the data locality.
14 
15  The hash table has two arrays called *bins* and *entries*.
16 
17  bins:
18  -------
19  | | entries array:
20  |-------| --------------------------------
21  | index | | | entry: | | |
22  |-------| | | | | |
23  | ... | | ... | hash | ... | ... |
24  |-------| | | key | | |
25  | empty | | | record | | |
26  |-------| --------------------------------
27  | ... | ^ ^
28  |-------| |_ entries start |_ entries bound
29  |deleted|
30  -------
31 
32  o The entry array contains table entries in the same order as they
33  were inserted.
34 
35  When the first entry is deleted, a variable containing index of
36  the current first entry (*entries start*) is changed. In all
37  other cases of the deletion, we just mark the entry as deleted by
38  using a reserved hash value.
39 
40  Such organization of the entry storage makes operations of the
41  table shift and the entries traversal very fast.
42 
43  o The bins provide access to the entries by their keys. The
44  key hash is mapped to a bin containing *index* of the
45  corresponding entry in the entry array.
46 
47  The bin array size is always power of two, it makes mapping very
48  fast by using the corresponding lower bits of the hash.
49  Generally it is not a good idea to ignore some part of the hash.
50  But alternative approach is worse. For example, we could use a
51  modulo operation for mapping and a prime number for the size of
52  the bin array. Unfortunately, the modulo operation for big
53  64-bit numbers are extremely slow (it takes more than 100 cycles
54  on modern Intel CPUs).
55 
56  Still other bits of the hash value are used when the mapping
57  results in a collision. In this case we use a secondary hash
58  value which is a result of a function of the collision bin
59  index and the original hash value. The function choice
60  guarantees that we can traverse all bins and finally find the
61  corresponding bin as after several iterations the function
62  becomes a full cycle linear congruential generator because it
63  satisfies requirements of the Hull-Dobell theorem.
64 
65  When an entry is removed from the table besides marking the
66  hash in the corresponding entry described above, we also mark
67  the bin by a special value in order to find entries which had
68  a collision with the removed entries.
69 
70  There are two reserved values for the bins. One denotes an
71  empty bin, another one denotes a bin for a deleted entry.
72 
73  o The length of the bin array is at least two times more than the
74  entry array length. This keeps the table load factor healthy.
75  The trigger of rebuilding the table is always a case when we can
76  not insert an entry anymore at the entries bound. We could
77  change the entries bound too in case of deletion but than we need
78  a special code to count bins with corresponding deleted entries
79  and reset the bin values when there are too many bins
80  corresponding deleted entries
81 
82  Table rebuilding is done by creation of a new entry array and
83  bins of an appropriate size. We also try to reuse the arrays
84  in some cases by compacting the array and removing deleted
85  entries.
86 
87  o To save memory very small tables have no allocated arrays
88  bins. We use a linear search for an access by a key.
89 
90  o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91  bins depending on the current hash table size.
92 
93  o The implementation takes into account that the table can be
94  rebuilt during hashing or comparison functions. It can happen if
95  the functions are implemented in Ruby and a thread switch occurs
96  during their execution.
97 
98  This implementation speeds up the Ruby hash table benchmarks in
99  average by more 40% on Intel Haswell CPU.
100 
101 */
102 
103 #ifdef NOT_RUBY
104 #include "regint.h"
105 #include "st.h"
106 #elif defined RUBY_EXPORT
107 #include "internal.h"
108 #include "internal/bits.h"
109 #include "internal/hash.h"
110 #include "internal/sanitizers.h"
111 #include "internal/st.h"
112 #endif
113 
114 #include <stdio.h>
115 #ifdef HAVE_STDLIB_H
116 #include <stdlib.h>
117 #endif
118 #include <string.h>
119 #include <assert.h>
120 
121 #ifdef __GNUC__
122 #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
123 #define EXPECT(expr, val) __builtin_expect(expr, val)
124 #define ATTRIBUTE_UNUSED __attribute__((unused))
125 #else
126 #define PREFETCH(addr, write_p)
127 #define EXPECT(expr, val) (expr)
128 #define ATTRIBUTE_UNUSED
129 #endif
130 
131 /* The type of hashes. */
132 typedef st_index_t st_hash_t;
133 
135  st_hash_t hash;
136  st_data_t key;
137  st_data_t record;
138 };
139 
140 #define type_numhash st_hashtype_num
141 static const struct st_hash_type st_hashtype_num = {
142  st_numcmp,
143  st_numhash,
144 };
145 
146 static int st_strcmp(st_data_t, st_data_t);
147 static st_index_t strhash(st_data_t);
148 static const struct st_hash_type type_strhash = {
149  st_strcmp,
150  strhash,
151 };
152 
153 static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
154 static st_index_t strcasehash(st_data_t);
155 static const struct st_hash_type type_strcasehash = {
156  st_locale_insensitive_strcasecmp_i,
157  strcasehash,
158 };
159 
160 /* Value used to catch uninitialized entries/bins during debugging.
161  There is a possibility for a false alarm, but its probability is
162  extremely small. */
163 #define ST_INIT_VAL 0xafafafafafafafaf
164 #define ST_INIT_VAL_BYTE 0xafa
165 
166 #ifdef RUBY
167 #undef malloc
168 #undef realloc
169 #undef calloc
170 #undef free
171 #define malloc ruby_xmalloc
172 #define calloc ruby_xcalloc
173 #define realloc ruby_xrealloc
174 #define free ruby_xfree
175 #endif
176 
177 #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
178 #define PTR_EQUAL(tab, ptr, hash_val, key_) \
179  ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
180 
181 /* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
182  up to TRUE if the table is rebuilt during the comparison. */
183 #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
184  do { \
185  unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
186  res = PTR_EQUAL(tab, ptr, hash_val, key); \
187  rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
188  } while (FALSE)
189 
190 /* Features of a table. */
191 struct st_features {
192  /* Power of 2 used for number of allocated entries. */
193  unsigned char entry_power;
194  /* Power of 2 used for number of allocated bins. Depending on the
195  table size, the number of bins is 2-4 times more than the
196  number of entries. */
197  unsigned char bin_power;
198  /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
199  unsigned char size_ind;
200  /* Bins are packed in words of type st_index_t. The following is
201  a size of bins counted by words. */
202  st_index_t bins_words;
203 };
204 
205 /* Features of all possible size tables. */
206 #if SIZEOF_ST_INDEX_T == 8
207 #define MAX_POWER2 62
208 static const struct st_features features[] = {
209  {0, 1, 0, 0x0},
210  {1, 2, 0, 0x1},
211  {2, 3, 0, 0x1},
212  {3, 4, 0, 0x2},
213  {4, 5, 0, 0x4},
214  {5, 6, 0, 0x8},
215  {6, 7, 0, 0x10},
216  {7, 8, 0, 0x20},
217  {8, 9, 1, 0x80},
218  {9, 10, 1, 0x100},
219  {10, 11, 1, 0x200},
220  {11, 12, 1, 0x400},
221  {12, 13, 1, 0x800},
222  {13, 14, 1, 0x1000},
223  {14, 15, 1, 0x2000},
224  {15, 16, 1, 0x4000},
225  {16, 17, 2, 0x10000},
226  {17, 18, 2, 0x20000},
227  {18, 19, 2, 0x40000},
228  {19, 20, 2, 0x80000},
229  {20, 21, 2, 0x100000},
230  {21, 22, 2, 0x200000},
231  {22, 23, 2, 0x400000},
232  {23, 24, 2, 0x800000},
233  {24, 25, 2, 0x1000000},
234  {25, 26, 2, 0x2000000},
235  {26, 27, 2, 0x4000000},
236  {27, 28, 2, 0x8000000},
237  {28, 29, 2, 0x10000000},
238  {29, 30, 2, 0x20000000},
239  {30, 31, 2, 0x40000000},
240  {31, 32, 2, 0x80000000},
241  {32, 33, 3, 0x200000000},
242  {33, 34, 3, 0x400000000},
243  {34, 35, 3, 0x800000000},
244  {35, 36, 3, 0x1000000000},
245  {36, 37, 3, 0x2000000000},
246  {37, 38, 3, 0x4000000000},
247  {38, 39, 3, 0x8000000000},
248  {39, 40, 3, 0x10000000000},
249  {40, 41, 3, 0x20000000000},
250  {41, 42, 3, 0x40000000000},
251  {42, 43, 3, 0x80000000000},
252  {43, 44, 3, 0x100000000000},
253  {44, 45, 3, 0x200000000000},
254  {45, 46, 3, 0x400000000000},
255  {46, 47, 3, 0x800000000000},
256  {47, 48, 3, 0x1000000000000},
257  {48, 49, 3, 0x2000000000000},
258  {49, 50, 3, 0x4000000000000},
259  {50, 51, 3, 0x8000000000000},
260  {51, 52, 3, 0x10000000000000},
261  {52, 53, 3, 0x20000000000000},
262  {53, 54, 3, 0x40000000000000},
263  {54, 55, 3, 0x80000000000000},
264  {55, 56, 3, 0x100000000000000},
265  {56, 57, 3, 0x200000000000000},
266  {57, 58, 3, 0x400000000000000},
267  {58, 59, 3, 0x800000000000000},
268  {59, 60, 3, 0x1000000000000000},
269  {60, 61, 3, 0x2000000000000000},
270  {61, 62, 3, 0x4000000000000000},
271  {62, 63, 3, 0x8000000000000000},
272 };
273 
274 #else
275 #define MAX_POWER2 30
276 
277 static const struct st_features features[] = {
278  {0, 1, 0, 0x1},
279  {1, 2, 0, 0x1},
280  {2, 3, 0, 0x2},
281  {3, 4, 0, 0x4},
282  {4, 5, 0, 0x8},
283  {5, 6, 0, 0x10},
284  {6, 7, 0, 0x20},
285  {7, 8, 0, 0x40},
286  {8, 9, 1, 0x100},
287  {9, 10, 1, 0x200},
288  {10, 11, 1, 0x400},
289  {11, 12, 1, 0x800},
290  {12, 13, 1, 0x1000},
291  {13, 14, 1, 0x2000},
292  {14, 15, 1, 0x4000},
293  {15, 16, 1, 0x8000},
294  {16, 17, 2, 0x20000},
295  {17, 18, 2, 0x40000},
296  {18, 19, 2, 0x80000},
297  {19, 20, 2, 0x100000},
298  {20, 21, 2, 0x200000},
299  {21, 22, 2, 0x400000},
300  {22, 23, 2, 0x800000},
301  {23, 24, 2, 0x1000000},
302  {24, 25, 2, 0x2000000},
303  {25, 26, 2, 0x4000000},
304  {26, 27, 2, 0x8000000},
305  {27, 28, 2, 0x10000000},
306  {28, 29, 2, 0x20000000},
307  {29, 30, 2, 0x40000000},
308  {30, 31, 2, 0x80000000},
309 };
310 
311 #endif
312 
313 /* The reserved hash value and its substitution. */
314 #define RESERVED_HASH_VAL (~(st_hash_t) 0)
315 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
316 
317 /* Return hash value of KEY for table TAB. */
318 static inline st_hash_t
319 do_hash(st_data_t key, st_table *tab)
320 {
321  st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
322 
323  /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
324  another value. Such mapping should be extremely rare. */
325  return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
326 }
327 
328 /* Power of 2 defining the minimal number of allocated entries. */
329 #define MINIMAL_POWER2 2
330 
331 #if MINIMAL_POWER2 < 2
332 #error "MINIMAL_POWER2 should be >= 2"
333 #endif
334 
335 /* If the power2 of the allocated `entries` is less than the following
336  value, don't allocate bins and use a linear search. */
337 #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
338 
339 /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
340 static int
341 get_power2(st_index_t size)
342 {
343  unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
344  if (n <= MAX_POWER2)
345  return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
346 #ifdef RUBY
347  /* Ran out of the table entries */
348  rb_raise(rb_eRuntimeError, "st_table too big");
349 #endif
350  /* should raise exception */
351  return -1;
352 }
353 
354 /* Return value of N-th bin in array BINS of table with bins size
355  index S. */
356 static inline st_index_t
357 get_bin(st_index_t *bins, int s, st_index_t n)
358 {
359  return (s == 0 ? ((unsigned char *) bins)[n]
360  : s == 1 ? ((unsigned short *) bins)[n]
361  : s == 2 ? ((unsigned int *) bins)[n]
362  : ((st_index_t *) bins)[n]);
363 }
364 
365 /* Set up N-th bin in array BINS of table with bins size index S to
366  value V. */
367 static inline void
368 set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
369 {
370  if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
371  else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
372  else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
373  else ((st_index_t *) bins)[n] = v;
374 }
375 
376 /* These macros define reserved values for empty table bin and table
377  bin which contains a deleted entry. We will never use such values
378  for an entry index in bins. */
379 #define EMPTY_BIN 0
380 #define DELETED_BIN 1
381 /* Base of a real entry index in the bins. */
382 #define ENTRY_BASE 2
383 
384 /* Mark I-th bin of table TAB as empty, in other words not
385  corresponding to any entry. */
386 #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
387 
388 /* Values used for not found entry and bin with given
389  characteristics. */
390 #define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
391 #define UNDEFINED_BIN_IND (~(st_index_t) 0)
392 
393 /* Entry and bin values returned when we found a table rebuild during
394  the search. */
395 #define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
396 #define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
397 
398 /* Mark I-th bin of table TAB as corresponding to a deleted table
399  entry. Update number of entries in the table and number of bins
400  corresponding to deleted entries. */
401 #define MARK_BIN_DELETED(tab, i) \
402  do { \
403  set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
404  } while (0)
405 
406 /* Macros to check that value B is used empty bins and bins
407  corresponding deleted entries. */
408 #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
409 #define DELETED_BIN_P(b) ((b) == DELETED_BIN)
410 #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
411 
412 /* Macros to check empty bins and bins corresponding to deleted
413  entries. Bins are given by their index I in table TAB. */
414 #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
415 #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
416 #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
417 
418 /* Macros for marking and checking deleted entries given by their
419  pointer E_PTR. */
420 #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
421 #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
422 
423 /* Return bin size index of table TAB. */
424 static inline unsigned int
425 get_size_ind(const st_table *tab)
426 {
427  return tab->size_ind;
428 }
429 
430 /* Return the number of allocated bins of table TAB. */
431 static inline st_index_t
432 get_bins_num(const st_table *tab)
433 {
434  return ((st_index_t) 1)<<tab->bin_power;
435 }
436 
437 /* Return mask for a bin index in table TAB. */
438 static inline st_index_t
439 bins_mask(const st_table *tab)
440 {
441  return get_bins_num(tab) - 1;
442 }
443 
444 /* Return the index of table TAB bin corresponding to
445  HASH_VALUE. */
446 static inline st_index_t
447 hash_bin(st_hash_t hash_value, st_table *tab)
448 {
449  return hash_value & bins_mask(tab);
450 }
451 
452 /* Return the number of allocated entries of table TAB. */
453 static inline st_index_t
454 get_allocated_entries(const st_table *tab)
455 {
456  return ((st_index_t) 1)<<tab->entry_power;
457 }
458 
459 /* Return size of the allocated bins of table TAB. */
460 static inline st_index_t
461 bins_size(const st_table *tab)
462 {
463  return features[tab->entry_power].bins_words * sizeof (st_index_t);
464 }
465 
466 /* Mark all bins of table TAB as empty. */
467 static void
468 initialize_bins(st_table *tab)
469 {
470  memset(tab->bins, 0, bins_size(tab));
471 }
472 
473 /* Make table TAB empty. */
474 static void
475 make_tab_empty(st_table *tab)
476 {
477  tab->num_entries = 0;
478  tab->entries_start = tab->entries_bound = 0;
479  if (tab->bins != NULL)
480  initialize_bins(tab);
481 }
482 
483 #ifdef HASH_LOG
484 #ifdef HAVE_UNISTD_H
485 #include <unistd.h>
486 #endif
487 static struct {
488  int all, total, num, str, strcase;
489 } collision;
490 
491 /* Flag switching off output of package statistics at the end of
492  program. */
493 static int init_st = 0;
494 
495 /* Output overall number of table searches and collisions into a
496  temporary file. */
497 static void
498 stat_col(void)
499 {
500  char fname[10+sizeof(long)*3];
501  FILE *f;
502  if (!collision.total) return;
503  f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
504  if (f == NULL)
505  return;
506  fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
507  ((double)collision.all / (collision.total)) * 100);
508  fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
509  fclose(f);
510 }
511 #endif
512 
513 st_table *
514 st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
515 {
516  int n;
517 
518 #ifdef HASH_LOG
519 #if HASH_LOG+0 < 0
520  {
521  const char *e = getenv("ST_HASH_LOG");
522  if (!e || !*e) init_st = 1;
523  }
524 #endif
525  if (init_st == 0) {
526  init_st = 1;
527  atexit(stat_col);
528  }
529 #endif
530 
531  n = get_power2(size);
532 #ifndef RUBY
533  if (n < 0)
534  return NULL;
535 #endif
536 
537  tab->type = type;
538  tab->entry_power = n;
539  tab->bin_power = features[n].bin_power;
540  tab->size_ind = features[n].size_ind;
541  if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
542  tab->bins = NULL;
543  else {
544  tab->bins = (st_index_t *) malloc(bins_size(tab));
545 #ifndef RUBY
546  if (tab->bins == NULL) {
547  free(tab);
548  return NULL;
549  }
550 #endif
551  }
552  tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
553  * sizeof(st_table_entry));
554 #ifndef RUBY
555  if (tab->entries == NULL) {
556  st_free_table(tab);
557  return NULL;
558  }
559 #endif
560  make_tab_empty(tab);
561  tab->rebuilds_num = 0;
562  return tab;
563 }
564 
565 /* Create and return table with TYPE which can hold at least SIZE
566  entries. The real number of entries which the table can hold is
567  the nearest power of two for SIZE. */
568 st_table *
569 st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
570 {
571  st_table *tab = malloc(sizeof(st_table));
572 #ifndef RUBY
573  if (tab == NULL)
574  return NULL;
575 #endif
576 
577 #ifdef RUBY
578  st_init_existing_table_with_size(tab, type, size);
579 #else
580  if (st_init_existing_table_with_size(tab, type, size) == NULL) {
581  free(tab);
582  return NULL;
583  }
584 #endif
585 
586  return tab;
587 }
588 
589 size_t
590 st_table_size(const struct st_table *tbl)
591 {
592  return tbl->num_entries;
593 }
594 
595 /* Create and return table with TYPE which can hold a minimal number
596  of entries (see comments for get_power2). */
597 st_table *
598 st_init_table(const struct st_hash_type *type)
599 {
600  return st_init_table_with_size(type, 0);
601 }
602 
603 /* Create and return table which can hold a minimal number of
604  numbers. */
605 st_table *
606 st_init_numtable(void)
607 {
608  return st_init_table(&type_numhash);
609 }
610 
611 /* Create and return table which can hold SIZE numbers. */
612 st_table *
613 st_init_numtable_with_size(st_index_t size)
614 {
615  return st_init_table_with_size(&type_numhash, size);
616 }
617 
618 /* Create and return table which can hold a minimal number of
619  strings. */
620 st_table *
621 st_init_strtable(void)
622 {
623  return st_init_table(&type_strhash);
624 }
625 
626 /* Create and return table which can hold SIZE strings. */
627 st_table *
628 st_init_strtable_with_size(st_index_t size)
629 {
630  return st_init_table_with_size(&type_strhash, size);
631 }
632 
633 /* Create and return table which can hold a minimal number of strings
634  whose character case is ignored. */
635 st_table *
636 st_init_strcasetable(void)
637 {
638  return st_init_table(&type_strcasehash);
639 }
640 
641 /* Create and return table which can hold SIZE strings whose character
642  case is ignored. */
643 st_table *
644 st_init_strcasetable_with_size(st_index_t size)
645 {
646  return st_init_table_with_size(&type_strcasehash, size);
647 }
648 
649 /* Make table TAB empty. */
650 void
651 st_clear(st_table *tab)
652 {
653  make_tab_empty(tab);
654  tab->rebuilds_num++;
655 }
656 
657 /* Free table TAB space. */
658 void
659 st_free_table(st_table *tab)
660 {
661  free(tab->bins);
662  free(tab->entries);
663  free(tab);
664 }
665 
666 /* Return byte size of memory allocated for table TAB. */
667 size_t
668 st_memsize(const st_table *tab)
669 {
670  return(sizeof(st_table)
671  + (tab->bins == NULL ? 0 : bins_size(tab))
672  + get_allocated_entries(tab) * sizeof(st_table_entry));
673 }
674 
675 static st_index_t
676 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
677 
678 static st_index_t
679 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
680 
681 static st_index_t
682 find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
683 
684 static st_index_t
685 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
686  st_data_t key, st_index_t *bin_ind);
687 
688 #ifdef HASH_LOG
689 static void
690 count_collision(const struct st_hash_type *type)
691 {
692  collision.all++;
693  if (type == &type_numhash) {
694  collision.num++;
695  }
696  else if (type == &type_strhash) {
697  collision.strcase++;
698  }
699  else if (type == &type_strcasehash) {
700  collision.str++;
701  }
702 }
703 
704 #define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
705 #define FOUND_BIN (collision_check ? collision.total++ : (void)0)
706 #define collision_check 0
707 #else
708 #define COLLISION
709 #define FOUND_BIN
710 #endif
711 
712 /* If the number of entries in the table is at least REBUILD_THRESHOLD
713  times less than the entry array length, decrease the table
714  size. */
715 #define REBUILD_THRESHOLD 4
716 
717 #if REBUILD_THRESHOLD < 2
718 #error "REBUILD_THRESHOLD should be >= 2"
719 #endif
720 
721 static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
722 static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
723 static void rebuild_cleanup(st_table *const tab);
724 
725 /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
726  and can change size of the table entries and bins arrays.
727  Rebuilding is implemented by creation of a new table or by
728  compaction of the existing one. */
729 static void
730 rebuild_table(st_table *tab)
731 {
732  if ((2 * tab->num_entries <= get_allocated_entries(tab)
733  && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
734  || tab->num_entries < (1 << MINIMAL_POWER2)) {
735  /* Compaction: */
736  tab->num_entries = 0;
737  if (tab->bins != NULL)
738  initialize_bins(tab);
739  rebuild_table_with(tab, tab);
740  }
741  else {
742  st_table *new_tab;
743  /* This allocation could trigger GC and compaction. If tab is the
744  * gen_iv_tbl, then tab could have changed in size due to objects being
745  * freed and/or moved. Do not store attributes of tab before this line. */
746  new_tab = st_init_table_with_size(tab->type,
747  2 * tab->num_entries - 1);
748  rebuild_table_with(new_tab, tab);
749  rebuild_move_table(new_tab, tab);
750  }
751  rebuild_cleanup(tab);
752 }
753 
754 static void
755 rebuild_table_with(st_table *const new_tab, st_table *const tab)
756 {
757  st_index_t i, ni;
758  unsigned int size_ind;
759  st_table_entry *new_entries;
760  st_table_entry *curr_entry_ptr;
761  st_index_t *bins;
762  st_index_t bin_ind;
763 
764  new_entries = new_tab->entries;
765 
766  ni = 0;
767  bins = new_tab->bins;
768  size_ind = get_size_ind(new_tab);
769  st_index_t bound = tab->entries_bound;
770  st_table_entry *entries = tab->entries;
771 
772  for (i = tab->entries_start; i < bound; i++) {
773  curr_entry_ptr = &entries[i];
774  PREFETCH(entries + i + 1, 0);
775  if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
776  continue;
777  if (&new_entries[ni] != curr_entry_ptr)
778  new_entries[ni] = *curr_entry_ptr;
779  if (EXPECT(bins != NULL, 1)) {
780  bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
781  curr_entry_ptr->key);
782  set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
783  }
784  new_tab->num_entries++;
785  ni++;
786  }
787 }
788 
789 static void
790 rebuild_move_table(st_table *const new_tab, st_table *const tab)
791 {
792  tab->entry_power = new_tab->entry_power;
793  tab->bin_power = new_tab->bin_power;
794  tab->size_ind = new_tab->size_ind;
795  free(tab->bins);
796  tab->bins = new_tab->bins;
797  free(tab->entries);
798  tab->entries = new_tab->entries;
799  free(new_tab);
800 }
801 
802 static void
803 rebuild_cleanup(st_table *const tab)
804 {
805  tab->entries_start = 0;
806  tab->entries_bound = tab->num_entries;
807  tab->rebuilds_num++;
808 }
809 
810 /* Return the next secondary hash index for table TAB using previous
811  index IND and PERTURB. Finally modulo of the function becomes a
812  full *cycle linear congruential generator*, in other words it
813  guarantees traversing all table bins in extreme case.
814 
815  According the Hull-Dobell theorem a generator
816  "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
817  o m and c are relatively prime
818  o a-1 is divisible by all prime factors of m
819  o a-1 is divisible by 4 if m is divisible by 4.
820 
821  For our case a is 5, c is 1, and m is a power of two. */
822 static inline st_index_t
823 secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
824 {
825  *perturb >>= 11;
826  ind = (ind << 2) + ind + *perturb + 1;
827  return hash_bin(ind, tab);
828 }
829 
830 /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
831  search. Return the index of the found entry in array `entries`.
832  If it is not found, return UNDEFINED_ENTRY_IND. If the table was
833  rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
834 static inline st_index_t
835 find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
836 {
837  int eq_p, rebuilt_p;
838  st_index_t i, bound;
839  st_table_entry *entries;
840 
841  bound = tab->entries_bound;
842  entries = tab->entries;
843  for (i = tab->entries_start; i < bound; i++) {
844  DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
845  if (EXPECT(rebuilt_p, 0))
846  return REBUILT_TABLE_ENTRY_IND;
847  if (eq_p)
848  return i;
849  }
850  return UNDEFINED_ENTRY_IND;
851 }
852 
853 /* Use the quadratic probing. The method has a better data locality
854  but more collisions than the current approach. In average it
855  results in a bit slower search. */
856 /*#define QUADRATIC_PROBE*/
857 
858 /* Return index of entry with HASH_VALUE and KEY in table TAB. If
859  there is no such entry, return UNDEFINED_ENTRY_IND. If the table
860  was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
861 static st_index_t
862 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
863 {
864  int eq_p, rebuilt_p;
865  st_index_t ind;
866 #ifdef QUADRATIC_PROBE
867  st_index_t d;
868 #else
869  st_index_t perturb;
870 #endif
871  st_index_t bin;
872  st_table_entry *entries = tab->entries;
873 
874  ind = hash_bin(hash_value, tab);
875 #ifdef QUADRATIC_PROBE
876  d = 1;
877 #else
878  perturb = hash_value;
879 #endif
880  FOUND_BIN;
881  for (;;) {
882  bin = get_bin(tab->bins, get_size_ind(tab), ind);
883  if (! EMPTY_OR_DELETED_BIN_P(bin)) {
884  DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
885  if (EXPECT(rebuilt_p, 0))
886  return REBUILT_TABLE_ENTRY_IND;
887  if (eq_p)
888  break;
889  }
890  else if (EMPTY_BIN_P(bin))
891  return UNDEFINED_ENTRY_IND;
892 #ifdef QUADRATIC_PROBE
893  ind = hash_bin(ind + d, tab);
894  d++;
895 #else
896  ind = secondary_hash(ind, tab, &perturb);
897 #endif
898  COLLISION;
899  }
900  return bin;
901 }
902 
903 /* Find and return index of table TAB bin corresponding to an entry
904  with HASH_VALUE and KEY. If there is no such bin, return
905  UNDEFINED_BIN_IND. If the table was rebuilt during the search,
906  return REBUILT_TABLE_BIN_IND. */
907 static st_index_t
908 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
909 {
910  int eq_p, rebuilt_p;
911  st_index_t ind;
912 #ifdef QUADRATIC_PROBE
913  st_index_t d;
914 #else
915  st_index_t perturb;
916 #endif
917  st_index_t bin;
918  st_table_entry *entries = tab->entries;
919 
920  ind = hash_bin(hash_value, tab);
921 #ifdef QUADRATIC_PROBE
922  d = 1;
923 #else
924  perturb = hash_value;
925 #endif
926  FOUND_BIN;
927  for (;;) {
928  bin = get_bin(tab->bins, get_size_ind(tab), ind);
929  if (! EMPTY_OR_DELETED_BIN_P(bin)) {
930  DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
931  if (EXPECT(rebuilt_p, 0))
932  return REBUILT_TABLE_BIN_IND;
933  if (eq_p)
934  break;
935  }
936  else if (EMPTY_BIN_P(bin))
937  return UNDEFINED_BIN_IND;
938 #ifdef QUADRATIC_PROBE
939  ind = hash_bin(ind + d, tab);
940  d++;
941 #else
942  ind = secondary_hash(ind, tab, &perturb);
943 #endif
944  COLLISION;
945  }
946  return ind;
947 }
948 
949 /* Find and return index of table TAB bin corresponding to an entry
950  with HASH_VALUE and KEY. The entry should be in the table
951  already. */
952 static st_index_t
953 find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
954 {
955  st_index_t ind;
956 #ifdef QUADRATIC_PROBE
957  st_index_t d;
958 #else
959  st_index_t perturb;
960 #endif
961  st_index_t bin;
962 
963  ind = hash_bin(hash_value, tab);
964 #ifdef QUADRATIC_PROBE
965  d = 1;
966 #else
967  perturb = hash_value;
968 #endif
969  FOUND_BIN;
970  for (;;) {
971  bin = get_bin(tab->bins, get_size_ind(tab), ind);
972  if (EMPTY_OR_DELETED_BIN_P(bin))
973  return ind;
974 #ifdef QUADRATIC_PROBE
975  ind = hash_bin(ind + d, tab);
976  d++;
977 #else
978  ind = secondary_hash(ind, tab, &perturb);
979 #endif
980  COLLISION;
981  }
982 }
983 
984 /* Return index of table TAB bin for HASH_VALUE and KEY through
985  BIN_IND and the pointed value as the function result. Reserve the
986  bin for inclusion of the corresponding entry into the table if it
987  is not there yet. We always find such bin as bins array length is
988  bigger entries array. Although we can reuse a deleted bin, the
989  result bin value is always empty if the table has no entry with
990  KEY. Return the entries array index of the found entry or
991  UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
992  during the search, return REBUILT_TABLE_ENTRY_IND. */
993 static st_index_t
994 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
995  st_data_t key, st_index_t *bin_ind)
996 {
997  int eq_p, rebuilt_p;
998  st_index_t ind;
999  st_hash_t curr_hash_value = *hash_value;
1000 #ifdef QUADRATIC_PROBE
1001  st_index_t d;
1002 #else
1003  st_index_t perturb;
1004 #endif
1005  st_index_t entry_index;
1006  st_index_t first_deleted_bin_ind;
1007  st_table_entry *entries;
1008 
1009  ind = hash_bin(curr_hash_value, tab);
1010 #ifdef QUADRATIC_PROBE
1011  d = 1;
1012 #else
1013  perturb = curr_hash_value;
1014 #endif
1015  FOUND_BIN;
1016  first_deleted_bin_ind = UNDEFINED_BIN_IND;
1017  entries = tab->entries;
1018  for (;;) {
1019  entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1020  if (EMPTY_BIN_P(entry_index)) {
1021  tab->num_entries++;
1022  entry_index = UNDEFINED_ENTRY_IND;
1023  if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1024  /* We can reuse bin of a deleted entry. */
1025  ind = first_deleted_bin_ind;
1026  MARK_BIN_EMPTY(tab, ind);
1027  }
1028  break;
1029  }
1030  else if (! DELETED_BIN_P(entry_index)) {
1031  DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1032  if (EXPECT(rebuilt_p, 0))
1033  return REBUILT_TABLE_ENTRY_IND;
1034  if (eq_p)
1035  break;
1036  }
1037  else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1038  first_deleted_bin_ind = ind;
1039 #ifdef QUADRATIC_PROBE
1040  ind = hash_bin(ind + d, tab);
1041  d++;
1042 #else
1043  ind = secondary_hash(ind, tab, &perturb);
1044 #endif
1045  COLLISION;
1046  }
1047  *bin_ind = ind;
1048  return entry_index;
1049 }
1050 
1051 /* Find an entry with KEY in table TAB. Return non-zero if we found
1052  it. Set up *RECORD to the found entry record. */
1053 int
1054 st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1055 {
1056  st_index_t bin;
1057  st_hash_t hash = do_hash(key, tab);
1058 
1059  retry:
1060  if (tab->bins == NULL) {
1061  bin = find_entry(tab, hash, key);
1062  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1063  goto retry;
1064  if (bin == UNDEFINED_ENTRY_IND)
1065  return 0;
1066  }
1067  else {
1068  bin = find_table_entry_ind(tab, hash, key);
1069  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1070  goto retry;
1071  if (bin == UNDEFINED_ENTRY_IND)
1072  return 0;
1073  bin -= ENTRY_BASE;
1074  }
1075  if (value != 0)
1076  *value = tab->entries[bin].record;
1077  return 1;
1078 }
1079 
1080 /* Find an entry with KEY in table TAB. Return non-zero if we found
1081  it. Set up *RESULT to the found table entry key. */
1082 int
1083 st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1084 {
1085  st_index_t bin;
1086  st_hash_t hash = do_hash(key, tab);
1087 
1088  retry:
1089  if (tab->bins == NULL) {
1090  bin = find_entry(tab, hash, key);
1091  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1092  goto retry;
1093  if (bin == UNDEFINED_ENTRY_IND)
1094  return 0;
1095  }
1096  else {
1097  bin = find_table_entry_ind(tab, hash, key);
1098  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1099  goto retry;
1100  if (bin == UNDEFINED_ENTRY_IND)
1101  return 0;
1102  bin -= ENTRY_BASE;
1103  }
1104  if (result != 0)
1105  *result = tab->entries[bin].key;
1106  return 1;
1107 }
1108 
1109 /* Check the table and rebuild it if it is necessary. */
1110 static inline void
1111 rebuild_table_if_necessary (st_table *tab)
1112 {
1113  st_index_t bound = tab->entries_bound;
1114 
1115  if (bound == get_allocated_entries(tab))
1116  rebuild_table(tab);
1117 }
1118 
1119 /* Insert (KEY, VALUE) into table TAB and return zero. If there is
1120  already entry with KEY in the table, return nonzero and update
1121  the value of the found entry. */
1122 int
1123 st_insert(st_table *tab, st_data_t key, st_data_t value)
1124 {
1125  st_table_entry *entry;
1126  st_index_t bin;
1127  st_index_t ind;
1128  st_hash_t hash_value;
1129  st_index_t bin_ind;
1130  int new_p;
1131 
1132  hash_value = do_hash(key, tab);
1133  retry:
1134  rebuild_table_if_necessary(tab);
1135  if (tab->bins == NULL) {
1136  bin = find_entry(tab, hash_value, key);
1137  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1138  goto retry;
1139  new_p = bin == UNDEFINED_ENTRY_IND;
1140  if (new_p)
1141  tab->num_entries++;
1142  bin_ind = UNDEFINED_BIN_IND;
1143  }
1144  else {
1145  bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1146  key, &bin_ind);
1147  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1148  goto retry;
1149  new_p = bin == UNDEFINED_ENTRY_IND;
1150  bin -= ENTRY_BASE;
1151  }
1152  if (new_p) {
1153  ind = tab->entries_bound++;
1154  entry = &tab->entries[ind];
1155  entry->hash = hash_value;
1156  entry->key = key;
1157  entry->record = value;
1158  if (bin_ind != UNDEFINED_BIN_IND)
1159  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1160  return 0;
1161  }
1162  tab->entries[bin].record = value;
1163  return 1;
1164 }
1165 
1166 /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1167  entry with KEY before the insertion. */
1168 static inline void
1169 st_add_direct_with_hash(st_table *tab,
1170  st_data_t key, st_data_t value, st_hash_t hash)
1171 {
1172  st_table_entry *entry;
1173  st_index_t ind;
1174  st_index_t bin_ind;
1175 
1176  rebuild_table_if_necessary(tab);
1177  ind = tab->entries_bound++;
1178  entry = &tab->entries[ind];
1179  entry->hash = hash;
1180  entry->key = key;
1181  entry->record = value;
1182  tab->num_entries++;
1183  if (tab->bins != NULL) {
1184  bin_ind = find_table_bin_ind_direct(tab, hash, key);
1185  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1186  }
1187 }
1188 
1189 void
1190 rb_st_add_direct_with_hash(st_table *tab,
1191  st_data_t key, st_data_t value, st_hash_t hash)
1192 {
1193  st_add_direct_with_hash(tab, key, value, hash);
1194 }
1195 
1196 /* Insert (KEY, VALUE) into table TAB. The table should not have
1197  entry with KEY before the insertion. */
1198 void
1199 st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1200 {
1201  st_hash_t hash_value;
1202 
1203  hash_value = do_hash(key, tab);
1204  st_add_direct_with_hash(tab, key, value, hash_value);
1205 }
1206 
1207 /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1208  there is already entry with KEY in the table, return nonzero and
1209  update the value of the found entry. */
1210 int
1211 st_insert2(st_table *tab, st_data_t key, st_data_t value,
1212  st_data_t (*func)(st_data_t))
1213 {
1214  st_table_entry *entry;
1215  st_index_t bin;
1216  st_index_t ind;
1217  st_hash_t hash_value;
1218  st_index_t bin_ind;
1219  int new_p;
1220 
1221  hash_value = do_hash(key, tab);
1222  retry:
1223  rebuild_table_if_necessary (tab);
1224  if (tab->bins == NULL) {
1225  bin = find_entry(tab, hash_value, key);
1226  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1227  goto retry;
1228  new_p = bin == UNDEFINED_ENTRY_IND;
1229  if (new_p)
1230  tab->num_entries++;
1231  bin_ind = UNDEFINED_BIN_IND;
1232  }
1233  else {
1234  bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1235  key, &bin_ind);
1236  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1237  goto retry;
1238  new_p = bin == UNDEFINED_ENTRY_IND;
1239  bin -= ENTRY_BASE;
1240  }
1241  if (new_p) {
1242  key = (*func)(key);
1243  ind = tab->entries_bound++;
1244  entry = &tab->entries[ind];
1245  entry->hash = hash_value;
1246  entry->key = key;
1247  entry->record = value;
1248  if (bin_ind != UNDEFINED_BIN_IND)
1249  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1250  return 0;
1251  }
1252  tab->entries[bin].record = value;
1253  return 1;
1254 }
1255 
1256 /* Create a copy of old_tab into new_tab. */
1257 st_table *
1258 st_replace(st_table *new_tab, st_table *old_tab)
1259 {
1260  *new_tab = *old_tab;
1261  if (old_tab->bins == NULL)
1262  new_tab->bins = NULL;
1263  else {
1264  new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1265 #ifndef RUBY
1266  if (new_tab->bins == NULL) {
1267  return NULL;
1268  }
1269 #endif
1270  }
1271  new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1272  * sizeof(st_table_entry));
1273 #ifndef RUBY
1274  if (new_tab->entries == NULL) {
1275  return NULL;
1276  }
1277 #endif
1278  MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1279  get_allocated_entries(old_tab));
1280  if (old_tab->bins != NULL)
1281  MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1282 
1283  return new_tab;
1284 }
1285 
1286 /* Create and return a copy of table OLD_TAB. */
1287 st_table *
1288 st_copy(st_table *old_tab)
1289 {
1290  st_table *new_tab;
1291 
1292  new_tab = (st_table *) malloc(sizeof(st_table));
1293 #ifndef RUBY
1294  if (new_tab == NULL)
1295  return NULL;
1296 #endif
1297 
1298  if (st_replace(new_tab, old_tab) == NULL) {
1299  st_free_table(new_tab);
1300  return NULL;
1301  }
1302 
1303  return new_tab;
1304 }
1305 
1306 /* Update the entries start of table TAB after removing an entry
1307  with index N in the array entries. */
1308 static inline void
1309 update_range_for_deleted(st_table *tab, st_index_t n)
1310 {
1311  /* Do not update entries_bound here. Otherwise, we can fill all
1312  bins by deleted entry value before rebuilding the table. */
1313  if (tab->entries_start == n) {
1314  st_index_t start = n + 1;
1315  st_index_t bound = tab->entries_bound;
1316  st_table_entry *entries = tab->entries;
1317  while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1318  tab->entries_start = start;
1319  }
1320 }
1321 
1322 /* Delete entry with KEY from table TAB, set up *VALUE (unless
1323  VALUE is zero) from deleted table entry, and return non-zero. If
1324  there is no entry with KEY in the table, clear *VALUE (unless VALUE
1325  is zero), and return zero. */
1326 static int
1327 st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1328 {
1329  st_table_entry *entry;
1330  st_index_t bin;
1331  st_index_t bin_ind;
1332  st_hash_t hash;
1333 
1334  hash = do_hash(*key, tab);
1335  retry:
1336  if (tab->bins == NULL) {
1337  bin = find_entry(tab, hash, *key);
1338  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1339  goto retry;
1340  if (bin == UNDEFINED_ENTRY_IND) {
1341  if (value != 0) *value = 0;
1342  return 0;
1343  }
1344  }
1345  else {
1346  bin_ind = find_table_bin_ind(tab, hash, *key);
1347  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1348  goto retry;
1349  if (bin_ind == UNDEFINED_BIN_IND) {
1350  if (value != 0) *value = 0;
1351  return 0;
1352  }
1353  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1354  MARK_BIN_DELETED(tab, bin_ind);
1355  }
1356  entry = &tab->entries[bin];
1357  *key = entry->key;
1358  if (value != 0) *value = entry->record;
1359  MARK_ENTRY_DELETED(entry);
1360  tab->num_entries--;
1361  update_range_for_deleted(tab, bin);
1362  return 1;
1363 }
1364 
1365 int
1366 st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1367 {
1368  return st_general_delete(tab, key, value);
1369 }
1370 
1371 /* The function and other functions with suffix '_safe' or '_check'
1372  are originated from the previous implementation of the hash tables.
1373  It was necessary for correct deleting entries during traversing
1374  tables. The current implementation permits deletion during
1375  traversing without a specific way to do this. */
1376 int
1377 st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1378  st_data_t never ATTRIBUTE_UNUSED)
1379 {
1380  return st_general_delete(tab, key, value);
1381 }
1382 
1383 /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1384  return zero. Otherwise, remove the first entry in the table.
1385  Return its key through KEY and its record through VALUE (unless
1386  VALUE is zero). */
1387 int
1388 st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1389 {
1390  st_index_t i, bound;
1391  st_index_t bin;
1392  st_table_entry *entries, *curr_entry_ptr;
1393  st_index_t bin_ind;
1394 
1395  entries = tab->entries;
1396  bound = tab->entries_bound;
1397  for (i = tab->entries_start; i < bound; i++) {
1398  curr_entry_ptr = &entries[i];
1399  if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1400  st_hash_t entry_hash = curr_entry_ptr->hash;
1401  st_data_t entry_key = curr_entry_ptr->key;
1402 
1403  if (value != 0) *value = curr_entry_ptr->record;
1404  *key = entry_key;
1405  retry:
1406  if (tab->bins == NULL) {
1407  bin = find_entry(tab, entry_hash, entry_key);
1408  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1409  entries = tab->entries;
1410  goto retry;
1411  }
1412  curr_entry_ptr = &entries[bin];
1413  }
1414  else {
1415  bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1416  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1417  entries = tab->entries;
1418  goto retry;
1419  }
1420  curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1421  - ENTRY_BASE];
1422  MARK_BIN_DELETED(tab, bin_ind);
1423  }
1424  MARK_ENTRY_DELETED(curr_entry_ptr);
1425  tab->num_entries--;
1426  update_range_for_deleted(tab, i);
1427  return 1;
1428  }
1429  }
1430  if (value != 0) *value = 0;
1431  return 0;
1432 }
1433 
1434 /* See comments for function st_delete_safe. */
1435 void
1436 st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1437  st_data_t never ATTRIBUTE_UNUSED)
1438 {
1439 }
1440 
1441 /* Find entry with KEY in table TAB, call FUNC with pointers to copies
1442  of the key and the value of the found entry, and non-zero as the
1443  3rd argument. If the entry is not found, call FUNC with a pointer
1444  to KEY, a pointer to zero, and a zero argument. If the call
1445  returns ST_CONTINUE, the table will have an entry with key and
1446  value returned by FUNC through the 1st and 2nd parameters. If the
1447  call of FUNC returns ST_DELETE, the table will not have entry with
1448  KEY. The function returns flag of that the entry with KEY was in
1449  the table before the call. */
1450 int
1451 st_update(st_table *tab, st_data_t key,
1452  st_update_callback_func *func, st_data_t arg)
1453 {
1454  st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1455  st_index_t bin = 0; /* Ditto */
1456  st_table_entry *entries;
1457  st_index_t bin_ind;
1458  st_data_t value = 0, old_key;
1459  int retval, existing;
1460  st_hash_t hash = do_hash(key, tab);
1461 
1462  retry:
1463  entries = tab->entries;
1464  if (tab->bins == NULL) {
1465  bin = find_entry(tab, hash, key);
1466  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1467  goto retry;
1468  existing = bin != UNDEFINED_ENTRY_IND;
1469  entry = &entries[bin];
1470  bin_ind = UNDEFINED_BIN_IND;
1471  }
1472  else {
1473  bin_ind = find_table_bin_ind(tab, hash, key);
1474  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1475  goto retry;
1476  existing = bin_ind != UNDEFINED_BIN_IND;
1477  if (existing) {
1478  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1479  entry = &entries[bin];
1480  }
1481  }
1482  if (existing) {
1483  key = entry->key;
1484  value = entry->record;
1485  }
1486  old_key = key;
1487  retval = (*func)(&key, &value, arg, existing);
1488  switch (retval) {
1489  case ST_CONTINUE:
1490  if (! existing) {
1491  st_add_direct_with_hash(tab, key, value, hash);
1492  break;
1493  }
1494  if (old_key != key) {
1495  entry->key = key;
1496  }
1497  entry->record = value;
1498  break;
1499  case ST_DELETE:
1500  if (existing) {
1501  if (bin_ind != UNDEFINED_BIN_IND)
1502  MARK_BIN_DELETED(tab, bin_ind);
1503  MARK_ENTRY_DELETED(entry);
1504  tab->num_entries--;
1505  update_range_for_deleted(tab, bin);
1506  }
1507  break;
1508  }
1509  return existing;
1510 }
1511 
1512 /* Traverse all entries in table TAB calling FUNC with current entry
1513  key and value and zero. If the call returns ST_STOP, stop
1514  traversing. If the call returns ST_DELETE, delete the current
1515  entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1516  traversing. The function returns zero unless an error is found.
1517  CHECK_P is flag of st_foreach_check call. The behavior is a bit
1518  different for ST_CHECK and when the current element is removed
1519  during traversing. */
1520 static inline int
1521 st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1522  int check_p)
1523 {
1524  st_index_t bin;
1525  st_index_t bin_ind;
1526  st_table_entry *entries, *curr_entry_ptr;
1527  enum st_retval retval;
1528  st_index_t i, rebuilds_num;
1529  st_hash_t hash;
1530  st_data_t key;
1531  int error_p, packed_p = tab->bins == NULL;
1532 
1533  entries = tab->entries;
1534  /* The bound can change inside the loop even without rebuilding
1535  the table, e.g. by an entry insertion. */
1536  for (i = tab->entries_start; i < tab->entries_bound; i++) {
1537  curr_entry_ptr = &entries[i];
1538  if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1539  continue;
1540  key = curr_entry_ptr->key;
1541  rebuilds_num = tab->rebuilds_num;
1542  hash = curr_entry_ptr->hash;
1543  retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1544 
1545  if (retval == ST_REPLACE && replace) {
1546  st_data_t value;
1547  value = curr_entry_ptr->record;
1548  retval = (*replace)(&key, &value, arg, TRUE);
1549  curr_entry_ptr->key = key;
1550  curr_entry_ptr->record = value;
1551  }
1552 
1553  if (rebuilds_num != tab->rebuilds_num) {
1554  retry:
1555  entries = tab->entries;
1556  packed_p = tab->bins == NULL;
1557  if (packed_p) {
1558  i = find_entry(tab, hash, key);
1559  if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1560  goto retry;
1561  error_p = i == UNDEFINED_ENTRY_IND;
1562  }
1563  else {
1564  i = find_table_entry_ind(tab, hash, key);
1565  if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1566  goto retry;
1567  error_p = i == UNDEFINED_ENTRY_IND;
1568  i -= ENTRY_BASE;
1569  }
1570  if (error_p && check_p) {
1571  /* call func with error notice */
1572  retval = (*func)(0, 0, arg, 1);
1573  return 1;
1574  }
1575  curr_entry_ptr = &entries[i];
1576  }
1577  switch (retval) {
1578  case ST_REPLACE:
1579  break;
1580  case ST_CONTINUE:
1581  break;
1582  case ST_CHECK:
1583  if (check_p)
1584  break;
1585  case ST_STOP:
1586  return 0;
1587  case ST_DELETE: {
1588  st_data_t key = curr_entry_ptr->key;
1589 
1590  again:
1591  if (packed_p) {
1592  bin = find_entry(tab, hash, key);
1593  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1594  goto again;
1595  if (bin == UNDEFINED_ENTRY_IND)
1596  break;
1597  }
1598  else {
1599  bin_ind = find_table_bin_ind(tab, hash, key);
1600  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1601  goto again;
1602  if (bin_ind == UNDEFINED_BIN_IND)
1603  break;
1604  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1605  MARK_BIN_DELETED(tab, bin_ind);
1606  }
1607  curr_entry_ptr = &entries[bin];
1608  MARK_ENTRY_DELETED(curr_entry_ptr);
1609  tab->num_entries--;
1610  update_range_for_deleted(tab, bin);
1611  break;
1612  }
1613  }
1614  }
1615  return 0;
1616 }
1617 
1618 int
1619 st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1620 {
1621  return st_general_foreach(tab, func, replace, arg, TRUE);
1622 }
1623 
1624 struct functor {
1625  st_foreach_callback_func *func;
1626  st_data_t arg;
1627 };
1628 
1629 static int
1630 apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1631 {
1632  const struct functor *f = (void *)d;
1633  return f->func(k, v, f->arg);
1634 }
1635 
1636 int
1637 st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1638 {
1639  const struct functor f = { func, arg };
1640  return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1641 }
1642 
1643 /* See comments for function st_delete_safe. */
1644 int
1645 st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1646  st_data_t never ATTRIBUTE_UNUSED)
1647 {
1648  return st_general_foreach(tab, func, 0, arg, TRUE);
1649 }
1650 
1651 /* Set up array KEYS by at most SIZE keys of head table TAB entries.
1652  Return the number of keys set up in array KEYS. */
1653 static inline st_index_t
1654 st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1655 {
1656  st_index_t i, bound;
1657  st_data_t key, *keys_start, *keys_end;
1658  st_table_entry *curr_entry_ptr, *entries = tab->entries;
1659 
1660  bound = tab->entries_bound;
1661  keys_start = keys;
1662  keys_end = keys + size;
1663  for (i = tab->entries_start; i < bound; i++) {
1664  if (keys == keys_end)
1665  break;
1666  curr_entry_ptr = &entries[i];
1667  key = curr_entry_ptr->key;
1668  if (! DELETED_ENTRY_P(curr_entry_ptr))
1669  *keys++ = key;
1670  }
1671 
1672  return keys - keys_start;
1673 }
1674 
1675 st_index_t
1676 st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1677 {
1678  return st_general_keys(tab, keys, size);
1679 }
1680 
1681 /* See comments for function st_delete_safe. */
1682 st_index_t
1683 st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1684  st_data_t never ATTRIBUTE_UNUSED)
1685 {
1686  return st_general_keys(tab, keys, size);
1687 }
1688 
1689 /* Set up array VALUES by at most SIZE values of head table TAB
1690  entries. Return the number of values set up in array VALUES. */
1691 static inline st_index_t
1692 st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1693 {
1694  st_index_t i, bound;
1695  st_data_t *values_start, *values_end;
1696  st_table_entry *curr_entry_ptr, *entries = tab->entries;
1697 
1698  values_start = values;
1699  values_end = values + size;
1700  bound = tab->entries_bound;
1701  for (i = tab->entries_start; i < bound; i++) {
1702  if (values == values_end)
1703  break;
1704  curr_entry_ptr = &entries[i];
1705  if (! DELETED_ENTRY_P(curr_entry_ptr))
1706  *values++ = curr_entry_ptr->record;
1707  }
1708 
1709  return values - values_start;
1710 }
1711 
1712 st_index_t
1713 st_values(st_table *tab, st_data_t *values, st_index_t size)
1714 {
1715  return st_general_values(tab, values, size);
1716 }
1717 
1718 /* See comments for function st_delete_safe. */
1719 st_index_t
1720 st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1721  st_data_t never ATTRIBUTE_UNUSED)
1722 {
1723  return st_general_values(tab, values, size);
1724 }
1725 
1726 #define FNV1_32A_INIT 0x811c9dc5
1727 
1728 /*
1729  * 32 bit magic FNV-1a prime
1730  */
1731 #define FNV_32_PRIME 0x01000193
1732 
1733 /* __POWERPC__ added to accommodate Darwin case. */
1734 #ifndef UNALIGNED_WORD_ACCESS
1735 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1736  defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1737  defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1738  defined(__mc68020__)
1739 # define UNALIGNED_WORD_ACCESS 1
1740 # endif
1741 #endif
1742 #ifndef UNALIGNED_WORD_ACCESS
1743 # define UNALIGNED_WORD_ACCESS 0
1744 #endif
1745 
1746 /* This hash function is quite simplified MurmurHash3
1747  * Simplification is legal, cause most of magic still happens in finalizator.
1748  * And finalizator is almost the same as in MurmurHash3 */
1749 #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1750 #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1751 
1752 #if ST_INDEX_BITS <= 32
1753 #define C1 (st_index_t)0xcc9e2d51
1754 #define C2 (st_index_t)0x1b873593
1755 #else
1756 #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1757 #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1758 #endif
1759 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1760 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1761 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1762 
1763 static inline st_index_t
1764 murmur_step(st_index_t h, st_index_t k)
1765 {
1766 #if ST_INDEX_BITS <= 32
1767 #define r1 (17)
1768 #define r2 (11)
1769 #else
1770 #define r1 (33)
1771 #define r2 (24)
1772 #endif
1773  k *= C1;
1774  h ^= ROTL(k, r1);
1775  h *= C2;
1776  h = ROTL(h, r2);
1777  return h;
1778 }
1779 #undef r1
1780 #undef r2
1781 
1782 static inline st_index_t
1783 murmur_finish(st_index_t h)
1784 {
1785 #if ST_INDEX_BITS <= 32
1786 #define r1 (16)
1787 #define r2 (13)
1788 #define r3 (16)
1789  const st_index_t c1 = 0x85ebca6b;
1790  const st_index_t c2 = 0xc2b2ae35;
1791 #else
1792 /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1793 #define r1 (30)
1794 #define r2 (27)
1795 #define r3 (31)
1796  const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1797  const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1798 #endif
1799 #if ST_INDEX_BITS > 64
1800  h ^= h >> 64;
1801  h *= c2;
1802  h ^= h >> 65;
1803 #endif
1804  h ^= h >> r1;
1805  h *= c1;
1806  h ^= h >> r2;
1807  h *= c2;
1808  h ^= h >> r3;
1809  return h;
1810 }
1811 #undef r1
1812 #undef r2
1813 #undef r3
1814 
1815 st_index_t
1816 st_hash(const void *ptr, size_t len, st_index_t h)
1817 {
1818  const char *data = ptr;
1819  st_index_t t = 0;
1820  size_t l = len;
1821 
1822 #define data_at(n) (st_index_t)((unsigned char)data[(n)])
1823 #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1824 #if SIZEOF_ST_INDEX_T > 4
1825 #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1826 #if SIZEOF_ST_INDEX_T > 8
1827 #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1828  UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1829 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1830 #endif
1831 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1832 #else
1833 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1834 #endif
1835 #undef SKIP_TAIL
1836  if (len >= sizeof(st_index_t)) {
1837 #if !UNALIGNED_WORD_ACCESS
1838  int align = (int)((st_data_t)data % sizeof(st_index_t));
1839  if (align) {
1840  st_index_t d = 0;
1841  int sl, sr, pack;
1842 
1843  switch (align) {
1844 #ifdef WORDS_BIGENDIAN
1845 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1846  t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1847 #else
1848 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1849  t |= data_at(n) << CHAR_BIT*(n)
1850 #endif
1851  UNALIGNED_ADD_ALL;
1852 #undef UNALIGNED_ADD
1853  }
1854 
1855 #ifdef WORDS_BIGENDIAN
1856  t >>= (CHAR_BIT * align) - CHAR_BIT;
1857 #else
1858  t <<= (CHAR_BIT * align);
1859 #endif
1860 
1861  data += sizeof(st_index_t)-align;
1862  len -= sizeof(st_index_t)-align;
1863 
1864  sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1865  sr = CHAR_BIT * align;
1866 
1867  while (len >= sizeof(st_index_t)) {
1868  d = *(st_index_t *)data;
1869 #ifdef WORDS_BIGENDIAN
1870  t = (t << sr) | (d >> sl);
1871 #else
1872  t = (t >> sr) | (d << sl);
1873 #endif
1874  h = murmur_step(h, t);
1875  t = d;
1876  data += sizeof(st_index_t);
1877  len -= sizeof(st_index_t);
1878  }
1879 
1880  pack = len < (size_t)align ? (int)len : align;
1881  d = 0;
1882  switch (pack) {
1883 #ifdef WORDS_BIGENDIAN
1884 # define UNALIGNED_ADD(n) case (n) + 1: \
1885  d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1886 #else
1887 # define UNALIGNED_ADD(n) case (n) + 1: \
1888  d |= data_at(n) << CHAR_BIT*(n)
1889 #endif
1890  UNALIGNED_ADD_ALL;
1891 #undef UNALIGNED_ADD
1892  }
1893 #ifdef WORDS_BIGENDIAN
1894  t = (t << sr) | (d >> sl);
1895 #else
1896  t = (t >> sr) | (d << sl);
1897 #endif
1898 
1899  if (len < (size_t)align) goto skip_tail;
1900 # define SKIP_TAIL 1
1901  h = murmur_step(h, t);
1902  data += pack;
1903  len -= pack;
1904  }
1905  else
1906 #endif
1907 #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1908 #define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1909 #else
1910 #define aligned_data data
1911 #endif
1912  {
1913  do {
1914  h = murmur_step(h, *(st_index_t *)aligned_data);
1915  data += sizeof(st_index_t);
1916  len -= sizeof(st_index_t);
1917  } while (len >= sizeof(st_index_t));
1918  }
1919  }
1920 
1921  t = 0;
1922  switch (len) {
1923 #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1924  /* in this case byteorder doesn't really matter */
1925 #if SIZEOF_ST_INDEX_T > 4
1926  case 7: t |= data_at(6) << 48;
1927  case 6: t |= data_at(5) << 40;
1928  case 5: t |= data_at(4) << 32;
1929  case 4:
1930  t |= (st_index_t)*(uint32_t*)aligned_data;
1931  goto skip_tail;
1932 # define SKIP_TAIL 1
1933 #endif
1934  case 3: t |= data_at(2) << 16;
1935  case 2: t |= data_at(1) << 8;
1936  case 1: t |= data_at(0);
1937 #else
1938 #ifdef WORDS_BIGENDIAN
1939 # define UNALIGNED_ADD(n) case (n) + 1: \
1940  t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1941 #else
1942 # define UNALIGNED_ADD(n) case (n) + 1: \
1943  t |= data_at(n) << CHAR_BIT*(n)
1944 #endif
1945  UNALIGNED_ADD_ALL;
1946 #undef UNALIGNED_ADD
1947 #endif
1948 #ifdef SKIP_TAIL
1949  skip_tail:
1950 #endif
1951  h ^= t; h -= ROTL(t, 7);
1952  h *= C2;
1953  }
1954  h ^= l;
1955 #undef aligned_data
1956 
1957  return murmur_finish(h);
1958 }
1959 
1960 st_index_t
1961 st_hash_uint32(st_index_t h, uint32_t i)
1962 {
1963  return murmur_step(h, i);
1964 }
1965 
1966 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1967 st_index_t
1968 st_hash_uint(st_index_t h, st_index_t i)
1969 {
1970  i += h;
1971 /* no matter if it is BigEndian or LittleEndian,
1972  * we hash just integers */
1973 #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1974  h = murmur_step(h, i >> 8*8);
1975 #endif
1976  h = murmur_step(h, i);
1977  return h;
1978 }
1979 
1980 st_index_t
1981 st_hash_end(st_index_t h)
1982 {
1983  h = murmur_finish(h);
1984  return h;
1985 }
1986 
1987 #undef st_hash_start
1988 st_index_t
1989 rb_st_hash_start(st_index_t h)
1990 {
1991  return h;
1992 }
1993 
1994 static st_index_t
1995 strhash(st_data_t arg)
1996 {
1997  register const char *string = (const char *)arg;
1998  return st_hash(string, strlen(string), FNV1_32A_INIT);
1999 }
2000 
2001 int
2002 st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2003 {
2004  char c1, c2;
2005 
2006  while (1) {
2007  c1 = *s1++;
2008  c2 = *s2++;
2009  if (c1 == '\0' || c2 == '\0') {
2010  if (c1 != '\0') return 1;
2011  if (c2 != '\0') return -1;
2012  return 0;
2013  }
2014  if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2015  if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2016  if (c1 != c2) {
2017  if (c1 > c2)
2018  return 1;
2019  else
2020  return -1;
2021  }
2022  }
2023 }
2024 
2025 int
2026 st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2027 {
2028  char c1, c2;
2029  size_t i;
2030 
2031  for (i = 0; i < n; i++) {
2032  c1 = *s1++;
2033  c2 = *s2++;
2034  if (c1 == '\0' || c2 == '\0') {
2035  if (c1 != '\0') return 1;
2036  if (c2 != '\0') return -1;
2037  return 0;
2038  }
2039  if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2040  if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2041  if (c1 != c2) {
2042  if (c1 > c2)
2043  return 1;
2044  else
2045  return -1;
2046  }
2047  }
2048  return 0;
2049 }
2050 
2051 static int
2052 st_strcmp(st_data_t lhs, st_data_t rhs)
2053 {
2054  const char *s1 = (char *)lhs;
2055  const char *s2 = (char *)rhs;
2056  return strcmp(s1, s2);
2057 }
2058 
2059 static int
2060 st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2061 {
2062  const char *s1 = (char *)lhs;
2063  const char *s2 = (char *)rhs;
2064  return st_locale_insensitive_strcasecmp(s1, s2);
2065 }
2066 
2067 NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2068 static st_index_t
2069 strcasehash(st_data_t arg)
2070 {
2071  register const char *string = (const char *)arg;
2072  register st_index_t hval = FNV1_32A_INIT;
2073 
2074  /*
2075  * FNV-1a hash each octet in the buffer
2076  */
2077  while (*string) {
2078  unsigned int c = (unsigned char)*string++;
2079  if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2080  hval ^= c;
2081 
2082  /* multiply by the 32 bit FNV magic prime mod 2^32 */
2083  hval *= FNV_32_PRIME;
2084  }
2085  return hval;
2086 }
2087 
2088 int
2089 st_numcmp(st_data_t x, st_data_t y)
2090 {
2091  return x != y;
2092 }
2093 
2094 st_index_t
2095 st_numhash(st_data_t n)
2096 {
2097  enum {s1 = 11, s2 = 3};
2098  return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2099 }
2100 
2101 #ifdef RUBY
2102 /* Expand TAB to be suitable for holding SIZ entries in total.
2103  Pre-existing entries remain not deleted inside of TAB, but its bins
2104  are cleared to expect future reconstruction. See rehash below. */
2105 static void
2106 st_expand_table(st_table *tab, st_index_t siz)
2107 {
2108  st_table *tmp;
2109  st_index_t n;
2110 
2111  if (siz <= get_allocated_entries(tab))
2112  return; /* enough room already */
2113 
2114  tmp = st_init_table_with_size(tab->type, siz);
2115  n = get_allocated_entries(tab);
2116  MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2117  free(tab->entries);
2118  free(tab->bins);
2119  free(tmp->bins);
2120  tab->entry_power = tmp->entry_power;
2121  tab->bin_power = tmp->bin_power;
2122  tab->size_ind = tmp->size_ind;
2123  tab->entries = tmp->entries;
2124  tab->bins = NULL;
2125  tab->rebuilds_num++;
2126  free(tmp);
2127 }
2128 
2129 /* Rehash using linear search. Return TRUE if we found that the table
2130  was rebuilt. */
2131 static int
2132 st_rehash_linear(st_table *tab)
2133 {
2134  int eq_p, rebuilt_p;
2135  st_index_t i, j;
2136  st_table_entry *p, *q;
2137 
2138  free(tab->bins);
2139  tab->bins = NULL;
2140 
2141  for (i = tab->entries_start; i < tab->entries_bound; i++) {
2142  p = &tab->entries[i];
2143  if (DELETED_ENTRY_P(p))
2144  continue;
2145  for (j = i + 1; j < tab->entries_bound; j++) {
2146  q = &tab->entries[j];
2147  if (DELETED_ENTRY_P(q))
2148  continue;
2149  DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2150  if (EXPECT(rebuilt_p, 0))
2151  return TRUE;
2152  if (eq_p) {
2153  *p = *q;
2154  MARK_ENTRY_DELETED(q);
2155  tab->num_entries--;
2156  update_range_for_deleted(tab, j);
2157  }
2158  }
2159  }
2160  return FALSE;
2161 }
2162 
2163 /* Rehash using index. Return TRUE if we found that the table was
2164  rebuilt. */
2165 static int
2166 st_rehash_indexed(st_table *tab)
2167 {
2168  int eq_p, rebuilt_p;
2169  st_index_t i;
2170  st_index_t const n = bins_size(tab);
2171  unsigned int const size_ind = get_size_ind(tab);
2172  st_index_t *bins = realloc(tab->bins, n);
2173  tab->bins = bins;
2174  initialize_bins(tab);
2175  for (i = tab->entries_start; i < tab->entries_bound; i++) {
2176  st_table_entry *p = &tab->entries[i];
2177  st_index_t ind;
2178 #ifdef QUADRATIC_PROBE
2179  st_index_t d = 1;
2180 #else
2181  st_index_t perturb = p->hash;
2182 #endif
2183 
2184  if (DELETED_ENTRY_P(p))
2185  continue;
2186 
2187  ind = hash_bin(p->hash, tab);
2188  for (;;) {
2189  st_index_t bin = get_bin(bins, size_ind, ind);
2190  if (EMPTY_OR_DELETED_BIN_P(bin)) {
2191  /* ok, new room */
2192  set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2193  break;
2194  }
2195  else {
2196  st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2197  DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2198  if (EXPECT(rebuilt_p, 0))
2199  return TRUE;
2200  if (eq_p) {
2201  /* duplicated key; delete it */
2202  q->record = p->record;
2203  MARK_ENTRY_DELETED(p);
2204  tab->num_entries--;
2205  update_range_for_deleted(tab, bin);
2206  break;
2207  }
2208  else {
2209  /* hash collision; skip it */
2210 #ifdef QUADRATIC_PROBE
2211  ind = hash_bin(ind + d, tab);
2212  d++;
2213 #else
2214  ind = secondary_hash(ind, tab, &perturb);
2215 #endif
2216  }
2217  }
2218  }
2219  }
2220  return FALSE;
2221 }
2222 
2223 /* Reconstruct TAB's bins according to TAB's entries. This function
2224  permits conflicting keys inside of entries. No errors are reported
2225  then. All but one of them are discarded silently. */
2226 static void
2227 st_rehash(st_table *tab)
2228 {
2229  int rebuilt_p;
2230 
2231  do {
2232  if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2233  rebuilt_p = st_rehash_linear(tab);
2234  else
2235  rebuilt_p = st_rehash_indexed(tab);
2236  } while (rebuilt_p);
2237 }
2238 
2239 static st_data_t
2240 st_stringify(VALUE key)
2241 {
2242  return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2243  rb_hash_key_str(key) : key;
2244 }
2245 
2246 static void
2247 st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2248 {
2249  st_data_t k = st_stringify(key);
2250  st_table_entry e;
2251  e.hash = do_hash(k, tab);
2252  e.key = k;
2253  e.record = val;
2254 
2255  tab->entries[tab->entries_bound++] = e;
2256  tab->num_entries++;
2257  RB_OBJ_WRITTEN(hash, Qundef, k);
2258  RB_OBJ_WRITTEN(hash, Qundef, val);
2259 }
2260 
2261 static void
2262 st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2263 {
2264  long i;
2265 
2266  for (i = 0; i < argc; /* */) {
2267  st_data_t k = st_stringify(argv[i++]);
2268  st_data_t v = argv[i++];
2269  st_insert(tab, k, v);
2270  RB_OBJ_WRITTEN(hash, Qundef, k);
2271  RB_OBJ_WRITTEN(hash, Qundef, v);
2272  }
2273 }
2274 
2275 static void
2276 st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2277 {
2278  long i;
2279 
2280  /* push elems */
2281  for (i = 0; i < argc; /* */) {
2282  VALUE key = argv[i++];
2283  VALUE val = argv[i++];
2284  st_insert_single(tab, hash, key, val);
2285  }
2286 
2287  /* reindex */
2288  st_rehash(tab);
2289 }
2290 
2291 /* Mimics ruby's { foo => bar } syntax. This function is subpart
2292  of rb_hash_bulk_insert. */
2293 void
2294 rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2295 {
2296  st_index_t n, size = argc / 2;
2297  st_table *tab = RHASH_ST_TABLE(hash);
2298 
2299  tab = RHASH_TBL_RAW(hash);
2300  n = tab->entries_bound + size;
2301  st_expand_table(tab, n);
2302  if (UNLIKELY(tab->num_entries))
2303  st_insert_generic(tab, argc, argv, hash);
2304  else if (argc <= 2)
2305  st_insert_single(tab, hash, argv[0], argv[1]);
2306  else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2307  st_insert_linear(tab, argc, argv, hash);
2308  else
2309  st_insert_generic(tab, argc, argv, hash);
2310 }
2311 
2312 // to iterate iv_index_tbl
2313 st_data_t
2314 rb_st_nth_key(st_table *tab, st_index_t index)
2315 {
2316  if (LIKELY(tab->entries_start == 0 &&
2317  tab->num_entries == tab->entries_bound &&
2318  index < tab->num_entries)) {
2319  return tab->entries[index].key;
2320  }
2321  else {
2322  rb_bug("unreachable");
2323  }
2324 }
2325 
2326 void
2327 rb_st_compact_table(st_table *tab)
2328 {
2329  st_index_t num = tab->num_entries;
2330  if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2331  /* Compaction: */
2332  st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2333  rebuild_table_with(new_tab, tab);
2334  rebuild_move_table(new_tab, tab);
2335  rebuild_cleanup(tab);
2336  }
2337 }
2338 
2339 #endif
int st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
Our own locale-insensitive version of strcasecmp(3).
Definition: st.c:2002
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
Our own locale-insensitive version of strcnasecmp(3).
Definition: st.c:2026
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition: fl_type.h:898
#define Qundef
Old name of RUBY_Qundef.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition: error.c:3627
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition: error.c:1088
VALUE rb_eRuntimeError
RuntimeError exception.
Definition: error.c:1401
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition: object.c:247
VALUE rb_cString
String class.
Definition: string.c:78
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition: gc.h:615
char * ptr
Pointer to the underlying memory region, of at least capa bytes.
Definition: io.h:2
int len
Length of the buffer.
Definition: io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition: memory.h:367
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
Definition: cxxanyargs.hpp:432
int st_foreach_check(st_table *q, int_type *w, st_data_t e, st_data_t)
Iteration over the given table.
Definition: cxxanyargs.hpp:450
#define _(args)
This was a transition path from K&R to ANSI.
Definition: stdarg.h:35
Definition: hash.c:875
Definition: st.c:134
Definition: st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition: value.h:40