Ruby 4.1.0dev (2026-04-04 revision 3b6245536cf55da9e8bfcdb03c845fe9ef931d7f)
st.c (3b6245536cf55da9e8bfcdb03c845fe9ef931d7f)
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/gc.h"
111#include "internal/hash.h"
112#include "internal/sanitizers.h"
113#include "internal/set_table.h"
114#include "internal/st.h"
115#include "ruby_assert.h"
116#endif
117
118#include <stdio.h>
119#ifdef HAVE_STDLIB_H
120#include <stdlib.h>
121#endif
122#include <string.h>
123
124#ifdef __GNUC__
125#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
126#define EXPECT(expr, val) __builtin_expect(expr, val)
127#define ATTRIBUTE_UNUSED __attribute__((unused))
128#else
129#define PREFETCH(addr, write_p)
130#define EXPECT(expr, val) (expr)
131#define ATTRIBUTE_UNUSED
132#endif
133
134/* The type of hashes. */
135typedef st_index_t st_hash_t;
136
138 st_hash_t hash;
139 st_data_t key;
140 st_data_t record;
141};
142
143#define type_numhash st_hashtype_num
144static const struct st_hash_type st_hashtype_num = {
145 st_numcmp,
146 st_numhash,
147};
148
149static int st_strcmp(st_data_t, st_data_t);
150static st_index_t strhash(st_data_t);
151static const struct st_hash_type type_strhash = {
152 st_strcmp,
153 strhash,
154};
155
156static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
157static st_index_t strcasehash(st_data_t);
158static const struct st_hash_type type_strcasehash = {
159 st_locale_insensitive_strcasecmp_i,
160 strcasehash,
161};
162
163/* Value used to catch uninitialized entries/bins during debugging.
164 There is a possibility for a false alarm, but its probability is
165 extremely small. */
166#define ST_INIT_VAL 0xafafafafafafafaf
167#define ST_INIT_VAL_BYTE 0xafa
168
169#ifdef RUBY
170#undef malloc
171#undef realloc
172#undef calloc
173#undef free
174#define malloc ruby_xmalloc
175#define calloc ruby_xcalloc
176#define realloc ruby_xrealloc
177#define sized_realloc ruby_xrealloc_sized
178#define free ruby_xfree
179#define sized_free ruby_xfree_sized
180#define free_fixed_ptr(v) ruby_xfree_sized((v), sizeof(*(v)))
181#else
182#define sized_realloc(ptr, new_size, old_size) realloc(ptr, new_size)
183#define sized_free(v, s) free(v)
184#define free_fixed_ptr(v) free(v)
185#endif
186
187#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
188#define PTR_EQUAL(tab, ptr, hash_val, key_) \
189 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
190
191/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
192 up to TRUE if the table is rebuilt during the comparison. */
193#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
194 do { \
195 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
196 res = PTR_EQUAL(tab, ptr, hash_val, key); \
197 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
198 } while (FALSE)
199
200/* Features of a table. */
202 /* Power of 2 used for number of allocated entries. */
203 unsigned char entry_power;
204 /* Power of 2 used for number of allocated bins. Depending on the
205 table size, the number of bins is 2-4 times more than the
206 number of entries. */
207 unsigned char bin_power;
208 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
209 unsigned char size_ind;
210 /* Bins are packed in words of type st_index_t. The following is
211 a size of bins counted by words. */
212 st_index_t bins_words;
213};
214
215/* Features of all possible size tables. */
216#if SIZEOF_ST_INDEX_T == 8
217#define MAX_POWER2 62
218static const struct st_features features[] = {
219 {0, 1, 0, 0x0},
220 {1, 2, 0, 0x1},
221 {2, 3, 0, 0x1},
222 {3, 4, 0, 0x2},
223 {4, 5, 0, 0x4},
224 {5, 6, 0, 0x8},
225 {6, 7, 0, 0x10},
226 {7, 8, 0, 0x20},
227 {8, 9, 1, 0x80},
228 {9, 10, 1, 0x100},
229 {10, 11, 1, 0x200},
230 {11, 12, 1, 0x400},
231 {12, 13, 1, 0x800},
232 {13, 14, 1, 0x1000},
233 {14, 15, 1, 0x2000},
234 {15, 16, 1, 0x4000},
235 {16, 17, 2, 0x10000},
236 {17, 18, 2, 0x20000},
237 {18, 19, 2, 0x40000},
238 {19, 20, 2, 0x80000},
239 {20, 21, 2, 0x100000},
240 {21, 22, 2, 0x200000},
241 {22, 23, 2, 0x400000},
242 {23, 24, 2, 0x800000},
243 {24, 25, 2, 0x1000000},
244 {25, 26, 2, 0x2000000},
245 {26, 27, 2, 0x4000000},
246 {27, 28, 2, 0x8000000},
247 {28, 29, 2, 0x10000000},
248 {29, 30, 2, 0x20000000},
249 {30, 31, 2, 0x40000000},
250 {31, 32, 2, 0x80000000},
251 {32, 33, 3, 0x200000000},
252 {33, 34, 3, 0x400000000},
253 {34, 35, 3, 0x800000000},
254 {35, 36, 3, 0x1000000000},
255 {36, 37, 3, 0x2000000000},
256 {37, 38, 3, 0x4000000000},
257 {38, 39, 3, 0x8000000000},
258 {39, 40, 3, 0x10000000000},
259 {40, 41, 3, 0x20000000000},
260 {41, 42, 3, 0x40000000000},
261 {42, 43, 3, 0x80000000000},
262 {43, 44, 3, 0x100000000000},
263 {44, 45, 3, 0x200000000000},
264 {45, 46, 3, 0x400000000000},
265 {46, 47, 3, 0x800000000000},
266 {47, 48, 3, 0x1000000000000},
267 {48, 49, 3, 0x2000000000000},
268 {49, 50, 3, 0x4000000000000},
269 {50, 51, 3, 0x8000000000000},
270 {51, 52, 3, 0x10000000000000},
271 {52, 53, 3, 0x20000000000000},
272 {53, 54, 3, 0x40000000000000},
273 {54, 55, 3, 0x80000000000000},
274 {55, 56, 3, 0x100000000000000},
275 {56, 57, 3, 0x200000000000000},
276 {57, 58, 3, 0x400000000000000},
277 {58, 59, 3, 0x800000000000000},
278 {59, 60, 3, 0x1000000000000000},
279 {60, 61, 3, 0x2000000000000000},
280 {61, 62, 3, 0x4000000000000000},
281 {62, 63, 3, 0x8000000000000000},
282};
283
284#else
285#define MAX_POWER2 30
286
287static const struct st_features features[] = {
288 {0, 1, 0, 0x1},
289 {1, 2, 0, 0x1},
290 {2, 3, 0, 0x2},
291 {3, 4, 0, 0x4},
292 {4, 5, 0, 0x8},
293 {5, 6, 0, 0x10},
294 {6, 7, 0, 0x20},
295 {7, 8, 0, 0x40},
296 {8, 9, 1, 0x100},
297 {9, 10, 1, 0x200},
298 {10, 11, 1, 0x400},
299 {11, 12, 1, 0x800},
300 {12, 13, 1, 0x1000},
301 {13, 14, 1, 0x2000},
302 {14, 15, 1, 0x4000},
303 {15, 16, 1, 0x8000},
304 {16, 17, 2, 0x20000},
305 {17, 18, 2, 0x40000},
306 {18, 19, 2, 0x80000},
307 {19, 20, 2, 0x100000},
308 {20, 21, 2, 0x200000},
309 {21, 22, 2, 0x400000},
310 {22, 23, 2, 0x800000},
311 {23, 24, 2, 0x1000000},
312 {24, 25, 2, 0x2000000},
313 {25, 26, 2, 0x4000000},
314 {26, 27, 2, 0x8000000},
315 {27, 28, 2, 0x10000000},
316 {28, 29, 2, 0x20000000},
317 {29, 30, 2, 0x40000000},
318 {30, 31, 2, 0x80000000},
319};
320
321#endif
322
323/* The reserved hash value and its substitution. */
324#define RESERVED_HASH_VAL (~(st_hash_t) 0)
325#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
326
327static inline st_hash_t
328normalize_hash_value(st_hash_t hash)
329{
330 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
331 another value. Such mapping should be extremely rare. */
332 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
333}
334
335/* Return hash value of KEY for table TAB. */
336static inline st_hash_t
337do_hash(st_data_t key, st_table *tab)
338{
339 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
340 return normalize_hash_value(hash);
341}
342
343/* Power of 2 defining the minimal number of allocated entries. */
344#define MINIMAL_POWER2 2
345
346#if MINIMAL_POWER2 < 2
347#error "MINIMAL_POWER2 should be >= 2"
348#endif
349
350/* If the power2 of the allocated `entries` is less than the following
351 value, don't allocate bins and use a linear search. */
352#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
353
354/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
355static int
356get_power2(st_index_t size)
357{
358 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
359 if (n <= MAX_POWER2)
360 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
361#ifdef RUBY
362 /* Ran out of the table entries */
363 rb_raise(rb_eRuntimeError, "st_table too big");
364#endif
365 /* should raise exception */
366 return -1;
367}
368
369/* Return value of N-th bin in array BINS of table with bins size
370 index S. */
371static inline st_index_t
372get_bin(st_index_t *bins, int s, st_index_t n)
373{
374 return (s == 0 ? ((unsigned char *) bins)[n]
375 : s == 1 ? ((unsigned short *) bins)[n]
376 : s == 2 ? ((unsigned int *) bins)[n]
377 : ((st_index_t *) bins)[n]);
378}
379
380/* Set up N-th bin in array BINS of table with bins size index S to
381 value V. */
382static inline void
383set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
384{
385 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
386 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
387 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
388 else ((st_index_t *) bins)[n] = v;
389}
390
391/* These macros define reserved values for empty table bin and table
392 bin which contains a deleted entry. We will never use such values
393 for an entry index in bins. */
394#define EMPTY_BIN 0
395#define DELETED_BIN 1
396/* Base of a real entry index in the bins. */
397#define ENTRY_BASE 2
398
399/* Mark I-th bin of table TAB as empty, in other words not
400 corresponding to any entry. */
401#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
402
403/* Values used for not found entry and bin with given
404 characteristics. */
405#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
406#define UNDEFINED_BIN_IND (~(st_index_t) 0)
407
408/* Entry and bin values returned when we found a table rebuild during
409 the search. */
410#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
411#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
412
413/* Mark I-th bin of table TAB as corresponding to a deleted table
414 entry. Update number of entries in the table and number of bins
415 corresponding to deleted entries. */
416#define MARK_BIN_DELETED(tab, i) \
417 do { \
418 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
419 } while (0)
420
421/* Macros to check that value B is used empty bins and bins
422 corresponding deleted entries. */
423#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
424#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
425#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
426
427/* Macros to check empty bins and bins corresponding to deleted
428 entries. Bins are given by their index I in table TAB. */
429#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
430#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
431#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
432
433/* Macros for marking and checking deleted entries given by their
434 pointer E_PTR. */
435#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
436#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
437
438/* Return bin size index of table TAB. */
439static inline unsigned int
440get_size_ind(const st_table *tab)
441{
442 return tab->size_ind;
443}
444
445/* Return the number of allocated bins of table TAB. */
446static inline st_index_t
447get_bins_num(const st_table *tab)
448{
449 return ((st_index_t) 1)<<tab->bin_power;
450}
451
452/* Return mask for a bin index in table TAB. */
453static inline st_index_t
454bins_mask(const st_table *tab)
455{
456 return get_bins_num(tab) - 1;
457}
458
459/* Return the index of table TAB bin corresponding to
460 HASH_VALUE. */
461static inline st_index_t
462hash_bin(st_hash_t hash_value, st_table *tab)
463{
464 return hash_value & bins_mask(tab);
465}
466
467/* Return the number of allocated entries of table TAB. */
468static inline st_index_t
469get_allocated_entries(const st_table *tab)
470{
471 return ((st_index_t) 1)<<tab->entry_power;
472}
473
474/* Return size of the allocated bins of table TAB. */
475static inline st_index_t
476bins_size(const st_table *tab)
477{
478 return features[tab->entry_power].bins_words * sizeof (st_index_t);
479}
480
481/* Mark all bins of table TAB as empty. */
482static void
483initialize_bins(st_table *tab)
484{
485 memset(tab->bins, 0, bins_size(tab));
486}
487
488/* Make table TAB empty. */
489static void
490make_tab_empty(st_table *tab)
491{
492 tab->num_entries = 0;
493 tab->entries_start = tab->entries_bound = 0;
494 if (tab->bins != NULL)
495 initialize_bins(tab);
496}
497
498#ifdef HASH_LOG
499#ifdef HAVE_UNISTD_H
500#include <unistd.h>
501#endif
502static struct {
503 int all, total, num, str, strcase;
504} collision;
505
506/* Flag switching off output of package statistics at the end of
507 program. */
508static int init_st = 0;
509
510/* Output overall number of table searches and collisions into a
511 temporary file. */
512static void
513stat_col(void)
514{
515 char fname[10+sizeof(long)*3];
516 FILE *f;
517 if (!collision.total) return;
518 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
519 if (f == NULL)
520 return;
521 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
522 ((double)collision.all / (collision.total)) * 100);
523 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
524 fclose(f);
525}
526#endif
527
528st_table *
529st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
530{
531 int n;
532
533#ifdef HASH_LOG
534#if HASH_LOG+0 < 0
535 {
536 const char *e = getenv("ST_HASH_LOG");
537 if (!e || !*e) init_st = 1;
538 }
539#endif
540 if (init_st == 0) {
541 init_st = 1;
542 atexit(stat_col);
543 }
544#endif
545
546 n = get_power2(size);
547#ifndef RUBY
548 if (n < 0)
549 return NULL;
550#endif
551
552 tab->type = type;
553 tab->entry_power = n;
554 tab->bin_power = features[n].bin_power;
555 tab->size_ind = features[n].size_ind;
556 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
557 tab->bins = NULL;
558 else {
559 tab->bins = (st_index_t *) malloc(bins_size(tab));
560#ifndef RUBY
561 if (tab->bins == NULL) {
562 free_fixed_ptr(tab);
563 return NULL;
564 }
565#endif
566 }
567 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
568 * sizeof(st_table_entry));
569#ifndef RUBY
570 if (tab->entries == NULL) {
571 st_free_table(tab);
572 return NULL;
573 }
574#endif
575 make_tab_empty(tab);
576 tab->rebuilds_num = 0;
577 return tab;
578}
579
580st_table *
581st_init_existing_numtable_with_size(st_table *tab, st_index_t size)
582{
583 return st_init_existing_table_with_size(tab, &type_numhash, size);
584}
585
586/* Create and return table with TYPE which can hold at least SIZE
587 entries. The real number of entries which the table can hold is
588 the nearest power of two for SIZE. */
589st_table *
590st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
591{
592 st_table *tab = malloc(sizeof(st_table));
593#ifndef RUBY
594 if (tab == NULL)
595 return NULL;
596#endif
597
598#ifdef RUBY
599 st_init_existing_table_with_size(tab, type, size);
600#else
601 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
602 free_fixed_ptr(tab);
603 return NULL;
604 }
605#endif
606
607 return tab;
608}
609
610size_t
611st_table_size(const struct st_table *tbl)
612{
613 return tbl->num_entries;
614}
615
616/* Create and return table with TYPE which can hold a minimal number
617 of entries (see comments for get_power2). */
618st_table *
619st_init_table(const struct st_hash_type *type)
620{
621 return st_init_table_with_size(type, 0);
622}
623
624/* Create and return table which can hold a minimal number of
625 numbers. */
626st_table *
627st_init_numtable(void)
628{
629 return st_init_table(&type_numhash);
630}
631
632/* Create and return table which can hold SIZE numbers. */
633st_table *
634st_init_numtable_with_size(st_index_t size)
635{
636 return st_init_table_with_size(&type_numhash, size);
637}
638
639/* Create and return table which can hold a minimal number of
640 strings. */
641st_table *
642st_init_strtable(void)
643{
644 return st_init_table(&type_strhash);
645}
646
647/* Create and return table which can hold SIZE strings. */
648st_table *
649st_init_strtable_with_size(st_index_t size)
650{
651 return st_init_table_with_size(&type_strhash, size);
652}
653
654st_table *
655st_init_existing_strtable_with_size(st_table *tab, st_index_t size)
656{
657 return st_init_existing_table_with_size(tab, &type_strhash, size);
658}
659
660
661/* Create and return table which can hold a minimal number of strings
662 whose character case is ignored. */
663st_table *
664st_init_strcasetable(void)
665{
666 return st_init_table(&type_strcasehash);
667}
668
669/* Create and return table which can hold SIZE strings whose character
670 case is ignored. */
671st_table *
672st_init_strcasetable_with_size(st_index_t size)
673{
674 return st_init_table_with_size(&type_strcasehash, size);
675}
676
677/* Make table TAB empty. */
678void
679st_clear(st_table *tab)
680{
681 make_tab_empty(tab);
682 tab->rebuilds_num++;
683}
684
685static inline size_t
686st_entries_memsize(const st_table *tab)
687{
688 return get_allocated_entries(tab) * sizeof(st_table_entry);
689}
690
691static inline size_t
692st_bins_memsize(const st_table *tab)
693{
694 return tab->bins == NULL ? 0 : bins_size(tab);
695}
696
697static inline void
698st_free_entries(const st_table *tab)
699{
700 sized_free(tab->entries, st_entries_memsize(tab));
701}
702
703static inline void
704st_free_bins(const st_table *tab)
705{
706 sized_free(tab->bins, st_bins_memsize(tab));
707}
708
709void
710st_free_embedded_table(st_table *tab)
711{
712 st_free_bins(tab);
713 st_free_entries(tab);
714}
715
716/* Free table TAB space. */
717void
718st_free_table(st_table *tab)
719{
720 st_free_embedded_table(tab);
721 free_fixed_ptr(tab);
722}
723
724/* Return byte size of memory allocated for table TAB. */
725size_t
726st_memsize(const st_table *tab)
727{
728 RUBY_ASSERT(tab != NULL);
729 return(sizeof(st_table)
730 + st_bins_memsize(tab)
731 + st_entries_memsize(tab));
732}
733
734static st_index_t
735find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
736
737static st_index_t
738find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
739
740static st_index_t
741find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
742
743static st_index_t
744find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
745 st_data_t key, st_index_t *bin_ind);
746
747#ifdef HASH_LOG
748static void
749count_collision(const struct st_hash_type *type)
750{
751 collision.all++;
752 if (type == &type_numhash) {
753 collision.num++;
754 }
755 else if (type == &type_strhash) {
756 collision.strcase++;
757 }
758 else if (type == &type_strcasehash) {
759 collision.str++;
760 }
761}
762
763#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
764#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
765#define collision_check 0
766#else
767#define COLLISION
768#define FOUND_BIN
769#endif
770
771/* If the number of entries in the table is at least REBUILD_THRESHOLD
772 times less than the entry array length, decrease the table
773 size. */
774#define REBUILD_THRESHOLD 4
775
776#if REBUILD_THRESHOLD < 2
777#error "REBUILD_THRESHOLD should be >= 2"
778#endif
779
780static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
781static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
782static void rebuild_cleanup(st_table *const tab);
783
784/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
785 and can change size of the table entries and bins arrays.
786 Rebuilding is implemented by creation of a new table or by
787 compaction of the existing one. */
788static void
789rebuild_table(st_table *tab)
790{
791 if ((2 * tab->num_entries <= get_allocated_entries(tab)
792 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
793 || tab->num_entries < (1 << MINIMAL_POWER2)) {
794 /* Compaction: */
795 tab->num_entries = 0;
796 if (tab->bins != NULL)
797 initialize_bins(tab);
798 rebuild_table_with(tab, tab);
799 }
800 else {
801 st_table *new_tab;
802 /* This allocation could trigger GC and compaction. If tab is the
803 * gen_fields_tbl, then tab could have changed in size due to objects being
804 * freed and/or moved. Do not store attributes of tab before this line. */
805 new_tab = st_init_table_with_size(tab->type,
806 2 * tab->num_entries - 1);
807 rebuild_table_with(new_tab, tab);
808 rebuild_move_table(new_tab, tab);
809 }
810 rebuild_cleanup(tab);
811}
812
813static void
814rebuild_table_with(st_table *const new_tab, st_table *const tab)
815{
816 st_index_t i, ni;
817 unsigned int size_ind;
818 st_table_entry *new_entries;
819 st_table_entry *curr_entry_ptr;
820 st_index_t *bins;
821 st_index_t bin_ind;
822
823 new_entries = new_tab->entries;
824
825 ni = 0;
826 bins = new_tab->bins;
827 size_ind = get_size_ind(new_tab);
828 st_index_t bound = tab->entries_bound;
829 st_table_entry *entries = tab->entries;
830
831 for (i = tab->entries_start; i < bound; i++) {
832 curr_entry_ptr = &entries[i];
833 PREFETCH(entries + i + 1, 0);
834 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
835 continue;
836 if (&new_entries[ni] != curr_entry_ptr)
837 new_entries[ni] = *curr_entry_ptr;
838 if (EXPECT(bins != NULL, 1)) {
839 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
840 curr_entry_ptr->key);
841 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
842 }
843 new_tab->num_entries++;
844 ni++;
845 }
846
847 assert(new_tab->num_entries == tab->num_entries);
848}
849
850static void
851rebuild_move_table(st_table *const new_tab, st_table *const tab)
852{
853 st_free_bins(tab);
854 st_free_entries(tab);
855
856 tab->entry_power = new_tab->entry_power;
857 tab->bin_power = new_tab->bin_power;
858 tab->size_ind = new_tab->size_ind;
859 tab->bins = new_tab->bins;
860 tab->entries = new_tab->entries;
861 free_fixed_ptr(new_tab);
862}
863
864static void
865rebuild_cleanup(st_table *const tab)
866{
867 tab->entries_start = 0;
868 tab->entries_bound = tab->num_entries;
869 tab->rebuilds_num++;
870}
871
872/* Return the next secondary hash index for table TAB using previous
873 index IND and PERTURB. Finally modulo of the function becomes a
874 full *cycle linear congruential generator*, in other words it
875 guarantees traversing all table bins in extreme case.
876
877 According the Hull-Dobell theorem a generator
878 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
879 o m and c are relatively prime
880 o a-1 is divisible by all prime factors of m
881 o a-1 is divisible by 4 if m is divisible by 4.
882
883 For our case a is 5, c is 1, and m is a power of two. */
884static inline st_index_t
885secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
886{
887 *perturb >>= 11;
888 ind = (ind << 2) + ind + *perturb + 1;
889 return hash_bin(ind, tab);
890}
891
892/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
893 search. Return the index of the found entry in array `entries`.
894 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
895 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
896static inline st_index_t
897find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
898{
899 int eq_p, rebuilt_p;
900 st_index_t i, bound;
901 st_table_entry *entries;
902
903 bound = tab->entries_bound;
904 entries = tab->entries;
905 for (i = tab->entries_start; i < bound; i++) {
906 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
907 if (EXPECT(rebuilt_p, 0))
908 return REBUILT_TABLE_ENTRY_IND;
909 if (eq_p)
910 return i;
911 }
912 return UNDEFINED_ENTRY_IND;
913}
914
915/* Use the quadratic probing. The method has a better data locality
916 but more collisions than the current approach. In average it
917 results in a bit slower search. */
918/*#define QUADRATIC_PROBE*/
919
920/* Return index of entry with HASH_VALUE and KEY in table TAB. If
921 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
922 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
923static st_index_t
924find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
925{
926 int eq_p, rebuilt_p;
927 st_index_t ind;
928#ifdef QUADRATIC_PROBE
929 st_index_t d;
930#else
931 st_index_t perturb;
932#endif
933 st_index_t bin;
934 st_table_entry *entries = tab->entries;
935
936 ind = hash_bin(hash_value, tab);
937#ifdef QUADRATIC_PROBE
938 d = 1;
939#else
940 perturb = hash_value;
941#endif
942 FOUND_BIN;
943 for (;;) {
944 bin = get_bin(tab->bins, get_size_ind(tab), ind);
945 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
946 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
947 if (EXPECT(rebuilt_p, 0))
948 return REBUILT_TABLE_ENTRY_IND;
949 if (eq_p)
950 break;
951 }
952 else if (EMPTY_BIN_P(bin))
953 return UNDEFINED_ENTRY_IND;
954#ifdef QUADRATIC_PROBE
955 ind = hash_bin(ind + d, tab);
956 d++;
957#else
958 ind = secondary_hash(ind, tab, &perturb);
959#endif
960 COLLISION;
961 }
962 return bin;
963}
964
965/* Find and return index of table TAB bin corresponding to an entry
966 with HASH_VALUE and KEY. If there is no such bin, return
967 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
968 return REBUILT_TABLE_BIN_IND. */
969static st_index_t
970find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
971{
972 int eq_p, rebuilt_p;
973 st_index_t ind;
974#ifdef QUADRATIC_PROBE
975 st_index_t d;
976#else
977 st_index_t perturb;
978#endif
979 st_index_t bin;
980 st_table_entry *entries = tab->entries;
981
982 ind = hash_bin(hash_value, tab);
983#ifdef QUADRATIC_PROBE
984 d = 1;
985#else
986 perturb = hash_value;
987#endif
988 FOUND_BIN;
989 for (;;) {
990 bin = get_bin(tab->bins, get_size_ind(tab), ind);
991 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
992 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
993 if (EXPECT(rebuilt_p, 0))
994 return REBUILT_TABLE_BIN_IND;
995 if (eq_p)
996 break;
997 }
998 else if (EMPTY_BIN_P(bin))
999 return UNDEFINED_BIN_IND;
1000#ifdef QUADRATIC_PROBE
1001 ind = hash_bin(ind + d, tab);
1002 d++;
1003#else
1004 ind = secondary_hash(ind, tab, &perturb);
1005#endif
1006 COLLISION;
1007 }
1008 return ind;
1009}
1010
1011/* Find and return index of table TAB bin corresponding to an entry
1012 with HASH_VALUE and KEY. The entry should be in the table
1013 already. */
1014static st_index_t
1015find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
1016{
1017 st_index_t ind;
1018#ifdef QUADRATIC_PROBE
1019 st_index_t d;
1020#else
1021 st_index_t perturb;
1022#endif
1023 st_index_t bin;
1024
1025 ind = hash_bin(hash_value, tab);
1026#ifdef QUADRATIC_PROBE
1027 d = 1;
1028#else
1029 perturb = hash_value;
1030#endif
1031 FOUND_BIN;
1032 for (;;) {
1033 bin = get_bin(tab->bins, get_size_ind(tab), ind);
1034 if (EMPTY_OR_DELETED_BIN_P(bin))
1035 return ind;
1036#ifdef QUADRATIC_PROBE
1037 ind = hash_bin(ind + d, tab);
1038 d++;
1039#else
1040 ind = secondary_hash(ind, tab, &perturb);
1041#endif
1042 COLLISION;
1043 }
1044}
1045
1046/* Return index of table TAB bin for HASH_VALUE and KEY through
1047 BIN_IND and the pointed value as the function result. Reserve the
1048 bin for inclusion of the corresponding entry into the table if it
1049 is not there yet. We always find such bin as bins array length is
1050 bigger entries array. Although we can reuse a deleted bin, the
1051 result bin value is always empty if the table has no entry with
1052 KEY. Return the entries array index of the found entry or
1053 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1054 during the search, return REBUILT_TABLE_ENTRY_IND. */
1055static st_index_t
1056find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1057 st_data_t key, st_index_t *bin_ind)
1058{
1059 int eq_p, rebuilt_p;
1060 st_index_t ind;
1061 st_hash_t curr_hash_value = *hash_value;
1062#ifdef QUADRATIC_PROBE
1063 st_index_t d;
1064#else
1065 st_index_t perturb;
1066#endif
1067 st_index_t entry_index;
1068 st_index_t first_deleted_bin_ind;
1069 st_table_entry *entries;
1070
1071 ind = hash_bin(curr_hash_value, tab);
1072#ifdef QUADRATIC_PROBE
1073 d = 1;
1074#else
1075 perturb = curr_hash_value;
1076#endif
1077 FOUND_BIN;
1078 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1079 entries = tab->entries;
1080 for (;;) {
1081 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1082 if (EMPTY_BIN_P(entry_index)) {
1083 tab->num_entries++;
1084 entry_index = UNDEFINED_ENTRY_IND;
1085 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1086 /* We can reuse bin of a deleted entry. */
1087 ind = first_deleted_bin_ind;
1088 MARK_BIN_EMPTY(tab, ind);
1089 }
1090 break;
1091 }
1092 else if (! DELETED_BIN_P(entry_index)) {
1093 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1094 if (EXPECT(rebuilt_p, 0))
1095 return REBUILT_TABLE_ENTRY_IND;
1096 if (eq_p)
1097 break;
1098 }
1099 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1100 first_deleted_bin_ind = ind;
1101#ifdef QUADRATIC_PROBE
1102 ind = hash_bin(ind + d, tab);
1103 d++;
1104#else
1105 ind = secondary_hash(ind, tab, &perturb);
1106#endif
1107 COLLISION;
1108 }
1109 *bin_ind = ind;
1110 return entry_index;
1111}
1112
1113/* Find an entry with KEY in table TAB. Return non-zero if we found
1114 it. Set up *RECORD to the found entry record. */
1115int
1116st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1117{
1118 st_index_t bin;
1119 st_hash_t hash = do_hash(key, tab);
1120
1121 retry:
1122 if (tab->bins == NULL) {
1123 bin = find_entry(tab, hash, key);
1124 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1125 goto retry;
1126 if (bin == UNDEFINED_ENTRY_IND)
1127 return 0;
1128 }
1129 else {
1130 bin = find_table_entry_ind(tab, hash, key);
1131 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1132 goto retry;
1133 if (bin == UNDEFINED_ENTRY_IND)
1134 return 0;
1135 bin -= ENTRY_BASE;
1136 }
1137 if (value != 0)
1138 *value = tab->entries[bin].record;
1139 return 1;
1140}
1141
1142/* Find an entry with KEY in table TAB. Return non-zero if we found
1143 it. Set up *RESULT to the found table entry key. */
1144int
1145st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1146{
1147 st_index_t bin;
1148 st_hash_t hash = do_hash(key, tab);
1149
1150 retry:
1151 if (tab->bins == NULL) {
1152 bin = find_entry(tab, hash, key);
1153 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1154 goto retry;
1155 if (bin == UNDEFINED_ENTRY_IND)
1156 return 0;
1157 }
1158 else {
1159 bin = find_table_entry_ind(tab, hash, key);
1160 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1161 goto retry;
1162 if (bin == UNDEFINED_ENTRY_IND)
1163 return 0;
1164 bin -= ENTRY_BASE;
1165 }
1166 if (result != 0)
1167 *result = tab->entries[bin].key;
1168 return 1;
1169}
1170
1171/* Check the table and rebuild it if it is necessary. */
1172static inline void
1173rebuild_table_if_necessary (st_table *tab)
1174{
1175 st_index_t bound = tab->entries_bound;
1176
1177 if (bound == get_allocated_entries(tab))
1178 rebuild_table(tab);
1179}
1180
1181/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1182 already entry with KEY in the table, return nonzero and update
1183 the value of the found entry. */
1184int
1185st_insert(st_table *tab, st_data_t key, st_data_t value)
1186{
1187 st_table_entry *entry;
1188 st_index_t bin;
1189 st_index_t ind;
1190 st_hash_t hash_value;
1191 st_index_t bin_ind;
1192 int new_p;
1193
1194 hash_value = do_hash(key, tab);
1195 retry:
1196 rebuild_table_if_necessary(tab);
1197 if (tab->bins == NULL) {
1198 bin = find_entry(tab, hash_value, key);
1199 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1200 goto retry;
1201 new_p = bin == UNDEFINED_ENTRY_IND;
1202 if (new_p)
1203 tab->num_entries++;
1204 bin_ind = UNDEFINED_BIN_IND;
1205 }
1206 else {
1207 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1208 key, &bin_ind);
1209 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1210 goto retry;
1211 new_p = bin == UNDEFINED_ENTRY_IND;
1212 bin -= ENTRY_BASE;
1213 }
1214 if (new_p) {
1215 ind = tab->entries_bound++;
1216 entry = &tab->entries[ind];
1217 entry->hash = hash_value;
1218 entry->key = key;
1219 entry->record = value;
1220 if (bin_ind != UNDEFINED_BIN_IND)
1221 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1222 return 0;
1223 }
1224 tab->entries[bin].record = value;
1225 return 1;
1226}
1227
1228/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1229 entry with KEY before the insertion. */
1230static inline void
1231st_add_direct_with_hash(st_table *tab,
1232 st_data_t key, st_data_t value, st_hash_t hash)
1233{
1234 st_table_entry *entry;
1235 st_index_t ind;
1236 st_index_t bin_ind;
1237
1238 assert(hash != RESERVED_HASH_VAL);
1239
1240 rebuild_table_if_necessary(tab);
1241 ind = tab->entries_bound++;
1242 entry = &tab->entries[ind];
1243 entry->hash = hash;
1244 entry->key = key;
1245 entry->record = value;
1246 tab->num_entries++;
1247 if (tab->bins != NULL) {
1248 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1249 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1250 }
1251}
1252
1253void
1254rb_st_add_direct_with_hash(st_table *tab,
1255 st_data_t key, st_data_t value, st_hash_t hash)
1256{
1257 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1258}
1259
1260/* Insert (KEY, VALUE) into table TAB. The table should not have
1261 entry with KEY before the insertion. */
1262void
1263st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1264{
1265 st_hash_t hash_value;
1266
1267 hash_value = do_hash(key, tab);
1268 st_add_direct_with_hash(tab, key, value, hash_value);
1269}
1270
1271/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1272 there is already entry with KEY in the table, return nonzero and
1273 update the value of the found entry. */
1274int
1275st_insert2(st_table *tab, st_data_t key, st_data_t value,
1276 st_data_t (*func)(st_data_t))
1277{
1278 st_table_entry *entry;
1279 st_index_t bin;
1280 st_index_t ind;
1281 st_hash_t hash_value;
1282 st_index_t bin_ind;
1283 int new_p;
1284
1285 hash_value = do_hash(key, tab);
1286 retry:
1287 rebuild_table_if_necessary (tab);
1288 if (tab->bins == NULL) {
1289 bin = find_entry(tab, hash_value, key);
1290 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1291 goto retry;
1292 new_p = bin == UNDEFINED_ENTRY_IND;
1293 if (new_p)
1294 tab->num_entries++;
1295 bin_ind = UNDEFINED_BIN_IND;
1296 }
1297 else {
1298 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1299 key, &bin_ind);
1300 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1301 goto retry;
1302 new_p = bin == UNDEFINED_ENTRY_IND;
1303 bin -= ENTRY_BASE;
1304 }
1305 if (new_p) {
1306 key = (*func)(key);
1307 ind = tab->entries_bound++;
1308 entry = &tab->entries[ind];
1309 entry->hash = hash_value;
1310 entry->key = key;
1311 entry->record = value;
1312 if (bin_ind != UNDEFINED_BIN_IND)
1313 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1314 return 0;
1315 }
1316 tab->entries[bin].record = value;
1317 return 1;
1318}
1319
1320/* Create a copy of old_tab into new_tab. */
1321st_table *
1322st_replace(st_table *new_tab, st_table *old_tab)
1323{
1324 *new_tab = *old_tab;
1325 if (old_tab->bins == NULL)
1326 new_tab->bins = NULL;
1327 else {
1328 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1329#ifndef RUBY
1330 if (new_tab->bins == NULL) {
1331 return NULL;
1332 }
1333#endif
1334 }
1335 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1336 * sizeof(st_table_entry));
1337#ifndef RUBY
1338 if (new_tab->entries == NULL) {
1339 return NULL;
1340 }
1341#endif
1342 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1343 get_allocated_entries(old_tab));
1344 if (old_tab->bins != NULL)
1345 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1346
1347 return new_tab;
1348}
1349
1350/* Create and return a copy of table OLD_TAB. */
1351st_table *
1352st_copy(st_table *old_tab)
1353{
1354 st_table *new_tab;
1355
1356 new_tab = (st_table *) malloc(sizeof(st_table));
1357#ifndef RUBY
1358 if (new_tab == NULL)
1359 return NULL;
1360#endif
1361
1362 if (st_replace(new_tab, old_tab) == NULL) {
1363 st_free_table(new_tab);
1364 return NULL;
1365 }
1366
1367 return new_tab;
1368}
1369
1370/* Update the entries start of table TAB after removing an entry
1371 with index N in the array entries. */
1372static inline void
1373update_range_for_deleted(st_table *tab, st_index_t n)
1374{
1375 /* Do not update entries_bound here. Otherwise, we can fill all
1376 bins by deleted entry value before rebuilding the table. */
1377 if (tab->entries_start == n) {
1378 st_index_t start = n + 1;
1379 st_index_t bound = tab->entries_bound;
1380 st_table_entry *entries = tab->entries;
1381 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1382 tab->entries_start = start;
1383 }
1384}
1385
1386/* Delete entry with KEY from table TAB, set up *VALUE (unless
1387 VALUE is zero) from deleted table entry, and return non-zero. If
1388 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1389 is zero), and return zero. */
1390static int
1391st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1392{
1393 st_table_entry *entry;
1394 st_index_t bin;
1395 st_index_t bin_ind;
1396 st_hash_t hash;
1397
1398 hash = do_hash(*key, tab);
1399 retry:
1400 if (tab->bins == NULL) {
1401 bin = find_entry(tab, hash, *key);
1402 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1403 goto retry;
1404 if (bin == UNDEFINED_ENTRY_IND) {
1405 if (value != 0) *value = 0;
1406 return 0;
1407 }
1408 }
1409 else {
1410 bin_ind = find_table_bin_ind(tab, hash, *key);
1411 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1412 goto retry;
1413 if (bin_ind == UNDEFINED_BIN_IND) {
1414 if (value != 0) *value = 0;
1415 return 0;
1416 }
1417 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1418 MARK_BIN_DELETED(tab, bin_ind);
1419 }
1420 entry = &tab->entries[bin];
1421 *key = entry->key;
1422 if (value != 0) *value = entry->record;
1423 MARK_ENTRY_DELETED(entry);
1424 tab->num_entries--;
1425 update_range_for_deleted(tab, bin);
1426 return 1;
1427}
1428
1429int
1430st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1431{
1432 return st_general_delete(tab, key, value);
1433}
1434
1435/* The function and other functions with suffix '_safe' or '_check'
1436 are originated from the previous implementation of the hash tables.
1437 It was necessary for correct deleting entries during traversing
1438 tables. The current implementation permits deletion during
1439 traversing without a specific way to do this. */
1440int
1441st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1442 st_data_t never ATTRIBUTE_UNUSED)
1443{
1444 return st_general_delete(tab, key, value);
1445}
1446
1447/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1448 return zero. Otherwise, remove the first entry in the table.
1449 Return its key through KEY and its record through VALUE (unless
1450 VALUE is zero). */
1451int
1452st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1453{
1454 st_index_t i, bound;
1455 st_index_t bin;
1456 st_table_entry *entries, *curr_entry_ptr;
1457 st_index_t bin_ind;
1458
1459 entries = tab->entries;
1460 bound = tab->entries_bound;
1461 for (i = tab->entries_start; i < bound; i++) {
1462 curr_entry_ptr = &entries[i];
1463 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1464 st_hash_t entry_hash = curr_entry_ptr->hash;
1465 st_data_t entry_key = curr_entry_ptr->key;
1466
1467 if (value != 0) *value = curr_entry_ptr->record;
1468 *key = entry_key;
1469 retry:
1470 if (tab->bins == NULL) {
1471 bin = find_entry(tab, entry_hash, entry_key);
1472 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1473 entries = tab->entries;
1474 goto retry;
1475 }
1476 curr_entry_ptr = &entries[bin];
1477 }
1478 else {
1479 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1480 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1481 entries = tab->entries;
1482 goto retry;
1483 }
1484 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1485 - ENTRY_BASE];
1486 MARK_BIN_DELETED(tab, bin_ind);
1487 }
1488 MARK_ENTRY_DELETED(curr_entry_ptr);
1489 tab->num_entries--;
1490 update_range_for_deleted(tab, i);
1491 return 1;
1492 }
1493 }
1494 if (value != 0) *value = 0;
1495 return 0;
1496}
1497
1498/* See comments for function st_delete_safe. */
1499void
1500st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1501 st_data_t never ATTRIBUTE_UNUSED)
1502{
1503}
1504
1505/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1506 of the key and the value of the found entry, and non-zero as the
1507 3rd argument. If the entry is not found, call FUNC with a pointer
1508 to KEY, a pointer to zero, and a zero argument. If the call
1509 returns ST_CONTINUE, the table will have an entry with key and
1510 value returned by FUNC through the 1st and 2nd parameters. If the
1511 call of FUNC returns ST_DELETE, the table will not have entry with
1512 KEY. The function returns flag of that the entry with KEY was in
1513 the table before the call. */
1514int
1515st_update(st_table *tab, st_data_t key,
1516 st_update_callback_func *func, st_data_t arg)
1517{
1518 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1519 st_index_t bin = 0; /* Ditto */
1520 st_table_entry *entries;
1521 st_index_t bin_ind;
1522 st_data_t value = 0, old_key;
1523 int retval, existing;
1524 st_hash_t hash = do_hash(key, tab);
1525
1526 retry:
1527 entries = tab->entries;
1528 if (tab->bins == NULL) {
1529 bin = find_entry(tab, hash, key);
1530 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1531 goto retry;
1532 existing = bin != UNDEFINED_ENTRY_IND;
1533 entry = &entries[bin];
1534 bin_ind = UNDEFINED_BIN_IND;
1535 }
1536 else {
1537 bin_ind = find_table_bin_ind(tab, hash, key);
1538 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1539 goto retry;
1540 existing = bin_ind != UNDEFINED_BIN_IND;
1541 if (existing) {
1542 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1543 entry = &entries[bin];
1544 }
1545 }
1546 if (existing) {
1547 key = entry->key;
1548 value = entry->record;
1549 }
1550 old_key = key;
1551
1552 unsigned int rebuilds_num = tab->rebuilds_num;
1553
1554 retval = (*func)(&key, &value, arg, existing);
1555
1556 // We need to make sure that the callback didn't cause a table rebuild
1557 // Ideally we would make sure no operations happened
1558 assert(rebuilds_num == tab->rebuilds_num);
1559 (void)rebuilds_num;
1560
1561 switch (retval) {
1562 case ST_CONTINUE:
1563 if (! existing) {
1564 st_add_direct_with_hash(tab, key, value, hash);
1565 break;
1566 }
1567 if (old_key != key) {
1568 entry->key = key;
1569 }
1570 entry->record = value;
1571 break;
1572 case ST_DELETE:
1573 if (existing) {
1574 if (bin_ind != UNDEFINED_BIN_IND)
1575 MARK_BIN_DELETED(tab, bin_ind);
1576 MARK_ENTRY_DELETED(entry);
1577 tab->num_entries--;
1578 update_range_for_deleted(tab, bin);
1579 }
1580 break;
1581 }
1582 return existing;
1583}
1584
1585/* Traverse all entries in table TAB calling FUNC with current entry
1586 key and value and zero. If the call returns ST_STOP, stop
1587 traversing. If the call returns ST_DELETE, delete the current
1588 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1589 traversing. The function returns zero unless an error is found.
1590 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1591 different for ST_CHECK and when the current element is removed
1592 during traversing. */
1593static inline int
1594st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1595 int check_p)
1596{
1597 st_index_t bin;
1598 st_index_t bin_ind;
1599 st_table_entry *entries, *curr_entry_ptr;
1600 enum st_retval retval;
1601 st_index_t i, rebuilds_num;
1602 st_hash_t hash;
1603 st_data_t key;
1604 int error_p, packed_p = tab->bins == NULL;
1605
1606 entries = tab->entries;
1607 /* The bound can change inside the loop even without rebuilding
1608 the table, e.g. by an entry insertion. */
1609 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1610 curr_entry_ptr = &entries[i];
1611 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1612 continue;
1613 key = curr_entry_ptr->key;
1614 rebuilds_num = tab->rebuilds_num;
1615 hash = curr_entry_ptr->hash;
1616 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1617
1618 if (retval == ST_REPLACE && replace) {
1619 st_data_t value;
1620 value = curr_entry_ptr->record;
1621 retval = (*replace)(&key, &value, arg, TRUE);
1622 curr_entry_ptr->key = key;
1623 curr_entry_ptr->record = value;
1624 }
1625
1626 if (rebuilds_num != tab->rebuilds_num) {
1627 retry:
1628 entries = tab->entries;
1629 packed_p = tab->bins == NULL;
1630 if (packed_p) {
1631 i = find_entry(tab, hash, key);
1632 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1633 goto retry;
1634 error_p = i == UNDEFINED_ENTRY_IND;
1635 }
1636 else {
1637 i = find_table_entry_ind(tab, hash, key);
1638 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1639 goto retry;
1640 error_p = i == UNDEFINED_ENTRY_IND;
1641 i -= ENTRY_BASE;
1642 }
1643 if (error_p && check_p) {
1644 /* call func with error notice */
1645 retval = (*func)(0, 0, arg, 1);
1646 return 1;
1647 }
1648 curr_entry_ptr = &entries[i];
1649 }
1650 switch (retval) {
1651 case ST_REPLACE:
1652 break;
1653 case ST_CONTINUE:
1654 break;
1655 case ST_CHECK:
1656 if (check_p)
1657 break;
1658 case ST_STOP:
1659 return 0;
1660 case ST_DELETE: {
1661 st_data_t key = curr_entry_ptr->key;
1662
1663 again:
1664 if (packed_p) {
1665 bin = find_entry(tab, hash, key);
1666 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1667 goto again;
1668 if (bin == UNDEFINED_ENTRY_IND)
1669 break;
1670 }
1671 else {
1672 bin_ind = find_table_bin_ind(tab, hash, key);
1673 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1674 goto again;
1675 if (bin_ind == UNDEFINED_BIN_IND)
1676 break;
1677 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1678 MARK_BIN_DELETED(tab, bin_ind);
1679 }
1680 curr_entry_ptr = &entries[bin];
1681 MARK_ENTRY_DELETED(curr_entry_ptr);
1682 tab->num_entries--;
1683 update_range_for_deleted(tab, bin);
1684 break;
1685 }
1686 }
1687 }
1688 return 0;
1689}
1690
1691int
1692st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1693{
1694 return st_general_foreach(tab, func, replace, arg, TRUE);
1695}
1696
1697struct functor {
1698 st_foreach_callback_func *func;
1699 st_data_t arg;
1700};
1701
1702static int
1703apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1704{
1705 const struct functor *f = (void *)d;
1706 return f->func(k, v, f->arg);
1707}
1708
1709int
1710st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1711{
1712 const struct functor f = { func, arg };
1713 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1714}
1715
1716/* See comments for function st_delete_safe. */
1717int
1718st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1719 st_data_t never ATTRIBUTE_UNUSED)
1720{
1721 return st_general_foreach(tab, func, 0, arg, TRUE);
1722}
1723
1724/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1725 Return the number of keys set up in array KEYS. */
1726static inline st_index_t
1727st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1728{
1729 st_index_t i, bound;
1730 st_data_t key, *keys_start, *keys_end;
1731 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1732
1733 bound = tab->entries_bound;
1734 keys_start = keys;
1735 keys_end = keys + size;
1736 for (i = tab->entries_start; i < bound; i++) {
1737 if (keys == keys_end)
1738 break;
1739 curr_entry_ptr = &entries[i];
1740 key = curr_entry_ptr->key;
1741 if (! DELETED_ENTRY_P(curr_entry_ptr))
1742 *keys++ = key;
1743 }
1744
1745 return keys - keys_start;
1746}
1747
1748st_index_t
1749st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1750{
1751 return st_general_keys(tab, keys, size);
1752}
1753
1754/* See comments for function st_delete_safe. */
1755st_index_t
1756st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1757 st_data_t never ATTRIBUTE_UNUSED)
1758{
1759 return st_general_keys(tab, keys, size);
1760}
1761
1762/* Set up array VALUES by at most SIZE values of head table TAB
1763 entries. Return the number of values set up in array VALUES. */
1764static inline st_index_t
1765st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1766{
1767 st_index_t i, bound;
1768 st_data_t *values_start, *values_end;
1769 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1770
1771 values_start = values;
1772 values_end = values + size;
1773 bound = tab->entries_bound;
1774 for (i = tab->entries_start; i < bound; i++) {
1775 if (values == values_end)
1776 break;
1777 curr_entry_ptr = &entries[i];
1778 if (! DELETED_ENTRY_P(curr_entry_ptr))
1779 *values++ = curr_entry_ptr->record;
1780 }
1781
1782 return values - values_start;
1783}
1784
1785st_index_t
1786st_values(st_table *tab, st_data_t *values, st_index_t size)
1787{
1788 return st_general_values(tab, values, size);
1789}
1790
1791/* See comments for function st_delete_safe. */
1792st_index_t
1793st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1794 st_data_t never ATTRIBUTE_UNUSED)
1795{
1796 return st_general_values(tab, values, size);
1797}
1798
1799#define FNV1_32A_INIT 0x811c9dc5
1800
1801/*
1802 * 32 bit magic FNV-1a prime
1803 */
1804#define FNV_32_PRIME 0x01000193
1805
1806/* __POWERPC__ added to accommodate Darwin case. */
1807#ifndef UNALIGNED_WORD_ACCESS
1808# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1809 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1810 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1811 defined(__mc68020__)
1812# define UNALIGNED_WORD_ACCESS 1
1813# endif
1814#endif
1815#ifndef UNALIGNED_WORD_ACCESS
1816# define UNALIGNED_WORD_ACCESS 0
1817#endif
1818
1819/* This hash function is quite simplified MurmurHash3
1820 * Simplification is legal, cause most of magic still happens in finalizator.
1821 * And finalizator is almost the same as in MurmurHash3 */
1822#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1823#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1824
1825#if ST_INDEX_BITS <= 32
1826#define C1 (st_index_t)0xcc9e2d51
1827#define C2 (st_index_t)0x1b873593
1828#else
1829#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1830#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1831#endif
1832NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1833NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1834NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1835
1836static inline st_index_t
1837murmur_step(st_index_t h, st_index_t k)
1838{
1839#if ST_INDEX_BITS <= 32
1840#define r1 (17)
1841#define r2 (11)
1842#else
1843#define r1 (33)
1844#define r2 (24)
1845#endif
1846 k *= C1;
1847 h ^= ROTL(k, r1);
1848 h *= C2;
1849 h = ROTL(h, r2);
1850 return h;
1851}
1852#undef r1
1853#undef r2
1854
1855static inline st_index_t
1856murmur_finish(st_index_t h)
1857{
1858#if ST_INDEX_BITS <= 32
1859#define r1 (16)
1860#define r2 (13)
1861#define r3 (16)
1862 const st_index_t c1 = 0x85ebca6b;
1863 const st_index_t c2 = 0xc2b2ae35;
1864#else
1865/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1866#define r1 (30)
1867#define r2 (27)
1868#define r3 (31)
1869 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1870 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1871#endif
1872#if ST_INDEX_BITS > 64
1873 h ^= h >> 64;
1874 h *= c2;
1875 h ^= h >> 65;
1876#endif
1877 h ^= h >> r1;
1878 h *= c1;
1879 h ^= h >> r2;
1880 h *= c2;
1881 h ^= h >> r3;
1882 return h;
1883}
1884#undef r1
1885#undef r2
1886#undef r3
1887
1888st_index_t
1889st_hash(const void *ptr, size_t len, st_index_t h)
1890{
1891 const char *data = ptr;
1892 st_index_t t = 0;
1893 size_t l = len;
1894
1895#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1896#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1897#if SIZEOF_ST_INDEX_T > 4
1898#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1899#if SIZEOF_ST_INDEX_T > 8
1900#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1901 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1902#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1903#endif
1904#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1905#else
1906#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1907#endif
1908#undef SKIP_TAIL
1909 if (len >= sizeof(st_index_t)) {
1910#if !UNALIGNED_WORD_ACCESS
1911 int align = (int)((st_data_t)data % sizeof(st_index_t));
1912 if (align) {
1913 st_index_t d = 0;
1914 int sl, sr, pack;
1915
1916 switch (align) {
1917#ifdef WORDS_BIGENDIAN
1918# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1919 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1920#else
1921# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1922 t |= data_at(n) << CHAR_BIT*(n)
1923#endif
1924 UNALIGNED_ADD_ALL;
1925#undef UNALIGNED_ADD
1926 }
1927
1928#ifdef WORDS_BIGENDIAN
1929 t >>= (CHAR_BIT * align) - CHAR_BIT;
1930#else
1931 t <<= (CHAR_BIT * align);
1932#endif
1933
1934 data += sizeof(st_index_t)-align;
1935 len -= sizeof(st_index_t)-align;
1936
1937 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1938 sr = CHAR_BIT * align;
1939
1940 while (len >= sizeof(st_index_t)) {
1941 d = *(st_index_t *)data;
1942#ifdef WORDS_BIGENDIAN
1943 t = (t << sr) | (d >> sl);
1944#else
1945 t = (t >> sr) | (d << sl);
1946#endif
1947 h = murmur_step(h, t);
1948 t = d;
1949 data += sizeof(st_index_t);
1950 len -= sizeof(st_index_t);
1951 }
1952
1953 pack = len < (size_t)align ? (int)len : align;
1954 d = 0;
1955 switch (pack) {
1956#ifdef WORDS_BIGENDIAN
1957# define UNALIGNED_ADD(n) case (n) + 1: \
1958 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1959#else
1960# define UNALIGNED_ADD(n) case (n) + 1: \
1961 d |= data_at(n) << CHAR_BIT*(n)
1962#endif
1963 UNALIGNED_ADD_ALL;
1964#undef UNALIGNED_ADD
1965 }
1966#ifdef WORDS_BIGENDIAN
1967 t = (t << sr) | (d >> sl);
1968#else
1969 t = (t >> sr) | (d << sl);
1970#endif
1971
1972 if (len < (size_t)align) goto skip_tail;
1973# define SKIP_TAIL 1
1974 h = murmur_step(h, t);
1975 data += pack;
1976 len -= pack;
1977 }
1978 else
1979#endif
1980#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1981#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1982#else
1983#define aligned_data data
1984#endif
1985 {
1986 do {
1987 h = murmur_step(h, *(st_index_t *)aligned_data);
1988 data += sizeof(st_index_t);
1989 len -= sizeof(st_index_t);
1990 } while (len >= sizeof(st_index_t));
1991 }
1992 }
1993
1994 t = 0;
1995 switch (len) {
1996#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1997 /* in this case byteorder doesn't really matter */
1998#if SIZEOF_ST_INDEX_T > 4
1999 case 7: t |= data_at(6) << 48;
2000 case 6: t |= data_at(5) << 40;
2001 case 5: t |= data_at(4) << 32;
2002 case 4:
2003 t |= (st_index_t)*(uint32_t*)aligned_data;
2004 goto skip_tail;
2005# define SKIP_TAIL 1
2006#endif
2007 case 3: t |= data_at(2) << 16;
2008 case 2: t |= data_at(1) << 8;
2009 case 1: t |= data_at(0);
2010#else
2011#ifdef WORDS_BIGENDIAN
2012# define UNALIGNED_ADD(n) case (n) + 1: \
2013 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
2014#else
2015# define UNALIGNED_ADD(n) case (n) + 1: \
2016 t |= data_at(n) << CHAR_BIT*(n)
2017#endif
2018 UNALIGNED_ADD_ALL;
2019#undef UNALIGNED_ADD
2020#endif
2021#ifdef SKIP_TAIL
2022 skip_tail:
2023#endif
2024 h ^= t; h -= ROTL(t, 7);
2025 h *= C2;
2026 }
2027 h ^= l;
2028#undef aligned_data
2029
2030 return murmur_finish(h);
2031}
2032
2033st_index_t
2034st_hash_uint32(st_index_t h, uint32_t i)
2035{
2036 return murmur_step(h, i);
2037}
2038
2039NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
2040st_index_t
2041st_hash_uint(st_index_t h, st_index_t i)
2042{
2043 i += h;
2044/* no matter if it is BigEndian or LittleEndian,
2045 * we hash just integers */
2046#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
2047 h = murmur_step(h, i >> 8*8);
2048#endif
2049 h = murmur_step(h, i);
2050 return h;
2051}
2052
2053st_index_t
2054st_hash_end(st_index_t h)
2055{
2056 h = murmur_finish(h);
2057 return h;
2058}
2059
2060#undef st_hash_start
2061st_index_t
2062rb_st_hash_start(st_index_t h)
2063{
2064 return h;
2065}
2066
2067static st_index_t
2068strhash(st_data_t arg)
2069{
2070 register const char *string = (const char *)arg;
2071 return st_hash(string, strlen(string), FNV1_32A_INIT);
2072}
2073
2074int
2075st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2076{
2077 char c1, c2;
2078
2079 while (1) {
2080 c1 = *s1++;
2081 c2 = *s2++;
2082 if (c1 == '\0' || c2 == '\0') {
2083 if (c1 != '\0') return 1;
2084 if (c2 != '\0') return -1;
2085 return 0;
2086 }
2087 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2088 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2089 if (c1 != c2) {
2090 if (c1 > c2)
2091 return 1;
2092 else
2093 return -1;
2094 }
2095 }
2096}
2097
2098int
2099st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2100{
2101 char c1, c2;
2102 size_t i;
2103
2104 for (i = 0; i < n; i++) {
2105 c1 = *s1++;
2106 c2 = *s2++;
2107 if (c1 == '\0' || c2 == '\0') {
2108 if (c1 != '\0') return 1;
2109 if (c2 != '\0') return -1;
2110 return 0;
2111 }
2112 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2113 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2114 if (c1 != c2) {
2115 if (c1 > c2)
2116 return 1;
2117 else
2118 return -1;
2119 }
2120 }
2121 return 0;
2122}
2123
2124static int
2125st_strcmp(st_data_t lhs, st_data_t rhs)
2126{
2127 const char *s1 = (char *)lhs;
2128 const char *s2 = (char *)rhs;
2129 return strcmp(s1, s2);
2130}
2131
2132static int
2133st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2134{
2135 const char *s1 = (char *)lhs;
2136 const char *s2 = (char *)rhs;
2137 return st_locale_insensitive_strcasecmp(s1, s2);
2138}
2139
2140NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2141static st_index_t
2142strcasehash(st_data_t arg)
2143{
2144 register const char *string = (const char *)arg;
2145 register st_index_t hval = FNV1_32A_INIT;
2146
2147 /*
2148 * FNV-1a hash each octet in the buffer
2149 */
2150 while (*string) {
2151 unsigned int c = (unsigned char)*string++;
2152 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2153 hval ^= c;
2154
2155 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2156 hval *= FNV_32_PRIME;
2157 }
2158 return hval;
2159}
2160
2161int
2162st_numcmp(st_data_t x, st_data_t y)
2163{
2164 return x != y;
2165}
2166
2167st_index_t
2168st_numhash(st_data_t n)
2169{
2170 enum {s1 = 11, s2 = 3};
2171 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2172}
2173
2174#ifdef RUBY
2175/* Expand TAB to be suitable for holding SIZ entries in total.
2176 Pre-existing entries remain not deleted inside of TAB, but its bins
2177 are cleared to expect future reconstruction. See rehash below. */
2178static void
2179st_expand_table(st_table *tab, st_index_t siz)
2180{
2181 st_table *tmp;
2182 st_index_t n;
2183
2184 if (siz <= get_allocated_entries(tab))
2185 return; /* enough room already */
2186
2187 tmp = st_init_table_with_size(tab->type, siz);
2188 n = get_allocated_entries(tab);
2189 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2190 st_free_bins(tab);
2191 st_free_entries(tab);
2192 st_free_bins(tmp);
2193
2194 tab->entry_power = tmp->entry_power;
2195 tab->bin_power = tmp->bin_power;
2196 tab->size_ind = tmp->size_ind;
2197 tab->entries = tmp->entries;
2198 tab->bins = NULL;
2199 tab->rebuilds_num++;
2200 free_fixed_ptr(tmp);
2201}
2202
2203/* Rehash using linear search. Return TRUE if we found that the table
2204 was rebuilt. */
2205static int
2206st_rehash_linear(st_table *tab)
2207{
2208 int eq_p, rebuilt_p;
2209 st_index_t i, j;
2210 st_table_entry *p, *q;
2211
2212 st_free_bins(tab);
2213 tab->bins = NULL;
2214
2215 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2216 p = &tab->entries[i];
2217 if (DELETED_ENTRY_P(p))
2218 continue;
2219 for (j = i + 1; j < tab->entries_bound; j++) {
2220 q = &tab->entries[j];
2221 if (DELETED_ENTRY_P(q))
2222 continue;
2223 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2224 if (EXPECT(rebuilt_p, 0))
2225 return TRUE;
2226 if (eq_p) {
2227 *p = *q;
2228 MARK_ENTRY_DELETED(q);
2229 tab->num_entries--;
2230 update_range_for_deleted(tab, j);
2231 }
2232 }
2233 }
2234 return FALSE;
2235}
2236
2237/* Rehash using index. Return TRUE if we found that the table was
2238 rebuilt. */
2239static int
2240st_rehash_indexed(st_table *tab)
2241{
2242 int eq_p, rebuilt_p;
2243 st_index_t i;
2244
2245 if (!tab->bins) {
2246 tab->bins = malloc(bins_size(tab));
2247 }
2248 unsigned int const size_ind = get_size_ind(tab);
2249 initialize_bins(tab);
2250 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2251 st_table_entry *p = &tab->entries[i];
2252 st_index_t ind;
2253#ifdef QUADRATIC_PROBE
2254 st_index_t d = 1;
2255#else
2256 st_index_t perturb = p->hash;
2257#endif
2258
2259 if (DELETED_ENTRY_P(p))
2260 continue;
2261
2262 ind = hash_bin(p->hash, tab);
2263 for (;;) {
2264 st_index_t bin = get_bin(tab->bins, size_ind, ind);
2265 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2266 /* ok, new room */
2267 set_bin(tab->bins, size_ind, ind, i + ENTRY_BASE);
2268 break;
2269 }
2270 else {
2271 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2272 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2273 if (EXPECT(rebuilt_p, 0))
2274 return TRUE;
2275 if (eq_p) {
2276 /* duplicated key; delete it */
2277 q->record = p->record;
2278 MARK_ENTRY_DELETED(p);
2279 tab->num_entries--;
2280 update_range_for_deleted(tab, bin);
2281 break;
2282 }
2283 else {
2284 /* hash collision; skip it */
2285#ifdef QUADRATIC_PROBE
2286 ind = hash_bin(ind + d, tab);
2287 d++;
2288#else
2289 ind = secondary_hash(ind, tab, &perturb);
2290#endif
2291 }
2292 }
2293 }
2294 }
2295 return FALSE;
2296}
2297
2298/* Reconstruct TAB's bins according to TAB's entries. This function
2299 permits conflicting keys inside of entries. No errors are reported
2300 then. All but one of them are discarded silently. */
2301static void
2302st_rehash(st_table *tab)
2303{
2304 int rebuilt_p;
2305
2306 do {
2307 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2308 rebuilt_p = st_rehash_linear(tab);
2309 else
2310 rebuilt_p = st_rehash_indexed(tab);
2311 } while (rebuilt_p);
2312}
2313
2314static st_data_t
2315st_stringify(VALUE key)
2316{
2317 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2318 rb_hash_key_str(key) : key;
2319}
2320
2321static void
2322st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2323{
2324 st_data_t k = st_stringify(key);
2326 e.hash = do_hash(k, tab);
2327 e.key = k;
2328 e.record = val;
2329
2330 tab->entries[tab->entries_bound++] = e;
2331 tab->num_entries++;
2332 RB_OBJ_WRITTEN(hash, Qundef, k);
2333 RB_OBJ_WRITTEN(hash, Qundef, val);
2334}
2335
2336static void
2337st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2338{
2339 long i;
2340
2341 for (i = 0; i < argc; /* */) {
2342 st_data_t k = st_stringify(argv[i++]);
2343 st_data_t v = argv[i++];
2344 st_insert(tab, k, v);
2345 RB_OBJ_WRITTEN(hash, Qundef, k);
2346 RB_OBJ_WRITTEN(hash, Qundef, v);
2347 }
2348}
2349
2350static void
2351st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2352{
2353 long i;
2354
2355 /* push elems */
2356 for (i = 0; i < argc; /* */) {
2357 VALUE key = argv[i++];
2358 VALUE val = argv[i++];
2359 st_insert_single(tab, hash, key, val);
2360 }
2361
2362 /* reindex */
2363 st_rehash(tab);
2364}
2365
2366/* Mimics ruby's { foo => bar } syntax. This function is subpart
2367 of rb_hash_bulk_insert. */
2368void
2369rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2370{
2371 st_index_t n, size = argc / 2;
2372 st_table *tab = RHASH_ST_TABLE(hash);
2373
2374 tab = RHASH_TBL_RAW(hash);
2375 n = tab->entries_bound + size;
2376 st_expand_table(tab, n);
2377 if (UNLIKELY(tab->num_entries))
2378 st_insert_generic(tab, argc, argv, hash);
2379 else if (argc <= 2)
2380 st_insert_single(tab, hash, argv[0], argv[1]);
2381 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2382 st_insert_linear(tab, argc, argv, hash);
2383 else
2384 st_insert_generic(tab, argc, argv, hash);
2385}
2386
2387void
2388rb_st_compact_table(st_table *tab)
2389{
2390 st_index_t num = tab->num_entries;
2391 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2392 /* Compaction: */
2393 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2394 rebuild_table_with(new_tab, tab);
2395 rebuild_move_table(new_tab, tab);
2396 rebuild_cleanup(tab);
2397 }
2398}
2399
2400/*
2401 * set_table related code
2402 */
2403
2404struct set_table_entry {
2405 st_hash_t hash;
2406 st_data_t key;
2407};
2408
2409/* Return hash value of KEY for table TAB. */
2410static inline st_hash_t
2411set_do_hash(st_data_t key, set_table *tab)
2412{
2413 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2414 return normalize_hash_value(hash);
2415}
2416
2417/* Return bin size index of table TAB. */
2418static inline unsigned int
2419set_get_size_ind(const set_table *tab)
2420{
2421 return tab->size_ind;
2422}
2423
2424/* Return the number of allocated bins of table TAB. */
2425static inline st_index_t
2426set_get_bins_num(const set_table *tab)
2427{
2428 return ((st_index_t) 1)<<tab->bin_power;
2429}
2430
2431/* Return mask for a bin index in table TAB. */
2432static inline st_index_t
2433set_bins_mask(const set_table *tab)
2434{
2435 return set_get_bins_num(tab) - 1;
2436}
2437
2438/* Return the index of table TAB bin corresponding to
2439 HASH_VALUE. */
2440static inline st_index_t
2441set_hash_bin(st_hash_t hash_value, set_table *tab)
2442{
2443 return hash_value & set_bins_mask(tab);
2444}
2445
2446/* Return the number of allocated entries of table TAB. */
2447static inline st_index_t
2448set_get_allocated_entries(const set_table *tab)
2449{
2450 return ((st_index_t) 1)<<tab->entry_power;
2451}
2452
2453static inline size_t
2454set_allocated_entries_size(const set_table *tab)
2455{
2456 return set_get_allocated_entries(tab) * sizeof(set_table_entry);
2457}
2458
2459static inline bool
2460set_has_bins(const set_table *tab)
2461{
2462 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
2463}
2464
2465/* Return size of the allocated bins of table TAB. */
2466static inline st_index_t
2467set_bins_size(const set_table *tab)
2468{
2469 if (set_has_bins(tab)) {
2470 return features[tab->entry_power].bins_words * sizeof (st_index_t);
2471 }
2472
2473 return 0;
2474}
2475
2476static inline st_index_t *
2477set_bins_ptr(const set_table *tab)
2478{
2479 if (set_has_bins(tab)) {
2480 return (st_index_t *)(((char *)tab->entries) + set_allocated_entries_size(tab));
2481 }
2482
2483 return NULL;
2484}
2485
2486/* Mark all bins of table TAB as empty. */
2487static void
2488set_initialize_bins(set_table *tab)
2489{
2490 memset(set_bins_ptr(tab), 0, set_bins_size(tab));
2491}
2492
2493/* Make table TAB empty. */
2494static void
2495set_make_tab_empty(set_table *tab)
2496{
2497 tab->num_entries = 0;
2498 tab->entries_start = tab->entries_bound = 0;
2499 if (set_bins_ptr(tab) != NULL)
2500 set_initialize_bins(tab);
2501}
2502
2503static inline size_t
2504set_entries_memsize(set_table *tab)
2505{
2506 size_t memsize = set_get_allocated_entries(tab) * sizeof(set_table_entry);
2507 if (set_has_bins(tab)) {
2508 memsize += set_bins_size(tab);
2509 }
2510 return memsize;
2511}
2512
2513static set_table *
2514set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2515{
2516 int n;
2517
2518#ifdef HASH_LOG
2519#if HASH_LOG+0 < 0
2520 {
2521 const char *e = getenv("ST_HASH_LOG");
2522 if (!e || !*e) init_st = 1;
2523 }
2524#endif
2525 if (init_st == 0) {
2526 init_st = 1;
2527 atexit(stat_col);
2528 }
2529#endif
2530
2531 n = get_power2(size);
2532
2533 tab->type = type;
2534 tab->entry_power = n;
2535 tab->bin_power = features[n].bin_power;
2536 tab->size_ind = features[n].size_ind;
2537
2538 tab->entries = (set_table_entry *)malloc(set_entries_memsize(tab));
2539 set_make_tab_empty(tab);
2540 tab->rebuilds_num = 0;
2541 return tab;
2542}
2543
2544/* Create and return table with TYPE which can hold at least SIZE
2545 entries. The real number of entries which the table can hold is
2546 the nearest power of two for SIZE. */
2547set_table *
2548set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2549{
2550 if (tab == NULL) tab = malloc(sizeof(set_table));
2551
2552 set_init_existing_table_with_size(tab, type, size);
2553
2554 return tab;
2555}
2556
2557set_table *
2558set_init_numtable(void)
2559{
2560 return set_init_table_with_size(NULL, &type_numhash, 0);
2561}
2562
2563set_table *
2564set_init_numtable_with_size(st_index_t size)
2565{
2566 return set_init_table_with_size(NULL, &type_numhash, size);
2567}
2568
2569set_table *
2570set_init_embedded_numtable_with_size(set_table *tab, st_index_t size)
2571{
2572 return set_init_existing_table_with_size(tab, &type_numhash, size);
2573}
2574
2575size_t
2576set_table_size(const struct set_table *tbl)
2577{
2578 return tbl->num_entries;
2579}
2580
2581/* Make table TAB empty. */
2582void
2583set_table_clear(set_table *tab)
2584{
2585 set_make_tab_empty(tab);
2586 tab->rebuilds_num++;
2587}
2588
2589void
2590set_free_embedded_table(set_table *tab)
2591{
2592 sized_free(tab->entries, set_entries_memsize(tab));
2593}
2594
2595/* Free table TAB space. This should only be used if you passed NULL to
2596 set_init_table_with_size/set_copy when creating the table. */
2597void
2598set_free_table(set_table *tab)
2599{
2600 set_free_embedded_table(tab);
2601 free_fixed_ptr(tab);
2602}
2603
2604/* Return byte size of memory allocated for table TAB. */
2605size_t
2606set_memsize(const set_table *tab)
2607{
2608 return(sizeof(set_table)
2609 + (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
2610 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2611}
2612
2613static st_index_t
2614set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2615
2616static st_index_t
2617set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2618
2619static st_index_t
2620set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key);
2621
2622static st_index_t
2623set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2624 st_data_t key, st_index_t *bin_ind);
2625
2626static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
2627static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
2628static void set_rebuild_cleanup(set_table *const tab);
2629
2630/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
2631 and can change size of the table entries and bins arrays.
2632 Rebuilding is implemented by creation of a new table or by
2633 compaction of the existing one. */
2634static void
2635set_rebuild_table(set_table *tab)
2636{
2637 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2638 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2639 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2640 /* Compaction: */
2641 tab->num_entries = 0;
2642 if (set_has_bins(tab))
2643 set_initialize_bins(tab);
2644 set_rebuild_table_with(tab, tab);
2645 }
2646 else {
2647 set_table *new_tab;
2648 /* This allocation could trigger GC and compaction. If tab is the
2649 * gen_fields_tbl, then tab could have changed in size due to objects being
2650 * freed and/or moved. Do not store attributes of tab before this line. */
2651 new_tab = set_init_table_with_size(NULL, tab->type,
2652 2 * tab->num_entries - 1);
2653 set_rebuild_table_with(new_tab, tab);
2654 set_rebuild_move_table(new_tab, tab);
2655 }
2656 set_rebuild_cleanup(tab);
2657}
2658
2659static void
2660set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
2661{
2662 st_index_t i, ni;
2663 unsigned int size_ind;
2664 set_table_entry *new_entries;
2665 set_table_entry *curr_entry_ptr;
2666 st_index_t *bins;
2667 st_index_t bin_ind;
2668
2669 new_entries = new_tab->entries;
2670
2671 ni = 0;
2672 bins = set_bins_ptr(new_tab);
2673 size_ind = set_get_size_ind(new_tab);
2674 st_index_t bound = tab->entries_bound;
2675 set_table_entry *entries = tab->entries;
2676
2677 for (i = tab->entries_start; i < bound; i++) {
2678 curr_entry_ptr = &entries[i];
2679 PREFETCH(entries + i + 1, 0);
2680 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2681 continue;
2682 if (&new_entries[ni] != curr_entry_ptr)
2683 new_entries[ni] = *curr_entry_ptr;
2684 if (EXPECT(bins != NULL, 1)) {
2685 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2686 curr_entry_ptr->key);
2687 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2688 }
2689 new_tab->num_entries++;
2690 ni++;
2691 }
2692
2693 assert(new_tab->num_entries == tab->num_entries);
2694}
2695
2696static void
2697set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
2698{
2699 sized_free(tab->entries, set_entries_memsize(tab));
2700 tab->entries = new_tab->entries;
2701
2702 tab->entry_power = new_tab->entry_power;
2703 tab->bin_power = new_tab->bin_power;
2704 tab->size_ind = new_tab->size_ind;
2705
2706 free_fixed_ptr(new_tab);
2707}
2708
2709static void
2710set_rebuild_cleanup(set_table *const tab)
2711{
2712 tab->entries_start = 0;
2713 tab->entries_bound = tab->num_entries;
2714 tab->rebuilds_num++;
2715}
2716
2717/* Return the next secondary hash index for table TAB using previous
2718 index IND and PERTURB. Finally modulo of the function becomes a
2719 full *cycle linear congruential generator*, in other words it
2720 guarantees traversing all table bins in extreme case.
2721
2722 According the Hull-Dobell theorem a generator
2723 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
2724 o m and c are relatively prime
2725 o a-1 is divisible by all prime factors of m
2726 o a-1 is divisible by 4 if m is divisible by 4.
2727
2728 For our case a is 5, c is 1, and m is a power of two. */
2729static inline st_index_t
2730set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
2731{
2732 *perturb >>= 11;
2733 ind = (ind << 2) + ind + *perturb + 1;
2734 return set_hash_bin(ind, tab);
2735}
2736
2737/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
2738 search. Return the index of the found entry in array `entries`.
2739 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
2740 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2741static inline st_index_t
2742set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
2743{
2744 int eq_p, rebuilt_p;
2745 st_index_t i, bound;
2746 set_table_entry *entries;
2747
2748 bound = tab->entries_bound;
2749 entries = tab->entries;
2750 for (i = tab->entries_start; i < bound; i++) {
2751 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2752 if (EXPECT(rebuilt_p, 0))
2753 return REBUILT_TABLE_ENTRY_IND;
2754 if (eq_p)
2755 return i;
2756 }
2757 return UNDEFINED_ENTRY_IND;
2758}
2759
2760/* Use the quadratic probing. The method has a better data locality
2761 but more collisions than the current approach. In average it
2762 results in a bit slower search. */
2763/*#define QUADRATIC_PROBE*/
2764
2765/* Return index of entry with HASH_VALUE and KEY in table TAB. If
2766 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
2767 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2768static st_index_t
2769set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2770{
2771 int eq_p, rebuilt_p;
2772 st_index_t ind;
2773#ifdef QUADRATIC_PROBE
2774 st_index_t d;
2775#else
2776 st_index_t perturb;
2777#endif
2778 st_index_t bin;
2779 set_table_entry *entries = tab->entries;
2780
2781 ind = set_hash_bin(hash_value, tab);
2782#ifdef QUADRATIC_PROBE
2783 d = 1;
2784#else
2785 perturb = hash_value;
2786#endif
2787 for (;;) {
2788 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2789 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2790 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2791 if (EXPECT(rebuilt_p, 0))
2792 return REBUILT_TABLE_ENTRY_IND;
2793 if (eq_p)
2794 break;
2795 }
2796 else if (EMPTY_BIN_P(bin))
2797 return UNDEFINED_ENTRY_IND;
2798#ifdef QUADRATIC_PROBE
2799 ind = set_hash_bin(ind + d, tab);
2800 d++;
2801#else
2802 ind = set_secondary_hash(ind, tab, &perturb);
2803#endif
2804 }
2805 return bin;
2806}
2807
2808/* Find and return index of table TAB bin corresponding to an entry
2809 with HASH_VALUE and KEY. If there is no such bin, return
2810 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
2811 return REBUILT_TABLE_BIN_IND. */
2812static st_index_t
2813set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2814{
2815 int eq_p, rebuilt_p;
2816 st_index_t ind;
2817#ifdef QUADRATIC_PROBE
2818 st_index_t d;
2819#else
2820 st_index_t perturb;
2821#endif
2822 st_index_t bin;
2823 set_table_entry *entries = tab->entries;
2824
2825 ind = set_hash_bin(hash_value, tab);
2826#ifdef QUADRATIC_PROBE
2827 d = 1;
2828#else
2829 perturb = hash_value;
2830#endif
2831 for (;;) {
2832 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2833 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2834 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2835 if (EXPECT(rebuilt_p, 0))
2836 return REBUILT_TABLE_BIN_IND;
2837 if (eq_p)
2838 break;
2839 }
2840 else if (EMPTY_BIN_P(bin))
2841 return UNDEFINED_BIN_IND;
2842#ifdef QUADRATIC_PROBE
2843 ind = set_hash_bin(ind + d, tab);
2844 d++;
2845#else
2846 ind = set_secondary_hash(ind, tab, &perturb);
2847#endif
2848 }
2849 return ind;
2850}
2851
2852/* Find and return index of table TAB bin corresponding to an entry
2853 with HASH_VALUE and KEY. The entry should be in the table
2854 already. */
2855static st_index_t
2856set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
2857{
2858 st_index_t ind;
2859#ifdef QUADRATIC_PROBE
2860 st_index_t d;
2861#else
2862 st_index_t perturb;
2863#endif
2864 st_index_t bin;
2865
2866 ind = set_hash_bin(hash_value, tab);
2867#ifdef QUADRATIC_PROBE
2868 d = 1;
2869#else
2870 perturb = hash_value;
2871#endif
2872 for (;;) {
2873 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2874 if (EMPTY_OR_DELETED_BIN_P(bin))
2875 return ind;
2876#ifdef QUADRATIC_PROBE
2877 ind = set_hash_bin(ind + d, tab);
2878 d++;
2879#else
2880 ind = set_secondary_hash(ind, tab, &perturb);
2881#endif
2882 }
2883}
2884
2885/* Mark I-th bin of table TAB as empty, in other words not
2886 corresponding to any entry. */
2887#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
2888
2889/* Return index of table TAB bin for HASH_VALUE and KEY through
2890 BIN_IND and the pointed value as the function result. Reserve the
2891 bin for inclusion of the corresponding entry into the table if it
2892 is not there yet. We always find such bin as bins array length is
2893 bigger entries array. Although we can reuse a deleted bin, the
2894 result bin value is always empty if the table has no entry with
2895 KEY. Return the entries array index of the found entry or
2896 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
2897 during the search, return REBUILT_TABLE_ENTRY_IND. */
2898static st_index_t
2899set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2900 st_data_t key, st_index_t *bin_ind)
2901{
2902 int eq_p, rebuilt_p;
2903 st_index_t ind;
2904 st_hash_t curr_hash_value = *hash_value;
2905#ifdef QUADRATIC_PROBE
2906 st_index_t d;
2907#else
2908 st_index_t perturb;
2909#endif
2910 st_index_t entry_index;
2911 st_index_t firset_deleted_bin_ind;
2912 set_table_entry *entries;
2913
2914 ind = set_hash_bin(curr_hash_value, tab);
2915#ifdef QUADRATIC_PROBE
2916 d = 1;
2917#else
2918 perturb = curr_hash_value;
2919#endif
2920 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2921 entries = tab->entries;
2922 for (;;) {
2923 entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2924 if (EMPTY_BIN_P(entry_index)) {
2925 tab->num_entries++;
2926 entry_index = UNDEFINED_ENTRY_IND;
2927 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2928 /* We can reuse bin of a deleted entry. */
2929 ind = firset_deleted_bin_ind;
2930 MARK_SET_BIN_EMPTY(tab, ind);
2931 }
2932 break;
2933 }
2934 else if (! DELETED_BIN_P(entry_index)) {
2935 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2936 if (EXPECT(rebuilt_p, 0))
2937 return REBUILT_TABLE_ENTRY_IND;
2938 if (eq_p)
2939 break;
2940 }
2941 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2942 firset_deleted_bin_ind = ind;
2943#ifdef QUADRATIC_PROBE
2944 ind = set_hash_bin(ind + d, tab);
2945 d++;
2946#else
2947 ind = set_secondary_hash(ind, tab, &perturb);
2948#endif
2949 }
2950 *bin_ind = ind;
2951 return entry_index;
2952}
2953
2954/* Find an entry with KEY in table TAB. Return non-zero if we found
2955 it. */
2956int
2957set_table_lookup(set_table *tab, st_data_t key)
2958{
2959 st_index_t bin;
2960 st_hash_t hash = set_do_hash(key, tab);
2961
2962 retry:
2963 if (!set_has_bins(tab)) {
2964 bin = set_find_entry(tab, hash, key);
2965 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2966 goto retry;
2967 if (bin == UNDEFINED_ENTRY_IND)
2968 return 0;
2969 }
2970 else {
2971 bin = set_find_table_entry_ind(tab, hash, key);
2972 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2973 goto retry;
2974 if (bin == UNDEFINED_ENTRY_IND)
2975 return 0;
2976 bin -= ENTRY_BASE;
2977 }
2978 return 1;
2979}
2980
2981/* Check the table and rebuild it if it is necessary. */
2982static inline void
2983set_rebuild_table_if_necessary (set_table *tab)
2984{
2985 st_index_t bound = tab->entries_bound;
2986
2987 if (bound == set_get_allocated_entries(tab))
2988 set_rebuild_table(tab);
2989}
2990
2991/* Insert KEY into table TAB and return zero. If there is
2992 already entry with KEY in the table, return nonzero and update
2993 the value of the found entry. */
2994int
2995set_insert(set_table *tab, st_data_t key)
2996{
2997 set_table_entry *entry;
2998 st_index_t bin;
2999 st_index_t ind;
3000 st_hash_t hash_value;
3001 st_index_t bin_ind;
3002 int new_p;
3003
3004 hash_value = set_do_hash(key, tab);
3005 retry:
3006 set_rebuild_table_if_necessary(tab);
3007 if (!set_has_bins(tab)) {
3008 bin = set_find_entry(tab, hash_value, key);
3009 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3010 goto retry;
3011 new_p = bin == UNDEFINED_ENTRY_IND;
3012 if (new_p)
3013 tab->num_entries++;
3014 bin_ind = UNDEFINED_BIN_IND;
3015 }
3016 else {
3017 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
3018 key, &bin_ind);
3019 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3020 goto retry;
3021 new_p = bin == UNDEFINED_ENTRY_IND;
3022 bin -= ENTRY_BASE;
3023 }
3024 if (new_p) {
3025 ind = tab->entries_bound++;
3026 entry = &tab->entries[ind];
3027 entry->hash = hash_value;
3028 entry->key = key;
3029 if (bin_ind != UNDEFINED_BIN_IND)
3030 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
3031 return 0;
3032 }
3033 return 1;
3034}
3035
3036/* Create a copy of old_tab into new_tab. */
3037static set_table *
3038set_replace(set_table *new_tab, set_table *old_tab)
3039{
3040 *new_tab = *old_tab;
3041 size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
3042 new_tab->entries = (set_table_entry *)malloc(memsize);
3043 MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
3044 return new_tab;
3045}
3046
3047/* Create and return a copy of table OLD_TAB. */
3048set_table *
3049set_copy(set_table *new_tab, set_table *old_tab)
3050{
3051 if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
3052
3053 if (set_replace(new_tab, old_tab) == NULL) {
3054 set_free_table(new_tab);
3055 return NULL;
3056 }
3057
3058 return new_tab;
3059}
3060
3061/* Update the entries start of table TAB after removing an entry
3062 with index N in the array entries. */
3063static inline void
3064set_update_range_for_deleted(set_table *tab, st_index_t n)
3065{
3066 /* Do not update entries_bound here. Otherwise, we can fill all
3067 bins by deleted entry value before rebuilding the table. */
3068 if (tab->entries_start == n) {
3069 st_index_t start = n + 1;
3070 st_index_t bound = tab->entries_bound;
3071 set_table_entry *entries = tab->entries;
3072 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
3073 tab->entries_start = start;
3074 }
3075}
3076
3077/* Mark I-th bin of table TAB as corresponding to a deleted table
3078 entry. Update number of entries in the table and number of bins
3079 corresponding to deleted entries. */
3080#define MARK_SET_BIN_DELETED(tab, i) \
3081 do { \
3082 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
3083 } while (0)
3084
3085/* Delete entry with KEY from table TAB, and return non-zero. If
3086 there is no entry with KEY in the table, return zero. */
3087int
3088set_table_delete(set_table *tab, st_data_t *key)
3089{
3090 set_table_entry *entry;
3091 st_index_t bin;
3092 st_index_t bin_ind;
3093 st_hash_t hash;
3094
3095 hash = set_do_hash(*key, tab);
3096 retry:
3097 if (!set_has_bins(tab)) {
3098 bin = set_find_entry(tab, hash, *key);
3099 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3100 goto retry;
3101 if (bin == UNDEFINED_ENTRY_IND) {
3102 return 0;
3103 }
3104 }
3105 else {
3106 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3107 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3108 goto retry;
3109 if (bin_ind == UNDEFINED_BIN_IND) {
3110 return 0;
3111 }
3112 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3113 MARK_SET_BIN_DELETED(tab, bin_ind);
3114 }
3115 entry = &tab->entries[bin];
3116 *key = entry->key;
3117 MARK_ENTRY_DELETED(entry);
3118 tab->num_entries--;
3119 set_update_range_for_deleted(tab, bin);
3120 return 1;
3121}
3122
3123/* Traverse all entries in table TAB calling FUNC with current entry
3124 key and zero. If the call returns ST_STOP, stop
3125 traversing. If the call returns ST_DELETE, delete the current
3126 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
3127 traversing. The function returns zero unless an error is found.
3128 CHECK_P is flag of set_foreach_check call. The behavior is a bit
3129 different for ST_CHECK and when the current element is removed
3130 during traversing. */
3131static inline int
3132set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
3133 set_update_callback_func *replace, st_data_t arg,
3134 int check_p)
3135{
3136 st_index_t bin;
3137 st_index_t bin_ind;
3138 set_table_entry *entries, *curr_entry_ptr;
3139 enum st_retval retval;
3140 st_index_t i, rebuilds_num;
3141 st_hash_t hash;
3142 st_data_t key;
3143 int error_p, packed_p = !set_has_bins(tab);
3144
3145 entries = tab->entries;
3146 /* The bound can change inside the loop even without rebuilding
3147 the table, e.g. by an entry insertion. */
3148 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3149 curr_entry_ptr = &entries[i];
3150 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3151 continue;
3152 key = curr_entry_ptr->key;
3153 rebuilds_num = tab->rebuilds_num;
3154 hash = curr_entry_ptr->hash;
3155 retval = (*func)(key, arg, 0);
3156
3157 if (retval == ST_REPLACE && replace) {
3158 retval = (*replace)(&key, arg, TRUE);
3159 curr_entry_ptr->key = key;
3160 }
3161
3162 if (rebuilds_num != tab->rebuilds_num) {
3163 retry:
3164 entries = tab->entries;
3165 packed_p = !set_has_bins(tab);
3166 if (packed_p) {
3167 i = set_find_entry(tab, hash, key);
3168 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3169 goto retry;
3170 error_p = i == UNDEFINED_ENTRY_IND;
3171 }
3172 else {
3173 i = set_find_table_entry_ind(tab, hash, key);
3174 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3175 goto retry;
3176 error_p = i == UNDEFINED_ENTRY_IND;
3177 i -= ENTRY_BASE;
3178 }
3179 if (error_p && check_p) {
3180 /* call func with error notice */
3181 retval = (*func)(0, arg, 1);
3182 return 1;
3183 }
3184 curr_entry_ptr = &entries[i];
3185 }
3186 switch (retval) {
3187 case ST_REPLACE:
3188 break;
3189 case ST_CONTINUE:
3190 break;
3191 case ST_CHECK:
3192 if (check_p)
3193 break;
3194 case ST_STOP:
3195 return 0;
3196 case ST_DELETE: {
3197 st_data_t key = curr_entry_ptr->key;
3198
3199 again:
3200 if (packed_p) {
3201 bin = set_find_entry(tab, hash, key);
3202 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3203 goto again;
3204 if (bin == UNDEFINED_ENTRY_IND)
3205 break;
3206 }
3207 else {
3208 bin_ind = set_find_table_bin_ind(tab, hash, key);
3209 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3210 goto again;
3211 if (bin_ind == UNDEFINED_BIN_IND)
3212 break;
3213 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3214 MARK_SET_BIN_DELETED(tab, bin_ind);
3215 }
3216 curr_entry_ptr = &entries[bin];
3217 MARK_ENTRY_DELETED(curr_entry_ptr);
3218 tab->num_entries--;
3219 set_update_range_for_deleted(tab, bin);
3220 break;
3221 }
3222 }
3223 }
3224 return 0;
3225}
3226
3227int
3228set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3229{
3230 return set_general_foreach(tab, func, replace, arg, TRUE);
3231}
3232
3233struct set_functor {
3234 set_foreach_callback_func *func;
3235 st_data_t arg;
3236};
3237
3238static int
3239set_apply_functor(st_data_t k, st_data_t d, int _)
3240{
3241 const struct set_functor *f = (void *)d;
3242 return f->func(k, f->arg);
3243}
3244
3245int
3246set_table_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3247{
3248 const struct set_functor f = { func, arg };
3249 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3250}
3251
3252/* See comments for function set_delete_safe. */
3253int
3254set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3255 st_data_t never ATTRIBUTE_UNUSED)
3256{
3257 return set_general_foreach(tab, func, NULL, arg, TRUE);
3258}
3259
3260/* Set up array KEYS by at most SIZE keys of head table TAB entries.
3261 Return the number of keys set up in array KEYS. */
3262inline st_index_t
3263set_keys(set_table *tab, st_data_t *keys, st_index_t size)
3264{
3265 st_index_t i, bound;
3266 st_data_t key, *keys_start, *keys_end;
3267 set_table_entry *curr_entry_ptr, *entries = tab->entries;
3268
3269 bound = tab->entries_bound;
3270 keys_start = keys;
3271 keys_end = keys + size;
3272 for (i = tab->entries_start; i < bound; i++) {
3273 if (keys == keys_end)
3274 break;
3275 curr_entry_ptr = &entries[i];
3276 key = curr_entry_ptr->key;
3277 if (! DELETED_ENTRY_P(curr_entry_ptr))
3278 *keys++ = key;
3279 }
3280
3281 return keys - keys_start;
3282}
3283
3284void
3285set_compact_table(set_table *tab)
3286{
3287 st_index_t num = tab->num_entries;
3288 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3289 /* Compaction: */
3290 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3291 set_rebuild_table_with(new_tab, tab);
3292 set_rebuild_move_table(new_tab, tab);
3293 set_rebuild_cleanup(tab);
3294 }
3295}
3296
3297#endif
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:711
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1425
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:229
VALUE rb_cString
String class.
Definition string.c:84
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:615
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
set_table_entry * entries
Array of size 2^entry_power.
Definition set_table.h:28
Definition st.c:137
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40