Ruby 4.1.0dev (2026-03-01 revision d68e4be1873e364c5ee24ed112bce4bc86e3a406)
st.c (d68e4be1873e364c5ee24ed112bce4bc86e3a406)
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/gc.h"
111#include "internal/hash.h"
112#include "internal/sanitizers.h"
113#include "internal/set_table.h"
114#include "internal/st.h"
115#include "ruby_assert.h"
116#endif
117
118#include <stdio.h>
119#ifdef HAVE_STDLIB_H
120#include <stdlib.h>
121#endif
122#include <string.h>
123
124#ifdef __GNUC__
125#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
126#define EXPECT(expr, val) __builtin_expect(expr, val)
127#define ATTRIBUTE_UNUSED __attribute__((unused))
128#else
129#define PREFETCH(addr, write_p)
130#define EXPECT(expr, val) (expr)
131#define ATTRIBUTE_UNUSED
132#endif
133
134/* The type of hashes. */
135typedef st_index_t st_hash_t;
136
138 st_hash_t hash;
139 st_data_t key;
140 st_data_t record;
141};
142
143#define type_numhash st_hashtype_num
144static const struct st_hash_type st_hashtype_num = {
145 st_numcmp,
146 st_numhash,
147};
148
149static int st_strcmp(st_data_t, st_data_t);
150static st_index_t strhash(st_data_t);
151static const struct st_hash_type type_strhash = {
152 st_strcmp,
153 strhash,
154};
155
156static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
157static st_index_t strcasehash(st_data_t);
158static const struct st_hash_type type_strcasehash = {
159 st_locale_insensitive_strcasecmp_i,
160 strcasehash,
161};
162
163/* Value used to catch uninitialized entries/bins during debugging.
164 There is a possibility for a false alarm, but its probability is
165 extremely small. */
166#define ST_INIT_VAL 0xafafafafafafafaf
167#define ST_INIT_VAL_BYTE 0xafa
168
169#ifdef RUBY
170#undef malloc
171#undef realloc
172#undef calloc
173#undef free
174#define malloc ruby_xmalloc
175#define calloc ruby_xcalloc
176#define realloc ruby_xrealloc
177#define sized_realloc ruby_sized_xrealloc
178#define free ruby_xfree
179#define sized_free ruby_sized_xfree
180#define free_fixed_ptr(v) ruby_sized_xfree((v), sizeof(*(v)))
181#else
182#define sized_realloc(ptr, new_size, old_size) realloc(ptr, new_size)
183#define sized_free(v, s) free(v)
184#define free_fixed_ptr(v) free(v)
185#endif
186
187#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
188#define PTR_EQUAL(tab, ptr, hash_val, key_) \
189 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
190
191/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
192 up to TRUE if the table is rebuilt during the comparison. */
193#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
194 do { \
195 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
196 res = PTR_EQUAL(tab, ptr, hash_val, key); \
197 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
198 } while (FALSE)
199
200/* Features of a table. */
202 /* Power of 2 used for number of allocated entries. */
203 unsigned char entry_power;
204 /* Power of 2 used for number of allocated bins. Depending on the
205 table size, the number of bins is 2-4 times more than the
206 number of entries. */
207 unsigned char bin_power;
208 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
209 unsigned char size_ind;
210 /* Bins are packed in words of type st_index_t. The following is
211 a size of bins counted by words. */
212 st_index_t bins_words;
213};
214
215/* Features of all possible size tables. */
216#if SIZEOF_ST_INDEX_T == 8
217#define MAX_POWER2 62
218static const struct st_features features[] = {
219 {0, 1, 0, 0x0},
220 {1, 2, 0, 0x1},
221 {2, 3, 0, 0x1},
222 {3, 4, 0, 0x2},
223 {4, 5, 0, 0x4},
224 {5, 6, 0, 0x8},
225 {6, 7, 0, 0x10},
226 {7, 8, 0, 0x20},
227 {8, 9, 1, 0x80},
228 {9, 10, 1, 0x100},
229 {10, 11, 1, 0x200},
230 {11, 12, 1, 0x400},
231 {12, 13, 1, 0x800},
232 {13, 14, 1, 0x1000},
233 {14, 15, 1, 0x2000},
234 {15, 16, 1, 0x4000},
235 {16, 17, 2, 0x10000},
236 {17, 18, 2, 0x20000},
237 {18, 19, 2, 0x40000},
238 {19, 20, 2, 0x80000},
239 {20, 21, 2, 0x100000},
240 {21, 22, 2, 0x200000},
241 {22, 23, 2, 0x400000},
242 {23, 24, 2, 0x800000},
243 {24, 25, 2, 0x1000000},
244 {25, 26, 2, 0x2000000},
245 {26, 27, 2, 0x4000000},
246 {27, 28, 2, 0x8000000},
247 {28, 29, 2, 0x10000000},
248 {29, 30, 2, 0x20000000},
249 {30, 31, 2, 0x40000000},
250 {31, 32, 2, 0x80000000},
251 {32, 33, 3, 0x200000000},
252 {33, 34, 3, 0x400000000},
253 {34, 35, 3, 0x800000000},
254 {35, 36, 3, 0x1000000000},
255 {36, 37, 3, 0x2000000000},
256 {37, 38, 3, 0x4000000000},
257 {38, 39, 3, 0x8000000000},
258 {39, 40, 3, 0x10000000000},
259 {40, 41, 3, 0x20000000000},
260 {41, 42, 3, 0x40000000000},
261 {42, 43, 3, 0x80000000000},
262 {43, 44, 3, 0x100000000000},
263 {44, 45, 3, 0x200000000000},
264 {45, 46, 3, 0x400000000000},
265 {46, 47, 3, 0x800000000000},
266 {47, 48, 3, 0x1000000000000},
267 {48, 49, 3, 0x2000000000000},
268 {49, 50, 3, 0x4000000000000},
269 {50, 51, 3, 0x8000000000000},
270 {51, 52, 3, 0x10000000000000},
271 {52, 53, 3, 0x20000000000000},
272 {53, 54, 3, 0x40000000000000},
273 {54, 55, 3, 0x80000000000000},
274 {55, 56, 3, 0x100000000000000},
275 {56, 57, 3, 0x200000000000000},
276 {57, 58, 3, 0x400000000000000},
277 {58, 59, 3, 0x800000000000000},
278 {59, 60, 3, 0x1000000000000000},
279 {60, 61, 3, 0x2000000000000000},
280 {61, 62, 3, 0x4000000000000000},
281 {62, 63, 3, 0x8000000000000000},
282};
283
284#else
285#define MAX_POWER2 30
286
287static const struct st_features features[] = {
288 {0, 1, 0, 0x1},
289 {1, 2, 0, 0x1},
290 {2, 3, 0, 0x2},
291 {3, 4, 0, 0x4},
292 {4, 5, 0, 0x8},
293 {5, 6, 0, 0x10},
294 {6, 7, 0, 0x20},
295 {7, 8, 0, 0x40},
296 {8, 9, 1, 0x100},
297 {9, 10, 1, 0x200},
298 {10, 11, 1, 0x400},
299 {11, 12, 1, 0x800},
300 {12, 13, 1, 0x1000},
301 {13, 14, 1, 0x2000},
302 {14, 15, 1, 0x4000},
303 {15, 16, 1, 0x8000},
304 {16, 17, 2, 0x20000},
305 {17, 18, 2, 0x40000},
306 {18, 19, 2, 0x80000},
307 {19, 20, 2, 0x100000},
308 {20, 21, 2, 0x200000},
309 {21, 22, 2, 0x400000},
310 {22, 23, 2, 0x800000},
311 {23, 24, 2, 0x1000000},
312 {24, 25, 2, 0x2000000},
313 {25, 26, 2, 0x4000000},
314 {26, 27, 2, 0x8000000},
315 {27, 28, 2, 0x10000000},
316 {28, 29, 2, 0x20000000},
317 {29, 30, 2, 0x40000000},
318 {30, 31, 2, 0x80000000},
319};
320
321#endif
322
323/* The reserved hash value and its substitution. */
324#define RESERVED_HASH_VAL (~(st_hash_t) 0)
325#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
326
327static inline st_hash_t
328normalize_hash_value(st_hash_t hash)
329{
330 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
331 another value. Such mapping should be extremely rare. */
332 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
333}
334
335/* Return hash value of KEY for table TAB. */
336static inline st_hash_t
337do_hash(st_data_t key, st_table *tab)
338{
339 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
340 return normalize_hash_value(hash);
341}
342
343/* Power of 2 defining the minimal number of allocated entries. */
344#define MINIMAL_POWER2 2
345
346#if MINIMAL_POWER2 < 2
347#error "MINIMAL_POWER2 should be >= 2"
348#endif
349
350/* If the power2 of the allocated `entries` is less than the following
351 value, don't allocate bins and use a linear search. */
352#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
353
354/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
355static int
356get_power2(st_index_t size)
357{
358 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
359 if (n <= MAX_POWER2)
360 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
361#ifdef RUBY
362 /* Ran out of the table entries */
363 rb_raise(rb_eRuntimeError, "st_table too big");
364#endif
365 /* should raise exception */
366 return -1;
367}
368
369/* Return value of N-th bin in array BINS of table with bins size
370 index S. */
371static inline st_index_t
372get_bin(st_index_t *bins, int s, st_index_t n)
373{
374 return (s == 0 ? ((unsigned char *) bins)[n]
375 : s == 1 ? ((unsigned short *) bins)[n]
376 : s == 2 ? ((unsigned int *) bins)[n]
377 : ((st_index_t *) bins)[n]);
378}
379
380/* Set up N-th bin in array BINS of table with bins size index S to
381 value V. */
382static inline void
383set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
384{
385 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
386 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
387 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
388 else ((st_index_t *) bins)[n] = v;
389}
390
391/* These macros define reserved values for empty table bin and table
392 bin which contains a deleted entry. We will never use such values
393 for an entry index in bins. */
394#define EMPTY_BIN 0
395#define DELETED_BIN 1
396/* Base of a real entry index in the bins. */
397#define ENTRY_BASE 2
398
399/* Mark I-th bin of table TAB as empty, in other words not
400 corresponding to any entry. */
401#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
402
403/* Values used for not found entry and bin with given
404 characteristics. */
405#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
406#define UNDEFINED_BIN_IND (~(st_index_t) 0)
407
408/* Entry and bin values returned when we found a table rebuild during
409 the search. */
410#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
411#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
412
413/* Mark I-th bin of table TAB as corresponding to a deleted table
414 entry. Update number of entries in the table and number of bins
415 corresponding to deleted entries. */
416#define MARK_BIN_DELETED(tab, i) \
417 do { \
418 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
419 } while (0)
420
421/* Macros to check that value B is used empty bins and bins
422 corresponding deleted entries. */
423#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
424#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
425#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
426
427/* Macros to check empty bins and bins corresponding to deleted
428 entries. Bins are given by their index I in table TAB. */
429#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
430#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
431#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
432
433/* Macros for marking and checking deleted entries given by their
434 pointer E_PTR. */
435#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
436#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
437
438/* Return bin size index of table TAB. */
439static inline unsigned int
440get_size_ind(const st_table *tab)
441{
442 return tab->size_ind;
443}
444
445/* Return the number of allocated bins of table TAB. */
446static inline st_index_t
447get_bins_num(const st_table *tab)
448{
449 return ((st_index_t) 1)<<tab->bin_power;
450}
451
452/* Return mask for a bin index in table TAB. */
453static inline st_index_t
454bins_mask(const st_table *tab)
455{
456 return get_bins_num(tab) - 1;
457}
458
459/* Return the index of table TAB bin corresponding to
460 HASH_VALUE. */
461static inline st_index_t
462hash_bin(st_hash_t hash_value, st_table *tab)
463{
464 return hash_value & bins_mask(tab);
465}
466
467/* Return the number of allocated entries of table TAB. */
468static inline st_index_t
469get_allocated_entries(const st_table *tab)
470{
471 return ((st_index_t) 1)<<tab->entry_power;
472}
473
474/* Return size of the allocated bins of table TAB. */
475static inline st_index_t
476bins_size(const st_table *tab)
477{
478 return features[tab->entry_power].bins_words * sizeof (st_index_t);
479}
480
481/* Mark all bins of table TAB as empty. */
482static void
483initialize_bins(st_table *tab)
484{
485 memset(tab->bins, 0, bins_size(tab));
486}
487
488/* Make table TAB empty. */
489static void
490make_tab_empty(st_table *tab)
491{
492 tab->num_entries = 0;
493 tab->entries_start = tab->entries_bound = 0;
494 if (tab->bins != NULL)
495 initialize_bins(tab);
496}
497
498#ifdef HASH_LOG
499#ifdef HAVE_UNISTD_H
500#include <unistd.h>
501#endif
502static struct {
503 int all, total, num, str, strcase;
504} collision;
505
506/* Flag switching off output of package statistics at the end of
507 program. */
508static int init_st = 0;
509
510/* Output overall number of table searches and collisions into a
511 temporary file. */
512static void
513stat_col(void)
514{
515 char fname[10+sizeof(long)*3];
516 FILE *f;
517 if (!collision.total) return;
518 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
519 if (f == NULL)
520 return;
521 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
522 ((double)collision.all / (collision.total)) * 100);
523 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
524 fclose(f);
525}
526#endif
527
528st_table *
529st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
530{
531 int n;
532
533#ifdef HASH_LOG
534#if HASH_LOG+0 < 0
535 {
536 const char *e = getenv("ST_HASH_LOG");
537 if (!e || !*e) init_st = 1;
538 }
539#endif
540 if (init_st == 0) {
541 init_st = 1;
542 atexit(stat_col);
543 }
544#endif
545
546 n = get_power2(size);
547#ifndef RUBY
548 if (n < 0)
549 return NULL;
550#endif
551
552 tab->type = type;
553 tab->entry_power = n;
554 tab->bin_power = features[n].bin_power;
555 tab->size_ind = features[n].size_ind;
556 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
557 tab->bins = NULL;
558 else {
559 tab->bins = (st_index_t *) malloc(bins_size(tab));
560#ifndef RUBY
561 if (tab->bins == NULL) {
562 free_fixed_ptr(tab);
563 return NULL;
564 }
565#endif
566 }
567 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
568 * sizeof(st_table_entry));
569#ifndef RUBY
570 if (tab->entries == NULL) {
571 st_free_table(tab);
572 return NULL;
573 }
574#endif
575 make_tab_empty(tab);
576 tab->rebuilds_num = 0;
577 return tab;
578}
579
580/* Create and return table with TYPE which can hold at least SIZE
581 entries. The real number of entries which the table can hold is
582 the nearest power of two for SIZE. */
583st_table *
584st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
585{
586 st_table *tab = malloc(sizeof(st_table));
587#ifndef RUBY
588 if (tab == NULL)
589 return NULL;
590#endif
591
592#ifdef RUBY
593 st_init_existing_table_with_size(tab, type, size);
594#else
595 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
596 free_fixed_ptr(tab);
597 return NULL;
598 }
599#endif
600
601 return tab;
602}
603
604size_t
605st_table_size(const struct st_table *tbl)
606{
607 return tbl->num_entries;
608}
609
610/* Create and return table with TYPE which can hold a minimal number
611 of entries (see comments for get_power2). */
612st_table *
613st_init_table(const struct st_hash_type *type)
614{
615 return st_init_table_with_size(type, 0);
616}
617
618/* Create and return table which can hold a minimal number of
619 numbers. */
620st_table *
621st_init_numtable(void)
622{
623 return st_init_table(&type_numhash);
624}
625
626/* Create and return table which can hold SIZE numbers. */
627st_table *
628st_init_numtable_with_size(st_index_t size)
629{
630 return st_init_table_with_size(&type_numhash, size);
631}
632
633/* Create and return table which can hold a minimal number of
634 strings. */
635st_table *
636st_init_strtable(void)
637{
638 return st_init_table(&type_strhash);
639}
640
641/* Create and return table which can hold SIZE strings. */
642st_table *
643st_init_strtable_with_size(st_index_t size)
644{
645 return st_init_table_with_size(&type_strhash, size);
646}
647
648/* Create and return table which can hold a minimal number of strings
649 whose character case is ignored. */
650st_table *
651st_init_strcasetable(void)
652{
653 return st_init_table(&type_strcasehash);
654}
655
656/* Create and return table which can hold SIZE strings whose character
657 case is ignored. */
658st_table *
659st_init_strcasetable_with_size(st_index_t size)
660{
661 return st_init_table_with_size(&type_strcasehash, size);
662}
663
664/* Make table TAB empty. */
665void
666st_clear(st_table *tab)
667{
668 make_tab_empty(tab);
669 tab->rebuilds_num++;
670}
671
672static inline size_t
673st_entries_memsize(const st_table *tab)
674{
675 return get_allocated_entries(tab) * sizeof(st_table_entry);
676}
677
678static inline size_t
679st_bins_memsize(const st_table *tab)
680{
681 return tab->bins == NULL ? 0 : bins_size(tab);
682}
683
684static inline void
685st_free_entries(const st_table *tab)
686{
687 sized_free(tab->entries, st_entries_memsize(tab));
688}
689
690static inline void
691st_free_bins(const st_table *tab)
692{
693 sized_free(tab->bins, st_bins_memsize(tab));
694}
695
696void
697st_free_embedded_table(st_table *tab)
698{
699 st_free_bins(tab);
700 st_free_entries(tab);
701}
702
703/* Free table TAB space. */
704void
705st_free_table(st_table *tab)
706{
707 st_free_embedded_table(tab);
708 free_fixed_ptr(tab);
709}
710
711/* Return byte size of memory allocated for table TAB. */
712size_t
713st_memsize(const st_table *tab)
714{
715 RUBY_ASSERT(tab != NULL);
716 return(sizeof(st_table)
717 + st_bins_memsize(tab)
718 + st_entries_memsize(tab));
719}
720
721static st_index_t
722find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
723
724static st_index_t
725find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
726
727static st_index_t
728find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
729
730static st_index_t
731find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
732 st_data_t key, st_index_t *bin_ind);
733
734#ifdef HASH_LOG
735static void
736count_collision(const struct st_hash_type *type)
737{
738 collision.all++;
739 if (type == &type_numhash) {
740 collision.num++;
741 }
742 else if (type == &type_strhash) {
743 collision.strcase++;
744 }
745 else if (type == &type_strcasehash) {
746 collision.str++;
747 }
748}
749
750#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
751#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
752#define collision_check 0
753#else
754#define COLLISION
755#define FOUND_BIN
756#endif
757
758/* If the number of entries in the table is at least REBUILD_THRESHOLD
759 times less than the entry array length, decrease the table
760 size. */
761#define REBUILD_THRESHOLD 4
762
763#if REBUILD_THRESHOLD < 2
764#error "REBUILD_THRESHOLD should be >= 2"
765#endif
766
767static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
768static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
769static void rebuild_cleanup(st_table *const tab);
770
771/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
772 and can change size of the table entries and bins arrays.
773 Rebuilding is implemented by creation of a new table or by
774 compaction of the existing one. */
775static void
776rebuild_table(st_table *tab)
777{
778 if ((2 * tab->num_entries <= get_allocated_entries(tab)
779 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
780 || tab->num_entries < (1 << MINIMAL_POWER2)) {
781 /* Compaction: */
782 tab->num_entries = 0;
783 if (tab->bins != NULL)
784 initialize_bins(tab);
785 rebuild_table_with(tab, tab);
786 }
787 else {
788 st_table *new_tab;
789 /* This allocation could trigger GC and compaction. If tab is the
790 * gen_fields_tbl, then tab could have changed in size due to objects being
791 * freed and/or moved. Do not store attributes of tab before this line. */
792 new_tab = st_init_table_with_size(tab->type,
793 2 * tab->num_entries - 1);
794 rebuild_table_with(new_tab, tab);
795 rebuild_move_table(new_tab, tab);
796 }
797 rebuild_cleanup(tab);
798}
799
800static void
801rebuild_table_with(st_table *const new_tab, st_table *const tab)
802{
803 st_index_t i, ni;
804 unsigned int size_ind;
805 st_table_entry *new_entries;
806 st_table_entry *curr_entry_ptr;
807 st_index_t *bins;
808 st_index_t bin_ind;
809
810 new_entries = new_tab->entries;
811
812 ni = 0;
813 bins = new_tab->bins;
814 size_ind = get_size_ind(new_tab);
815 st_index_t bound = tab->entries_bound;
816 st_table_entry *entries = tab->entries;
817
818 for (i = tab->entries_start; i < bound; i++) {
819 curr_entry_ptr = &entries[i];
820 PREFETCH(entries + i + 1, 0);
821 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
822 continue;
823 if (&new_entries[ni] != curr_entry_ptr)
824 new_entries[ni] = *curr_entry_ptr;
825 if (EXPECT(bins != NULL, 1)) {
826 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
827 curr_entry_ptr->key);
828 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
829 }
830 new_tab->num_entries++;
831 ni++;
832 }
833
834 assert(new_tab->num_entries == tab->num_entries);
835}
836
837static void
838rebuild_move_table(st_table *const new_tab, st_table *const tab)
839{
840 st_free_bins(tab);
841 st_free_entries(tab);
842
843 tab->entry_power = new_tab->entry_power;
844 tab->bin_power = new_tab->bin_power;
845 tab->size_ind = new_tab->size_ind;
846 tab->bins = new_tab->bins;
847 tab->entries = new_tab->entries;
848 free_fixed_ptr(new_tab);
849}
850
851static void
852rebuild_cleanup(st_table *const tab)
853{
854 tab->entries_start = 0;
855 tab->entries_bound = tab->num_entries;
856 tab->rebuilds_num++;
857}
858
859/* Return the next secondary hash index for table TAB using previous
860 index IND and PERTURB. Finally modulo of the function becomes a
861 full *cycle linear congruential generator*, in other words it
862 guarantees traversing all table bins in extreme case.
863
864 According the Hull-Dobell theorem a generator
865 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
866 o m and c are relatively prime
867 o a-1 is divisible by all prime factors of m
868 o a-1 is divisible by 4 if m is divisible by 4.
869
870 For our case a is 5, c is 1, and m is a power of two. */
871static inline st_index_t
872secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
873{
874 *perturb >>= 11;
875 ind = (ind << 2) + ind + *perturb + 1;
876 return hash_bin(ind, tab);
877}
878
879/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
880 search. Return the index of the found entry in array `entries`.
881 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
882 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
883static inline st_index_t
884find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
885{
886 int eq_p, rebuilt_p;
887 st_index_t i, bound;
888 st_table_entry *entries;
889
890 bound = tab->entries_bound;
891 entries = tab->entries;
892 for (i = tab->entries_start; i < bound; i++) {
893 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
894 if (EXPECT(rebuilt_p, 0))
895 return REBUILT_TABLE_ENTRY_IND;
896 if (eq_p)
897 return i;
898 }
899 return UNDEFINED_ENTRY_IND;
900}
901
902/* Use the quadratic probing. The method has a better data locality
903 but more collisions than the current approach. In average it
904 results in a bit slower search. */
905/*#define QUADRATIC_PROBE*/
906
907/* Return index of entry with HASH_VALUE and KEY in table TAB. If
908 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
909 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
910static st_index_t
911find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
912{
913 int eq_p, rebuilt_p;
914 st_index_t ind;
915#ifdef QUADRATIC_PROBE
916 st_index_t d;
917#else
918 st_index_t perturb;
919#endif
920 st_index_t bin;
921 st_table_entry *entries = tab->entries;
922
923 ind = hash_bin(hash_value, tab);
924#ifdef QUADRATIC_PROBE
925 d = 1;
926#else
927 perturb = hash_value;
928#endif
929 FOUND_BIN;
930 for (;;) {
931 bin = get_bin(tab->bins, get_size_ind(tab), ind);
932 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
933 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
934 if (EXPECT(rebuilt_p, 0))
935 return REBUILT_TABLE_ENTRY_IND;
936 if (eq_p)
937 break;
938 }
939 else if (EMPTY_BIN_P(bin))
940 return UNDEFINED_ENTRY_IND;
941#ifdef QUADRATIC_PROBE
942 ind = hash_bin(ind + d, tab);
943 d++;
944#else
945 ind = secondary_hash(ind, tab, &perturb);
946#endif
947 COLLISION;
948 }
949 return bin;
950}
951
952/* Find and return index of table TAB bin corresponding to an entry
953 with HASH_VALUE and KEY. If there is no such bin, return
954 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
955 return REBUILT_TABLE_BIN_IND. */
956static st_index_t
957find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
958{
959 int eq_p, rebuilt_p;
960 st_index_t ind;
961#ifdef QUADRATIC_PROBE
962 st_index_t d;
963#else
964 st_index_t perturb;
965#endif
966 st_index_t bin;
967 st_table_entry *entries = tab->entries;
968
969 ind = hash_bin(hash_value, tab);
970#ifdef QUADRATIC_PROBE
971 d = 1;
972#else
973 perturb = hash_value;
974#endif
975 FOUND_BIN;
976 for (;;) {
977 bin = get_bin(tab->bins, get_size_ind(tab), ind);
978 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
979 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
980 if (EXPECT(rebuilt_p, 0))
981 return REBUILT_TABLE_BIN_IND;
982 if (eq_p)
983 break;
984 }
985 else if (EMPTY_BIN_P(bin))
986 return UNDEFINED_BIN_IND;
987#ifdef QUADRATIC_PROBE
988 ind = hash_bin(ind + d, tab);
989 d++;
990#else
991 ind = secondary_hash(ind, tab, &perturb);
992#endif
993 COLLISION;
994 }
995 return ind;
996}
997
998/* Find and return index of table TAB bin corresponding to an entry
999 with HASH_VALUE and KEY. The entry should be in the table
1000 already. */
1001static st_index_t
1002find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
1003{
1004 st_index_t ind;
1005#ifdef QUADRATIC_PROBE
1006 st_index_t d;
1007#else
1008 st_index_t perturb;
1009#endif
1010 st_index_t bin;
1011
1012 ind = hash_bin(hash_value, tab);
1013#ifdef QUADRATIC_PROBE
1014 d = 1;
1015#else
1016 perturb = hash_value;
1017#endif
1018 FOUND_BIN;
1019 for (;;) {
1020 bin = get_bin(tab->bins, get_size_ind(tab), ind);
1021 if (EMPTY_OR_DELETED_BIN_P(bin))
1022 return ind;
1023#ifdef QUADRATIC_PROBE
1024 ind = hash_bin(ind + d, tab);
1025 d++;
1026#else
1027 ind = secondary_hash(ind, tab, &perturb);
1028#endif
1029 COLLISION;
1030 }
1031}
1032
1033/* Return index of table TAB bin for HASH_VALUE and KEY through
1034 BIN_IND and the pointed value as the function result. Reserve the
1035 bin for inclusion of the corresponding entry into the table if it
1036 is not there yet. We always find such bin as bins array length is
1037 bigger entries array. Although we can reuse a deleted bin, the
1038 result bin value is always empty if the table has no entry with
1039 KEY. Return the entries array index of the found entry or
1040 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1041 during the search, return REBUILT_TABLE_ENTRY_IND. */
1042static st_index_t
1043find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1044 st_data_t key, st_index_t *bin_ind)
1045{
1046 int eq_p, rebuilt_p;
1047 st_index_t ind;
1048 st_hash_t curr_hash_value = *hash_value;
1049#ifdef QUADRATIC_PROBE
1050 st_index_t d;
1051#else
1052 st_index_t perturb;
1053#endif
1054 st_index_t entry_index;
1055 st_index_t first_deleted_bin_ind;
1056 st_table_entry *entries;
1057
1058 ind = hash_bin(curr_hash_value, tab);
1059#ifdef QUADRATIC_PROBE
1060 d = 1;
1061#else
1062 perturb = curr_hash_value;
1063#endif
1064 FOUND_BIN;
1065 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1066 entries = tab->entries;
1067 for (;;) {
1068 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1069 if (EMPTY_BIN_P(entry_index)) {
1070 tab->num_entries++;
1071 entry_index = UNDEFINED_ENTRY_IND;
1072 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1073 /* We can reuse bin of a deleted entry. */
1074 ind = first_deleted_bin_ind;
1075 MARK_BIN_EMPTY(tab, ind);
1076 }
1077 break;
1078 }
1079 else if (! DELETED_BIN_P(entry_index)) {
1080 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1081 if (EXPECT(rebuilt_p, 0))
1082 return REBUILT_TABLE_ENTRY_IND;
1083 if (eq_p)
1084 break;
1085 }
1086 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1087 first_deleted_bin_ind = ind;
1088#ifdef QUADRATIC_PROBE
1089 ind = hash_bin(ind + d, tab);
1090 d++;
1091#else
1092 ind = secondary_hash(ind, tab, &perturb);
1093#endif
1094 COLLISION;
1095 }
1096 *bin_ind = ind;
1097 return entry_index;
1098}
1099
1100/* Find an entry with KEY in table TAB. Return non-zero if we found
1101 it. Set up *RECORD to the found entry record. */
1102int
1103st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1104{
1105 st_index_t bin;
1106 st_hash_t hash = do_hash(key, tab);
1107
1108 retry:
1109 if (tab->bins == NULL) {
1110 bin = find_entry(tab, hash, key);
1111 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1112 goto retry;
1113 if (bin == UNDEFINED_ENTRY_IND)
1114 return 0;
1115 }
1116 else {
1117 bin = find_table_entry_ind(tab, hash, key);
1118 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1119 goto retry;
1120 if (bin == UNDEFINED_ENTRY_IND)
1121 return 0;
1122 bin -= ENTRY_BASE;
1123 }
1124 if (value != 0)
1125 *value = tab->entries[bin].record;
1126 return 1;
1127}
1128
1129/* Find an entry with KEY in table TAB. Return non-zero if we found
1130 it. Set up *RESULT to the found table entry key. */
1131int
1132st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1133{
1134 st_index_t bin;
1135 st_hash_t hash = do_hash(key, tab);
1136
1137 retry:
1138 if (tab->bins == NULL) {
1139 bin = find_entry(tab, hash, key);
1140 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1141 goto retry;
1142 if (bin == UNDEFINED_ENTRY_IND)
1143 return 0;
1144 }
1145 else {
1146 bin = find_table_entry_ind(tab, hash, key);
1147 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1148 goto retry;
1149 if (bin == UNDEFINED_ENTRY_IND)
1150 return 0;
1151 bin -= ENTRY_BASE;
1152 }
1153 if (result != 0)
1154 *result = tab->entries[bin].key;
1155 return 1;
1156}
1157
1158/* Check the table and rebuild it if it is necessary. */
1159static inline void
1160rebuild_table_if_necessary (st_table *tab)
1161{
1162 st_index_t bound = tab->entries_bound;
1163
1164 if (bound == get_allocated_entries(tab))
1165 rebuild_table(tab);
1166}
1167
1168/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1169 already entry with KEY in the table, return nonzero and update
1170 the value of the found entry. */
1171int
1172st_insert(st_table *tab, st_data_t key, st_data_t value)
1173{
1174 st_table_entry *entry;
1175 st_index_t bin;
1176 st_index_t ind;
1177 st_hash_t hash_value;
1178 st_index_t bin_ind;
1179 int new_p;
1180
1181 hash_value = do_hash(key, tab);
1182 retry:
1183 rebuild_table_if_necessary(tab);
1184 if (tab->bins == NULL) {
1185 bin = find_entry(tab, hash_value, key);
1186 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1187 goto retry;
1188 new_p = bin == UNDEFINED_ENTRY_IND;
1189 if (new_p)
1190 tab->num_entries++;
1191 bin_ind = UNDEFINED_BIN_IND;
1192 }
1193 else {
1194 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1195 key, &bin_ind);
1196 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1197 goto retry;
1198 new_p = bin == UNDEFINED_ENTRY_IND;
1199 bin -= ENTRY_BASE;
1200 }
1201 if (new_p) {
1202 ind = tab->entries_bound++;
1203 entry = &tab->entries[ind];
1204 entry->hash = hash_value;
1205 entry->key = key;
1206 entry->record = value;
1207 if (bin_ind != UNDEFINED_BIN_IND)
1208 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1209 return 0;
1210 }
1211 tab->entries[bin].record = value;
1212 return 1;
1213}
1214
1215/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1216 entry with KEY before the insertion. */
1217static inline void
1218st_add_direct_with_hash(st_table *tab,
1219 st_data_t key, st_data_t value, st_hash_t hash)
1220{
1221 st_table_entry *entry;
1222 st_index_t ind;
1223 st_index_t bin_ind;
1224
1225 assert(hash != RESERVED_HASH_VAL);
1226
1227 rebuild_table_if_necessary(tab);
1228 ind = tab->entries_bound++;
1229 entry = &tab->entries[ind];
1230 entry->hash = hash;
1231 entry->key = key;
1232 entry->record = value;
1233 tab->num_entries++;
1234 if (tab->bins != NULL) {
1235 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1236 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1237 }
1238}
1239
1240void
1241rb_st_add_direct_with_hash(st_table *tab,
1242 st_data_t key, st_data_t value, st_hash_t hash)
1243{
1244 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1245}
1246
1247/* Insert (KEY, VALUE) into table TAB. The table should not have
1248 entry with KEY before the insertion. */
1249void
1250st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1251{
1252 st_hash_t hash_value;
1253
1254 hash_value = do_hash(key, tab);
1255 st_add_direct_with_hash(tab, key, value, hash_value);
1256}
1257
1258/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1259 there is already entry with KEY in the table, return nonzero and
1260 update the value of the found entry. */
1261int
1262st_insert2(st_table *tab, st_data_t key, st_data_t value,
1263 st_data_t (*func)(st_data_t))
1264{
1265 st_table_entry *entry;
1266 st_index_t bin;
1267 st_index_t ind;
1268 st_hash_t hash_value;
1269 st_index_t bin_ind;
1270 int new_p;
1271
1272 hash_value = do_hash(key, tab);
1273 retry:
1274 rebuild_table_if_necessary (tab);
1275 if (tab->bins == NULL) {
1276 bin = find_entry(tab, hash_value, key);
1277 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1278 goto retry;
1279 new_p = bin == UNDEFINED_ENTRY_IND;
1280 if (new_p)
1281 tab->num_entries++;
1282 bin_ind = UNDEFINED_BIN_IND;
1283 }
1284 else {
1285 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1286 key, &bin_ind);
1287 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1288 goto retry;
1289 new_p = bin == UNDEFINED_ENTRY_IND;
1290 bin -= ENTRY_BASE;
1291 }
1292 if (new_p) {
1293 key = (*func)(key);
1294 ind = tab->entries_bound++;
1295 entry = &tab->entries[ind];
1296 entry->hash = hash_value;
1297 entry->key = key;
1298 entry->record = value;
1299 if (bin_ind != UNDEFINED_BIN_IND)
1300 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1301 return 0;
1302 }
1303 tab->entries[bin].record = value;
1304 return 1;
1305}
1306
1307/* Create a copy of old_tab into new_tab. */
1308st_table *
1309st_replace(st_table *new_tab, st_table *old_tab)
1310{
1311 *new_tab = *old_tab;
1312 if (old_tab->bins == NULL)
1313 new_tab->bins = NULL;
1314 else {
1315 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1316#ifndef RUBY
1317 if (new_tab->bins == NULL) {
1318 return NULL;
1319 }
1320#endif
1321 }
1322 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1323 * sizeof(st_table_entry));
1324#ifndef RUBY
1325 if (new_tab->entries == NULL) {
1326 return NULL;
1327 }
1328#endif
1329 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1330 get_allocated_entries(old_tab));
1331 if (old_tab->bins != NULL)
1332 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1333
1334 return new_tab;
1335}
1336
1337/* Create and return a copy of table OLD_TAB. */
1338st_table *
1339st_copy(st_table *old_tab)
1340{
1341 st_table *new_tab;
1342
1343 new_tab = (st_table *) malloc(sizeof(st_table));
1344#ifndef RUBY
1345 if (new_tab == NULL)
1346 return NULL;
1347#endif
1348
1349 if (st_replace(new_tab, old_tab) == NULL) {
1350 st_free_table(new_tab);
1351 return NULL;
1352 }
1353
1354 return new_tab;
1355}
1356
1357/* Update the entries start of table TAB after removing an entry
1358 with index N in the array entries. */
1359static inline void
1360update_range_for_deleted(st_table *tab, st_index_t n)
1361{
1362 /* Do not update entries_bound here. Otherwise, we can fill all
1363 bins by deleted entry value before rebuilding the table. */
1364 if (tab->entries_start == n) {
1365 st_index_t start = n + 1;
1366 st_index_t bound = tab->entries_bound;
1367 st_table_entry *entries = tab->entries;
1368 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1369 tab->entries_start = start;
1370 }
1371}
1372
1373/* Delete entry with KEY from table TAB, set up *VALUE (unless
1374 VALUE is zero) from deleted table entry, and return non-zero. If
1375 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1376 is zero), and return zero. */
1377static int
1378st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1379{
1380 st_table_entry *entry;
1381 st_index_t bin;
1382 st_index_t bin_ind;
1383 st_hash_t hash;
1384
1385 hash = do_hash(*key, tab);
1386 retry:
1387 if (tab->bins == NULL) {
1388 bin = find_entry(tab, hash, *key);
1389 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1390 goto retry;
1391 if (bin == UNDEFINED_ENTRY_IND) {
1392 if (value != 0) *value = 0;
1393 return 0;
1394 }
1395 }
1396 else {
1397 bin_ind = find_table_bin_ind(tab, hash, *key);
1398 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1399 goto retry;
1400 if (bin_ind == UNDEFINED_BIN_IND) {
1401 if (value != 0) *value = 0;
1402 return 0;
1403 }
1404 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1405 MARK_BIN_DELETED(tab, bin_ind);
1406 }
1407 entry = &tab->entries[bin];
1408 *key = entry->key;
1409 if (value != 0) *value = entry->record;
1410 MARK_ENTRY_DELETED(entry);
1411 tab->num_entries--;
1412 update_range_for_deleted(tab, bin);
1413 return 1;
1414}
1415
1416int
1417st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1418{
1419 return st_general_delete(tab, key, value);
1420}
1421
1422/* The function and other functions with suffix '_safe' or '_check'
1423 are originated from the previous implementation of the hash tables.
1424 It was necessary for correct deleting entries during traversing
1425 tables. The current implementation permits deletion during
1426 traversing without a specific way to do this. */
1427int
1428st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1429 st_data_t never ATTRIBUTE_UNUSED)
1430{
1431 return st_general_delete(tab, key, value);
1432}
1433
1434/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1435 return zero. Otherwise, remove the first entry in the table.
1436 Return its key through KEY and its record through VALUE (unless
1437 VALUE is zero). */
1438int
1439st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1440{
1441 st_index_t i, bound;
1442 st_index_t bin;
1443 st_table_entry *entries, *curr_entry_ptr;
1444 st_index_t bin_ind;
1445
1446 entries = tab->entries;
1447 bound = tab->entries_bound;
1448 for (i = tab->entries_start; i < bound; i++) {
1449 curr_entry_ptr = &entries[i];
1450 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1451 st_hash_t entry_hash = curr_entry_ptr->hash;
1452 st_data_t entry_key = curr_entry_ptr->key;
1453
1454 if (value != 0) *value = curr_entry_ptr->record;
1455 *key = entry_key;
1456 retry:
1457 if (tab->bins == NULL) {
1458 bin = find_entry(tab, entry_hash, entry_key);
1459 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1460 entries = tab->entries;
1461 goto retry;
1462 }
1463 curr_entry_ptr = &entries[bin];
1464 }
1465 else {
1466 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1467 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1468 entries = tab->entries;
1469 goto retry;
1470 }
1471 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1472 - ENTRY_BASE];
1473 MARK_BIN_DELETED(tab, bin_ind);
1474 }
1475 MARK_ENTRY_DELETED(curr_entry_ptr);
1476 tab->num_entries--;
1477 update_range_for_deleted(tab, i);
1478 return 1;
1479 }
1480 }
1481 if (value != 0) *value = 0;
1482 return 0;
1483}
1484
1485/* See comments for function st_delete_safe. */
1486void
1487st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1488 st_data_t never ATTRIBUTE_UNUSED)
1489{
1490}
1491
1492/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1493 of the key and the value of the found entry, and non-zero as the
1494 3rd argument. If the entry is not found, call FUNC with a pointer
1495 to KEY, a pointer to zero, and a zero argument. If the call
1496 returns ST_CONTINUE, the table will have an entry with key and
1497 value returned by FUNC through the 1st and 2nd parameters. If the
1498 call of FUNC returns ST_DELETE, the table will not have entry with
1499 KEY. The function returns flag of that the entry with KEY was in
1500 the table before the call. */
1501int
1502st_update(st_table *tab, st_data_t key,
1503 st_update_callback_func *func, st_data_t arg)
1504{
1505 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1506 st_index_t bin = 0; /* Ditto */
1507 st_table_entry *entries;
1508 st_index_t bin_ind;
1509 st_data_t value = 0, old_key;
1510 int retval, existing;
1511 st_hash_t hash = do_hash(key, tab);
1512
1513 retry:
1514 entries = tab->entries;
1515 if (tab->bins == NULL) {
1516 bin = find_entry(tab, hash, key);
1517 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1518 goto retry;
1519 existing = bin != UNDEFINED_ENTRY_IND;
1520 entry = &entries[bin];
1521 bin_ind = UNDEFINED_BIN_IND;
1522 }
1523 else {
1524 bin_ind = find_table_bin_ind(tab, hash, key);
1525 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1526 goto retry;
1527 existing = bin_ind != UNDEFINED_BIN_IND;
1528 if (existing) {
1529 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1530 entry = &entries[bin];
1531 }
1532 }
1533 if (existing) {
1534 key = entry->key;
1535 value = entry->record;
1536 }
1537 old_key = key;
1538
1539 unsigned int rebuilds_num = tab->rebuilds_num;
1540
1541 retval = (*func)(&key, &value, arg, existing);
1542
1543 // We need to make sure that the callback didn't cause a table rebuild
1544 // Ideally we would make sure no operations happened
1545 assert(rebuilds_num == tab->rebuilds_num);
1546 (void)rebuilds_num;
1547
1548 switch (retval) {
1549 case ST_CONTINUE:
1550 if (! existing) {
1551 st_add_direct_with_hash(tab, key, value, hash);
1552 break;
1553 }
1554 if (old_key != key) {
1555 entry->key = key;
1556 }
1557 entry->record = value;
1558 break;
1559 case ST_DELETE:
1560 if (existing) {
1561 if (bin_ind != UNDEFINED_BIN_IND)
1562 MARK_BIN_DELETED(tab, bin_ind);
1563 MARK_ENTRY_DELETED(entry);
1564 tab->num_entries--;
1565 update_range_for_deleted(tab, bin);
1566 }
1567 break;
1568 }
1569 return existing;
1570}
1571
1572/* Traverse all entries in table TAB calling FUNC with current entry
1573 key and value and zero. If the call returns ST_STOP, stop
1574 traversing. If the call returns ST_DELETE, delete the current
1575 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1576 traversing. The function returns zero unless an error is found.
1577 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1578 different for ST_CHECK and when the current element is removed
1579 during traversing. */
1580static inline int
1581st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1582 int check_p)
1583{
1584 st_index_t bin;
1585 st_index_t bin_ind;
1586 st_table_entry *entries, *curr_entry_ptr;
1587 enum st_retval retval;
1588 st_index_t i, rebuilds_num;
1589 st_hash_t hash;
1590 st_data_t key;
1591 int error_p, packed_p = tab->bins == NULL;
1592
1593 entries = tab->entries;
1594 /* The bound can change inside the loop even without rebuilding
1595 the table, e.g. by an entry insertion. */
1596 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1597 curr_entry_ptr = &entries[i];
1598 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1599 continue;
1600 key = curr_entry_ptr->key;
1601 rebuilds_num = tab->rebuilds_num;
1602 hash = curr_entry_ptr->hash;
1603 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1604
1605 if (retval == ST_REPLACE && replace) {
1606 st_data_t value;
1607 value = curr_entry_ptr->record;
1608 retval = (*replace)(&key, &value, arg, TRUE);
1609 curr_entry_ptr->key = key;
1610 curr_entry_ptr->record = value;
1611 }
1612
1613 if (rebuilds_num != tab->rebuilds_num) {
1614 retry:
1615 entries = tab->entries;
1616 packed_p = tab->bins == NULL;
1617 if (packed_p) {
1618 i = find_entry(tab, hash, key);
1619 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1620 goto retry;
1621 error_p = i == UNDEFINED_ENTRY_IND;
1622 }
1623 else {
1624 i = find_table_entry_ind(tab, hash, key);
1625 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1626 goto retry;
1627 error_p = i == UNDEFINED_ENTRY_IND;
1628 i -= ENTRY_BASE;
1629 }
1630 if (error_p && check_p) {
1631 /* call func with error notice */
1632 retval = (*func)(0, 0, arg, 1);
1633 return 1;
1634 }
1635 curr_entry_ptr = &entries[i];
1636 }
1637 switch (retval) {
1638 case ST_REPLACE:
1639 break;
1640 case ST_CONTINUE:
1641 break;
1642 case ST_CHECK:
1643 if (check_p)
1644 break;
1645 case ST_STOP:
1646 return 0;
1647 case ST_DELETE: {
1648 st_data_t key = curr_entry_ptr->key;
1649
1650 again:
1651 if (packed_p) {
1652 bin = find_entry(tab, hash, key);
1653 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1654 goto again;
1655 if (bin == UNDEFINED_ENTRY_IND)
1656 break;
1657 }
1658 else {
1659 bin_ind = find_table_bin_ind(tab, hash, key);
1660 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1661 goto again;
1662 if (bin_ind == UNDEFINED_BIN_IND)
1663 break;
1664 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1665 MARK_BIN_DELETED(tab, bin_ind);
1666 }
1667 curr_entry_ptr = &entries[bin];
1668 MARK_ENTRY_DELETED(curr_entry_ptr);
1669 tab->num_entries--;
1670 update_range_for_deleted(tab, bin);
1671 break;
1672 }
1673 }
1674 }
1675 return 0;
1676}
1677
1678int
1679st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1680{
1681 return st_general_foreach(tab, func, replace, arg, TRUE);
1682}
1683
1684struct functor {
1685 st_foreach_callback_func *func;
1686 st_data_t arg;
1687};
1688
1689static int
1690apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1691{
1692 const struct functor *f = (void *)d;
1693 return f->func(k, v, f->arg);
1694}
1695
1696int
1697st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1698{
1699 const struct functor f = { func, arg };
1700 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1701}
1702
1703/* See comments for function st_delete_safe. */
1704int
1705st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1706 st_data_t never ATTRIBUTE_UNUSED)
1707{
1708 return st_general_foreach(tab, func, 0, arg, TRUE);
1709}
1710
1711/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1712 Return the number of keys set up in array KEYS. */
1713static inline st_index_t
1714st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1715{
1716 st_index_t i, bound;
1717 st_data_t key, *keys_start, *keys_end;
1718 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1719
1720 bound = tab->entries_bound;
1721 keys_start = keys;
1722 keys_end = keys + size;
1723 for (i = tab->entries_start; i < bound; i++) {
1724 if (keys == keys_end)
1725 break;
1726 curr_entry_ptr = &entries[i];
1727 key = curr_entry_ptr->key;
1728 if (! DELETED_ENTRY_P(curr_entry_ptr))
1729 *keys++ = key;
1730 }
1731
1732 return keys - keys_start;
1733}
1734
1735st_index_t
1736st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1737{
1738 return st_general_keys(tab, keys, size);
1739}
1740
1741/* See comments for function st_delete_safe. */
1742st_index_t
1743st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1744 st_data_t never ATTRIBUTE_UNUSED)
1745{
1746 return st_general_keys(tab, keys, size);
1747}
1748
1749/* Set up array VALUES by at most SIZE values of head table TAB
1750 entries. Return the number of values set up in array VALUES. */
1751static inline st_index_t
1752st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1753{
1754 st_index_t i, bound;
1755 st_data_t *values_start, *values_end;
1756 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1757
1758 values_start = values;
1759 values_end = values + size;
1760 bound = tab->entries_bound;
1761 for (i = tab->entries_start; i < bound; i++) {
1762 if (values == values_end)
1763 break;
1764 curr_entry_ptr = &entries[i];
1765 if (! DELETED_ENTRY_P(curr_entry_ptr))
1766 *values++ = curr_entry_ptr->record;
1767 }
1768
1769 return values - values_start;
1770}
1771
1772st_index_t
1773st_values(st_table *tab, st_data_t *values, st_index_t size)
1774{
1775 return st_general_values(tab, values, size);
1776}
1777
1778/* See comments for function st_delete_safe. */
1779st_index_t
1780st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1781 st_data_t never ATTRIBUTE_UNUSED)
1782{
1783 return st_general_values(tab, values, size);
1784}
1785
1786#define FNV1_32A_INIT 0x811c9dc5
1787
1788/*
1789 * 32 bit magic FNV-1a prime
1790 */
1791#define FNV_32_PRIME 0x01000193
1792
1793/* __POWERPC__ added to accommodate Darwin case. */
1794#ifndef UNALIGNED_WORD_ACCESS
1795# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1796 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1797 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1798 defined(__mc68020__)
1799# define UNALIGNED_WORD_ACCESS 1
1800# endif
1801#endif
1802#ifndef UNALIGNED_WORD_ACCESS
1803# define UNALIGNED_WORD_ACCESS 0
1804#endif
1805
1806/* This hash function is quite simplified MurmurHash3
1807 * Simplification is legal, cause most of magic still happens in finalizator.
1808 * And finalizator is almost the same as in MurmurHash3 */
1809#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1810#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1811
1812#if ST_INDEX_BITS <= 32
1813#define C1 (st_index_t)0xcc9e2d51
1814#define C2 (st_index_t)0x1b873593
1815#else
1816#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1817#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1818#endif
1819NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1820NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1821NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1822
1823static inline st_index_t
1824murmur_step(st_index_t h, st_index_t k)
1825{
1826#if ST_INDEX_BITS <= 32
1827#define r1 (17)
1828#define r2 (11)
1829#else
1830#define r1 (33)
1831#define r2 (24)
1832#endif
1833 k *= C1;
1834 h ^= ROTL(k, r1);
1835 h *= C2;
1836 h = ROTL(h, r2);
1837 return h;
1838}
1839#undef r1
1840#undef r2
1841
1842static inline st_index_t
1843murmur_finish(st_index_t h)
1844{
1845#if ST_INDEX_BITS <= 32
1846#define r1 (16)
1847#define r2 (13)
1848#define r3 (16)
1849 const st_index_t c1 = 0x85ebca6b;
1850 const st_index_t c2 = 0xc2b2ae35;
1851#else
1852/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1853#define r1 (30)
1854#define r2 (27)
1855#define r3 (31)
1856 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1857 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1858#endif
1859#if ST_INDEX_BITS > 64
1860 h ^= h >> 64;
1861 h *= c2;
1862 h ^= h >> 65;
1863#endif
1864 h ^= h >> r1;
1865 h *= c1;
1866 h ^= h >> r2;
1867 h *= c2;
1868 h ^= h >> r3;
1869 return h;
1870}
1871#undef r1
1872#undef r2
1873#undef r3
1874
1875st_index_t
1876st_hash(const void *ptr, size_t len, st_index_t h)
1877{
1878 const char *data = ptr;
1879 st_index_t t = 0;
1880 size_t l = len;
1881
1882#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1883#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1884#if SIZEOF_ST_INDEX_T > 4
1885#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1886#if SIZEOF_ST_INDEX_T > 8
1887#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1888 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1889#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1890#endif
1891#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1892#else
1893#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1894#endif
1895#undef SKIP_TAIL
1896 if (len >= sizeof(st_index_t)) {
1897#if !UNALIGNED_WORD_ACCESS
1898 int align = (int)((st_data_t)data % sizeof(st_index_t));
1899 if (align) {
1900 st_index_t d = 0;
1901 int sl, sr, pack;
1902
1903 switch (align) {
1904#ifdef WORDS_BIGENDIAN
1905# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1906 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1907#else
1908# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1909 t |= data_at(n) << CHAR_BIT*(n)
1910#endif
1911 UNALIGNED_ADD_ALL;
1912#undef UNALIGNED_ADD
1913 }
1914
1915#ifdef WORDS_BIGENDIAN
1916 t >>= (CHAR_BIT * align) - CHAR_BIT;
1917#else
1918 t <<= (CHAR_BIT * align);
1919#endif
1920
1921 data += sizeof(st_index_t)-align;
1922 len -= sizeof(st_index_t)-align;
1923
1924 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1925 sr = CHAR_BIT * align;
1926
1927 while (len >= sizeof(st_index_t)) {
1928 d = *(st_index_t *)data;
1929#ifdef WORDS_BIGENDIAN
1930 t = (t << sr) | (d >> sl);
1931#else
1932 t = (t >> sr) | (d << sl);
1933#endif
1934 h = murmur_step(h, t);
1935 t = d;
1936 data += sizeof(st_index_t);
1937 len -= sizeof(st_index_t);
1938 }
1939
1940 pack = len < (size_t)align ? (int)len : align;
1941 d = 0;
1942 switch (pack) {
1943#ifdef WORDS_BIGENDIAN
1944# define UNALIGNED_ADD(n) case (n) + 1: \
1945 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1946#else
1947# define UNALIGNED_ADD(n) case (n) + 1: \
1948 d |= data_at(n) << CHAR_BIT*(n)
1949#endif
1950 UNALIGNED_ADD_ALL;
1951#undef UNALIGNED_ADD
1952 }
1953#ifdef WORDS_BIGENDIAN
1954 t = (t << sr) | (d >> sl);
1955#else
1956 t = (t >> sr) | (d << sl);
1957#endif
1958
1959 if (len < (size_t)align) goto skip_tail;
1960# define SKIP_TAIL 1
1961 h = murmur_step(h, t);
1962 data += pack;
1963 len -= pack;
1964 }
1965 else
1966#endif
1967#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1968#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1969#else
1970#define aligned_data data
1971#endif
1972 {
1973 do {
1974 h = murmur_step(h, *(st_index_t *)aligned_data);
1975 data += sizeof(st_index_t);
1976 len -= sizeof(st_index_t);
1977 } while (len >= sizeof(st_index_t));
1978 }
1979 }
1980
1981 t = 0;
1982 switch (len) {
1983#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1984 /* in this case byteorder doesn't really matter */
1985#if SIZEOF_ST_INDEX_T > 4
1986 case 7: t |= data_at(6) << 48;
1987 case 6: t |= data_at(5) << 40;
1988 case 5: t |= data_at(4) << 32;
1989 case 4:
1990 t |= (st_index_t)*(uint32_t*)aligned_data;
1991 goto skip_tail;
1992# define SKIP_TAIL 1
1993#endif
1994 case 3: t |= data_at(2) << 16;
1995 case 2: t |= data_at(1) << 8;
1996 case 1: t |= data_at(0);
1997#else
1998#ifdef WORDS_BIGENDIAN
1999# define UNALIGNED_ADD(n) case (n) + 1: \
2000 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
2001#else
2002# define UNALIGNED_ADD(n) case (n) + 1: \
2003 t |= data_at(n) << CHAR_BIT*(n)
2004#endif
2005 UNALIGNED_ADD_ALL;
2006#undef UNALIGNED_ADD
2007#endif
2008#ifdef SKIP_TAIL
2009 skip_tail:
2010#endif
2011 h ^= t; h -= ROTL(t, 7);
2012 h *= C2;
2013 }
2014 h ^= l;
2015#undef aligned_data
2016
2017 return murmur_finish(h);
2018}
2019
2020st_index_t
2021st_hash_uint32(st_index_t h, uint32_t i)
2022{
2023 return murmur_step(h, i);
2024}
2025
2026NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
2027st_index_t
2028st_hash_uint(st_index_t h, st_index_t i)
2029{
2030 i += h;
2031/* no matter if it is BigEndian or LittleEndian,
2032 * we hash just integers */
2033#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
2034 h = murmur_step(h, i >> 8*8);
2035#endif
2036 h = murmur_step(h, i);
2037 return h;
2038}
2039
2040st_index_t
2041st_hash_end(st_index_t h)
2042{
2043 h = murmur_finish(h);
2044 return h;
2045}
2046
2047#undef st_hash_start
2048st_index_t
2049rb_st_hash_start(st_index_t h)
2050{
2051 return h;
2052}
2053
2054static st_index_t
2055strhash(st_data_t arg)
2056{
2057 register const char *string = (const char *)arg;
2058 return st_hash(string, strlen(string), FNV1_32A_INIT);
2059}
2060
2061int
2062st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2063{
2064 char c1, c2;
2065
2066 while (1) {
2067 c1 = *s1++;
2068 c2 = *s2++;
2069 if (c1 == '\0' || c2 == '\0') {
2070 if (c1 != '\0') return 1;
2071 if (c2 != '\0') return -1;
2072 return 0;
2073 }
2074 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2075 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2076 if (c1 != c2) {
2077 if (c1 > c2)
2078 return 1;
2079 else
2080 return -1;
2081 }
2082 }
2083}
2084
2085int
2086st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2087{
2088 char c1, c2;
2089 size_t i;
2090
2091 for (i = 0; i < n; i++) {
2092 c1 = *s1++;
2093 c2 = *s2++;
2094 if (c1 == '\0' || c2 == '\0') {
2095 if (c1 != '\0') return 1;
2096 if (c2 != '\0') return -1;
2097 return 0;
2098 }
2099 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2100 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2101 if (c1 != c2) {
2102 if (c1 > c2)
2103 return 1;
2104 else
2105 return -1;
2106 }
2107 }
2108 return 0;
2109}
2110
2111static int
2112st_strcmp(st_data_t lhs, st_data_t rhs)
2113{
2114 const char *s1 = (char *)lhs;
2115 const char *s2 = (char *)rhs;
2116 return strcmp(s1, s2);
2117}
2118
2119static int
2120st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2121{
2122 const char *s1 = (char *)lhs;
2123 const char *s2 = (char *)rhs;
2124 return st_locale_insensitive_strcasecmp(s1, s2);
2125}
2126
2127NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2128static st_index_t
2129strcasehash(st_data_t arg)
2130{
2131 register const char *string = (const char *)arg;
2132 register st_index_t hval = FNV1_32A_INIT;
2133
2134 /*
2135 * FNV-1a hash each octet in the buffer
2136 */
2137 while (*string) {
2138 unsigned int c = (unsigned char)*string++;
2139 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2140 hval ^= c;
2141
2142 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2143 hval *= FNV_32_PRIME;
2144 }
2145 return hval;
2146}
2147
2148int
2149st_numcmp(st_data_t x, st_data_t y)
2150{
2151 return x != y;
2152}
2153
2154st_index_t
2155st_numhash(st_data_t n)
2156{
2157 enum {s1 = 11, s2 = 3};
2158 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2159}
2160
2161#ifdef RUBY
2162/* Expand TAB to be suitable for holding SIZ entries in total.
2163 Pre-existing entries remain not deleted inside of TAB, but its bins
2164 are cleared to expect future reconstruction. See rehash below. */
2165static void
2166st_expand_table(st_table *tab, st_index_t siz)
2167{
2168 st_table *tmp;
2169 st_index_t n;
2170
2171 if (siz <= get_allocated_entries(tab))
2172 return; /* enough room already */
2173
2174 tmp = st_init_table_with_size(tab->type, siz);
2175 n = get_allocated_entries(tab);
2176 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2177 st_free_bins(tab);
2178 st_free_entries(tab);
2179 st_free_bins(tmp);
2180
2181 tab->entry_power = tmp->entry_power;
2182 tab->bin_power = tmp->bin_power;
2183 tab->size_ind = tmp->size_ind;
2184 tab->entries = tmp->entries;
2185 tab->bins = NULL;
2186 tab->rebuilds_num++;
2187 free_fixed_ptr(tmp);
2188}
2189
2190/* Rehash using linear search. Return TRUE if we found that the table
2191 was rebuilt. */
2192static int
2193st_rehash_linear(st_table *tab)
2194{
2195 int eq_p, rebuilt_p;
2196 st_index_t i, j;
2197 st_table_entry *p, *q;
2198
2199 st_free_bins(tab);
2200 tab->bins = NULL;
2201
2202 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2203 p = &tab->entries[i];
2204 if (DELETED_ENTRY_P(p))
2205 continue;
2206 for (j = i + 1; j < tab->entries_bound; j++) {
2207 q = &tab->entries[j];
2208 if (DELETED_ENTRY_P(q))
2209 continue;
2210 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2211 if (EXPECT(rebuilt_p, 0))
2212 return TRUE;
2213 if (eq_p) {
2214 *p = *q;
2215 MARK_ENTRY_DELETED(q);
2216 tab->num_entries--;
2217 update_range_for_deleted(tab, j);
2218 }
2219 }
2220 }
2221 return FALSE;
2222}
2223
2224/* Rehash using index. Return TRUE if we found that the table was
2225 rebuilt. */
2226static int
2227st_rehash_indexed(st_table *tab)
2228{
2229 int eq_p, rebuilt_p;
2230 st_index_t i;
2231
2232 if (!tab->bins) {
2233 tab->bins = malloc(bins_size(tab));
2234 }
2235 unsigned int const size_ind = get_size_ind(tab);
2236 initialize_bins(tab);
2237 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2238 st_table_entry *p = &tab->entries[i];
2239 st_index_t ind;
2240#ifdef QUADRATIC_PROBE
2241 st_index_t d = 1;
2242#else
2243 st_index_t perturb = p->hash;
2244#endif
2245
2246 if (DELETED_ENTRY_P(p))
2247 continue;
2248
2249 ind = hash_bin(p->hash, tab);
2250 for (;;) {
2251 st_index_t bin = get_bin(tab->bins, size_ind, ind);
2252 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2253 /* ok, new room */
2254 set_bin(tab->bins, size_ind, ind, i + ENTRY_BASE);
2255 break;
2256 }
2257 else {
2258 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2259 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2260 if (EXPECT(rebuilt_p, 0))
2261 return TRUE;
2262 if (eq_p) {
2263 /* duplicated key; delete it */
2264 q->record = p->record;
2265 MARK_ENTRY_DELETED(p);
2266 tab->num_entries--;
2267 update_range_for_deleted(tab, bin);
2268 break;
2269 }
2270 else {
2271 /* hash collision; skip it */
2272#ifdef QUADRATIC_PROBE
2273 ind = hash_bin(ind + d, tab);
2274 d++;
2275#else
2276 ind = secondary_hash(ind, tab, &perturb);
2277#endif
2278 }
2279 }
2280 }
2281 }
2282 return FALSE;
2283}
2284
2285/* Reconstruct TAB's bins according to TAB's entries. This function
2286 permits conflicting keys inside of entries. No errors are reported
2287 then. All but one of them are discarded silently. */
2288static void
2289st_rehash(st_table *tab)
2290{
2291 int rebuilt_p;
2292
2293 do {
2294 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2295 rebuilt_p = st_rehash_linear(tab);
2296 else
2297 rebuilt_p = st_rehash_indexed(tab);
2298 } while (rebuilt_p);
2299}
2300
2301static st_data_t
2302st_stringify(VALUE key)
2303{
2304 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2305 rb_hash_key_str(key) : key;
2306}
2307
2308static void
2309st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2310{
2311 st_data_t k = st_stringify(key);
2313 e.hash = do_hash(k, tab);
2314 e.key = k;
2315 e.record = val;
2316
2317 tab->entries[tab->entries_bound++] = e;
2318 tab->num_entries++;
2319 RB_OBJ_WRITTEN(hash, Qundef, k);
2320 RB_OBJ_WRITTEN(hash, Qundef, val);
2321}
2322
2323static void
2324st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2325{
2326 long i;
2327
2328 for (i = 0; i < argc; /* */) {
2329 st_data_t k = st_stringify(argv[i++]);
2330 st_data_t v = argv[i++];
2331 st_insert(tab, k, v);
2332 RB_OBJ_WRITTEN(hash, Qundef, k);
2333 RB_OBJ_WRITTEN(hash, Qundef, v);
2334 }
2335}
2336
2337static void
2338st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2339{
2340 long i;
2341
2342 /* push elems */
2343 for (i = 0; i < argc; /* */) {
2344 VALUE key = argv[i++];
2345 VALUE val = argv[i++];
2346 st_insert_single(tab, hash, key, val);
2347 }
2348
2349 /* reindex */
2350 st_rehash(tab);
2351}
2352
2353/* Mimics ruby's { foo => bar } syntax. This function is subpart
2354 of rb_hash_bulk_insert. */
2355void
2356rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2357{
2358 st_index_t n, size = argc / 2;
2359 st_table *tab = RHASH_ST_TABLE(hash);
2360
2361 tab = RHASH_TBL_RAW(hash);
2362 n = tab->entries_bound + size;
2363 st_expand_table(tab, n);
2364 if (UNLIKELY(tab->num_entries))
2365 st_insert_generic(tab, argc, argv, hash);
2366 else if (argc <= 2)
2367 st_insert_single(tab, hash, argv[0], argv[1]);
2368 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2369 st_insert_linear(tab, argc, argv, hash);
2370 else
2371 st_insert_generic(tab, argc, argv, hash);
2372}
2373
2374void
2375rb_st_compact_table(st_table *tab)
2376{
2377 st_index_t num = tab->num_entries;
2378 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2379 /* Compaction: */
2380 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2381 rebuild_table_with(new_tab, tab);
2382 rebuild_move_table(new_tab, tab);
2383 rebuild_cleanup(tab);
2384 }
2385}
2386
2387/*
2388 * set_table related code
2389 */
2390
2391struct set_table_entry {
2392 st_hash_t hash;
2393 st_data_t key;
2394};
2395
2396/* Return hash value of KEY for table TAB. */
2397static inline st_hash_t
2398set_do_hash(st_data_t key, set_table *tab)
2399{
2400 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2401 return normalize_hash_value(hash);
2402}
2403
2404/* Return bin size index of table TAB. */
2405static inline unsigned int
2406set_get_size_ind(const set_table *tab)
2407{
2408 return tab->size_ind;
2409}
2410
2411/* Return the number of allocated bins of table TAB. */
2412static inline st_index_t
2413set_get_bins_num(const set_table *tab)
2414{
2415 return ((st_index_t) 1)<<tab->bin_power;
2416}
2417
2418/* Return mask for a bin index in table TAB. */
2419static inline st_index_t
2420set_bins_mask(const set_table *tab)
2421{
2422 return set_get_bins_num(tab) - 1;
2423}
2424
2425/* Return the index of table TAB bin corresponding to
2426 HASH_VALUE. */
2427static inline st_index_t
2428set_hash_bin(st_hash_t hash_value, set_table *tab)
2429{
2430 return hash_value & set_bins_mask(tab);
2431}
2432
2433/* Return the number of allocated entries of table TAB. */
2434static inline st_index_t
2435set_get_allocated_entries(const set_table *tab)
2436{
2437 return ((st_index_t) 1)<<tab->entry_power;
2438}
2439
2440static inline size_t
2441set_allocated_entries_size(const set_table *tab)
2442{
2443 return set_get_allocated_entries(tab) * sizeof(set_table_entry);
2444}
2445
2446static inline bool
2447set_has_bins(const set_table *tab)
2448{
2449 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
2450}
2451
2452/* Return size of the allocated bins of table TAB. */
2453static inline st_index_t
2454set_bins_size(const set_table *tab)
2455{
2456 if (set_has_bins(tab)) {
2457 return features[tab->entry_power].bins_words * sizeof (st_index_t);
2458 }
2459
2460 return 0;
2461}
2462
2463static inline st_index_t *
2464set_bins_ptr(const set_table *tab)
2465{
2466 if (set_has_bins(tab)) {
2467 return (st_index_t *)(((char *)tab->entries) + set_allocated_entries_size(tab));
2468 }
2469
2470 return NULL;
2471}
2472
2473/* Mark all bins of table TAB as empty. */
2474static void
2475set_initialize_bins(set_table *tab)
2476{
2477 memset(set_bins_ptr(tab), 0, set_bins_size(tab));
2478}
2479
2480/* Make table TAB empty. */
2481static void
2482set_make_tab_empty(set_table *tab)
2483{
2484 tab->num_entries = 0;
2485 tab->entries_start = tab->entries_bound = 0;
2486 if (set_bins_ptr(tab) != NULL)
2487 set_initialize_bins(tab);
2488}
2489
2490static inline size_t
2491set_entries_memsize(set_table *tab)
2492{
2493 size_t memsize = set_get_allocated_entries(tab) * sizeof(set_table_entry);
2494 if (set_has_bins(tab)) {
2495 memsize += set_bins_size(tab);
2496 }
2497 return memsize;
2498}
2499
2500static set_table *
2501set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2502{
2503 int n;
2504
2505#ifdef HASH_LOG
2506#if HASH_LOG+0 < 0
2507 {
2508 const char *e = getenv("ST_HASH_LOG");
2509 if (!e || !*e) init_st = 1;
2510 }
2511#endif
2512 if (init_st == 0) {
2513 init_st = 1;
2514 atexit(stat_col);
2515 }
2516#endif
2517
2518 n = get_power2(size);
2519
2520 tab->type = type;
2521 tab->entry_power = n;
2522 tab->bin_power = features[n].bin_power;
2523 tab->size_ind = features[n].size_ind;
2524
2525 tab->entries = (set_table_entry *)malloc(set_entries_memsize(tab));
2526 set_make_tab_empty(tab);
2527 tab->rebuilds_num = 0;
2528 return tab;
2529}
2530
2531/* Create and return table with TYPE which can hold at least SIZE
2532 entries. The real number of entries which the table can hold is
2533 the nearest power of two for SIZE. */
2534set_table *
2535set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2536{
2537 if (tab == NULL) tab = malloc(sizeof(set_table));
2538
2539 set_init_existing_table_with_size(tab, type, size);
2540
2541 return tab;
2542}
2543
2544set_table *
2545set_init_numtable(void)
2546{
2547 return set_init_table_with_size(NULL, &type_numhash, 0);
2548}
2549
2550set_table *
2551set_init_numtable_with_size(st_index_t size)
2552{
2553 return set_init_table_with_size(NULL, &type_numhash, size);
2554}
2555
2556size_t
2557set_table_size(const struct set_table *tbl)
2558{
2559 return tbl->num_entries;
2560}
2561
2562/* Make table TAB empty. */
2563void
2564set_table_clear(set_table *tab)
2565{
2566 set_make_tab_empty(tab);
2567 tab->rebuilds_num++;
2568}
2569
2570void
2571set_free_embedded_table(set_table *tab)
2572{
2573 sized_free(tab->entries, set_entries_memsize(tab));
2574}
2575
2576/* Free table TAB space. This should only be used if you passed NULL to
2577 set_init_table_with_size/set_copy when creating the table. */
2578void
2579set_free_table(set_table *tab)
2580{
2581 set_free_embedded_table(tab);
2582 free_fixed_ptr(tab);
2583}
2584
2585/* Return byte size of memory allocated for table TAB. */
2586size_t
2587set_memsize(const set_table *tab)
2588{
2589 return(sizeof(set_table)
2590 + (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
2591 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2592}
2593
2594static st_index_t
2595set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2596
2597static st_index_t
2598set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2599
2600static st_index_t
2601set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key);
2602
2603static st_index_t
2604set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2605 st_data_t key, st_index_t *bin_ind);
2606
2607static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
2608static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
2609static void set_rebuild_cleanup(set_table *const tab);
2610
2611/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
2612 and can change size of the table entries and bins arrays.
2613 Rebuilding is implemented by creation of a new table or by
2614 compaction of the existing one. */
2615static void
2616set_rebuild_table(set_table *tab)
2617{
2618 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2619 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2620 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2621 /* Compaction: */
2622 tab->num_entries = 0;
2623 if (set_has_bins(tab))
2624 set_initialize_bins(tab);
2625 set_rebuild_table_with(tab, tab);
2626 }
2627 else {
2628 set_table *new_tab;
2629 /* This allocation could trigger GC and compaction. If tab is the
2630 * gen_fields_tbl, then tab could have changed in size due to objects being
2631 * freed and/or moved. Do not store attributes of tab before this line. */
2632 new_tab = set_init_table_with_size(NULL, tab->type,
2633 2 * tab->num_entries - 1);
2634 set_rebuild_table_with(new_tab, tab);
2635 set_rebuild_move_table(new_tab, tab);
2636 }
2637 set_rebuild_cleanup(tab);
2638}
2639
2640static void
2641set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
2642{
2643 st_index_t i, ni;
2644 unsigned int size_ind;
2645 set_table_entry *new_entries;
2646 set_table_entry *curr_entry_ptr;
2647 st_index_t *bins;
2648 st_index_t bin_ind;
2649
2650 new_entries = new_tab->entries;
2651
2652 ni = 0;
2653 bins = set_bins_ptr(new_tab);
2654 size_ind = set_get_size_ind(new_tab);
2655 st_index_t bound = tab->entries_bound;
2656 set_table_entry *entries = tab->entries;
2657
2658 for (i = tab->entries_start; i < bound; i++) {
2659 curr_entry_ptr = &entries[i];
2660 PREFETCH(entries + i + 1, 0);
2661 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2662 continue;
2663 if (&new_entries[ni] != curr_entry_ptr)
2664 new_entries[ni] = *curr_entry_ptr;
2665 if (EXPECT(bins != NULL, 1)) {
2666 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2667 curr_entry_ptr->key);
2668 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2669 }
2670 new_tab->num_entries++;
2671 ni++;
2672 }
2673
2674 assert(new_tab->num_entries == tab->num_entries);
2675}
2676
2677static void
2678set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
2679{
2680 sized_free(tab->entries, set_entries_memsize(tab));
2681 tab->entries = new_tab->entries;
2682
2683 tab->entry_power = new_tab->entry_power;
2684 tab->bin_power = new_tab->bin_power;
2685 tab->size_ind = new_tab->size_ind;
2686
2687 free_fixed_ptr(new_tab);
2688}
2689
2690static void
2691set_rebuild_cleanup(set_table *const tab)
2692{
2693 tab->entries_start = 0;
2694 tab->entries_bound = tab->num_entries;
2695 tab->rebuilds_num++;
2696}
2697
2698/* Return the next secondary hash index for table TAB using previous
2699 index IND and PERTURB. Finally modulo of the function becomes a
2700 full *cycle linear congruential generator*, in other words it
2701 guarantees traversing all table bins in extreme case.
2702
2703 According the Hull-Dobell theorem a generator
2704 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
2705 o m and c are relatively prime
2706 o a-1 is divisible by all prime factors of m
2707 o a-1 is divisible by 4 if m is divisible by 4.
2708
2709 For our case a is 5, c is 1, and m is a power of two. */
2710static inline st_index_t
2711set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
2712{
2713 *perturb >>= 11;
2714 ind = (ind << 2) + ind + *perturb + 1;
2715 return set_hash_bin(ind, tab);
2716}
2717
2718/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
2719 search. Return the index of the found entry in array `entries`.
2720 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
2721 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2722static inline st_index_t
2723set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
2724{
2725 int eq_p, rebuilt_p;
2726 st_index_t i, bound;
2727 set_table_entry *entries;
2728
2729 bound = tab->entries_bound;
2730 entries = tab->entries;
2731 for (i = tab->entries_start; i < bound; i++) {
2732 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2733 if (EXPECT(rebuilt_p, 0))
2734 return REBUILT_TABLE_ENTRY_IND;
2735 if (eq_p)
2736 return i;
2737 }
2738 return UNDEFINED_ENTRY_IND;
2739}
2740
2741/* Use the quadratic probing. The method has a better data locality
2742 but more collisions than the current approach. In average it
2743 results in a bit slower search. */
2744/*#define QUADRATIC_PROBE*/
2745
2746/* Return index of entry with HASH_VALUE and KEY in table TAB. If
2747 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
2748 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2749static st_index_t
2750set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2751{
2752 int eq_p, rebuilt_p;
2753 st_index_t ind;
2754#ifdef QUADRATIC_PROBE
2755 st_index_t d;
2756#else
2757 st_index_t perturb;
2758#endif
2759 st_index_t bin;
2760 set_table_entry *entries = tab->entries;
2761
2762 ind = set_hash_bin(hash_value, tab);
2763#ifdef QUADRATIC_PROBE
2764 d = 1;
2765#else
2766 perturb = hash_value;
2767#endif
2768 for (;;) {
2769 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2770 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2771 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2772 if (EXPECT(rebuilt_p, 0))
2773 return REBUILT_TABLE_ENTRY_IND;
2774 if (eq_p)
2775 break;
2776 }
2777 else if (EMPTY_BIN_P(bin))
2778 return UNDEFINED_ENTRY_IND;
2779#ifdef QUADRATIC_PROBE
2780 ind = set_hash_bin(ind + d, tab);
2781 d++;
2782#else
2783 ind = set_secondary_hash(ind, tab, &perturb);
2784#endif
2785 }
2786 return bin;
2787}
2788
2789/* Find and return index of table TAB bin corresponding to an entry
2790 with HASH_VALUE and KEY. If there is no such bin, return
2791 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
2792 return REBUILT_TABLE_BIN_IND. */
2793static st_index_t
2794set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2795{
2796 int eq_p, rebuilt_p;
2797 st_index_t ind;
2798#ifdef QUADRATIC_PROBE
2799 st_index_t d;
2800#else
2801 st_index_t perturb;
2802#endif
2803 st_index_t bin;
2804 set_table_entry *entries = tab->entries;
2805
2806 ind = set_hash_bin(hash_value, tab);
2807#ifdef QUADRATIC_PROBE
2808 d = 1;
2809#else
2810 perturb = hash_value;
2811#endif
2812 for (;;) {
2813 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2814 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2815 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2816 if (EXPECT(rebuilt_p, 0))
2817 return REBUILT_TABLE_BIN_IND;
2818 if (eq_p)
2819 break;
2820 }
2821 else if (EMPTY_BIN_P(bin))
2822 return UNDEFINED_BIN_IND;
2823#ifdef QUADRATIC_PROBE
2824 ind = set_hash_bin(ind + d, tab);
2825 d++;
2826#else
2827 ind = set_secondary_hash(ind, tab, &perturb);
2828#endif
2829 }
2830 return ind;
2831}
2832
2833/* Find and return index of table TAB bin corresponding to an entry
2834 with HASH_VALUE and KEY. The entry should be in the table
2835 already. */
2836static st_index_t
2837set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
2838{
2839 st_index_t ind;
2840#ifdef QUADRATIC_PROBE
2841 st_index_t d;
2842#else
2843 st_index_t perturb;
2844#endif
2845 st_index_t bin;
2846
2847 ind = set_hash_bin(hash_value, tab);
2848#ifdef QUADRATIC_PROBE
2849 d = 1;
2850#else
2851 perturb = hash_value;
2852#endif
2853 for (;;) {
2854 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2855 if (EMPTY_OR_DELETED_BIN_P(bin))
2856 return ind;
2857#ifdef QUADRATIC_PROBE
2858 ind = set_hash_bin(ind + d, tab);
2859 d++;
2860#else
2861 ind = set_secondary_hash(ind, tab, &perturb);
2862#endif
2863 }
2864}
2865
2866/* Mark I-th bin of table TAB as empty, in other words not
2867 corresponding to any entry. */
2868#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
2869
2870/* Return index of table TAB bin for HASH_VALUE and KEY through
2871 BIN_IND and the pointed value as the function result. Reserve the
2872 bin for inclusion of the corresponding entry into the table if it
2873 is not there yet. We always find such bin as bins array length is
2874 bigger entries array. Although we can reuse a deleted bin, the
2875 result bin value is always empty if the table has no entry with
2876 KEY. Return the entries array index of the found entry or
2877 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
2878 during the search, return REBUILT_TABLE_ENTRY_IND. */
2879static st_index_t
2880set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2881 st_data_t key, st_index_t *bin_ind)
2882{
2883 int eq_p, rebuilt_p;
2884 st_index_t ind;
2885 st_hash_t curr_hash_value = *hash_value;
2886#ifdef QUADRATIC_PROBE
2887 st_index_t d;
2888#else
2889 st_index_t perturb;
2890#endif
2891 st_index_t entry_index;
2892 st_index_t firset_deleted_bin_ind;
2893 set_table_entry *entries;
2894
2895 ind = set_hash_bin(curr_hash_value, tab);
2896#ifdef QUADRATIC_PROBE
2897 d = 1;
2898#else
2899 perturb = curr_hash_value;
2900#endif
2901 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2902 entries = tab->entries;
2903 for (;;) {
2904 entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2905 if (EMPTY_BIN_P(entry_index)) {
2906 tab->num_entries++;
2907 entry_index = UNDEFINED_ENTRY_IND;
2908 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2909 /* We can reuse bin of a deleted entry. */
2910 ind = firset_deleted_bin_ind;
2911 MARK_SET_BIN_EMPTY(tab, ind);
2912 }
2913 break;
2914 }
2915 else if (! DELETED_BIN_P(entry_index)) {
2916 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2917 if (EXPECT(rebuilt_p, 0))
2918 return REBUILT_TABLE_ENTRY_IND;
2919 if (eq_p)
2920 break;
2921 }
2922 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2923 firset_deleted_bin_ind = ind;
2924#ifdef QUADRATIC_PROBE
2925 ind = set_hash_bin(ind + d, tab);
2926 d++;
2927#else
2928 ind = set_secondary_hash(ind, tab, &perturb);
2929#endif
2930 }
2931 *bin_ind = ind;
2932 return entry_index;
2933}
2934
2935/* Find an entry with KEY in table TAB. Return non-zero if we found
2936 it. */
2937int
2938set_table_lookup(set_table *tab, st_data_t key)
2939{
2940 st_index_t bin;
2941 st_hash_t hash = set_do_hash(key, tab);
2942
2943 retry:
2944 if (!set_has_bins(tab)) {
2945 bin = set_find_entry(tab, hash, key);
2946 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2947 goto retry;
2948 if (bin == UNDEFINED_ENTRY_IND)
2949 return 0;
2950 }
2951 else {
2952 bin = set_find_table_entry_ind(tab, hash, key);
2953 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2954 goto retry;
2955 if (bin == UNDEFINED_ENTRY_IND)
2956 return 0;
2957 bin -= ENTRY_BASE;
2958 }
2959 return 1;
2960}
2961
2962/* Check the table and rebuild it if it is necessary. */
2963static inline void
2964set_rebuild_table_if_necessary (set_table *tab)
2965{
2966 st_index_t bound = tab->entries_bound;
2967
2968 if (bound == set_get_allocated_entries(tab))
2969 set_rebuild_table(tab);
2970}
2971
2972/* Insert KEY into table TAB and return zero. If there is
2973 already entry with KEY in the table, return nonzero and update
2974 the value of the found entry. */
2975int
2976set_insert(set_table *tab, st_data_t key)
2977{
2978 set_table_entry *entry;
2979 st_index_t bin;
2980 st_index_t ind;
2981 st_hash_t hash_value;
2982 st_index_t bin_ind;
2983 int new_p;
2984
2985 hash_value = set_do_hash(key, tab);
2986 retry:
2987 set_rebuild_table_if_necessary(tab);
2988 if (!set_has_bins(tab)) {
2989 bin = set_find_entry(tab, hash_value, key);
2990 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2991 goto retry;
2992 new_p = bin == UNDEFINED_ENTRY_IND;
2993 if (new_p)
2994 tab->num_entries++;
2995 bin_ind = UNDEFINED_BIN_IND;
2996 }
2997 else {
2998 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
2999 key, &bin_ind);
3000 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3001 goto retry;
3002 new_p = bin == UNDEFINED_ENTRY_IND;
3003 bin -= ENTRY_BASE;
3004 }
3005 if (new_p) {
3006 ind = tab->entries_bound++;
3007 entry = &tab->entries[ind];
3008 entry->hash = hash_value;
3009 entry->key = key;
3010 if (bin_ind != UNDEFINED_BIN_IND)
3011 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
3012 return 0;
3013 }
3014 return 1;
3015}
3016
3017/* Create a copy of old_tab into new_tab. */
3018static set_table *
3019set_replace(set_table *new_tab, set_table *old_tab)
3020{
3021 *new_tab = *old_tab;
3022 size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
3023 new_tab->entries = (set_table_entry *)malloc(memsize);
3024 MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
3025 return new_tab;
3026}
3027
3028/* Create and return a copy of table OLD_TAB. */
3029set_table *
3030set_copy(set_table *new_tab, set_table *old_tab)
3031{
3032 if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
3033
3034 if (set_replace(new_tab, old_tab) == NULL) {
3035 set_free_table(new_tab);
3036 return NULL;
3037 }
3038
3039 return new_tab;
3040}
3041
3042/* Update the entries start of table TAB after removing an entry
3043 with index N in the array entries. */
3044static inline void
3045set_update_range_for_deleted(set_table *tab, st_index_t n)
3046{
3047 /* Do not update entries_bound here. Otherwise, we can fill all
3048 bins by deleted entry value before rebuilding the table. */
3049 if (tab->entries_start == n) {
3050 st_index_t start = n + 1;
3051 st_index_t bound = tab->entries_bound;
3052 set_table_entry *entries = tab->entries;
3053 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
3054 tab->entries_start = start;
3055 }
3056}
3057
3058/* Mark I-th bin of table TAB as corresponding to a deleted table
3059 entry. Update number of entries in the table and number of bins
3060 corresponding to deleted entries. */
3061#define MARK_SET_BIN_DELETED(tab, i) \
3062 do { \
3063 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
3064 } while (0)
3065
3066/* Delete entry with KEY from table TAB, and return non-zero. If
3067 there is no entry with KEY in the table, return zero. */
3068int
3069set_table_delete(set_table *tab, st_data_t *key)
3070{
3071 set_table_entry *entry;
3072 st_index_t bin;
3073 st_index_t bin_ind;
3074 st_hash_t hash;
3075
3076 hash = set_do_hash(*key, tab);
3077 retry:
3078 if (!set_has_bins(tab)) {
3079 bin = set_find_entry(tab, hash, *key);
3080 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3081 goto retry;
3082 if (bin == UNDEFINED_ENTRY_IND) {
3083 return 0;
3084 }
3085 }
3086 else {
3087 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3088 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3089 goto retry;
3090 if (bin_ind == UNDEFINED_BIN_IND) {
3091 return 0;
3092 }
3093 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3094 MARK_SET_BIN_DELETED(tab, bin_ind);
3095 }
3096 entry = &tab->entries[bin];
3097 *key = entry->key;
3098 MARK_ENTRY_DELETED(entry);
3099 tab->num_entries--;
3100 set_update_range_for_deleted(tab, bin);
3101 return 1;
3102}
3103
3104/* Traverse all entries in table TAB calling FUNC with current entry
3105 key and zero. If the call returns ST_STOP, stop
3106 traversing. If the call returns ST_DELETE, delete the current
3107 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
3108 traversing. The function returns zero unless an error is found.
3109 CHECK_P is flag of set_foreach_check call. The behavior is a bit
3110 different for ST_CHECK and when the current element is removed
3111 during traversing. */
3112static inline int
3113set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
3114 set_update_callback_func *replace, st_data_t arg,
3115 int check_p)
3116{
3117 st_index_t bin;
3118 st_index_t bin_ind;
3119 set_table_entry *entries, *curr_entry_ptr;
3120 enum st_retval retval;
3121 st_index_t i, rebuilds_num;
3122 st_hash_t hash;
3123 st_data_t key;
3124 int error_p, packed_p = !set_has_bins(tab);
3125
3126 entries = tab->entries;
3127 /* The bound can change inside the loop even without rebuilding
3128 the table, e.g. by an entry insertion. */
3129 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3130 curr_entry_ptr = &entries[i];
3131 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3132 continue;
3133 key = curr_entry_ptr->key;
3134 rebuilds_num = tab->rebuilds_num;
3135 hash = curr_entry_ptr->hash;
3136 retval = (*func)(key, arg, 0);
3137
3138 if (retval == ST_REPLACE && replace) {
3139 retval = (*replace)(&key, arg, TRUE);
3140 curr_entry_ptr->key = key;
3141 }
3142
3143 if (rebuilds_num != tab->rebuilds_num) {
3144 retry:
3145 entries = tab->entries;
3146 packed_p = !set_has_bins(tab);
3147 if (packed_p) {
3148 i = set_find_entry(tab, hash, key);
3149 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3150 goto retry;
3151 error_p = i == UNDEFINED_ENTRY_IND;
3152 }
3153 else {
3154 i = set_find_table_entry_ind(tab, hash, key);
3155 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3156 goto retry;
3157 error_p = i == UNDEFINED_ENTRY_IND;
3158 i -= ENTRY_BASE;
3159 }
3160 if (error_p && check_p) {
3161 /* call func with error notice */
3162 retval = (*func)(0, arg, 1);
3163 return 1;
3164 }
3165 curr_entry_ptr = &entries[i];
3166 }
3167 switch (retval) {
3168 case ST_REPLACE:
3169 break;
3170 case ST_CONTINUE:
3171 break;
3172 case ST_CHECK:
3173 if (check_p)
3174 break;
3175 case ST_STOP:
3176 return 0;
3177 case ST_DELETE: {
3178 st_data_t key = curr_entry_ptr->key;
3179
3180 again:
3181 if (packed_p) {
3182 bin = set_find_entry(tab, hash, key);
3183 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3184 goto again;
3185 if (bin == UNDEFINED_ENTRY_IND)
3186 break;
3187 }
3188 else {
3189 bin_ind = set_find_table_bin_ind(tab, hash, key);
3190 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3191 goto again;
3192 if (bin_ind == UNDEFINED_BIN_IND)
3193 break;
3194 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3195 MARK_SET_BIN_DELETED(tab, bin_ind);
3196 }
3197 curr_entry_ptr = &entries[bin];
3198 MARK_ENTRY_DELETED(curr_entry_ptr);
3199 tab->num_entries--;
3200 set_update_range_for_deleted(tab, bin);
3201 break;
3202 }
3203 }
3204 }
3205 return 0;
3206}
3207
3208int
3209set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3210{
3211 return set_general_foreach(tab, func, replace, arg, TRUE);
3212}
3213
3214struct set_functor {
3215 set_foreach_callback_func *func;
3216 st_data_t arg;
3217};
3218
3219static int
3220set_apply_functor(st_data_t k, st_data_t d, int _)
3221{
3222 const struct set_functor *f = (void *)d;
3223 return f->func(k, f->arg);
3224}
3225
3226int
3227set_table_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3228{
3229 const struct set_functor f = { func, arg };
3230 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3231}
3232
3233/* See comments for function set_delete_safe. */
3234int
3235set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3236 st_data_t never ATTRIBUTE_UNUSED)
3237{
3238 return set_general_foreach(tab, func, NULL, arg, TRUE);
3239}
3240
3241/* Set up array KEYS by at most SIZE keys of head table TAB entries.
3242 Return the number of keys set up in array KEYS. */
3243inline st_index_t
3244set_keys(set_table *tab, st_data_t *keys, st_index_t size)
3245{
3246 st_index_t i, bound;
3247 st_data_t key, *keys_start, *keys_end;
3248 set_table_entry *curr_entry_ptr, *entries = tab->entries;
3249
3250 bound = tab->entries_bound;
3251 keys_start = keys;
3252 keys_end = keys + size;
3253 for (i = tab->entries_start; i < bound; i++) {
3254 if (keys == keys_end)
3255 break;
3256 curr_entry_ptr = &entries[i];
3257 key = curr_entry_ptr->key;
3258 if (! DELETED_ENTRY_P(curr_entry_ptr))
3259 *keys++ = key;
3260 }
3261
3262 return keys - keys_start;
3263}
3264
3265void
3266set_compact_table(set_table *tab)
3267{
3268 st_index_t num = tab->num_entries;
3269 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3270 /* Compaction: */
3271 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3272 set_rebuild_table_with(new_tab, tab);
3273 set_rebuild_move_table(new_tab, tab);
3274 set_rebuild_cleanup(tab);
3275 }
3276}
3277
3278#endif
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:711
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1416
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:264
VALUE rb_cString
String class.
Definition string.c:84
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:615
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
set_table_entry * entries
Array of size 2^entry_power.
Definition set_table.h:28
Definition st.c:137
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40