Ruby 3.5.0dev (2025-08-14 revision c30d900547a65c7996a3f868aa17d2a842734071)
st.c (c30d900547a65c7996a3f868aa17d2a842734071)
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/hash.h"
111#include "internal/sanitizers.h"
112#include "internal/set_table.h"
113#include "internal/st.h"
114#include "ruby_assert.h"
115#endif
116
117#include <stdio.h>
118#ifdef HAVE_STDLIB_H
119#include <stdlib.h>
120#endif
121#include <string.h>
122
123#ifdef __GNUC__
124#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
125#define EXPECT(expr, val) __builtin_expect(expr, val)
126#define ATTRIBUTE_UNUSED __attribute__((unused))
127#else
128#define PREFETCH(addr, write_p)
129#define EXPECT(expr, val) (expr)
130#define ATTRIBUTE_UNUSED
131#endif
132
133/* The type of hashes. */
134typedef st_index_t st_hash_t;
135
137 st_hash_t hash;
138 st_data_t key;
139 st_data_t record;
140};
141
142#define type_numhash st_hashtype_num
143static const struct st_hash_type st_hashtype_num = {
144 st_numcmp,
145 st_numhash,
146};
147
148static int st_strcmp(st_data_t, st_data_t);
149static st_index_t strhash(st_data_t);
150static const struct st_hash_type type_strhash = {
151 st_strcmp,
152 strhash,
153};
154
155static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
156static st_index_t strcasehash(st_data_t);
157static const struct st_hash_type type_strcasehash = {
158 st_locale_insensitive_strcasecmp_i,
159 strcasehash,
160};
161
162/* Value used to catch uninitialized entries/bins during debugging.
163 There is a possibility for a false alarm, but its probability is
164 extremely small. */
165#define ST_INIT_VAL 0xafafafafafafafaf
166#define ST_INIT_VAL_BYTE 0xafa
167
168#ifdef RUBY
169#undef malloc
170#undef realloc
171#undef calloc
172#undef free
173#define malloc ruby_xmalloc
174#define calloc ruby_xcalloc
175#define realloc ruby_xrealloc
176#define free ruby_xfree
177#endif
178
179#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
180#define PTR_EQUAL(tab, ptr, hash_val, key_) \
181 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
182
183/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
184 up to TRUE if the table is rebuilt during the comparison. */
185#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
186 do { \
187 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
188 res = PTR_EQUAL(tab, ptr, hash_val, key); \
189 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
190 } while (FALSE)
191
192/* Features of a table. */
194 /* Power of 2 used for number of allocated entries. */
195 unsigned char entry_power;
196 /* Power of 2 used for number of allocated bins. Depending on the
197 table size, the number of bins is 2-4 times more than the
198 number of entries. */
199 unsigned char bin_power;
200 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
201 unsigned char size_ind;
202 /* Bins are packed in words of type st_index_t. The following is
203 a size of bins counted by words. */
204 st_index_t bins_words;
205};
206
207/* Features of all possible size tables. */
208#if SIZEOF_ST_INDEX_T == 8
209#define MAX_POWER2 62
210static const struct st_features features[] = {
211 {0, 1, 0, 0x0},
212 {1, 2, 0, 0x1},
213 {2, 3, 0, 0x1},
214 {3, 4, 0, 0x2},
215 {4, 5, 0, 0x4},
216 {5, 6, 0, 0x8},
217 {6, 7, 0, 0x10},
218 {7, 8, 0, 0x20},
219 {8, 9, 1, 0x80},
220 {9, 10, 1, 0x100},
221 {10, 11, 1, 0x200},
222 {11, 12, 1, 0x400},
223 {12, 13, 1, 0x800},
224 {13, 14, 1, 0x1000},
225 {14, 15, 1, 0x2000},
226 {15, 16, 1, 0x4000},
227 {16, 17, 2, 0x10000},
228 {17, 18, 2, 0x20000},
229 {18, 19, 2, 0x40000},
230 {19, 20, 2, 0x80000},
231 {20, 21, 2, 0x100000},
232 {21, 22, 2, 0x200000},
233 {22, 23, 2, 0x400000},
234 {23, 24, 2, 0x800000},
235 {24, 25, 2, 0x1000000},
236 {25, 26, 2, 0x2000000},
237 {26, 27, 2, 0x4000000},
238 {27, 28, 2, 0x8000000},
239 {28, 29, 2, 0x10000000},
240 {29, 30, 2, 0x20000000},
241 {30, 31, 2, 0x40000000},
242 {31, 32, 2, 0x80000000},
243 {32, 33, 3, 0x200000000},
244 {33, 34, 3, 0x400000000},
245 {34, 35, 3, 0x800000000},
246 {35, 36, 3, 0x1000000000},
247 {36, 37, 3, 0x2000000000},
248 {37, 38, 3, 0x4000000000},
249 {38, 39, 3, 0x8000000000},
250 {39, 40, 3, 0x10000000000},
251 {40, 41, 3, 0x20000000000},
252 {41, 42, 3, 0x40000000000},
253 {42, 43, 3, 0x80000000000},
254 {43, 44, 3, 0x100000000000},
255 {44, 45, 3, 0x200000000000},
256 {45, 46, 3, 0x400000000000},
257 {46, 47, 3, 0x800000000000},
258 {47, 48, 3, 0x1000000000000},
259 {48, 49, 3, 0x2000000000000},
260 {49, 50, 3, 0x4000000000000},
261 {50, 51, 3, 0x8000000000000},
262 {51, 52, 3, 0x10000000000000},
263 {52, 53, 3, 0x20000000000000},
264 {53, 54, 3, 0x40000000000000},
265 {54, 55, 3, 0x80000000000000},
266 {55, 56, 3, 0x100000000000000},
267 {56, 57, 3, 0x200000000000000},
268 {57, 58, 3, 0x400000000000000},
269 {58, 59, 3, 0x800000000000000},
270 {59, 60, 3, 0x1000000000000000},
271 {60, 61, 3, 0x2000000000000000},
272 {61, 62, 3, 0x4000000000000000},
273 {62, 63, 3, 0x8000000000000000},
274};
275
276#else
277#define MAX_POWER2 30
278
279static const struct st_features features[] = {
280 {0, 1, 0, 0x1},
281 {1, 2, 0, 0x1},
282 {2, 3, 0, 0x2},
283 {3, 4, 0, 0x4},
284 {4, 5, 0, 0x8},
285 {5, 6, 0, 0x10},
286 {6, 7, 0, 0x20},
287 {7, 8, 0, 0x40},
288 {8, 9, 1, 0x100},
289 {9, 10, 1, 0x200},
290 {10, 11, 1, 0x400},
291 {11, 12, 1, 0x800},
292 {12, 13, 1, 0x1000},
293 {13, 14, 1, 0x2000},
294 {14, 15, 1, 0x4000},
295 {15, 16, 1, 0x8000},
296 {16, 17, 2, 0x20000},
297 {17, 18, 2, 0x40000},
298 {18, 19, 2, 0x80000},
299 {19, 20, 2, 0x100000},
300 {20, 21, 2, 0x200000},
301 {21, 22, 2, 0x400000},
302 {22, 23, 2, 0x800000},
303 {23, 24, 2, 0x1000000},
304 {24, 25, 2, 0x2000000},
305 {25, 26, 2, 0x4000000},
306 {26, 27, 2, 0x8000000},
307 {27, 28, 2, 0x10000000},
308 {28, 29, 2, 0x20000000},
309 {29, 30, 2, 0x40000000},
310 {30, 31, 2, 0x80000000},
311};
312
313#endif
314
315/* The reserved hash value and its substitution. */
316#define RESERVED_HASH_VAL (~(st_hash_t) 0)
317#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
318
319static inline st_hash_t
320normalize_hash_value(st_hash_t hash)
321{
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325}
326
327/* Return hash value of KEY for table TAB. */
328static inline st_hash_t
329do_hash(st_data_t key, st_table *tab)
330{
331 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
332 return normalize_hash_value(hash);
333}
334
335/* Power of 2 defining the minimal number of allocated entries. */
336#define MINIMAL_POWER2 2
337
338#if MINIMAL_POWER2 < 2
339#error "MINIMAL_POWER2 should be >= 2"
340#endif
341
342/* If the power2 of the allocated `entries` is less than the following
343 value, don't allocate bins and use a linear search. */
344#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
345
346/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
347static int
348get_power2(st_index_t size)
349{
350 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
351 if (n <= MAX_POWER2)
352 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
353#ifdef RUBY
354 /* Ran out of the table entries */
355 rb_raise(rb_eRuntimeError, "st_table too big");
356#endif
357 /* should raise exception */
358 return -1;
359}
360
361/* Return value of N-th bin in array BINS of table with bins size
362 index S. */
363static inline st_index_t
364get_bin(st_index_t *bins, int s, st_index_t n)
365{
366 return (s == 0 ? ((unsigned char *) bins)[n]
367 : s == 1 ? ((unsigned short *) bins)[n]
368 : s == 2 ? ((unsigned int *) bins)[n]
369 : ((st_index_t *) bins)[n]);
370}
371
372/* Set up N-th bin in array BINS of table with bins size index S to
373 value V. */
374static inline void
375set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
376{
377 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
378 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
379 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
380 else ((st_index_t *) bins)[n] = v;
381}
382
383/* These macros define reserved values for empty table bin and table
384 bin which contains a deleted entry. We will never use such values
385 for an entry index in bins. */
386#define EMPTY_BIN 0
387#define DELETED_BIN 1
388/* Base of a real entry index in the bins. */
389#define ENTRY_BASE 2
390
391/* Mark I-th bin of table TAB as empty, in other words not
392 corresponding to any entry. */
393#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
394
395/* Values used for not found entry and bin with given
396 characteristics. */
397#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
398#define UNDEFINED_BIN_IND (~(st_index_t) 0)
399
400/* Entry and bin values returned when we found a table rebuild during
401 the search. */
402#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
403#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
404
405/* Mark I-th bin of table TAB as corresponding to a deleted table
406 entry. Update number of entries in the table and number of bins
407 corresponding to deleted entries. */
408#define MARK_BIN_DELETED(tab, i) \
409 do { \
410 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
411 } while (0)
412
413/* Macros to check that value B is used empty bins and bins
414 corresponding deleted entries. */
415#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
416#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
417#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
418
419/* Macros to check empty bins and bins corresponding to deleted
420 entries. Bins are given by their index I in table TAB. */
421#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
423#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
424
425/* Macros for marking and checking deleted entries given by their
426 pointer E_PTR. */
427#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
428#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
429
430/* Return bin size index of table TAB. */
431static inline unsigned int
432get_size_ind(const st_table *tab)
433{
434 return tab->size_ind;
435}
436
437/* Return the number of allocated bins of table TAB. */
438static inline st_index_t
439get_bins_num(const st_table *tab)
440{
441 return ((st_index_t) 1)<<tab->bin_power;
442}
443
444/* Return mask for a bin index in table TAB. */
445static inline st_index_t
446bins_mask(const st_table *tab)
447{
448 return get_bins_num(tab) - 1;
449}
450
451/* Return the index of table TAB bin corresponding to
452 HASH_VALUE. */
453static inline st_index_t
454hash_bin(st_hash_t hash_value, st_table *tab)
455{
456 return hash_value & bins_mask(tab);
457}
458
459/* Return the number of allocated entries of table TAB. */
460static inline st_index_t
461get_allocated_entries(const st_table *tab)
462{
463 return ((st_index_t) 1)<<tab->entry_power;
464}
465
466/* Return size of the allocated bins of table TAB. */
467static inline st_index_t
468bins_size(const st_table *tab)
469{
470 return features[tab->entry_power].bins_words * sizeof (st_index_t);
471}
472
473/* Mark all bins of table TAB as empty. */
474static void
475initialize_bins(st_table *tab)
476{
477 memset(tab->bins, 0, bins_size(tab));
478}
479
480/* Make table TAB empty. */
481static void
482make_tab_empty(st_table *tab)
483{
484 tab->num_entries = 0;
485 tab->entries_start = tab->entries_bound = 0;
486 if (tab->bins != NULL)
487 initialize_bins(tab);
488}
489
490#ifdef HASH_LOG
491#ifdef HAVE_UNISTD_H
492#include <unistd.h>
493#endif
494static struct {
495 int all, total, num, str, strcase;
496} collision;
497
498/* Flag switching off output of package statistics at the end of
499 program. */
500static int init_st = 0;
501
502/* Output overall number of table searches and collisions into a
503 temporary file. */
504static void
505stat_col(void)
506{
507 char fname[10+sizeof(long)*3];
508 FILE *f;
509 if (!collision.total) return;
510 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
511 if (f == NULL)
512 return;
513 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
514 ((double)collision.all / (collision.total)) * 100);
515 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
516 fclose(f);
517}
518#endif
519
520st_table *
521st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
522{
523 int n;
524
525#ifdef HASH_LOG
526#if HASH_LOG+0 < 0
527 {
528 const char *e = getenv("ST_HASH_LOG");
529 if (!e || !*e) init_st = 1;
530 }
531#endif
532 if (init_st == 0) {
533 init_st = 1;
534 atexit(stat_col);
535 }
536#endif
537
538 n = get_power2(size);
539#ifndef RUBY
540 if (n < 0)
541 return NULL;
542#endif
543
544 tab->type = type;
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549 tab->bins = NULL;
550 else {
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
552#ifndef RUBY
553 if (tab->bins == NULL) {
554 free(tab);
555 return NULL;
556 }
557#endif
558 }
559 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560 * sizeof(st_table_entry));
561#ifndef RUBY
562 if (tab->entries == NULL) {
563 st_free_table(tab);
564 return NULL;
565 }
566#endif
567 make_tab_empty(tab);
568 tab->rebuilds_num = 0;
569 return tab;
570}
571
572/* Create and return table with TYPE which can hold at least SIZE
573 entries. The real number of entries which the table can hold is
574 the nearest power of two for SIZE. */
575st_table *
576st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
577{
578 st_table *tab = malloc(sizeof(st_table));
579#ifndef RUBY
580 if (tab == NULL)
581 return NULL;
582#endif
583
584#ifdef RUBY
585 st_init_existing_table_with_size(tab, type, size);
586#else
587 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
588 free(tab);
589 return NULL;
590 }
591#endif
592
593 return tab;
594}
595
596size_t
597st_table_size(const struct st_table *tbl)
598{
599 return tbl->num_entries;
600}
601
602/* Create and return table with TYPE which can hold a minimal number
603 of entries (see comments for get_power2). */
604st_table *
605st_init_table(const struct st_hash_type *type)
606{
607 return st_init_table_with_size(type, 0);
608}
609
610/* Create and return table which can hold a minimal number of
611 numbers. */
612st_table *
613st_init_numtable(void)
614{
615 return st_init_table(&type_numhash);
616}
617
618/* Create and return table which can hold SIZE numbers. */
619st_table *
620st_init_numtable_with_size(st_index_t size)
621{
622 return st_init_table_with_size(&type_numhash, size);
623}
624
625/* Create and return table which can hold a minimal number of
626 strings. */
627st_table *
628st_init_strtable(void)
629{
630 return st_init_table(&type_strhash);
631}
632
633/* Create and return table which can hold SIZE strings. */
634st_table *
635st_init_strtable_with_size(st_index_t size)
636{
637 return st_init_table_with_size(&type_strhash, size);
638}
639
640/* Create and return table which can hold a minimal number of strings
641 whose character case is ignored. */
642st_table *
643st_init_strcasetable(void)
644{
645 return st_init_table(&type_strcasehash);
646}
647
648/* Create and return table which can hold SIZE strings whose character
649 case is ignored. */
650st_table *
651st_init_strcasetable_with_size(st_index_t size)
652{
653 return st_init_table_with_size(&type_strcasehash, size);
654}
655
656/* Make table TAB empty. */
657void
658st_clear(st_table *tab)
659{
660 make_tab_empty(tab);
661 tab->rebuilds_num++;
662}
663
664/* Free table TAB space. */
665void
666st_free_table(st_table *tab)
667{
668 free(tab->bins);
669 free(tab->entries);
670 free(tab);
671}
672
673/* Return byte size of memory allocated for table TAB. */
674size_t
675st_memsize(const st_table *tab)
676{
677 return(sizeof(st_table)
678 + (tab->bins == NULL ? 0 : bins_size(tab))
679 + get_allocated_entries(tab) * sizeof(st_table_entry));
680}
681
682static st_index_t
683find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
684
685static st_index_t
686find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
687
688static st_index_t
689find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
690
691static st_index_t
692find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
693 st_data_t key, st_index_t *bin_ind);
694
695#ifdef HASH_LOG
696static void
697count_collision(const struct st_hash_type *type)
698{
699 collision.all++;
700 if (type == &type_numhash) {
701 collision.num++;
702 }
703 else if (type == &type_strhash) {
704 collision.strcase++;
705 }
706 else if (type == &type_strcasehash) {
707 collision.str++;
708 }
709}
710
711#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
712#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
713#define collision_check 0
714#else
715#define COLLISION
716#define FOUND_BIN
717#endif
718
719/* If the number of entries in the table is at least REBUILD_THRESHOLD
720 times less than the entry array length, decrease the table
721 size. */
722#define REBUILD_THRESHOLD 4
723
724#if REBUILD_THRESHOLD < 2
725#error "REBUILD_THRESHOLD should be >= 2"
726#endif
727
728static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
729static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
730static void rebuild_cleanup(st_table *const tab);
731
732/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
733 and can change size of the table entries and bins arrays.
734 Rebuilding is implemented by creation of a new table or by
735 compaction of the existing one. */
736static void
737rebuild_table(st_table *tab)
738{
739 if ((2 * tab->num_entries <= get_allocated_entries(tab)
740 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
741 || tab->num_entries < (1 << MINIMAL_POWER2)) {
742 /* Compaction: */
743 tab->num_entries = 0;
744 if (tab->bins != NULL)
745 initialize_bins(tab);
746 rebuild_table_with(tab, tab);
747 }
748 else {
749 st_table *new_tab;
750 /* This allocation could trigger GC and compaction. If tab is the
751 * gen_fields_tbl, then tab could have changed in size due to objects being
752 * freed and/or moved. Do not store attributes of tab before this line. */
753 new_tab = st_init_table_with_size(tab->type,
754 2 * tab->num_entries - 1);
755 rebuild_table_with(new_tab, tab);
756 rebuild_move_table(new_tab, tab);
757 }
758 rebuild_cleanup(tab);
759}
760
761static void
762rebuild_table_with(st_table *const new_tab, st_table *const tab)
763{
764 st_index_t i, ni;
765 unsigned int size_ind;
766 st_table_entry *new_entries;
767 st_table_entry *curr_entry_ptr;
768 st_index_t *bins;
769 st_index_t bin_ind;
770
771 new_entries = new_tab->entries;
772
773 ni = 0;
774 bins = new_tab->bins;
775 size_ind = get_size_ind(new_tab);
776 st_index_t bound = tab->entries_bound;
777 st_table_entry *entries = tab->entries;
778
779 for (i = tab->entries_start; i < bound; i++) {
780 curr_entry_ptr = &entries[i];
781 PREFETCH(entries + i + 1, 0);
782 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
783 continue;
784 if (&new_entries[ni] != curr_entry_ptr)
785 new_entries[ni] = *curr_entry_ptr;
786 if (EXPECT(bins != NULL, 1)) {
787 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
788 curr_entry_ptr->key);
789 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
790 }
791 new_tab->num_entries++;
792 ni++;
793 }
794
795 assert(new_tab->num_entries == tab->num_entries);
796}
797
798static void
799rebuild_move_table(st_table *const new_tab, st_table *const tab)
800{
801 tab->entry_power = new_tab->entry_power;
802 tab->bin_power = new_tab->bin_power;
803 tab->size_ind = new_tab->size_ind;
804 free(tab->bins);
805 tab->bins = new_tab->bins;
806 free(tab->entries);
807 tab->entries = new_tab->entries;
808 free(new_tab);
809}
810
811static void
812rebuild_cleanup(st_table *const tab)
813{
814 tab->entries_start = 0;
815 tab->entries_bound = tab->num_entries;
816 tab->rebuilds_num++;
817}
818
819/* Return the next secondary hash index for table TAB using previous
820 index IND and PERTURB. Finally modulo of the function becomes a
821 full *cycle linear congruential generator*, in other words it
822 guarantees traversing all table bins in extreme case.
823
824 According the Hull-Dobell theorem a generator
825 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
826 o m and c are relatively prime
827 o a-1 is divisible by all prime factors of m
828 o a-1 is divisible by 4 if m is divisible by 4.
829
830 For our case a is 5, c is 1, and m is a power of two. */
831static inline st_index_t
832secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
833{
834 *perturb >>= 11;
835 ind = (ind << 2) + ind + *perturb + 1;
836 return hash_bin(ind, tab);
837}
838
839/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
840 search. Return the index of the found entry in array `entries`.
841 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
842 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
843static inline st_index_t
844find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
845{
846 int eq_p, rebuilt_p;
847 st_index_t i, bound;
848 st_table_entry *entries;
849
850 bound = tab->entries_bound;
851 entries = tab->entries;
852 for (i = tab->entries_start; i < bound; i++) {
853 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
854 if (EXPECT(rebuilt_p, 0))
855 return REBUILT_TABLE_ENTRY_IND;
856 if (eq_p)
857 return i;
858 }
859 return UNDEFINED_ENTRY_IND;
860}
861
862/* Use the quadratic probing. The method has a better data locality
863 but more collisions than the current approach. In average it
864 results in a bit slower search. */
865/*#define QUADRATIC_PROBE*/
866
867/* Return index of entry with HASH_VALUE and KEY in table TAB. If
868 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
869 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
870static st_index_t
871find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
872{
873 int eq_p, rebuilt_p;
874 st_index_t ind;
875#ifdef QUADRATIC_PROBE
876 st_index_t d;
877#else
878 st_index_t perturb;
879#endif
880 st_index_t bin;
881 st_table_entry *entries = tab->entries;
882
883 ind = hash_bin(hash_value, tab);
884#ifdef QUADRATIC_PROBE
885 d = 1;
886#else
887 perturb = hash_value;
888#endif
889 FOUND_BIN;
890 for (;;) {
891 bin = get_bin(tab->bins, get_size_ind(tab), ind);
892 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
893 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
894 if (EXPECT(rebuilt_p, 0))
895 return REBUILT_TABLE_ENTRY_IND;
896 if (eq_p)
897 break;
898 }
899 else if (EMPTY_BIN_P(bin))
900 return UNDEFINED_ENTRY_IND;
901#ifdef QUADRATIC_PROBE
902 ind = hash_bin(ind + d, tab);
903 d++;
904#else
905 ind = secondary_hash(ind, tab, &perturb);
906#endif
907 COLLISION;
908 }
909 return bin;
910}
911
912/* Find and return index of table TAB bin corresponding to an entry
913 with HASH_VALUE and KEY. If there is no such bin, return
914 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
915 return REBUILT_TABLE_BIN_IND. */
916static st_index_t
917find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
918{
919 int eq_p, rebuilt_p;
920 st_index_t ind;
921#ifdef QUADRATIC_PROBE
922 st_index_t d;
923#else
924 st_index_t perturb;
925#endif
926 st_index_t bin;
927 st_table_entry *entries = tab->entries;
928
929 ind = hash_bin(hash_value, tab);
930#ifdef QUADRATIC_PROBE
931 d = 1;
932#else
933 perturb = hash_value;
934#endif
935 FOUND_BIN;
936 for (;;) {
937 bin = get_bin(tab->bins, get_size_ind(tab), ind);
938 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
939 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
940 if (EXPECT(rebuilt_p, 0))
941 return REBUILT_TABLE_BIN_IND;
942 if (eq_p)
943 break;
944 }
945 else if (EMPTY_BIN_P(bin))
946 return UNDEFINED_BIN_IND;
947#ifdef QUADRATIC_PROBE
948 ind = hash_bin(ind + d, tab);
949 d++;
950#else
951 ind = secondary_hash(ind, tab, &perturb);
952#endif
953 COLLISION;
954 }
955 return ind;
956}
957
958/* Find and return index of table TAB bin corresponding to an entry
959 with HASH_VALUE and KEY. The entry should be in the table
960 already. */
961static st_index_t
962find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
963{
964 st_index_t ind;
965#ifdef QUADRATIC_PROBE
966 st_index_t d;
967#else
968 st_index_t perturb;
969#endif
970 st_index_t bin;
971
972 ind = hash_bin(hash_value, tab);
973#ifdef QUADRATIC_PROBE
974 d = 1;
975#else
976 perturb = hash_value;
977#endif
978 FOUND_BIN;
979 for (;;) {
980 bin = get_bin(tab->bins, get_size_ind(tab), ind);
981 if (EMPTY_OR_DELETED_BIN_P(bin))
982 return ind;
983#ifdef QUADRATIC_PROBE
984 ind = hash_bin(ind + d, tab);
985 d++;
986#else
987 ind = secondary_hash(ind, tab, &perturb);
988#endif
989 COLLISION;
990 }
991}
992
993/* Return index of table TAB bin for HASH_VALUE and KEY through
994 BIN_IND and the pointed value as the function result. Reserve the
995 bin for inclusion of the corresponding entry into the table if it
996 is not there yet. We always find such bin as bins array length is
997 bigger entries array. Although we can reuse a deleted bin, the
998 result bin value is always empty if the table has no entry with
999 KEY. Return the entries array index of the found entry or
1000 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1001 during the search, return REBUILT_TABLE_ENTRY_IND. */
1002static st_index_t
1003find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1004 st_data_t key, st_index_t *bin_ind)
1005{
1006 int eq_p, rebuilt_p;
1007 st_index_t ind;
1008 st_hash_t curr_hash_value = *hash_value;
1009#ifdef QUADRATIC_PROBE
1010 st_index_t d;
1011#else
1012 st_index_t perturb;
1013#endif
1014 st_index_t entry_index;
1015 st_index_t first_deleted_bin_ind;
1016 st_table_entry *entries;
1017
1018 ind = hash_bin(curr_hash_value, tab);
1019#ifdef QUADRATIC_PROBE
1020 d = 1;
1021#else
1022 perturb = curr_hash_value;
1023#endif
1024 FOUND_BIN;
1025 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1026 entries = tab->entries;
1027 for (;;) {
1028 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1029 if (EMPTY_BIN_P(entry_index)) {
1030 tab->num_entries++;
1031 entry_index = UNDEFINED_ENTRY_IND;
1032 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1033 /* We can reuse bin of a deleted entry. */
1034 ind = first_deleted_bin_ind;
1035 MARK_BIN_EMPTY(tab, ind);
1036 }
1037 break;
1038 }
1039 else if (! DELETED_BIN_P(entry_index)) {
1040 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1041 if (EXPECT(rebuilt_p, 0))
1042 return REBUILT_TABLE_ENTRY_IND;
1043 if (eq_p)
1044 break;
1045 }
1046 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1047 first_deleted_bin_ind = ind;
1048#ifdef QUADRATIC_PROBE
1049 ind = hash_bin(ind + d, tab);
1050 d++;
1051#else
1052 ind = secondary_hash(ind, tab, &perturb);
1053#endif
1054 COLLISION;
1055 }
1056 *bin_ind = ind;
1057 return entry_index;
1058}
1059
1060/* Find an entry with KEY in table TAB. Return non-zero if we found
1061 it. Set up *RECORD to the found entry record. */
1062int
1063st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1064{
1065 st_index_t bin;
1066 st_hash_t hash = do_hash(key, tab);
1067
1068 retry:
1069 if (tab->bins == NULL) {
1070 bin = find_entry(tab, hash, key);
1071 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1072 goto retry;
1073 if (bin == UNDEFINED_ENTRY_IND)
1074 return 0;
1075 }
1076 else {
1077 bin = find_table_entry_ind(tab, hash, key);
1078 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1079 goto retry;
1080 if (bin == UNDEFINED_ENTRY_IND)
1081 return 0;
1082 bin -= ENTRY_BASE;
1083 }
1084 if (value != 0)
1085 *value = tab->entries[bin].record;
1086 return 1;
1087}
1088
1089/* Find an entry with KEY in table TAB. Return non-zero if we found
1090 it. Set up *RESULT to the found table entry key. */
1091int
1092st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1093{
1094 st_index_t bin;
1095 st_hash_t hash = do_hash(key, tab);
1096
1097 retry:
1098 if (tab->bins == NULL) {
1099 bin = find_entry(tab, hash, key);
1100 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1101 goto retry;
1102 if (bin == UNDEFINED_ENTRY_IND)
1103 return 0;
1104 }
1105 else {
1106 bin = find_table_entry_ind(tab, hash, key);
1107 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1108 goto retry;
1109 if (bin == UNDEFINED_ENTRY_IND)
1110 return 0;
1111 bin -= ENTRY_BASE;
1112 }
1113 if (result != 0)
1114 *result = tab->entries[bin].key;
1115 return 1;
1116}
1117
1118/* Check the table and rebuild it if it is necessary. */
1119static inline void
1120rebuild_table_if_necessary (st_table *tab)
1121{
1122 st_index_t bound = tab->entries_bound;
1123
1124 if (bound == get_allocated_entries(tab))
1125 rebuild_table(tab);
1126}
1127
1128/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1129 already entry with KEY in the table, return nonzero and update
1130 the value of the found entry. */
1131int
1132st_insert(st_table *tab, st_data_t key, st_data_t value)
1133{
1134 st_table_entry *entry;
1135 st_index_t bin;
1136 st_index_t ind;
1137 st_hash_t hash_value;
1138 st_index_t bin_ind;
1139 int new_p;
1140
1141 hash_value = do_hash(key, tab);
1142 retry:
1143 rebuild_table_if_necessary(tab);
1144 if (tab->bins == NULL) {
1145 bin = find_entry(tab, hash_value, key);
1146 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1147 goto retry;
1148 new_p = bin == UNDEFINED_ENTRY_IND;
1149 if (new_p)
1150 tab->num_entries++;
1151 bin_ind = UNDEFINED_BIN_IND;
1152 }
1153 else {
1154 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1155 key, &bin_ind);
1156 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1157 goto retry;
1158 new_p = bin == UNDEFINED_ENTRY_IND;
1159 bin -= ENTRY_BASE;
1160 }
1161 if (new_p) {
1162 ind = tab->entries_bound++;
1163 entry = &tab->entries[ind];
1164 entry->hash = hash_value;
1165 entry->key = key;
1166 entry->record = value;
1167 if (bin_ind != UNDEFINED_BIN_IND)
1168 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1169 return 0;
1170 }
1171 tab->entries[bin].record = value;
1172 return 1;
1173}
1174
1175/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1176 entry with KEY before the insertion. */
1177static inline void
1178st_add_direct_with_hash(st_table *tab,
1179 st_data_t key, st_data_t value, st_hash_t hash)
1180{
1181 st_table_entry *entry;
1182 st_index_t ind;
1183 st_index_t bin_ind;
1184
1185 assert(hash != RESERVED_HASH_VAL);
1186
1187 rebuild_table_if_necessary(tab);
1188 ind = tab->entries_bound++;
1189 entry = &tab->entries[ind];
1190 entry->hash = hash;
1191 entry->key = key;
1192 entry->record = value;
1193 tab->num_entries++;
1194 if (tab->bins != NULL) {
1195 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1196 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1197 }
1198}
1199
1200void
1201rb_st_add_direct_with_hash(st_table *tab,
1202 st_data_t key, st_data_t value, st_hash_t hash)
1203{
1204 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1205}
1206
1207/* Insert (KEY, VALUE) into table TAB. The table should not have
1208 entry with KEY before the insertion. */
1209void
1210st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1211{
1212 st_hash_t hash_value;
1213
1214 hash_value = do_hash(key, tab);
1215 st_add_direct_with_hash(tab, key, value, hash_value);
1216}
1217
1218/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1219 there is already entry with KEY in the table, return nonzero and
1220 update the value of the found entry. */
1221int
1222st_insert2(st_table *tab, st_data_t key, st_data_t value,
1223 st_data_t (*func)(st_data_t))
1224{
1225 st_table_entry *entry;
1226 st_index_t bin;
1227 st_index_t ind;
1228 st_hash_t hash_value;
1229 st_index_t bin_ind;
1230 int new_p;
1231
1232 hash_value = do_hash(key, tab);
1233 retry:
1234 rebuild_table_if_necessary (tab);
1235 if (tab->bins == NULL) {
1236 bin = find_entry(tab, hash_value, key);
1237 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1238 goto retry;
1239 new_p = bin == UNDEFINED_ENTRY_IND;
1240 if (new_p)
1241 tab->num_entries++;
1242 bin_ind = UNDEFINED_BIN_IND;
1243 }
1244 else {
1245 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1246 key, &bin_ind);
1247 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1248 goto retry;
1249 new_p = bin == UNDEFINED_ENTRY_IND;
1250 bin -= ENTRY_BASE;
1251 }
1252 if (new_p) {
1253 key = (*func)(key);
1254 ind = tab->entries_bound++;
1255 entry = &tab->entries[ind];
1256 entry->hash = hash_value;
1257 entry->key = key;
1258 entry->record = value;
1259 if (bin_ind != UNDEFINED_BIN_IND)
1260 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1261 return 0;
1262 }
1263 tab->entries[bin].record = value;
1264 return 1;
1265}
1266
1267/* Create a copy of old_tab into new_tab. */
1268st_table *
1269st_replace(st_table *new_tab, st_table *old_tab)
1270{
1271 *new_tab = *old_tab;
1272 if (old_tab->bins == NULL)
1273 new_tab->bins = NULL;
1274 else {
1275 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1276#ifndef RUBY
1277 if (new_tab->bins == NULL) {
1278 return NULL;
1279 }
1280#endif
1281 }
1282 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1283 * sizeof(st_table_entry));
1284#ifndef RUBY
1285 if (new_tab->entries == NULL) {
1286 return NULL;
1287 }
1288#endif
1289 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1290 get_allocated_entries(old_tab));
1291 if (old_tab->bins != NULL)
1292 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1293
1294 return new_tab;
1295}
1296
1297/* Create and return a copy of table OLD_TAB. */
1298st_table *
1299st_copy(st_table *old_tab)
1300{
1301 st_table *new_tab;
1302
1303 new_tab = (st_table *) malloc(sizeof(st_table));
1304#ifndef RUBY
1305 if (new_tab == NULL)
1306 return NULL;
1307#endif
1308
1309 if (st_replace(new_tab, old_tab) == NULL) {
1310 st_free_table(new_tab);
1311 return NULL;
1312 }
1313
1314 return new_tab;
1315}
1316
1317/* Update the entries start of table TAB after removing an entry
1318 with index N in the array entries. */
1319static inline void
1320update_range_for_deleted(st_table *tab, st_index_t n)
1321{
1322 /* Do not update entries_bound here. Otherwise, we can fill all
1323 bins by deleted entry value before rebuilding the table. */
1324 if (tab->entries_start == n) {
1325 st_index_t start = n + 1;
1326 st_index_t bound = tab->entries_bound;
1327 st_table_entry *entries = tab->entries;
1328 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1329 tab->entries_start = start;
1330 }
1331}
1332
1333/* Delete entry with KEY from table TAB, set up *VALUE (unless
1334 VALUE is zero) from deleted table entry, and return non-zero. If
1335 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1336 is zero), and return zero. */
1337static int
1338st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1339{
1340 st_table_entry *entry;
1341 st_index_t bin;
1342 st_index_t bin_ind;
1343 st_hash_t hash;
1344
1345 hash = do_hash(*key, tab);
1346 retry:
1347 if (tab->bins == NULL) {
1348 bin = find_entry(tab, hash, *key);
1349 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1350 goto retry;
1351 if (bin == UNDEFINED_ENTRY_IND) {
1352 if (value != 0) *value = 0;
1353 return 0;
1354 }
1355 }
1356 else {
1357 bin_ind = find_table_bin_ind(tab, hash, *key);
1358 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1359 goto retry;
1360 if (bin_ind == UNDEFINED_BIN_IND) {
1361 if (value != 0) *value = 0;
1362 return 0;
1363 }
1364 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1365 MARK_BIN_DELETED(tab, bin_ind);
1366 }
1367 entry = &tab->entries[bin];
1368 *key = entry->key;
1369 if (value != 0) *value = entry->record;
1370 MARK_ENTRY_DELETED(entry);
1371 tab->num_entries--;
1372 update_range_for_deleted(tab, bin);
1373 return 1;
1374}
1375
1376int
1377st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1378{
1379 return st_general_delete(tab, key, value);
1380}
1381
1382/* The function and other functions with suffix '_safe' or '_check'
1383 are originated from the previous implementation of the hash tables.
1384 It was necessary for correct deleting entries during traversing
1385 tables. The current implementation permits deletion during
1386 traversing without a specific way to do this. */
1387int
1388st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1389 st_data_t never ATTRIBUTE_UNUSED)
1390{
1391 return st_general_delete(tab, key, value);
1392}
1393
1394/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1395 return zero. Otherwise, remove the first entry in the table.
1396 Return its key through KEY and its record through VALUE (unless
1397 VALUE is zero). */
1398int
1399st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1400{
1401 st_index_t i, bound;
1402 st_index_t bin;
1403 st_table_entry *entries, *curr_entry_ptr;
1404 st_index_t bin_ind;
1405
1406 entries = tab->entries;
1407 bound = tab->entries_bound;
1408 for (i = tab->entries_start; i < bound; i++) {
1409 curr_entry_ptr = &entries[i];
1410 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1411 st_hash_t entry_hash = curr_entry_ptr->hash;
1412 st_data_t entry_key = curr_entry_ptr->key;
1413
1414 if (value != 0) *value = curr_entry_ptr->record;
1415 *key = entry_key;
1416 retry:
1417 if (tab->bins == NULL) {
1418 bin = find_entry(tab, entry_hash, entry_key);
1419 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1420 entries = tab->entries;
1421 goto retry;
1422 }
1423 curr_entry_ptr = &entries[bin];
1424 }
1425 else {
1426 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1427 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1428 entries = tab->entries;
1429 goto retry;
1430 }
1431 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1432 - ENTRY_BASE];
1433 MARK_BIN_DELETED(tab, bin_ind);
1434 }
1435 MARK_ENTRY_DELETED(curr_entry_ptr);
1436 tab->num_entries--;
1437 update_range_for_deleted(tab, i);
1438 return 1;
1439 }
1440 }
1441 if (value != 0) *value = 0;
1442 return 0;
1443}
1444
1445/* See comments for function st_delete_safe. */
1446void
1447st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1448 st_data_t never ATTRIBUTE_UNUSED)
1449{
1450}
1451
1452/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1453 of the key and the value of the found entry, and non-zero as the
1454 3rd argument. If the entry is not found, call FUNC with a pointer
1455 to KEY, a pointer to zero, and a zero argument. If the call
1456 returns ST_CONTINUE, the table will have an entry with key and
1457 value returned by FUNC through the 1st and 2nd parameters. If the
1458 call of FUNC returns ST_DELETE, the table will not have entry with
1459 KEY. The function returns flag of that the entry with KEY was in
1460 the table before the call. */
1461int
1462st_update(st_table *tab, st_data_t key,
1463 st_update_callback_func *func, st_data_t arg)
1464{
1465 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1466 st_index_t bin = 0; /* Ditto */
1467 st_table_entry *entries;
1468 st_index_t bin_ind;
1469 st_data_t value = 0, old_key;
1470 int retval, existing;
1471 st_hash_t hash = do_hash(key, tab);
1472
1473 retry:
1474 entries = tab->entries;
1475 if (tab->bins == NULL) {
1476 bin = find_entry(tab, hash, key);
1477 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1478 goto retry;
1479 existing = bin != UNDEFINED_ENTRY_IND;
1480 entry = &entries[bin];
1481 bin_ind = UNDEFINED_BIN_IND;
1482 }
1483 else {
1484 bin_ind = find_table_bin_ind(tab, hash, key);
1485 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1486 goto retry;
1487 existing = bin_ind != UNDEFINED_BIN_IND;
1488 if (existing) {
1489 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1490 entry = &entries[bin];
1491 }
1492 }
1493 if (existing) {
1494 key = entry->key;
1495 value = entry->record;
1496 }
1497 old_key = key;
1498
1499 unsigned int rebuilds_num = tab->rebuilds_num;
1500
1501 retval = (*func)(&key, &value, arg, existing);
1502
1503 // We need to make sure that the callback didn't cause a table rebuild
1504 // Ideally we would make sure no operations happened
1505 assert(rebuilds_num == tab->rebuilds_num);
1506 (void)rebuilds_num;
1507
1508 switch (retval) {
1509 case ST_CONTINUE:
1510 if (! existing) {
1511 st_add_direct_with_hash(tab, key, value, hash);
1512 break;
1513 }
1514 if (old_key != key) {
1515 entry->key = key;
1516 }
1517 entry->record = value;
1518 break;
1519 case ST_DELETE:
1520 if (existing) {
1521 if (bin_ind != UNDEFINED_BIN_IND)
1522 MARK_BIN_DELETED(tab, bin_ind);
1523 MARK_ENTRY_DELETED(entry);
1524 tab->num_entries--;
1525 update_range_for_deleted(tab, bin);
1526 }
1527 break;
1528 }
1529 return existing;
1530}
1531
1532/* Traverse all entries in table TAB calling FUNC with current entry
1533 key and value and zero. If the call returns ST_STOP, stop
1534 traversing. If the call returns ST_DELETE, delete the current
1535 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1536 traversing. The function returns zero unless an error is found.
1537 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1538 different for ST_CHECK and when the current element is removed
1539 during traversing. */
1540static inline int
1541st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1542 int check_p)
1543{
1544 st_index_t bin;
1545 st_index_t bin_ind;
1546 st_table_entry *entries, *curr_entry_ptr;
1547 enum st_retval retval;
1548 st_index_t i, rebuilds_num;
1549 st_hash_t hash;
1550 st_data_t key;
1551 int error_p, packed_p = tab->bins == NULL;
1552
1553 entries = tab->entries;
1554 /* The bound can change inside the loop even without rebuilding
1555 the table, e.g. by an entry insertion. */
1556 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1557 curr_entry_ptr = &entries[i];
1558 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1559 continue;
1560 key = curr_entry_ptr->key;
1561 rebuilds_num = tab->rebuilds_num;
1562 hash = curr_entry_ptr->hash;
1563 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1564
1565 if (retval == ST_REPLACE && replace) {
1566 st_data_t value;
1567 value = curr_entry_ptr->record;
1568 retval = (*replace)(&key, &value, arg, TRUE);
1569 curr_entry_ptr->key = key;
1570 curr_entry_ptr->record = value;
1571 }
1572
1573 if (rebuilds_num != tab->rebuilds_num) {
1574 retry:
1575 entries = tab->entries;
1576 packed_p = tab->bins == NULL;
1577 if (packed_p) {
1578 i = find_entry(tab, hash, key);
1579 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1580 goto retry;
1581 error_p = i == UNDEFINED_ENTRY_IND;
1582 }
1583 else {
1584 i = find_table_entry_ind(tab, hash, key);
1585 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1586 goto retry;
1587 error_p = i == UNDEFINED_ENTRY_IND;
1588 i -= ENTRY_BASE;
1589 }
1590 if (error_p && check_p) {
1591 /* call func with error notice */
1592 retval = (*func)(0, 0, arg, 1);
1593 return 1;
1594 }
1595 curr_entry_ptr = &entries[i];
1596 }
1597 switch (retval) {
1598 case ST_REPLACE:
1599 break;
1600 case ST_CONTINUE:
1601 break;
1602 case ST_CHECK:
1603 if (check_p)
1604 break;
1605 case ST_STOP:
1606 return 0;
1607 case ST_DELETE: {
1608 st_data_t key = curr_entry_ptr->key;
1609
1610 again:
1611 if (packed_p) {
1612 bin = find_entry(tab, hash, key);
1613 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1614 goto again;
1615 if (bin == UNDEFINED_ENTRY_IND)
1616 break;
1617 }
1618 else {
1619 bin_ind = find_table_bin_ind(tab, hash, key);
1620 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1621 goto again;
1622 if (bin_ind == UNDEFINED_BIN_IND)
1623 break;
1624 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1625 MARK_BIN_DELETED(tab, bin_ind);
1626 }
1627 curr_entry_ptr = &entries[bin];
1628 MARK_ENTRY_DELETED(curr_entry_ptr);
1629 tab->num_entries--;
1630 update_range_for_deleted(tab, bin);
1631 break;
1632 }
1633 }
1634 }
1635 return 0;
1636}
1637
1638int
1639st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1640{
1641 return st_general_foreach(tab, func, replace, arg, TRUE);
1642}
1643
1644struct functor {
1645 st_foreach_callback_func *func;
1646 st_data_t arg;
1647};
1648
1649static int
1650apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1651{
1652 const struct functor *f = (void *)d;
1653 return f->func(k, v, f->arg);
1654}
1655
1656int
1657st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1658{
1659 const struct functor f = { func, arg };
1660 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1661}
1662
1663/* See comments for function st_delete_safe. */
1664int
1665st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1666 st_data_t never ATTRIBUTE_UNUSED)
1667{
1668 return st_general_foreach(tab, func, 0, arg, TRUE);
1669}
1670
1671/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1672 Return the number of keys set up in array KEYS. */
1673static inline st_index_t
1674st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1675{
1676 st_index_t i, bound;
1677 st_data_t key, *keys_start, *keys_end;
1678 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1679
1680 bound = tab->entries_bound;
1681 keys_start = keys;
1682 keys_end = keys + size;
1683 for (i = tab->entries_start; i < bound; i++) {
1684 if (keys == keys_end)
1685 break;
1686 curr_entry_ptr = &entries[i];
1687 key = curr_entry_ptr->key;
1688 if (! DELETED_ENTRY_P(curr_entry_ptr))
1689 *keys++ = key;
1690 }
1691
1692 return keys - keys_start;
1693}
1694
1695st_index_t
1696st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1697{
1698 return st_general_keys(tab, keys, size);
1699}
1700
1701/* See comments for function st_delete_safe. */
1702st_index_t
1703st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1704 st_data_t never ATTRIBUTE_UNUSED)
1705{
1706 return st_general_keys(tab, keys, size);
1707}
1708
1709/* Set up array VALUES by at most SIZE values of head table TAB
1710 entries. Return the number of values set up in array VALUES. */
1711static inline st_index_t
1712st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1713{
1714 st_index_t i, bound;
1715 st_data_t *values_start, *values_end;
1716 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1717
1718 values_start = values;
1719 values_end = values + size;
1720 bound = tab->entries_bound;
1721 for (i = tab->entries_start; i < bound; i++) {
1722 if (values == values_end)
1723 break;
1724 curr_entry_ptr = &entries[i];
1725 if (! DELETED_ENTRY_P(curr_entry_ptr))
1726 *values++ = curr_entry_ptr->record;
1727 }
1728
1729 return values - values_start;
1730}
1731
1732st_index_t
1733st_values(st_table *tab, st_data_t *values, st_index_t size)
1734{
1735 return st_general_values(tab, values, size);
1736}
1737
1738/* See comments for function st_delete_safe. */
1739st_index_t
1740st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1741 st_data_t never ATTRIBUTE_UNUSED)
1742{
1743 return st_general_values(tab, values, size);
1744}
1745
1746#define FNV1_32A_INIT 0x811c9dc5
1747
1748/*
1749 * 32 bit magic FNV-1a prime
1750 */
1751#define FNV_32_PRIME 0x01000193
1752
1753/* __POWERPC__ added to accommodate Darwin case. */
1754#ifndef UNALIGNED_WORD_ACCESS
1755# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1756 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1757 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1758 defined(__mc68020__)
1759# define UNALIGNED_WORD_ACCESS 1
1760# endif
1761#endif
1762#ifndef UNALIGNED_WORD_ACCESS
1763# define UNALIGNED_WORD_ACCESS 0
1764#endif
1765
1766/* This hash function is quite simplified MurmurHash3
1767 * Simplification is legal, cause most of magic still happens in finalizator.
1768 * And finalizator is almost the same as in MurmurHash3 */
1769#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1770#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1771
1772#if ST_INDEX_BITS <= 32
1773#define C1 (st_index_t)0xcc9e2d51
1774#define C2 (st_index_t)0x1b873593
1775#else
1776#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1777#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1778#endif
1779NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1780NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1781NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1782
1783static inline st_index_t
1784murmur_step(st_index_t h, st_index_t k)
1785{
1786#if ST_INDEX_BITS <= 32
1787#define r1 (17)
1788#define r2 (11)
1789#else
1790#define r1 (33)
1791#define r2 (24)
1792#endif
1793 k *= C1;
1794 h ^= ROTL(k, r1);
1795 h *= C2;
1796 h = ROTL(h, r2);
1797 return h;
1798}
1799#undef r1
1800#undef r2
1801
1802static inline st_index_t
1803murmur_finish(st_index_t h)
1804{
1805#if ST_INDEX_BITS <= 32
1806#define r1 (16)
1807#define r2 (13)
1808#define r3 (16)
1809 const st_index_t c1 = 0x85ebca6b;
1810 const st_index_t c2 = 0xc2b2ae35;
1811#else
1812/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1813#define r1 (30)
1814#define r2 (27)
1815#define r3 (31)
1816 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1817 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1818#endif
1819#if ST_INDEX_BITS > 64
1820 h ^= h >> 64;
1821 h *= c2;
1822 h ^= h >> 65;
1823#endif
1824 h ^= h >> r1;
1825 h *= c1;
1826 h ^= h >> r2;
1827 h *= c2;
1828 h ^= h >> r3;
1829 return h;
1830}
1831#undef r1
1832#undef r2
1833#undef r3
1834
1835st_index_t
1836st_hash(const void *ptr, size_t len, st_index_t h)
1837{
1838 const char *data = ptr;
1839 st_index_t t = 0;
1840 size_t l = len;
1841
1842#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1843#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1844#if SIZEOF_ST_INDEX_T > 4
1845#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1846#if SIZEOF_ST_INDEX_T > 8
1847#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1848 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1849#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1850#endif
1851#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1852#else
1853#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1854#endif
1855#undef SKIP_TAIL
1856 if (len >= sizeof(st_index_t)) {
1857#if !UNALIGNED_WORD_ACCESS
1858 int align = (int)((st_data_t)data % sizeof(st_index_t));
1859 if (align) {
1860 st_index_t d = 0;
1861 int sl, sr, pack;
1862
1863 switch (align) {
1864#ifdef WORDS_BIGENDIAN
1865# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1866 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1867#else
1868# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1869 t |= data_at(n) << CHAR_BIT*(n)
1870#endif
1871 UNALIGNED_ADD_ALL;
1872#undef UNALIGNED_ADD
1873 }
1874
1875#ifdef WORDS_BIGENDIAN
1876 t >>= (CHAR_BIT * align) - CHAR_BIT;
1877#else
1878 t <<= (CHAR_BIT * align);
1879#endif
1880
1881 data += sizeof(st_index_t)-align;
1882 len -= sizeof(st_index_t)-align;
1883
1884 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1885 sr = CHAR_BIT * align;
1886
1887 while (len >= sizeof(st_index_t)) {
1888 d = *(st_index_t *)data;
1889#ifdef WORDS_BIGENDIAN
1890 t = (t << sr) | (d >> sl);
1891#else
1892 t = (t >> sr) | (d << sl);
1893#endif
1894 h = murmur_step(h, t);
1895 t = d;
1896 data += sizeof(st_index_t);
1897 len -= sizeof(st_index_t);
1898 }
1899
1900 pack = len < (size_t)align ? (int)len : align;
1901 d = 0;
1902 switch (pack) {
1903#ifdef WORDS_BIGENDIAN
1904# define UNALIGNED_ADD(n) case (n) + 1: \
1905 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1906#else
1907# define UNALIGNED_ADD(n) case (n) + 1: \
1908 d |= data_at(n) << CHAR_BIT*(n)
1909#endif
1910 UNALIGNED_ADD_ALL;
1911#undef UNALIGNED_ADD
1912 }
1913#ifdef WORDS_BIGENDIAN
1914 t = (t << sr) | (d >> sl);
1915#else
1916 t = (t >> sr) | (d << sl);
1917#endif
1918
1919 if (len < (size_t)align) goto skip_tail;
1920# define SKIP_TAIL 1
1921 h = murmur_step(h, t);
1922 data += pack;
1923 len -= pack;
1924 }
1925 else
1926#endif
1927#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1928#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1929#else
1930#define aligned_data data
1931#endif
1932 {
1933 do {
1934 h = murmur_step(h, *(st_index_t *)aligned_data);
1935 data += sizeof(st_index_t);
1936 len -= sizeof(st_index_t);
1937 } while (len >= sizeof(st_index_t));
1938 }
1939 }
1940
1941 t = 0;
1942 switch (len) {
1943#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1944 /* in this case byteorder doesn't really matter */
1945#if SIZEOF_ST_INDEX_T > 4
1946 case 7: t |= data_at(6) << 48;
1947 case 6: t |= data_at(5) << 40;
1948 case 5: t |= data_at(4) << 32;
1949 case 4:
1950 t |= (st_index_t)*(uint32_t*)aligned_data;
1951 goto skip_tail;
1952# define SKIP_TAIL 1
1953#endif
1954 case 3: t |= data_at(2) << 16;
1955 case 2: t |= data_at(1) << 8;
1956 case 1: t |= data_at(0);
1957#else
1958#ifdef WORDS_BIGENDIAN
1959# define UNALIGNED_ADD(n) case (n) + 1: \
1960 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1961#else
1962# define UNALIGNED_ADD(n) case (n) + 1: \
1963 t |= data_at(n) << CHAR_BIT*(n)
1964#endif
1965 UNALIGNED_ADD_ALL;
1966#undef UNALIGNED_ADD
1967#endif
1968#ifdef SKIP_TAIL
1969 skip_tail:
1970#endif
1971 h ^= t; h -= ROTL(t, 7);
1972 h *= C2;
1973 }
1974 h ^= l;
1975#undef aligned_data
1976
1977 return murmur_finish(h);
1978}
1979
1980st_index_t
1981st_hash_uint32(st_index_t h, uint32_t i)
1982{
1983 return murmur_step(h, i);
1984}
1985
1986NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1987st_index_t
1988st_hash_uint(st_index_t h, st_index_t i)
1989{
1990 i += h;
1991/* no matter if it is BigEndian or LittleEndian,
1992 * we hash just integers */
1993#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1994 h = murmur_step(h, i >> 8*8);
1995#endif
1996 h = murmur_step(h, i);
1997 return h;
1998}
1999
2000st_index_t
2001st_hash_end(st_index_t h)
2002{
2003 h = murmur_finish(h);
2004 return h;
2005}
2006
2007#undef st_hash_start
2008st_index_t
2009rb_st_hash_start(st_index_t h)
2010{
2011 return h;
2012}
2013
2014static st_index_t
2015strhash(st_data_t arg)
2016{
2017 register const char *string = (const char *)arg;
2018 return st_hash(string, strlen(string), FNV1_32A_INIT);
2019}
2020
2021int
2022st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2023{
2024 char c1, c2;
2025
2026 while (1) {
2027 c1 = *s1++;
2028 c2 = *s2++;
2029 if (c1 == '\0' || c2 == '\0') {
2030 if (c1 != '\0') return 1;
2031 if (c2 != '\0') return -1;
2032 return 0;
2033 }
2034 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2035 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2036 if (c1 != c2) {
2037 if (c1 > c2)
2038 return 1;
2039 else
2040 return -1;
2041 }
2042 }
2043}
2044
2045int
2046st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2047{
2048 char c1, c2;
2049 size_t i;
2050
2051 for (i = 0; i < n; i++) {
2052 c1 = *s1++;
2053 c2 = *s2++;
2054 if (c1 == '\0' || c2 == '\0') {
2055 if (c1 != '\0') return 1;
2056 if (c2 != '\0') return -1;
2057 return 0;
2058 }
2059 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2060 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2061 if (c1 != c2) {
2062 if (c1 > c2)
2063 return 1;
2064 else
2065 return -1;
2066 }
2067 }
2068 return 0;
2069}
2070
2071static int
2072st_strcmp(st_data_t lhs, st_data_t rhs)
2073{
2074 const char *s1 = (char *)lhs;
2075 const char *s2 = (char *)rhs;
2076 return strcmp(s1, s2);
2077}
2078
2079static int
2080st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2081{
2082 const char *s1 = (char *)lhs;
2083 const char *s2 = (char *)rhs;
2084 return st_locale_insensitive_strcasecmp(s1, s2);
2085}
2086
2087NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2088static st_index_t
2089strcasehash(st_data_t arg)
2090{
2091 register const char *string = (const char *)arg;
2092 register st_index_t hval = FNV1_32A_INIT;
2093
2094 /*
2095 * FNV-1a hash each octet in the buffer
2096 */
2097 while (*string) {
2098 unsigned int c = (unsigned char)*string++;
2099 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2100 hval ^= c;
2101
2102 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2103 hval *= FNV_32_PRIME;
2104 }
2105 return hval;
2106}
2107
2108int
2109st_numcmp(st_data_t x, st_data_t y)
2110{
2111 return x != y;
2112}
2113
2114st_index_t
2115st_numhash(st_data_t n)
2116{
2117 enum {s1 = 11, s2 = 3};
2118 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2119}
2120
2121#ifdef RUBY
2122/* Expand TAB to be suitable for holding SIZ entries in total.
2123 Pre-existing entries remain not deleted inside of TAB, but its bins
2124 are cleared to expect future reconstruction. See rehash below. */
2125static void
2126st_expand_table(st_table *tab, st_index_t siz)
2127{
2128 st_table *tmp;
2129 st_index_t n;
2130
2131 if (siz <= get_allocated_entries(tab))
2132 return; /* enough room already */
2133
2134 tmp = st_init_table_with_size(tab->type, siz);
2135 n = get_allocated_entries(tab);
2136 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2137 free(tab->entries);
2138 free(tab->bins);
2139 free(tmp->bins);
2140 tab->entry_power = tmp->entry_power;
2141 tab->bin_power = tmp->bin_power;
2142 tab->size_ind = tmp->size_ind;
2143 tab->entries = tmp->entries;
2144 tab->bins = NULL;
2145 tab->rebuilds_num++;
2146 free(tmp);
2147}
2148
2149/* Rehash using linear search. Return TRUE if we found that the table
2150 was rebuilt. */
2151static int
2152st_rehash_linear(st_table *tab)
2153{
2154 int eq_p, rebuilt_p;
2155 st_index_t i, j;
2156 st_table_entry *p, *q;
2157
2158 free(tab->bins);
2159 tab->bins = NULL;
2160
2161 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2162 p = &tab->entries[i];
2163 if (DELETED_ENTRY_P(p))
2164 continue;
2165 for (j = i + 1; j < tab->entries_bound; j++) {
2166 q = &tab->entries[j];
2167 if (DELETED_ENTRY_P(q))
2168 continue;
2169 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2170 if (EXPECT(rebuilt_p, 0))
2171 return TRUE;
2172 if (eq_p) {
2173 *p = *q;
2174 MARK_ENTRY_DELETED(q);
2175 tab->num_entries--;
2176 update_range_for_deleted(tab, j);
2177 }
2178 }
2179 }
2180 return FALSE;
2181}
2182
2183/* Rehash using index. Return TRUE if we found that the table was
2184 rebuilt. */
2185static int
2186st_rehash_indexed(st_table *tab)
2187{
2188 int eq_p, rebuilt_p;
2189 st_index_t i;
2190 st_index_t const n = bins_size(tab);
2191 unsigned int const size_ind = get_size_ind(tab);
2192 st_index_t *bins = realloc(tab->bins, n);
2193 tab->bins = bins;
2194 initialize_bins(tab);
2195 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2196 st_table_entry *p = &tab->entries[i];
2197 st_index_t ind;
2198#ifdef QUADRATIC_PROBE
2199 st_index_t d = 1;
2200#else
2201 st_index_t perturb = p->hash;
2202#endif
2203
2204 if (DELETED_ENTRY_P(p))
2205 continue;
2206
2207 ind = hash_bin(p->hash, tab);
2208 for (;;) {
2209 st_index_t bin = get_bin(bins, size_ind, ind);
2210 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2211 /* ok, new room */
2212 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2213 break;
2214 }
2215 else {
2216 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2217 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2218 if (EXPECT(rebuilt_p, 0))
2219 return TRUE;
2220 if (eq_p) {
2221 /* duplicated key; delete it */
2222 q->record = p->record;
2223 MARK_ENTRY_DELETED(p);
2224 tab->num_entries--;
2225 update_range_for_deleted(tab, bin);
2226 break;
2227 }
2228 else {
2229 /* hash collision; skip it */
2230#ifdef QUADRATIC_PROBE
2231 ind = hash_bin(ind + d, tab);
2232 d++;
2233#else
2234 ind = secondary_hash(ind, tab, &perturb);
2235#endif
2236 }
2237 }
2238 }
2239 }
2240 return FALSE;
2241}
2242
2243/* Reconstruct TAB's bins according to TAB's entries. This function
2244 permits conflicting keys inside of entries. No errors are reported
2245 then. All but one of them are discarded silently. */
2246static void
2247st_rehash(st_table *tab)
2248{
2249 int rebuilt_p;
2250
2251 do {
2252 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2253 rebuilt_p = st_rehash_linear(tab);
2254 else
2255 rebuilt_p = st_rehash_indexed(tab);
2256 } while (rebuilt_p);
2257}
2258
2259static st_data_t
2260st_stringify(VALUE key)
2261{
2262 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2263 rb_hash_key_str(key) : key;
2264}
2265
2266static void
2267st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2268{
2269 st_data_t k = st_stringify(key);
2271 e.hash = do_hash(k, tab);
2272 e.key = k;
2273 e.record = val;
2274
2275 tab->entries[tab->entries_bound++] = e;
2276 tab->num_entries++;
2277 RB_OBJ_WRITTEN(hash, Qundef, k);
2278 RB_OBJ_WRITTEN(hash, Qundef, val);
2279}
2280
2281static void
2282st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2283{
2284 long i;
2285
2286 for (i = 0; i < argc; /* */) {
2287 st_data_t k = st_stringify(argv[i++]);
2288 st_data_t v = argv[i++];
2289 st_insert(tab, k, v);
2290 RB_OBJ_WRITTEN(hash, Qundef, k);
2291 RB_OBJ_WRITTEN(hash, Qundef, v);
2292 }
2293}
2294
2295static void
2296st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2297{
2298 long i;
2299
2300 /* push elems */
2301 for (i = 0; i < argc; /* */) {
2302 VALUE key = argv[i++];
2303 VALUE val = argv[i++];
2304 st_insert_single(tab, hash, key, val);
2305 }
2306
2307 /* reindex */
2308 st_rehash(tab);
2309}
2310
2311/* Mimics ruby's { foo => bar } syntax. This function is subpart
2312 of rb_hash_bulk_insert. */
2313void
2314rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2315{
2316 st_index_t n, size = argc / 2;
2317 st_table *tab = RHASH_ST_TABLE(hash);
2318
2319 tab = RHASH_TBL_RAW(hash);
2320 n = tab->entries_bound + size;
2321 st_expand_table(tab, n);
2322 if (UNLIKELY(tab->num_entries))
2323 st_insert_generic(tab, argc, argv, hash);
2324 else if (argc <= 2)
2325 st_insert_single(tab, hash, argv[0], argv[1]);
2326 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2327 st_insert_linear(tab, argc, argv, hash);
2328 else
2329 st_insert_generic(tab, argc, argv, hash);
2330}
2331
2332void
2333rb_st_compact_table(st_table *tab)
2334{
2335 st_index_t num = tab->num_entries;
2336 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2337 /* Compaction: */
2338 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2339 rebuild_table_with(new_tab, tab);
2340 rebuild_move_table(new_tab, tab);
2341 rebuild_cleanup(tab);
2342 }
2343}
2344
2345/*
2346 * set_table related code
2347 */
2348
2349struct set_table_entry {
2350 st_hash_t hash;
2351 st_data_t key;
2352};
2353
2354/* Return hash value of KEY for table TAB. */
2355static inline st_hash_t
2356set_do_hash(st_data_t key, set_table *tab)
2357{
2358 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2359 return normalize_hash_value(hash);
2360}
2361
2362/* Return bin size index of table TAB. */
2363static inline unsigned int
2364set_get_size_ind(const set_table *tab)
2365{
2366 return tab->size_ind;
2367}
2368
2369/* Return the number of allocated bins of table TAB. */
2370static inline st_index_t
2371set_get_bins_num(const set_table *tab)
2372{
2373 return ((st_index_t) 1)<<tab->bin_power;
2374}
2375
2376/* Return mask for a bin index in table TAB. */
2377static inline st_index_t
2378set_bins_mask(const set_table *tab)
2379{
2380 return set_get_bins_num(tab) - 1;
2381}
2382
2383/* Return the index of table TAB bin corresponding to
2384 HASH_VALUE. */
2385static inline st_index_t
2386set_hash_bin(st_hash_t hash_value, set_table *tab)
2387{
2388 return hash_value & set_bins_mask(tab);
2389}
2390
2391/* Return the number of allocated entries of table TAB. */
2392static inline st_index_t
2393set_get_allocated_entries(const set_table *tab)
2394{
2395 return ((st_index_t) 1)<<tab->entry_power;
2396}
2397
2398static inline size_t
2399set_allocated_entries_size(const set_table *tab)
2400{
2401 return set_get_allocated_entries(tab) * sizeof(set_table_entry);
2402}
2403
2404static inline bool
2405set_has_bins(const set_table *tab)
2406{
2407 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
2408}
2409
2410/* Return size of the allocated bins of table TAB. */
2411static inline st_index_t
2412set_bins_size(const set_table *tab)
2413{
2414 if (set_has_bins(tab)) {
2415 return features[tab->entry_power].bins_words * sizeof (st_index_t);
2416 }
2417
2418 return 0;
2419}
2420
2421static inline st_index_t *
2422set_bins_ptr(const set_table *tab)
2423{
2424 if (set_has_bins(tab)) {
2425 return (st_index_t *)(((char *)tab->entries) + set_allocated_entries_size(tab));
2426 }
2427
2428 return NULL;
2429}
2430
2431/* Mark all bins of table TAB as empty. */
2432static void
2433set_initialize_bins(set_table *tab)
2434{
2435 memset(set_bins_ptr(tab), 0, set_bins_size(tab));
2436}
2437
2438/* Make table TAB empty. */
2439static void
2440set_make_tab_empty(set_table *tab)
2441{
2442 tab->num_entries = 0;
2443 tab->entries_start = tab->entries_bound = 0;
2444 if (set_bins_ptr(tab) != NULL)
2445 set_initialize_bins(tab);
2446}
2447
2448static set_table *
2449set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2450{
2451 int n;
2452
2453#ifdef HASH_LOG
2454#if HASH_LOG+0 < 0
2455 {
2456 const char *e = getenv("ST_HASH_LOG");
2457 if (!e || !*e) init_st = 1;
2458 }
2459#endif
2460 if (init_st == 0) {
2461 init_st = 1;
2462 atexit(stat_col);
2463 }
2464#endif
2465
2466 n = get_power2(size);
2467
2468 tab->type = type;
2469 tab->entry_power = n;
2470 tab->bin_power = features[n].bin_power;
2471 tab->size_ind = features[n].size_ind;
2472
2473 size_t memsize = 0;
2474 if (set_has_bins(tab)) {
2475 memsize += set_bins_size(tab);
2476 }
2477 memsize += set_get_allocated_entries(tab) * sizeof(set_table_entry);
2478 tab->entries = (set_table_entry *)malloc(memsize);
2479 set_make_tab_empty(tab);
2480 tab->rebuilds_num = 0;
2481 return tab;
2482}
2483
2484/* Create and return table with TYPE which can hold at least SIZE
2485 entries. The real number of entries which the table can hold is
2486 the nearest power of two for SIZE. */
2487set_table *
2488set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2489{
2490 if (tab == NULL) tab = malloc(sizeof(set_table));
2491
2492 set_init_existing_table_with_size(tab, type, size);
2493
2494 return tab;
2495}
2496
2497set_table *
2498set_init_numtable(void)
2499{
2500 return set_init_table_with_size(NULL, &type_numhash, 0);
2501}
2502
2503set_table *
2504set_init_numtable_with_size(st_index_t size)
2505{
2506 return set_init_table_with_size(NULL, &type_numhash, size);
2507}
2508
2509size_t
2510set_table_size(const struct set_table *tbl)
2511{
2512 return tbl->num_entries;
2513}
2514
2515/* Make table TAB empty. */
2516void
2517set_table_clear(set_table *tab)
2518{
2519 set_make_tab_empty(tab);
2520 tab->rebuilds_num++;
2521}
2522
2523/* Free table TAB space. This should only be used if you passed NULL to
2524 set_init_table_with_size/set_copy when creating the table. */
2525void
2526set_free_table(set_table *tab)
2527{
2528 free(tab->entries);
2529 free(tab);
2530}
2531
2532/* Return byte size of memory allocated for table TAB. */
2533size_t
2534set_memsize(const set_table *tab)
2535{
2536 return(sizeof(set_table)
2537 + (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
2538 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2539}
2540
2541static st_index_t
2542set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2543
2544static st_index_t
2545set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2546
2547static st_index_t
2548set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key);
2549
2550static st_index_t
2551set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2552 st_data_t key, st_index_t *bin_ind);
2553
2554static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
2555static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
2556static void set_rebuild_cleanup(set_table *const tab);
2557
2558/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
2559 and can change size of the table entries and bins arrays.
2560 Rebuilding is implemented by creation of a new table or by
2561 compaction of the existing one. */
2562static void
2563set_rebuild_table(set_table *tab)
2564{
2565 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2566 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2567 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2568 /* Compaction: */
2569 tab->num_entries = 0;
2570 if (set_has_bins(tab))
2571 set_initialize_bins(tab);
2572 set_rebuild_table_with(tab, tab);
2573 }
2574 else {
2575 set_table *new_tab;
2576 /* This allocation could trigger GC and compaction. If tab is the
2577 * gen_fields_tbl, then tab could have changed in size due to objects being
2578 * freed and/or moved. Do not store attributes of tab before this line. */
2579 new_tab = set_init_table_with_size(NULL, tab->type,
2580 2 * tab->num_entries - 1);
2581 set_rebuild_table_with(new_tab, tab);
2582 set_rebuild_move_table(new_tab, tab);
2583 }
2584 set_rebuild_cleanup(tab);
2585}
2586
2587static void
2588set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
2589{
2590 st_index_t i, ni;
2591 unsigned int size_ind;
2592 set_table_entry *new_entries;
2593 set_table_entry *curr_entry_ptr;
2594 st_index_t *bins;
2595 st_index_t bin_ind;
2596
2597 new_entries = new_tab->entries;
2598
2599 ni = 0;
2600 bins = set_bins_ptr(new_tab);
2601 size_ind = set_get_size_ind(new_tab);
2602 st_index_t bound = tab->entries_bound;
2603 set_table_entry *entries = tab->entries;
2604
2605 for (i = tab->entries_start; i < bound; i++) {
2606 curr_entry_ptr = &entries[i];
2607 PREFETCH(entries + i + 1, 0);
2608 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2609 continue;
2610 if (&new_entries[ni] != curr_entry_ptr)
2611 new_entries[ni] = *curr_entry_ptr;
2612 if (EXPECT(bins != NULL, 1)) {
2613 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2614 curr_entry_ptr->key);
2615 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2616 }
2617 new_tab->num_entries++;
2618 ni++;
2619 }
2620
2621 assert(new_tab->num_entries == tab->num_entries);
2622}
2623
2624static void
2625set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
2626{
2627 tab->entry_power = new_tab->entry_power;
2628 tab->bin_power = new_tab->bin_power;
2629 tab->size_ind = new_tab->size_ind;
2630 free(tab->entries);
2631 tab->entries = new_tab->entries;
2632 free(new_tab);
2633}
2634
2635static void
2636set_rebuild_cleanup(set_table *const tab)
2637{
2638 tab->entries_start = 0;
2639 tab->entries_bound = tab->num_entries;
2640 tab->rebuilds_num++;
2641}
2642
2643/* Return the next secondary hash index for table TAB using previous
2644 index IND and PERTURB. Finally modulo of the function becomes a
2645 full *cycle linear congruential generator*, in other words it
2646 guarantees traversing all table bins in extreme case.
2647
2648 According the Hull-Dobell theorem a generator
2649 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
2650 o m and c are relatively prime
2651 o a-1 is divisible by all prime factors of m
2652 o a-1 is divisible by 4 if m is divisible by 4.
2653
2654 For our case a is 5, c is 1, and m is a power of two. */
2655static inline st_index_t
2656set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
2657{
2658 *perturb >>= 11;
2659 ind = (ind << 2) + ind + *perturb + 1;
2660 return set_hash_bin(ind, tab);
2661}
2662
2663/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
2664 search. Return the index of the found entry in array `entries`.
2665 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
2666 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2667static inline st_index_t
2668set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
2669{
2670 int eq_p, rebuilt_p;
2671 st_index_t i, bound;
2672 set_table_entry *entries;
2673
2674 bound = tab->entries_bound;
2675 entries = tab->entries;
2676 for (i = tab->entries_start; i < bound; i++) {
2677 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2678 if (EXPECT(rebuilt_p, 0))
2679 return REBUILT_TABLE_ENTRY_IND;
2680 if (eq_p)
2681 return i;
2682 }
2683 return UNDEFINED_ENTRY_IND;
2684}
2685
2686/* Use the quadratic probing. The method has a better data locality
2687 but more collisions than the current approach. In average it
2688 results in a bit slower search. */
2689/*#define QUADRATIC_PROBE*/
2690
2691/* Return index of entry with HASH_VALUE and KEY in table TAB. If
2692 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
2693 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2694static st_index_t
2695set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2696{
2697 int eq_p, rebuilt_p;
2698 st_index_t ind;
2699#ifdef QUADRATIC_PROBE
2700 st_index_t d;
2701#else
2702 st_index_t perturb;
2703#endif
2704 st_index_t bin;
2705 set_table_entry *entries = tab->entries;
2706
2707 ind = set_hash_bin(hash_value, tab);
2708#ifdef QUADRATIC_PROBE
2709 d = 1;
2710#else
2711 perturb = hash_value;
2712#endif
2713 for (;;) {
2714 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2715 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2716 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2717 if (EXPECT(rebuilt_p, 0))
2718 return REBUILT_TABLE_ENTRY_IND;
2719 if (eq_p)
2720 break;
2721 }
2722 else if (EMPTY_BIN_P(bin))
2723 return UNDEFINED_ENTRY_IND;
2724#ifdef QUADRATIC_PROBE
2725 ind = set_hash_bin(ind + d, tab);
2726 d++;
2727#else
2728 ind = set_secondary_hash(ind, tab, &perturb);
2729#endif
2730 }
2731 return bin;
2732}
2733
2734/* Find and return index of table TAB bin corresponding to an entry
2735 with HASH_VALUE and KEY. If there is no such bin, return
2736 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
2737 return REBUILT_TABLE_BIN_IND. */
2738static st_index_t
2739set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2740{
2741 int eq_p, rebuilt_p;
2742 st_index_t ind;
2743#ifdef QUADRATIC_PROBE
2744 st_index_t d;
2745#else
2746 st_index_t perturb;
2747#endif
2748 st_index_t bin;
2749 set_table_entry *entries = tab->entries;
2750
2751 ind = set_hash_bin(hash_value, tab);
2752#ifdef QUADRATIC_PROBE
2753 d = 1;
2754#else
2755 perturb = hash_value;
2756#endif
2757 for (;;) {
2758 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2759 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2760 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2761 if (EXPECT(rebuilt_p, 0))
2762 return REBUILT_TABLE_BIN_IND;
2763 if (eq_p)
2764 break;
2765 }
2766 else if (EMPTY_BIN_P(bin))
2767 return UNDEFINED_BIN_IND;
2768#ifdef QUADRATIC_PROBE
2769 ind = set_hash_bin(ind + d, tab);
2770 d++;
2771#else
2772 ind = set_secondary_hash(ind, tab, &perturb);
2773#endif
2774 }
2775 return ind;
2776}
2777
2778/* Find and return index of table TAB bin corresponding to an entry
2779 with HASH_VALUE and KEY. The entry should be in the table
2780 already. */
2781static st_index_t
2782set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
2783{
2784 st_index_t ind;
2785#ifdef QUADRATIC_PROBE
2786 st_index_t d;
2787#else
2788 st_index_t perturb;
2789#endif
2790 st_index_t bin;
2791
2792 ind = set_hash_bin(hash_value, tab);
2793#ifdef QUADRATIC_PROBE
2794 d = 1;
2795#else
2796 perturb = hash_value;
2797#endif
2798 for (;;) {
2799 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2800 if (EMPTY_OR_DELETED_BIN_P(bin))
2801 return ind;
2802#ifdef QUADRATIC_PROBE
2803 ind = set_hash_bin(ind + d, tab);
2804 d++;
2805#else
2806 ind = set_secondary_hash(ind, tab, &perturb);
2807#endif
2808 }
2809}
2810
2811/* Mark I-th bin of table TAB as empty, in other words not
2812 corresponding to any entry. */
2813#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
2814
2815/* Return index of table TAB bin for HASH_VALUE and KEY through
2816 BIN_IND and the pointed value as the function result. Reserve the
2817 bin for inclusion of the corresponding entry into the table if it
2818 is not there yet. We always find such bin as bins array length is
2819 bigger entries array. Although we can reuse a deleted bin, the
2820 result bin value is always empty if the table has no entry with
2821 KEY. Return the entries array index of the found entry or
2822 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
2823 during the search, return REBUILT_TABLE_ENTRY_IND. */
2824static st_index_t
2825set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2826 st_data_t key, st_index_t *bin_ind)
2827{
2828 int eq_p, rebuilt_p;
2829 st_index_t ind;
2830 st_hash_t curr_hash_value = *hash_value;
2831#ifdef QUADRATIC_PROBE
2832 st_index_t d;
2833#else
2834 st_index_t perturb;
2835#endif
2836 st_index_t entry_index;
2837 st_index_t firset_deleted_bin_ind;
2838 set_table_entry *entries;
2839
2840 ind = set_hash_bin(curr_hash_value, tab);
2841#ifdef QUADRATIC_PROBE
2842 d = 1;
2843#else
2844 perturb = curr_hash_value;
2845#endif
2846 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2847 entries = tab->entries;
2848 for (;;) {
2849 entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2850 if (EMPTY_BIN_P(entry_index)) {
2851 tab->num_entries++;
2852 entry_index = UNDEFINED_ENTRY_IND;
2853 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2854 /* We can reuse bin of a deleted entry. */
2855 ind = firset_deleted_bin_ind;
2856 MARK_SET_BIN_EMPTY(tab, ind);
2857 }
2858 break;
2859 }
2860 else if (! DELETED_BIN_P(entry_index)) {
2861 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2862 if (EXPECT(rebuilt_p, 0))
2863 return REBUILT_TABLE_ENTRY_IND;
2864 if (eq_p)
2865 break;
2866 }
2867 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2868 firset_deleted_bin_ind = ind;
2869#ifdef QUADRATIC_PROBE
2870 ind = set_hash_bin(ind + d, tab);
2871 d++;
2872#else
2873 ind = set_secondary_hash(ind, tab, &perturb);
2874#endif
2875 }
2876 *bin_ind = ind;
2877 return entry_index;
2878}
2879
2880/* Find an entry with KEY in table TAB. Return non-zero if we found
2881 it. */
2882int
2883set_table_lookup(set_table *tab, st_data_t key)
2884{
2885 st_index_t bin;
2886 st_hash_t hash = set_do_hash(key, tab);
2887
2888 retry:
2889 if (!set_has_bins(tab)) {
2890 bin = set_find_entry(tab, hash, key);
2891 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2892 goto retry;
2893 if (bin == UNDEFINED_ENTRY_IND)
2894 return 0;
2895 }
2896 else {
2897 bin = set_find_table_entry_ind(tab, hash, key);
2898 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2899 goto retry;
2900 if (bin == UNDEFINED_ENTRY_IND)
2901 return 0;
2902 bin -= ENTRY_BASE;
2903 }
2904 return 1;
2905}
2906
2907/* Check the table and rebuild it if it is necessary. */
2908static inline void
2909set_rebuild_table_if_necessary (set_table *tab)
2910{
2911 st_index_t bound = tab->entries_bound;
2912
2913 if (bound == set_get_allocated_entries(tab))
2914 set_rebuild_table(tab);
2915}
2916
2917/* Insert KEY into table TAB and return zero. If there is
2918 already entry with KEY in the table, return nonzero and update
2919 the value of the found entry. */
2920int
2921set_insert(set_table *tab, st_data_t key)
2922{
2923 set_table_entry *entry;
2924 st_index_t bin;
2925 st_index_t ind;
2926 st_hash_t hash_value;
2927 st_index_t bin_ind;
2928 int new_p;
2929
2930 hash_value = set_do_hash(key, tab);
2931 retry:
2932 set_rebuild_table_if_necessary(tab);
2933 if (!set_has_bins(tab)) {
2934 bin = set_find_entry(tab, hash_value, key);
2935 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2936 goto retry;
2937 new_p = bin == UNDEFINED_ENTRY_IND;
2938 if (new_p)
2939 tab->num_entries++;
2940 bin_ind = UNDEFINED_BIN_IND;
2941 }
2942 else {
2943 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
2944 key, &bin_ind);
2945 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2946 goto retry;
2947 new_p = bin == UNDEFINED_ENTRY_IND;
2948 bin -= ENTRY_BASE;
2949 }
2950 if (new_p) {
2951 ind = tab->entries_bound++;
2952 entry = &tab->entries[ind];
2953 entry->hash = hash_value;
2954 entry->key = key;
2955 if (bin_ind != UNDEFINED_BIN_IND)
2956 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
2957 return 0;
2958 }
2959 return 1;
2960}
2961
2962/* Create a copy of old_tab into new_tab. */
2963static set_table *
2964set_replace(set_table *new_tab, set_table *old_tab)
2965{
2966 *new_tab = *old_tab;
2967 size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
2968 new_tab->entries = (set_table_entry *)malloc(memsize);
2969 MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
2970 return new_tab;
2971}
2972
2973/* Create and return a copy of table OLD_TAB. */
2974set_table *
2975set_copy(set_table *new_tab, set_table *old_tab)
2976{
2977 if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
2978
2979 if (set_replace(new_tab, old_tab) == NULL) {
2980 set_free_table(new_tab);
2981 return NULL;
2982 }
2983
2984 return new_tab;
2985}
2986
2987/* Update the entries start of table TAB after removing an entry
2988 with index N in the array entries. */
2989static inline void
2990set_update_range_for_deleted(set_table *tab, st_index_t n)
2991{
2992 /* Do not update entries_bound here. Otherwise, we can fill all
2993 bins by deleted entry value before rebuilding the table. */
2994 if (tab->entries_start == n) {
2995 st_index_t start = n + 1;
2996 st_index_t bound = tab->entries_bound;
2997 set_table_entry *entries = tab->entries;
2998 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
2999 tab->entries_start = start;
3000 }
3001}
3002
3003/* Mark I-th bin of table TAB as corresponding to a deleted table
3004 entry. Update number of entries in the table and number of bins
3005 corresponding to deleted entries. */
3006#define MARK_SET_BIN_DELETED(tab, i) \
3007 do { \
3008 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
3009 } while (0)
3010
3011/* Delete entry with KEY from table TAB, and return non-zero. If
3012 there is no entry with KEY in the table, return zero. */
3013int
3014set_table_delete(set_table *tab, st_data_t *key)
3015{
3016 set_table_entry *entry;
3017 st_index_t bin;
3018 st_index_t bin_ind;
3019 st_hash_t hash;
3020
3021 hash = set_do_hash(*key, tab);
3022 retry:
3023 if (!set_has_bins(tab)) {
3024 bin = set_find_entry(tab, hash, *key);
3025 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3026 goto retry;
3027 if (bin == UNDEFINED_ENTRY_IND) {
3028 return 0;
3029 }
3030 }
3031 else {
3032 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3033 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3034 goto retry;
3035 if (bin_ind == UNDEFINED_BIN_IND) {
3036 return 0;
3037 }
3038 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3039 MARK_SET_BIN_DELETED(tab, bin_ind);
3040 }
3041 entry = &tab->entries[bin];
3042 *key = entry->key;
3043 MARK_ENTRY_DELETED(entry);
3044 tab->num_entries--;
3045 set_update_range_for_deleted(tab, bin);
3046 return 1;
3047}
3048
3049/* Traverse all entries in table TAB calling FUNC with current entry
3050 key and zero. If the call returns ST_STOP, stop
3051 traversing. If the call returns ST_DELETE, delete the current
3052 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
3053 traversing. The function returns zero unless an error is found.
3054 CHECK_P is flag of set_foreach_check call. The behavior is a bit
3055 different for ST_CHECK and when the current element is removed
3056 during traversing. */
3057static inline int
3058set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
3059 set_update_callback_func *replace, st_data_t arg,
3060 int check_p)
3061{
3062 st_index_t bin;
3063 st_index_t bin_ind;
3064 set_table_entry *entries, *curr_entry_ptr;
3065 enum st_retval retval;
3066 st_index_t i, rebuilds_num;
3067 st_hash_t hash;
3068 st_data_t key;
3069 int error_p, packed_p = !set_has_bins(tab);
3070
3071 entries = tab->entries;
3072 /* The bound can change inside the loop even without rebuilding
3073 the table, e.g. by an entry insertion. */
3074 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3075 curr_entry_ptr = &entries[i];
3076 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3077 continue;
3078 key = curr_entry_ptr->key;
3079 rebuilds_num = tab->rebuilds_num;
3080 hash = curr_entry_ptr->hash;
3081 retval = (*func)(key, arg, 0);
3082
3083 if (retval == ST_REPLACE && replace) {
3084 retval = (*replace)(&key, arg, TRUE);
3085 curr_entry_ptr->key = key;
3086 }
3087
3088 if (rebuilds_num != tab->rebuilds_num) {
3089 retry:
3090 entries = tab->entries;
3091 packed_p = !set_has_bins(tab);
3092 if (packed_p) {
3093 i = set_find_entry(tab, hash, key);
3094 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3095 goto retry;
3096 error_p = i == UNDEFINED_ENTRY_IND;
3097 }
3098 else {
3099 i = set_find_table_entry_ind(tab, hash, key);
3100 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3101 goto retry;
3102 error_p = i == UNDEFINED_ENTRY_IND;
3103 i -= ENTRY_BASE;
3104 }
3105 if (error_p && check_p) {
3106 /* call func with error notice */
3107 retval = (*func)(0, arg, 1);
3108 return 1;
3109 }
3110 curr_entry_ptr = &entries[i];
3111 }
3112 switch (retval) {
3113 case ST_REPLACE:
3114 break;
3115 case ST_CONTINUE:
3116 break;
3117 case ST_CHECK:
3118 if (check_p)
3119 break;
3120 case ST_STOP:
3121 return 0;
3122 case ST_DELETE: {
3123 st_data_t key = curr_entry_ptr->key;
3124
3125 again:
3126 if (packed_p) {
3127 bin = set_find_entry(tab, hash, key);
3128 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3129 goto again;
3130 if (bin == UNDEFINED_ENTRY_IND)
3131 break;
3132 }
3133 else {
3134 bin_ind = set_find_table_bin_ind(tab, hash, key);
3135 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3136 goto again;
3137 if (bin_ind == UNDEFINED_BIN_IND)
3138 break;
3139 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3140 MARK_SET_BIN_DELETED(tab, bin_ind);
3141 }
3142 curr_entry_ptr = &entries[bin];
3143 MARK_ENTRY_DELETED(curr_entry_ptr);
3144 tab->num_entries--;
3145 set_update_range_for_deleted(tab, bin);
3146 break;
3147 }
3148 }
3149 }
3150 return 0;
3151}
3152
3153int
3154set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3155{
3156 return set_general_foreach(tab, func, replace, arg, TRUE);
3157}
3158
3159struct set_functor {
3160 set_foreach_callback_func *func;
3161 st_data_t arg;
3162};
3163
3164static int
3165set_apply_functor(st_data_t k, st_data_t d, int _)
3166{
3167 const struct set_functor *f = (void *)d;
3168 return f->func(k, f->arg);
3169}
3170
3171int
3172set_table_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3173{
3174 const struct set_functor f = { func, arg };
3175 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3176}
3177
3178/* See comments for function set_delete_safe. */
3179int
3180set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3181 st_data_t never ATTRIBUTE_UNUSED)
3182{
3183 return set_general_foreach(tab, func, NULL, arg, TRUE);
3184}
3185
3186/* Set up array KEYS by at most SIZE keys of head table TAB entries.
3187 Return the number of keys set up in array KEYS. */
3188inline st_index_t
3189set_keys(set_table *tab, st_data_t *keys, st_index_t size)
3190{
3191 st_index_t i, bound;
3192 st_data_t key, *keys_start, *keys_end;
3193 set_table_entry *curr_entry_ptr, *entries = tab->entries;
3194
3195 bound = tab->entries_bound;
3196 keys_start = keys;
3197 keys_end = keys + size;
3198 for (i = tab->entries_start; i < bound; i++) {
3199 if (keys == keys_end)
3200 break;
3201 curr_entry_ptr = &entries[i];
3202 key = curr_entry_ptr->key;
3203 if (! DELETED_ENTRY_P(curr_entry_ptr))
3204 *keys++ = key;
3205 }
3206
3207 return keys - keys_start;
3208}
3209
3210void
3211set_compact_table(set_table *tab)
3212{
3213 st_index_t num = tab->num_entries;
3214 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3215 /* Compaction: */
3216 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3217 set_rebuild_table_with(new_tab, tab);
3218 set_rebuild_move_table(new_tab, tab);
3219 set_rebuild_cleanup(tab);
3220 }
3221}
3222
3223#endif
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:892
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1428
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:243
VALUE rb_cString
String class.
Definition string.c:83
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:615
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
set_table_entry * entries
Array of size 2^entry_power.
Definition set_table.h:28
Definition st.c:136
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40