Ruby 3.5.0dev (2025-06-27 revision aca692cd41d4cf74f5b5d35a703b55262ff4bcf8)
st.c (aca692cd41d4cf74f5b5d35a703b55262ff4bcf8)
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/hash.h"
111#include "internal/sanitizers.h"
112#include "internal/set_table.h"
113#include "internal/st.h"
114#include "ruby_assert.h"
115#endif
116
117#include <stdio.h>
118#ifdef HAVE_STDLIB_H
119#include <stdlib.h>
120#endif
121#include <string.h>
122
123#ifdef __GNUC__
124#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
125#define EXPECT(expr, val) __builtin_expect(expr, val)
126#define ATTRIBUTE_UNUSED __attribute__((unused))
127#else
128#define PREFETCH(addr, write_p)
129#define EXPECT(expr, val) (expr)
130#define ATTRIBUTE_UNUSED
131#endif
132
133/* The type of hashes. */
134typedef st_index_t st_hash_t;
135
137 st_hash_t hash;
138 st_data_t key;
139 st_data_t record;
140};
141
142#define type_numhash st_hashtype_num
143static const struct st_hash_type st_hashtype_num = {
144 st_numcmp,
145 st_numhash,
146};
147
148static int st_strcmp(st_data_t, st_data_t);
149static st_index_t strhash(st_data_t);
150static const struct st_hash_type type_strhash = {
151 st_strcmp,
152 strhash,
153};
154
155static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
156static st_index_t strcasehash(st_data_t);
157static const struct st_hash_type type_strcasehash = {
158 st_locale_insensitive_strcasecmp_i,
159 strcasehash,
160};
161
162/* Value used to catch uninitialized entries/bins during debugging.
163 There is a possibility for a false alarm, but its probability is
164 extremely small. */
165#define ST_INIT_VAL 0xafafafafafafafaf
166#define ST_INIT_VAL_BYTE 0xafa
167
168#ifdef RUBY
169#undef malloc
170#undef realloc
171#undef calloc
172#undef free
173#define malloc ruby_xmalloc
174#define calloc ruby_xcalloc
175#define realloc ruby_xrealloc
176#define free ruby_xfree
177#endif
178
179#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
180#define PTR_EQUAL(tab, ptr, hash_val, key_) \
181 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
182
183/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
184 up to TRUE if the table is rebuilt during the comparison. */
185#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
186 do { \
187 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
188 res = PTR_EQUAL(tab, ptr, hash_val, key); \
189 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
190 } while (FALSE)
191
192/* Features of a table. */
194 /* Power of 2 used for number of allocated entries. */
195 unsigned char entry_power;
196 /* Power of 2 used for number of allocated bins. Depending on the
197 table size, the number of bins is 2-4 times more than the
198 number of entries. */
199 unsigned char bin_power;
200 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
201 unsigned char size_ind;
202 /* Bins are packed in words of type st_index_t. The following is
203 a size of bins counted by words. */
204 st_index_t bins_words;
205};
206
207/* Features of all possible size tables. */
208#if SIZEOF_ST_INDEX_T == 8
209#define MAX_POWER2 62
210static const struct st_features features[] = {
211 {0, 1, 0, 0x0},
212 {1, 2, 0, 0x1},
213 {2, 3, 0, 0x1},
214 {3, 4, 0, 0x2},
215 {4, 5, 0, 0x4},
216 {5, 6, 0, 0x8},
217 {6, 7, 0, 0x10},
218 {7, 8, 0, 0x20},
219 {8, 9, 1, 0x80},
220 {9, 10, 1, 0x100},
221 {10, 11, 1, 0x200},
222 {11, 12, 1, 0x400},
223 {12, 13, 1, 0x800},
224 {13, 14, 1, 0x1000},
225 {14, 15, 1, 0x2000},
226 {15, 16, 1, 0x4000},
227 {16, 17, 2, 0x10000},
228 {17, 18, 2, 0x20000},
229 {18, 19, 2, 0x40000},
230 {19, 20, 2, 0x80000},
231 {20, 21, 2, 0x100000},
232 {21, 22, 2, 0x200000},
233 {22, 23, 2, 0x400000},
234 {23, 24, 2, 0x800000},
235 {24, 25, 2, 0x1000000},
236 {25, 26, 2, 0x2000000},
237 {26, 27, 2, 0x4000000},
238 {27, 28, 2, 0x8000000},
239 {28, 29, 2, 0x10000000},
240 {29, 30, 2, 0x20000000},
241 {30, 31, 2, 0x40000000},
242 {31, 32, 2, 0x80000000},
243 {32, 33, 3, 0x200000000},
244 {33, 34, 3, 0x400000000},
245 {34, 35, 3, 0x800000000},
246 {35, 36, 3, 0x1000000000},
247 {36, 37, 3, 0x2000000000},
248 {37, 38, 3, 0x4000000000},
249 {38, 39, 3, 0x8000000000},
250 {39, 40, 3, 0x10000000000},
251 {40, 41, 3, 0x20000000000},
252 {41, 42, 3, 0x40000000000},
253 {42, 43, 3, 0x80000000000},
254 {43, 44, 3, 0x100000000000},
255 {44, 45, 3, 0x200000000000},
256 {45, 46, 3, 0x400000000000},
257 {46, 47, 3, 0x800000000000},
258 {47, 48, 3, 0x1000000000000},
259 {48, 49, 3, 0x2000000000000},
260 {49, 50, 3, 0x4000000000000},
261 {50, 51, 3, 0x8000000000000},
262 {51, 52, 3, 0x10000000000000},
263 {52, 53, 3, 0x20000000000000},
264 {53, 54, 3, 0x40000000000000},
265 {54, 55, 3, 0x80000000000000},
266 {55, 56, 3, 0x100000000000000},
267 {56, 57, 3, 0x200000000000000},
268 {57, 58, 3, 0x400000000000000},
269 {58, 59, 3, 0x800000000000000},
270 {59, 60, 3, 0x1000000000000000},
271 {60, 61, 3, 0x2000000000000000},
272 {61, 62, 3, 0x4000000000000000},
273 {62, 63, 3, 0x8000000000000000},
274};
275
276#else
277#define MAX_POWER2 30
278
279static const struct st_features features[] = {
280 {0, 1, 0, 0x1},
281 {1, 2, 0, 0x1},
282 {2, 3, 0, 0x2},
283 {3, 4, 0, 0x4},
284 {4, 5, 0, 0x8},
285 {5, 6, 0, 0x10},
286 {6, 7, 0, 0x20},
287 {7, 8, 0, 0x40},
288 {8, 9, 1, 0x100},
289 {9, 10, 1, 0x200},
290 {10, 11, 1, 0x400},
291 {11, 12, 1, 0x800},
292 {12, 13, 1, 0x1000},
293 {13, 14, 1, 0x2000},
294 {14, 15, 1, 0x4000},
295 {15, 16, 1, 0x8000},
296 {16, 17, 2, 0x20000},
297 {17, 18, 2, 0x40000},
298 {18, 19, 2, 0x80000},
299 {19, 20, 2, 0x100000},
300 {20, 21, 2, 0x200000},
301 {21, 22, 2, 0x400000},
302 {22, 23, 2, 0x800000},
303 {23, 24, 2, 0x1000000},
304 {24, 25, 2, 0x2000000},
305 {25, 26, 2, 0x4000000},
306 {26, 27, 2, 0x8000000},
307 {27, 28, 2, 0x10000000},
308 {28, 29, 2, 0x20000000},
309 {29, 30, 2, 0x40000000},
310 {30, 31, 2, 0x80000000},
311};
312
313#endif
314
315/* The reserved hash value and its substitution. */
316#define RESERVED_HASH_VAL (~(st_hash_t) 0)
317#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
318
319static inline st_hash_t
320normalize_hash_value(st_hash_t hash)
321{
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325}
326
327/* Return hash value of KEY for table TAB. */
328static inline st_hash_t
329do_hash(st_data_t key, st_table *tab)
330{
331 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
332 return normalize_hash_value(hash);
333}
334
335/* Power of 2 defining the minimal number of allocated entries. */
336#define MINIMAL_POWER2 2
337
338#if MINIMAL_POWER2 < 2
339#error "MINIMAL_POWER2 should be >= 2"
340#endif
341
342/* If the power2 of the allocated `entries` is less than the following
343 value, don't allocate bins and use a linear search. */
344#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
345
346/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
347static int
348get_power2(st_index_t size)
349{
350 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
351 if (n <= MAX_POWER2)
352 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
353#ifdef RUBY
354 /* Ran out of the table entries */
355 rb_raise(rb_eRuntimeError, "st_table too big");
356#endif
357 /* should raise exception */
358 return -1;
359}
360
361/* Return value of N-th bin in array BINS of table with bins size
362 index S. */
363static inline st_index_t
364get_bin(st_index_t *bins, int s, st_index_t n)
365{
366 return (s == 0 ? ((unsigned char *) bins)[n]
367 : s == 1 ? ((unsigned short *) bins)[n]
368 : s == 2 ? ((unsigned int *) bins)[n]
369 : ((st_index_t *) bins)[n]);
370}
371
372/* Set up N-th bin in array BINS of table with bins size index S to
373 value V. */
374static inline void
375set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
376{
377 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
378 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
379 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
380 else ((st_index_t *) bins)[n] = v;
381}
382
383/* These macros define reserved values for empty table bin and table
384 bin which contains a deleted entry. We will never use such values
385 for an entry index in bins. */
386#define EMPTY_BIN 0
387#define DELETED_BIN 1
388/* Base of a real entry index in the bins. */
389#define ENTRY_BASE 2
390
391/* Mark I-th bin of table TAB as empty, in other words not
392 corresponding to any entry. */
393#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
394
395/* Values used for not found entry and bin with given
396 characteristics. */
397#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
398#define UNDEFINED_BIN_IND (~(st_index_t) 0)
399
400/* Entry and bin values returned when we found a table rebuild during
401 the search. */
402#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
403#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
404
405/* Mark I-th bin of table TAB as corresponding to a deleted table
406 entry. Update number of entries in the table and number of bins
407 corresponding to deleted entries. */
408#define MARK_BIN_DELETED(tab, i) \
409 do { \
410 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
411 } while (0)
412
413/* Macros to check that value B is used empty bins and bins
414 corresponding deleted entries. */
415#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
416#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
417#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
418
419/* Macros to check empty bins and bins corresponding to deleted
420 entries. Bins are given by their index I in table TAB. */
421#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
423#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
424
425/* Macros for marking and checking deleted entries given by their
426 pointer E_PTR. */
427#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
428#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
429
430/* Return bin size index of table TAB. */
431static inline unsigned int
432get_size_ind(const st_table *tab)
433{
434 return tab->size_ind;
435}
436
437/* Return the number of allocated bins of table TAB. */
438static inline st_index_t
439get_bins_num(const st_table *tab)
440{
441 return ((st_index_t) 1)<<tab->bin_power;
442}
443
444/* Return mask for a bin index in table TAB. */
445static inline st_index_t
446bins_mask(const st_table *tab)
447{
448 return get_bins_num(tab) - 1;
449}
450
451/* Return the index of table TAB bin corresponding to
452 HASH_VALUE. */
453static inline st_index_t
454hash_bin(st_hash_t hash_value, st_table *tab)
455{
456 return hash_value & bins_mask(tab);
457}
458
459/* Return the number of allocated entries of table TAB. */
460static inline st_index_t
461get_allocated_entries(const st_table *tab)
462{
463 return ((st_index_t) 1)<<tab->entry_power;
464}
465
466/* Return size of the allocated bins of table TAB. */
467static inline st_index_t
468bins_size(const st_table *tab)
469{
470 return features[tab->entry_power].bins_words * sizeof (st_index_t);
471}
472
473/* Mark all bins of table TAB as empty. */
474static void
475initialize_bins(st_table *tab)
476{
477 memset(tab->bins, 0, bins_size(tab));
478}
479
480/* Make table TAB empty. */
481static void
482make_tab_empty(st_table *tab)
483{
484 tab->num_entries = 0;
485 tab->entries_start = tab->entries_bound = 0;
486 if (tab->bins != NULL)
487 initialize_bins(tab);
488}
489
490#ifdef HASH_LOG
491#ifdef HAVE_UNISTD_H
492#include <unistd.h>
493#endif
494static struct {
495 int all, total, num, str, strcase;
496} collision;
497
498/* Flag switching off output of package statistics at the end of
499 program. */
500static int init_st = 0;
501
502/* Output overall number of table searches and collisions into a
503 temporary file. */
504static void
505stat_col(void)
506{
507 char fname[10+sizeof(long)*3];
508 FILE *f;
509 if (!collision.total) return;
510 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
511 if (f == NULL)
512 return;
513 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
514 ((double)collision.all / (collision.total)) * 100);
515 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
516 fclose(f);
517}
518#endif
519
520st_table *
521st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
522{
523 int n;
524
525#ifdef HASH_LOG
526#if HASH_LOG+0 < 0
527 {
528 const char *e = getenv("ST_HASH_LOG");
529 if (!e || !*e) init_st = 1;
530 }
531#endif
532 if (init_st == 0) {
533 init_st = 1;
534 atexit(stat_col);
535 }
536#endif
537
538 n = get_power2(size);
539#ifndef RUBY
540 if (n < 0)
541 return NULL;
542#endif
543
544 tab->type = type;
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549 tab->bins = NULL;
550 else {
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
552#ifndef RUBY
553 if (tab->bins == NULL) {
554 free(tab);
555 return NULL;
556 }
557#endif
558 }
559 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560 * sizeof(st_table_entry));
561#ifndef RUBY
562 if (tab->entries == NULL) {
563 st_free_table(tab);
564 return NULL;
565 }
566#endif
567 make_tab_empty(tab);
568 tab->rebuilds_num = 0;
569 return tab;
570}
571
572/* Create and return table with TYPE which can hold at least SIZE
573 entries. The real number of entries which the table can hold is
574 the nearest power of two for SIZE. */
575st_table *
576st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
577{
578 st_table *tab = malloc(sizeof(st_table));
579#ifndef RUBY
580 if (tab == NULL)
581 return NULL;
582#endif
583
584#ifdef RUBY
585 st_init_existing_table_with_size(tab, type, size);
586#else
587 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
588 free(tab);
589 return NULL;
590 }
591#endif
592
593 return tab;
594}
595
596size_t
597st_table_size(const struct st_table *tbl)
598{
599 return tbl->num_entries;
600}
601
602/* Create and return table with TYPE which can hold a minimal number
603 of entries (see comments for get_power2). */
604st_table *
605st_init_table(const struct st_hash_type *type)
606{
607 return st_init_table_with_size(type, 0);
608}
609
610/* Create and return table which can hold a minimal number of
611 numbers. */
612st_table *
613st_init_numtable(void)
614{
615 return st_init_table(&type_numhash);
616}
617
618/* Create and return table which can hold SIZE numbers. */
619st_table *
620st_init_numtable_with_size(st_index_t size)
621{
622 return st_init_table_with_size(&type_numhash, size);
623}
624
625/* Create and return table which can hold a minimal number of
626 strings. */
627st_table *
628st_init_strtable(void)
629{
630 return st_init_table(&type_strhash);
631}
632
633/* Create and return table which can hold SIZE strings. */
634st_table *
635st_init_strtable_with_size(st_index_t size)
636{
637 return st_init_table_with_size(&type_strhash, size);
638}
639
640/* Create and return table which can hold a minimal number of strings
641 whose character case is ignored. */
642st_table *
643st_init_strcasetable(void)
644{
645 return st_init_table(&type_strcasehash);
646}
647
648/* Create and return table which can hold SIZE strings whose character
649 case is ignored. */
650st_table *
651st_init_strcasetable_with_size(st_index_t size)
652{
653 return st_init_table_with_size(&type_strcasehash, size);
654}
655
656/* Make table TAB empty. */
657void
658st_clear(st_table *tab)
659{
660 make_tab_empty(tab);
661 tab->rebuilds_num++;
662}
663
664/* Free table TAB space. */
665void
666st_free_table(st_table *tab)
667{
668 free(tab->bins);
669 free(tab->entries);
670 free(tab);
671}
672
673/* Return byte size of memory allocated for table TAB. */
674size_t
675st_memsize(const st_table *tab)
676{
677 return(sizeof(st_table)
678 + (tab->bins == NULL ? 0 : bins_size(tab))
679 + get_allocated_entries(tab) * sizeof(st_table_entry));
680}
681
682static st_index_t
683find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
684
685static st_index_t
686find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
687
688static st_index_t
689find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
690
691static st_index_t
692find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
693 st_data_t key, st_index_t *bin_ind);
694
695#ifdef HASH_LOG
696static void
697count_collision(const struct st_hash_type *type)
698{
699 collision.all++;
700 if (type == &type_numhash) {
701 collision.num++;
702 }
703 else if (type == &type_strhash) {
704 collision.strcase++;
705 }
706 else if (type == &type_strcasehash) {
707 collision.str++;
708 }
709}
710
711#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
712#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
713#define collision_check 0
714#else
715#define COLLISION
716#define FOUND_BIN
717#endif
718
719/* If the number of entries in the table is at least REBUILD_THRESHOLD
720 times less than the entry array length, decrease the table
721 size. */
722#define REBUILD_THRESHOLD 4
723
724#if REBUILD_THRESHOLD < 2
725#error "REBUILD_THRESHOLD should be >= 2"
726#endif
727
728static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
729static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
730static void rebuild_cleanup(st_table *const tab);
731
732/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
733 and can change size of the table entries and bins arrays.
734 Rebuilding is implemented by creation of a new table or by
735 compaction of the existing one. */
736static void
737rebuild_table(st_table *tab)
738{
739 if ((2 * tab->num_entries <= get_allocated_entries(tab)
740 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
741 || tab->num_entries < (1 << MINIMAL_POWER2)) {
742 /* Compaction: */
743 tab->num_entries = 0;
744 if (tab->bins != NULL)
745 initialize_bins(tab);
746 rebuild_table_with(tab, tab);
747 }
748 else {
749 st_table *new_tab;
750 /* This allocation could trigger GC and compaction. If tab is the
751 * gen_fields_tbl, then tab could have changed in size due to objects being
752 * freed and/or moved. Do not store attributes of tab before this line. */
753 new_tab = st_init_table_with_size(tab->type,
754 2 * tab->num_entries - 1);
755 rebuild_table_with(new_tab, tab);
756 rebuild_move_table(new_tab, tab);
757 }
758 rebuild_cleanup(tab);
759}
760
761static void
762rebuild_table_with(st_table *const new_tab, st_table *const tab)
763{
764 st_index_t i, ni;
765 unsigned int size_ind;
766 st_table_entry *new_entries;
767 st_table_entry *curr_entry_ptr;
768 st_index_t *bins;
769 st_index_t bin_ind;
770
771 new_entries = new_tab->entries;
772
773 ni = 0;
774 bins = new_tab->bins;
775 size_ind = get_size_ind(new_tab);
776 st_index_t bound = tab->entries_bound;
777 st_table_entry *entries = tab->entries;
778
779 for (i = tab->entries_start; i < bound; i++) {
780 curr_entry_ptr = &entries[i];
781 PREFETCH(entries + i + 1, 0);
782 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
783 continue;
784 if (&new_entries[ni] != curr_entry_ptr)
785 new_entries[ni] = *curr_entry_ptr;
786 if (EXPECT(bins != NULL, 1)) {
787 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
788 curr_entry_ptr->key);
789 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
790 }
791 new_tab->num_entries++;
792 ni++;
793 }
794
795 assert(new_tab->num_entries == tab->num_entries);
796}
797
798static void
799rebuild_move_table(st_table *const new_tab, st_table *const tab)
800{
801 tab->entry_power = new_tab->entry_power;
802 tab->bin_power = new_tab->bin_power;
803 tab->size_ind = new_tab->size_ind;
804 free(tab->bins);
805 tab->bins = new_tab->bins;
806 free(tab->entries);
807 tab->entries = new_tab->entries;
808 free(new_tab);
809}
810
811static void
812rebuild_cleanup(st_table *const tab)
813{
814 tab->entries_start = 0;
815 tab->entries_bound = tab->num_entries;
816 tab->rebuilds_num++;
817}
818
819/* Return the next secondary hash index for table TAB using previous
820 index IND and PERTURB. Finally modulo of the function becomes a
821 full *cycle linear congruential generator*, in other words it
822 guarantees traversing all table bins in extreme case.
823
824 According the Hull-Dobell theorem a generator
825 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
826 o m and c are relatively prime
827 o a-1 is divisible by all prime factors of m
828 o a-1 is divisible by 4 if m is divisible by 4.
829
830 For our case a is 5, c is 1, and m is a power of two. */
831static inline st_index_t
832secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
833{
834 *perturb >>= 11;
835 ind = (ind << 2) + ind + *perturb + 1;
836 return hash_bin(ind, tab);
837}
838
839/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
840 search. Return the index of the found entry in array `entries`.
841 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
842 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
843static inline st_index_t
844find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
845{
846 int eq_p, rebuilt_p;
847 st_index_t i, bound;
848 st_table_entry *entries;
849
850 bound = tab->entries_bound;
851 entries = tab->entries;
852 for (i = tab->entries_start; i < bound; i++) {
853 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
854 if (EXPECT(rebuilt_p, 0))
855 return REBUILT_TABLE_ENTRY_IND;
856 if (eq_p)
857 return i;
858 }
859 return UNDEFINED_ENTRY_IND;
860}
861
862/* Use the quadratic probing. The method has a better data locality
863 but more collisions than the current approach. In average it
864 results in a bit slower search. */
865/*#define QUADRATIC_PROBE*/
866
867/* Return index of entry with HASH_VALUE and KEY in table TAB. If
868 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
869 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
870static st_index_t
871find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
872{
873 int eq_p, rebuilt_p;
874 st_index_t ind;
875#ifdef QUADRATIC_PROBE
876 st_index_t d;
877#else
878 st_index_t perturb;
879#endif
880 st_index_t bin;
881 st_table_entry *entries = tab->entries;
882
883 ind = hash_bin(hash_value, tab);
884#ifdef QUADRATIC_PROBE
885 d = 1;
886#else
887 perturb = hash_value;
888#endif
889 FOUND_BIN;
890 for (;;) {
891 bin = get_bin(tab->bins, get_size_ind(tab), ind);
892 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
893 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
894 if (EXPECT(rebuilt_p, 0))
895 return REBUILT_TABLE_ENTRY_IND;
896 if (eq_p)
897 break;
898 }
899 else if (EMPTY_BIN_P(bin))
900 return UNDEFINED_ENTRY_IND;
901#ifdef QUADRATIC_PROBE
902 ind = hash_bin(ind + d, tab);
903 d++;
904#else
905 ind = secondary_hash(ind, tab, &perturb);
906#endif
907 COLLISION;
908 }
909 return bin;
910}
911
912/* Find and return index of table TAB bin corresponding to an entry
913 with HASH_VALUE and KEY. If there is no such bin, return
914 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
915 return REBUILT_TABLE_BIN_IND. */
916static st_index_t
917find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
918{
919 int eq_p, rebuilt_p;
920 st_index_t ind;
921#ifdef QUADRATIC_PROBE
922 st_index_t d;
923#else
924 st_index_t perturb;
925#endif
926 st_index_t bin;
927 st_table_entry *entries = tab->entries;
928
929 ind = hash_bin(hash_value, tab);
930#ifdef QUADRATIC_PROBE
931 d = 1;
932#else
933 perturb = hash_value;
934#endif
935 FOUND_BIN;
936 for (;;) {
937 bin = get_bin(tab->bins, get_size_ind(tab), ind);
938 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
939 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
940 if (EXPECT(rebuilt_p, 0))
941 return REBUILT_TABLE_BIN_IND;
942 if (eq_p)
943 break;
944 }
945 else if (EMPTY_BIN_P(bin))
946 return UNDEFINED_BIN_IND;
947#ifdef QUADRATIC_PROBE
948 ind = hash_bin(ind + d, tab);
949 d++;
950#else
951 ind = secondary_hash(ind, tab, &perturb);
952#endif
953 COLLISION;
954 }
955 return ind;
956}
957
958/* Find and return index of table TAB bin corresponding to an entry
959 with HASH_VALUE and KEY. The entry should be in the table
960 already. */
961static st_index_t
962find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
963{
964 st_index_t ind;
965#ifdef QUADRATIC_PROBE
966 st_index_t d;
967#else
968 st_index_t perturb;
969#endif
970 st_index_t bin;
971
972 ind = hash_bin(hash_value, tab);
973#ifdef QUADRATIC_PROBE
974 d = 1;
975#else
976 perturb = hash_value;
977#endif
978 FOUND_BIN;
979 for (;;) {
980 bin = get_bin(tab->bins, get_size_ind(tab), ind);
981 if (EMPTY_OR_DELETED_BIN_P(bin))
982 return ind;
983#ifdef QUADRATIC_PROBE
984 ind = hash_bin(ind + d, tab);
985 d++;
986#else
987 ind = secondary_hash(ind, tab, &perturb);
988#endif
989 COLLISION;
990 }
991}
992
993/* Return index of table TAB bin for HASH_VALUE and KEY through
994 BIN_IND and the pointed value as the function result. Reserve the
995 bin for inclusion of the corresponding entry into the table if it
996 is not there yet. We always find such bin as bins array length is
997 bigger entries array. Although we can reuse a deleted bin, the
998 result bin value is always empty if the table has no entry with
999 KEY. Return the entries array index of the found entry or
1000 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1001 during the search, return REBUILT_TABLE_ENTRY_IND. */
1002static st_index_t
1003find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1004 st_data_t key, st_index_t *bin_ind)
1005{
1006 int eq_p, rebuilt_p;
1007 st_index_t ind;
1008 st_hash_t curr_hash_value = *hash_value;
1009#ifdef QUADRATIC_PROBE
1010 st_index_t d;
1011#else
1012 st_index_t perturb;
1013#endif
1014 st_index_t entry_index;
1015 st_index_t first_deleted_bin_ind;
1016 st_table_entry *entries;
1017
1018 ind = hash_bin(curr_hash_value, tab);
1019#ifdef QUADRATIC_PROBE
1020 d = 1;
1021#else
1022 perturb = curr_hash_value;
1023#endif
1024 FOUND_BIN;
1025 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1026 entries = tab->entries;
1027 for (;;) {
1028 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1029 if (EMPTY_BIN_P(entry_index)) {
1030 tab->num_entries++;
1031 entry_index = UNDEFINED_ENTRY_IND;
1032 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1033 /* We can reuse bin of a deleted entry. */
1034 ind = first_deleted_bin_ind;
1035 MARK_BIN_EMPTY(tab, ind);
1036 }
1037 break;
1038 }
1039 else if (! DELETED_BIN_P(entry_index)) {
1040 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1041 if (EXPECT(rebuilt_p, 0))
1042 return REBUILT_TABLE_ENTRY_IND;
1043 if (eq_p)
1044 break;
1045 }
1046 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1047 first_deleted_bin_ind = ind;
1048#ifdef QUADRATIC_PROBE
1049 ind = hash_bin(ind + d, tab);
1050 d++;
1051#else
1052 ind = secondary_hash(ind, tab, &perturb);
1053#endif
1054 COLLISION;
1055 }
1056 *bin_ind = ind;
1057 return entry_index;
1058}
1059
1060/* Find an entry with KEY in table TAB. Return non-zero if we found
1061 it. Set up *RECORD to the found entry record. */
1062int
1063st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1064{
1065 st_index_t bin;
1066 st_hash_t hash = do_hash(key, tab);
1067
1068 retry:
1069 if (tab->bins == NULL) {
1070 bin = find_entry(tab, hash, key);
1071 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1072 goto retry;
1073 if (bin == UNDEFINED_ENTRY_IND)
1074 return 0;
1075 }
1076 else {
1077 bin = find_table_entry_ind(tab, hash, key);
1078 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1079 goto retry;
1080 if (bin == UNDEFINED_ENTRY_IND)
1081 return 0;
1082 bin -= ENTRY_BASE;
1083 }
1084 if (value != 0)
1085 *value = tab->entries[bin].record;
1086 return 1;
1087}
1088
1089/* Find an entry with KEY in table TAB. Return non-zero if we found
1090 it. Set up *RESULT to the found table entry key. */
1091int
1092st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1093{
1094 st_index_t bin;
1095 st_hash_t hash = do_hash(key, tab);
1096
1097 retry:
1098 if (tab->bins == NULL) {
1099 bin = find_entry(tab, hash, key);
1100 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1101 goto retry;
1102 if (bin == UNDEFINED_ENTRY_IND)
1103 return 0;
1104 }
1105 else {
1106 bin = find_table_entry_ind(tab, hash, key);
1107 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1108 goto retry;
1109 if (bin == UNDEFINED_ENTRY_IND)
1110 return 0;
1111 bin -= ENTRY_BASE;
1112 }
1113 if (result != 0)
1114 *result = tab->entries[bin].key;
1115 return 1;
1116}
1117
1118/* Check the table and rebuild it if it is necessary. */
1119static inline void
1120rebuild_table_if_necessary (st_table *tab)
1121{
1122 st_index_t bound = tab->entries_bound;
1123
1124 if (bound == get_allocated_entries(tab))
1125 rebuild_table(tab);
1126}
1127
1128/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1129 already entry with KEY in the table, return nonzero and update
1130 the value of the found entry. */
1131int
1132st_insert(st_table *tab, st_data_t key, st_data_t value)
1133{
1134 st_table_entry *entry;
1135 st_index_t bin;
1136 st_index_t ind;
1137 st_hash_t hash_value;
1138 st_index_t bin_ind;
1139 int new_p;
1140
1141 hash_value = do_hash(key, tab);
1142 retry:
1143 rebuild_table_if_necessary(tab);
1144 if (tab->bins == NULL) {
1145 bin = find_entry(tab, hash_value, key);
1146 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1147 goto retry;
1148 new_p = bin == UNDEFINED_ENTRY_IND;
1149 if (new_p)
1150 tab->num_entries++;
1151 bin_ind = UNDEFINED_BIN_IND;
1152 }
1153 else {
1154 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1155 key, &bin_ind);
1156 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1157 goto retry;
1158 new_p = bin == UNDEFINED_ENTRY_IND;
1159 bin -= ENTRY_BASE;
1160 }
1161 if (new_p) {
1162 ind = tab->entries_bound++;
1163 entry = &tab->entries[ind];
1164 entry->hash = hash_value;
1165 entry->key = key;
1166 entry->record = value;
1167 if (bin_ind != UNDEFINED_BIN_IND)
1168 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1169 return 0;
1170 }
1171 tab->entries[bin].record = value;
1172 return 1;
1173}
1174
1175/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1176 entry with KEY before the insertion. */
1177static inline void
1178st_add_direct_with_hash(st_table *tab,
1179 st_data_t key, st_data_t value, st_hash_t hash)
1180{
1181 st_table_entry *entry;
1182 st_index_t ind;
1183 st_index_t bin_ind;
1184
1185 assert(hash != RESERVED_HASH_VAL);
1186
1187 rebuild_table_if_necessary(tab);
1188 ind = tab->entries_bound++;
1189 entry = &tab->entries[ind];
1190 entry->hash = hash;
1191 entry->key = key;
1192 entry->record = value;
1193 tab->num_entries++;
1194 if (tab->bins != NULL) {
1195 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1196 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1197 }
1198}
1199
1200void
1201rb_st_add_direct_with_hash(st_table *tab,
1202 st_data_t key, st_data_t value, st_hash_t hash)
1203{
1204 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1205}
1206
1207/* Insert (KEY, VALUE) into table TAB. The table should not have
1208 entry with KEY before the insertion. */
1209void
1210st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1211{
1212 st_hash_t hash_value;
1213
1214 hash_value = do_hash(key, tab);
1215 st_add_direct_with_hash(tab, key, value, hash_value);
1216}
1217
1218/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1219 there is already entry with KEY in the table, return nonzero and
1220 update the value of the found entry. */
1221int
1222st_insert2(st_table *tab, st_data_t key, st_data_t value,
1223 st_data_t (*func)(st_data_t))
1224{
1225 st_table_entry *entry;
1226 st_index_t bin;
1227 st_index_t ind;
1228 st_hash_t hash_value;
1229 st_index_t bin_ind;
1230 int new_p;
1231
1232 hash_value = do_hash(key, tab);
1233 retry:
1234 rebuild_table_if_necessary (tab);
1235 if (tab->bins == NULL) {
1236 bin = find_entry(tab, hash_value, key);
1237 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1238 goto retry;
1239 new_p = bin == UNDEFINED_ENTRY_IND;
1240 if (new_p)
1241 tab->num_entries++;
1242 bin_ind = UNDEFINED_BIN_IND;
1243 }
1244 else {
1245 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1246 key, &bin_ind);
1247 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1248 goto retry;
1249 new_p = bin == UNDEFINED_ENTRY_IND;
1250 bin -= ENTRY_BASE;
1251 }
1252 if (new_p) {
1253 key = (*func)(key);
1254 ind = tab->entries_bound++;
1255 entry = &tab->entries[ind];
1256 entry->hash = hash_value;
1257 entry->key = key;
1258 entry->record = value;
1259 if (bin_ind != UNDEFINED_BIN_IND)
1260 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1261 return 0;
1262 }
1263 tab->entries[bin].record = value;
1264 return 1;
1265}
1266
1267/* Create a copy of old_tab into new_tab. */
1268st_table *
1269st_replace(st_table *new_tab, st_table *old_tab)
1270{
1271 *new_tab = *old_tab;
1272 if (old_tab->bins == NULL)
1273 new_tab->bins = NULL;
1274 else {
1275 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1276#ifndef RUBY
1277 if (new_tab->bins == NULL) {
1278 return NULL;
1279 }
1280#endif
1281 }
1282 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1283 * sizeof(st_table_entry));
1284#ifndef RUBY
1285 if (new_tab->entries == NULL) {
1286 return NULL;
1287 }
1288#endif
1289 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1290 get_allocated_entries(old_tab));
1291 if (old_tab->bins != NULL)
1292 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1293
1294 return new_tab;
1295}
1296
1297/* Create and return a copy of table OLD_TAB. */
1298st_table *
1299st_copy(st_table *old_tab)
1300{
1301 st_table *new_tab;
1302
1303 new_tab = (st_table *) malloc(sizeof(st_table));
1304#ifndef RUBY
1305 if (new_tab == NULL)
1306 return NULL;
1307#endif
1308
1309 if (st_replace(new_tab, old_tab) == NULL) {
1310 st_free_table(new_tab);
1311 return NULL;
1312 }
1313
1314 return new_tab;
1315}
1316
1317/* Update the entries start of table TAB after removing an entry
1318 with index N in the array entries. */
1319static inline void
1320update_range_for_deleted(st_table *tab, st_index_t n)
1321{
1322 /* Do not update entries_bound here. Otherwise, we can fill all
1323 bins by deleted entry value before rebuilding the table. */
1324 if (tab->entries_start == n) {
1325 st_index_t start = n + 1;
1326 st_index_t bound = tab->entries_bound;
1327 st_table_entry *entries = tab->entries;
1328 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1329 tab->entries_start = start;
1330 }
1331}
1332
1333/* Delete entry with KEY from table TAB, set up *VALUE (unless
1334 VALUE is zero) from deleted table entry, and return non-zero. If
1335 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1336 is zero), and return zero. */
1337static int
1338st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1339{
1340 st_table_entry *entry;
1341 st_index_t bin;
1342 st_index_t bin_ind;
1343 st_hash_t hash;
1344
1345 hash = do_hash(*key, tab);
1346 retry:
1347 if (tab->bins == NULL) {
1348 bin = find_entry(tab, hash, *key);
1349 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1350 goto retry;
1351 if (bin == UNDEFINED_ENTRY_IND) {
1352 if (value != 0) *value = 0;
1353 return 0;
1354 }
1355 }
1356 else {
1357 bin_ind = find_table_bin_ind(tab, hash, *key);
1358 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1359 goto retry;
1360 if (bin_ind == UNDEFINED_BIN_IND) {
1361 if (value != 0) *value = 0;
1362 return 0;
1363 }
1364 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1365 MARK_BIN_DELETED(tab, bin_ind);
1366 }
1367 entry = &tab->entries[bin];
1368 *key = entry->key;
1369 if (value != 0) *value = entry->record;
1370 MARK_ENTRY_DELETED(entry);
1371 tab->num_entries--;
1372 update_range_for_deleted(tab, bin);
1373 return 1;
1374}
1375
1376int
1377st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1378{
1379 return st_general_delete(tab, key, value);
1380}
1381
1382/* The function and other functions with suffix '_safe' or '_check'
1383 are originated from the previous implementation of the hash tables.
1384 It was necessary for correct deleting entries during traversing
1385 tables. The current implementation permits deletion during
1386 traversing without a specific way to do this. */
1387int
1388st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1389 st_data_t never ATTRIBUTE_UNUSED)
1390{
1391 return st_general_delete(tab, key, value);
1392}
1393
1394/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1395 return zero. Otherwise, remove the first entry in the table.
1396 Return its key through KEY and its record through VALUE (unless
1397 VALUE is zero). */
1398int
1399st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1400{
1401 st_index_t i, bound;
1402 st_index_t bin;
1403 st_table_entry *entries, *curr_entry_ptr;
1404 st_index_t bin_ind;
1405
1406 entries = tab->entries;
1407 bound = tab->entries_bound;
1408 for (i = tab->entries_start; i < bound; i++) {
1409 curr_entry_ptr = &entries[i];
1410 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1411 st_hash_t entry_hash = curr_entry_ptr->hash;
1412 st_data_t entry_key = curr_entry_ptr->key;
1413
1414 if (value != 0) *value = curr_entry_ptr->record;
1415 *key = entry_key;
1416 retry:
1417 if (tab->bins == NULL) {
1418 bin = find_entry(tab, entry_hash, entry_key);
1419 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1420 entries = tab->entries;
1421 goto retry;
1422 }
1423 curr_entry_ptr = &entries[bin];
1424 }
1425 else {
1426 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1427 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1428 entries = tab->entries;
1429 goto retry;
1430 }
1431 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1432 - ENTRY_BASE];
1433 MARK_BIN_DELETED(tab, bin_ind);
1434 }
1435 MARK_ENTRY_DELETED(curr_entry_ptr);
1436 tab->num_entries--;
1437 update_range_for_deleted(tab, i);
1438 return 1;
1439 }
1440 }
1441 if (value != 0) *value = 0;
1442 return 0;
1443}
1444
1445/* See comments for function st_delete_safe. */
1446void
1447st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1448 st_data_t never ATTRIBUTE_UNUSED)
1449{
1450}
1451
1452/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1453 of the key and the value of the found entry, and non-zero as the
1454 3rd argument. If the entry is not found, call FUNC with a pointer
1455 to KEY, a pointer to zero, and a zero argument. If the call
1456 returns ST_CONTINUE, the table will have an entry with key and
1457 value returned by FUNC through the 1st and 2nd parameters. If the
1458 call of FUNC returns ST_DELETE, the table will not have entry with
1459 KEY. The function returns flag of that the entry with KEY was in
1460 the table before the call. */
1461int
1462st_update(st_table *tab, st_data_t key,
1463 st_update_callback_func *func, st_data_t arg)
1464{
1465 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1466 st_index_t bin = 0; /* Ditto */
1467 st_table_entry *entries;
1468 st_index_t bin_ind;
1469 st_data_t value = 0, old_key;
1470 int retval, existing;
1471 st_hash_t hash = do_hash(key, tab);
1472
1473 retry:
1474 entries = tab->entries;
1475 if (tab->bins == NULL) {
1476 bin = find_entry(tab, hash, key);
1477 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1478 goto retry;
1479 existing = bin != UNDEFINED_ENTRY_IND;
1480 entry = &entries[bin];
1481 bin_ind = UNDEFINED_BIN_IND;
1482 }
1483 else {
1484 bin_ind = find_table_bin_ind(tab, hash, key);
1485 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1486 goto retry;
1487 existing = bin_ind != UNDEFINED_BIN_IND;
1488 if (existing) {
1489 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1490 entry = &entries[bin];
1491 }
1492 }
1493 if (existing) {
1494 key = entry->key;
1495 value = entry->record;
1496 }
1497 old_key = key;
1498
1499 unsigned int rebuilds_num = tab->rebuilds_num;
1500
1501 retval = (*func)(&key, &value, arg, existing);
1502
1503 // We need to make sure that the callback didn't cause a table rebuild
1504 // Ideally we would make sure no operations happened
1505 assert(rebuilds_num == tab->rebuilds_num);
1506 (void)rebuilds_num;
1507
1508 switch (retval) {
1509 case ST_CONTINUE:
1510 if (! existing) {
1511 st_add_direct_with_hash(tab, key, value, hash);
1512 break;
1513 }
1514 if (old_key != key) {
1515 entry->key = key;
1516 }
1517 entry->record = value;
1518 break;
1519 case ST_DELETE:
1520 if (existing) {
1521 if (bin_ind != UNDEFINED_BIN_IND)
1522 MARK_BIN_DELETED(tab, bin_ind);
1523 MARK_ENTRY_DELETED(entry);
1524 tab->num_entries--;
1525 update_range_for_deleted(tab, bin);
1526 }
1527 break;
1528 }
1529 return existing;
1530}
1531
1532/* Traverse all entries in table TAB calling FUNC with current entry
1533 key and value and zero. If the call returns ST_STOP, stop
1534 traversing. If the call returns ST_DELETE, delete the current
1535 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1536 traversing. The function returns zero unless an error is found.
1537 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1538 different for ST_CHECK and when the current element is removed
1539 during traversing. */
1540static inline int
1541st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1542 int check_p)
1543{
1544 st_index_t bin;
1545 st_index_t bin_ind;
1546 st_table_entry *entries, *curr_entry_ptr;
1547 enum st_retval retval;
1548 st_index_t i, rebuilds_num;
1549 st_hash_t hash;
1550 st_data_t key;
1551 int error_p, packed_p = tab->bins == NULL;
1552
1553 entries = tab->entries;
1554 /* The bound can change inside the loop even without rebuilding
1555 the table, e.g. by an entry insertion. */
1556 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1557 curr_entry_ptr = &entries[i];
1558 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1559 continue;
1560 key = curr_entry_ptr->key;
1561 rebuilds_num = tab->rebuilds_num;
1562 hash = curr_entry_ptr->hash;
1563 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1564
1565 if (retval == ST_REPLACE && replace) {
1566 st_data_t value;
1567 value = curr_entry_ptr->record;
1568 retval = (*replace)(&key, &value, arg, TRUE);
1569 curr_entry_ptr->key = key;
1570 curr_entry_ptr->record = value;
1571 }
1572
1573 if (rebuilds_num != tab->rebuilds_num) {
1574 retry:
1575 entries = tab->entries;
1576 packed_p = tab->bins == NULL;
1577 if (packed_p) {
1578 i = find_entry(tab, hash, key);
1579 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1580 goto retry;
1581 error_p = i == UNDEFINED_ENTRY_IND;
1582 }
1583 else {
1584 i = find_table_entry_ind(tab, hash, key);
1585 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1586 goto retry;
1587 error_p = i == UNDEFINED_ENTRY_IND;
1588 i -= ENTRY_BASE;
1589 }
1590 if (error_p && check_p) {
1591 /* call func with error notice */
1592 retval = (*func)(0, 0, arg, 1);
1593 return 1;
1594 }
1595 curr_entry_ptr = &entries[i];
1596 }
1597 switch (retval) {
1598 case ST_REPLACE:
1599 break;
1600 case ST_CONTINUE:
1601 break;
1602 case ST_CHECK:
1603 if (check_p)
1604 break;
1605 case ST_STOP:
1606 return 0;
1607 case ST_DELETE: {
1608 st_data_t key = curr_entry_ptr->key;
1609
1610 again:
1611 if (packed_p) {
1612 bin = find_entry(tab, hash, key);
1613 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1614 goto again;
1615 if (bin == UNDEFINED_ENTRY_IND)
1616 break;
1617 }
1618 else {
1619 bin_ind = find_table_bin_ind(tab, hash, key);
1620 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1621 goto again;
1622 if (bin_ind == UNDEFINED_BIN_IND)
1623 break;
1624 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1625 MARK_BIN_DELETED(tab, bin_ind);
1626 }
1627 curr_entry_ptr = &entries[bin];
1628 MARK_ENTRY_DELETED(curr_entry_ptr);
1629 tab->num_entries--;
1630 update_range_for_deleted(tab, bin);
1631 break;
1632 }
1633 }
1634 }
1635 return 0;
1636}
1637
1638int
1639st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1640{
1641 return st_general_foreach(tab, func, replace, arg, TRUE);
1642}
1643
1644struct functor {
1645 st_foreach_callback_func *func;
1646 st_data_t arg;
1647};
1648
1649static int
1650apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1651{
1652 const struct functor *f = (void *)d;
1653 return f->func(k, v, f->arg);
1654}
1655
1656int
1657st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1658{
1659 const struct functor f = { func, arg };
1660 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1661}
1662
1663/* See comments for function st_delete_safe. */
1664int
1665st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1666 st_data_t never ATTRIBUTE_UNUSED)
1667{
1668 return st_general_foreach(tab, func, 0, arg, TRUE);
1669}
1670
1671/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1672 Return the number of keys set up in array KEYS. */
1673static inline st_index_t
1674st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1675{
1676 st_index_t i, bound;
1677 st_data_t key, *keys_start, *keys_end;
1678 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1679
1680 bound = tab->entries_bound;
1681 keys_start = keys;
1682 keys_end = keys + size;
1683 for (i = tab->entries_start; i < bound; i++) {
1684 if (keys == keys_end)
1685 break;
1686 curr_entry_ptr = &entries[i];
1687 key = curr_entry_ptr->key;
1688 if (! DELETED_ENTRY_P(curr_entry_ptr))
1689 *keys++ = key;
1690 }
1691
1692 return keys - keys_start;
1693}
1694
1695st_index_t
1696st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1697{
1698 return st_general_keys(tab, keys, size);
1699}
1700
1701/* See comments for function st_delete_safe. */
1702st_index_t
1703st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1704 st_data_t never ATTRIBUTE_UNUSED)
1705{
1706 return st_general_keys(tab, keys, size);
1707}
1708
1709/* Set up array VALUES by at most SIZE values of head table TAB
1710 entries. Return the number of values set up in array VALUES. */
1711static inline st_index_t
1712st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1713{
1714 st_index_t i, bound;
1715 st_data_t *values_start, *values_end;
1716 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1717
1718 values_start = values;
1719 values_end = values + size;
1720 bound = tab->entries_bound;
1721 for (i = tab->entries_start; i < bound; i++) {
1722 if (values == values_end)
1723 break;
1724 curr_entry_ptr = &entries[i];
1725 if (! DELETED_ENTRY_P(curr_entry_ptr))
1726 *values++ = curr_entry_ptr->record;
1727 }
1728
1729 return values - values_start;
1730}
1731
1732st_index_t
1733st_values(st_table *tab, st_data_t *values, st_index_t size)
1734{
1735 return st_general_values(tab, values, size);
1736}
1737
1738/* See comments for function st_delete_safe. */
1739st_index_t
1740st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1741 st_data_t never ATTRIBUTE_UNUSED)
1742{
1743 return st_general_values(tab, values, size);
1744}
1745
1746#define FNV1_32A_INIT 0x811c9dc5
1747
1748/*
1749 * 32 bit magic FNV-1a prime
1750 */
1751#define FNV_32_PRIME 0x01000193
1752
1753/* __POWERPC__ added to accommodate Darwin case. */
1754#ifndef UNALIGNED_WORD_ACCESS
1755# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1756 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1757 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1758 defined(__mc68020__)
1759# define UNALIGNED_WORD_ACCESS 1
1760# endif
1761#endif
1762#ifndef UNALIGNED_WORD_ACCESS
1763# define UNALIGNED_WORD_ACCESS 0
1764#endif
1765
1766/* This hash function is quite simplified MurmurHash3
1767 * Simplification is legal, cause most of magic still happens in finalizator.
1768 * And finalizator is almost the same as in MurmurHash3 */
1769#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1770#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1771
1772#if ST_INDEX_BITS <= 32
1773#define C1 (st_index_t)0xcc9e2d51
1774#define C2 (st_index_t)0x1b873593
1775#else
1776#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1777#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1778#endif
1779NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1780NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1781NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1782
1783static inline st_index_t
1784murmur_step(st_index_t h, st_index_t k)
1785{
1786#if ST_INDEX_BITS <= 32
1787#define r1 (17)
1788#define r2 (11)
1789#else
1790#define r1 (33)
1791#define r2 (24)
1792#endif
1793 k *= C1;
1794 h ^= ROTL(k, r1);
1795 h *= C2;
1796 h = ROTL(h, r2);
1797 return h;
1798}
1799#undef r1
1800#undef r2
1801
1802static inline st_index_t
1803murmur_finish(st_index_t h)
1804{
1805#if ST_INDEX_BITS <= 32
1806#define r1 (16)
1807#define r2 (13)
1808#define r3 (16)
1809 const st_index_t c1 = 0x85ebca6b;
1810 const st_index_t c2 = 0xc2b2ae35;
1811#else
1812/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1813#define r1 (30)
1814#define r2 (27)
1815#define r3 (31)
1816 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1817 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1818#endif
1819#if ST_INDEX_BITS > 64
1820 h ^= h >> 64;
1821 h *= c2;
1822 h ^= h >> 65;
1823#endif
1824 h ^= h >> r1;
1825 h *= c1;
1826 h ^= h >> r2;
1827 h *= c2;
1828 h ^= h >> r3;
1829 return h;
1830}
1831#undef r1
1832#undef r2
1833#undef r3
1834
1835st_index_t
1836st_hash(const void *ptr, size_t len, st_index_t h)
1837{
1838 const char *data = ptr;
1839 st_index_t t = 0;
1840 size_t l = len;
1841
1842#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1843#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1844#if SIZEOF_ST_INDEX_T > 4
1845#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1846#if SIZEOF_ST_INDEX_T > 8
1847#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1848 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1849#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1850#endif
1851#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1852#else
1853#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1854#endif
1855#undef SKIP_TAIL
1856 if (len >= sizeof(st_index_t)) {
1857#if !UNALIGNED_WORD_ACCESS
1858 int align = (int)((st_data_t)data % sizeof(st_index_t));
1859 if (align) {
1860 st_index_t d = 0;
1861 int sl, sr, pack;
1862
1863 switch (align) {
1864#ifdef WORDS_BIGENDIAN
1865# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1866 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1867#else
1868# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1869 t |= data_at(n) << CHAR_BIT*(n)
1870#endif
1871 UNALIGNED_ADD_ALL;
1872#undef UNALIGNED_ADD
1873 }
1874
1875#ifdef WORDS_BIGENDIAN
1876 t >>= (CHAR_BIT * align) - CHAR_BIT;
1877#else
1878 t <<= (CHAR_BIT * align);
1879#endif
1880
1881 data += sizeof(st_index_t)-align;
1882 len -= sizeof(st_index_t)-align;
1883
1884 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1885 sr = CHAR_BIT * align;
1886
1887 while (len >= sizeof(st_index_t)) {
1888 d = *(st_index_t *)data;
1889#ifdef WORDS_BIGENDIAN
1890 t = (t << sr) | (d >> sl);
1891#else
1892 t = (t >> sr) | (d << sl);
1893#endif
1894 h = murmur_step(h, t);
1895 t = d;
1896 data += sizeof(st_index_t);
1897 len -= sizeof(st_index_t);
1898 }
1899
1900 pack = len < (size_t)align ? (int)len : align;
1901 d = 0;
1902 switch (pack) {
1903#ifdef WORDS_BIGENDIAN
1904# define UNALIGNED_ADD(n) case (n) + 1: \
1905 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1906#else
1907# define UNALIGNED_ADD(n) case (n) + 1: \
1908 d |= data_at(n) << CHAR_BIT*(n)
1909#endif
1910 UNALIGNED_ADD_ALL;
1911#undef UNALIGNED_ADD
1912 }
1913#ifdef WORDS_BIGENDIAN
1914 t = (t << sr) | (d >> sl);
1915#else
1916 t = (t >> sr) | (d << sl);
1917#endif
1918
1919 if (len < (size_t)align) goto skip_tail;
1920# define SKIP_TAIL 1
1921 h = murmur_step(h, t);
1922 data += pack;
1923 len -= pack;
1924 }
1925 else
1926#endif
1927#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1928#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1929#else
1930#define aligned_data data
1931#endif
1932 {
1933 do {
1934 h = murmur_step(h, *(st_index_t *)aligned_data);
1935 data += sizeof(st_index_t);
1936 len -= sizeof(st_index_t);
1937 } while (len >= sizeof(st_index_t));
1938 }
1939 }
1940
1941 t = 0;
1942 switch (len) {
1943#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1944 /* in this case byteorder doesn't really matter */
1945#if SIZEOF_ST_INDEX_T > 4
1946 case 7: t |= data_at(6) << 48;
1947 case 6: t |= data_at(5) << 40;
1948 case 5: t |= data_at(4) << 32;
1949 case 4:
1950 t |= (st_index_t)*(uint32_t*)aligned_data;
1951 goto skip_tail;
1952# define SKIP_TAIL 1
1953#endif
1954 case 3: t |= data_at(2) << 16;
1955 case 2: t |= data_at(1) << 8;
1956 case 1: t |= data_at(0);
1957#else
1958#ifdef WORDS_BIGENDIAN
1959# define UNALIGNED_ADD(n) case (n) + 1: \
1960 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1961#else
1962# define UNALIGNED_ADD(n) case (n) + 1: \
1963 t |= data_at(n) << CHAR_BIT*(n)
1964#endif
1965 UNALIGNED_ADD_ALL;
1966#undef UNALIGNED_ADD
1967#endif
1968#ifdef SKIP_TAIL
1969 skip_tail:
1970#endif
1971 h ^= t; h -= ROTL(t, 7);
1972 h *= C2;
1973 }
1974 h ^= l;
1975#undef aligned_data
1976
1977 return murmur_finish(h);
1978}
1979
1980st_index_t
1981st_hash_uint32(st_index_t h, uint32_t i)
1982{
1983 return murmur_step(h, i);
1984}
1985
1986NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1987st_index_t
1988st_hash_uint(st_index_t h, st_index_t i)
1989{
1990 i += h;
1991/* no matter if it is BigEndian or LittleEndian,
1992 * we hash just integers */
1993#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1994 h = murmur_step(h, i >> 8*8);
1995#endif
1996 h = murmur_step(h, i);
1997 return h;
1998}
1999
2000st_index_t
2001st_hash_end(st_index_t h)
2002{
2003 h = murmur_finish(h);
2004 return h;
2005}
2006
2007#undef st_hash_start
2008st_index_t
2009rb_st_hash_start(st_index_t h)
2010{
2011 return h;
2012}
2013
2014static st_index_t
2015strhash(st_data_t arg)
2016{
2017 register const char *string = (const char *)arg;
2018 return st_hash(string, strlen(string), FNV1_32A_INIT);
2019}
2020
2021int
2022st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2023{
2024 char c1, c2;
2025
2026 while (1) {
2027 c1 = *s1++;
2028 c2 = *s2++;
2029 if (c1 == '\0' || c2 == '\0') {
2030 if (c1 != '\0') return 1;
2031 if (c2 != '\0') return -1;
2032 return 0;
2033 }
2034 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2035 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2036 if (c1 != c2) {
2037 if (c1 > c2)
2038 return 1;
2039 else
2040 return -1;
2041 }
2042 }
2043}
2044
2045int
2046st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2047{
2048 char c1, c2;
2049 size_t i;
2050
2051 for (i = 0; i < n; i++) {
2052 c1 = *s1++;
2053 c2 = *s2++;
2054 if (c1 == '\0' || c2 == '\0') {
2055 if (c1 != '\0') return 1;
2056 if (c2 != '\0') return -1;
2057 return 0;
2058 }
2059 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2060 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2061 if (c1 != c2) {
2062 if (c1 > c2)
2063 return 1;
2064 else
2065 return -1;
2066 }
2067 }
2068 return 0;
2069}
2070
2071static int
2072st_strcmp(st_data_t lhs, st_data_t rhs)
2073{
2074 const char *s1 = (char *)lhs;
2075 const char *s2 = (char *)rhs;
2076 return strcmp(s1, s2);
2077}
2078
2079static int
2080st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2081{
2082 const char *s1 = (char *)lhs;
2083 const char *s2 = (char *)rhs;
2084 return st_locale_insensitive_strcasecmp(s1, s2);
2085}
2086
2087NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2088static st_index_t
2089strcasehash(st_data_t arg)
2090{
2091 register const char *string = (const char *)arg;
2092 register st_index_t hval = FNV1_32A_INIT;
2093
2094 /*
2095 * FNV-1a hash each octet in the buffer
2096 */
2097 while (*string) {
2098 unsigned int c = (unsigned char)*string++;
2099 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2100 hval ^= c;
2101
2102 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2103 hval *= FNV_32_PRIME;
2104 }
2105 return hval;
2106}
2107
2108int
2109st_numcmp(st_data_t x, st_data_t y)
2110{
2111 return x != y;
2112}
2113
2114st_index_t
2115st_numhash(st_data_t n)
2116{
2117 enum {s1 = 11, s2 = 3};
2118 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2119}
2120
2121#ifdef RUBY
2122/* Expand TAB to be suitable for holding SIZ entries in total.
2123 Pre-existing entries remain not deleted inside of TAB, but its bins
2124 are cleared to expect future reconstruction. See rehash below. */
2125static void
2126st_expand_table(st_table *tab, st_index_t siz)
2127{
2128 st_table *tmp;
2129 st_index_t n;
2130
2131 if (siz <= get_allocated_entries(tab))
2132 return; /* enough room already */
2133
2134 tmp = st_init_table_with_size(tab->type, siz);
2135 n = get_allocated_entries(tab);
2136 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2137 free(tab->entries);
2138 free(tab->bins);
2139 free(tmp->bins);
2140 tab->entry_power = tmp->entry_power;
2141 tab->bin_power = tmp->bin_power;
2142 tab->size_ind = tmp->size_ind;
2143 tab->entries = tmp->entries;
2144 tab->bins = NULL;
2145 tab->rebuilds_num++;
2146 free(tmp);
2147}
2148
2149/* Rehash using linear search. Return TRUE if we found that the table
2150 was rebuilt. */
2151static int
2152st_rehash_linear(st_table *tab)
2153{
2154 int eq_p, rebuilt_p;
2155 st_index_t i, j;
2156 st_table_entry *p, *q;
2157
2158 free(tab->bins);
2159 tab->bins = NULL;
2160
2161 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2162 p = &tab->entries[i];
2163 if (DELETED_ENTRY_P(p))
2164 continue;
2165 for (j = i + 1; j < tab->entries_bound; j++) {
2166 q = &tab->entries[j];
2167 if (DELETED_ENTRY_P(q))
2168 continue;
2169 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2170 if (EXPECT(rebuilt_p, 0))
2171 return TRUE;
2172 if (eq_p) {
2173 *p = *q;
2174 MARK_ENTRY_DELETED(q);
2175 tab->num_entries--;
2176 update_range_for_deleted(tab, j);
2177 }
2178 }
2179 }
2180 return FALSE;
2181}
2182
2183/* Rehash using index. Return TRUE if we found that the table was
2184 rebuilt. */
2185static int
2186st_rehash_indexed(st_table *tab)
2187{
2188 int eq_p, rebuilt_p;
2189 st_index_t i;
2190 st_index_t const n = bins_size(tab);
2191 unsigned int const size_ind = get_size_ind(tab);
2192 st_index_t *bins = realloc(tab->bins, n);
2193 tab->bins = bins;
2194 initialize_bins(tab);
2195 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2196 st_table_entry *p = &tab->entries[i];
2197 st_index_t ind;
2198#ifdef QUADRATIC_PROBE
2199 st_index_t d = 1;
2200#else
2201 st_index_t perturb = p->hash;
2202#endif
2203
2204 if (DELETED_ENTRY_P(p))
2205 continue;
2206
2207 ind = hash_bin(p->hash, tab);
2208 for (;;) {
2209 st_index_t bin = get_bin(bins, size_ind, ind);
2210 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2211 /* ok, new room */
2212 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2213 break;
2214 }
2215 else {
2216 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2217 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2218 if (EXPECT(rebuilt_p, 0))
2219 return TRUE;
2220 if (eq_p) {
2221 /* duplicated key; delete it */
2222 q->record = p->record;
2223 MARK_ENTRY_DELETED(p);
2224 tab->num_entries--;
2225 update_range_for_deleted(tab, bin);
2226 break;
2227 }
2228 else {
2229 /* hash collision; skip it */
2230#ifdef QUADRATIC_PROBE
2231 ind = hash_bin(ind + d, tab);
2232 d++;
2233#else
2234 ind = secondary_hash(ind, tab, &perturb);
2235#endif
2236 }
2237 }
2238 }
2239 }
2240 return FALSE;
2241}
2242
2243/* Reconstruct TAB's bins according to TAB's entries. This function
2244 permits conflicting keys inside of entries. No errors are reported
2245 then. All but one of them are discarded silently. */
2246static void
2247st_rehash(st_table *tab)
2248{
2249 int rebuilt_p;
2250
2251 do {
2252 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2253 rebuilt_p = st_rehash_linear(tab);
2254 else
2255 rebuilt_p = st_rehash_indexed(tab);
2256 } while (rebuilt_p);
2257}
2258
2259static st_data_t
2260st_stringify(VALUE key)
2261{
2262 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2263 rb_hash_key_str(key) : key;
2264}
2265
2266static void
2267st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2268{
2269 st_data_t k = st_stringify(key);
2271 e.hash = do_hash(k, tab);
2272 e.key = k;
2273 e.record = val;
2274
2275 tab->entries[tab->entries_bound++] = e;
2276 tab->num_entries++;
2277 RB_OBJ_WRITTEN(hash, Qundef, k);
2278 RB_OBJ_WRITTEN(hash, Qundef, val);
2279}
2280
2281static void
2282st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2283{
2284 long i;
2285
2286 for (i = 0; i < argc; /* */) {
2287 st_data_t k = st_stringify(argv[i++]);
2288 st_data_t v = argv[i++];
2289 st_insert(tab, k, v);
2290 RB_OBJ_WRITTEN(hash, Qundef, k);
2291 RB_OBJ_WRITTEN(hash, Qundef, v);
2292 }
2293}
2294
2295static void
2296st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2297{
2298 long i;
2299
2300 /* push elems */
2301 for (i = 0; i < argc; /* */) {
2302 VALUE key = argv[i++];
2303 VALUE val = argv[i++];
2304 st_insert_single(tab, hash, key, val);
2305 }
2306
2307 /* reindex */
2308 st_rehash(tab);
2309}
2310
2311/* Mimics ruby's { foo => bar } syntax. This function is subpart
2312 of rb_hash_bulk_insert. */
2313void
2314rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2315{
2316 st_index_t n, size = argc / 2;
2317 st_table *tab = RHASH_ST_TABLE(hash);
2318
2319 tab = RHASH_TBL_RAW(hash);
2320 n = tab->entries_bound + size;
2321 st_expand_table(tab, n);
2322 if (UNLIKELY(tab->num_entries))
2323 st_insert_generic(tab, argc, argv, hash);
2324 else if (argc <= 2)
2325 st_insert_single(tab, hash, argv[0], argv[1]);
2326 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2327 st_insert_linear(tab, argc, argv, hash);
2328 else
2329 st_insert_generic(tab, argc, argv, hash);
2330}
2331
2332void
2333rb_st_compact_table(st_table *tab)
2334{
2335 st_index_t num = tab->num_entries;
2336 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2337 /* Compaction: */
2338 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2339 rebuild_table_with(new_tab, tab);
2340 rebuild_move_table(new_tab, tab);
2341 rebuild_cleanup(tab);
2342 }
2343}
2344
2345/*
2346 * set_table related code
2347 */
2348
2349struct set_table_entry {
2350 st_hash_t hash;
2351 st_data_t key;
2352};
2353
2354/* Return hash value of KEY for table TAB. */
2355static inline st_hash_t
2356set_do_hash(st_data_t key, set_table *tab)
2357{
2358 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2359 return normalize_hash_value(hash);
2360}
2361
2362/* Return bin size index of table TAB. */
2363static inline unsigned int
2364set_get_size_ind(const set_table *tab)
2365{
2366 return tab->size_ind;
2367}
2368
2369/* Return the number of allocated bins of table TAB. */
2370static inline st_index_t
2371set_get_bins_num(const set_table *tab)
2372{
2373 return ((st_index_t) 1)<<tab->bin_power;
2374}
2375
2376/* Return mask for a bin index in table TAB. */
2377static inline st_index_t
2378set_bins_mask(const set_table *tab)
2379{
2380 return set_get_bins_num(tab) - 1;
2381}
2382
2383/* Return the index of table TAB bin corresponding to
2384 HASH_VALUE. */
2385static inline st_index_t
2386set_hash_bin(st_hash_t hash_value, set_table *tab)
2387{
2388 return hash_value & set_bins_mask(tab);
2389}
2390
2391/* Return the number of allocated entries of table TAB. */
2392static inline st_index_t
2393set_get_allocated_entries(const set_table *tab)
2394{
2395 return ((st_index_t) 1)<<tab->entry_power;
2396}
2397
2398/* Return size of the allocated bins of table TAB. */
2399static inline st_index_t
2400set_bins_size(const set_table *tab)
2401{
2402 return features[tab->entry_power].bins_words * sizeof (st_index_t);
2403}
2404
2405/* Mark all bins of table TAB as empty. */
2406static void
2407set_initialize_bins(set_table *tab)
2408{
2409 memset(tab->bins, 0, set_bins_size(tab));
2410}
2411
2412/* Make table TAB empty. */
2413static void
2414set_make_tab_empty(set_table *tab)
2415{
2416 tab->num_entries = 0;
2417 tab->entries_start = tab->entries_bound = 0;
2418 if (tab->bins != NULL)
2419 set_initialize_bins(tab);
2420}
2421
2422static set_table *
2423set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2424{
2425 int n;
2426
2427#ifdef HASH_LOG
2428#if HASH_LOG+0 < 0
2429 {
2430 const char *e = getenv("ST_HASH_LOG");
2431 if (!e || !*e) init_st = 1;
2432 }
2433#endif
2434 if (init_st == 0) {
2435 init_st = 1;
2436 atexit(stat_col);
2437 }
2438#endif
2439
2440 n = get_power2(size);
2441
2442 tab->type = type;
2443 tab->entry_power = n;
2444 tab->bin_power = features[n].bin_power;
2445 tab->size_ind = features[n].size_ind;
2446 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2447 tab->bins = NULL;
2448 else {
2449 tab->bins = (st_index_t *) malloc(set_bins_size(tab));
2450 }
2451 tab->entries = (set_table_entry *) malloc(set_get_allocated_entries(tab)
2452 * sizeof(set_table_entry));
2453 set_make_tab_empty(tab);
2454 tab->rebuilds_num = 0;
2455 return tab;
2456}
2457
2458/* Create and return table with TYPE which can hold at least SIZE
2459 entries. The real number of entries which the table can hold is
2460 the nearest power of two for SIZE. */
2461set_table *
2462set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2463{
2464 if (tab == NULL) tab = malloc(sizeof(set_table));
2465
2466 set_init_existing_table_with_size(tab, type, size);
2467
2468 return tab;
2469}
2470
2471set_table *
2472set_init_numtable(void)
2473{
2474 return set_init_table_with_size(NULL, &type_numhash, 0);
2475}
2476
2477set_table *
2478set_init_numtable_with_size(st_index_t size)
2479{
2480 return set_init_table_with_size(NULL, &type_numhash, size);
2481}
2482
2483size_t
2484set_table_size(const struct set_table *tbl)
2485{
2486 return tbl->num_entries;
2487}
2488
2489/* Make table TAB empty. */
2490void
2491set_clear(set_table *tab)
2492{
2493 set_make_tab_empty(tab);
2494 tab->rebuilds_num++;
2495}
2496
2497/* Free table TAB space. This should only be used if you passed NULL to
2498 set_init_table_with_size/set_copy when creating the table. */
2499void
2500set_free_table(set_table *tab)
2501{
2502 free(tab->bins);
2503 free(tab->entries);
2504 free(tab);
2505}
2506
2507/* Return byte size of memory allocated for table TAB. */
2508size_t
2509set_memsize(const set_table *tab)
2510{
2511 return(sizeof(set_table)
2512 + (tab->bins == NULL ? 0 : set_bins_size(tab))
2513 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2514}
2515
2516static st_index_t
2517set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2518
2519static st_index_t
2520set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2521
2522static st_index_t
2523set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key);
2524
2525static st_index_t
2526set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2527 st_data_t key, st_index_t *bin_ind);
2528
2529static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
2530static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
2531static void set_rebuild_cleanup(set_table *const tab);
2532
2533/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
2534 and can change size of the table entries and bins arrays.
2535 Rebuilding is implemented by creation of a new table or by
2536 compaction of the existing one. */
2537static void
2538set_rebuild_table(set_table *tab)
2539{
2540 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2541 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2542 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2543 /* Compaction: */
2544 tab->num_entries = 0;
2545 if (tab->bins != NULL)
2546 set_initialize_bins(tab);
2547 set_rebuild_table_with(tab, tab);
2548 }
2549 else {
2550 set_table *new_tab;
2551 /* This allocation could trigger GC and compaction. If tab is the
2552 * gen_fields_tbl, then tab could have changed in size due to objects being
2553 * freed and/or moved. Do not store attributes of tab before this line. */
2554 new_tab = set_init_table_with_size(NULL, tab->type,
2555 2 * tab->num_entries - 1);
2556 set_rebuild_table_with(new_tab, tab);
2557 set_rebuild_move_table(new_tab, tab);
2558 }
2559 set_rebuild_cleanup(tab);
2560}
2561
2562static void
2563set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
2564{
2565 st_index_t i, ni;
2566 unsigned int size_ind;
2567 set_table_entry *new_entries;
2568 set_table_entry *curr_entry_ptr;
2569 st_index_t *bins;
2570 st_index_t bin_ind;
2571
2572 new_entries = new_tab->entries;
2573
2574 ni = 0;
2575 bins = new_tab->bins;
2576 size_ind = set_get_size_ind(new_tab);
2577 st_index_t bound = tab->entries_bound;
2578 set_table_entry *entries = tab->entries;
2579
2580 for (i = tab->entries_start; i < bound; i++) {
2581 curr_entry_ptr = &entries[i];
2582 PREFETCH(entries + i + 1, 0);
2583 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2584 continue;
2585 if (&new_entries[ni] != curr_entry_ptr)
2586 new_entries[ni] = *curr_entry_ptr;
2587 if (EXPECT(bins != NULL, 1)) {
2588 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2589 curr_entry_ptr->key);
2590 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2591 }
2592 new_tab->num_entries++;
2593 ni++;
2594 }
2595
2596 assert(new_tab->num_entries == tab->num_entries);
2597}
2598
2599static void
2600set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
2601{
2602 tab->entry_power = new_tab->entry_power;
2603 tab->bin_power = new_tab->bin_power;
2604 tab->size_ind = new_tab->size_ind;
2605 free(tab->bins);
2606 tab->bins = new_tab->bins;
2607 free(tab->entries);
2608 tab->entries = new_tab->entries;
2609 free(new_tab);
2610}
2611
2612static void
2613set_rebuild_cleanup(set_table *const tab)
2614{
2615 tab->entries_start = 0;
2616 tab->entries_bound = tab->num_entries;
2617 tab->rebuilds_num++;
2618}
2619
2620/* Return the next secondary hash index for table TAB using previous
2621 index IND and PERTURB. Finally modulo of the function becomes a
2622 full *cycle linear congruential generator*, in other words it
2623 guarantees traversing all table bins in extreme case.
2624
2625 According the Hull-Dobell theorem a generator
2626 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
2627 o m and c are relatively prime
2628 o a-1 is divisible by all prime factors of m
2629 o a-1 is divisible by 4 if m is divisible by 4.
2630
2631 For our case a is 5, c is 1, and m is a power of two. */
2632static inline st_index_t
2633set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
2634{
2635 *perturb >>= 11;
2636 ind = (ind << 2) + ind + *perturb + 1;
2637 return set_hash_bin(ind, tab);
2638}
2639
2640/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
2641 search. Return the index of the found entry in array `entries`.
2642 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
2643 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2644static inline st_index_t
2645set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
2646{
2647 int eq_p, rebuilt_p;
2648 st_index_t i, bound;
2649 set_table_entry *entries;
2650
2651 bound = tab->entries_bound;
2652 entries = tab->entries;
2653 for (i = tab->entries_start; i < bound; i++) {
2654 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2655 if (EXPECT(rebuilt_p, 0))
2656 return REBUILT_TABLE_ENTRY_IND;
2657 if (eq_p)
2658 return i;
2659 }
2660 return UNDEFINED_ENTRY_IND;
2661}
2662
2663/* Use the quadratic probing. The method has a better data locality
2664 but more collisions than the current approach. In average it
2665 results in a bit slower search. */
2666/*#define QUADRATIC_PROBE*/
2667
2668/* Return index of entry with HASH_VALUE and KEY in table TAB. If
2669 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
2670 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2671static st_index_t
2672set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2673{
2674 int eq_p, rebuilt_p;
2675 st_index_t ind;
2676#ifdef QUADRATIC_PROBE
2677 st_index_t d;
2678#else
2679 st_index_t perturb;
2680#endif
2681 st_index_t bin;
2682 set_table_entry *entries = tab->entries;
2683
2684 ind = set_hash_bin(hash_value, tab);
2685#ifdef QUADRATIC_PROBE
2686 d = 1;
2687#else
2688 perturb = hash_value;
2689#endif
2690 for (;;) {
2691 bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
2692 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2693 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2694 if (EXPECT(rebuilt_p, 0))
2695 return REBUILT_TABLE_ENTRY_IND;
2696 if (eq_p)
2697 break;
2698 }
2699 else if (EMPTY_BIN_P(bin))
2700 return UNDEFINED_ENTRY_IND;
2701#ifdef QUADRATIC_PROBE
2702 ind = set_hash_bin(ind + d, tab);
2703 d++;
2704#else
2705 ind = set_secondary_hash(ind, tab, &perturb);
2706#endif
2707 }
2708 return bin;
2709}
2710
2711/* Find and return index of table TAB bin corresponding to an entry
2712 with HASH_VALUE and KEY. If there is no such bin, return
2713 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
2714 return REBUILT_TABLE_BIN_IND. */
2715static st_index_t
2716set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2717{
2718 int eq_p, rebuilt_p;
2719 st_index_t ind;
2720#ifdef QUADRATIC_PROBE
2721 st_index_t d;
2722#else
2723 st_index_t perturb;
2724#endif
2725 st_index_t bin;
2726 set_table_entry *entries = tab->entries;
2727
2728 ind = set_hash_bin(hash_value, tab);
2729#ifdef QUADRATIC_PROBE
2730 d = 1;
2731#else
2732 perturb = hash_value;
2733#endif
2734 for (;;) {
2735 bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
2736 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2737 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2738 if (EXPECT(rebuilt_p, 0))
2739 return REBUILT_TABLE_BIN_IND;
2740 if (eq_p)
2741 break;
2742 }
2743 else if (EMPTY_BIN_P(bin))
2744 return UNDEFINED_BIN_IND;
2745#ifdef QUADRATIC_PROBE
2746 ind = set_hash_bin(ind + d, tab);
2747 d++;
2748#else
2749 ind = set_secondary_hash(ind, tab, &perturb);
2750#endif
2751 }
2752 return ind;
2753}
2754
2755/* Find and return index of table TAB bin corresponding to an entry
2756 with HASH_VALUE and KEY. The entry should be in the table
2757 already. */
2758static st_index_t
2759set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
2760{
2761 st_index_t ind;
2762#ifdef QUADRATIC_PROBE
2763 st_index_t d;
2764#else
2765 st_index_t perturb;
2766#endif
2767 st_index_t bin;
2768
2769 ind = set_hash_bin(hash_value, tab);
2770#ifdef QUADRATIC_PROBE
2771 d = 1;
2772#else
2773 perturb = hash_value;
2774#endif
2775 for (;;) {
2776 bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
2777 if (EMPTY_OR_DELETED_BIN_P(bin))
2778 return ind;
2779#ifdef QUADRATIC_PROBE
2780 ind = set_hash_bin(ind + d, tab);
2781 d++;
2782#else
2783 ind = set_secondary_hash(ind, tab, &perturb);
2784#endif
2785 }
2786}
2787
2788/* Mark I-th bin of table TAB as empty, in other words not
2789 corresponding to any entry. */
2790#define MARK_SET_BIN_EMPTY(tab, i) (set_bin((tab)->bins, set_get_size_ind(tab), i, EMPTY_BIN))
2791
2792/* Return index of table TAB bin for HASH_VALUE and KEY through
2793 BIN_IND and the pointed value as the function result. Reserve the
2794 bin for inclusion of the corresponding entry into the table if it
2795 is not there yet. We always find such bin as bins array length is
2796 bigger entries array. Although we can reuse a deleted bin, the
2797 result bin value is always empty if the table has no entry with
2798 KEY. Return the entries array index of the found entry or
2799 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
2800 during the search, return REBUILT_TABLE_ENTRY_IND. */
2801static st_index_t
2802set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2803 st_data_t key, st_index_t *bin_ind)
2804{
2805 int eq_p, rebuilt_p;
2806 st_index_t ind;
2807 st_hash_t curr_hash_value = *hash_value;
2808#ifdef QUADRATIC_PROBE
2809 st_index_t d;
2810#else
2811 st_index_t perturb;
2812#endif
2813 st_index_t entry_index;
2814 st_index_t firset_deleted_bin_ind;
2815 set_table_entry *entries;
2816
2817 ind = set_hash_bin(curr_hash_value, tab);
2818#ifdef QUADRATIC_PROBE
2819 d = 1;
2820#else
2821 perturb = curr_hash_value;
2822#endif
2823 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2824 entries = tab->entries;
2825 for (;;) {
2826 entry_index = get_bin(tab->bins, set_get_size_ind(tab), ind);
2827 if (EMPTY_BIN_P(entry_index)) {
2828 tab->num_entries++;
2829 entry_index = UNDEFINED_ENTRY_IND;
2830 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2831 /* We can reuse bin of a deleted entry. */
2832 ind = firset_deleted_bin_ind;
2833 MARK_SET_BIN_EMPTY(tab, ind);
2834 }
2835 break;
2836 }
2837 else if (! DELETED_BIN_P(entry_index)) {
2838 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2839 if (EXPECT(rebuilt_p, 0))
2840 return REBUILT_TABLE_ENTRY_IND;
2841 if (eq_p)
2842 break;
2843 }
2844 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2845 firset_deleted_bin_ind = ind;
2846#ifdef QUADRATIC_PROBE
2847 ind = set_hash_bin(ind + d, tab);
2848 d++;
2849#else
2850 ind = set_secondary_hash(ind, tab, &perturb);
2851#endif
2852 }
2853 *bin_ind = ind;
2854 return entry_index;
2855}
2856
2857/* Find an entry with KEY in table TAB. Return non-zero if we found
2858 it. */
2859int
2860set_lookup(set_table *tab, st_data_t key)
2861{
2862 st_index_t bin;
2863 st_hash_t hash = set_do_hash(key, tab);
2864
2865 retry:
2866 if (tab->bins == NULL) {
2867 bin = set_find_entry(tab, hash, key);
2868 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2869 goto retry;
2870 if (bin == UNDEFINED_ENTRY_IND)
2871 return 0;
2872 }
2873 else {
2874 bin = set_find_table_entry_ind(tab, hash, key);
2875 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2876 goto retry;
2877 if (bin == UNDEFINED_ENTRY_IND)
2878 return 0;
2879 bin -= ENTRY_BASE;
2880 }
2881 return 1;
2882}
2883
2884/* Check the table and rebuild it if it is necessary. */
2885static inline void
2886set_rebuild_table_if_necessary (set_table *tab)
2887{
2888 st_index_t bound = tab->entries_bound;
2889
2890 if (bound == set_get_allocated_entries(tab))
2891 set_rebuild_table(tab);
2892}
2893
2894/* Insert KEY into table TAB and return zero. If there is
2895 already entry with KEY in the table, return nonzero and update
2896 the value of the found entry. */
2897int
2898set_insert(set_table *tab, st_data_t key)
2899{
2900 set_table_entry *entry;
2901 st_index_t bin;
2902 st_index_t ind;
2903 st_hash_t hash_value;
2904 st_index_t bin_ind;
2905 int new_p;
2906
2907 hash_value = set_do_hash(key, tab);
2908 retry:
2909 set_rebuild_table_if_necessary(tab);
2910 if (tab->bins == NULL) {
2911 bin = set_find_entry(tab, hash_value, key);
2912 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2913 goto retry;
2914 new_p = bin == UNDEFINED_ENTRY_IND;
2915 if (new_p)
2916 tab->num_entries++;
2917 bin_ind = UNDEFINED_BIN_IND;
2918 }
2919 else {
2920 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
2921 key, &bin_ind);
2922 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2923 goto retry;
2924 new_p = bin == UNDEFINED_ENTRY_IND;
2925 bin -= ENTRY_BASE;
2926 }
2927 if (new_p) {
2928 ind = tab->entries_bound++;
2929 entry = &tab->entries[ind];
2930 entry->hash = hash_value;
2931 entry->key = key;
2932 if (bin_ind != UNDEFINED_BIN_IND)
2933 set_bin(tab->bins, set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
2934 return 0;
2935 }
2936 return 1;
2937}
2938
2939/* Create a copy of old_tab into new_tab. */
2940static set_table *
2941set_replace(set_table *new_tab, set_table *old_tab)
2942{
2943 *new_tab = *old_tab;
2944 if (old_tab->bins == NULL)
2945 new_tab->bins = NULL;
2946 else {
2947 new_tab->bins = (st_index_t *) malloc(set_bins_size(old_tab));
2948 }
2949 new_tab->entries = (set_table_entry *) malloc(set_get_allocated_entries(old_tab)
2950 * sizeof(set_table_entry));
2951 MEMCPY(new_tab->entries, old_tab->entries, set_table_entry,
2952 set_get_allocated_entries(old_tab));
2953 if (old_tab->bins != NULL)
2954 MEMCPY(new_tab->bins, old_tab->bins, char, set_bins_size(old_tab));
2955
2956 return new_tab;
2957}
2958
2959/* Create and return a copy of table OLD_TAB. */
2960set_table *
2961set_copy(set_table *new_tab, set_table *old_tab)
2962{
2963 if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
2964
2965 if (set_replace(new_tab, old_tab) == NULL) {
2966 set_free_table(new_tab);
2967 return NULL;
2968 }
2969
2970 return new_tab;
2971}
2972
2973/* Update the entries start of table TAB after removing an entry
2974 with index N in the array entries. */
2975static inline void
2976set_update_range_for_deleted(set_table *tab, st_index_t n)
2977{
2978 /* Do not update entries_bound here. Otherwise, we can fill all
2979 bins by deleted entry value before rebuilding the table. */
2980 if (tab->entries_start == n) {
2981 st_index_t start = n + 1;
2982 st_index_t bound = tab->entries_bound;
2983 set_table_entry *entries = tab->entries;
2984 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
2985 tab->entries_start = start;
2986 }
2987}
2988
2989/* Mark I-th bin of table TAB as corresponding to a deleted table
2990 entry. Update number of entries in the table and number of bins
2991 corresponding to deleted entries. */
2992#define MARK_SET_BIN_DELETED(tab, i) \
2993 do { \
2994 set_bin((tab)->bins, set_get_size_ind(tab), i, DELETED_BIN); \
2995 } while (0)
2996
2997/* Delete entry with KEY from table TAB, and return non-zero. If
2998 there is no entry with KEY in the table, return zero. */
2999int
3000set_delete(set_table *tab, st_data_t *key)
3001{
3002 set_table_entry *entry;
3003 st_index_t bin;
3004 st_index_t bin_ind;
3005 st_hash_t hash;
3006
3007 hash = set_do_hash(*key, tab);
3008 retry:
3009 if (tab->bins == NULL) {
3010 bin = set_find_entry(tab, hash, *key);
3011 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3012 goto retry;
3013 if (bin == UNDEFINED_ENTRY_IND) {
3014 return 0;
3015 }
3016 }
3017 else {
3018 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3019 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3020 goto retry;
3021 if (bin_ind == UNDEFINED_BIN_IND) {
3022 return 0;
3023 }
3024 bin = get_bin(tab->bins, set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3025 MARK_SET_BIN_DELETED(tab, bin_ind);
3026 }
3027 entry = &tab->entries[bin];
3028 *key = entry->key;
3029 MARK_ENTRY_DELETED(entry);
3030 tab->num_entries--;
3031 set_update_range_for_deleted(tab, bin);
3032 return 1;
3033}
3034
3035/* Traverse all entries in table TAB calling FUNC with current entry
3036 key and zero. If the call returns ST_STOP, stop
3037 traversing. If the call returns ST_DELETE, delete the current
3038 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
3039 traversing. The function returns zero unless an error is found.
3040 CHECK_P is flag of set_foreach_check call. The behavior is a bit
3041 different for ST_CHECK and when the current element is removed
3042 during traversing. */
3043static inline int
3044set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
3045 set_update_callback_func *replace, st_data_t arg,
3046 int check_p)
3047{
3048 st_index_t bin;
3049 st_index_t bin_ind;
3050 set_table_entry *entries, *curr_entry_ptr;
3051 enum st_retval retval;
3052 st_index_t i, rebuilds_num;
3053 st_hash_t hash;
3054 st_data_t key;
3055 int error_p, packed_p = tab->bins == NULL;
3056
3057 entries = tab->entries;
3058 /* The bound can change inside the loop even without rebuilding
3059 the table, e.g. by an entry insertion. */
3060 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3061 curr_entry_ptr = &entries[i];
3062 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3063 continue;
3064 key = curr_entry_ptr->key;
3065 rebuilds_num = tab->rebuilds_num;
3066 hash = curr_entry_ptr->hash;
3067 retval = (*func)(key, arg, 0);
3068
3069 if (retval == ST_REPLACE && replace) {
3070 retval = (*replace)(&key, arg, TRUE);
3071 curr_entry_ptr->key = key;
3072 }
3073
3074 if (rebuilds_num != tab->rebuilds_num) {
3075 retry:
3076 entries = tab->entries;
3077 packed_p = tab->bins == NULL;
3078 if (packed_p) {
3079 i = set_find_entry(tab, hash, key);
3080 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3081 goto retry;
3082 error_p = i == UNDEFINED_ENTRY_IND;
3083 }
3084 else {
3085 i = set_find_table_entry_ind(tab, hash, key);
3086 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3087 goto retry;
3088 error_p = i == UNDEFINED_ENTRY_IND;
3089 i -= ENTRY_BASE;
3090 }
3091 if (error_p && check_p) {
3092 /* call func with error notice */
3093 retval = (*func)(0, arg, 1);
3094 return 1;
3095 }
3096 curr_entry_ptr = &entries[i];
3097 }
3098 switch (retval) {
3099 case ST_REPLACE:
3100 break;
3101 case ST_CONTINUE:
3102 break;
3103 case ST_CHECK:
3104 if (check_p)
3105 break;
3106 case ST_STOP:
3107 return 0;
3108 case ST_DELETE: {
3109 st_data_t key = curr_entry_ptr->key;
3110
3111 again:
3112 if (packed_p) {
3113 bin = set_find_entry(tab, hash, key);
3114 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3115 goto again;
3116 if (bin == UNDEFINED_ENTRY_IND)
3117 break;
3118 }
3119 else {
3120 bin_ind = set_find_table_bin_ind(tab, hash, key);
3121 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3122 goto again;
3123 if (bin_ind == UNDEFINED_BIN_IND)
3124 break;
3125 bin = get_bin(tab->bins, set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3126 MARK_SET_BIN_DELETED(tab, bin_ind);
3127 }
3128 curr_entry_ptr = &entries[bin];
3129 MARK_ENTRY_DELETED(curr_entry_ptr);
3130 tab->num_entries--;
3131 set_update_range_for_deleted(tab, bin);
3132 break;
3133 }
3134 }
3135 }
3136 return 0;
3137}
3138
3139int
3140set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3141{
3142 return set_general_foreach(tab, func, replace, arg, TRUE);
3143}
3144
3145struct set_functor {
3146 set_foreach_callback_func *func;
3147 st_data_t arg;
3148};
3149
3150static int
3151set_apply_functor(st_data_t k, st_data_t d, int _)
3152{
3153 const struct set_functor *f = (void *)d;
3154 return f->func(k, f->arg);
3155}
3156
3157int
3158set_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3159{
3160 const struct set_functor f = { func, arg };
3161 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3162}
3163
3164/* See comments for function set_delete_safe. */
3165int
3166set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3167 st_data_t never ATTRIBUTE_UNUSED)
3168{
3169 return set_general_foreach(tab, func, NULL, arg, TRUE);
3170}
3171
3172/* Set up array KEYS by at most SIZE keys of head table TAB entries.
3173 Return the number of keys set up in array KEYS. */
3174inline st_index_t
3175set_keys(set_table *tab, st_data_t *keys, st_index_t size)
3176{
3177 st_index_t i, bound;
3178 st_data_t key, *keys_start, *keys_end;
3179 set_table_entry *curr_entry_ptr, *entries = tab->entries;
3180
3181 bound = tab->entries_bound;
3182 keys_start = keys;
3183 keys_end = keys + size;
3184 for (i = tab->entries_start; i < bound; i++) {
3185 if (keys == keys_end)
3186 break;
3187 curr_entry_ptr = &entries[i];
3188 key = curr_entry_ptr->key;
3189 if (! DELETED_ENTRY_P(curr_entry_ptr))
3190 *keys++ = key;
3191 }
3192
3193 return keys - keys_start;
3194}
3195
3196void
3197set_compact_table(set_table *tab)
3198{
3199 st_index_t num = tab->num_entries;
3200 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3201 /* Compaction: */
3202 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3203 set_rebuild_table_with(new_tab, tab);
3204 set_rebuild_move_table(new_tab, tab);
3205 set_rebuild_cleanup(tab);
3206 }
3207}
3208
3209#endif
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:892
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1428
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:243
VALUE rb_cString
String class.
Definition string.c:82
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:615
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
Definition st.c:136
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40