Ruby 4.1.0dev (2026-01-07 revision 25c72b0e8e206e5baec71d4ece7551b7da7da445)
st.c (25c72b0e8e206e5baec71d4ece7551b7da7da445)
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/hash.h"
111#include "internal/sanitizers.h"
112#include "internal/set_table.h"
113#include "internal/st.h"
114#include "ruby_assert.h"
115#endif
116
117#include <stdio.h>
118#ifdef HAVE_STDLIB_H
119#include <stdlib.h>
120#endif
121#include <string.h>
122
123#ifdef __GNUC__
124#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
125#define EXPECT(expr, val) __builtin_expect(expr, val)
126#define ATTRIBUTE_UNUSED __attribute__((unused))
127#else
128#define PREFETCH(addr, write_p)
129#define EXPECT(expr, val) (expr)
130#define ATTRIBUTE_UNUSED
131#endif
132
133/* The type of hashes. */
134typedef st_index_t st_hash_t;
135
137 st_hash_t hash;
138 st_data_t key;
139 st_data_t record;
140};
141
142#define type_numhash st_hashtype_num
143static const struct st_hash_type st_hashtype_num = {
144 st_numcmp,
145 st_numhash,
146};
147
148static int st_strcmp(st_data_t, st_data_t);
149static st_index_t strhash(st_data_t);
150static const struct st_hash_type type_strhash = {
151 st_strcmp,
152 strhash,
153};
154
155static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
156static st_index_t strcasehash(st_data_t);
157static const struct st_hash_type type_strcasehash = {
158 st_locale_insensitive_strcasecmp_i,
159 strcasehash,
160};
161
162/* Value used to catch uninitialized entries/bins during debugging.
163 There is a possibility for a false alarm, but its probability is
164 extremely small. */
165#define ST_INIT_VAL 0xafafafafafafafaf
166#define ST_INIT_VAL_BYTE 0xafa
167
168#ifdef RUBY
169#undef malloc
170#undef realloc
171#undef calloc
172#undef free
173#define malloc ruby_xmalloc
174#define calloc ruby_xcalloc
175#define realloc ruby_xrealloc
176#define free ruby_xfree
177#endif
178
179#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
180#define PTR_EQUAL(tab, ptr, hash_val, key_) \
181 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
182
183/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
184 up to TRUE if the table is rebuilt during the comparison. */
185#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
186 do { \
187 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
188 res = PTR_EQUAL(tab, ptr, hash_val, key); \
189 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
190 } while (FALSE)
191
192/* Features of a table. */
194 /* Power of 2 used for number of allocated entries. */
195 unsigned char entry_power;
196 /* Power of 2 used for number of allocated bins. Depending on the
197 table size, the number of bins is 2-4 times more than the
198 number of entries. */
199 unsigned char bin_power;
200 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
201 unsigned char size_ind;
202 /* Bins are packed in words of type st_index_t. The following is
203 a size of bins counted by words. */
204 st_index_t bins_words;
205};
206
207/* Features of all possible size tables. */
208#if SIZEOF_ST_INDEX_T == 8
209#define MAX_POWER2 62
210static const struct st_features features[] = {
211 {0, 1, 0, 0x0},
212 {1, 2, 0, 0x1},
213 {2, 3, 0, 0x1},
214 {3, 4, 0, 0x2},
215 {4, 5, 0, 0x4},
216 {5, 6, 0, 0x8},
217 {6, 7, 0, 0x10},
218 {7, 8, 0, 0x20},
219 {8, 9, 1, 0x80},
220 {9, 10, 1, 0x100},
221 {10, 11, 1, 0x200},
222 {11, 12, 1, 0x400},
223 {12, 13, 1, 0x800},
224 {13, 14, 1, 0x1000},
225 {14, 15, 1, 0x2000},
226 {15, 16, 1, 0x4000},
227 {16, 17, 2, 0x10000},
228 {17, 18, 2, 0x20000},
229 {18, 19, 2, 0x40000},
230 {19, 20, 2, 0x80000},
231 {20, 21, 2, 0x100000},
232 {21, 22, 2, 0x200000},
233 {22, 23, 2, 0x400000},
234 {23, 24, 2, 0x800000},
235 {24, 25, 2, 0x1000000},
236 {25, 26, 2, 0x2000000},
237 {26, 27, 2, 0x4000000},
238 {27, 28, 2, 0x8000000},
239 {28, 29, 2, 0x10000000},
240 {29, 30, 2, 0x20000000},
241 {30, 31, 2, 0x40000000},
242 {31, 32, 2, 0x80000000},
243 {32, 33, 3, 0x200000000},
244 {33, 34, 3, 0x400000000},
245 {34, 35, 3, 0x800000000},
246 {35, 36, 3, 0x1000000000},
247 {36, 37, 3, 0x2000000000},
248 {37, 38, 3, 0x4000000000},
249 {38, 39, 3, 0x8000000000},
250 {39, 40, 3, 0x10000000000},
251 {40, 41, 3, 0x20000000000},
252 {41, 42, 3, 0x40000000000},
253 {42, 43, 3, 0x80000000000},
254 {43, 44, 3, 0x100000000000},
255 {44, 45, 3, 0x200000000000},
256 {45, 46, 3, 0x400000000000},
257 {46, 47, 3, 0x800000000000},
258 {47, 48, 3, 0x1000000000000},
259 {48, 49, 3, 0x2000000000000},
260 {49, 50, 3, 0x4000000000000},
261 {50, 51, 3, 0x8000000000000},
262 {51, 52, 3, 0x10000000000000},
263 {52, 53, 3, 0x20000000000000},
264 {53, 54, 3, 0x40000000000000},
265 {54, 55, 3, 0x80000000000000},
266 {55, 56, 3, 0x100000000000000},
267 {56, 57, 3, 0x200000000000000},
268 {57, 58, 3, 0x400000000000000},
269 {58, 59, 3, 0x800000000000000},
270 {59, 60, 3, 0x1000000000000000},
271 {60, 61, 3, 0x2000000000000000},
272 {61, 62, 3, 0x4000000000000000},
273 {62, 63, 3, 0x8000000000000000},
274};
275
276#else
277#define MAX_POWER2 30
278
279static const struct st_features features[] = {
280 {0, 1, 0, 0x1},
281 {1, 2, 0, 0x1},
282 {2, 3, 0, 0x2},
283 {3, 4, 0, 0x4},
284 {4, 5, 0, 0x8},
285 {5, 6, 0, 0x10},
286 {6, 7, 0, 0x20},
287 {7, 8, 0, 0x40},
288 {8, 9, 1, 0x100},
289 {9, 10, 1, 0x200},
290 {10, 11, 1, 0x400},
291 {11, 12, 1, 0x800},
292 {12, 13, 1, 0x1000},
293 {13, 14, 1, 0x2000},
294 {14, 15, 1, 0x4000},
295 {15, 16, 1, 0x8000},
296 {16, 17, 2, 0x20000},
297 {17, 18, 2, 0x40000},
298 {18, 19, 2, 0x80000},
299 {19, 20, 2, 0x100000},
300 {20, 21, 2, 0x200000},
301 {21, 22, 2, 0x400000},
302 {22, 23, 2, 0x800000},
303 {23, 24, 2, 0x1000000},
304 {24, 25, 2, 0x2000000},
305 {25, 26, 2, 0x4000000},
306 {26, 27, 2, 0x8000000},
307 {27, 28, 2, 0x10000000},
308 {28, 29, 2, 0x20000000},
309 {29, 30, 2, 0x40000000},
310 {30, 31, 2, 0x80000000},
311};
312
313#endif
314
315/* The reserved hash value and its substitution. */
316#define RESERVED_HASH_VAL (~(st_hash_t) 0)
317#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
318
319static inline st_hash_t
320normalize_hash_value(st_hash_t hash)
321{
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325}
326
327/* Return hash value of KEY for table TAB. */
328static inline st_hash_t
329do_hash(st_data_t key, st_table *tab)
330{
331 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
332 return normalize_hash_value(hash);
333}
334
335/* Power of 2 defining the minimal number of allocated entries. */
336#define MINIMAL_POWER2 2
337
338#if MINIMAL_POWER2 < 2
339#error "MINIMAL_POWER2 should be >= 2"
340#endif
341
342/* If the power2 of the allocated `entries` is less than the following
343 value, don't allocate bins and use a linear search. */
344#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
345
346/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
347static int
348get_power2(st_index_t size)
349{
350 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
351 if (n <= MAX_POWER2)
352 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
353#ifdef RUBY
354 /* Ran out of the table entries */
355 rb_raise(rb_eRuntimeError, "st_table too big");
356#endif
357 /* should raise exception */
358 return -1;
359}
360
361/* Return value of N-th bin in array BINS of table with bins size
362 index S. */
363static inline st_index_t
364get_bin(st_index_t *bins, int s, st_index_t n)
365{
366 return (s == 0 ? ((unsigned char *) bins)[n]
367 : s == 1 ? ((unsigned short *) bins)[n]
368 : s == 2 ? ((unsigned int *) bins)[n]
369 : ((st_index_t *) bins)[n]);
370}
371
372/* Set up N-th bin in array BINS of table with bins size index S to
373 value V. */
374static inline void
375set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
376{
377 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
378 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
379 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
380 else ((st_index_t *) bins)[n] = v;
381}
382
383/* These macros define reserved values for empty table bin and table
384 bin which contains a deleted entry. We will never use such values
385 for an entry index in bins. */
386#define EMPTY_BIN 0
387#define DELETED_BIN 1
388/* Base of a real entry index in the bins. */
389#define ENTRY_BASE 2
390
391/* Mark I-th bin of table TAB as empty, in other words not
392 corresponding to any entry. */
393#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
394
395/* Values used for not found entry and bin with given
396 characteristics. */
397#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
398#define UNDEFINED_BIN_IND (~(st_index_t) 0)
399
400/* Entry and bin values returned when we found a table rebuild during
401 the search. */
402#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
403#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
404
405/* Mark I-th bin of table TAB as corresponding to a deleted table
406 entry. Update number of entries in the table and number of bins
407 corresponding to deleted entries. */
408#define MARK_BIN_DELETED(tab, i) \
409 do { \
410 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
411 } while (0)
412
413/* Macros to check that value B is used empty bins and bins
414 corresponding deleted entries. */
415#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
416#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
417#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
418
419/* Macros to check empty bins and bins corresponding to deleted
420 entries. Bins are given by their index I in table TAB. */
421#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
423#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
424
425/* Macros for marking and checking deleted entries given by their
426 pointer E_PTR. */
427#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
428#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
429
430/* Return bin size index of table TAB. */
431static inline unsigned int
432get_size_ind(const st_table *tab)
433{
434 return tab->size_ind;
435}
436
437/* Return the number of allocated bins of table TAB. */
438static inline st_index_t
439get_bins_num(const st_table *tab)
440{
441 return ((st_index_t) 1)<<tab->bin_power;
442}
443
444/* Return mask for a bin index in table TAB. */
445static inline st_index_t
446bins_mask(const st_table *tab)
447{
448 return get_bins_num(tab) - 1;
449}
450
451/* Return the index of table TAB bin corresponding to
452 HASH_VALUE. */
453static inline st_index_t
454hash_bin(st_hash_t hash_value, st_table *tab)
455{
456 return hash_value & bins_mask(tab);
457}
458
459/* Return the number of allocated entries of table TAB. */
460static inline st_index_t
461get_allocated_entries(const st_table *tab)
462{
463 return ((st_index_t) 1)<<tab->entry_power;
464}
465
466/* Return size of the allocated bins of table TAB. */
467static inline st_index_t
468bins_size(const st_table *tab)
469{
470 return features[tab->entry_power].bins_words * sizeof (st_index_t);
471}
472
473/* Mark all bins of table TAB as empty. */
474static void
475initialize_bins(st_table *tab)
476{
477 memset(tab->bins, 0, bins_size(tab));
478}
479
480/* Make table TAB empty. */
481static void
482make_tab_empty(st_table *tab)
483{
484 tab->num_entries = 0;
485 tab->entries_start = tab->entries_bound = 0;
486 if (tab->bins != NULL)
487 initialize_bins(tab);
488}
489
490#ifdef HASH_LOG
491#ifdef HAVE_UNISTD_H
492#include <unistd.h>
493#endif
494static struct {
495 int all, total, num, str, strcase;
496} collision;
497
498/* Flag switching off output of package statistics at the end of
499 program. */
500static int init_st = 0;
501
502/* Output overall number of table searches and collisions into a
503 temporary file. */
504static void
505stat_col(void)
506{
507 char fname[10+sizeof(long)*3];
508 FILE *f;
509 if (!collision.total) return;
510 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
511 if (f == NULL)
512 return;
513 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
514 ((double)collision.all / (collision.total)) * 100);
515 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
516 fclose(f);
517}
518#endif
519
520st_table *
521st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
522{
523 int n;
524
525#ifdef HASH_LOG
526#if HASH_LOG+0 < 0
527 {
528 const char *e = getenv("ST_HASH_LOG");
529 if (!e || !*e) init_st = 1;
530 }
531#endif
532 if (init_st == 0) {
533 init_st = 1;
534 atexit(stat_col);
535 }
536#endif
537
538 n = get_power2(size);
539#ifndef RUBY
540 if (n < 0)
541 return NULL;
542#endif
543
544 tab->type = type;
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549 tab->bins = NULL;
550 else {
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
552#ifndef RUBY
553 if (tab->bins == NULL) {
554 free(tab);
555 return NULL;
556 }
557#endif
558 }
559 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560 * sizeof(st_table_entry));
561#ifndef RUBY
562 if (tab->entries == NULL) {
563 st_free_table(tab);
564 return NULL;
565 }
566#endif
567 make_tab_empty(tab);
568 tab->rebuilds_num = 0;
569 return tab;
570}
571
572/* Create and return table with TYPE which can hold at least SIZE
573 entries. The real number of entries which the table can hold is
574 the nearest power of two for SIZE. */
575st_table *
576st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
577{
578 st_table *tab = malloc(sizeof(st_table));
579#ifndef RUBY
580 if (tab == NULL)
581 return NULL;
582#endif
583
584#ifdef RUBY
585 st_init_existing_table_with_size(tab, type, size);
586#else
587 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
588 free(tab);
589 return NULL;
590 }
591#endif
592
593 return tab;
594}
595
596size_t
597st_table_size(const struct st_table *tbl)
598{
599 return tbl->num_entries;
600}
601
602/* Create and return table with TYPE which can hold a minimal number
603 of entries (see comments for get_power2). */
604st_table *
605st_init_table(const struct st_hash_type *type)
606{
607 return st_init_table_with_size(type, 0);
608}
609
610/* Create and return table which can hold a minimal number of
611 numbers. */
612st_table *
613st_init_numtable(void)
614{
615 return st_init_table(&type_numhash);
616}
617
618/* Create and return table which can hold SIZE numbers. */
619st_table *
620st_init_numtable_with_size(st_index_t size)
621{
622 return st_init_table_with_size(&type_numhash, size);
623}
624
625/* Create and return table which can hold a minimal number of
626 strings. */
627st_table *
628st_init_strtable(void)
629{
630 return st_init_table(&type_strhash);
631}
632
633/* Create and return table which can hold SIZE strings. */
634st_table *
635st_init_strtable_with_size(st_index_t size)
636{
637 return st_init_table_with_size(&type_strhash, size);
638}
639
640/* Create and return table which can hold a minimal number of strings
641 whose character case is ignored. */
642st_table *
643st_init_strcasetable(void)
644{
645 return st_init_table(&type_strcasehash);
646}
647
648/* Create and return table which can hold SIZE strings whose character
649 case is ignored. */
650st_table *
651st_init_strcasetable_with_size(st_index_t size)
652{
653 return st_init_table_with_size(&type_strcasehash, size);
654}
655
656/* Make table TAB empty. */
657void
658st_clear(st_table *tab)
659{
660 make_tab_empty(tab);
661 tab->rebuilds_num++;
662}
663
664/* Free table TAB space. */
665void
666st_free_table(st_table *tab)
667{
668 free(tab->bins);
669 free(tab->entries);
670 free(tab);
671}
672
673/* Return byte size of memory allocated for table TAB. */
674size_t
675st_memsize(const st_table *tab)
676{
677 RUBY_ASSERT(tab != NULL);
678 return(sizeof(st_table)
679 + (tab->bins == NULL ? 0 : bins_size(tab))
680 + get_allocated_entries(tab) * sizeof(st_table_entry));
681}
682
683static st_index_t
684find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
685
686static st_index_t
687find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
688
689static st_index_t
690find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
691
692static st_index_t
693find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
694 st_data_t key, st_index_t *bin_ind);
695
696#ifdef HASH_LOG
697static void
698count_collision(const struct st_hash_type *type)
699{
700 collision.all++;
701 if (type == &type_numhash) {
702 collision.num++;
703 }
704 else if (type == &type_strhash) {
705 collision.strcase++;
706 }
707 else if (type == &type_strcasehash) {
708 collision.str++;
709 }
710}
711
712#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
713#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
714#define collision_check 0
715#else
716#define COLLISION
717#define FOUND_BIN
718#endif
719
720/* If the number of entries in the table is at least REBUILD_THRESHOLD
721 times less than the entry array length, decrease the table
722 size. */
723#define REBUILD_THRESHOLD 4
724
725#if REBUILD_THRESHOLD < 2
726#error "REBUILD_THRESHOLD should be >= 2"
727#endif
728
729static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
730static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
731static void rebuild_cleanup(st_table *const tab);
732
733/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
734 and can change size of the table entries and bins arrays.
735 Rebuilding is implemented by creation of a new table or by
736 compaction of the existing one. */
737static void
738rebuild_table(st_table *tab)
739{
740 if ((2 * tab->num_entries <= get_allocated_entries(tab)
741 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
742 || tab->num_entries < (1 << MINIMAL_POWER2)) {
743 /* Compaction: */
744 tab->num_entries = 0;
745 if (tab->bins != NULL)
746 initialize_bins(tab);
747 rebuild_table_with(tab, tab);
748 }
749 else {
750 st_table *new_tab;
751 /* This allocation could trigger GC and compaction. If tab is the
752 * gen_fields_tbl, then tab could have changed in size due to objects being
753 * freed and/or moved. Do not store attributes of tab before this line. */
754 new_tab = st_init_table_with_size(tab->type,
755 2 * tab->num_entries - 1);
756 rebuild_table_with(new_tab, tab);
757 rebuild_move_table(new_tab, tab);
758 }
759 rebuild_cleanup(tab);
760}
761
762static void
763rebuild_table_with(st_table *const new_tab, st_table *const tab)
764{
765 st_index_t i, ni;
766 unsigned int size_ind;
767 st_table_entry *new_entries;
768 st_table_entry *curr_entry_ptr;
769 st_index_t *bins;
770 st_index_t bin_ind;
771
772 new_entries = new_tab->entries;
773
774 ni = 0;
775 bins = new_tab->bins;
776 size_ind = get_size_ind(new_tab);
777 st_index_t bound = tab->entries_bound;
778 st_table_entry *entries = tab->entries;
779
780 for (i = tab->entries_start; i < bound; i++) {
781 curr_entry_ptr = &entries[i];
782 PREFETCH(entries + i + 1, 0);
783 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
784 continue;
785 if (&new_entries[ni] != curr_entry_ptr)
786 new_entries[ni] = *curr_entry_ptr;
787 if (EXPECT(bins != NULL, 1)) {
788 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
789 curr_entry_ptr->key);
790 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
791 }
792 new_tab->num_entries++;
793 ni++;
794 }
795
796 assert(new_tab->num_entries == tab->num_entries);
797}
798
799static void
800rebuild_move_table(st_table *const new_tab, st_table *const tab)
801{
802 tab->entry_power = new_tab->entry_power;
803 tab->bin_power = new_tab->bin_power;
804 tab->size_ind = new_tab->size_ind;
805 free(tab->bins);
806 tab->bins = new_tab->bins;
807 free(tab->entries);
808 tab->entries = new_tab->entries;
809 free(new_tab);
810}
811
812static void
813rebuild_cleanup(st_table *const tab)
814{
815 tab->entries_start = 0;
816 tab->entries_bound = tab->num_entries;
817 tab->rebuilds_num++;
818}
819
820/* Return the next secondary hash index for table TAB using previous
821 index IND and PERTURB. Finally modulo of the function becomes a
822 full *cycle linear congruential generator*, in other words it
823 guarantees traversing all table bins in extreme case.
824
825 According the Hull-Dobell theorem a generator
826 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
827 o m and c are relatively prime
828 o a-1 is divisible by all prime factors of m
829 o a-1 is divisible by 4 if m is divisible by 4.
830
831 For our case a is 5, c is 1, and m is a power of two. */
832static inline st_index_t
833secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
834{
835 *perturb >>= 11;
836 ind = (ind << 2) + ind + *perturb + 1;
837 return hash_bin(ind, tab);
838}
839
840/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
841 search. Return the index of the found entry in array `entries`.
842 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
843 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
844static inline st_index_t
845find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
846{
847 int eq_p, rebuilt_p;
848 st_index_t i, bound;
849 st_table_entry *entries;
850
851 bound = tab->entries_bound;
852 entries = tab->entries;
853 for (i = tab->entries_start; i < bound; i++) {
854 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
855 if (EXPECT(rebuilt_p, 0))
856 return REBUILT_TABLE_ENTRY_IND;
857 if (eq_p)
858 return i;
859 }
860 return UNDEFINED_ENTRY_IND;
861}
862
863/* Use the quadratic probing. The method has a better data locality
864 but more collisions than the current approach. In average it
865 results in a bit slower search. */
866/*#define QUADRATIC_PROBE*/
867
868/* Return index of entry with HASH_VALUE and KEY in table TAB. If
869 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
870 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
871static st_index_t
872find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
873{
874 int eq_p, rebuilt_p;
875 st_index_t ind;
876#ifdef QUADRATIC_PROBE
877 st_index_t d;
878#else
879 st_index_t perturb;
880#endif
881 st_index_t bin;
882 st_table_entry *entries = tab->entries;
883
884 ind = hash_bin(hash_value, tab);
885#ifdef QUADRATIC_PROBE
886 d = 1;
887#else
888 perturb = hash_value;
889#endif
890 FOUND_BIN;
891 for (;;) {
892 bin = get_bin(tab->bins, get_size_ind(tab), ind);
893 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
894 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
895 if (EXPECT(rebuilt_p, 0))
896 return REBUILT_TABLE_ENTRY_IND;
897 if (eq_p)
898 break;
899 }
900 else if (EMPTY_BIN_P(bin))
901 return UNDEFINED_ENTRY_IND;
902#ifdef QUADRATIC_PROBE
903 ind = hash_bin(ind + d, tab);
904 d++;
905#else
906 ind = secondary_hash(ind, tab, &perturb);
907#endif
908 COLLISION;
909 }
910 return bin;
911}
912
913/* Find and return index of table TAB bin corresponding to an entry
914 with HASH_VALUE and KEY. If there is no such bin, return
915 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
916 return REBUILT_TABLE_BIN_IND. */
917static st_index_t
918find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
919{
920 int eq_p, rebuilt_p;
921 st_index_t ind;
922#ifdef QUADRATIC_PROBE
923 st_index_t d;
924#else
925 st_index_t perturb;
926#endif
927 st_index_t bin;
928 st_table_entry *entries = tab->entries;
929
930 ind = hash_bin(hash_value, tab);
931#ifdef QUADRATIC_PROBE
932 d = 1;
933#else
934 perturb = hash_value;
935#endif
936 FOUND_BIN;
937 for (;;) {
938 bin = get_bin(tab->bins, get_size_ind(tab), ind);
939 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
940 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
941 if (EXPECT(rebuilt_p, 0))
942 return REBUILT_TABLE_BIN_IND;
943 if (eq_p)
944 break;
945 }
946 else if (EMPTY_BIN_P(bin))
947 return UNDEFINED_BIN_IND;
948#ifdef QUADRATIC_PROBE
949 ind = hash_bin(ind + d, tab);
950 d++;
951#else
952 ind = secondary_hash(ind, tab, &perturb);
953#endif
954 COLLISION;
955 }
956 return ind;
957}
958
959/* Find and return index of table TAB bin corresponding to an entry
960 with HASH_VALUE and KEY. The entry should be in the table
961 already. */
962static st_index_t
963find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
964{
965 st_index_t ind;
966#ifdef QUADRATIC_PROBE
967 st_index_t d;
968#else
969 st_index_t perturb;
970#endif
971 st_index_t bin;
972
973 ind = hash_bin(hash_value, tab);
974#ifdef QUADRATIC_PROBE
975 d = 1;
976#else
977 perturb = hash_value;
978#endif
979 FOUND_BIN;
980 for (;;) {
981 bin = get_bin(tab->bins, get_size_ind(tab), ind);
982 if (EMPTY_OR_DELETED_BIN_P(bin))
983 return ind;
984#ifdef QUADRATIC_PROBE
985 ind = hash_bin(ind + d, tab);
986 d++;
987#else
988 ind = secondary_hash(ind, tab, &perturb);
989#endif
990 COLLISION;
991 }
992}
993
994/* Return index of table TAB bin for HASH_VALUE and KEY through
995 BIN_IND and the pointed value as the function result. Reserve the
996 bin for inclusion of the corresponding entry into the table if it
997 is not there yet. We always find such bin as bins array length is
998 bigger entries array. Although we can reuse a deleted bin, the
999 result bin value is always empty if the table has no entry with
1000 KEY. Return the entries array index of the found entry or
1001 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1002 during the search, return REBUILT_TABLE_ENTRY_IND. */
1003static st_index_t
1004find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1005 st_data_t key, st_index_t *bin_ind)
1006{
1007 int eq_p, rebuilt_p;
1008 st_index_t ind;
1009 st_hash_t curr_hash_value = *hash_value;
1010#ifdef QUADRATIC_PROBE
1011 st_index_t d;
1012#else
1013 st_index_t perturb;
1014#endif
1015 st_index_t entry_index;
1016 st_index_t first_deleted_bin_ind;
1017 st_table_entry *entries;
1018
1019 ind = hash_bin(curr_hash_value, tab);
1020#ifdef QUADRATIC_PROBE
1021 d = 1;
1022#else
1023 perturb = curr_hash_value;
1024#endif
1025 FOUND_BIN;
1026 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1027 entries = tab->entries;
1028 for (;;) {
1029 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1030 if (EMPTY_BIN_P(entry_index)) {
1031 tab->num_entries++;
1032 entry_index = UNDEFINED_ENTRY_IND;
1033 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1034 /* We can reuse bin of a deleted entry. */
1035 ind = first_deleted_bin_ind;
1036 MARK_BIN_EMPTY(tab, ind);
1037 }
1038 break;
1039 }
1040 else if (! DELETED_BIN_P(entry_index)) {
1041 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1042 if (EXPECT(rebuilt_p, 0))
1043 return REBUILT_TABLE_ENTRY_IND;
1044 if (eq_p)
1045 break;
1046 }
1047 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1048 first_deleted_bin_ind = ind;
1049#ifdef QUADRATIC_PROBE
1050 ind = hash_bin(ind + d, tab);
1051 d++;
1052#else
1053 ind = secondary_hash(ind, tab, &perturb);
1054#endif
1055 COLLISION;
1056 }
1057 *bin_ind = ind;
1058 return entry_index;
1059}
1060
1061/* Find an entry with KEY in table TAB. Return non-zero if we found
1062 it. Set up *RECORD to the found entry record. */
1063int
1064st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1065{
1066 st_index_t bin;
1067 st_hash_t hash = do_hash(key, tab);
1068
1069 retry:
1070 if (tab->bins == NULL) {
1071 bin = find_entry(tab, hash, key);
1072 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1073 goto retry;
1074 if (bin == UNDEFINED_ENTRY_IND)
1075 return 0;
1076 }
1077 else {
1078 bin = find_table_entry_ind(tab, hash, key);
1079 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1080 goto retry;
1081 if (bin == UNDEFINED_ENTRY_IND)
1082 return 0;
1083 bin -= ENTRY_BASE;
1084 }
1085 if (value != 0)
1086 *value = tab->entries[bin].record;
1087 return 1;
1088}
1089
1090/* Find an entry with KEY in table TAB. Return non-zero if we found
1091 it. Set up *RESULT to the found table entry key. */
1092int
1093st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1094{
1095 st_index_t bin;
1096 st_hash_t hash = do_hash(key, tab);
1097
1098 retry:
1099 if (tab->bins == NULL) {
1100 bin = find_entry(tab, hash, key);
1101 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1102 goto retry;
1103 if (bin == UNDEFINED_ENTRY_IND)
1104 return 0;
1105 }
1106 else {
1107 bin = find_table_entry_ind(tab, hash, key);
1108 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1109 goto retry;
1110 if (bin == UNDEFINED_ENTRY_IND)
1111 return 0;
1112 bin -= ENTRY_BASE;
1113 }
1114 if (result != 0)
1115 *result = tab->entries[bin].key;
1116 return 1;
1117}
1118
1119/* Check the table and rebuild it if it is necessary. */
1120static inline void
1121rebuild_table_if_necessary (st_table *tab)
1122{
1123 st_index_t bound = tab->entries_bound;
1124
1125 if (bound == get_allocated_entries(tab))
1126 rebuild_table(tab);
1127}
1128
1129/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1130 already entry with KEY in the table, return nonzero and update
1131 the value of the found entry. */
1132int
1133st_insert(st_table *tab, st_data_t key, st_data_t value)
1134{
1135 st_table_entry *entry;
1136 st_index_t bin;
1137 st_index_t ind;
1138 st_hash_t hash_value;
1139 st_index_t bin_ind;
1140 int new_p;
1141
1142 hash_value = do_hash(key, tab);
1143 retry:
1144 rebuild_table_if_necessary(tab);
1145 if (tab->bins == NULL) {
1146 bin = find_entry(tab, hash_value, key);
1147 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1148 goto retry;
1149 new_p = bin == UNDEFINED_ENTRY_IND;
1150 if (new_p)
1151 tab->num_entries++;
1152 bin_ind = UNDEFINED_BIN_IND;
1153 }
1154 else {
1155 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1156 key, &bin_ind);
1157 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1158 goto retry;
1159 new_p = bin == UNDEFINED_ENTRY_IND;
1160 bin -= ENTRY_BASE;
1161 }
1162 if (new_p) {
1163 ind = tab->entries_bound++;
1164 entry = &tab->entries[ind];
1165 entry->hash = hash_value;
1166 entry->key = key;
1167 entry->record = value;
1168 if (bin_ind != UNDEFINED_BIN_IND)
1169 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1170 return 0;
1171 }
1172 tab->entries[bin].record = value;
1173 return 1;
1174}
1175
1176/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1177 entry with KEY before the insertion. */
1178static inline void
1179st_add_direct_with_hash(st_table *tab,
1180 st_data_t key, st_data_t value, st_hash_t hash)
1181{
1182 st_table_entry *entry;
1183 st_index_t ind;
1184 st_index_t bin_ind;
1185
1186 assert(hash != RESERVED_HASH_VAL);
1187
1188 rebuild_table_if_necessary(tab);
1189 ind = tab->entries_bound++;
1190 entry = &tab->entries[ind];
1191 entry->hash = hash;
1192 entry->key = key;
1193 entry->record = value;
1194 tab->num_entries++;
1195 if (tab->bins != NULL) {
1196 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1197 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1198 }
1199}
1200
1201void
1202rb_st_add_direct_with_hash(st_table *tab,
1203 st_data_t key, st_data_t value, st_hash_t hash)
1204{
1205 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1206}
1207
1208/* Insert (KEY, VALUE) into table TAB. The table should not have
1209 entry with KEY before the insertion. */
1210void
1211st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1212{
1213 st_hash_t hash_value;
1214
1215 hash_value = do_hash(key, tab);
1216 st_add_direct_with_hash(tab, key, value, hash_value);
1217}
1218
1219/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1220 there is already entry with KEY in the table, return nonzero and
1221 update the value of the found entry. */
1222int
1223st_insert2(st_table *tab, st_data_t key, st_data_t value,
1224 st_data_t (*func)(st_data_t))
1225{
1226 st_table_entry *entry;
1227 st_index_t bin;
1228 st_index_t ind;
1229 st_hash_t hash_value;
1230 st_index_t bin_ind;
1231 int new_p;
1232
1233 hash_value = do_hash(key, tab);
1234 retry:
1235 rebuild_table_if_necessary (tab);
1236 if (tab->bins == NULL) {
1237 bin = find_entry(tab, hash_value, key);
1238 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1239 goto retry;
1240 new_p = bin == UNDEFINED_ENTRY_IND;
1241 if (new_p)
1242 tab->num_entries++;
1243 bin_ind = UNDEFINED_BIN_IND;
1244 }
1245 else {
1246 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1247 key, &bin_ind);
1248 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1249 goto retry;
1250 new_p = bin == UNDEFINED_ENTRY_IND;
1251 bin -= ENTRY_BASE;
1252 }
1253 if (new_p) {
1254 key = (*func)(key);
1255 ind = tab->entries_bound++;
1256 entry = &tab->entries[ind];
1257 entry->hash = hash_value;
1258 entry->key = key;
1259 entry->record = value;
1260 if (bin_ind != UNDEFINED_BIN_IND)
1261 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1262 return 0;
1263 }
1264 tab->entries[bin].record = value;
1265 return 1;
1266}
1267
1268/* Create a copy of old_tab into new_tab. */
1269st_table *
1270st_replace(st_table *new_tab, st_table *old_tab)
1271{
1272 *new_tab = *old_tab;
1273 if (old_tab->bins == NULL)
1274 new_tab->bins = NULL;
1275 else {
1276 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1277#ifndef RUBY
1278 if (new_tab->bins == NULL) {
1279 return NULL;
1280 }
1281#endif
1282 }
1283 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1284 * sizeof(st_table_entry));
1285#ifndef RUBY
1286 if (new_tab->entries == NULL) {
1287 return NULL;
1288 }
1289#endif
1290 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1291 get_allocated_entries(old_tab));
1292 if (old_tab->bins != NULL)
1293 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1294
1295 return new_tab;
1296}
1297
1298/* Create and return a copy of table OLD_TAB. */
1299st_table *
1300st_copy(st_table *old_tab)
1301{
1302 st_table *new_tab;
1303
1304 new_tab = (st_table *) malloc(sizeof(st_table));
1305#ifndef RUBY
1306 if (new_tab == NULL)
1307 return NULL;
1308#endif
1309
1310 if (st_replace(new_tab, old_tab) == NULL) {
1311 st_free_table(new_tab);
1312 return NULL;
1313 }
1314
1315 return new_tab;
1316}
1317
1318/* Update the entries start of table TAB after removing an entry
1319 with index N in the array entries. */
1320static inline void
1321update_range_for_deleted(st_table *tab, st_index_t n)
1322{
1323 /* Do not update entries_bound here. Otherwise, we can fill all
1324 bins by deleted entry value before rebuilding the table. */
1325 if (tab->entries_start == n) {
1326 st_index_t start = n + 1;
1327 st_index_t bound = tab->entries_bound;
1328 st_table_entry *entries = tab->entries;
1329 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1330 tab->entries_start = start;
1331 }
1332}
1333
1334/* Delete entry with KEY from table TAB, set up *VALUE (unless
1335 VALUE is zero) from deleted table entry, and return non-zero. If
1336 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1337 is zero), and return zero. */
1338static int
1339st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1340{
1341 st_table_entry *entry;
1342 st_index_t bin;
1343 st_index_t bin_ind;
1344 st_hash_t hash;
1345
1346 hash = do_hash(*key, tab);
1347 retry:
1348 if (tab->bins == NULL) {
1349 bin = find_entry(tab, hash, *key);
1350 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1351 goto retry;
1352 if (bin == UNDEFINED_ENTRY_IND) {
1353 if (value != 0) *value = 0;
1354 return 0;
1355 }
1356 }
1357 else {
1358 bin_ind = find_table_bin_ind(tab, hash, *key);
1359 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1360 goto retry;
1361 if (bin_ind == UNDEFINED_BIN_IND) {
1362 if (value != 0) *value = 0;
1363 return 0;
1364 }
1365 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1366 MARK_BIN_DELETED(tab, bin_ind);
1367 }
1368 entry = &tab->entries[bin];
1369 *key = entry->key;
1370 if (value != 0) *value = entry->record;
1371 MARK_ENTRY_DELETED(entry);
1372 tab->num_entries--;
1373 update_range_for_deleted(tab, bin);
1374 return 1;
1375}
1376
1377int
1378st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1379{
1380 return st_general_delete(tab, key, value);
1381}
1382
1383/* The function and other functions with suffix '_safe' or '_check'
1384 are originated from the previous implementation of the hash tables.
1385 It was necessary for correct deleting entries during traversing
1386 tables. The current implementation permits deletion during
1387 traversing without a specific way to do this. */
1388int
1389st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1390 st_data_t never ATTRIBUTE_UNUSED)
1391{
1392 return st_general_delete(tab, key, value);
1393}
1394
1395/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1396 return zero. Otherwise, remove the first entry in the table.
1397 Return its key through KEY and its record through VALUE (unless
1398 VALUE is zero). */
1399int
1400st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1401{
1402 st_index_t i, bound;
1403 st_index_t bin;
1404 st_table_entry *entries, *curr_entry_ptr;
1405 st_index_t bin_ind;
1406
1407 entries = tab->entries;
1408 bound = tab->entries_bound;
1409 for (i = tab->entries_start; i < bound; i++) {
1410 curr_entry_ptr = &entries[i];
1411 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1412 st_hash_t entry_hash = curr_entry_ptr->hash;
1413 st_data_t entry_key = curr_entry_ptr->key;
1414
1415 if (value != 0) *value = curr_entry_ptr->record;
1416 *key = entry_key;
1417 retry:
1418 if (tab->bins == NULL) {
1419 bin = find_entry(tab, entry_hash, entry_key);
1420 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1421 entries = tab->entries;
1422 goto retry;
1423 }
1424 curr_entry_ptr = &entries[bin];
1425 }
1426 else {
1427 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1428 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1429 entries = tab->entries;
1430 goto retry;
1431 }
1432 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1433 - ENTRY_BASE];
1434 MARK_BIN_DELETED(tab, bin_ind);
1435 }
1436 MARK_ENTRY_DELETED(curr_entry_ptr);
1437 tab->num_entries--;
1438 update_range_for_deleted(tab, i);
1439 return 1;
1440 }
1441 }
1442 if (value != 0) *value = 0;
1443 return 0;
1444}
1445
1446/* See comments for function st_delete_safe. */
1447void
1448st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1449 st_data_t never ATTRIBUTE_UNUSED)
1450{
1451}
1452
1453/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1454 of the key and the value of the found entry, and non-zero as the
1455 3rd argument. If the entry is not found, call FUNC with a pointer
1456 to KEY, a pointer to zero, and a zero argument. If the call
1457 returns ST_CONTINUE, the table will have an entry with key and
1458 value returned by FUNC through the 1st and 2nd parameters. If the
1459 call of FUNC returns ST_DELETE, the table will not have entry with
1460 KEY. The function returns flag of that the entry with KEY was in
1461 the table before the call. */
1462int
1463st_update(st_table *tab, st_data_t key,
1464 st_update_callback_func *func, st_data_t arg)
1465{
1466 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1467 st_index_t bin = 0; /* Ditto */
1468 st_table_entry *entries;
1469 st_index_t bin_ind;
1470 st_data_t value = 0, old_key;
1471 int retval, existing;
1472 st_hash_t hash = do_hash(key, tab);
1473
1474 retry:
1475 entries = tab->entries;
1476 if (tab->bins == NULL) {
1477 bin = find_entry(tab, hash, key);
1478 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1479 goto retry;
1480 existing = bin != UNDEFINED_ENTRY_IND;
1481 entry = &entries[bin];
1482 bin_ind = UNDEFINED_BIN_IND;
1483 }
1484 else {
1485 bin_ind = find_table_bin_ind(tab, hash, key);
1486 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1487 goto retry;
1488 existing = bin_ind != UNDEFINED_BIN_IND;
1489 if (existing) {
1490 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1491 entry = &entries[bin];
1492 }
1493 }
1494 if (existing) {
1495 key = entry->key;
1496 value = entry->record;
1497 }
1498 old_key = key;
1499
1500 unsigned int rebuilds_num = tab->rebuilds_num;
1501
1502 retval = (*func)(&key, &value, arg, existing);
1503
1504 // We need to make sure that the callback didn't cause a table rebuild
1505 // Ideally we would make sure no operations happened
1506 assert(rebuilds_num == tab->rebuilds_num);
1507 (void)rebuilds_num;
1508
1509 switch (retval) {
1510 case ST_CONTINUE:
1511 if (! existing) {
1512 st_add_direct_with_hash(tab, key, value, hash);
1513 break;
1514 }
1515 if (old_key != key) {
1516 entry->key = key;
1517 }
1518 entry->record = value;
1519 break;
1520 case ST_DELETE:
1521 if (existing) {
1522 if (bin_ind != UNDEFINED_BIN_IND)
1523 MARK_BIN_DELETED(tab, bin_ind);
1524 MARK_ENTRY_DELETED(entry);
1525 tab->num_entries--;
1526 update_range_for_deleted(tab, bin);
1527 }
1528 break;
1529 }
1530 return existing;
1531}
1532
1533/* Traverse all entries in table TAB calling FUNC with current entry
1534 key and value and zero. If the call returns ST_STOP, stop
1535 traversing. If the call returns ST_DELETE, delete the current
1536 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1537 traversing. The function returns zero unless an error is found.
1538 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1539 different for ST_CHECK and when the current element is removed
1540 during traversing. */
1541static inline int
1542st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1543 int check_p)
1544{
1545 st_index_t bin;
1546 st_index_t bin_ind;
1547 st_table_entry *entries, *curr_entry_ptr;
1548 enum st_retval retval;
1549 st_index_t i, rebuilds_num;
1550 st_hash_t hash;
1551 st_data_t key;
1552 int error_p, packed_p = tab->bins == NULL;
1553
1554 entries = tab->entries;
1555 /* The bound can change inside the loop even without rebuilding
1556 the table, e.g. by an entry insertion. */
1557 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1558 curr_entry_ptr = &entries[i];
1559 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1560 continue;
1561 key = curr_entry_ptr->key;
1562 rebuilds_num = tab->rebuilds_num;
1563 hash = curr_entry_ptr->hash;
1564 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1565
1566 if (retval == ST_REPLACE && replace) {
1567 st_data_t value;
1568 value = curr_entry_ptr->record;
1569 retval = (*replace)(&key, &value, arg, TRUE);
1570 curr_entry_ptr->key = key;
1571 curr_entry_ptr->record = value;
1572 }
1573
1574 if (rebuilds_num != tab->rebuilds_num) {
1575 retry:
1576 entries = tab->entries;
1577 packed_p = tab->bins == NULL;
1578 if (packed_p) {
1579 i = find_entry(tab, hash, key);
1580 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1581 goto retry;
1582 error_p = i == UNDEFINED_ENTRY_IND;
1583 }
1584 else {
1585 i = find_table_entry_ind(tab, hash, key);
1586 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1587 goto retry;
1588 error_p = i == UNDEFINED_ENTRY_IND;
1589 i -= ENTRY_BASE;
1590 }
1591 if (error_p && check_p) {
1592 /* call func with error notice */
1593 retval = (*func)(0, 0, arg, 1);
1594 return 1;
1595 }
1596 curr_entry_ptr = &entries[i];
1597 }
1598 switch (retval) {
1599 case ST_REPLACE:
1600 break;
1601 case ST_CONTINUE:
1602 break;
1603 case ST_CHECK:
1604 if (check_p)
1605 break;
1606 case ST_STOP:
1607 return 0;
1608 case ST_DELETE: {
1609 st_data_t key = curr_entry_ptr->key;
1610
1611 again:
1612 if (packed_p) {
1613 bin = find_entry(tab, hash, key);
1614 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1615 goto again;
1616 if (bin == UNDEFINED_ENTRY_IND)
1617 break;
1618 }
1619 else {
1620 bin_ind = find_table_bin_ind(tab, hash, key);
1621 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1622 goto again;
1623 if (bin_ind == UNDEFINED_BIN_IND)
1624 break;
1625 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1626 MARK_BIN_DELETED(tab, bin_ind);
1627 }
1628 curr_entry_ptr = &entries[bin];
1629 MARK_ENTRY_DELETED(curr_entry_ptr);
1630 tab->num_entries--;
1631 update_range_for_deleted(tab, bin);
1632 break;
1633 }
1634 }
1635 }
1636 return 0;
1637}
1638
1639int
1640st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1641{
1642 return st_general_foreach(tab, func, replace, arg, TRUE);
1643}
1644
1645struct functor {
1646 st_foreach_callback_func *func;
1647 st_data_t arg;
1648};
1649
1650static int
1651apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1652{
1653 const struct functor *f = (void *)d;
1654 return f->func(k, v, f->arg);
1655}
1656
1657int
1658st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1659{
1660 const struct functor f = { func, arg };
1661 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1662}
1663
1664/* See comments for function st_delete_safe. */
1665int
1666st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1667 st_data_t never ATTRIBUTE_UNUSED)
1668{
1669 return st_general_foreach(tab, func, 0, arg, TRUE);
1670}
1671
1672/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1673 Return the number of keys set up in array KEYS. */
1674static inline st_index_t
1675st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1676{
1677 st_index_t i, bound;
1678 st_data_t key, *keys_start, *keys_end;
1679 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1680
1681 bound = tab->entries_bound;
1682 keys_start = keys;
1683 keys_end = keys + size;
1684 for (i = tab->entries_start; i < bound; i++) {
1685 if (keys == keys_end)
1686 break;
1687 curr_entry_ptr = &entries[i];
1688 key = curr_entry_ptr->key;
1689 if (! DELETED_ENTRY_P(curr_entry_ptr))
1690 *keys++ = key;
1691 }
1692
1693 return keys - keys_start;
1694}
1695
1696st_index_t
1697st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1698{
1699 return st_general_keys(tab, keys, size);
1700}
1701
1702/* See comments for function st_delete_safe. */
1703st_index_t
1704st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1705 st_data_t never ATTRIBUTE_UNUSED)
1706{
1707 return st_general_keys(tab, keys, size);
1708}
1709
1710/* Set up array VALUES by at most SIZE values of head table TAB
1711 entries. Return the number of values set up in array VALUES. */
1712static inline st_index_t
1713st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1714{
1715 st_index_t i, bound;
1716 st_data_t *values_start, *values_end;
1717 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1718
1719 values_start = values;
1720 values_end = values + size;
1721 bound = tab->entries_bound;
1722 for (i = tab->entries_start; i < bound; i++) {
1723 if (values == values_end)
1724 break;
1725 curr_entry_ptr = &entries[i];
1726 if (! DELETED_ENTRY_P(curr_entry_ptr))
1727 *values++ = curr_entry_ptr->record;
1728 }
1729
1730 return values - values_start;
1731}
1732
1733st_index_t
1734st_values(st_table *tab, st_data_t *values, st_index_t size)
1735{
1736 return st_general_values(tab, values, size);
1737}
1738
1739/* See comments for function st_delete_safe. */
1740st_index_t
1741st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1742 st_data_t never ATTRIBUTE_UNUSED)
1743{
1744 return st_general_values(tab, values, size);
1745}
1746
1747#define FNV1_32A_INIT 0x811c9dc5
1748
1749/*
1750 * 32 bit magic FNV-1a prime
1751 */
1752#define FNV_32_PRIME 0x01000193
1753
1754/* __POWERPC__ added to accommodate Darwin case. */
1755#ifndef UNALIGNED_WORD_ACCESS
1756# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1757 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1758 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1759 defined(__mc68020__)
1760# define UNALIGNED_WORD_ACCESS 1
1761# endif
1762#endif
1763#ifndef UNALIGNED_WORD_ACCESS
1764# define UNALIGNED_WORD_ACCESS 0
1765#endif
1766
1767/* This hash function is quite simplified MurmurHash3
1768 * Simplification is legal, cause most of magic still happens in finalizator.
1769 * And finalizator is almost the same as in MurmurHash3 */
1770#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1771#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1772
1773#if ST_INDEX_BITS <= 32
1774#define C1 (st_index_t)0xcc9e2d51
1775#define C2 (st_index_t)0x1b873593
1776#else
1777#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1778#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1779#endif
1780NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1781NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1782NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1783
1784static inline st_index_t
1785murmur_step(st_index_t h, st_index_t k)
1786{
1787#if ST_INDEX_BITS <= 32
1788#define r1 (17)
1789#define r2 (11)
1790#else
1791#define r1 (33)
1792#define r2 (24)
1793#endif
1794 k *= C1;
1795 h ^= ROTL(k, r1);
1796 h *= C2;
1797 h = ROTL(h, r2);
1798 return h;
1799}
1800#undef r1
1801#undef r2
1802
1803static inline st_index_t
1804murmur_finish(st_index_t h)
1805{
1806#if ST_INDEX_BITS <= 32
1807#define r1 (16)
1808#define r2 (13)
1809#define r3 (16)
1810 const st_index_t c1 = 0x85ebca6b;
1811 const st_index_t c2 = 0xc2b2ae35;
1812#else
1813/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1814#define r1 (30)
1815#define r2 (27)
1816#define r3 (31)
1817 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1818 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1819#endif
1820#if ST_INDEX_BITS > 64
1821 h ^= h >> 64;
1822 h *= c2;
1823 h ^= h >> 65;
1824#endif
1825 h ^= h >> r1;
1826 h *= c1;
1827 h ^= h >> r2;
1828 h *= c2;
1829 h ^= h >> r3;
1830 return h;
1831}
1832#undef r1
1833#undef r2
1834#undef r3
1835
1836st_index_t
1837st_hash(const void *ptr, size_t len, st_index_t h)
1838{
1839 const char *data = ptr;
1840 st_index_t t = 0;
1841 size_t l = len;
1842
1843#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1844#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1845#if SIZEOF_ST_INDEX_T > 4
1846#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1847#if SIZEOF_ST_INDEX_T > 8
1848#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1849 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1850#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1851#endif
1852#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1853#else
1854#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1855#endif
1856#undef SKIP_TAIL
1857 if (len >= sizeof(st_index_t)) {
1858#if !UNALIGNED_WORD_ACCESS
1859 int align = (int)((st_data_t)data % sizeof(st_index_t));
1860 if (align) {
1861 st_index_t d = 0;
1862 int sl, sr, pack;
1863
1864 switch (align) {
1865#ifdef WORDS_BIGENDIAN
1866# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1867 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1868#else
1869# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1870 t |= data_at(n) << CHAR_BIT*(n)
1871#endif
1872 UNALIGNED_ADD_ALL;
1873#undef UNALIGNED_ADD
1874 }
1875
1876#ifdef WORDS_BIGENDIAN
1877 t >>= (CHAR_BIT * align) - CHAR_BIT;
1878#else
1879 t <<= (CHAR_BIT * align);
1880#endif
1881
1882 data += sizeof(st_index_t)-align;
1883 len -= sizeof(st_index_t)-align;
1884
1885 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1886 sr = CHAR_BIT * align;
1887
1888 while (len >= sizeof(st_index_t)) {
1889 d = *(st_index_t *)data;
1890#ifdef WORDS_BIGENDIAN
1891 t = (t << sr) | (d >> sl);
1892#else
1893 t = (t >> sr) | (d << sl);
1894#endif
1895 h = murmur_step(h, t);
1896 t = d;
1897 data += sizeof(st_index_t);
1898 len -= sizeof(st_index_t);
1899 }
1900
1901 pack = len < (size_t)align ? (int)len : align;
1902 d = 0;
1903 switch (pack) {
1904#ifdef WORDS_BIGENDIAN
1905# define UNALIGNED_ADD(n) case (n) + 1: \
1906 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1907#else
1908# define UNALIGNED_ADD(n) case (n) + 1: \
1909 d |= data_at(n) << CHAR_BIT*(n)
1910#endif
1911 UNALIGNED_ADD_ALL;
1912#undef UNALIGNED_ADD
1913 }
1914#ifdef WORDS_BIGENDIAN
1915 t = (t << sr) | (d >> sl);
1916#else
1917 t = (t >> sr) | (d << sl);
1918#endif
1919
1920 if (len < (size_t)align) goto skip_tail;
1921# define SKIP_TAIL 1
1922 h = murmur_step(h, t);
1923 data += pack;
1924 len -= pack;
1925 }
1926 else
1927#endif
1928#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1929#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1930#else
1931#define aligned_data data
1932#endif
1933 {
1934 do {
1935 h = murmur_step(h, *(st_index_t *)aligned_data);
1936 data += sizeof(st_index_t);
1937 len -= sizeof(st_index_t);
1938 } while (len >= sizeof(st_index_t));
1939 }
1940 }
1941
1942 t = 0;
1943 switch (len) {
1944#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1945 /* in this case byteorder doesn't really matter */
1946#if SIZEOF_ST_INDEX_T > 4
1947 case 7: t |= data_at(6) << 48;
1948 case 6: t |= data_at(5) << 40;
1949 case 5: t |= data_at(4) << 32;
1950 case 4:
1951 t |= (st_index_t)*(uint32_t*)aligned_data;
1952 goto skip_tail;
1953# define SKIP_TAIL 1
1954#endif
1955 case 3: t |= data_at(2) << 16;
1956 case 2: t |= data_at(1) << 8;
1957 case 1: t |= data_at(0);
1958#else
1959#ifdef WORDS_BIGENDIAN
1960# define UNALIGNED_ADD(n) case (n) + 1: \
1961 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1962#else
1963# define UNALIGNED_ADD(n) case (n) + 1: \
1964 t |= data_at(n) << CHAR_BIT*(n)
1965#endif
1966 UNALIGNED_ADD_ALL;
1967#undef UNALIGNED_ADD
1968#endif
1969#ifdef SKIP_TAIL
1970 skip_tail:
1971#endif
1972 h ^= t; h -= ROTL(t, 7);
1973 h *= C2;
1974 }
1975 h ^= l;
1976#undef aligned_data
1977
1978 return murmur_finish(h);
1979}
1980
1981st_index_t
1982st_hash_uint32(st_index_t h, uint32_t i)
1983{
1984 return murmur_step(h, i);
1985}
1986
1987NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1988st_index_t
1989st_hash_uint(st_index_t h, st_index_t i)
1990{
1991 i += h;
1992/* no matter if it is BigEndian or LittleEndian,
1993 * we hash just integers */
1994#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1995 h = murmur_step(h, i >> 8*8);
1996#endif
1997 h = murmur_step(h, i);
1998 return h;
1999}
2000
2001st_index_t
2002st_hash_end(st_index_t h)
2003{
2004 h = murmur_finish(h);
2005 return h;
2006}
2007
2008#undef st_hash_start
2009st_index_t
2010rb_st_hash_start(st_index_t h)
2011{
2012 return h;
2013}
2014
2015static st_index_t
2016strhash(st_data_t arg)
2017{
2018 register const char *string = (const char *)arg;
2019 return st_hash(string, strlen(string), FNV1_32A_INIT);
2020}
2021
2022int
2023st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2024{
2025 char c1, c2;
2026
2027 while (1) {
2028 c1 = *s1++;
2029 c2 = *s2++;
2030 if (c1 == '\0' || c2 == '\0') {
2031 if (c1 != '\0') return 1;
2032 if (c2 != '\0') return -1;
2033 return 0;
2034 }
2035 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2036 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2037 if (c1 != c2) {
2038 if (c1 > c2)
2039 return 1;
2040 else
2041 return -1;
2042 }
2043 }
2044}
2045
2046int
2047st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2048{
2049 char c1, c2;
2050 size_t i;
2051
2052 for (i = 0; i < n; i++) {
2053 c1 = *s1++;
2054 c2 = *s2++;
2055 if (c1 == '\0' || c2 == '\0') {
2056 if (c1 != '\0') return 1;
2057 if (c2 != '\0') return -1;
2058 return 0;
2059 }
2060 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2061 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2062 if (c1 != c2) {
2063 if (c1 > c2)
2064 return 1;
2065 else
2066 return -1;
2067 }
2068 }
2069 return 0;
2070}
2071
2072static int
2073st_strcmp(st_data_t lhs, st_data_t rhs)
2074{
2075 const char *s1 = (char *)lhs;
2076 const char *s2 = (char *)rhs;
2077 return strcmp(s1, s2);
2078}
2079
2080static int
2081st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2082{
2083 const char *s1 = (char *)lhs;
2084 const char *s2 = (char *)rhs;
2085 return st_locale_insensitive_strcasecmp(s1, s2);
2086}
2087
2088NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2089static st_index_t
2090strcasehash(st_data_t arg)
2091{
2092 register const char *string = (const char *)arg;
2093 register st_index_t hval = FNV1_32A_INIT;
2094
2095 /*
2096 * FNV-1a hash each octet in the buffer
2097 */
2098 while (*string) {
2099 unsigned int c = (unsigned char)*string++;
2100 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2101 hval ^= c;
2102
2103 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2104 hval *= FNV_32_PRIME;
2105 }
2106 return hval;
2107}
2108
2109int
2110st_numcmp(st_data_t x, st_data_t y)
2111{
2112 return x != y;
2113}
2114
2115st_index_t
2116st_numhash(st_data_t n)
2117{
2118 enum {s1 = 11, s2 = 3};
2119 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2120}
2121
2122#ifdef RUBY
2123/* Expand TAB to be suitable for holding SIZ entries in total.
2124 Pre-existing entries remain not deleted inside of TAB, but its bins
2125 are cleared to expect future reconstruction. See rehash below. */
2126static void
2127st_expand_table(st_table *tab, st_index_t siz)
2128{
2129 st_table *tmp;
2130 st_index_t n;
2131
2132 if (siz <= get_allocated_entries(tab))
2133 return; /* enough room already */
2134
2135 tmp = st_init_table_with_size(tab->type, siz);
2136 n = get_allocated_entries(tab);
2137 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2138 free(tab->entries);
2139 free(tab->bins);
2140 free(tmp->bins);
2141 tab->entry_power = tmp->entry_power;
2142 tab->bin_power = tmp->bin_power;
2143 tab->size_ind = tmp->size_ind;
2144 tab->entries = tmp->entries;
2145 tab->bins = NULL;
2146 tab->rebuilds_num++;
2147 free(tmp);
2148}
2149
2150/* Rehash using linear search. Return TRUE if we found that the table
2151 was rebuilt. */
2152static int
2153st_rehash_linear(st_table *tab)
2154{
2155 int eq_p, rebuilt_p;
2156 st_index_t i, j;
2157 st_table_entry *p, *q;
2158
2159 free(tab->bins);
2160 tab->bins = NULL;
2161
2162 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2163 p = &tab->entries[i];
2164 if (DELETED_ENTRY_P(p))
2165 continue;
2166 for (j = i + 1; j < tab->entries_bound; j++) {
2167 q = &tab->entries[j];
2168 if (DELETED_ENTRY_P(q))
2169 continue;
2170 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2171 if (EXPECT(rebuilt_p, 0))
2172 return TRUE;
2173 if (eq_p) {
2174 *p = *q;
2175 MARK_ENTRY_DELETED(q);
2176 tab->num_entries--;
2177 update_range_for_deleted(tab, j);
2178 }
2179 }
2180 }
2181 return FALSE;
2182}
2183
2184/* Rehash using index. Return TRUE if we found that the table was
2185 rebuilt. */
2186static int
2187st_rehash_indexed(st_table *tab)
2188{
2189 int eq_p, rebuilt_p;
2190 st_index_t i;
2191 st_index_t const n = bins_size(tab);
2192 unsigned int const size_ind = get_size_ind(tab);
2193 st_index_t *bins = realloc(tab->bins, n);
2194 tab->bins = bins;
2195 initialize_bins(tab);
2196 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2197 st_table_entry *p = &tab->entries[i];
2198 st_index_t ind;
2199#ifdef QUADRATIC_PROBE
2200 st_index_t d = 1;
2201#else
2202 st_index_t perturb = p->hash;
2203#endif
2204
2205 if (DELETED_ENTRY_P(p))
2206 continue;
2207
2208 ind = hash_bin(p->hash, tab);
2209 for (;;) {
2210 st_index_t bin = get_bin(bins, size_ind, ind);
2211 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2212 /* ok, new room */
2213 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2214 break;
2215 }
2216 else {
2217 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2218 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2219 if (EXPECT(rebuilt_p, 0))
2220 return TRUE;
2221 if (eq_p) {
2222 /* duplicated key; delete it */
2223 q->record = p->record;
2224 MARK_ENTRY_DELETED(p);
2225 tab->num_entries--;
2226 update_range_for_deleted(tab, bin);
2227 break;
2228 }
2229 else {
2230 /* hash collision; skip it */
2231#ifdef QUADRATIC_PROBE
2232 ind = hash_bin(ind + d, tab);
2233 d++;
2234#else
2235 ind = secondary_hash(ind, tab, &perturb);
2236#endif
2237 }
2238 }
2239 }
2240 }
2241 return FALSE;
2242}
2243
2244/* Reconstruct TAB's bins according to TAB's entries. This function
2245 permits conflicting keys inside of entries. No errors are reported
2246 then. All but one of them are discarded silently. */
2247static void
2248st_rehash(st_table *tab)
2249{
2250 int rebuilt_p;
2251
2252 do {
2253 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2254 rebuilt_p = st_rehash_linear(tab);
2255 else
2256 rebuilt_p = st_rehash_indexed(tab);
2257 } while (rebuilt_p);
2258}
2259
2260static st_data_t
2261st_stringify(VALUE key)
2262{
2263 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2264 rb_hash_key_str(key) : key;
2265}
2266
2267static void
2268st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2269{
2270 st_data_t k = st_stringify(key);
2272 e.hash = do_hash(k, tab);
2273 e.key = k;
2274 e.record = val;
2275
2276 tab->entries[tab->entries_bound++] = e;
2277 tab->num_entries++;
2278 RB_OBJ_WRITTEN(hash, Qundef, k);
2279 RB_OBJ_WRITTEN(hash, Qundef, val);
2280}
2281
2282static void
2283st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2284{
2285 long i;
2286
2287 for (i = 0; i < argc; /* */) {
2288 st_data_t k = st_stringify(argv[i++]);
2289 st_data_t v = argv[i++];
2290 st_insert(tab, k, v);
2291 RB_OBJ_WRITTEN(hash, Qundef, k);
2292 RB_OBJ_WRITTEN(hash, Qundef, v);
2293 }
2294}
2295
2296static void
2297st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2298{
2299 long i;
2300
2301 /* push elems */
2302 for (i = 0; i < argc; /* */) {
2303 VALUE key = argv[i++];
2304 VALUE val = argv[i++];
2305 st_insert_single(tab, hash, key, val);
2306 }
2307
2308 /* reindex */
2309 st_rehash(tab);
2310}
2311
2312/* Mimics ruby's { foo => bar } syntax. This function is subpart
2313 of rb_hash_bulk_insert. */
2314void
2315rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2316{
2317 st_index_t n, size = argc / 2;
2318 st_table *tab = RHASH_ST_TABLE(hash);
2319
2320 tab = RHASH_TBL_RAW(hash);
2321 n = tab->entries_bound + size;
2322 st_expand_table(tab, n);
2323 if (UNLIKELY(tab->num_entries))
2324 st_insert_generic(tab, argc, argv, hash);
2325 else if (argc <= 2)
2326 st_insert_single(tab, hash, argv[0], argv[1]);
2327 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2328 st_insert_linear(tab, argc, argv, hash);
2329 else
2330 st_insert_generic(tab, argc, argv, hash);
2331}
2332
2333void
2334rb_st_compact_table(st_table *tab)
2335{
2336 st_index_t num = tab->num_entries;
2337 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2338 /* Compaction: */
2339 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2340 rebuild_table_with(new_tab, tab);
2341 rebuild_move_table(new_tab, tab);
2342 rebuild_cleanup(tab);
2343 }
2344}
2345
2346/*
2347 * set_table related code
2348 */
2349
2350struct set_table_entry {
2351 st_hash_t hash;
2352 st_data_t key;
2353};
2354
2355/* Return hash value of KEY for table TAB. */
2356static inline st_hash_t
2357set_do_hash(st_data_t key, set_table *tab)
2358{
2359 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2360 return normalize_hash_value(hash);
2361}
2362
2363/* Return bin size index of table TAB. */
2364static inline unsigned int
2365set_get_size_ind(const set_table *tab)
2366{
2367 return tab->size_ind;
2368}
2369
2370/* Return the number of allocated bins of table TAB. */
2371static inline st_index_t
2372set_get_bins_num(const set_table *tab)
2373{
2374 return ((st_index_t) 1)<<tab->bin_power;
2375}
2376
2377/* Return mask for a bin index in table TAB. */
2378static inline st_index_t
2379set_bins_mask(const set_table *tab)
2380{
2381 return set_get_bins_num(tab) - 1;
2382}
2383
2384/* Return the index of table TAB bin corresponding to
2385 HASH_VALUE. */
2386static inline st_index_t
2387set_hash_bin(st_hash_t hash_value, set_table *tab)
2388{
2389 return hash_value & set_bins_mask(tab);
2390}
2391
2392/* Return the number of allocated entries of table TAB. */
2393static inline st_index_t
2394set_get_allocated_entries(const set_table *tab)
2395{
2396 return ((st_index_t) 1)<<tab->entry_power;
2397}
2398
2399static inline size_t
2400set_allocated_entries_size(const set_table *tab)
2401{
2402 return set_get_allocated_entries(tab) * sizeof(set_table_entry);
2403}
2404
2405static inline bool
2406set_has_bins(const set_table *tab)
2407{
2408 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
2409}
2410
2411/* Return size of the allocated bins of table TAB. */
2412static inline st_index_t
2413set_bins_size(const set_table *tab)
2414{
2415 if (set_has_bins(tab)) {
2416 return features[tab->entry_power].bins_words * sizeof (st_index_t);
2417 }
2418
2419 return 0;
2420}
2421
2422static inline st_index_t *
2423set_bins_ptr(const set_table *tab)
2424{
2425 if (set_has_bins(tab)) {
2426 return (st_index_t *)(((char *)tab->entries) + set_allocated_entries_size(tab));
2427 }
2428
2429 return NULL;
2430}
2431
2432/* Mark all bins of table TAB as empty. */
2433static void
2434set_initialize_bins(set_table *tab)
2435{
2436 memset(set_bins_ptr(tab), 0, set_bins_size(tab));
2437}
2438
2439/* Make table TAB empty. */
2440static void
2441set_make_tab_empty(set_table *tab)
2442{
2443 tab->num_entries = 0;
2444 tab->entries_start = tab->entries_bound = 0;
2445 if (set_bins_ptr(tab) != NULL)
2446 set_initialize_bins(tab);
2447}
2448
2449static set_table *
2450set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2451{
2452 int n;
2453
2454#ifdef HASH_LOG
2455#if HASH_LOG+0 < 0
2456 {
2457 const char *e = getenv("ST_HASH_LOG");
2458 if (!e || !*e) init_st = 1;
2459 }
2460#endif
2461 if (init_st == 0) {
2462 init_st = 1;
2463 atexit(stat_col);
2464 }
2465#endif
2466
2467 n = get_power2(size);
2468
2469 tab->type = type;
2470 tab->entry_power = n;
2471 tab->bin_power = features[n].bin_power;
2472 tab->size_ind = features[n].size_ind;
2473
2474 size_t memsize = 0;
2475 if (set_has_bins(tab)) {
2476 memsize += set_bins_size(tab);
2477 }
2478 memsize += set_get_allocated_entries(tab) * sizeof(set_table_entry);
2479 tab->entries = (set_table_entry *)malloc(memsize);
2480 set_make_tab_empty(tab);
2481 tab->rebuilds_num = 0;
2482 return tab;
2483}
2484
2485/* Create and return table with TYPE which can hold at least SIZE
2486 entries. The real number of entries which the table can hold is
2487 the nearest power of two for SIZE. */
2488set_table *
2489set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2490{
2491 if (tab == NULL) tab = malloc(sizeof(set_table));
2492
2493 set_init_existing_table_with_size(tab, type, size);
2494
2495 return tab;
2496}
2497
2498set_table *
2499set_init_numtable(void)
2500{
2501 return set_init_table_with_size(NULL, &type_numhash, 0);
2502}
2503
2504set_table *
2505set_init_numtable_with_size(st_index_t size)
2506{
2507 return set_init_table_with_size(NULL, &type_numhash, size);
2508}
2509
2510size_t
2511set_table_size(const struct set_table *tbl)
2512{
2513 return tbl->num_entries;
2514}
2515
2516/* Make table TAB empty. */
2517void
2518set_table_clear(set_table *tab)
2519{
2520 set_make_tab_empty(tab);
2521 tab->rebuilds_num++;
2522}
2523
2524/* Free table TAB space. This should only be used if you passed NULL to
2525 set_init_table_with_size/set_copy when creating the table. */
2526void
2527set_free_table(set_table *tab)
2528{
2529 free(tab->entries);
2530 free(tab);
2531}
2532
2533/* Return byte size of memory allocated for table TAB. */
2534size_t
2535set_memsize(const set_table *tab)
2536{
2537 return(sizeof(set_table)
2538 + (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
2539 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2540}
2541
2542static st_index_t
2543set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2544
2545static st_index_t
2546set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2547
2548static st_index_t
2549set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key);
2550
2551static st_index_t
2552set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2553 st_data_t key, st_index_t *bin_ind);
2554
2555static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
2556static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
2557static void set_rebuild_cleanup(set_table *const tab);
2558
2559/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
2560 and can change size of the table entries and bins arrays.
2561 Rebuilding is implemented by creation of a new table or by
2562 compaction of the existing one. */
2563static void
2564set_rebuild_table(set_table *tab)
2565{
2566 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2567 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2568 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2569 /* Compaction: */
2570 tab->num_entries = 0;
2571 if (set_has_bins(tab))
2572 set_initialize_bins(tab);
2573 set_rebuild_table_with(tab, tab);
2574 }
2575 else {
2576 set_table *new_tab;
2577 /* This allocation could trigger GC and compaction. If tab is the
2578 * gen_fields_tbl, then tab could have changed in size due to objects being
2579 * freed and/or moved. Do not store attributes of tab before this line. */
2580 new_tab = set_init_table_with_size(NULL, tab->type,
2581 2 * tab->num_entries - 1);
2582 set_rebuild_table_with(new_tab, tab);
2583 set_rebuild_move_table(new_tab, tab);
2584 }
2585 set_rebuild_cleanup(tab);
2586}
2587
2588static void
2589set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
2590{
2591 st_index_t i, ni;
2592 unsigned int size_ind;
2593 set_table_entry *new_entries;
2594 set_table_entry *curr_entry_ptr;
2595 st_index_t *bins;
2596 st_index_t bin_ind;
2597
2598 new_entries = new_tab->entries;
2599
2600 ni = 0;
2601 bins = set_bins_ptr(new_tab);
2602 size_ind = set_get_size_ind(new_tab);
2603 st_index_t bound = tab->entries_bound;
2604 set_table_entry *entries = tab->entries;
2605
2606 for (i = tab->entries_start; i < bound; i++) {
2607 curr_entry_ptr = &entries[i];
2608 PREFETCH(entries + i + 1, 0);
2609 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2610 continue;
2611 if (&new_entries[ni] != curr_entry_ptr)
2612 new_entries[ni] = *curr_entry_ptr;
2613 if (EXPECT(bins != NULL, 1)) {
2614 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2615 curr_entry_ptr->key);
2616 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2617 }
2618 new_tab->num_entries++;
2619 ni++;
2620 }
2621
2622 assert(new_tab->num_entries == tab->num_entries);
2623}
2624
2625static void
2626set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
2627{
2628 tab->entry_power = new_tab->entry_power;
2629 tab->bin_power = new_tab->bin_power;
2630 tab->size_ind = new_tab->size_ind;
2631 free(tab->entries);
2632 tab->entries = new_tab->entries;
2633 free(new_tab);
2634}
2635
2636static void
2637set_rebuild_cleanup(set_table *const tab)
2638{
2639 tab->entries_start = 0;
2640 tab->entries_bound = tab->num_entries;
2641 tab->rebuilds_num++;
2642}
2643
2644/* Return the next secondary hash index for table TAB using previous
2645 index IND and PERTURB. Finally modulo of the function becomes a
2646 full *cycle linear congruential generator*, in other words it
2647 guarantees traversing all table bins in extreme case.
2648
2649 According the Hull-Dobell theorem a generator
2650 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
2651 o m and c are relatively prime
2652 o a-1 is divisible by all prime factors of m
2653 o a-1 is divisible by 4 if m is divisible by 4.
2654
2655 For our case a is 5, c is 1, and m is a power of two. */
2656static inline st_index_t
2657set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
2658{
2659 *perturb >>= 11;
2660 ind = (ind << 2) + ind + *perturb + 1;
2661 return set_hash_bin(ind, tab);
2662}
2663
2664/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
2665 search. Return the index of the found entry in array `entries`.
2666 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
2667 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2668static inline st_index_t
2669set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
2670{
2671 int eq_p, rebuilt_p;
2672 st_index_t i, bound;
2673 set_table_entry *entries;
2674
2675 bound = tab->entries_bound;
2676 entries = tab->entries;
2677 for (i = tab->entries_start; i < bound; i++) {
2678 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2679 if (EXPECT(rebuilt_p, 0))
2680 return REBUILT_TABLE_ENTRY_IND;
2681 if (eq_p)
2682 return i;
2683 }
2684 return UNDEFINED_ENTRY_IND;
2685}
2686
2687/* Use the quadratic probing. The method has a better data locality
2688 but more collisions than the current approach. In average it
2689 results in a bit slower search. */
2690/*#define QUADRATIC_PROBE*/
2691
2692/* Return index of entry with HASH_VALUE and KEY in table TAB. If
2693 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
2694 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2695static st_index_t
2696set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2697{
2698 int eq_p, rebuilt_p;
2699 st_index_t ind;
2700#ifdef QUADRATIC_PROBE
2701 st_index_t d;
2702#else
2703 st_index_t perturb;
2704#endif
2705 st_index_t bin;
2706 set_table_entry *entries = tab->entries;
2707
2708 ind = set_hash_bin(hash_value, tab);
2709#ifdef QUADRATIC_PROBE
2710 d = 1;
2711#else
2712 perturb = hash_value;
2713#endif
2714 for (;;) {
2715 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2716 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2717 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2718 if (EXPECT(rebuilt_p, 0))
2719 return REBUILT_TABLE_ENTRY_IND;
2720 if (eq_p)
2721 break;
2722 }
2723 else if (EMPTY_BIN_P(bin))
2724 return UNDEFINED_ENTRY_IND;
2725#ifdef QUADRATIC_PROBE
2726 ind = set_hash_bin(ind + d, tab);
2727 d++;
2728#else
2729 ind = set_secondary_hash(ind, tab, &perturb);
2730#endif
2731 }
2732 return bin;
2733}
2734
2735/* Find and return index of table TAB bin corresponding to an entry
2736 with HASH_VALUE and KEY. If there is no such bin, return
2737 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
2738 return REBUILT_TABLE_BIN_IND. */
2739static st_index_t
2740set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2741{
2742 int eq_p, rebuilt_p;
2743 st_index_t ind;
2744#ifdef QUADRATIC_PROBE
2745 st_index_t d;
2746#else
2747 st_index_t perturb;
2748#endif
2749 st_index_t bin;
2750 set_table_entry *entries = tab->entries;
2751
2752 ind = set_hash_bin(hash_value, tab);
2753#ifdef QUADRATIC_PROBE
2754 d = 1;
2755#else
2756 perturb = hash_value;
2757#endif
2758 for (;;) {
2759 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2760 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2761 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2762 if (EXPECT(rebuilt_p, 0))
2763 return REBUILT_TABLE_BIN_IND;
2764 if (eq_p)
2765 break;
2766 }
2767 else if (EMPTY_BIN_P(bin))
2768 return UNDEFINED_BIN_IND;
2769#ifdef QUADRATIC_PROBE
2770 ind = set_hash_bin(ind + d, tab);
2771 d++;
2772#else
2773 ind = set_secondary_hash(ind, tab, &perturb);
2774#endif
2775 }
2776 return ind;
2777}
2778
2779/* Find and return index of table TAB bin corresponding to an entry
2780 with HASH_VALUE and KEY. The entry should be in the table
2781 already. */
2782static st_index_t
2783set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
2784{
2785 st_index_t ind;
2786#ifdef QUADRATIC_PROBE
2787 st_index_t d;
2788#else
2789 st_index_t perturb;
2790#endif
2791 st_index_t bin;
2792
2793 ind = set_hash_bin(hash_value, tab);
2794#ifdef QUADRATIC_PROBE
2795 d = 1;
2796#else
2797 perturb = hash_value;
2798#endif
2799 for (;;) {
2800 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2801 if (EMPTY_OR_DELETED_BIN_P(bin))
2802 return ind;
2803#ifdef QUADRATIC_PROBE
2804 ind = set_hash_bin(ind + d, tab);
2805 d++;
2806#else
2807 ind = set_secondary_hash(ind, tab, &perturb);
2808#endif
2809 }
2810}
2811
2812/* Mark I-th bin of table TAB as empty, in other words not
2813 corresponding to any entry. */
2814#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
2815
2816/* Return index of table TAB bin for HASH_VALUE and KEY through
2817 BIN_IND and the pointed value as the function result. Reserve the
2818 bin for inclusion of the corresponding entry into the table if it
2819 is not there yet. We always find such bin as bins array length is
2820 bigger entries array. Although we can reuse a deleted bin, the
2821 result bin value is always empty if the table has no entry with
2822 KEY. Return the entries array index of the found entry or
2823 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
2824 during the search, return REBUILT_TABLE_ENTRY_IND. */
2825static st_index_t
2826set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2827 st_data_t key, st_index_t *bin_ind)
2828{
2829 int eq_p, rebuilt_p;
2830 st_index_t ind;
2831 st_hash_t curr_hash_value = *hash_value;
2832#ifdef QUADRATIC_PROBE
2833 st_index_t d;
2834#else
2835 st_index_t perturb;
2836#endif
2837 st_index_t entry_index;
2838 st_index_t firset_deleted_bin_ind;
2839 set_table_entry *entries;
2840
2841 ind = set_hash_bin(curr_hash_value, tab);
2842#ifdef QUADRATIC_PROBE
2843 d = 1;
2844#else
2845 perturb = curr_hash_value;
2846#endif
2847 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2848 entries = tab->entries;
2849 for (;;) {
2850 entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2851 if (EMPTY_BIN_P(entry_index)) {
2852 tab->num_entries++;
2853 entry_index = UNDEFINED_ENTRY_IND;
2854 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2855 /* We can reuse bin of a deleted entry. */
2856 ind = firset_deleted_bin_ind;
2857 MARK_SET_BIN_EMPTY(tab, ind);
2858 }
2859 break;
2860 }
2861 else if (! DELETED_BIN_P(entry_index)) {
2862 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2863 if (EXPECT(rebuilt_p, 0))
2864 return REBUILT_TABLE_ENTRY_IND;
2865 if (eq_p)
2866 break;
2867 }
2868 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2869 firset_deleted_bin_ind = ind;
2870#ifdef QUADRATIC_PROBE
2871 ind = set_hash_bin(ind + d, tab);
2872 d++;
2873#else
2874 ind = set_secondary_hash(ind, tab, &perturb);
2875#endif
2876 }
2877 *bin_ind = ind;
2878 return entry_index;
2879}
2880
2881/* Find an entry with KEY in table TAB. Return non-zero if we found
2882 it. */
2883int
2884set_table_lookup(set_table *tab, st_data_t key)
2885{
2886 st_index_t bin;
2887 st_hash_t hash = set_do_hash(key, tab);
2888
2889 retry:
2890 if (!set_has_bins(tab)) {
2891 bin = set_find_entry(tab, hash, key);
2892 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2893 goto retry;
2894 if (bin == UNDEFINED_ENTRY_IND)
2895 return 0;
2896 }
2897 else {
2898 bin = set_find_table_entry_ind(tab, hash, key);
2899 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2900 goto retry;
2901 if (bin == UNDEFINED_ENTRY_IND)
2902 return 0;
2903 bin -= ENTRY_BASE;
2904 }
2905 return 1;
2906}
2907
2908/* Check the table and rebuild it if it is necessary. */
2909static inline void
2910set_rebuild_table_if_necessary (set_table *tab)
2911{
2912 st_index_t bound = tab->entries_bound;
2913
2914 if (bound == set_get_allocated_entries(tab))
2915 set_rebuild_table(tab);
2916}
2917
2918/* Insert KEY into table TAB and return zero. If there is
2919 already entry with KEY in the table, return nonzero and update
2920 the value of the found entry. */
2921int
2922set_insert(set_table *tab, st_data_t key)
2923{
2924 set_table_entry *entry;
2925 st_index_t bin;
2926 st_index_t ind;
2927 st_hash_t hash_value;
2928 st_index_t bin_ind;
2929 int new_p;
2930
2931 hash_value = set_do_hash(key, tab);
2932 retry:
2933 set_rebuild_table_if_necessary(tab);
2934 if (!set_has_bins(tab)) {
2935 bin = set_find_entry(tab, hash_value, key);
2936 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2937 goto retry;
2938 new_p = bin == UNDEFINED_ENTRY_IND;
2939 if (new_p)
2940 tab->num_entries++;
2941 bin_ind = UNDEFINED_BIN_IND;
2942 }
2943 else {
2944 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
2945 key, &bin_ind);
2946 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2947 goto retry;
2948 new_p = bin == UNDEFINED_ENTRY_IND;
2949 bin -= ENTRY_BASE;
2950 }
2951 if (new_p) {
2952 ind = tab->entries_bound++;
2953 entry = &tab->entries[ind];
2954 entry->hash = hash_value;
2955 entry->key = key;
2956 if (bin_ind != UNDEFINED_BIN_IND)
2957 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
2958 return 0;
2959 }
2960 return 1;
2961}
2962
2963/* Create a copy of old_tab into new_tab. */
2964static set_table *
2965set_replace(set_table *new_tab, set_table *old_tab)
2966{
2967 *new_tab = *old_tab;
2968 size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
2969 new_tab->entries = (set_table_entry *)malloc(memsize);
2970 MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
2971 return new_tab;
2972}
2973
2974/* Create and return a copy of table OLD_TAB. */
2975set_table *
2976set_copy(set_table *new_tab, set_table *old_tab)
2977{
2978 if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
2979
2980 if (set_replace(new_tab, old_tab) == NULL) {
2981 set_free_table(new_tab);
2982 return NULL;
2983 }
2984
2985 return new_tab;
2986}
2987
2988/* Update the entries start of table TAB after removing an entry
2989 with index N in the array entries. */
2990static inline void
2991set_update_range_for_deleted(set_table *tab, st_index_t n)
2992{
2993 /* Do not update entries_bound here. Otherwise, we can fill all
2994 bins by deleted entry value before rebuilding the table. */
2995 if (tab->entries_start == n) {
2996 st_index_t start = n + 1;
2997 st_index_t bound = tab->entries_bound;
2998 set_table_entry *entries = tab->entries;
2999 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
3000 tab->entries_start = start;
3001 }
3002}
3003
3004/* Mark I-th bin of table TAB as corresponding to a deleted table
3005 entry. Update number of entries in the table and number of bins
3006 corresponding to deleted entries. */
3007#define MARK_SET_BIN_DELETED(tab, i) \
3008 do { \
3009 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
3010 } while (0)
3011
3012/* Delete entry with KEY from table TAB, and return non-zero. If
3013 there is no entry with KEY in the table, return zero. */
3014int
3015set_table_delete(set_table *tab, st_data_t *key)
3016{
3017 set_table_entry *entry;
3018 st_index_t bin;
3019 st_index_t bin_ind;
3020 st_hash_t hash;
3021
3022 hash = set_do_hash(*key, tab);
3023 retry:
3024 if (!set_has_bins(tab)) {
3025 bin = set_find_entry(tab, hash, *key);
3026 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3027 goto retry;
3028 if (bin == UNDEFINED_ENTRY_IND) {
3029 return 0;
3030 }
3031 }
3032 else {
3033 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3034 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3035 goto retry;
3036 if (bin_ind == UNDEFINED_BIN_IND) {
3037 return 0;
3038 }
3039 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3040 MARK_SET_BIN_DELETED(tab, bin_ind);
3041 }
3042 entry = &tab->entries[bin];
3043 *key = entry->key;
3044 MARK_ENTRY_DELETED(entry);
3045 tab->num_entries--;
3046 set_update_range_for_deleted(tab, bin);
3047 return 1;
3048}
3049
3050/* Traverse all entries in table TAB calling FUNC with current entry
3051 key and zero. If the call returns ST_STOP, stop
3052 traversing. If the call returns ST_DELETE, delete the current
3053 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
3054 traversing. The function returns zero unless an error is found.
3055 CHECK_P is flag of set_foreach_check call. The behavior is a bit
3056 different for ST_CHECK and when the current element is removed
3057 during traversing. */
3058static inline int
3059set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
3060 set_update_callback_func *replace, st_data_t arg,
3061 int check_p)
3062{
3063 st_index_t bin;
3064 st_index_t bin_ind;
3065 set_table_entry *entries, *curr_entry_ptr;
3066 enum st_retval retval;
3067 st_index_t i, rebuilds_num;
3068 st_hash_t hash;
3069 st_data_t key;
3070 int error_p, packed_p = !set_has_bins(tab);
3071
3072 entries = tab->entries;
3073 /* The bound can change inside the loop even without rebuilding
3074 the table, e.g. by an entry insertion. */
3075 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3076 curr_entry_ptr = &entries[i];
3077 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3078 continue;
3079 key = curr_entry_ptr->key;
3080 rebuilds_num = tab->rebuilds_num;
3081 hash = curr_entry_ptr->hash;
3082 retval = (*func)(key, arg, 0);
3083
3084 if (retval == ST_REPLACE && replace) {
3085 retval = (*replace)(&key, arg, TRUE);
3086 curr_entry_ptr->key = key;
3087 }
3088
3089 if (rebuilds_num != tab->rebuilds_num) {
3090 retry:
3091 entries = tab->entries;
3092 packed_p = !set_has_bins(tab);
3093 if (packed_p) {
3094 i = set_find_entry(tab, hash, key);
3095 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3096 goto retry;
3097 error_p = i == UNDEFINED_ENTRY_IND;
3098 }
3099 else {
3100 i = set_find_table_entry_ind(tab, hash, key);
3101 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3102 goto retry;
3103 error_p = i == UNDEFINED_ENTRY_IND;
3104 i -= ENTRY_BASE;
3105 }
3106 if (error_p && check_p) {
3107 /* call func with error notice */
3108 retval = (*func)(0, arg, 1);
3109 return 1;
3110 }
3111 curr_entry_ptr = &entries[i];
3112 }
3113 switch (retval) {
3114 case ST_REPLACE:
3115 break;
3116 case ST_CONTINUE:
3117 break;
3118 case ST_CHECK:
3119 if (check_p)
3120 break;
3121 case ST_STOP:
3122 return 0;
3123 case ST_DELETE: {
3124 st_data_t key = curr_entry_ptr->key;
3125
3126 again:
3127 if (packed_p) {
3128 bin = set_find_entry(tab, hash, key);
3129 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3130 goto again;
3131 if (bin == UNDEFINED_ENTRY_IND)
3132 break;
3133 }
3134 else {
3135 bin_ind = set_find_table_bin_ind(tab, hash, key);
3136 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3137 goto again;
3138 if (bin_ind == UNDEFINED_BIN_IND)
3139 break;
3140 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3141 MARK_SET_BIN_DELETED(tab, bin_ind);
3142 }
3143 curr_entry_ptr = &entries[bin];
3144 MARK_ENTRY_DELETED(curr_entry_ptr);
3145 tab->num_entries--;
3146 set_update_range_for_deleted(tab, bin);
3147 break;
3148 }
3149 }
3150 }
3151 return 0;
3152}
3153
3154int
3155set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3156{
3157 return set_general_foreach(tab, func, replace, arg, TRUE);
3158}
3159
3160struct set_functor {
3161 set_foreach_callback_func *func;
3162 st_data_t arg;
3163};
3164
3165static int
3166set_apply_functor(st_data_t k, st_data_t d, int _)
3167{
3168 const struct set_functor *f = (void *)d;
3169 return f->func(k, f->arg);
3170}
3171
3172int
3173set_table_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3174{
3175 const struct set_functor f = { func, arg };
3176 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3177}
3178
3179/* See comments for function set_delete_safe. */
3180int
3181set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3182 st_data_t never ATTRIBUTE_UNUSED)
3183{
3184 return set_general_foreach(tab, func, NULL, arg, TRUE);
3185}
3186
3187/* Set up array KEYS by at most SIZE keys of head table TAB entries.
3188 Return the number of keys set up in array KEYS. */
3189inline st_index_t
3190set_keys(set_table *tab, st_data_t *keys, st_index_t size)
3191{
3192 st_index_t i, bound;
3193 st_data_t key, *keys_start, *keys_end;
3194 set_table_entry *curr_entry_ptr, *entries = tab->entries;
3195
3196 bound = tab->entries_bound;
3197 keys_start = keys;
3198 keys_end = keys + size;
3199 for (i = tab->entries_start; i < bound; i++) {
3200 if (keys == keys_end)
3201 break;
3202 curr_entry_ptr = &entries[i];
3203 key = curr_entry_ptr->key;
3204 if (! DELETED_ENTRY_P(curr_entry_ptr))
3205 *keys++ = key;
3206 }
3207
3208 return keys - keys_start;
3209}
3210
3211void
3212set_compact_table(set_table *tab)
3213{
3214 st_index_t num = tab->num_entries;
3215 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3216 /* Compaction: */
3217 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3218 set_rebuild_table_with(new_tab, tab);
3219 set_rebuild_move_table(new_tab, tab);
3220 set_rebuild_cleanup(tab);
3221 }
3222}
3223
3224#endif
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:722
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1416
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:264
VALUE rb_cString
String class.
Definition string.c:84
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:615
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
set_table_entry * entries
Array of size 2^entry_power.
Definition set_table.h:28
Definition st.c:136
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40