Ruby 4.1.0dev (2026-05-19 revision 7e8d94dec00d75f51cbd931d6744d5bb9b19b289)
st.c (7e8d94dec00d75f51cbd931d6744d5bb9b19b289)
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/gc.h"
111#include "internal/hash.h"
112#include "internal/sanitizers.h"
113#include "internal/set_table.h"
114#include "internal/st.h"
115#include "ruby_assert.h"
116#endif
117
118#include <stdio.h>
119#ifdef HAVE_STDLIB_H
120#include <stdlib.h>
121#endif
122#include <string.h>
123
124#ifdef __GNUC__
125#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
126#define EXPECT(expr, val) __builtin_expect(expr, val)
127#define ATTRIBUTE_UNUSED __attribute__((unused))
128#else
129#define PREFETCH(addr, write_p)
130#define EXPECT(expr, val) (expr)
131#define ATTRIBUTE_UNUSED
132#endif
133
134/* The type of hashes. */
135typedef st_index_t st_hash_t;
136
138 st_hash_t hash;
139 st_data_t key;
140 st_data_t record;
141};
142
143#define type_numhash st_hashtype_num
144static const struct st_hash_type st_hashtype_num = {
145 st_numcmp,
146 st_numhash,
147};
148
149static int st_strcmp(st_data_t, st_data_t);
150static st_index_t strhash(st_data_t);
151static const struct st_hash_type type_strhash = {
152 st_strcmp,
153 strhash,
154};
155
156static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
157static st_index_t strcasehash(st_data_t);
158static const struct st_hash_type type_strcasehash = {
159 st_locale_insensitive_strcasecmp_i,
160 strcasehash,
161};
162
163/* Value used to catch uninitialized entries/bins during debugging.
164 There is a possibility for a false alarm, but its probability is
165 extremely small. */
166#define ST_INIT_VAL 0xafafafafafafafaf
167#define ST_INIT_VAL_BYTE 0xafa
168
169#ifdef RUBY
170#undef malloc
171#undef realloc
172#undef calloc
173#undef free
174#define malloc ruby_xmalloc
175#define calloc ruby_xcalloc
176#define realloc ruby_xrealloc
177#define sized_realloc ruby_xrealloc_sized
178#define free ruby_xfree
179#define sized_free ruby_xfree_sized
180#define free_fixed_ptr(v) ruby_xfree_sized((v), sizeof(*(v)))
181#else
182#define sized_realloc(ptr, new_size, old_size) realloc(ptr, new_size)
183#define sized_free(v, s) free(v)
184#define free_fixed_ptr(v) free(v)
185#endif
186
187#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
188#define PTR_EQUAL(tab, ptr, hash_val, key_) \
189 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
190
191/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
192 up to TRUE if the table is rebuilt during the comparison. */
193#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
194 do { \
195 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
196 res = PTR_EQUAL(tab, ptr, hash_val, key); \
197 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
198 } while (FALSE)
199
200/* Features of a table. */
202 /* Power of 2 used for number of allocated entries. */
203 unsigned char entry_power;
204 /* Power of 2 used for number of allocated bins. Depending on the
205 table size, the number of bins is 2-4 times more than the
206 number of entries. */
207 unsigned char bin_power;
208 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
209 unsigned char size_ind;
210 /* Bins are packed in words of type st_index_t. The following is
211 a size of bins counted by words. */
212 st_index_t bins_words;
213};
214
215/* Features of all possible size tables. */
216#if SIZEOF_ST_INDEX_T == 8
217#define MAX_POWER2 62
218static const struct st_features features[] = {
219 {0, 1, 0, 0x0},
220 {1, 2, 0, 0x1},
221 {2, 3, 0, 0x1},
222 {3, 4, 0, 0x2},
223 {4, 5, 0, 0x4},
224 {5, 6, 0, 0x8},
225 {6, 7, 0, 0x10},
226 {7, 8, 0, 0x20},
227 {8, 9, 1, 0x80},
228 {9, 10, 1, 0x100},
229 {10, 11, 1, 0x200},
230 {11, 12, 1, 0x400},
231 {12, 13, 1, 0x800},
232 {13, 14, 1, 0x1000},
233 {14, 15, 1, 0x2000},
234 {15, 16, 1, 0x4000},
235 {16, 17, 2, 0x10000},
236 {17, 18, 2, 0x20000},
237 {18, 19, 2, 0x40000},
238 {19, 20, 2, 0x80000},
239 {20, 21, 2, 0x100000},
240 {21, 22, 2, 0x200000},
241 {22, 23, 2, 0x400000},
242 {23, 24, 2, 0x800000},
243 {24, 25, 2, 0x1000000},
244 {25, 26, 2, 0x2000000},
245 {26, 27, 2, 0x4000000},
246 {27, 28, 2, 0x8000000},
247 {28, 29, 2, 0x10000000},
248 {29, 30, 2, 0x20000000},
249 {30, 31, 2, 0x40000000},
250 {31, 32, 2, 0x80000000},
251 {32, 33, 3, 0x200000000},
252 {33, 34, 3, 0x400000000},
253 {34, 35, 3, 0x800000000},
254 {35, 36, 3, 0x1000000000},
255 {36, 37, 3, 0x2000000000},
256 {37, 38, 3, 0x4000000000},
257 {38, 39, 3, 0x8000000000},
258 {39, 40, 3, 0x10000000000},
259 {40, 41, 3, 0x20000000000},
260 {41, 42, 3, 0x40000000000},
261 {42, 43, 3, 0x80000000000},
262 {43, 44, 3, 0x100000000000},
263 {44, 45, 3, 0x200000000000},
264 {45, 46, 3, 0x400000000000},
265 {46, 47, 3, 0x800000000000},
266 {47, 48, 3, 0x1000000000000},
267 {48, 49, 3, 0x2000000000000},
268 {49, 50, 3, 0x4000000000000},
269 {50, 51, 3, 0x8000000000000},
270 {51, 52, 3, 0x10000000000000},
271 {52, 53, 3, 0x20000000000000},
272 {53, 54, 3, 0x40000000000000},
273 {54, 55, 3, 0x80000000000000},
274 {55, 56, 3, 0x100000000000000},
275 {56, 57, 3, 0x200000000000000},
276 {57, 58, 3, 0x400000000000000},
277 {58, 59, 3, 0x800000000000000},
278 {59, 60, 3, 0x1000000000000000},
279 {60, 61, 3, 0x2000000000000000},
280 {61, 62, 3, 0x4000000000000000},
281 {62, 63, 3, 0x8000000000000000},
282};
283
284#else
285#define MAX_POWER2 30
286
287static const struct st_features features[] = {
288 {0, 1, 0, 0x1},
289 {1, 2, 0, 0x1},
290 {2, 3, 0, 0x2},
291 {3, 4, 0, 0x4},
292 {4, 5, 0, 0x8},
293 {5, 6, 0, 0x10},
294 {6, 7, 0, 0x20},
295 {7, 8, 0, 0x40},
296 {8, 9, 1, 0x100},
297 {9, 10, 1, 0x200},
298 {10, 11, 1, 0x400},
299 {11, 12, 1, 0x800},
300 {12, 13, 1, 0x1000},
301 {13, 14, 1, 0x2000},
302 {14, 15, 1, 0x4000},
303 {15, 16, 1, 0x8000},
304 {16, 17, 2, 0x20000},
305 {17, 18, 2, 0x40000},
306 {18, 19, 2, 0x80000},
307 {19, 20, 2, 0x100000},
308 {20, 21, 2, 0x200000},
309 {21, 22, 2, 0x400000},
310 {22, 23, 2, 0x800000},
311 {23, 24, 2, 0x1000000},
312 {24, 25, 2, 0x2000000},
313 {25, 26, 2, 0x4000000},
314 {26, 27, 2, 0x8000000},
315 {27, 28, 2, 0x10000000},
316 {28, 29, 2, 0x20000000},
317 {29, 30, 2, 0x40000000},
318 {30, 31, 2, 0x80000000},
319};
320
321#endif
322
323/* The reserved hash value and its substitution. */
324#define RESERVED_HASH_VAL (~(st_hash_t) 0)
325#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
326
327static inline st_hash_t
328normalize_hash_value(st_hash_t hash)
329{
330 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
331 another value. Such mapping should be extremely rare. */
332 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
333}
334
335/* Return hash value of KEY for table TAB. */
336static inline st_hash_t
337do_hash(st_data_t key, st_table *tab)
338{
339 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
340 return normalize_hash_value(hash);
341}
342
343/* Power of 2 defining the minimal number of allocated entries. */
344#define MINIMAL_POWER2 2
345
346#if MINIMAL_POWER2 < 2
347#error "MINIMAL_POWER2 should be >= 2"
348#endif
349
350/* If the power2 of the allocated `entries` is less than the following
351 value, don't allocate bins and use a linear search. */
352#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
353
354/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
355static int
356get_power2(st_index_t size)
357{
358 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
359 if (n <= MAX_POWER2)
360 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
361#ifdef RUBY
362 /* Ran out of the table entries */
363 rb_raise(rb_eRuntimeError, "st_table too big");
364#endif
365 /* should raise exception */
366 return -1;
367}
368
369/* Return value of N-th bin in array BINS of table with bins size
370 index S. */
371static inline st_index_t
372get_bin(st_index_t *bins, int s, st_index_t n)
373{
374 return (s == 0 ? ((unsigned char *) bins)[n]
375 : s == 1 ? ((unsigned short *) bins)[n]
376 : s == 2 ? ((unsigned int *) bins)[n]
377 : ((st_index_t *) bins)[n]);
378}
379
380/* Set up N-th bin in array BINS of table with bins size index S to
381 value V. */
382static inline void
383set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
384{
385 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
386 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
387 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
388 else ((st_index_t *) bins)[n] = v;
389}
390
391/* These macros define reserved values for empty table bin and table
392 bin which contains a deleted entry. We will never use such values
393 for an entry index in bins. */
394#define EMPTY_BIN 0
395#define DELETED_BIN 1
396/* Base of a real entry index in the bins. */
397#define ENTRY_BASE 2
398
399/* Mark I-th bin of table TAB as empty, in other words not
400 corresponding to any entry. */
401#define MARK_BIN_EMPTY(tab, i) (set_bin(st_bins_ptr(tab), get_size_ind(tab), i, EMPTY_BIN))
402
403/* Values used for not found entry and bin with given
404 characteristics. */
405#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
406#define UNDEFINED_BIN_IND (~(st_index_t) 0)
407
408/* Entry and bin values returned when we found a table rebuild during
409 the search. */
410#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
411#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
412
413/* Mark I-th bin of table TAB as corresponding to a deleted table
414 entry. Update number of entries in the table and number of bins
415 corresponding to deleted entries. */
416#define MARK_BIN_DELETED(tab, i) \
417 do { \
418 set_bin(st_bins_ptr(tab), get_size_ind(tab), i, DELETED_BIN); \
419 } while (0)
420
421/* Macros to check that value B is used empty bins and bins
422 corresponding deleted entries. */
423#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
424#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
425#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
426
427/* Macros to check empty bins and bins corresponding to deleted
428 entries. Bins are given by their index I in table TAB. */
429#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
430#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
431#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
432
433/* Macros for marking and checking deleted entries given by their
434 pointer E_PTR. */
435#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
436#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
437
438/* Return the number of allocated entries of table TAB. */
439static inline st_index_t
440get_allocated_entries(const st_table *tab)
441{
442 return ((st_index_t) 1)<<tab->entry_power;
443}
444
445/* Return bin size index of table TAB. */
446static inline unsigned int
447get_size_ind(const st_table *tab)
448{
449 return tab->size_ind;
450}
451
452/* Return the number of allocated bins of table TAB. */
453static inline st_index_t
454get_bins_num(const st_table *tab)
455{
456 return ((st_index_t) 1)<<tab->bin_power;
457}
458
459/* Return mask for a bin index in table TAB. */
460static inline st_index_t
461bins_mask(const st_table *tab)
462{
463 return get_bins_num(tab) - 1;
464}
465
466static inline bool
467st_has_bins(const st_table *tab)
468{
469 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
470}
471
472static inline size_t
473st_allocated_entries_size(const st_table *tab)
474{
475 return get_allocated_entries(tab) * sizeof(st_table_entry);
476}
477
478static inline st_index_t *
479st_bins_ptr(const st_table *tab)
480{
481 if (st_has_bins(tab)) {
482 return (st_index_t *)(((char *)tab->entries) + st_allocated_entries_size(tab));
483 }
484
485 return NULL;
486}
487
488/* Return the index of table TAB bin corresponding to
489 HASH_VALUE. */
490static inline st_index_t
491hash_bin(st_hash_t hash_value, st_table *tab)
492{
493 return hash_value & bins_mask(tab);
494}
495
496/* Return size of the allocated bins of table TAB. */
497static inline st_index_t
498bins_size(const st_table *tab)
499{
500 if (st_has_bins(tab)) {
501 return features[tab->entry_power].bins_words * sizeof (st_index_t);
502 }
503 return 0;
504}
505
506/* Mark all bins of table TAB as empty. */
507static void
508initialize_bins(st_table *tab)
509{
510 memset(st_bins_ptr(tab), 0, bins_size(tab));
511}
512
513/* Make table TAB empty. */
514static void
515make_tab_empty(st_table *tab)
516{
517 tab->num_entries = 0;
518 tab->entries_start = tab->entries_bound = 0;
519 if (st_bins_ptr(tab) != NULL)
520 initialize_bins(tab);
521}
522
523#ifdef HASH_LOG
524#ifdef HAVE_UNISTD_H
525#include <unistd.h>
526#endif
527static struct {
528 int all, total, num, str, strcase;
529} collision;
530
531/* Flag switching off output of package statistics at the end of
532 program. */
533static int init_st = 0;
534
535/* Output overall number of table searches and collisions into a
536 temporary file. */
537static void
538stat_col(void)
539{
540 char fname[10+sizeof(long)*3];
541 FILE *f;
542 if (!collision.total) return;
543 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
544 if (f == NULL)
545 return;
546 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
547 ((double)collision.all / (collision.total)) * 100);
548 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
549 fclose(f);
550}
551#endif
552
553st_table *
554st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
555{
556 int n;
557
558#ifdef HASH_LOG
559#if HASH_LOG+0 < 0
560 {
561 const char *e = getenv("ST_HASH_LOG");
562 if (!e || !*e) init_st = 1;
563 }
564#endif
565 if (init_st == 0) {
566 init_st = 1;
567 atexit(stat_col);
568 }
569#endif
570
571 n = get_power2(size);
572#ifndef RUBY
573 if (n < 0)
574 return NULL;
575#endif
576
577 tab->type = type;
578 tab->entry_power = n;
579 tab->bin_power = features[n].bin_power;
580 tab->size_ind = features[n].size_ind;
581
582 size_t memsize = get_allocated_entries(tab) * sizeof(st_table_entry);
583 if (tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS) {
584 memsize += bins_size(tab);
585 }
586 tab->entries = (st_table_entry *)malloc(memsize);
587#ifndef RUBY
588 if (tab->entries == NULL) {
589 st_free_table(tab);
590 return NULL;
591 }
592#endif
593 make_tab_empty(tab);
594 tab->rebuilds_num = 0;
595 return tab;
596}
597
598st_table *
599st_init_existing_numtable_with_size(st_table *tab, st_index_t size)
600{
601 return st_init_existing_table_with_size(tab, &type_numhash, size);
602}
603
604/* Create and return table with TYPE which can hold at least SIZE
605 entries. The real number of entries which the table can hold is
606 the nearest power of two for SIZE. */
607st_table *
608st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
609{
610 st_table *tab = malloc(sizeof(st_table));
611#ifndef RUBY
612 if (tab == NULL)
613 return NULL;
614#endif
615
616#ifdef RUBY
617 st_init_existing_table_with_size(tab, type, size);
618#else
619 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
620 free_fixed_ptr(tab);
621 return NULL;
622 }
623#endif
624
625 return tab;
626}
627
628size_t
629st_table_size(const struct st_table *tbl)
630{
631 return tbl->num_entries;
632}
633
634/* Create and return table with TYPE which can hold a minimal number
635 of entries (see comments for get_power2). */
636st_table *
637st_init_table(const struct st_hash_type *type)
638{
639 return st_init_table_with_size(type, 0);
640}
641
642/* Create and return table which can hold a minimal number of
643 numbers. */
644st_table *
645st_init_numtable(void)
646{
647 return st_init_table(&type_numhash);
648}
649
650/* Create and return table which can hold SIZE numbers. */
651st_table *
652st_init_numtable_with_size(st_index_t size)
653{
654 return st_init_table_with_size(&type_numhash, size);
655}
656
657/* Create and return table which can hold a minimal number of
658 strings. */
659st_table *
660st_init_strtable(void)
661{
662 return st_init_table(&type_strhash);
663}
664
665/* Create and return table which can hold SIZE strings. */
666st_table *
667st_init_strtable_with_size(st_index_t size)
668{
669 return st_init_table_with_size(&type_strhash, size);
670}
671
672st_table *
673st_init_existing_strtable_with_size(st_table *tab, st_index_t size)
674{
675 return st_init_existing_table_with_size(tab, &type_strhash, size);
676}
677
678
679/* Create and return table which can hold a minimal number of strings
680 whose character case is ignored. */
681st_table *
682st_init_strcasetable(void)
683{
684 return st_init_table(&type_strcasehash);
685}
686
687/* Create and return table which can hold SIZE strings whose character
688 case is ignored. */
689st_table *
690st_init_strcasetable_with_size(st_index_t size)
691{
692 return st_init_table_with_size(&type_strcasehash, size);
693}
694
695/* Make table TAB empty. */
696void
697st_clear(st_table *tab)
698{
699 make_tab_empty(tab);
700 tab->rebuilds_num++;
701}
702
703static inline size_t
704st_entries_memsize(const st_table *tab)
705{
706 return get_allocated_entries(tab) * sizeof(st_table_entry);
707}
708
709static inline void
710st_free_entries(const st_table *tab)
711{
712 sized_free(tab->entries, st_entries_memsize(tab) + bins_size(tab));
713}
714
715void
716st_free_embedded_table(st_table *tab)
717{
718 st_free_entries(tab);
719}
720
721/* Free table TAB space. */
722void
723st_free_table(st_table *tab)
724{
725 st_free_embedded_table(tab);
726 free_fixed_ptr(tab);
727}
728
729/* Return byte size of memory allocated for table TAB. */
730size_t
731st_memsize(const st_table *tab)
732{
733 RUBY_ASSERT(tab != NULL);
734 return(sizeof(st_table)
735 + bins_size(tab)
736 + st_entries_memsize(tab));
737}
738
739static st_index_t
740find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
741
742static st_index_t
743find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
744
745static st_index_t
746find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
747
748static st_index_t
749find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
750 st_data_t key, st_index_t *bin_ind);
751
752#ifdef HASH_LOG
753static void
754count_collision(const struct st_hash_type *type)
755{
756 collision.all++;
757 if (type == &type_numhash) {
758 collision.num++;
759 }
760 else if (type == &type_strhash) {
761 collision.strcase++;
762 }
763 else if (type == &type_strcasehash) {
764 collision.str++;
765 }
766}
767
768#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
769#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
770#define collision_check 0
771#else
772#define COLLISION
773#define FOUND_BIN
774#endif
775
776/* If the number of entries in the table is at least REBUILD_THRESHOLD
777 times less than the entry array length, decrease the table
778 size. */
779#define REBUILD_THRESHOLD 4
780
781#if REBUILD_THRESHOLD < 2
782#error "REBUILD_THRESHOLD should be >= 2"
783#endif
784
785static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
786static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
787static void rebuild_cleanup(st_table *const tab);
788
789/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
790 and can change size of the table entries and bins arrays.
791 Rebuilding is implemented by creation of a new table or by
792 compaction of the existing one. */
793static void
794rebuild_table(st_table *tab)
795{
796 if ((2 * tab->num_entries <= get_allocated_entries(tab)
797 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
798 || tab->num_entries < (1 << MINIMAL_POWER2)) {
799 /* Compaction: */
800 tab->num_entries = 0;
801 if (st_has_bins(tab))
802 initialize_bins(tab);
803 rebuild_table_with(tab, tab);
804 }
805 else {
806 st_table *new_tab;
807 /* This allocation could trigger GC and compaction. If tab is the
808 * gen_fields_tbl, then tab could have changed in size due to objects being
809 * freed and/or moved. Do not store attributes of tab before this line. */
810 new_tab = st_init_table_with_size(tab->type,
811 2 * tab->num_entries - 1);
812 rebuild_table_with(new_tab, tab);
813 rebuild_move_table(new_tab, tab);
814 }
815 rebuild_cleanup(tab);
816}
817
818static void
819rebuild_table_with(st_table *const new_tab, st_table *const tab)
820{
821 st_index_t i, ni;
822 unsigned int size_ind;
823 st_table_entry *new_entries;
824 st_table_entry *curr_entry_ptr;
825 st_index_t *bins;
826 st_index_t bin_ind;
827
828 new_entries = new_tab->entries;
829
830 ni = 0;
831 bins = st_bins_ptr(new_tab);
832 size_ind = get_size_ind(new_tab);
833 st_index_t bound = tab->entries_bound;
834 st_table_entry *entries = tab->entries;
835
836 for (i = tab->entries_start; i < bound; i++) {
837 curr_entry_ptr = &entries[i];
838 PREFETCH(entries + i + 1, 0);
839 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
840 continue;
841 if (&new_entries[ni] != curr_entry_ptr)
842 new_entries[ni] = *curr_entry_ptr;
843 if (EXPECT(bins != NULL, 1)) {
844 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
845 curr_entry_ptr->key);
846 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
847 }
848 new_tab->num_entries++;
849 ni++;
850 }
851
852 assert(new_tab->num_entries == tab->num_entries);
853}
854
855static void
856rebuild_move_table(st_table *const new_tab, st_table *const tab)
857{
858 st_free_entries(tab);
859 tab->entry_power = new_tab->entry_power;
860 tab->bin_power = new_tab->bin_power;
861 tab->size_ind = new_tab->size_ind;
862 tab->entries = new_tab->entries;
863 free_fixed_ptr(new_tab);
864}
865
866static void
867rebuild_cleanup(st_table *const tab)
868{
869 tab->entries_start = 0;
870 tab->entries_bound = tab->num_entries;
871 tab->rebuilds_num++;
872}
873
874/* Return the next secondary hash index for table TAB using previous
875 index IND and PERTURB. Finally modulo of the function becomes a
876 full *cycle linear congruential generator*, in other words it
877 guarantees traversing all table bins in extreme case.
878
879 According the Hull-Dobell theorem a generator
880 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
881 o m and c are relatively prime
882 o a-1 is divisible by all prime factors of m
883 o a-1 is divisible by 4 if m is divisible by 4.
884
885 For our case a is 5, c is 1, and m is a power of two. */
886static inline st_index_t
887secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
888{
889 *perturb >>= 11;
890 ind = (ind << 2) + ind + *perturb + 1;
891 return hash_bin(ind, tab);
892}
893
894/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
895 search. Return the index of the found entry in array `entries`.
896 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
897 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
898static inline st_index_t
899find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
900{
901 int eq_p, rebuilt_p;
902 st_index_t i, bound;
903 st_table_entry *entries;
904
905 bound = tab->entries_bound;
906 entries = tab->entries;
907 for (i = tab->entries_start; i < bound; i++) {
908 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
909 if (EXPECT(rebuilt_p, 0))
910 return REBUILT_TABLE_ENTRY_IND;
911 if (eq_p)
912 return i;
913 }
914 return UNDEFINED_ENTRY_IND;
915}
916
917/* Use the quadratic probing. The method has a better data locality
918 but more collisions than the current approach. In average it
919 results in a bit slower search. */
920/*#define QUADRATIC_PROBE*/
921
922/* Return index of entry with HASH_VALUE and KEY in table TAB. If
923 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
924 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
925static st_index_t
926find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
927{
928 int eq_p, rebuilt_p;
929 st_index_t ind;
930#ifdef QUADRATIC_PROBE
931 st_index_t d;
932#else
933 st_index_t perturb;
934#endif
935 st_index_t bin;
936 st_table_entry *entries = tab->entries;
937
938 ind = hash_bin(hash_value, tab);
939#ifdef QUADRATIC_PROBE
940 d = 1;
941#else
942 perturb = hash_value;
943#endif
944 FOUND_BIN;
945 for (;;) {
946 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
947 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
948 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
949 if (EXPECT(rebuilt_p, 0))
950 return REBUILT_TABLE_ENTRY_IND;
951 if (eq_p)
952 break;
953 }
954 else if (EMPTY_BIN_P(bin))
955 return UNDEFINED_ENTRY_IND;
956#ifdef QUADRATIC_PROBE
957 ind = hash_bin(ind + d, tab);
958 d++;
959#else
960 ind = secondary_hash(ind, tab, &perturb);
961#endif
962 COLLISION;
963 }
964 return bin;
965}
966
967/* Find and return index of table TAB bin corresponding to an entry
968 with HASH_VALUE and KEY. If there is no such bin, return
969 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
970 return REBUILT_TABLE_BIN_IND. */
971static st_index_t
972find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
973{
974 int eq_p, rebuilt_p;
975 st_index_t ind;
976#ifdef QUADRATIC_PROBE
977 st_index_t d;
978#else
979 st_index_t perturb;
980#endif
981 st_index_t bin;
982 st_table_entry *entries = tab->entries;
983
984 ind = hash_bin(hash_value, tab);
985#ifdef QUADRATIC_PROBE
986 d = 1;
987#else
988 perturb = hash_value;
989#endif
990 FOUND_BIN;
991 for (;;) {
992 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
993 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
994 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
995 if (EXPECT(rebuilt_p, 0))
996 return REBUILT_TABLE_BIN_IND;
997 if (eq_p)
998 break;
999 }
1000 else if (EMPTY_BIN_P(bin))
1001 return UNDEFINED_BIN_IND;
1002#ifdef QUADRATIC_PROBE
1003 ind = hash_bin(ind + d, tab);
1004 d++;
1005#else
1006 ind = secondary_hash(ind, tab, &perturb);
1007#endif
1008 COLLISION;
1009 }
1010 return ind;
1011}
1012
1013/* Find and return index of table TAB bin corresponding to an entry
1014 with HASH_VALUE and KEY. The entry should be in the table
1015 already. */
1016static st_index_t
1017find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
1018{
1019 st_index_t ind;
1020#ifdef QUADRATIC_PROBE
1021 st_index_t d;
1022#else
1023 st_index_t perturb;
1024#endif
1025 st_index_t bin;
1026
1027 ind = hash_bin(hash_value, tab);
1028#ifdef QUADRATIC_PROBE
1029 d = 1;
1030#else
1031 perturb = hash_value;
1032#endif
1033 FOUND_BIN;
1034 for (;;) {
1035 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
1036 if (EMPTY_OR_DELETED_BIN_P(bin))
1037 return ind;
1038#ifdef QUADRATIC_PROBE
1039 ind = hash_bin(ind + d, tab);
1040 d++;
1041#else
1042 ind = secondary_hash(ind, tab, &perturb);
1043#endif
1044 COLLISION;
1045 }
1046}
1047
1048/* Return index of table TAB bin for HASH_VALUE and KEY through
1049 BIN_IND and the pointed value as the function result. Reserve the
1050 bin for inclusion of the corresponding entry into the table if it
1051 is not there yet. We always find such bin as bins array length is
1052 bigger entries array. Although we can reuse a deleted bin, the
1053 result bin value is always empty if the table has no entry with
1054 KEY. Return the entries array index of the found entry or
1055 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1056 during the search, return REBUILT_TABLE_ENTRY_IND. */
1057static st_index_t
1058find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1059 st_data_t key, st_index_t *bin_ind)
1060{
1061 int eq_p, rebuilt_p;
1062 st_index_t ind;
1063 st_hash_t curr_hash_value = *hash_value;
1064#ifdef QUADRATIC_PROBE
1065 st_index_t d;
1066#else
1067 st_index_t perturb;
1068#endif
1069 st_index_t entry_index;
1070 st_index_t first_deleted_bin_ind;
1071 st_table_entry *entries;
1072
1073 ind = hash_bin(curr_hash_value, tab);
1074#ifdef QUADRATIC_PROBE
1075 d = 1;
1076#else
1077 perturb = curr_hash_value;
1078#endif
1079 FOUND_BIN;
1080 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1081 entries = tab->entries;
1082 for (;;) {
1083 entry_index = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
1084 if (EMPTY_BIN_P(entry_index)) {
1085 tab->num_entries++;
1086 entry_index = UNDEFINED_ENTRY_IND;
1087 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1088 /* We can reuse bin of a deleted entry. */
1089 ind = first_deleted_bin_ind;
1090 MARK_BIN_EMPTY(tab, ind);
1091 }
1092 break;
1093 }
1094 else if (! DELETED_BIN_P(entry_index)) {
1095 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1096 if (EXPECT(rebuilt_p, 0))
1097 return REBUILT_TABLE_ENTRY_IND;
1098 if (eq_p)
1099 break;
1100 }
1101 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1102 first_deleted_bin_ind = ind;
1103#ifdef QUADRATIC_PROBE
1104 ind = hash_bin(ind + d, tab);
1105 d++;
1106#else
1107 ind = secondary_hash(ind, tab, &perturb);
1108#endif
1109 COLLISION;
1110 }
1111 *bin_ind = ind;
1112 return entry_index;
1113}
1114
1115/* Find an entry with KEY in table TAB. Return non-zero if we found
1116 it. Set up *RECORD to the found entry record. */
1117int
1118st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1119{
1120 st_index_t bin;
1121 st_hash_t hash = do_hash(key, tab);
1122
1123 retry:
1124 if (!st_has_bins(tab)) {
1125 bin = find_entry(tab, hash, key);
1126 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1127 goto retry;
1128 if (bin == UNDEFINED_ENTRY_IND)
1129 return 0;
1130 }
1131 else {
1132 bin = find_table_entry_ind(tab, hash, key);
1133 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1134 goto retry;
1135 if (bin == UNDEFINED_ENTRY_IND)
1136 return 0;
1137 bin -= ENTRY_BASE;
1138 }
1139 if (value != 0)
1140 *value = tab->entries[bin].record;
1141 return 1;
1142}
1143
1144/* Find an entry with KEY in table TAB. Return non-zero if we found
1145 it. Set up *RESULT to the found table entry key. */
1146int
1147st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1148{
1149 st_index_t bin;
1150 st_hash_t hash = do_hash(key, tab);
1151
1152 retry:
1153 if (!st_has_bins(tab)) {
1154 bin = find_entry(tab, hash, key);
1155 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1156 goto retry;
1157 if (bin == UNDEFINED_ENTRY_IND)
1158 return 0;
1159 }
1160 else {
1161 bin = find_table_entry_ind(tab, hash, key);
1162 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1163 goto retry;
1164 if (bin == UNDEFINED_ENTRY_IND)
1165 return 0;
1166 bin -= ENTRY_BASE;
1167 }
1168 if (result != 0)
1169 *result = tab->entries[bin].key;
1170 return 1;
1171}
1172
1173/* Check the table and rebuild it if it is necessary. */
1174static inline void
1175rebuild_table_if_necessary (st_table *tab)
1176{
1177 st_index_t bound = tab->entries_bound;
1178
1179 if (bound == get_allocated_entries(tab))
1180 rebuild_table(tab);
1181}
1182
1183/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1184 already entry with KEY in the table, return nonzero and update
1185 the value of the found entry. */
1186int
1187st_insert(st_table *tab, st_data_t key, st_data_t value)
1188{
1189 st_table_entry *entry;
1190 st_index_t bin;
1191 st_index_t ind;
1192 st_hash_t hash_value;
1193 st_index_t bin_ind;
1194 int new_p;
1195
1196 hash_value = do_hash(key, tab);
1197 retry:
1198 rebuild_table_if_necessary(tab);
1199 if (!st_has_bins(tab)) {
1200 bin = find_entry(tab, hash_value, key);
1201 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1202 goto retry;
1203 new_p = bin == UNDEFINED_ENTRY_IND;
1204 if (new_p)
1205 tab->num_entries++;
1206 bin_ind = UNDEFINED_BIN_IND;
1207 }
1208 else {
1209 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1210 key, &bin_ind);
1211 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1212 goto retry;
1213 new_p = bin == UNDEFINED_ENTRY_IND;
1214 bin -= ENTRY_BASE;
1215 }
1216 if (new_p) {
1217 ind = tab->entries_bound++;
1218 entry = &tab->entries[ind];
1219 entry->hash = hash_value;
1220 entry->key = key;
1221 entry->record = value;
1222 if (bin_ind != UNDEFINED_BIN_IND)
1223 set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1224 return 0;
1225 }
1226 tab->entries[bin].record = value;
1227 return 1;
1228}
1229
1230/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1231 entry with KEY before the insertion. */
1232static inline void
1233st_add_direct_with_hash(st_table *tab,
1234 st_data_t key, st_data_t value, st_hash_t hash)
1235{
1236 st_table_entry *entry;
1237 st_index_t ind;
1238 st_index_t bin_ind;
1239
1240 assert(hash != RESERVED_HASH_VAL);
1241
1242 rebuild_table_if_necessary(tab);
1243 ind = tab->entries_bound++;
1244 entry = &tab->entries[ind];
1245 entry->hash = hash;
1246 entry->key = key;
1247 entry->record = value;
1248 tab->num_entries++;
1249 if (st_has_bins(tab)) {
1250 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1251 set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1252 }
1253}
1254
1255void
1256rb_st_add_direct_with_hash(st_table *tab,
1257 st_data_t key, st_data_t value, st_hash_t hash)
1258{
1259 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1260}
1261
1262/* Insert (KEY, VALUE) into table TAB. The table should not have
1263 entry with KEY before the insertion. */
1264void
1265st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1266{
1267 st_hash_t hash_value;
1268
1269 hash_value = do_hash(key, tab);
1270 st_add_direct_with_hash(tab, key, value, hash_value);
1271}
1272
1273/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1274 there is already entry with KEY in the table, return nonzero and
1275 update the value of the found entry. */
1276int
1277st_insert2(st_table *tab, st_data_t key, st_data_t value,
1278 st_data_t (*func)(st_data_t))
1279{
1280 st_table_entry *entry;
1281 st_index_t bin;
1282 st_index_t ind;
1283 st_hash_t hash_value;
1284 st_index_t bin_ind;
1285 int new_p;
1286
1287 hash_value = do_hash(key, tab);
1288 retry:
1289 rebuild_table_if_necessary (tab);
1290 if (!st_has_bins(tab)) {
1291 bin = find_entry(tab, hash_value, key);
1292 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1293 goto retry;
1294 new_p = bin == UNDEFINED_ENTRY_IND;
1295 if (new_p)
1296 tab->num_entries++;
1297 bin_ind = UNDEFINED_BIN_IND;
1298 }
1299 else {
1300 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1301 key, &bin_ind);
1302 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1303 goto retry;
1304 new_p = bin == UNDEFINED_ENTRY_IND;
1305 bin -= ENTRY_BASE;
1306 }
1307 if (new_p) {
1308 key = (*func)(key);
1309 ind = tab->entries_bound++;
1310 entry = &tab->entries[ind];
1311 entry->hash = hash_value;
1312 entry->key = key;
1313 entry->record = value;
1314 if (bin_ind != UNDEFINED_BIN_IND)
1315 set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1316 return 0;
1317 }
1318 tab->entries[bin].record = value;
1319 return 1;
1320}
1321
1322/* Create a copy of old_tab into new_tab. */
1323st_table *
1324st_replace(st_table *new_tab, st_table *old_tab)
1325{
1326 *new_tab = *old_tab;
1327 size_t memsize = get_allocated_entries(old_tab) * sizeof(st_table_entry);
1328 memsize += bins_size(old_tab);
1329 new_tab->entries = (st_table_entry *) malloc(memsize);
1330#ifndef RUBY
1331 if (new_tab->entries == NULL) {
1332 return NULL;
1333 }
1334#endif
1335 MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
1336
1337 return new_tab;
1338}
1339
1340/* Create and return a copy of table OLD_TAB. */
1341st_table *
1342st_copy(st_table *old_tab)
1343{
1344 st_table *new_tab;
1345
1346 new_tab = (st_table *) malloc(sizeof(st_table));
1347#ifndef RUBY
1348 if (new_tab == NULL)
1349 return NULL;
1350#endif
1351
1352 if (st_replace(new_tab, old_tab) == NULL) {
1353 st_free_table(new_tab);
1354 return NULL;
1355 }
1356
1357 return new_tab;
1358}
1359
1360/* Update the entries start of table TAB after removing an entry
1361 with index N in the array entries. */
1362static inline void
1363update_range_for_deleted(st_table *tab, st_index_t n)
1364{
1365 /* Do not update entries_bound here. Otherwise, we can fill all
1366 bins by deleted entry value before rebuilding the table. */
1367 if (tab->entries_start == n) {
1368 st_index_t start = n + 1;
1369 st_index_t bound = tab->entries_bound;
1370 st_table_entry *entries = tab->entries;
1371 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1372 tab->entries_start = start;
1373 }
1374}
1375
1376/* Delete entry with KEY from table TAB, set up *VALUE (unless
1377 VALUE is zero) from deleted table entry, and return non-zero. If
1378 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1379 is zero), and return zero. */
1380static int
1381st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1382{
1383 st_table_entry *entry;
1384 st_index_t bin;
1385 st_index_t bin_ind;
1386 st_hash_t hash;
1387
1388 hash = do_hash(*key, tab);
1389 retry:
1390 if (!st_has_bins(tab)) {
1391 bin = find_entry(tab, hash, *key);
1392 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1393 goto retry;
1394 if (bin == UNDEFINED_ENTRY_IND) {
1395 if (value != 0) *value = 0;
1396 return 0;
1397 }
1398 }
1399 else {
1400 bin_ind = find_table_bin_ind(tab, hash, *key);
1401 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1402 goto retry;
1403 if (bin_ind == UNDEFINED_BIN_IND) {
1404 if (value != 0) *value = 0;
1405 return 0;
1406 }
1407 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
1408 MARK_BIN_DELETED(tab, bin_ind);
1409 }
1410 entry = &tab->entries[bin];
1411 *key = entry->key;
1412 if (value != 0) *value = entry->record;
1413 MARK_ENTRY_DELETED(entry);
1414 tab->num_entries--;
1415 update_range_for_deleted(tab, bin);
1416 return 1;
1417}
1418
1419int
1420st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1421{
1422 return st_general_delete(tab, key, value);
1423}
1424
1425/* The function and other functions with suffix '_safe' or '_check'
1426 are originated from the previous implementation of the hash tables.
1427 It was necessary for correct deleting entries during traversing
1428 tables. The current implementation permits deletion during
1429 traversing without a specific way to do this. */
1430int
1431st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1432 st_data_t never ATTRIBUTE_UNUSED)
1433{
1434 return st_general_delete(tab, key, value);
1435}
1436
1437/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1438 return zero. Otherwise, remove the first entry in the table.
1439 Return its key through KEY and its record through VALUE (unless
1440 VALUE is zero). */
1441int
1442st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1443{
1444 st_index_t i, bound;
1445 st_index_t bin;
1446 st_table_entry *entries, *curr_entry_ptr;
1447 st_index_t bin_ind;
1448
1449 entries = tab->entries;
1450 bound = tab->entries_bound;
1451 for (i = tab->entries_start; i < bound; i++) {
1452 curr_entry_ptr = &entries[i];
1453 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1454 st_hash_t entry_hash = curr_entry_ptr->hash;
1455 st_data_t entry_key = curr_entry_ptr->key;
1456
1457 if (value != 0) *value = curr_entry_ptr->record;
1458 *key = entry_key;
1459 retry:
1460 if (!st_has_bins(tab)) {
1461 bin = find_entry(tab, entry_hash, entry_key);
1462 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1463 entries = tab->entries;
1464 goto retry;
1465 }
1466 curr_entry_ptr = &entries[bin];
1467 }
1468 else {
1469 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1470 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1471 entries = tab->entries;
1472 goto retry;
1473 }
1474 curr_entry_ptr = &entries[get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind)
1475 - ENTRY_BASE];
1476 MARK_BIN_DELETED(tab, bin_ind);
1477 }
1478 MARK_ENTRY_DELETED(curr_entry_ptr);
1479 tab->num_entries--;
1480 update_range_for_deleted(tab, i);
1481 return 1;
1482 }
1483 }
1484 if (value != 0) *value = 0;
1485 return 0;
1486}
1487
1488/* See comments for function st_delete_safe. */
1489void
1490st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1491 st_data_t never ATTRIBUTE_UNUSED)
1492{
1493}
1494
1495/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1496 of the key and the value of the found entry, and non-zero as the
1497 3rd argument. If the entry is not found, call FUNC with a pointer
1498 to KEY, a pointer to zero, and a zero argument. If the call
1499 returns ST_CONTINUE, the table will have an entry with key and
1500 value returned by FUNC through the 1st and 2nd parameters. If the
1501 call of FUNC returns ST_DELETE, the table will not have entry with
1502 KEY. The function returns flag of that the entry with KEY was in
1503 the table before the call. */
1504int
1505st_update(st_table *tab, st_data_t key,
1506 st_update_callback_func *func, st_data_t arg)
1507{
1508 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1509 st_index_t bin = 0; /* Ditto */
1510 st_table_entry *entries;
1511 st_index_t bin_ind;
1512 st_data_t value = 0, old_key;
1513 int retval, existing;
1514 st_hash_t hash = do_hash(key, tab);
1515
1516 retry:
1517 entries = tab->entries;
1518 if (!st_has_bins(tab)) {
1519 bin = find_entry(tab, hash, key);
1520 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1521 goto retry;
1522 existing = bin != UNDEFINED_ENTRY_IND;
1523 entry = &entries[bin];
1524 bin_ind = UNDEFINED_BIN_IND;
1525 }
1526 else {
1527 bin_ind = find_table_bin_ind(tab, hash, key);
1528 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1529 goto retry;
1530 existing = bin_ind != UNDEFINED_BIN_IND;
1531 if (existing) {
1532 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
1533 entry = &entries[bin];
1534 }
1535 }
1536 if (existing) {
1537 key = entry->key;
1538 value = entry->record;
1539 }
1540 old_key = key;
1541
1542 unsigned int rebuilds_num = tab->rebuilds_num;
1543
1544 retval = (*func)(&key, &value, arg, existing);
1545
1546 // We need to make sure that the callback didn't cause a table rebuild
1547 // Ideally we would make sure no operations happened
1548 assert(rebuilds_num == tab->rebuilds_num);
1549 (void)rebuilds_num;
1550
1551 switch (retval) {
1552 case ST_CONTINUE:
1553 if (! existing) {
1554 st_add_direct_with_hash(tab, key, value, hash);
1555 break;
1556 }
1557 if (old_key != key) {
1558 entry->key = key;
1559 }
1560 entry->record = value;
1561 break;
1562 case ST_DELETE:
1563 if (existing) {
1564 if (bin_ind != UNDEFINED_BIN_IND)
1565 MARK_BIN_DELETED(tab, bin_ind);
1566 MARK_ENTRY_DELETED(entry);
1567 tab->num_entries--;
1568 update_range_for_deleted(tab, bin);
1569 }
1570 break;
1571 }
1572 return existing;
1573}
1574
1575/* Traverse all entries in table TAB calling FUNC with current entry
1576 key and value and zero. If the call returns ST_STOP, stop
1577 traversing. If the call returns ST_DELETE, delete the current
1578 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1579 traversing. The function returns zero unless an error is found.
1580 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1581 different for ST_CHECK and when the current element is removed
1582 during traversing. */
1583static inline int
1584st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1585 int check_p)
1586{
1587 st_index_t bin;
1588 st_index_t bin_ind;
1589 st_table_entry *entries, *curr_entry_ptr;
1590 enum st_retval retval;
1591 st_index_t i, rebuilds_num;
1592 st_hash_t hash;
1593 st_data_t key;
1594 int error_p, packed_p = !st_has_bins(tab);
1595
1596 entries = tab->entries;
1597 /* The bound can change inside the loop even without rebuilding
1598 the table, e.g. by an entry insertion. */
1599 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1600 curr_entry_ptr = &entries[i];
1601 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1602 continue;
1603 key = curr_entry_ptr->key;
1604 rebuilds_num = tab->rebuilds_num;
1605 hash = curr_entry_ptr->hash;
1606 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1607
1608 if (retval == ST_REPLACE && replace) {
1609 st_data_t value;
1610 value = curr_entry_ptr->record;
1611 retval = (*replace)(&key, &value, arg, TRUE);
1612 curr_entry_ptr->key = key;
1613 curr_entry_ptr->record = value;
1614 }
1615
1616 if (rebuilds_num != tab->rebuilds_num) {
1617 retry:
1618 entries = tab->entries;
1619 packed_p = !st_has_bins(tab);
1620 if (packed_p) {
1621 i = find_entry(tab, hash, key);
1622 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1623 goto retry;
1624 error_p = i == UNDEFINED_ENTRY_IND;
1625 }
1626 else {
1627 i = find_table_entry_ind(tab, hash, key);
1628 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1629 goto retry;
1630 error_p = i == UNDEFINED_ENTRY_IND;
1631 i -= ENTRY_BASE;
1632 }
1633 if (error_p && check_p) {
1634 /* call func with error notice */
1635 retval = (*func)(0, 0, arg, 1);
1636 return 1;
1637 }
1638 curr_entry_ptr = &entries[i];
1639 }
1640 switch (retval) {
1641 case ST_REPLACE:
1642 break;
1643 case ST_CONTINUE:
1644 break;
1645 case ST_CHECK:
1646 if (check_p)
1647 break;
1648 case ST_STOP:
1649 return 0;
1650 case ST_DELETE: {
1651 st_data_t key = curr_entry_ptr->key;
1652
1653 again:
1654 if (packed_p) {
1655 bin = find_entry(tab, hash, key);
1656 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1657 goto again;
1658 if (bin == UNDEFINED_ENTRY_IND)
1659 break;
1660 }
1661 else {
1662 bin_ind = find_table_bin_ind(tab, hash, key);
1663 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1664 goto again;
1665 if (bin_ind == UNDEFINED_BIN_IND)
1666 break;
1667 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
1668 MARK_BIN_DELETED(tab, bin_ind);
1669 }
1670 curr_entry_ptr = &entries[bin];
1671 MARK_ENTRY_DELETED(curr_entry_ptr);
1672 tab->num_entries--;
1673 update_range_for_deleted(tab, bin);
1674 break;
1675 }
1676 }
1677 }
1678 return 0;
1679}
1680
1681int
1682st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1683{
1684 return st_general_foreach(tab, func, replace, arg, TRUE);
1685}
1686
1687struct functor {
1688 st_foreach_callback_func *func;
1689 st_data_t arg;
1690};
1691
1692static int
1693apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1694{
1695 const struct functor *f = (void *)d;
1696 return f->func(k, v, f->arg);
1697}
1698
1699int
1700st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1701{
1702 const struct functor f = { func, arg };
1703 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1704}
1705
1706/* See comments for function st_delete_safe. */
1707int
1708st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1709 st_data_t never ATTRIBUTE_UNUSED)
1710{
1711 return st_general_foreach(tab, func, 0, arg, TRUE);
1712}
1713
1714/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1715 Return the number of keys set up in array KEYS. */
1716static inline st_index_t
1717st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1718{
1719 st_index_t i, bound;
1720 st_data_t key, *keys_start, *keys_end;
1721 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1722
1723 bound = tab->entries_bound;
1724 keys_start = keys;
1725 keys_end = keys + size;
1726 for (i = tab->entries_start; i < bound; i++) {
1727 if (keys == keys_end)
1728 break;
1729 curr_entry_ptr = &entries[i];
1730 key = curr_entry_ptr->key;
1731 if (! DELETED_ENTRY_P(curr_entry_ptr))
1732 *keys++ = key;
1733 }
1734
1735 return keys - keys_start;
1736}
1737
1738st_index_t
1739st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1740{
1741 return st_general_keys(tab, keys, size);
1742}
1743
1744/* See comments for function st_delete_safe. */
1745st_index_t
1746st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1747 st_data_t never ATTRIBUTE_UNUSED)
1748{
1749 return st_general_keys(tab, keys, size);
1750}
1751
1752/* Set up array VALUES by at most SIZE values of head table TAB
1753 entries. Return the number of values set up in array VALUES. */
1754static inline st_index_t
1755st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1756{
1757 st_index_t i, bound;
1758 st_data_t *values_start, *values_end;
1759 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1760
1761 values_start = values;
1762 values_end = values + size;
1763 bound = tab->entries_bound;
1764 for (i = tab->entries_start; i < bound; i++) {
1765 if (values == values_end)
1766 break;
1767 curr_entry_ptr = &entries[i];
1768 if (! DELETED_ENTRY_P(curr_entry_ptr))
1769 *values++ = curr_entry_ptr->record;
1770 }
1771
1772 return values - values_start;
1773}
1774
1775st_index_t
1776st_values(st_table *tab, st_data_t *values, st_index_t size)
1777{
1778 return st_general_values(tab, values, size);
1779}
1780
1781/* See comments for function st_delete_safe. */
1782st_index_t
1783st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1784 st_data_t never ATTRIBUTE_UNUSED)
1785{
1786 return st_general_values(tab, values, size);
1787}
1788
1789#define FNV1_32A_INIT 0x811c9dc5
1790
1791/*
1792 * 32 bit magic FNV-1a prime
1793 */
1794#define FNV_32_PRIME 0x01000193
1795
1796/* __POWERPC__ added to accommodate Darwin case. */
1797#ifndef UNALIGNED_WORD_ACCESS
1798# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1799 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1800 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1801 defined(__mc68020__)
1802# define UNALIGNED_WORD_ACCESS 1
1803# endif
1804#endif
1805#ifndef UNALIGNED_WORD_ACCESS
1806# define UNALIGNED_WORD_ACCESS 0
1807#endif
1808
1809/* This hash function is quite simplified MurmurHash3
1810 * Simplification is legal, cause most of magic still happens in finalizator.
1811 * And finalizator is almost the same as in MurmurHash3 */
1812#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1813#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1814
1815#if ST_INDEX_BITS <= 32
1816#define C1 (st_index_t)0xcc9e2d51
1817#define C2 (st_index_t)0x1b873593
1818#else
1819#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1820#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1821#endif
1822NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1823NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1824NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1825
1826static inline st_index_t
1827murmur_step(st_index_t h, st_index_t k)
1828{
1829#if ST_INDEX_BITS <= 32
1830#define r1 (17)
1831#define r2 (11)
1832#else
1833#define r1 (33)
1834#define r2 (24)
1835#endif
1836 k *= C1;
1837 h ^= ROTL(k, r1);
1838 h *= C2;
1839 h = ROTL(h, r2);
1840 return h;
1841}
1842#undef r1
1843#undef r2
1844
1845static inline st_index_t
1846murmur_finish(st_index_t h)
1847{
1848#if ST_INDEX_BITS <= 32
1849#define r1 (16)
1850#define r2 (13)
1851#define r3 (16)
1852 const st_index_t c1 = 0x85ebca6b;
1853 const st_index_t c2 = 0xc2b2ae35;
1854#else
1855/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1856#define r1 (30)
1857#define r2 (27)
1858#define r3 (31)
1859 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1860 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1861#endif
1862#if ST_INDEX_BITS > 64
1863 h ^= h >> 64;
1864 h *= c2;
1865 h ^= h >> 65;
1866#endif
1867 h ^= h >> r1;
1868 h *= c1;
1869 h ^= h >> r2;
1870 h *= c2;
1871 h ^= h >> r3;
1872 return h;
1873}
1874#undef r1
1875#undef r2
1876#undef r3
1877
1878st_index_t
1879st_hash(const void *ptr, size_t len, st_index_t h)
1880{
1881 const char *data = ptr;
1882 st_index_t t = 0;
1883 size_t l = len;
1884
1885#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1886#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1887#if SIZEOF_ST_INDEX_T > 4
1888#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1889#if SIZEOF_ST_INDEX_T > 8
1890#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1891 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1892#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1893#endif
1894#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1895#else
1896#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1897#endif
1898#undef SKIP_TAIL
1899 if (len >= sizeof(st_index_t)) {
1900#if !UNALIGNED_WORD_ACCESS
1901 int align = (int)((st_data_t)data % sizeof(st_index_t));
1902 if (align) {
1903 st_index_t d = 0;
1904 int sl, sr, pack;
1905
1906 switch (align) {
1907#ifdef WORDS_BIGENDIAN
1908# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1909 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1910#else
1911# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1912 t |= data_at(n) << CHAR_BIT*(n)
1913#endif
1914 UNALIGNED_ADD_ALL;
1915#undef UNALIGNED_ADD
1916 }
1917
1918#ifdef WORDS_BIGENDIAN
1919 t >>= (CHAR_BIT * align) - CHAR_BIT;
1920#else
1921 t <<= (CHAR_BIT * align);
1922#endif
1923
1924 data += sizeof(st_index_t)-align;
1925 len -= sizeof(st_index_t)-align;
1926
1927 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1928 sr = CHAR_BIT * align;
1929
1930 while (len >= sizeof(st_index_t)) {
1931 d = *(st_index_t *)data;
1932#ifdef WORDS_BIGENDIAN
1933 t = (t << sr) | (d >> sl);
1934#else
1935 t = (t >> sr) | (d << sl);
1936#endif
1937 h = murmur_step(h, t);
1938 t = d;
1939 data += sizeof(st_index_t);
1940 len -= sizeof(st_index_t);
1941 }
1942
1943 pack = len < (size_t)align ? (int)len : align;
1944 d = 0;
1945 switch (pack) {
1946#ifdef WORDS_BIGENDIAN
1947# define UNALIGNED_ADD(n) case (n) + 1: \
1948 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1949#else
1950# define UNALIGNED_ADD(n) case (n) + 1: \
1951 d |= data_at(n) << CHAR_BIT*(n)
1952#endif
1953 UNALIGNED_ADD_ALL;
1954#undef UNALIGNED_ADD
1955 }
1956#ifdef WORDS_BIGENDIAN
1957 t = (t << sr) | (d >> sl);
1958#else
1959 t = (t >> sr) | (d << sl);
1960#endif
1961
1962 if (len < (size_t)align) goto skip_tail;
1963# define SKIP_TAIL 1
1964 h = murmur_step(h, t);
1965 data += pack;
1966 len -= pack;
1967 }
1968 else
1969#endif
1970#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1971#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1972#else
1973#define aligned_data data
1974#endif
1975 {
1976 do {
1977 h = murmur_step(h, *(st_index_t *)aligned_data);
1978 data += sizeof(st_index_t);
1979 len -= sizeof(st_index_t);
1980 } while (len >= sizeof(st_index_t));
1981 }
1982 }
1983
1984 t = 0;
1985 switch (len) {
1986#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1987 /* in this case byteorder doesn't really matter */
1988#if SIZEOF_ST_INDEX_T > 4
1989 case 7: t |= data_at(6) << 48;
1990 case 6: t |= data_at(5) << 40;
1991 case 5: t |= data_at(4) << 32;
1992 case 4:
1993 t |= (st_index_t)*(uint32_t*)aligned_data;
1994 goto skip_tail;
1995# define SKIP_TAIL 1
1996#endif
1997 case 3: t |= data_at(2) << 16;
1998 case 2: t |= data_at(1) << 8;
1999 case 1: t |= data_at(0);
2000#else
2001#ifdef WORDS_BIGENDIAN
2002# define UNALIGNED_ADD(n) case (n) + 1: \
2003 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
2004#else
2005# define UNALIGNED_ADD(n) case (n) + 1: \
2006 t |= data_at(n) << CHAR_BIT*(n)
2007#endif
2008 UNALIGNED_ADD_ALL;
2009#undef UNALIGNED_ADD
2010#endif
2011#ifdef SKIP_TAIL
2012 skip_tail:
2013#endif
2014 h ^= t; h -= ROTL(t, 7);
2015 h *= C2;
2016 }
2017 h ^= l;
2018#undef aligned_data
2019
2020 return murmur_finish(h);
2021}
2022
2023st_index_t
2024st_hash_uint32(st_index_t h, uint32_t i)
2025{
2026 return murmur_step(h, i);
2027}
2028
2029NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
2030st_index_t
2031st_hash_uint(st_index_t h, st_index_t i)
2032{
2033 i += h;
2034/* no matter if it is BigEndian or LittleEndian,
2035 * we hash just integers */
2036#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
2037 h = murmur_step(h, i >> 8*8);
2038#endif
2039 h = murmur_step(h, i);
2040 return h;
2041}
2042
2043st_index_t
2044st_hash_end(st_index_t h)
2045{
2046 h = murmur_finish(h);
2047 return h;
2048}
2049
2050#undef st_hash_start
2051st_index_t
2052rb_st_hash_start(st_index_t h)
2053{
2054 return h;
2055}
2056
2057static st_index_t
2058strhash(st_data_t arg)
2059{
2060 register const char *string = (const char *)arg;
2061 return st_hash(string, strlen(string), FNV1_32A_INIT);
2062}
2063
2064int
2065st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2066{
2067 char c1, c2;
2068
2069 while (1) {
2070 c1 = *s1++;
2071 c2 = *s2++;
2072 if (c1 == '\0' || c2 == '\0') {
2073 if (c1 != '\0') return 1;
2074 if (c2 != '\0') return -1;
2075 return 0;
2076 }
2077 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2078 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2079 if (c1 != c2) {
2080 if (c1 > c2)
2081 return 1;
2082 else
2083 return -1;
2084 }
2085 }
2086}
2087
2088int
2089st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2090{
2091 char c1, c2;
2092 size_t i;
2093
2094 for (i = 0; i < n; i++) {
2095 c1 = *s1++;
2096 c2 = *s2++;
2097 if (c1 == '\0' || c2 == '\0') {
2098 if (c1 != '\0') return 1;
2099 if (c2 != '\0') return -1;
2100 return 0;
2101 }
2102 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2103 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2104 if (c1 != c2) {
2105 if (c1 > c2)
2106 return 1;
2107 else
2108 return -1;
2109 }
2110 }
2111 return 0;
2112}
2113
2114static int
2115st_strcmp(st_data_t lhs, st_data_t rhs)
2116{
2117 const char *s1 = (char *)lhs;
2118 const char *s2 = (char *)rhs;
2119 return strcmp(s1, s2);
2120}
2121
2122static int
2123st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2124{
2125 const char *s1 = (char *)lhs;
2126 const char *s2 = (char *)rhs;
2127 return st_locale_insensitive_strcasecmp(s1, s2);
2128}
2129
2130NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2131static st_index_t
2132strcasehash(st_data_t arg)
2133{
2134 register const char *string = (const char *)arg;
2135 register st_index_t hval = FNV1_32A_INIT;
2136
2137 /*
2138 * FNV-1a hash each octet in the buffer
2139 */
2140 while (*string) {
2141 unsigned int c = (unsigned char)*string++;
2142 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2143 hval ^= c;
2144
2145 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2146 hval *= FNV_32_PRIME;
2147 }
2148 return hval;
2149}
2150
2151int
2152st_numcmp(st_data_t x, st_data_t y)
2153{
2154 return x != y;
2155}
2156
2157st_index_t
2158st_numhash(st_data_t n)
2159{
2160 enum {s1 = 11, s2 = 3};
2161 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2162}
2163
2164#ifdef RUBY
2165/* Expand TAB to be suitable for holding SIZ entries in total.
2166 Pre-existing entries remain not deleted inside of TAB, but its bins
2167 are cleared to expect future reconstruction. See rehash below. */
2168static void
2169st_expand_table(st_table *tab, st_index_t siz)
2170{
2171 st_table *tmp;
2172 st_index_t n;
2173
2174 if (siz <= get_allocated_entries(tab))
2175 return; /* enough room already */
2176
2177 tmp = st_init_table_with_size(tab->type, siz);
2178 n = get_allocated_entries(tab);
2179 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2180 st_free_entries(tab);
2181
2182 tab->entry_power = tmp->entry_power;
2183 tab->bin_power = tmp->bin_power;
2184 tab->size_ind = tmp->size_ind;
2185 tab->entries = tmp->entries;
2186 tab->rebuilds_num++;
2187 free_fixed_ptr(tmp);
2188}
2189
2190/* Rehash using linear search. Return TRUE if we found that the table
2191 was rebuilt. */
2192static int
2193st_rehash_linear(st_table *tab)
2194{
2195 int eq_p, rebuilt_p;
2196 st_index_t i, j;
2197 st_table_entry *p, *q;
2198
2199 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2200 p = &tab->entries[i];
2201 if (DELETED_ENTRY_P(p))
2202 continue;
2203 for (j = i + 1; j < tab->entries_bound; j++) {
2204 q = &tab->entries[j];
2205 if (DELETED_ENTRY_P(q))
2206 continue;
2207 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2208 if (EXPECT(rebuilt_p, 0))
2209 return TRUE;
2210 if (eq_p) {
2211 *p = *q;
2212 MARK_ENTRY_DELETED(q);
2213 tab->num_entries--;
2214 update_range_for_deleted(tab, j);
2215 }
2216 }
2217 }
2218 return FALSE;
2219}
2220
2221/* Rehash using index. Return TRUE if we found that the table was
2222 rebuilt. */
2223static int
2224st_rehash_indexed(st_table *tab)
2225{
2226 int eq_p, rebuilt_p;
2227 st_index_t i;
2228
2229 unsigned int const size_ind = get_size_ind(tab);
2230 initialize_bins(tab);
2231 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2232 st_table_entry *p = &tab->entries[i];
2233 st_index_t ind;
2234#ifdef QUADRATIC_PROBE
2235 st_index_t d = 1;
2236#else
2237 st_index_t perturb = p->hash;
2238#endif
2239
2240 if (DELETED_ENTRY_P(p))
2241 continue;
2242
2243 ind = hash_bin(p->hash, tab);
2244 for (;;) {
2245 st_index_t bin = get_bin(st_bins_ptr(tab), size_ind, ind);
2246 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2247 /* ok, new room */
2248 set_bin(st_bins_ptr(tab), size_ind, ind, i + ENTRY_BASE);
2249 break;
2250 }
2251 else {
2252 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2253 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2254 if (EXPECT(rebuilt_p, 0))
2255 return TRUE;
2256 if (eq_p) {
2257 /* duplicated key; delete it */
2258 q->record = p->record;
2259 MARK_ENTRY_DELETED(p);
2260 tab->num_entries--;
2261 update_range_for_deleted(tab, bin);
2262 break;
2263 }
2264 else {
2265 /* hash collision; skip it */
2266#ifdef QUADRATIC_PROBE
2267 ind = hash_bin(ind + d, tab);
2268 d++;
2269#else
2270 ind = secondary_hash(ind, tab, &perturb);
2271#endif
2272 }
2273 }
2274 }
2275 }
2276 return FALSE;
2277}
2278
2279/* Reconstruct TAB's bins according to TAB's entries. This function
2280 permits conflicting keys inside of entries. No errors are reported
2281 then. All but one of them are discarded silently. */
2282static void
2283st_rehash(st_table *tab)
2284{
2285 int rebuilt_p;
2286
2287 do {
2288 if (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2289 rebuilt_p = st_rehash_linear(tab);
2290 else
2291 rebuilt_p = st_rehash_indexed(tab);
2292 } while (rebuilt_p);
2293}
2294
2295static st_data_t
2296st_stringify(VALUE key)
2297{
2298 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2299 rb_hash_key_str(key) : key;
2300}
2301
2302static void
2303st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2304{
2305 st_data_t k = st_stringify(key);
2307 e.hash = do_hash(k, tab);
2308 e.key = k;
2309 e.record = val;
2310
2311 tab->entries[tab->entries_bound++] = e;
2312 tab->num_entries++;
2313 RB_OBJ_WRITTEN(hash, Qundef, k);
2314 RB_OBJ_WRITTEN(hash, Qundef, val);
2315}
2316
2317static void
2318st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2319{
2320 long i;
2321
2322 for (i = 0; i < argc; /* */) {
2323 st_data_t k = st_stringify(argv[i++]);
2324 st_data_t v = argv[i++];
2325 st_insert(tab, k, v);
2326 RB_OBJ_WRITTEN(hash, Qundef, k);
2327 RB_OBJ_WRITTEN(hash, Qundef, v);
2328 }
2329}
2330
2331static void
2332st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2333{
2334 long i;
2335
2336 /* push elems */
2337 for (i = 0; i < argc; /* */) {
2338 VALUE key = argv[i++];
2339 VALUE val = argv[i++];
2340 st_insert_single(tab, hash, key, val);
2341 }
2342
2343 /* reindex */
2344 st_rehash(tab);
2345}
2346
2347/* Mimics ruby's { foo => bar } syntax. This function is subpart
2348 of rb_hash_bulk_insert. */
2349void
2350rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2351{
2352 st_index_t n, size = argc / 2;
2353 st_table *tab = RHASH_ST_TABLE(hash);
2354
2355 tab = RHASH_TBL_RAW(hash);
2356 n = tab->entries_bound + size;
2357 st_expand_table(tab, n);
2358 if (UNLIKELY(tab->num_entries))
2359 st_insert_generic(tab, argc, argv, hash);
2360 else if (argc <= 2)
2361 st_insert_single(tab, hash, argv[0], argv[1]);
2362 else if (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2363 st_insert_linear(tab, argc, argv, hash);
2364 else
2365 st_insert_generic(tab, argc, argv, hash);
2366}
2367
2368void
2369rb_st_compact_table(st_table *tab)
2370{
2371 st_index_t num = tab->num_entries;
2372 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2373 /* Compaction: */
2374 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2375 rebuild_table_with(new_tab, tab);
2376 rebuild_move_table(new_tab, tab);
2377 rebuild_cleanup(tab);
2378 }
2379}
2380
2381/*
2382 * set_table related code
2383 */
2384
2385struct set_table_entry {
2386 st_hash_t hash;
2387 st_data_t key;
2388};
2389
2390/* Return hash value of KEY for table TAB. */
2391static inline st_hash_t
2392set_do_hash(st_data_t key, set_table *tab)
2393{
2394 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2395 return normalize_hash_value(hash);
2396}
2397
2398/* Return bin size index of table TAB. */
2399static inline unsigned int
2400set_get_size_ind(const set_table *tab)
2401{
2402 return tab->size_ind;
2403}
2404
2405/* Return the number of allocated bins of table TAB. */
2406static inline st_index_t
2407set_get_bins_num(const set_table *tab)
2408{
2409 return ((st_index_t) 1)<<tab->bin_power;
2410}
2411
2412/* Return mask for a bin index in table TAB. */
2413static inline st_index_t
2414set_bins_mask(const set_table *tab)
2415{
2416 return set_get_bins_num(tab) - 1;
2417}
2418
2419/* Return the index of table TAB bin corresponding to
2420 HASH_VALUE. */
2421static inline st_index_t
2422set_hash_bin(st_hash_t hash_value, set_table *tab)
2423{
2424 return hash_value & set_bins_mask(tab);
2425}
2426
2427/* Return the number of allocated entries of table TAB. */
2428static inline st_index_t
2429set_get_allocated_entries(const set_table *tab)
2430{
2431 return ((st_index_t) 1)<<tab->entry_power;
2432}
2433
2434static inline size_t
2435set_allocated_entries_size(const set_table *tab)
2436{
2437 return set_get_allocated_entries(tab) * sizeof(set_table_entry);
2438}
2439
2440static inline bool
2441set_has_bins(const set_table *tab)
2442{
2443 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
2444}
2445
2446/* Return size of the allocated bins of table TAB. */
2447static inline st_index_t
2448set_bins_size(const set_table *tab)
2449{
2450 if (set_has_bins(tab)) {
2451 return features[tab->entry_power].bins_words * sizeof (st_index_t);
2452 }
2453
2454 return 0;
2455}
2456
2457static inline st_index_t *
2458set_bins_ptr(const set_table *tab)
2459{
2460 if (set_has_bins(tab)) {
2461 return (st_index_t *)(((char *)tab->entries) + set_allocated_entries_size(tab));
2462 }
2463
2464 return NULL;
2465}
2466
2467/* Mark all bins of table TAB as empty. */
2468static void
2469set_initialize_bins(set_table *tab)
2470{
2471 memset(set_bins_ptr(tab), 0, set_bins_size(tab));
2472}
2473
2474/* Make table TAB empty. */
2475static void
2476set_make_tab_empty(set_table *tab)
2477{
2478 tab->num_entries = 0;
2479 tab->entries_start = tab->entries_bound = 0;
2480 if (set_bins_ptr(tab) != NULL)
2481 set_initialize_bins(tab);
2482}
2483
2484static inline size_t
2485set_entries_memsize(set_table *tab)
2486{
2487 size_t memsize = set_get_allocated_entries(tab) * sizeof(set_table_entry);
2488 if (set_has_bins(tab)) {
2489 memsize += set_bins_size(tab);
2490 }
2491 return memsize;
2492}
2493
2494static set_table *
2495set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2496{
2497 int n;
2498
2499#ifdef HASH_LOG
2500#if HASH_LOG+0 < 0
2501 {
2502 const char *e = getenv("ST_HASH_LOG");
2503 if (!e || !*e) init_st = 1;
2504 }
2505#endif
2506 if (init_st == 0) {
2507 init_st = 1;
2508 atexit(stat_col);
2509 }
2510#endif
2511
2512 n = get_power2(size);
2513
2514 tab->type = type;
2515 tab->entry_power = n;
2516 tab->bin_power = features[n].bin_power;
2517 tab->size_ind = features[n].size_ind;
2518
2519 tab->entries = (set_table_entry *)malloc(set_entries_memsize(tab));
2520 set_make_tab_empty(tab);
2521 tab->rebuilds_num = 0;
2522 return tab;
2523}
2524
2525/* Create and return table with TYPE which can hold at least SIZE
2526 entries. The real number of entries which the table can hold is
2527 the nearest power of two for SIZE. */
2528set_table *
2529set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2530{
2531 if (tab == NULL) tab = malloc(sizeof(set_table));
2532
2533 set_init_existing_table_with_size(tab, type, size);
2534
2535 return tab;
2536}
2537
2538set_table *
2539set_init_numtable(void)
2540{
2541 return set_init_table_with_size(NULL, &type_numhash, 0);
2542}
2543
2544set_table *
2545set_init_numtable_with_size(st_index_t size)
2546{
2547 return set_init_table_with_size(NULL, &type_numhash, size);
2548}
2549
2550set_table *
2551set_init_embedded_numtable_with_size(set_table *tab, st_index_t size)
2552{
2553 return set_init_existing_table_with_size(tab, &type_numhash, size);
2554}
2555
2556size_t
2557set_table_size(const struct set_table *tbl)
2558{
2559 return tbl->num_entries;
2560}
2561
2562/* Make table TAB empty. */
2563void
2564set_table_clear(set_table *tab)
2565{
2566 set_make_tab_empty(tab);
2567 tab->rebuilds_num++;
2568}
2569
2570void
2571set_free_embedded_table(set_table *tab)
2572{
2573 sized_free(tab->entries, set_entries_memsize(tab));
2574}
2575
2576/* Free table TAB space. This should only be used if you passed NULL to
2577 set_init_table_with_size/set_copy when creating the table. */
2578void
2579set_free_table(set_table *tab)
2580{
2581 set_free_embedded_table(tab);
2582 free_fixed_ptr(tab);
2583}
2584
2585/* Return byte size of memory allocated for table TAB. */
2586size_t
2587set_memsize(const set_table *tab)
2588{
2589 return(sizeof(set_table)
2590 + (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
2591 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2592}
2593
2594static st_index_t
2595set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2596
2597static st_index_t
2598set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2599
2600static st_index_t
2601set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key);
2602
2603static st_index_t
2604set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2605 st_data_t key, st_index_t *bin_ind);
2606
2607static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
2608static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
2609static void set_rebuild_cleanup(set_table *const tab);
2610
2611/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
2612 and can change size of the table entries and bins arrays.
2613 Rebuilding is implemented by creation of a new table or by
2614 compaction of the existing one. */
2615static void
2616set_rebuild_table(set_table *tab)
2617{
2618 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2619 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2620 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2621 /* Compaction: */
2622 tab->num_entries = 0;
2623 if (set_has_bins(tab))
2624 set_initialize_bins(tab);
2625 set_rebuild_table_with(tab, tab);
2626 }
2627 else {
2628 set_table *new_tab;
2629 /* This allocation could trigger GC and compaction. If tab is the
2630 * gen_fields_tbl, then tab could have changed in size due to objects being
2631 * freed and/or moved. Do not store attributes of tab before this line. */
2632 new_tab = set_init_table_with_size(NULL, tab->type,
2633 2 * tab->num_entries - 1);
2634 set_rebuild_table_with(new_tab, tab);
2635 set_rebuild_move_table(new_tab, tab);
2636 }
2637 set_rebuild_cleanup(tab);
2638}
2639
2640static void
2641set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
2642{
2643 st_index_t i, ni;
2644 unsigned int size_ind;
2645 set_table_entry *new_entries;
2646 set_table_entry *curr_entry_ptr;
2647 st_index_t *bins;
2648 st_index_t bin_ind;
2649
2650 new_entries = new_tab->entries;
2651
2652 ni = 0;
2653 bins = set_bins_ptr(new_tab);
2654 size_ind = set_get_size_ind(new_tab);
2655 st_index_t bound = tab->entries_bound;
2656 set_table_entry *entries = tab->entries;
2657
2658 for (i = tab->entries_start; i < bound; i++) {
2659 curr_entry_ptr = &entries[i];
2660 PREFETCH(entries + i + 1, 0);
2661 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2662 continue;
2663 if (&new_entries[ni] != curr_entry_ptr)
2664 new_entries[ni] = *curr_entry_ptr;
2665 if (EXPECT(bins != NULL, 1)) {
2666 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2667 curr_entry_ptr->key);
2668 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2669 }
2670 new_tab->num_entries++;
2671 ni++;
2672 }
2673
2674 assert(new_tab->num_entries == tab->num_entries);
2675}
2676
2677static void
2678set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
2679{
2680 sized_free(tab->entries, set_entries_memsize(tab));
2681 tab->entries = new_tab->entries;
2682
2683 tab->entry_power = new_tab->entry_power;
2684 tab->bin_power = new_tab->bin_power;
2685 tab->size_ind = new_tab->size_ind;
2686
2687 free_fixed_ptr(new_tab);
2688}
2689
2690static void
2691set_rebuild_cleanup(set_table *const tab)
2692{
2693 tab->entries_start = 0;
2694 tab->entries_bound = tab->num_entries;
2695 tab->rebuilds_num++;
2696}
2697
2698/* Return the next secondary hash index for table TAB using previous
2699 index IND and PERTURB. Finally modulo of the function becomes a
2700 full *cycle linear congruential generator*, in other words it
2701 guarantees traversing all table bins in extreme case.
2702
2703 According the Hull-Dobell theorem a generator
2704 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
2705 o m and c are relatively prime
2706 o a-1 is divisible by all prime factors of m
2707 o a-1 is divisible by 4 if m is divisible by 4.
2708
2709 For our case a is 5, c is 1, and m is a power of two. */
2710static inline st_index_t
2711set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
2712{
2713 *perturb >>= 11;
2714 ind = (ind << 2) + ind + *perturb + 1;
2715 return set_hash_bin(ind, tab);
2716}
2717
2718/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
2719 search. Return the index of the found entry in array `entries`.
2720 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
2721 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2722static inline st_index_t
2723set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
2724{
2725 int eq_p, rebuilt_p;
2726 st_index_t i, bound;
2727 set_table_entry *entries;
2728
2729 bound = tab->entries_bound;
2730 entries = tab->entries;
2731 for (i = tab->entries_start; i < bound; i++) {
2732 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2733 if (EXPECT(rebuilt_p, 0))
2734 return REBUILT_TABLE_ENTRY_IND;
2735 if (eq_p)
2736 return i;
2737 }
2738 return UNDEFINED_ENTRY_IND;
2739}
2740
2741/* Use the quadratic probing. The method has a better data locality
2742 but more collisions than the current approach. In average it
2743 results in a bit slower search. */
2744/*#define QUADRATIC_PROBE*/
2745
2746/* Return index of entry with HASH_VALUE and KEY in table TAB. If
2747 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
2748 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2749static st_index_t
2750set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2751{
2752 int eq_p, rebuilt_p;
2753 st_index_t ind;
2754#ifdef QUADRATIC_PROBE
2755 st_index_t d;
2756#else
2757 st_index_t perturb;
2758#endif
2759 st_index_t bin;
2760 set_table_entry *entries = tab->entries;
2761
2762 ind = set_hash_bin(hash_value, tab);
2763#ifdef QUADRATIC_PROBE
2764 d = 1;
2765#else
2766 perturb = hash_value;
2767#endif
2768 for (;;) {
2769 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2770 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2771 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2772 if (EXPECT(rebuilt_p, 0))
2773 return REBUILT_TABLE_ENTRY_IND;
2774 if (eq_p)
2775 break;
2776 }
2777 else if (EMPTY_BIN_P(bin))
2778 return UNDEFINED_ENTRY_IND;
2779#ifdef QUADRATIC_PROBE
2780 ind = set_hash_bin(ind + d, tab);
2781 d++;
2782#else
2783 ind = set_secondary_hash(ind, tab, &perturb);
2784#endif
2785 }
2786 return bin;
2787}
2788
2789/* Find and return index of table TAB bin corresponding to an entry
2790 with HASH_VALUE and KEY. If there is no such bin, return
2791 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
2792 return REBUILT_TABLE_BIN_IND. */
2793static st_index_t
2794set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2795{
2796 int eq_p, rebuilt_p;
2797 st_index_t ind;
2798#ifdef QUADRATIC_PROBE
2799 st_index_t d;
2800#else
2801 st_index_t perturb;
2802#endif
2803 st_index_t bin;
2804 set_table_entry *entries = tab->entries;
2805
2806 ind = set_hash_bin(hash_value, tab);
2807#ifdef QUADRATIC_PROBE
2808 d = 1;
2809#else
2810 perturb = hash_value;
2811#endif
2812 for (;;) {
2813 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2814 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2815 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2816 if (EXPECT(rebuilt_p, 0))
2817 return REBUILT_TABLE_BIN_IND;
2818 if (eq_p)
2819 break;
2820 }
2821 else if (EMPTY_BIN_P(bin))
2822 return UNDEFINED_BIN_IND;
2823#ifdef QUADRATIC_PROBE
2824 ind = set_hash_bin(ind + d, tab);
2825 d++;
2826#else
2827 ind = set_secondary_hash(ind, tab, &perturb);
2828#endif
2829 }
2830 return ind;
2831}
2832
2833/* Find and return index of table TAB bin corresponding to an entry
2834 with HASH_VALUE and KEY. The entry should be in the table
2835 already. */
2836static st_index_t
2837set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
2838{
2839 st_index_t ind;
2840#ifdef QUADRATIC_PROBE
2841 st_index_t d;
2842#else
2843 st_index_t perturb;
2844#endif
2845 st_index_t bin;
2846
2847 ind = set_hash_bin(hash_value, tab);
2848#ifdef QUADRATIC_PROBE
2849 d = 1;
2850#else
2851 perturb = hash_value;
2852#endif
2853 for (;;) {
2854 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2855 if (EMPTY_OR_DELETED_BIN_P(bin))
2856 return ind;
2857#ifdef QUADRATIC_PROBE
2858 ind = set_hash_bin(ind + d, tab);
2859 d++;
2860#else
2861 ind = set_secondary_hash(ind, tab, &perturb);
2862#endif
2863 }
2864}
2865
2866/* Mark I-th bin of table TAB as empty, in other words not
2867 corresponding to any entry. */
2868#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
2869
2870/* Return index of table TAB bin for HASH_VALUE and KEY through
2871 BIN_IND and the pointed value as the function result. Reserve the
2872 bin for inclusion of the corresponding entry into the table if it
2873 is not there yet. We always find such bin as bins array length is
2874 bigger entries array. Although we can reuse a deleted bin, the
2875 result bin value is always empty if the table has no entry with
2876 KEY. Return the entries array index of the found entry or
2877 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
2878 during the search, return REBUILT_TABLE_ENTRY_IND. */
2879static st_index_t
2880set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2881 st_data_t key, st_index_t *bin_ind)
2882{
2883 int eq_p, rebuilt_p;
2884 st_index_t ind;
2885 st_hash_t curr_hash_value = *hash_value;
2886#ifdef QUADRATIC_PROBE
2887 st_index_t d;
2888#else
2889 st_index_t perturb;
2890#endif
2891 st_index_t entry_index;
2892 st_index_t firset_deleted_bin_ind;
2893 set_table_entry *entries;
2894
2895 ind = set_hash_bin(curr_hash_value, tab);
2896#ifdef QUADRATIC_PROBE
2897 d = 1;
2898#else
2899 perturb = curr_hash_value;
2900#endif
2901 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2902 entries = tab->entries;
2903 for (;;) {
2904 entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2905 if (EMPTY_BIN_P(entry_index)) {
2906 tab->num_entries++;
2907 entry_index = UNDEFINED_ENTRY_IND;
2908 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2909 /* We can reuse bin of a deleted entry. */
2910 ind = firset_deleted_bin_ind;
2911 MARK_SET_BIN_EMPTY(tab, ind);
2912 }
2913 break;
2914 }
2915 else if (! DELETED_BIN_P(entry_index)) {
2916 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2917 if (EXPECT(rebuilt_p, 0))
2918 return REBUILT_TABLE_ENTRY_IND;
2919 if (eq_p)
2920 break;
2921 }
2922 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2923 firset_deleted_bin_ind = ind;
2924#ifdef QUADRATIC_PROBE
2925 ind = set_hash_bin(ind + d, tab);
2926 d++;
2927#else
2928 ind = set_secondary_hash(ind, tab, &perturb);
2929#endif
2930 }
2931 *bin_ind = ind;
2932 return entry_index;
2933}
2934
2935/* Find an entry with KEY in table TAB. Return non-zero if we found
2936 it. */
2937int
2938set_table_lookup(set_table *tab, st_data_t key)
2939{
2940 st_index_t bin;
2941 st_hash_t hash = set_do_hash(key, tab);
2942
2943 retry:
2944 if (!set_has_bins(tab)) {
2945 bin = set_find_entry(tab, hash, key);
2946 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2947 goto retry;
2948 if (bin == UNDEFINED_ENTRY_IND)
2949 return 0;
2950 }
2951 else {
2952 bin = set_find_table_entry_ind(tab, hash, key);
2953 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2954 goto retry;
2955 if (bin == UNDEFINED_ENTRY_IND)
2956 return 0;
2957 bin -= ENTRY_BASE;
2958 }
2959 return 1;
2960}
2961
2962/* Check the table and rebuild it if it is necessary. */
2963static inline void
2964set_rebuild_table_if_necessary (set_table *tab)
2965{
2966 st_index_t bound = tab->entries_bound;
2967
2968 if (bound == set_get_allocated_entries(tab))
2969 set_rebuild_table(tab);
2970}
2971
2972/* Insert KEY into table TAB and return zero. If there is
2973 already entry with KEY in the table, return nonzero and update
2974 the value of the found entry. */
2975int
2976set_insert(set_table *tab, st_data_t key)
2977{
2978 set_table_entry *entry;
2979 st_index_t bin;
2980 st_index_t ind;
2981 st_hash_t hash_value;
2982 st_index_t bin_ind;
2983 int new_p;
2984
2985 hash_value = set_do_hash(key, tab);
2986 retry:
2987 set_rebuild_table_if_necessary(tab);
2988 if (!set_has_bins(tab)) {
2989 bin = set_find_entry(tab, hash_value, key);
2990 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2991 goto retry;
2992 new_p = bin == UNDEFINED_ENTRY_IND;
2993 if (new_p)
2994 tab->num_entries++;
2995 bin_ind = UNDEFINED_BIN_IND;
2996 }
2997 else {
2998 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
2999 key, &bin_ind);
3000 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3001 goto retry;
3002 new_p = bin == UNDEFINED_ENTRY_IND;
3003 bin -= ENTRY_BASE;
3004 }
3005 if (new_p) {
3006 ind = tab->entries_bound++;
3007 entry = &tab->entries[ind];
3008 entry->hash = hash_value;
3009 entry->key = key;
3010 if (bin_ind != UNDEFINED_BIN_IND)
3011 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
3012 return 0;
3013 }
3014 return 1;
3015}
3016
3017/* Create a copy of old_tab into new_tab. */
3018static set_table *
3019set_replace(set_table *new_tab, set_table *old_tab)
3020{
3021 *new_tab = *old_tab;
3022 size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
3023 new_tab->entries = (set_table_entry *)malloc(memsize);
3024 MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
3025 return new_tab;
3026}
3027
3028/* Create and return a copy of table OLD_TAB. */
3029set_table *
3030set_copy(set_table *new_tab, set_table *old_tab)
3031{
3032 if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
3033
3034 if (set_replace(new_tab, old_tab) == NULL) {
3035 set_free_table(new_tab);
3036 return NULL;
3037 }
3038
3039 return new_tab;
3040}
3041
3042/* Update the entries start of table TAB after removing an entry
3043 with index N in the array entries. */
3044static inline void
3045set_update_range_for_deleted(set_table *tab, st_index_t n)
3046{
3047 /* Do not update entries_bound here. Otherwise, we can fill all
3048 bins by deleted entry value before rebuilding the table. */
3049 if (tab->entries_start == n) {
3050 st_index_t start = n + 1;
3051 st_index_t bound = tab->entries_bound;
3052 set_table_entry *entries = tab->entries;
3053 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
3054 tab->entries_start = start;
3055 }
3056}
3057
3058/* Mark I-th bin of table TAB as corresponding to a deleted table
3059 entry. Update number of entries in the table and number of bins
3060 corresponding to deleted entries. */
3061#define MARK_SET_BIN_DELETED(tab, i) \
3062 do { \
3063 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
3064 } while (0)
3065
3066/* Delete entry with KEY from table TAB, and return non-zero. If
3067 there is no entry with KEY in the table, return zero. */
3068int
3069set_table_delete(set_table *tab, st_data_t *key)
3070{
3071 set_table_entry *entry;
3072 st_index_t bin;
3073 st_index_t bin_ind;
3074 st_hash_t hash;
3075
3076 hash = set_do_hash(*key, tab);
3077 retry:
3078 if (!set_has_bins(tab)) {
3079 bin = set_find_entry(tab, hash, *key);
3080 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3081 goto retry;
3082 if (bin == UNDEFINED_ENTRY_IND) {
3083 return 0;
3084 }
3085 }
3086 else {
3087 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3088 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3089 goto retry;
3090 if (bin_ind == UNDEFINED_BIN_IND) {
3091 return 0;
3092 }
3093 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3094 MARK_SET_BIN_DELETED(tab, bin_ind);
3095 }
3096 entry = &tab->entries[bin];
3097 *key = entry->key;
3098 MARK_ENTRY_DELETED(entry);
3099 tab->num_entries--;
3100 set_update_range_for_deleted(tab, bin);
3101 return 1;
3102}
3103
3104/* Traverse all entries in table TAB calling FUNC with current entry
3105 key and zero. If the call returns ST_STOP, stop
3106 traversing. If the call returns ST_DELETE, delete the current
3107 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
3108 traversing. The function returns zero unless an error is found.
3109 CHECK_P is flag of set_foreach_check call. The behavior is a bit
3110 different for ST_CHECK and when the current element is removed
3111 during traversing. */
3112static inline int
3113set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
3114 set_update_callback_func *replace, st_data_t arg,
3115 int check_p)
3116{
3117 st_index_t bin;
3118 st_index_t bin_ind;
3119 set_table_entry *entries, *curr_entry_ptr;
3120 enum st_retval retval;
3121 st_index_t i, rebuilds_num;
3122 st_hash_t hash;
3123 st_data_t key;
3124 int error_p, packed_p = !set_has_bins(tab);
3125
3126 entries = tab->entries;
3127 /* The bound can change inside the loop even without rebuilding
3128 the table, e.g. by an entry insertion. */
3129 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3130 curr_entry_ptr = &entries[i];
3131 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3132 continue;
3133 key = curr_entry_ptr->key;
3134 rebuilds_num = tab->rebuilds_num;
3135 hash = curr_entry_ptr->hash;
3136 retval = (*func)(key, arg, 0);
3137
3138 if (retval == ST_REPLACE && replace) {
3139 retval = (*replace)(&key, arg, TRUE);
3140 curr_entry_ptr->key = key;
3141 }
3142
3143 if (rebuilds_num != tab->rebuilds_num) {
3144 retry:
3145 entries = tab->entries;
3146 packed_p = !set_has_bins(tab);
3147 if (packed_p) {
3148 i = set_find_entry(tab, hash, key);
3149 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3150 goto retry;
3151 error_p = i == UNDEFINED_ENTRY_IND;
3152 }
3153 else {
3154 i = set_find_table_entry_ind(tab, hash, key);
3155 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3156 goto retry;
3157 error_p = i == UNDEFINED_ENTRY_IND;
3158 i -= ENTRY_BASE;
3159 }
3160 if (error_p && check_p) {
3161 /* call func with error notice */
3162 retval = (*func)(0, arg, 1);
3163 return 1;
3164 }
3165 curr_entry_ptr = &entries[i];
3166 }
3167 switch (retval) {
3168 case ST_REPLACE:
3169 break;
3170 case ST_CONTINUE:
3171 break;
3172 case ST_CHECK:
3173 if (check_p)
3174 break;
3175 case ST_STOP:
3176 return 0;
3177 case ST_DELETE: {
3178 st_data_t key = curr_entry_ptr->key;
3179
3180 again:
3181 if (packed_p) {
3182 bin = set_find_entry(tab, hash, key);
3183 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3184 goto again;
3185 if (bin == UNDEFINED_ENTRY_IND)
3186 break;
3187 }
3188 else {
3189 bin_ind = set_find_table_bin_ind(tab, hash, key);
3190 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3191 goto again;
3192 if (bin_ind == UNDEFINED_BIN_IND)
3193 break;
3194 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3195 MARK_SET_BIN_DELETED(tab, bin_ind);
3196 }
3197 curr_entry_ptr = &entries[bin];
3198 MARK_ENTRY_DELETED(curr_entry_ptr);
3199 tab->num_entries--;
3200 set_update_range_for_deleted(tab, bin);
3201 break;
3202 }
3203 }
3204 }
3205 return 0;
3206}
3207
3208int
3209set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3210{
3211 return set_general_foreach(tab, func, replace, arg, TRUE);
3212}
3213
3214struct set_functor {
3215 set_foreach_callback_func *func;
3216 st_data_t arg;
3217};
3218
3219static int
3220set_apply_functor(st_data_t k, st_data_t d, int _)
3221{
3222 const struct set_functor *f = (void *)d;
3223 return f->func(k, f->arg);
3224}
3225
3226int
3227set_table_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3228{
3229 const struct set_functor f = { func, arg };
3230 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3231}
3232
3233/* See comments for function set_delete_safe. */
3234int
3235set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3236 st_data_t never ATTRIBUTE_UNUSED)
3237{
3238 return set_general_foreach(tab, func, NULL, arg, TRUE);
3239}
3240
3241/* Set up array KEYS by at most SIZE keys of head table TAB entries.
3242 Return the number of keys set up in array KEYS. */
3243inline st_index_t
3244set_keys(set_table *tab, st_data_t *keys, st_index_t size)
3245{
3246 st_index_t i, bound;
3247 st_data_t key, *keys_start, *keys_end;
3248 set_table_entry *curr_entry_ptr, *entries = tab->entries;
3249
3250 bound = tab->entries_bound;
3251 keys_start = keys;
3252 keys_end = keys + size;
3253 for (i = tab->entries_start; i < bound; i++) {
3254 if (keys == keys_end)
3255 break;
3256 curr_entry_ptr = &entries[i];
3257 key = curr_entry_ptr->key;
3258 if (! DELETED_ENTRY_P(curr_entry_ptr))
3259 *keys++ = key;
3260 }
3261
3262 return keys - keys_start;
3263}
3264
3265void
3266set_compact_table(set_table *tab)
3267{
3268 st_index_t num = tab->num_entries;
3269 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3270 /* Compaction: */
3271 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3272 set_rebuild_table_with(new_tab, tab);
3273 set_rebuild_move_table(new_tab, tab);
3274 set_rebuild_cleanup(tab);
3275 }
3276}
3277
3278#endif
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:711
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1425
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:235
VALUE rb_cString
String class.
Definition string.c:81
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:468
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
set_table_entry * entries
Array of size 2^entry_power.
Definition set_table.h:28
Definition st.c:137
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40