Ruby 3.5.0dev (2025-05-16 revision 6b10d40157b5287cd13169437800343165eae949)
st.c (6b10d40157b5287cd13169437800343165eae949)
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/hash.h"
111#include "internal/sanitizers.h"
112#include "internal/set_table.h"
113#include "internal/st.h"
114#include "ruby_assert.h"
115#endif
116
117#include <stdio.h>
118#ifdef HAVE_STDLIB_H
119#include <stdlib.h>
120#endif
121#include <string.h>
122
123#ifdef __GNUC__
124#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
125#define EXPECT(expr, val) __builtin_expect(expr, val)
126#define ATTRIBUTE_UNUSED __attribute__((unused))
127#else
128#define PREFETCH(addr, write_p)
129#define EXPECT(expr, val) (expr)
130#define ATTRIBUTE_UNUSED
131#endif
132
133/* The type of hashes. */
134typedef st_index_t st_hash_t;
135
137 st_hash_t hash;
138 st_data_t key;
139 st_data_t record;
140};
141
142#define type_numhash st_hashtype_num
143static const struct st_hash_type st_hashtype_num = {
144 st_numcmp,
145 st_numhash,
146};
147
148static int st_strcmp(st_data_t, st_data_t);
149static st_index_t strhash(st_data_t);
150static const struct st_hash_type type_strhash = {
151 st_strcmp,
152 strhash,
153};
154
155static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
156static st_index_t strcasehash(st_data_t);
157static const struct st_hash_type type_strcasehash = {
158 st_locale_insensitive_strcasecmp_i,
159 strcasehash,
160};
161
162/* Value used to catch uninitialized entries/bins during debugging.
163 There is a possibility for a false alarm, but its probability is
164 extremely small. */
165#define ST_INIT_VAL 0xafafafafafafafaf
166#define ST_INIT_VAL_BYTE 0xafa
167
168#ifdef RUBY
169#undef malloc
170#undef realloc
171#undef calloc
172#undef free
173#define malloc ruby_xmalloc
174#define calloc ruby_xcalloc
175#define realloc ruby_xrealloc
176#define free ruby_xfree
177#endif
178
179#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
180#define PTR_EQUAL(tab, ptr, hash_val, key_) \
181 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
182
183/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
184 up to TRUE if the table is rebuilt during the comparison. */
185#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
186 do { \
187 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
188 res = PTR_EQUAL(tab, ptr, hash_val, key); \
189 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
190 } while (FALSE)
191
192/* Features of a table. */
194 /* Power of 2 used for number of allocated entries. */
195 unsigned char entry_power;
196 /* Power of 2 used for number of allocated bins. Depending on the
197 table size, the number of bins is 2-4 times more than the
198 number of entries. */
199 unsigned char bin_power;
200 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
201 unsigned char size_ind;
202 /* Bins are packed in words of type st_index_t. The following is
203 a size of bins counted by words. */
204 st_index_t bins_words;
205};
206
207/* Features of all possible size tables. */
208#if SIZEOF_ST_INDEX_T == 8
209#define MAX_POWER2 62
210static const struct st_features features[] = {
211 {0, 1, 0, 0x0},
212 {1, 2, 0, 0x1},
213 {2, 3, 0, 0x1},
214 {3, 4, 0, 0x2},
215 {4, 5, 0, 0x4},
216 {5, 6, 0, 0x8},
217 {6, 7, 0, 0x10},
218 {7, 8, 0, 0x20},
219 {8, 9, 1, 0x80},
220 {9, 10, 1, 0x100},
221 {10, 11, 1, 0x200},
222 {11, 12, 1, 0x400},
223 {12, 13, 1, 0x800},
224 {13, 14, 1, 0x1000},
225 {14, 15, 1, 0x2000},
226 {15, 16, 1, 0x4000},
227 {16, 17, 2, 0x10000},
228 {17, 18, 2, 0x20000},
229 {18, 19, 2, 0x40000},
230 {19, 20, 2, 0x80000},
231 {20, 21, 2, 0x100000},
232 {21, 22, 2, 0x200000},
233 {22, 23, 2, 0x400000},
234 {23, 24, 2, 0x800000},
235 {24, 25, 2, 0x1000000},
236 {25, 26, 2, 0x2000000},
237 {26, 27, 2, 0x4000000},
238 {27, 28, 2, 0x8000000},
239 {28, 29, 2, 0x10000000},
240 {29, 30, 2, 0x20000000},
241 {30, 31, 2, 0x40000000},
242 {31, 32, 2, 0x80000000},
243 {32, 33, 3, 0x200000000},
244 {33, 34, 3, 0x400000000},
245 {34, 35, 3, 0x800000000},
246 {35, 36, 3, 0x1000000000},
247 {36, 37, 3, 0x2000000000},
248 {37, 38, 3, 0x4000000000},
249 {38, 39, 3, 0x8000000000},
250 {39, 40, 3, 0x10000000000},
251 {40, 41, 3, 0x20000000000},
252 {41, 42, 3, 0x40000000000},
253 {42, 43, 3, 0x80000000000},
254 {43, 44, 3, 0x100000000000},
255 {44, 45, 3, 0x200000000000},
256 {45, 46, 3, 0x400000000000},
257 {46, 47, 3, 0x800000000000},
258 {47, 48, 3, 0x1000000000000},
259 {48, 49, 3, 0x2000000000000},
260 {49, 50, 3, 0x4000000000000},
261 {50, 51, 3, 0x8000000000000},
262 {51, 52, 3, 0x10000000000000},
263 {52, 53, 3, 0x20000000000000},
264 {53, 54, 3, 0x40000000000000},
265 {54, 55, 3, 0x80000000000000},
266 {55, 56, 3, 0x100000000000000},
267 {56, 57, 3, 0x200000000000000},
268 {57, 58, 3, 0x400000000000000},
269 {58, 59, 3, 0x800000000000000},
270 {59, 60, 3, 0x1000000000000000},
271 {60, 61, 3, 0x2000000000000000},
272 {61, 62, 3, 0x4000000000000000},
273 {62, 63, 3, 0x8000000000000000},
274};
275
276#else
277#define MAX_POWER2 30
278
279static const struct st_features features[] = {
280 {0, 1, 0, 0x1},
281 {1, 2, 0, 0x1},
282 {2, 3, 0, 0x2},
283 {3, 4, 0, 0x4},
284 {4, 5, 0, 0x8},
285 {5, 6, 0, 0x10},
286 {6, 7, 0, 0x20},
287 {7, 8, 0, 0x40},
288 {8, 9, 1, 0x100},
289 {9, 10, 1, 0x200},
290 {10, 11, 1, 0x400},
291 {11, 12, 1, 0x800},
292 {12, 13, 1, 0x1000},
293 {13, 14, 1, 0x2000},
294 {14, 15, 1, 0x4000},
295 {15, 16, 1, 0x8000},
296 {16, 17, 2, 0x20000},
297 {17, 18, 2, 0x40000},
298 {18, 19, 2, 0x80000},
299 {19, 20, 2, 0x100000},
300 {20, 21, 2, 0x200000},
301 {21, 22, 2, 0x400000},
302 {22, 23, 2, 0x800000},
303 {23, 24, 2, 0x1000000},
304 {24, 25, 2, 0x2000000},
305 {25, 26, 2, 0x4000000},
306 {26, 27, 2, 0x8000000},
307 {27, 28, 2, 0x10000000},
308 {28, 29, 2, 0x20000000},
309 {29, 30, 2, 0x40000000},
310 {30, 31, 2, 0x80000000},
311};
312
313#endif
314
315/* The reserved hash value and its substitution. */
316#define RESERVED_HASH_VAL (~(st_hash_t) 0)
317#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
318
319static inline st_hash_t
320normalize_hash_value(st_hash_t hash)
321{
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325}
326
327/* Return hash value of KEY for table TAB. */
328static inline st_hash_t
329do_hash(st_data_t key, st_table *tab)
330{
331 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
332 return normalize_hash_value(hash);
333}
334
335/* Power of 2 defining the minimal number of allocated entries. */
336#define MINIMAL_POWER2 2
337
338#if MINIMAL_POWER2 < 2
339#error "MINIMAL_POWER2 should be >= 2"
340#endif
341
342/* If the power2 of the allocated `entries` is less than the following
343 value, don't allocate bins and use a linear search. */
344#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
345
346/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
347static int
348get_power2(st_index_t size)
349{
350 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
351 if (n <= MAX_POWER2)
352 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
353#ifdef RUBY
354 /* Ran out of the table entries */
355 rb_raise(rb_eRuntimeError, "st_table too big");
356#endif
357 /* should raise exception */
358 return -1;
359}
360
361/* Return value of N-th bin in array BINS of table with bins size
362 index S. */
363static inline st_index_t
364get_bin(st_index_t *bins, int s, st_index_t n)
365{
366 return (s == 0 ? ((unsigned char *) bins)[n]
367 : s == 1 ? ((unsigned short *) bins)[n]
368 : s == 2 ? ((unsigned int *) bins)[n]
369 : ((st_index_t *) bins)[n]);
370}
371
372/* Set up N-th bin in array BINS of table with bins size index S to
373 value V. */
374static inline void
375set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
376{
377 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
378 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
379 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
380 else ((st_index_t *) bins)[n] = v;
381}
382
383/* These macros define reserved values for empty table bin and table
384 bin which contains a deleted entry. We will never use such values
385 for an entry index in bins. */
386#define EMPTY_BIN 0
387#define DELETED_BIN 1
388/* Base of a real entry index in the bins. */
389#define ENTRY_BASE 2
390
391/* Mark I-th bin of table TAB as empty, in other words not
392 corresponding to any entry. */
393#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
394
395/* Values used for not found entry and bin with given
396 characteristics. */
397#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
398#define UNDEFINED_BIN_IND (~(st_index_t) 0)
399
400/* Entry and bin values returned when we found a table rebuild during
401 the search. */
402#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
403#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
404
405/* Mark I-th bin of table TAB as corresponding to a deleted table
406 entry. Update number of entries in the table and number of bins
407 corresponding to deleted entries. */
408#define MARK_BIN_DELETED(tab, i) \
409 do { \
410 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
411 } while (0)
412
413/* Macros to check that value B is used empty bins and bins
414 corresponding deleted entries. */
415#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
416#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
417#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
418
419/* Macros to check empty bins and bins corresponding to deleted
420 entries. Bins are given by their index I in table TAB. */
421#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
423#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
424
425/* Macros for marking and checking deleted entries given by their
426 pointer E_PTR. */
427#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
428#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
429
430/* Return bin size index of table TAB. */
431static inline unsigned int
432get_size_ind(const st_table *tab)
433{
434 return tab->size_ind;
435}
436
437/* Return the number of allocated bins of table TAB. */
438static inline st_index_t
439get_bins_num(const st_table *tab)
440{
441 return ((st_index_t) 1)<<tab->bin_power;
442}
443
444/* Return mask for a bin index in table TAB. */
445static inline st_index_t
446bins_mask(const st_table *tab)
447{
448 return get_bins_num(tab) - 1;
449}
450
451/* Return the index of table TAB bin corresponding to
452 HASH_VALUE. */
453static inline st_index_t
454hash_bin(st_hash_t hash_value, st_table *tab)
455{
456 return hash_value & bins_mask(tab);
457}
458
459/* Return the number of allocated entries of table TAB. */
460static inline st_index_t
461get_allocated_entries(const st_table *tab)
462{
463 return ((st_index_t) 1)<<tab->entry_power;
464}
465
466/* Return size of the allocated bins of table TAB. */
467static inline st_index_t
468bins_size(const st_table *tab)
469{
470 return features[tab->entry_power].bins_words * sizeof (st_index_t);
471}
472
473/* Mark all bins of table TAB as empty. */
474static void
475initialize_bins(st_table *tab)
476{
477 memset(tab->bins, 0, bins_size(tab));
478}
479
480/* Make table TAB empty. */
481static void
482make_tab_empty(st_table *tab)
483{
484 tab->num_entries = 0;
485 tab->entries_start = tab->entries_bound = 0;
486 if (tab->bins != NULL)
487 initialize_bins(tab);
488}
489
490#ifdef HASH_LOG
491#ifdef HAVE_UNISTD_H
492#include <unistd.h>
493#endif
494static struct {
495 int all, total, num, str, strcase;
496} collision;
497
498/* Flag switching off output of package statistics at the end of
499 program. */
500static int init_st = 0;
501
502/* Output overall number of table searches and collisions into a
503 temporary file. */
504static void
505stat_col(void)
506{
507 char fname[10+sizeof(long)*3];
508 FILE *f;
509 if (!collision.total) return;
510 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
511 if (f == NULL)
512 return;
513 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
514 ((double)collision.all / (collision.total)) * 100);
515 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
516 fclose(f);
517}
518#endif
519
520st_table *
521st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
522{
523 int n;
524
525#ifdef HASH_LOG
526#if HASH_LOG+0 < 0
527 {
528 const char *e = getenv("ST_HASH_LOG");
529 if (!e || !*e) init_st = 1;
530 }
531#endif
532 if (init_st == 0) {
533 init_st = 1;
534 atexit(stat_col);
535 }
536#endif
537
538 n = get_power2(size);
539#ifndef RUBY
540 if (n < 0)
541 return NULL;
542#endif
543
544 tab->type = type;
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549 tab->bins = NULL;
550 else {
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
552#ifndef RUBY
553 if (tab->bins == NULL) {
554 free(tab);
555 return NULL;
556 }
557#endif
558 }
559 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560 * sizeof(st_table_entry));
561#ifndef RUBY
562 if (tab->entries == NULL) {
563 st_free_table(tab);
564 return NULL;
565 }
566#endif
567 make_tab_empty(tab);
568 tab->rebuilds_num = 0;
569 return tab;
570}
571
572/* Create and return table with TYPE which can hold at least SIZE
573 entries. The real number of entries which the table can hold is
574 the nearest power of two for SIZE. */
575st_table *
576st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
577{
578 st_table *tab = malloc(sizeof(st_table));
579#ifndef RUBY
580 if (tab == NULL)
581 return NULL;
582#endif
583
584#ifdef RUBY
585 st_init_existing_table_with_size(tab, type, size);
586#else
587 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
588 free(tab);
589 return NULL;
590 }
591#endif
592
593 return tab;
594}
595
596size_t
597st_table_size(const struct st_table *tbl)
598{
599 return tbl->num_entries;
600}
601
602/* Create and return table with TYPE which can hold a minimal number
603 of entries (see comments for get_power2). */
604st_table *
605st_init_table(const struct st_hash_type *type)
606{
607 return st_init_table_with_size(type, 0);
608}
609
610/* Create and return table which can hold a minimal number of
611 numbers. */
612st_table *
613st_init_numtable(void)
614{
615 return st_init_table(&type_numhash);
616}
617
618/* Create and return table which can hold SIZE numbers. */
619st_table *
620st_init_numtable_with_size(st_index_t size)
621{
622 return st_init_table_with_size(&type_numhash, size);
623}
624
625/* Create and return table which can hold a minimal number of
626 strings. */
627st_table *
628st_init_strtable(void)
629{
630 return st_init_table(&type_strhash);
631}
632
633/* Create and return table which can hold SIZE strings. */
634st_table *
635st_init_strtable_with_size(st_index_t size)
636{
637 return st_init_table_with_size(&type_strhash, size);
638}
639
640/* Create and return table which can hold a minimal number of strings
641 whose character case is ignored. */
642st_table *
643st_init_strcasetable(void)
644{
645 return st_init_table(&type_strcasehash);
646}
647
648/* Create and return table which can hold SIZE strings whose character
649 case is ignored. */
650st_table *
651st_init_strcasetable_with_size(st_index_t size)
652{
653 return st_init_table_with_size(&type_strcasehash, size);
654}
655
656/* Make table TAB empty. */
657void
658st_clear(st_table *tab)
659{
660 make_tab_empty(tab);
661 tab->rebuilds_num++;
662}
663
664/* Free table TAB space. */
665void
666st_free_table(st_table *tab)
667{
668 free(tab->bins);
669 free(tab->entries);
670 free(tab);
671}
672
673/* Return byte size of memory allocated for table TAB. */
674size_t
675st_memsize(const st_table *tab)
676{
677 return(sizeof(st_table)
678 + (tab->bins == NULL ? 0 : bins_size(tab))
679 + get_allocated_entries(tab) * sizeof(st_table_entry));
680}
681
682static st_index_t
683find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
684
685static st_index_t
686find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
687
688static st_index_t
689find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
690
691static st_index_t
692find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
693 st_data_t key, st_index_t *bin_ind);
694
695#ifdef HASH_LOG
696static void
697count_collision(const struct st_hash_type *type)
698{
699 collision.all++;
700 if (type == &type_numhash) {
701 collision.num++;
702 }
703 else if (type == &type_strhash) {
704 collision.strcase++;
705 }
706 else if (type == &type_strcasehash) {
707 collision.str++;
708 }
709}
710
711#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
712#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
713#define collision_check 0
714#else
715#define COLLISION
716#define FOUND_BIN
717#endif
718
719/* If the number of entries in the table is at least REBUILD_THRESHOLD
720 times less than the entry array length, decrease the table
721 size. */
722#define REBUILD_THRESHOLD 4
723
724#if REBUILD_THRESHOLD < 2
725#error "REBUILD_THRESHOLD should be >= 2"
726#endif
727
728static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
729static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
730static void rebuild_cleanup(st_table *const tab);
731
732/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
733 and can change size of the table entries and bins arrays.
734 Rebuilding is implemented by creation of a new table or by
735 compaction of the existing one. */
736static void
737rebuild_table(st_table *tab)
738{
739 if ((2 * tab->num_entries <= get_allocated_entries(tab)
740 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
741 || tab->num_entries < (1 << MINIMAL_POWER2)) {
742 /* Compaction: */
743 tab->num_entries = 0;
744 if (tab->bins != NULL)
745 initialize_bins(tab);
746 rebuild_table_with(tab, tab);
747 }
748 else {
749 st_table *new_tab;
750 /* This allocation could trigger GC and compaction. If tab is the
751 * gen_fields_tbl, then tab could have changed in size due to objects being
752 * freed and/or moved. Do not store attributes of tab before this line. */
753 new_tab = st_init_table_with_size(tab->type,
754 2 * tab->num_entries - 1);
755 rebuild_table_with(new_tab, tab);
756 rebuild_move_table(new_tab, tab);
757 }
758 rebuild_cleanup(tab);
759}
760
761static void
762rebuild_table_with(st_table *const new_tab, st_table *const tab)
763{
764 st_index_t i, ni;
765 unsigned int size_ind;
766 st_table_entry *new_entries;
767 st_table_entry *curr_entry_ptr;
768 st_index_t *bins;
769 st_index_t bin_ind;
770
771 new_entries = new_tab->entries;
772
773 ni = 0;
774 bins = new_tab->bins;
775 size_ind = get_size_ind(new_tab);
776 st_index_t bound = tab->entries_bound;
777 st_table_entry *entries = tab->entries;
778
779 for (i = tab->entries_start; i < bound; i++) {
780 curr_entry_ptr = &entries[i];
781 PREFETCH(entries + i + 1, 0);
782 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
783 continue;
784 if (&new_entries[ni] != curr_entry_ptr)
785 new_entries[ni] = *curr_entry_ptr;
786 if (EXPECT(bins != NULL, 1)) {
787 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
788 curr_entry_ptr->key);
789 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
790 }
791 new_tab->num_entries++;
792 ni++;
793 }
794
795 assert(new_tab->num_entries == tab->num_entries);
796}
797
798static void
799rebuild_move_table(st_table *const new_tab, st_table *const tab)
800{
801 tab->entry_power = new_tab->entry_power;
802 tab->bin_power = new_tab->bin_power;
803 tab->size_ind = new_tab->size_ind;
804 free(tab->bins);
805 tab->bins = new_tab->bins;
806 free(tab->entries);
807 tab->entries = new_tab->entries;
808 free(new_tab);
809}
810
811static void
812rebuild_cleanup(st_table *const tab)
813{
814 tab->entries_start = 0;
815 tab->entries_bound = tab->num_entries;
816 tab->rebuilds_num++;
817}
818
819/* Return the next secondary hash index for table TAB using previous
820 index IND and PERTURB. Finally modulo of the function becomes a
821 full *cycle linear congruential generator*, in other words it
822 guarantees traversing all table bins in extreme case.
823
824 According the Hull-Dobell theorem a generator
825 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
826 o m and c are relatively prime
827 o a-1 is divisible by all prime factors of m
828 o a-1 is divisible by 4 if m is divisible by 4.
829
830 For our case a is 5, c is 1, and m is a power of two. */
831static inline st_index_t
832secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
833{
834 *perturb >>= 11;
835 ind = (ind << 2) + ind + *perturb + 1;
836 return hash_bin(ind, tab);
837}
838
839/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
840 search. Return the index of the found entry in array `entries`.
841 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
842 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
843static inline st_index_t
844find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
845{
846 int eq_p, rebuilt_p;
847 st_index_t i, bound;
848 st_table_entry *entries;
849
850 bound = tab->entries_bound;
851 entries = tab->entries;
852 for (i = tab->entries_start; i < bound; i++) {
853 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
854 if (EXPECT(rebuilt_p, 0))
855 return REBUILT_TABLE_ENTRY_IND;
856 if (eq_p)
857 return i;
858 }
859 return UNDEFINED_ENTRY_IND;
860}
861
862/* Use the quadratic probing. The method has a better data locality
863 but more collisions than the current approach. In average it
864 results in a bit slower search. */
865/*#define QUADRATIC_PROBE*/
866
867/* Return index of entry with HASH_VALUE and KEY in table TAB. If
868 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
869 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
870static st_index_t
871find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
872{
873 int eq_p, rebuilt_p;
874 st_index_t ind;
875#ifdef QUADRATIC_PROBE
876 st_index_t d;
877#else
878 st_index_t perturb;
879#endif
880 st_index_t bin;
881 st_table_entry *entries = tab->entries;
882
883 ind = hash_bin(hash_value, tab);
884#ifdef QUADRATIC_PROBE
885 d = 1;
886#else
887 perturb = hash_value;
888#endif
889 FOUND_BIN;
890 for (;;) {
891 bin = get_bin(tab->bins, get_size_ind(tab), ind);
892 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
893 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
894 if (EXPECT(rebuilt_p, 0))
895 return REBUILT_TABLE_ENTRY_IND;
896 if (eq_p)
897 break;
898 }
899 else if (EMPTY_BIN_P(bin))
900 return UNDEFINED_ENTRY_IND;
901#ifdef QUADRATIC_PROBE
902 ind = hash_bin(ind + d, tab);
903 d++;
904#else
905 ind = secondary_hash(ind, tab, &perturb);
906#endif
907 COLLISION;
908 }
909 return bin;
910}
911
912/* Find and return index of table TAB bin corresponding to an entry
913 with HASH_VALUE and KEY. If there is no such bin, return
914 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
915 return REBUILT_TABLE_BIN_IND. */
916static st_index_t
917find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
918{
919 int eq_p, rebuilt_p;
920 st_index_t ind;
921#ifdef QUADRATIC_PROBE
922 st_index_t d;
923#else
924 st_index_t perturb;
925#endif
926 st_index_t bin;
927 st_table_entry *entries = tab->entries;
928
929 ind = hash_bin(hash_value, tab);
930#ifdef QUADRATIC_PROBE
931 d = 1;
932#else
933 perturb = hash_value;
934#endif
935 FOUND_BIN;
936 for (;;) {
937 bin = get_bin(tab->bins, get_size_ind(tab), ind);
938 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
939 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
940 if (EXPECT(rebuilt_p, 0))
941 return REBUILT_TABLE_BIN_IND;
942 if (eq_p)
943 break;
944 }
945 else if (EMPTY_BIN_P(bin))
946 return UNDEFINED_BIN_IND;
947#ifdef QUADRATIC_PROBE
948 ind = hash_bin(ind + d, tab);
949 d++;
950#else
951 ind = secondary_hash(ind, tab, &perturb);
952#endif
953 COLLISION;
954 }
955 return ind;
956}
957
958/* Find and return index of table TAB bin corresponding to an entry
959 with HASH_VALUE and KEY. The entry should be in the table
960 already. */
961static st_index_t
962find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
963{
964 st_index_t ind;
965#ifdef QUADRATIC_PROBE
966 st_index_t d;
967#else
968 st_index_t perturb;
969#endif
970 st_index_t bin;
971
972 ind = hash_bin(hash_value, tab);
973#ifdef QUADRATIC_PROBE
974 d = 1;
975#else
976 perturb = hash_value;
977#endif
978 FOUND_BIN;
979 for (;;) {
980 bin = get_bin(tab->bins, get_size_ind(tab), ind);
981 if (EMPTY_OR_DELETED_BIN_P(bin))
982 return ind;
983#ifdef QUADRATIC_PROBE
984 ind = hash_bin(ind + d, tab);
985 d++;
986#else
987 ind = secondary_hash(ind, tab, &perturb);
988#endif
989 COLLISION;
990 }
991}
992
993/* Return index of table TAB bin for HASH_VALUE and KEY through
994 BIN_IND and the pointed value as the function result. Reserve the
995 bin for inclusion of the corresponding entry into the table if it
996 is not there yet. We always find such bin as bins array length is
997 bigger entries array. Although we can reuse a deleted bin, the
998 result bin value is always empty if the table has no entry with
999 KEY. Return the entries array index of the found entry or
1000 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1001 during the search, return REBUILT_TABLE_ENTRY_IND. */
1002static st_index_t
1003find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1004 st_data_t key, st_index_t *bin_ind)
1005{
1006 int eq_p, rebuilt_p;
1007 st_index_t ind;
1008 st_hash_t curr_hash_value = *hash_value;
1009#ifdef QUADRATIC_PROBE
1010 st_index_t d;
1011#else
1012 st_index_t perturb;
1013#endif
1014 st_index_t entry_index;
1015 st_index_t first_deleted_bin_ind;
1016 st_table_entry *entries;
1017
1018 ind = hash_bin(curr_hash_value, tab);
1019#ifdef QUADRATIC_PROBE
1020 d = 1;
1021#else
1022 perturb = curr_hash_value;
1023#endif
1024 FOUND_BIN;
1025 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1026 entries = tab->entries;
1027 for (;;) {
1028 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1029 if (EMPTY_BIN_P(entry_index)) {
1030 tab->num_entries++;
1031 entry_index = UNDEFINED_ENTRY_IND;
1032 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1033 /* We can reuse bin of a deleted entry. */
1034 ind = first_deleted_bin_ind;
1035 MARK_BIN_EMPTY(tab, ind);
1036 }
1037 break;
1038 }
1039 else if (! DELETED_BIN_P(entry_index)) {
1040 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1041 if (EXPECT(rebuilt_p, 0))
1042 return REBUILT_TABLE_ENTRY_IND;
1043 if (eq_p)
1044 break;
1045 }
1046 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1047 first_deleted_bin_ind = ind;
1048#ifdef QUADRATIC_PROBE
1049 ind = hash_bin(ind + d, tab);
1050 d++;
1051#else
1052 ind = secondary_hash(ind, tab, &perturb);
1053#endif
1054 COLLISION;
1055 }
1056 *bin_ind = ind;
1057 return entry_index;
1058}
1059
1060/* Find an entry with KEY in table TAB. Return non-zero if we found
1061 it. Set up *RECORD to the found entry record. */
1062int
1063st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1064{
1065 st_index_t bin;
1066 st_hash_t hash = do_hash(key, tab);
1067
1068 retry:
1069 if (tab->bins == NULL) {
1070 bin = find_entry(tab, hash, key);
1071 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1072 goto retry;
1073 if (bin == UNDEFINED_ENTRY_IND)
1074 return 0;
1075 }
1076 else {
1077 bin = find_table_entry_ind(tab, hash, key);
1078 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1079 goto retry;
1080 if (bin == UNDEFINED_ENTRY_IND)
1081 return 0;
1082 bin -= ENTRY_BASE;
1083 }
1084 if (value != 0)
1085 *value = tab->entries[bin].record;
1086 return 1;
1087}
1088
1089/* Find an entry with KEY in table TAB. Return non-zero if we found
1090 it. Set up *RESULT to the found table entry key. */
1091int
1092st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1093{
1094 st_index_t bin;
1095 st_hash_t hash = do_hash(key, tab);
1096
1097 retry:
1098 if (tab->bins == NULL) {
1099 bin = find_entry(tab, hash, key);
1100 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1101 goto retry;
1102 if (bin == UNDEFINED_ENTRY_IND)
1103 return 0;
1104 }
1105 else {
1106 bin = find_table_entry_ind(tab, hash, key);
1107 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1108 goto retry;
1109 if (bin == UNDEFINED_ENTRY_IND)
1110 return 0;
1111 bin -= ENTRY_BASE;
1112 }
1113 if (result != 0)
1114 *result = tab->entries[bin].key;
1115 return 1;
1116}
1117
1118/* Check the table and rebuild it if it is necessary. */
1119static inline void
1120rebuild_table_if_necessary (st_table *tab)
1121{
1122 st_index_t bound = tab->entries_bound;
1123
1124 if (bound == get_allocated_entries(tab))
1125 rebuild_table(tab);
1126}
1127
1128/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1129 already entry with KEY in the table, return nonzero and update
1130 the value of the found entry. */
1131int
1132st_insert(st_table *tab, st_data_t key, st_data_t value)
1133{
1134 st_table_entry *entry;
1135 st_index_t bin;
1136 st_index_t ind;
1137 st_hash_t hash_value;
1138 st_index_t bin_ind;
1139 int new_p;
1140
1141 hash_value = do_hash(key, tab);
1142 retry:
1143 rebuild_table_if_necessary(tab);
1144 if (tab->bins == NULL) {
1145 bin = find_entry(tab, hash_value, key);
1146 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1147 goto retry;
1148 new_p = bin == UNDEFINED_ENTRY_IND;
1149 if (new_p)
1150 tab->num_entries++;
1151 bin_ind = UNDEFINED_BIN_IND;
1152 }
1153 else {
1154 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1155 key, &bin_ind);
1156 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1157 goto retry;
1158 new_p = bin == UNDEFINED_ENTRY_IND;
1159 bin -= ENTRY_BASE;
1160 }
1161 if (new_p) {
1162 ind = tab->entries_bound++;
1163 entry = &tab->entries[ind];
1164 entry->hash = hash_value;
1165 entry->key = key;
1166 entry->record = value;
1167 if (bin_ind != UNDEFINED_BIN_IND)
1168 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1169 return 0;
1170 }
1171 tab->entries[bin].record = value;
1172 return 1;
1173}
1174
1175/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1176 entry with KEY before the insertion. */
1177static inline void
1178st_add_direct_with_hash(st_table *tab,
1179 st_data_t key, st_data_t value, st_hash_t hash)
1180{
1181 st_table_entry *entry;
1182 st_index_t ind;
1183 st_index_t bin_ind;
1184
1185 assert(hash != RESERVED_HASH_VAL);
1186
1187 rebuild_table_if_necessary(tab);
1188 ind = tab->entries_bound++;
1189 entry = &tab->entries[ind];
1190 entry->hash = hash;
1191 entry->key = key;
1192 entry->record = value;
1193 tab->num_entries++;
1194 if (tab->bins != NULL) {
1195 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1196 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1197 }
1198}
1199
1200void
1201rb_st_add_direct_with_hash(st_table *tab,
1202 st_data_t key, st_data_t value, st_hash_t hash)
1203{
1204 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1205}
1206
1207/* Insert (KEY, VALUE) into table TAB. The table should not have
1208 entry with KEY before the insertion. */
1209void
1210st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1211{
1212 st_hash_t hash_value;
1213
1214 hash_value = do_hash(key, tab);
1215 st_add_direct_with_hash(tab, key, value, hash_value);
1216}
1217
1218/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1219 there is already entry with KEY in the table, return nonzero and
1220 update the value of the found entry. */
1221int
1222st_insert2(st_table *tab, st_data_t key, st_data_t value,
1223 st_data_t (*func)(st_data_t))
1224{
1225 st_table_entry *entry;
1226 st_index_t bin;
1227 st_index_t ind;
1228 st_hash_t hash_value;
1229 st_index_t bin_ind;
1230 int new_p;
1231
1232 hash_value = do_hash(key, tab);
1233 retry:
1234 rebuild_table_if_necessary (tab);
1235 if (tab->bins == NULL) {
1236 bin = find_entry(tab, hash_value, key);
1237 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1238 goto retry;
1239 new_p = bin == UNDEFINED_ENTRY_IND;
1240 if (new_p)
1241 tab->num_entries++;
1242 bin_ind = UNDEFINED_BIN_IND;
1243 }
1244 else {
1245 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1246 key, &bin_ind);
1247 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1248 goto retry;
1249 new_p = bin == UNDEFINED_ENTRY_IND;
1250 bin -= ENTRY_BASE;
1251 }
1252 if (new_p) {
1253 key = (*func)(key);
1254 ind = tab->entries_bound++;
1255 entry = &tab->entries[ind];
1256 entry->hash = hash_value;
1257 entry->key = key;
1258 entry->record = value;
1259 if (bin_ind != UNDEFINED_BIN_IND)
1260 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1261 return 0;
1262 }
1263 tab->entries[bin].record = value;
1264 return 1;
1265}
1266
1267/* Create a copy of old_tab into new_tab. */
1268st_table *
1269st_replace(st_table *new_tab, st_table *old_tab)
1270{
1271 *new_tab = *old_tab;
1272 if (old_tab->bins == NULL)
1273 new_tab->bins = NULL;
1274 else {
1275 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1276#ifndef RUBY
1277 if (new_tab->bins == NULL) {
1278 return NULL;
1279 }
1280#endif
1281 }
1282 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1283 * sizeof(st_table_entry));
1284#ifndef RUBY
1285 if (new_tab->entries == NULL) {
1286 return NULL;
1287 }
1288#endif
1289 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1290 get_allocated_entries(old_tab));
1291 if (old_tab->bins != NULL)
1292 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1293
1294 return new_tab;
1295}
1296
1297/* Create and return a copy of table OLD_TAB. */
1298st_table *
1299st_copy(st_table *old_tab)
1300{
1301 st_table *new_tab;
1302
1303 new_tab = (st_table *) malloc(sizeof(st_table));
1304#ifndef RUBY
1305 if (new_tab == NULL)
1306 return NULL;
1307#endif
1308
1309 if (st_replace(new_tab, old_tab) == NULL) {
1310 st_free_table(new_tab);
1311 return NULL;
1312 }
1313
1314 return new_tab;
1315}
1316
1317/* Update the entries start of table TAB after removing an entry
1318 with index N in the array entries. */
1319static inline void
1320update_range_for_deleted(st_table *tab, st_index_t n)
1321{
1322 /* Do not update entries_bound here. Otherwise, we can fill all
1323 bins by deleted entry value before rebuilding the table. */
1324 if (tab->entries_start == n) {
1325 st_index_t start = n + 1;
1326 st_index_t bound = tab->entries_bound;
1327 st_table_entry *entries = tab->entries;
1328 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1329 tab->entries_start = start;
1330 }
1331}
1332
1333/* Delete entry with KEY from table TAB, set up *VALUE (unless
1334 VALUE is zero) from deleted table entry, and return non-zero. If
1335 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1336 is zero), and return zero. */
1337static int
1338st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1339{
1340 st_table_entry *entry;
1341 st_index_t bin;
1342 st_index_t bin_ind;
1343 st_hash_t hash;
1344
1345 hash = do_hash(*key, tab);
1346 retry:
1347 if (tab->bins == NULL) {
1348 bin = find_entry(tab, hash, *key);
1349 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1350 goto retry;
1351 if (bin == UNDEFINED_ENTRY_IND) {
1352 if (value != 0) *value = 0;
1353 return 0;
1354 }
1355 }
1356 else {
1357 bin_ind = find_table_bin_ind(tab, hash, *key);
1358 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1359 goto retry;
1360 if (bin_ind == UNDEFINED_BIN_IND) {
1361 if (value != 0) *value = 0;
1362 return 0;
1363 }
1364 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1365 MARK_BIN_DELETED(tab, bin_ind);
1366 }
1367 entry = &tab->entries[bin];
1368 *key = entry->key;
1369 if (value != 0) *value = entry->record;
1370 MARK_ENTRY_DELETED(entry);
1371 tab->num_entries--;
1372 update_range_for_deleted(tab, bin);
1373 return 1;
1374}
1375
1376int
1377st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1378{
1379 return st_general_delete(tab, key, value);
1380}
1381
1382/* The function and other functions with suffix '_safe' or '_check'
1383 are originated from the previous implementation of the hash tables.
1384 It was necessary for correct deleting entries during traversing
1385 tables. The current implementation permits deletion during
1386 traversing without a specific way to do this. */
1387int
1388st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1389 st_data_t never ATTRIBUTE_UNUSED)
1390{
1391 return st_general_delete(tab, key, value);
1392}
1393
1394/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1395 return zero. Otherwise, remove the first entry in the table.
1396 Return its key through KEY and its record through VALUE (unless
1397 VALUE is zero). */
1398int
1399st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1400{
1401 st_index_t i, bound;
1402 st_index_t bin;
1403 st_table_entry *entries, *curr_entry_ptr;
1404 st_index_t bin_ind;
1405
1406 entries = tab->entries;
1407 bound = tab->entries_bound;
1408 for (i = tab->entries_start; i < bound; i++) {
1409 curr_entry_ptr = &entries[i];
1410 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1411 st_hash_t entry_hash = curr_entry_ptr->hash;
1412 st_data_t entry_key = curr_entry_ptr->key;
1413
1414 if (value != 0) *value = curr_entry_ptr->record;
1415 *key = entry_key;
1416 retry:
1417 if (tab->bins == NULL) {
1418 bin = find_entry(tab, entry_hash, entry_key);
1419 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1420 entries = tab->entries;
1421 goto retry;
1422 }
1423 curr_entry_ptr = &entries[bin];
1424 }
1425 else {
1426 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1427 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1428 entries = tab->entries;
1429 goto retry;
1430 }
1431 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1432 - ENTRY_BASE];
1433 MARK_BIN_DELETED(tab, bin_ind);
1434 }
1435 MARK_ENTRY_DELETED(curr_entry_ptr);
1436 tab->num_entries--;
1437 update_range_for_deleted(tab, i);
1438 return 1;
1439 }
1440 }
1441 if (value != 0) *value = 0;
1442 return 0;
1443}
1444
1445/* See comments for function st_delete_safe. */
1446void
1447st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1448 st_data_t never ATTRIBUTE_UNUSED)
1449{
1450}
1451
1452/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1453 of the key and the value of the found entry, and non-zero as the
1454 3rd argument. If the entry is not found, call FUNC with a pointer
1455 to KEY, a pointer to zero, and a zero argument. If the call
1456 returns ST_CONTINUE, the table will have an entry with key and
1457 value returned by FUNC through the 1st and 2nd parameters. If the
1458 call of FUNC returns ST_DELETE, the table will not have entry with
1459 KEY. The function returns flag of that the entry with KEY was in
1460 the table before the call. */
1461int
1462st_update(st_table *tab, st_data_t key,
1463 st_update_callback_func *func, st_data_t arg)
1464{
1465 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1466 st_index_t bin = 0; /* Ditto */
1467 st_table_entry *entries;
1468 st_index_t bin_ind;
1469 st_data_t value = 0, old_key;
1470 int retval, existing;
1471 st_hash_t hash = do_hash(key, tab);
1472
1473 retry:
1474 entries = tab->entries;
1475 if (tab->bins == NULL) {
1476 bin = find_entry(tab, hash, key);
1477 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1478 goto retry;
1479 existing = bin != UNDEFINED_ENTRY_IND;
1480 entry = &entries[bin];
1481 bin_ind = UNDEFINED_BIN_IND;
1482 }
1483 else {
1484 bin_ind = find_table_bin_ind(tab, hash, key);
1485 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1486 goto retry;
1487 existing = bin_ind != UNDEFINED_BIN_IND;
1488 if (existing) {
1489 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1490 entry = &entries[bin];
1491 }
1492 }
1493 if (existing) {
1494 key = entry->key;
1495 value = entry->record;
1496 }
1497 old_key = key;
1498 retval = (*func)(&key, &value, arg, existing);
1499 switch (retval) {
1500 case ST_CONTINUE:
1501 if (! existing) {
1502 st_add_direct_with_hash(tab, key, value, hash);
1503 break;
1504 }
1505 if (old_key != key) {
1506 entry->key = key;
1507 }
1508 entry->record = value;
1509 break;
1510 case ST_DELETE:
1511 if (existing) {
1512 if (bin_ind != UNDEFINED_BIN_IND)
1513 MARK_BIN_DELETED(tab, bin_ind);
1514 MARK_ENTRY_DELETED(entry);
1515 tab->num_entries--;
1516 update_range_for_deleted(tab, bin);
1517 }
1518 break;
1519 }
1520 return existing;
1521}
1522
1523/* Traverse all entries in table TAB calling FUNC with current entry
1524 key and value and zero. If the call returns ST_STOP, stop
1525 traversing. If the call returns ST_DELETE, delete the current
1526 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1527 traversing. The function returns zero unless an error is found.
1528 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1529 different for ST_CHECK and when the current element is removed
1530 during traversing. */
1531static inline int
1532st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1533 int check_p)
1534{
1535 st_index_t bin;
1536 st_index_t bin_ind;
1537 st_table_entry *entries, *curr_entry_ptr;
1538 enum st_retval retval;
1539 st_index_t i, rebuilds_num;
1540 st_hash_t hash;
1541 st_data_t key;
1542 int error_p, packed_p = tab->bins == NULL;
1543
1544 entries = tab->entries;
1545 /* The bound can change inside the loop even without rebuilding
1546 the table, e.g. by an entry insertion. */
1547 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1548 curr_entry_ptr = &entries[i];
1549 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1550 continue;
1551 key = curr_entry_ptr->key;
1552 rebuilds_num = tab->rebuilds_num;
1553 hash = curr_entry_ptr->hash;
1554 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1555
1556 if (retval == ST_REPLACE && replace) {
1557 st_data_t value;
1558 value = curr_entry_ptr->record;
1559 retval = (*replace)(&key, &value, arg, TRUE);
1560 curr_entry_ptr->key = key;
1561 curr_entry_ptr->record = value;
1562 }
1563
1564 if (rebuilds_num != tab->rebuilds_num) {
1565 retry:
1566 entries = tab->entries;
1567 packed_p = tab->bins == NULL;
1568 if (packed_p) {
1569 i = find_entry(tab, hash, key);
1570 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1571 goto retry;
1572 error_p = i == UNDEFINED_ENTRY_IND;
1573 }
1574 else {
1575 i = find_table_entry_ind(tab, hash, key);
1576 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1577 goto retry;
1578 error_p = i == UNDEFINED_ENTRY_IND;
1579 i -= ENTRY_BASE;
1580 }
1581 if (error_p && check_p) {
1582 /* call func with error notice */
1583 retval = (*func)(0, 0, arg, 1);
1584 return 1;
1585 }
1586 curr_entry_ptr = &entries[i];
1587 }
1588 switch (retval) {
1589 case ST_REPLACE:
1590 break;
1591 case ST_CONTINUE:
1592 break;
1593 case ST_CHECK:
1594 if (check_p)
1595 break;
1596 case ST_STOP:
1597 return 0;
1598 case ST_DELETE: {
1599 st_data_t key = curr_entry_ptr->key;
1600
1601 again:
1602 if (packed_p) {
1603 bin = find_entry(tab, hash, key);
1604 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1605 goto again;
1606 if (bin == UNDEFINED_ENTRY_IND)
1607 break;
1608 }
1609 else {
1610 bin_ind = find_table_bin_ind(tab, hash, key);
1611 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1612 goto again;
1613 if (bin_ind == UNDEFINED_BIN_IND)
1614 break;
1615 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1616 MARK_BIN_DELETED(tab, bin_ind);
1617 }
1618 curr_entry_ptr = &entries[bin];
1619 MARK_ENTRY_DELETED(curr_entry_ptr);
1620 tab->num_entries--;
1621 update_range_for_deleted(tab, bin);
1622 break;
1623 }
1624 }
1625 }
1626 return 0;
1627}
1628
1629int
1630st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1631{
1632 return st_general_foreach(tab, func, replace, arg, TRUE);
1633}
1634
1635struct functor {
1636 st_foreach_callback_func *func;
1637 st_data_t arg;
1638};
1639
1640static int
1641apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1642{
1643 const struct functor *f = (void *)d;
1644 return f->func(k, v, f->arg);
1645}
1646
1647int
1648st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1649{
1650 const struct functor f = { func, arg };
1651 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1652}
1653
1654/* See comments for function st_delete_safe. */
1655int
1656st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1657 st_data_t never ATTRIBUTE_UNUSED)
1658{
1659 return st_general_foreach(tab, func, 0, arg, TRUE);
1660}
1661
1662/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1663 Return the number of keys set up in array KEYS. */
1664static inline st_index_t
1665st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1666{
1667 st_index_t i, bound;
1668 st_data_t key, *keys_start, *keys_end;
1669 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1670
1671 bound = tab->entries_bound;
1672 keys_start = keys;
1673 keys_end = keys + size;
1674 for (i = tab->entries_start; i < bound; i++) {
1675 if (keys == keys_end)
1676 break;
1677 curr_entry_ptr = &entries[i];
1678 key = curr_entry_ptr->key;
1679 if (! DELETED_ENTRY_P(curr_entry_ptr))
1680 *keys++ = key;
1681 }
1682
1683 return keys - keys_start;
1684}
1685
1686st_index_t
1687st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1688{
1689 return st_general_keys(tab, keys, size);
1690}
1691
1692/* See comments for function st_delete_safe. */
1693st_index_t
1694st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1695 st_data_t never ATTRIBUTE_UNUSED)
1696{
1697 return st_general_keys(tab, keys, size);
1698}
1699
1700/* Set up array VALUES by at most SIZE values of head table TAB
1701 entries. Return the number of values set up in array VALUES. */
1702static inline st_index_t
1703st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1704{
1705 st_index_t i, bound;
1706 st_data_t *values_start, *values_end;
1707 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1708
1709 values_start = values;
1710 values_end = values + size;
1711 bound = tab->entries_bound;
1712 for (i = tab->entries_start; i < bound; i++) {
1713 if (values == values_end)
1714 break;
1715 curr_entry_ptr = &entries[i];
1716 if (! DELETED_ENTRY_P(curr_entry_ptr))
1717 *values++ = curr_entry_ptr->record;
1718 }
1719
1720 return values - values_start;
1721}
1722
1723st_index_t
1724st_values(st_table *tab, st_data_t *values, st_index_t size)
1725{
1726 return st_general_values(tab, values, size);
1727}
1728
1729/* See comments for function st_delete_safe. */
1730st_index_t
1731st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1732 st_data_t never ATTRIBUTE_UNUSED)
1733{
1734 return st_general_values(tab, values, size);
1735}
1736
1737#define FNV1_32A_INIT 0x811c9dc5
1738
1739/*
1740 * 32 bit magic FNV-1a prime
1741 */
1742#define FNV_32_PRIME 0x01000193
1743
1744/* __POWERPC__ added to accommodate Darwin case. */
1745#ifndef UNALIGNED_WORD_ACCESS
1746# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1747 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1748 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1749 defined(__mc68020__)
1750# define UNALIGNED_WORD_ACCESS 1
1751# endif
1752#endif
1753#ifndef UNALIGNED_WORD_ACCESS
1754# define UNALIGNED_WORD_ACCESS 0
1755#endif
1756
1757/* This hash function is quite simplified MurmurHash3
1758 * Simplification is legal, cause most of magic still happens in finalizator.
1759 * And finalizator is almost the same as in MurmurHash3 */
1760#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1761#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1762
1763#if ST_INDEX_BITS <= 32
1764#define C1 (st_index_t)0xcc9e2d51
1765#define C2 (st_index_t)0x1b873593
1766#else
1767#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1768#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1769#endif
1770NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1771NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1772NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1773
1774static inline st_index_t
1775murmur_step(st_index_t h, st_index_t k)
1776{
1777#if ST_INDEX_BITS <= 32
1778#define r1 (17)
1779#define r2 (11)
1780#else
1781#define r1 (33)
1782#define r2 (24)
1783#endif
1784 k *= C1;
1785 h ^= ROTL(k, r1);
1786 h *= C2;
1787 h = ROTL(h, r2);
1788 return h;
1789}
1790#undef r1
1791#undef r2
1792
1793static inline st_index_t
1794murmur_finish(st_index_t h)
1795{
1796#if ST_INDEX_BITS <= 32
1797#define r1 (16)
1798#define r2 (13)
1799#define r3 (16)
1800 const st_index_t c1 = 0x85ebca6b;
1801 const st_index_t c2 = 0xc2b2ae35;
1802#else
1803/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1804#define r1 (30)
1805#define r2 (27)
1806#define r3 (31)
1807 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1808 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1809#endif
1810#if ST_INDEX_BITS > 64
1811 h ^= h >> 64;
1812 h *= c2;
1813 h ^= h >> 65;
1814#endif
1815 h ^= h >> r1;
1816 h *= c1;
1817 h ^= h >> r2;
1818 h *= c2;
1819 h ^= h >> r3;
1820 return h;
1821}
1822#undef r1
1823#undef r2
1824#undef r3
1825
1826st_index_t
1827st_hash(const void *ptr, size_t len, st_index_t h)
1828{
1829 const char *data = ptr;
1830 st_index_t t = 0;
1831 size_t l = len;
1832
1833#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1834#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1835#if SIZEOF_ST_INDEX_T > 4
1836#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1837#if SIZEOF_ST_INDEX_T > 8
1838#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1839 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1840#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1841#endif
1842#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1843#else
1844#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1845#endif
1846#undef SKIP_TAIL
1847 if (len >= sizeof(st_index_t)) {
1848#if !UNALIGNED_WORD_ACCESS
1849 int align = (int)((st_data_t)data % sizeof(st_index_t));
1850 if (align) {
1851 st_index_t d = 0;
1852 int sl, sr, pack;
1853
1854 switch (align) {
1855#ifdef WORDS_BIGENDIAN
1856# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1857 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1858#else
1859# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1860 t |= data_at(n) << CHAR_BIT*(n)
1861#endif
1862 UNALIGNED_ADD_ALL;
1863#undef UNALIGNED_ADD
1864 }
1865
1866#ifdef WORDS_BIGENDIAN
1867 t >>= (CHAR_BIT * align) - CHAR_BIT;
1868#else
1869 t <<= (CHAR_BIT * align);
1870#endif
1871
1872 data += sizeof(st_index_t)-align;
1873 len -= sizeof(st_index_t)-align;
1874
1875 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1876 sr = CHAR_BIT * align;
1877
1878 while (len >= sizeof(st_index_t)) {
1879 d = *(st_index_t *)data;
1880#ifdef WORDS_BIGENDIAN
1881 t = (t << sr) | (d >> sl);
1882#else
1883 t = (t >> sr) | (d << sl);
1884#endif
1885 h = murmur_step(h, t);
1886 t = d;
1887 data += sizeof(st_index_t);
1888 len -= sizeof(st_index_t);
1889 }
1890
1891 pack = len < (size_t)align ? (int)len : align;
1892 d = 0;
1893 switch (pack) {
1894#ifdef WORDS_BIGENDIAN
1895# define UNALIGNED_ADD(n) case (n) + 1: \
1896 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1897#else
1898# define UNALIGNED_ADD(n) case (n) + 1: \
1899 d |= data_at(n) << CHAR_BIT*(n)
1900#endif
1901 UNALIGNED_ADD_ALL;
1902#undef UNALIGNED_ADD
1903 }
1904#ifdef WORDS_BIGENDIAN
1905 t = (t << sr) | (d >> sl);
1906#else
1907 t = (t >> sr) | (d << sl);
1908#endif
1909
1910 if (len < (size_t)align) goto skip_tail;
1911# define SKIP_TAIL 1
1912 h = murmur_step(h, t);
1913 data += pack;
1914 len -= pack;
1915 }
1916 else
1917#endif
1918#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1919#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1920#else
1921#define aligned_data data
1922#endif
1923 {
1924 do {
1925 h = murmur_step(h, *(st_index_t *)aligned_data);
1926 data += sizeof(st_index_t);
1927 len -= sizeof(st_index_t);
1928 } while (len >= sizeof(st_index_t));
1929 }
1930 }
1931
1932 t = 0;
1933 switch (len) {
1934#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1935 /* in this case byteorder doesn't really matter */
1936#if SIZEOF_ST_INDEX_T > 4
1937 case 7: t |= data_at(6) << 48;
1938 case 6: t |= data_at(5) << 40;
1939 case 5: t |= data_at(4) << 32;
1940 case 4:
1941 t |= (st_index_t)*(uint32_t*)aligned_data;
1942 goto skip_tail;
1943# define SKIP_TAIL 1
1944#endif
1945 case 3: t |= data_at(2) << 16;
1946 case 2: t |= data_at(1) << 8;
1947 case 1: t |= data_at(0);
1948#else
1949#ifdef WORDS_BIGENDIAN
1950# define UNALIGNED_ADD(n) case (n) + 1: \
1951 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1952#else
1953# define UNALIGNED_ADD(n) case (n) + 1: \
1954 t |= data_at(n) << CHAR_BIT*(n)
1955#endif
1956 UNALIGNED_ADD_ALL;
1957#undef UNALIGNED_ADD
1958#endif
1959#ifdef SKIP_TAIL
1960 skip_tail:
1961#endif
1962 h ^= t; h -= ROTL(t, 7);
1963 h *= C2;
1964 }
1965 h ^= l;
1966#undef aligned_data
1967
1968 return murmur_finish(h);
1969}
1970
1971st_index_t
1972st_hash_uint32(st_index_t h, uint32_t i)
1973{
1974 return murmur_step(h, i);
1975}
1976
1977NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1978st_index_t
1979st_hash_uint(st_index_t h, st_index_t i)
1980{
1981 i += h;
1982/* no matter if it is BigEndian or LittleEndian,
1983 * we hash just integers */
1984#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1985 h = murmur_step(h, i >> 8*8);
1986#endif
1987 h = murmur_step(h, i);
1988 return h;
1989}
1990
1991st_index_t
1992st_hash_end(st_index_t h)
1993{
1994 h = murmur_finish(h);
1995 return h;
1996}
1997
1998#undef st_hash_start
1999st_index_t
2000rb_st_hash_start(st_index_t h)
2001{
2002 return h;
2003}
2004
2005static st_index_t
2006strhash(st_data_t arg)
2007{
2008 register const char *string = (const char *)arg;
2009 return st_hash(string, strlen(string), FNV1_32A_INIT);
2010}
2011
2012int
2013st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2014{
2015 char c1, c2;
2016
2017 while (1) {
2018 c1 = *s1++;
2019 c2 = *s2++;
2020 if (c1 == '\0' || c2 == '\0') {
2021 if (c1 != '\0') return 1;
2022 if (c2 != '\0') return -1;
2023 return 0;
2024 }
2025 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2026 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2027 if (c1 != c2) {
2028 if (c1 > c2)
2029 return 1;
2030 else
2031 return -1;
2032 }
2033 }
2034}
2035
2036int
2037st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2038{
2039 char c1, c2;
2040 size_t i;
2041
2042 for (i = 0; i < n; i++) {
2043 c1 = *s1++;
2044 c2 = *s2++;
2045 if (c1 == '\0' || c2 == '\0') {
2046 if (c1 != '\0') return 1;
2047 if (c2 != '\0') return -1;
2048 return 0;
2049 }
2050 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2051 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2052 if (c1 != c2) {
2053 if (c1 > c2)
2054 return 1;
2055 else
2056 return -1;
2057 }
2058 }
2059 return 0;
2060}
2061
2062static int
2063st_strcmp(st_data_t lhs, st_data_t rhs)
2064{
2065 const char *s1 = (char *)lhs;
2066 const char *s2 = (char *)rhs;
2067 return strcmp(s1, s2);
2068}
2069
2070static int
2071st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2072{
2073 const char *s1 = (char *)lhs;
2074 const char *s2 = (char *)rhs;
2075 return st_locale_insensitive_strcasecmp(s1, s2);
2076}
2077
2078NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2079static st_index_t
2080strcasehash(st_data_t arg)
2081{
2082 register const char *string = (const char *)arg;
2083 register st_index_t hval = FNV1_32A_INIT;
2084
2085 /*
2086 * FNV-1a hash each octet in the buffer
2087 */
2088 while (*string) {
2089 unsigned int c = (unsigned char)*string++;
2090 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2091 hval ^= c;
2092
2093 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2094 hval *= FNV_32_PRIME;
2095 }
2096 return hval;
2097}
2098
2099int
2100st_numcmp(st_data_t x, st_data_t y)
2101{
2102 return x != y;
2103}
2104
2105st_index_t
2106st_numhash(st_data_t n)
2107{
2108 enum {s1 = 11, s2 = 3};
2109 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2110}
2111
2112#ifdef RUBY
2113/* Expand TAB to be suitable for holding SIZ entries in total.
2114 Pre-existing entries remain not deleted inside of TAB, but its bins
2115 are cleared to expect future reconstruction. See rehash below. */
2116static void
2117st_expand_table(st_table *tab, st_index_t siz)
2118{
2119 st_table *tmp;
2120 st_index_t n;
2121
2122 if (siz <= get_allocated_entries(tab))
2123 return; /* enough room already */
2124
2125 tmp = st_init_table_with_size(tab->type, siz);
2126 n = get_allocated_entries(tab);
2127 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2128 free(tab->entries);
2129 free(tab->bins);
2130 free(tmp->bins);
2131 tab->entry_power = tmp->entry_power;
2132 tab->bin_power = tmp->bin_power;
2133 tab->size_ind = tmp->size_ind;
2134 tab->entries = tmp->entries;
2135 tab->bins = NULL;
2136 tab->rebuilds_num++;
2137 free(tmp);
2138}
2139
2140/* Rehash using linear search. Return TRUE if we found that the table
2141 was rebuilt. */
2142static int
2143st_rehash_linear(st_table *tab)
2144{
2145 int eq_p, rebuilt_p;
2146 st_index_t i, j;
2147 st_table_entry *p, *q;
2148
2149 free(tab->bins);
2150 tab->bins = NULL;
2151
2152 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2153 p = &tab->entries[i];
2154 if (DELETED_ENTRY_P(p))
2155 continue;
2156 for (j = i + 1; j < tab->entries_bound; j++) {
2157 q = &tab->entries[j];
2158 if (DELETED_ENTRY_P(q))
2159 continue;
2160 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2161 if (EXPECT(rebuilt_p, 0))
2162 return TRUE;
2163 if (eq_p) {
2164 *p = *q;
2165 MARK_ENTRY_DELETED(q);
2166 tab->num_entries--;
2167 update_range_for_deleted(tab, j);
2168 }
2169 }
2170 }
2171 return FALSE;
2172}
2173
2174/* Rehash using index. Return TRUE if we found that the table was
2175 rebuilt. */
2176static int
2177st_rehash_indexed(st_table *tab)
2178{
2179 int eq_p, rebuilt_p;
2180 st_index_t i;
2181 st_index_t const n = bins_size(tab);
2182 unsigned int const size_ind = get_size_ind(tab);
2183 st_index_t *bins = realloc(tab->bins, n);
2184 tab->bins = bins;
2185 initialize_bins(tab);
2186 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2187 st_table_entry *p = &tab->entries[i];
2188 st_index_t ind;
2189#ifdef QUADRATIC_PROBE
2190 st_index_t d = 1;
2191#else
2192 st_index_t perturb = p->hash;
2193#endif
2194
2195 if (DELETED_ENTRY_P(p))
2196 continue;
2197
2198 ind = hash_bin(p->hash, tab);
2199 for (;;) {
2200 st_index_t bin = get_bin(bins, size_ind, ind);
2201 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2202 /* ok, new room */
2203 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2204 break;
2205 }
2206 else {
2207 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2208 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2209 if (EXPECT(rebuilt_p, 0))
2210 return TRUE;
2211 if (eq_p) {
2212 /* duplicated key; delete it */
2213 q->record = p->record;
2214 MARK_ENTRY_DELETED(p);
2215 tab->num_entries--;
2216 update_range_for_deleted(tab, bin);
2217 break;
2218 }
2219 else {
2220 /* hash collision; skip it */
2221#ifdef QUADRATIC_PROBE
2222 ind = hash_bin(ind + d, tab);
2223 d++;
2224#else
2225 ind = secondary_hash(ind, tab, &perturb);
2226#endif
2227 }
2228 }
2229 }
2230 }
2231 return FALSE;
2232}
2233
2234/* Reconstruct TAB's bins according to TAB's entries. This function
2235 permits conflicting keys inside of entries. No errors are reported
2236 then. All but one of them are discarded silently. */
2237static void
2238st_rehash(st_table *tab)
2239{
2240 int rebuilt_p;
2241
2242 do {
2243 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2244 rebuilt_p = st_rehash_linear(tab);
2245 else
2246 rebuilt_p = st_rehash_indexed(tab);
2247 } while (rebuilt_p);
2248}
2249
2250static st_data_t
2251st_stringify(VALUE key)
2252{
2253 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2254 rb_hash_key_str(key) : key;
2255}
2256
2257static void
2258st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2259{
2260 st_data_t k = st_stringify(key);
2262 e.hash = do_hash(k, tab);
2263 e.key = k;
2264 e.record = val;
2265
2266 tab->entries[tab->entries_bound++] = e;
2267 tab->num_entries++;
2268 RB_OBJ_WRITTEN(hash, Qundef, k);
2269 RB_OBJ_WRITTEN(hash, Qundef, val);
2270}
2271
2272static void
2273st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2274{
2275 long i;
2276
2277 for (i = 0; i < argc; /* */) {
2278 st_data_t k = st_stringify(argv[i++]);
2279 st_data_t v = argv[i++];
2280 st_insert(tab, k, v);
2281 RB_OBJ_WRITTEN(hash, Qundef, k);
2282 RB_OBJ_WRITTEN(hash, Qundef, v);
2283 }
2284}
2285
2286static void
2287st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2288{
2289 long i;
2290
2291 /* push elems */
2292 for (i = 0; i < argc; /* */) {
2293 VALUE key = argv[i++];
2294 VALUE val = argv[i++];
2295 st_insert_single(tab, hash, key, val);
2296 }
2297
2298 /* reindex */
2299 st_rehash(tab);
2300}
2301
2302/* Mimics ruby's { foo => bar } syntax. This function is subpart
2303 of rb_hash_bulk_insert. */
2304void
2305rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2306{
2307 st_index_t n, size = argc / 2;
2308 st_table *tab = RHASH_ST_TABLE(hash);
2309
2310 tab = RHASH_TBL_RAW(hash);
2311 n = tab->entries_bound + size;
2312 st_expand_table(tab, n);
2313 if (UNLIKELY(tab->num_entries))
2314 st_insert_generic(tab, argc, argv, hash);
2315 else if (argc <= 2)
2316 st_insert_single(tab, hash, argv[0], argv[1]);
2317 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2318 st_insert_linear(tab, argc, argv, hash);
2319 else
2320 st_insert_generic(tab, argc, argv, hash);
2321}
2322
2323void
2324rb_st_compact_table(st_table *tab)
2325{
2326 st_index_t num = tab->num_entries;
2327 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2328 /* Compaction: */
2329 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2330 rebuild_table_with(new_tab, tab);
2331 rebuild_move_table(new_tab, tab);
2332 rebuild_cleanup(tab);
2333 }
2334}
2335
2336/*
2337 * set_table related code
2338 */
2339
2340struct set_table_entry {
2341 st_hash_t hash;
2342 st_data_t key;
2343};
2344
2345/* Return hash value of KEY for table TAB. */
2346static inline st_hash_t
2347set_do_hash(st_data_t key, set_table *tab)
2348{
2349 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2350 return normalize_hash_value(hash);
2351}
2352
2353/* Return bin size index of table TAB. */
2354static inline unsigned int
2355set_get_size_ind(const set_table *tab)
2356{
2357 return tab->size_ind;
2358}
2359
2360/* Return the number of allocated bins of table TAB. */
2361static inline st_index_t
2362set_get_bins_num(const set_table *tab)
2363{
2364 return ((st_index_t) 1)<<tab->bin_power;
2365}
2366
2367/* Return mask for a bin index in table TAB. */
2368static inline st_index_t
2369set_bins_mask(const set_table *tab)
2370{
2371 return set_get_bins_num(tab) - 1;
2372}
2373
2374/* Return the index of table TAB bin corresponding to
2375 HASH_VALUE. */
2376static inline st_index_t
2377set_hash_bin(st_hash_t hash_value, set_table *tab)
2378{
2379 return hash_value & set_bins_mask(tab);
2380}
2381
2382/* Return the number of allocated entries of table TAB. */
2383static inline st_index_t
2384set_get_allocated_entries(const set_table *tab)
2385{
2386 return ((st_index_t) 1)<<tab->entry_power;
2387}
2388
2389/* Return size of the allocated bins of table TAB. */
2390static inline st_index_t
2391set_bins_size(const set_table *tab)
2392{
2393 return features[tab->entry_power].bins_words * sizeof (st_index_t);
2394}
2395
2396/* Mark all bins of table TAB as empty. */
2397static void
2398set_initialize_bins(set_table *tab)
2399{
2400 memset(tab->bins, 0, set_bins_size(tab));
2401}
2402
2403/* Make table TAB empty. */
2404static void
2405set_make_tab_empty(set_table *tab)
2406{
2407 tab->num_entries = 0;
2408 tab->entries_start = tab->entries_bound = 0;
2409 if (tab->bins != NULL)
2410 set_initialize_bins(tab);
2411}
2412
2413static set_table *
2414set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2415{
2416 int n;
2417
2418#ifdef HASH_LOG
2419#if HASH_LOG+0 < 0
2420 {
2421 const char *e = getenv("ST_HASH_LOG");
2422 if (!e || !*e) init_st = 1;
2423 }
2424#endif
2425 if (init_st == 0) {
2426 init_st = 1;
2427 atexit(stat_col);
2428 }
2429#endif
2430
2431 n = get_power2(size);
2432
2433 tab->type = type;
2434 tab->entry_power = n;
2435 tab->bin_power = features[n].bin_power;
2436 tab->size_ind = features[n].size_ind;
2437 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2438 tab->bins = NULL;
2439 else {
2440 tab->bins = (st_index_t *) malloc(set_bins_size(tab));
2441 }
2442 tab->entries = (set_table_entry *) malloc(set_get_allocated_entries(tab)
2443 * sizeof(set_table_entry));
2444 set_make_tab_empty(tab);
2445 tab->rebuilds_num = 0;
2446 return tab;
2447}
2448
2449/* Create and return table with TYPE which can hold at least SIZE
2450 entries. The real number of entries which the table can hold is
2451 the nearest power of two for SIZE. */
2452set_table *
2453set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2454{
2455 if (tab == NULL) tab = malloc(sizeof(set_table));
2456
2457 set_init_existing_table_with_size(tab, type, size);
2458
2459 return tab;
2460}
2461
2462set_table *
2463set_init_numtable(void)
2464{
2465 return set_init_table_with_size(NULL, &type_numhash, 0);
2466}
2467
2468size_t
2469set_table_size(const struct set_table *tbl)
2470{
2471 return tbl->num_entries;
2472}
2473
2474/* Make table TAB empty. */
2475void
2476set_clear(set_table *tab)
2477{
2478 set_make_tab_empty(tab);
2479 tab->rebuilds_num++;
2480}
2481
2482/* Free table TAB space. This should only be used if you passed NULL to
2483 set_init_table_with_size/set_copy when creating the table. */
2484void
2485set_free_table(set_table *tab)
2486{
2487 free(tab->bins);
2488 free(tab->entries);
2489 free(tab);
2490}
2491
2492/* Return byte size of memory allocated for table TAB. */
2493size_t
2494set_memsize(const set_table *tab)
2495{
2496 return(sizeof(set_table)
2497 + (tab->bins == NULL ? 0 : set_bins_size(tab))
2498 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2499}
2500
2501static st_index_t
2502set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2503
2504static st_index_t
2505set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2506
2507static st_index_t
2508set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key);
2509
2510static st_index_t
2511set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2512 st_data_t key, st_index_t *bin_ind);
2513
2514static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
2515static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
2516static void set_rebuild_cleanup(set_table *const tab);
2517
2518/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
2519 and can change size of the table entries and bins arrays.
2520 Rebuilding is implemented by creation of a new table or by
2521 compaction of the existing one. */
2522static void
2523set_rebuild_table(set_table *tab)
2524{
2525 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2526 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2527 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2528 /* Compaction: */
2529 tab->num_entries = 0;
2530 if (tab->bins != NULL)
2531 set_initialize_bins(tab);
2532 set_rebuild_table_with(tab, tab);
2533 }
2534 else {
2535 set_table *new_tab;
2536 /* This allocation could trigger GC and compaction. If tab is the
2537 * gen_fields_tbl, then tab could have changed in size due to objects being
2538 * freed and/or moved. Do not store attributes of tab before this line. */
2539 new_tab = set_init_table_with_size(NULL, tab->type,
2540 2 * tab->num_entries - 1);
2541 set_rebuild_table_with(new_tab, tab);
2542 set_rebuild_move_table(new_tab, tab);
2543 }
2544 set_rebuild_cleanup(tab);
2545}
2546
2547static void
2548set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
2549{
2550 st_index_t i, ni;
2551 unsigned int size_ind;
2552 set_table_entry *new_entries;
2553 set_table_entry *curr_entry_ptr;
2554 st_index_t *bins;
2555 st_index_t bin_ind;
2556
2557 new_entries = new_tab->entries;
2558
2559 ni = 0;
2560 bins = new_tab->bins;
2561 size_ind = set_get_size_ind(new_tab);
2562 st_index_t bound = tab->entries_bound;
2563 set_table_entry *entries = tab->entries;
2564
2565 for (i = tab->entries_start; i < bound; i++) {
2566 curr_entry_ptr = &entries[i];
2567 PREFETCH(entries + i + 1, 0);
2568 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2569 continue;
2570 if (&new_entries[ni] != curr_entry_ptr)
2571 new_entries[ni] = *curr_entry_ptr;
2572 if (EXPECT(bins != NULL, 1)) {
2573 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2574 curr_entry_ptr->key);
2575 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2576 }
2577 new_tab->num_entries++;
2578 ni++;
2579 }
2580
2581 assert(new_tab->num_entries == tab->num_entries);
2582}
2583
2584static void
2585set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
2586{
2587 tab->entry_power = new_tab->entry_power;
2588 tab->bin_power = new_tab->bin_power;
2589 tab->size_ind = new_tab->size_ind;
2590 free(tab->bins);
2591 tab->bins = new_tab->bins;
2592 free(tab->entries);
2593 tab->entries = new_tab->entries;
2594 free(new_tab);
2595}
2596
2597static void
2598set_rebuild_cleanup(set_table *const tab)
2599{
2600 tab->entries_start = 0;
2601 tab->entries_bound = tab->num_entries;
2602 tab->rebuilds_num++;
2603}
2604
2605/* Return the next secondary hash index for table TAB using previous
2606 index IND and PERTURB. Finally modulo of the function becomes a
2607 full *cycle linear congruential generator*, in other words it
2608 guarantees traversing all table bins in extreme case.
2609
2610 According the Hull-Dobell theorem a generator
2611 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
2612 o m and c are relatively prime
2613 o a-1 is divisible by all prime factors of m
2614 o a-1 is divisible by 4 if m is divisible by 4.
2615
2616 For our case a is 5, c is 1, and m is a power of two. */
2617static inline st_index_t
2618set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
2619{
2620 *perturb >>= 11;
2621 ind = (ind << 2) + ind + *perturb + 1;
2622 return set_hash_bin(ind, tab);
2623}
2624
2625/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
2626 search. Return the index of the found entry in array `entries`.
2627 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
2628 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2629static inline st_index_t
2630set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
2631{
2632 int eq_p, rebuilt_p;
2633 st_index_t i, bound;
2634 set_table_entry *entries;
2635
2636 bound = tab->entries_bound;
2637 entries = tab->entries;
2638 for (i = tab->entries_start; i < bound; i++) {
2639 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2640 if (EXPECT(rebuilt_p, 0))
2641 return REBUILT_TABLE_ENTRY_IND;
2642 if (eq_p)
2643 return i;
2644 }
2645 return UNDEFINED_ENTRY_IND;
2646}
2647
2648/* Use the quadratic probing. The method has a better data locality
2649 but more collisions than the current approach. In average it
2650 results in a bit slower search. */
2651/*#define QUADRATIC_PROBE*/
2652
2653/* Return index of entry with HASH_VALUE and KEY in table TAB. If
2654 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
2655 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2656static st_index_t
2657set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2658{
2659 int eq_p, rebuilt_p;
2660 st_index_t ind;
2661#ifdef QUADRATIC_PROBE
2662 st_index_t d;
2663#else
2664 st_index_t perturb;
2665#endif
2666 st_index_t bin;
2667 set_table_entry *entries = tab->entries;
2668
2669 ind = set_hash_bin(hash_value, tab);
2670#ifdef QUADRATIC_PROBE
2671 d = 1;
2672#else
2673 perturb = hash_value;
2674#endif
2675 for (;;) {
2676 bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
2677 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2678 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2679 if (EXPECT(rebuilt_p, 0))
2680 return REBUILT_TABLE_ENTRY_IND;
2681 if (eq_p)
2682 break;
2683 }
2684 else if (EMPTY_BIN_P(bin))
2685 return UNDEFINED_ENTRY_IND;
2686#ifdef QUADRATIC_PROBE
2687 ind = set_hash_bin(ind + d, tab);
2688 d++;
2689#else
2690 ind = set_secondary_hash(ind, tab, &perturb);
2691#endif
2692 }
2693 return bin;
2694}
2695
2696/* Find and return index of table TAB bin corresponding to an entry
2697 with HASH_VALUE and KEY. If there is no such bin, return
2698 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
2699 return REBUILT_TABLE_BIN_IND. */
2700static st_index_t
2701set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2702{
2703 int eq_p, rebuilt_p;
2704 st_index_t ind;
2705#ifdef QUADRATIC_PROBE
2706 st_index_t d;
2707#else
2708 st_index_t perturb;
2709#endif
2710 st_index_t bin;
2711 set_table_entry *entries = tab->entries;
2712
2713 ind = set_hash_bin(hash_value, tab);
2714#ifdef QUADRATIC_PROBE
2715 d = 1;
2716#else
2717 perturb = hash_value;
2718#endif
2719 for (;;) {
2720 bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
2721 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2722 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2723 if (EXPECT(rebuilt_p, 0))
2724 return REBUILT_TABLE_BIN_IND;
2725 if (eq_p)
2726 break;
2727 }
2728 else if (EMPTY_BIN_P(bin))
2729 return UNDEFINED_BIN_IND;
2730#ifdef QUADRATIC_PROBE
2731 ind = set_hash_bin(ind + d, tab);
2732 d++;
2733#else
2734 ind = set_secondary_hash(ind, tab, &perturb);
2735#endif
2736 }
2737 return ind;
2738}
2739
2740/* Find and return index of table TAB bin corresponding to an entry
2741 with HASH_VALUE and KEY. The entry should be in the table
2742 already. */
2743static st_index_t
2744set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
2745{
2746 st_index_t ind;
2747#ifdef QUADRATIC_PROBE
2748 st_index_t d;
2749#else
2750 st_index_t perturb;
2751#endif
2752 st_index_t bin;
2753
2754 ind = set_hash_bin(hash_value, tab);
2755#ifdef QUADRATIC_PROBE
2756 d = 1;
2757#else
2758 perturb = hash_value;
2759#endif
2760 for (;;) {
2761 bin = get_bin(tab->bins, set_get_size_ind(tab), ind);
2762 if (EMPTY_OR_DELETED_BIN_P(bin))
2763 return ind;
2764#ifdef QUADRATIC_PROBE
2765 ind = set_hash_bin(ind + d, tab);
2766 d++;
2767#else
2768 ind = set_secondary_hash(ind, tab, &perturb);
2769#endif
2770 }
2771}
2772
2773/* Mark I-th bin of table TAB as empty, in other words not
2774 corresponding to any entry. */
2775#define MARK_SET_BIN_EMPTY(tab, i) (set_bin((tab)->bins, set_get_size_ind(tab), i, EMPTY_BIN))
2776
2777/* Return index of table TAB bin for HASH_VALUE and KEY through
2778 BIN_IND and the pointed value as the function result. Reserve the
2779 bin for inclusion of the corresponding entry into the table if it
2780 is not there yet. We always find such bin as bins array length is
2781 bigger entries array. Although we can reuse a deleted bin, the
2782 result bin value is always empty if the table has no entry with
2783 KEY. Return the entries array index of the found entry or
2784 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
2785 during the search, return REBUILT_TABLE_ENTRY_IND. */
2786static st_index_t
2787set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2788 st_data_t key, st_index_t *bin_ind)
2789{
2790 int eq_p, rebuilt_p;
2791 st_index_t ind;
2792 st_hash_t curr_hash_value = *hash_value;
2793#ifdef QUADRATIC_PROBE
2794 st_index_t d;
2795#else
2796 st_index_t perturb;
2797#endif
2798 st_index_t entry_index;
2799 st_index_t firset_deleted_bin_ind;
2800 set_table_entry *entries;
2801
2802 ind = set_hash_bin(curr_hash_value, tab);
2803#ifdef QUADRATIC_PROBE
2804 d = 1;
2805#else
2806 perturb = curr_hash_value;
2807#endif
2808 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2809 entries = tab->entries;
2810 for (;;) {
2811 entry_index = get_bin(tab->bins, set_get_size_ind(tab), ind);
2812 if (EMPTY_BIN_P(entry_index)) {
2813 tab->num_entries++;
2814 entry_index = UNDEFINED_ENTRY_IND;
2815 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2816 /* We can reuse bin of a deleted entry. */
2817 ind = firset_deleted_bin_ind;
2818 MARK_SET_BIN_EMPTY(tab, ind);
2819 }
2820 break;
2821 }
2822 else if (! DELETED_BIN_P(entry_index)) {
2823 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2824 if (EXPECT(rebuilt_p, 0))
2825 return REBUILT_TABLE_ENTRY_IND;
2826 if (eq_p)
2827 break;
2828 }
2829 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2830 firset_deleted_bin_ind = ind;
2831#ifdef QUADRATIC_PROBE
2832 ind = set_hash_bin(ind + d, tab);
2833 d++;
2834#else
2835 ind = set_secondary_hash(ind, tab, &perturb);
2836#endif
2837 }
2838 *bin_ind = ind;
2839 return entry_index;
2840}
2841
2842/* Find an entry with KEY in table TAB. Return non-zero if we found
2843 it. */
2844int
2845set_lookup(set_table *tab, st_data_t key)
2846{
2847 st_index_t bin;
2848 st_hash_t hash = set_do_hash(key, tab);
2849
2850 retry:
2851 if (tab->bins == NULL) {
2852 bin = set_find_entry(tab, hash, key);
2853 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2854 goto retry;
2855 if (bin == UNDEFINED_ENTRY_IND)
2856 return 0;
2857 }
2858 else {
2859 bin = set_find_table_entry_ind(tab, hash, key);
2860 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2861 goto retry;
2862 if (bin == UNDEFINED_ENTRY_IND)
2863 return 0;
2864 bin -= ENTRY_BASE;
2865 }
2866 return 1;
2867}
2868
2869/* Check the table and rebuild it if it is necessary. */
2870static inline void
2871set_rebuild_table_if_necessary (set_table *tab)
2872{
2873 st_index_t bound = tab->entries_bound;
2874
2875 if (bound == set_get_allocated_entries(tab))
2876 set_rebuild_table(tab);
2877}
2878
2879/* Insert KEY into table TAB and return zero. If there is
2880 already entry with KEY in the table, return nonzero and update
2881 the value of the found entry. */
2882int
2883set_insert(set_table *tab, st_data_t key)
2884{
2885 set_table_entry *entry;
2886 st_index_t bin;
2887 st_index_t ind;
2888 st_hash_t hash_value;
2889 st_index_t bin_ind;
2890 int new_p;
2891
2892 hash_value = set_do_hash(key, tab);
2893 retry:
2894 set_rebuild_table_if_necessary(tab);
2895 if (tab->bins == NULL) {
2896 bin = set_find_entry(tab, hash_value, key);
2897 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2898 goto retry;
2899 new_p = bin == UNDEFINED_ENTRY_IND;
2900 if (new_p)
2901 tab->num_entries++;
2902 bin_ind = UNDEFINED_BIN_IND;
2903 }
2904 else {
2905 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
2906 key, &bin_ind);
2907 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2908 goto retry;
2909 new_p = bin == UNDEFINED_ENTRY_IND;
2910 bin -= ENTRY_BASE;
2911 }
2912 if (new_p) {
2913 ind = tab->entries_bound++;
2914 entry = &tab->entries[ind];
2915 entry->hash = hash_value;
2916 entry->key = key;
2917 if (bin_ind != UNDEFINED_BIN_IND)
2918 set_bin(tab->bins, set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
2919 return 0;
2920 }
2921 return 1;
2922}
2923
2924/* Create a copy of old_tab into new_tab. */
2925static set_table *
2926set_replace(set_table *new_tab, set_table *old_tab)
2927{
2928 *new_tab = *old_tab;
2929 if (old_tab->bins == NULL)
2930 new_tab->bins = NULL;
2931 else {
2932 new_tab->bins = (st_index_t *) malloc(set_bins_size(old_tab));
2933 }
2934 new_tab->entries = (set_table_entry *) malloc(set_get_allocated_entries(old_tab)
2935 * sizeof(set_table_entry));
2936 MEMCPY(new_tab->entries, old_tab->entries, set_table_entry,
2937 set_get_allocated_entries(old_tab));
2938 if (old_tab->bins != NULL)
2939 MEMCPY(new_tab->bins, old_tab->bins, char, set_bins_size(old_tab));
2940
2941 return new_tab;
2942}
2943
2944/* Create and return a copy of table OLD_TAB. */
2945set_table *
2946set_copy(set_table *new_tab, set_table *old_tab)
2947{
2948 if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
2949
2950 if (set_replace(new_tab, old_tab) == NULL) {
2951 set_free_table(new_tab);
2952 return NULL;
2953 }
2954
2955 return new_tab;
2956}
2957
2958/* Update the entries start of table TAB after removing an entry
2959 with index N in the array entries. */
2960static inline void
2961set_update_range_for_deleted(set_table *tab, st_index_t n)
2962{
2963 /* Do not update entries_bound here. Otherwise, we can fill all
2964 bins by deleted entry value before rebuilding the table. */
2965 if (tab->entries_start == n) {
2966 st_index_t start = n + 1;
2967 st_index_t bound = tab->entries_bound;
2968 set_table_entry *entries = tab->entries;
2969 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
2970 tab->entries_start = start;
2971 }
2972}
2973
2974/* Mark I-th bin of table TAB as corresponding to a deleted table
2975 entry. Update number of entries in the table and number of bins
2976 corresponding to deleted entries. */
2977#define MARK_SET_BIN_DELETED(tab, i) \
2978 do { \
2979 set_bin((tab)->bins, set_get_size_ind(tab), i, DELETED_BIN); \
2980 } while (0)
2981
2982/* Delete entry with KEY from table TAB, and return non-zero. If
2983 there is no entry with KEY in the table, return zero. */
2984int
2985set_delete(set_table *tab, st_data_t *key)
2986{
2987 set_table_entry *entry;
2988 st_index_t bin;
2989 st_index_t bin_ind;
2990 st_hash_t hash;
2991
2992 hash = set_do_hash(*key, tab);
2993 retry:
2994 if (tab->bins == NULL) {
2995 bin = set_find_entry(tab, hash, *key);
2996 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2997 goto retry;
2998 if (bin == UNDEFINED_ENTRY_IND) {
2999 return 0;
3000 }
3001 }
3002 else {
3003 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3004 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3005 goto retry;
3006 if (bin_ind == UNDEFINED_BIN_IND) {
3007 return 0;
3008 }
3009 bin = get_bin(tab->bins, set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3010 MARK_SET_BIN_DELETED(tab, bin_ind);
3011 }
3012 entry = &tab->entries[bin];
3013 *key = entry->key;
3014 MARK_ENTRY_DELETED(entry);
3015 tab->num_entries--;
3016 set_update_range_for_deleted(tab, bin);
3017 return 1;
3018}
3019
3020/* Traverse all entries in table TAB calling FUNC with current entry
3021 key and zero. If the call returns ST_STOP, stop
3022 traversing. If the call returns ST_DELETE, delete the current
3023 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
3024 traversing. The function returns zero unless an error is found.
3025 CHECK_P is flag of set_foreach_check call. The behavior is a bit
3026 different for ST_CHECK and when the current element is removed
3027 during traversing. */
3028static inline int
3029set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
3030 set_update_callback_func *replace, st_data_t arg,
3031 int check_p)
3032{
3033 st_index_t bin;
3034 st_index_t bin_ind;
3035 set_table_entry *entries, *curr_entry_ptr;
3036 enum st_retval retval;
3037 st_index_t i, rebuilds_num;
3038 st_hash_t hash;
3039 st_data_t key;
3040 int error_p, packed_p = tab->bins == NULL;
3041
3042 entries = tab->entries;
3043 /* The bound can change inside the loop even without rebuilding
3044 the table, e.g. by an entry insertion. */
3045 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3046 curr_entry_ptr = &entries[i];
3047 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3048 continue;
3049 key = curr_entry_ptr->key;
3050 rebuilds_num = tab->rebuilds_num;
3051 hash = curr_entry_ptr->hash;
3052 retval = (*func)(key, arg, 0);
3053
3054 if (retval == ST_REPLACE && replace) {
3055 retval = (*replace)(&key, arg, TRUE);
3056 curr_entry_ptr->key = key;
3057 }
3058
3059 if (rebuilds_num != tab->rebuilds_num) {
3060 retry:
3061 entries = tab->entries;
3062 packed_p = tab->bins == NULL;
3063 if (packed_p) {
3064 i = set_find_entry(tab, hash, key);
3065 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3066 goto retry;
3067 error_p = i == UNDEFINED_ENTRY_IND;
3068 }
3069 else {
3070 i = set_find_table_entry_ind(tab, hash, key);
3071 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3072 goto retry;
3073 error_p = i == UNDEFINED_ENTRY_IND;
3074 i -= ENTRY_BASE;
3075 }
3076 if (error_p && check_p) {
3077 /* call func with error notice */
3078 retval = (*func)(0, arg, 1);
3079 return 1;
3080 }
3081 curr_entry_ptr = &entries[i];
3082 }
3083 switch (retval) {
3084 case ST_REPLACE:
3085 break;
3086 case ST_CONTINUE:
3087 break;
3088 case ST_CHECK:
3089 if (check_p)
3090 break;
3091 case ST_STOP:
3092 return 0;
3093 case ST_DELETE: {
3094 st_data_t key = curr_entry_ptr->key;
3095
3096 again:
3097 if (packed_p) {
3098 bin = set_find_entry(tab, hash, key);
3099 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3100 goto again;
3101 if (bin == UNDEFINED_ENTRY_IND)
3102 break;
3103 }
3104 else {
3105 bin_ind = set_find_table_bin_ind(tab, hash, key);
3106 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3107 goto again;
3108 if (bin_ind == UNDEFINED_BIN_IND)
3109 break;
3110 bin = get_bin(tab->bins, set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3111 MARK_SET_BIN_DELETED(tab, bin_ind);
3112 }
3113 curr_entry_ptr = &entries[bin];
3114 MARK_ENTRY_DELETED(curr_entry_ptr);
3115 tab->num_entries--;
3116 set_update_range_for_deleted(tab, bin);
3117 break;
3118 }
3119 }
3120 }
3121 return 0;
3122}
3123
3124int
3125set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3126{
3127 return set_general_foreach(tab, func, replace, arg, TRUE);
3128}
3129
3130struct set_functor {
3131 set_foreach_callback_func *func;
3132 st_data_t arg;
3133};
3134
3135static int
3136set_apply_functor(st_data_t k, st_data_t d, int _)
3137{
3138 const struct set_functor *f = (void *)d;
3139 return f->func(k, f->arg);
3140}
3141
3142int
3143set_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3144{
3145 const struct set_functor f = { func, arg };
3146 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3147}
3148
3149/* See comments for function set_delete_safe. */
3150int
3151set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3152 st_data_t never ATTRIBUTE_UNUSED)
3153{
3154 return set_general_foreach(tab, func, NULL, arg, TRUE);
3155}
3156
3157/* Set up array KEYS by at most SIZE keys of head table TAB entries.
3158 Return the number of keys set up in array KEYS. */
3159inline st_index_t
3160set_keys(set_table *tab, st_data_t *keys, st_index_t size)
3161{
3162 st_index_t i, bound;
3163 st_data_t key, *keys_start, *keys_end;
3164 set_table_entry *curr_entry_ptr, *entries = tab->entries;
3165
3166 bound = tab->entries_bound;
3167 keys_start = keys;
3168 keys_end = keys + size;
3169 for (i = tab->entries_start; i < bound; i++) {
3170 if (keys == keys_end)
3171 break;
3172 curr_entry_ptr = &entries[i];
3173 key = curr_entry_ptr->key;
3174 if (! DELETED_ENTRY_P(curr_entry_ptr))
3175 *keys++ = key;
3176 }
3177
3178 return keys - keys_start;
3179}
3180
3181void
3182set_compact_table(set_table *tab)
3183{
3184 st_index_t num = tab->num_entries;
3185 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3186 /* Compaction: */
3187 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3188 set_rebuild_table_with(new_tab, tab);
3189 set_rebuild_move_table(new_tab, tab);
3190 set_rebuild_cleanup(tab);
3191 }
3192}
3193
3194#endif
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:885
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1428
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:247
VALUE rb_cString
String class.
Definition string.c:82
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:615
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
Definition st.c:136
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40