Ruby 3.5.0dev (2025-04-04 revision 6b5e187d0eb07994fee7b5f0336da388a793dcbb)
st.c (6b5e187d0eb07994fee7b5f0336da388a793dcbb)
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/hash.h"
111#include "internal/sanitizers.h"
112#include "internal/st.h"
113#include "ruby_assert.h"
114#endif
115
116#include <stdio.h>
117#ifdef HAVE_STDLIB_H
118#include <stdlib.h>
119#endif
120#include <string.h>
121
122#ifdef __GNUC__
123#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
124#define EXPECT(expr, val) __builtin_expect(expr, val)
125#define ATTRIBUTE_UNUSED __attribute__((unused))
126#else
127#define PREFETCH(addr, write_p)
128#define EXPECT(expr, val) (expr)
129#define ATTRIBUTE_UNUSED
130#endif
131
132/* The type of hashes. */
133typedef st_index_t st_hash_t;
134
136 st_hash_t hash;
137 st_data_t key;
138 st_data_t record;
139};
140
141#define type_numhash st_hashtype_num
142static const struct st_hash_type st_hashtype_num = {
143 st_numcmp,
144 st_numhash,
145};
146
147static int st_strcmp(st_data_t, st_data_t);
148static st_index_t strhash(st_data_t);
149static const struct st_hash_type type_strhash = {
150 st_strcmp,
151 strhash,
152};
153
154static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
155static st_index_t strcasehash(st_data_t);
156static const struct st_hash_type type_strcasehash = {
157 st_locale_insensitive_strcasecmp_i,
158 strcasehash,
159};
160
161/* Value used to catch uninitialized entries/bins during debugging.
162 There is a possibility for a false alarm, but its probability is
163 extremely small. */
164#define ST_INIT_VAL 0xafafafafafafafaf
165#define ST_INIT_VAL_BYTE 0xafa
166
167#ifdef RUBY
168#undef malloc
169#undef realloc
170#undef calloc
171#undef free
172#define malloc ruby_xmalloc
173#define calloc ruby_xcalloc
174#define realloc ruby_xrealloc
175#define free ruby_xfree
176#endif
177
178#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
179#define PTR_EQUAL(tab, ptr, hash_val, key_) \
180 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
181
182/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
183 up to TRUE if the table is rebuilt during the comparison. */
184#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
185 do { \
186 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
187 res = PTR_EQUAL(tab, ptr, hash_val, key); \
188 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
189 } while (FALSE)
190
191/* Features of a table. */
193 /* Power of 2 used for number of allocated entries. */
194 unsigned char entry_power;
195 /* Power of 2 used for number of allocated bins. Depending on the
196 table size, the number of bins is 2-4 times more than the
197 number of entries. */
198 unsigned char bin_power;
199 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
200 unsigned char size_ind;
201 /* Bins are packed in words of type st_index_t. The following is
202 a size of bins counted by words. */
203 st_index_t bins_words;
204};
205
206/* Features of all possible size tables. */
207#if SIZEOF_ST_INDEX_T == 8
208#define MAX_POWER2 62
209static const struct st_features features[] = {
210 {0, 1, 0, 0x0},
211 {1, 2, 0, 0x1},
212 {2, 3, 0, 0x1},
213 {3, 4, 0, 0x2},
214 {4, 5, 0, 0x4},
215 {5, 6, 0, 0x8},
216 {6, 7, 0, 0x10},
217 {7, 8, 0, 0x20},
218 {8, 9, 1, 0x80},
219 {9, 10, 1, 0x100},
220 {10, 11, 1, 0x200},
221 {11, 12, 1, 0x400},
222 {12, 13, 1, 0x800},
223 {13, 14, 1, 0x1000},
224 {14, 15, 1, 0x2000},
225 {15, 16, 1, 0x4000},
226 {16, 17, 2, 0x10000},
227 {17, 18, 2, 0x20000},
228 {18, 19, 2, 0x40000},
229 {19, 20, 2, 0x80000},
230 {20, 21, 2, 0x100000},
231 {21, 22, 2, 0x200000},
232 {22, 23, 2, 0x400000},
233 {23, 24, 2, 0x800000},
234 {24, 25, 2, 0x1000000},
235 {25, 26, 2, 0x2000000},
236 {26, 27, 2, 0x4000000},
237 {27, 28, 2, 0x8000000},
238 {28, 29, 2, 0x10000000},
239 {29, 30, 2, 0x20000000},
240 {30, 31, 2, 0x40000000},
241 {31, 32, 2, 0x80000000},
242 {32, 33, 3, 0x200000000},
243 {33, 34, 3, 0x400000000},
244 {34, 35, 3, 0x800000000},
245 {35, 36, 3, 0x1000000000},
246 {36, 37, 3, 0x2000000000},
247 {37, 38, 3, 0x4000000000},
248 {38, 39, 3, 0x8000000000},
249 {39, 40, 3, 0x10000000000},
250 {40, 41, 3, 0x20000000000},
251 {41, 42, 3, 0x40000000000},
252 {42, 43, 3, 0x80000000000},
253 {43, 44, 3, 0x100000000000},
254 {44, 45, 3, 0x200000000000},
255 {45, 46, 3, 0x400000000000},
256 {46, 47, 3, 0x800000000000},
257 {47, 48, 3, 0x1000000000000},
258 {48, 49, 3, 0x2000000000000},
259 {49, 50, 3, 0x4000000000000},
260 {50, 51, 3, 0x8000000000000},
261 {51, 52, 3, 0x10000000000000},
262 {52, 53, 3, 0x20000000000000},
263 {53, 54, 3, 0x40000000000000},
264 {54, 55, 3, 0x80000000000000},
265 {55, 56, 3, 0x100000000000000},
266 {56, 57, 3, 0x200000000000000},
267 {57, 58, 3, 0x400000000000000},
268 {58, 59, 3, 0x800000000000000},
269 {59, 60, 3, 0x1000000000000000},
270 {60, 61, 3, 0x2000000000000000},
271 {61, 62, 3, 0x4000000000000000},
272 {62, 63, 3, 0x8000000000000000},
273};
274
275#else
276#define MAX_POWER2 30
277
278static const struct st_features features[] = {
279 {0, 1, 0, 0x1},
280 {1, 2, 0, 0x1},
281 {2, 3, 0, 0x2},
282 {3, 4, 0, 0x4},
283 {4, 5, 0, 0x8},
284 {5, 6, 0, 0x10},
285 {6, 7, 0, 0x20},
286 {7, 8, 0, 0x40},
287 {8, 9, 1, 0x100},
288 {9, 10, 1, 0x200},
289 {10, 11, 1, 0x400},
290 {11, 12, 1, 0x800},
291 {12, 13, 1, 0x1000},
292 {13, 14, 1, 0x2000},
293 {14, 15, 1, 0x4000},
294 {15, 16, 1, 0x8000},
295 {16, 17, 2, 0x20000},
296 {17, 18, 2, 0x40000},
297 {18, 19, 2, 0x80000},
298 {19, 20, 2, 0x100000},
299 {20, 21, 2, 0x200000},
300 {21, 22, 2, 0x400000},
301 {22, 23, 2, 0x800000},
302 {23, 24, 2, 0x1000000},
303 {24, 25, 2, 0x2000000},
304 {25, 26, 2, 0x4000000},
305 {26, 27, 2, 0x8000000},
306 {27, 28, 2, 0x10000000},
307 {28, 29, 2, 0x20000000},
308 {29, 30, 2, 0x40000000},
309 {30, 31, 2, 0x80000000},
310};
311
312#endif
313
314/* The reserved hash value and its substitution. */
315#define RESERVED_HASH_VAL (~(st_hash_t) 0)
316#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
317
318static inline st_hash_t
319normalize_hash_value(st_hash_t hash)
320{
321 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
322 another value. Such mapping should be extremely rare. */
323 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
324}
325
326/* Return hash value of KEY for table TAB. */
327static inline st_hash_t
328do_hash(st_data_t key, st_table *tab)
329{
330 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
331 return normalize_hash_value(hash);
332}
333
334/* Power of 2 defining the minimal number of allocated entries. */
335#define MINIMAL_POWER2 2
336
337#if MINIMAL_POWER2 < 2
338#error "MINIMAL_POWER2 should be >= 2"
339#endif
340
341/* If the power2 of the allocated `entries` is less than the following
342 value, don't allocate bins and use a linear search. */
343#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
344
345/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
346static int
347get_power2(st_index_t size)
348{
349 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
350 if (n <= MAX_POWER2)
351 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
352#ifdef RUBY
353 /* Ran out of the table entries */
354 rb_raise(rb_eRuntimeError, "st_table too big");
355#endif
356 /* should raise exception */
357 return -1;
358}
359
360/* Return value of N-th bin in array BINS of table with bins size
361 index S. */
362static inline st_index_t
363get_bin(st_index_t *bins, int s, st_index_t n)
364{
365 return (s == 0 ? ((unsigned char *) bins)[n]
366 : s == 1 ? ((unsigned short *) bins)[n]
367 : s == 2 ? ((unsigned int *) bins)[n]
368 : ((st_index_t *) bins)[n]);
369}
370
371/* Set up N-th bin in array BINS of table with bins size index S to
372 value V. */
373static inline void
374set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
375{
376 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
377 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
378 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
379 else ((st_index_t *) bins)[n] = v;
380}
381
382/* These macros define reserved values for empty table bin and table
383 bin which contains a deleted entry. We will never use such values
384 for an entry index in bins. */
385#define EMPTY_BIN 0
386#define DELETED_BIN 1
387/* Base of a real entry index in the bins. */
388#define ENTRY_BASE 2
389
390/* Mark I-th bin of table TAB as empty, in other words not
391 corresponding to any entry. */
392#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
393
394/* Values used for not found entry and bin with given
395 characteristics. */
396#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
397#define UNDEFINED_BIN_IND (~(st_index_t) 0)
398
399/* Entry and bin values returned when we found a table rebuild during
400 the search. */
401#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
402#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
403
404/* Mark I-th bin of table TAB as corresponding to a deleted table
405 entry. Update number of entries in the table and number of bins
406 corresponding to deleted entries. */
407#define MARK_BIN_DELETED(tab, i) \
408 do { \
409 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
410 } while (0)
411
412/* Macros to check that value B is used empty bins and bins
413 corresponding deleted entries. */
414#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
415#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
416#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
417
418/* Macros to check empty bins and bins corresponding to deleted
419 entries. Bins are given by their index I in table TAB. */
420#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
421#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
423
424/* Macros for marking and checking deleted entries given by their
425 pointer E_PTR. */
426#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
427#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
428
429/* Return bin size index of table TAB. */
430static inline unsigned int
431get_size_ind(const st_table *tab)
432{
433 return tab->size_ind;
434}
435
436/* Return the number of allocated bins of table TAB. */
437static inline st_index_t
438get_bins_num(const st_table *tab)
439{
440 return ((st_index_t) 1)<<tab->bin_power;
441}
442
443/* Return mask for a bin index in table TAB. */
444static inline st_index_t
445bins_mask(const st_table *tab)
446{
447 return get_bins_num(tab) - 1;
448}
449
450/* Return the index of table TAB bin corresponding to
451 HASH_VALUE. */
452static inline st_index_t
453hash_bin(st_hash_t hash_value, st_table *tab)
454{
455 return hash_value & bins_mask(tab);
456}
457
458/* Return the number of allocated entries of table TAB. */
459static inline st_index_t
460get_allocated_entries(const st_table *tab)
461{
462 return ((st_index_t) 1)<<tab->entry_power;
463}
464
465/* Return size of the allocated bins of table TAB. */
466static inline st_index_t
467bins_size(const st_table *tab)
468{
469 return features[tab->entry_power].bins_words * sizeof (st_index_t);
470}
471
472/* Mark all bins of table TAB as empty. */
473static void
474initialize_bins(st_table *tab)
475{
476 memset(tab->bins, 0, bins_size(tab));
477}
478
479/* Make table TAB empty. */
480static void
481make_tab_empty(st_table *tab)
482{
483 tab->num_entries = 0;
484 tab->entries_start = tab->entries_bound = 0;
485 if (tab->bins != NULL)
486 initialize_bins(tab);
487}
488
489#ifdef HASH_LOG
490#ifdef HAVE_UNISTD_H
491#include <unistd.h>
492#endif
493static struct {
494 int all, total, num, str, strcase;
495} collision;
496
497/* Flag switching off output of package statistics at the end of
498 program. */
499static int init_st = 0;
500
501/* Output overall number of table searches and collisions into a
502 temporary file. */
503static void
504stat_col(void)
505{
506 char fname[10+sizeof(long)*3];
507 FILE *f;
508 if (!collision.total) return;
509 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
510 if (f == NULL)
511 return;
512 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
513 ((double)collision.all / (collision.total)) * 100);
514 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
515 fclose(f);
516}
517#endif
518
519st_table *
520st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
521{
522 int n;
523
524#ifdef HASH_LOG
525#if HASH_LOG+0 < 0
526 {
527 const char *e = getenv("ST_HASH_LOG");
528 if (!e || !*e) init_st = 1;
529 }
530#endif
531 if (init_st == 0) {
532 init_st = 1;
533 atexit(stat_col);
534 }
535#endif
536
537 n = get_power2(size);
538#ifndef RUBY
539 if (n < 0)
540 return NULL;
541#endif
542
543 tab->type = type;
544 tab->entry_power = n;
545 tab->bin_power = features[n].bin_power;
546 tab->size_ind = features[n].size_ind;
547 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
548 tab->bins = NULL;
549 else {
550 tab->bins = (st_index_t *) malloc(bins_size(tab));
551#ifndef RUBY
552 if (tab->bins == NULL) {
553 free(tab);
554 return NULL;
555 }
556#endif
557 }
558 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
559 * sizeof(st_table_entry));
560#ifndef RUBY
561 if (tab->entries == NULL) {
562 st_free_table(tab);
563 return NULL;
564 }
565#endif
566 make_tab_empty(tab);
567 tab->rebuilds_num = 0;
568 return tab;
569}
570
571/* Create and return table with TYPE which can hold at least SIZE
572 entries. The real number of entries which the table can hold is
573 the nearest power of two for SIZE. */
574st_table *
575st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
576{
577 st_table *tab = malloc(sizeof(st_table));
578#ifndef RUBY
579 if (tab == NULL)
580 return NULL;
581#endif
582
583#ifdef RUBY
584 st_init_existing_table_with_size(tab, type, size);
585#else
586 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
587 free(tab);
588 return NULL;
589 }
590#endif
591
592 return tab;
593}
594
595size_t
596st_table_size(const struct st_table *tbl)
597{
598 return tbl->num_entries;
599}
600
601/* Create and return table with TYPE which can hold a minimal number
602 of entries (see comments for get_power2). */
603st_table *
604st_init_table(const struct st_hash_type *type)
605{
606 return st_init_table_with_size(type, 0);
607}
608
609/* Create and return table which can hold a minimal number of
610 numbers. */
611st_table *
612st_init_numtable(void)
613{
614 return st_init_table(&type_numhash);
615}
616
617/* Create and return table which can hold SIZE numbers. */
618st_table *
619st_init_numtable_with_size(st_index_t size)
620{
621 return st_init_table_with_size(&type_numhash, size);
622}
623
624/* Create and return table which can hold a minimal number of
625 strings. */
626st_table *
627st_init_strtable(void)
628{
629 return st_init_table(&type_strhash);
630}
631
632/* Create and return table which can hold SIZE strings. */
633st_table *
634st_init_strtable_with_size(st_index_t size)
635{
636 return st_init_table_with_size(&type_strhash, size);
637}
638
639/* Create and return table which can hold a minimal number of strings
640 whose character case is ignored. */
641st_table *
642st_init_strcasetable(void)
643{
644 return st_init_table(&type_strcasehash);
645}
646
647/* Create and return table which can hold SIZE strings whose character
648 case is ignored. */
649st_table *
650st_init_strcasetable_with_size(st_index_t size)
651{
652 return st_init_table_with_size(&type_strcasehash, size);
653}
654
655/* Make table TAB empty. */
656void
657st_clear(st_table *tab)
658{
659 make_tab_empty(tab);
660 tab->rebuilds_num++;
661}
662
663/* Free table TAB space. */
664void
665st_free_table(st_table *tab)
666{
667 free(tab->bins);
668 free(tab->entries);
669 free(tab);
670}
671
672/* Return byte size of memory allocated for table TAB. */
673size_t
674st_memsize(const st_table *tab)
675{
676 return(sizeof(st_table)
677 + (tab->bins == NULL ? 0 : bins_size(tab))
678 + get_allocated_entries(tab) * sizeof(st_table_entry));
679}
680
681static st_index_t
682find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
683
684static st_index_t
685find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
686
687static st_index_t
688find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
689
690static st_index_t
691find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
692 st_data_t key, st_index_t *bin_ind);
693
694#ifdef HASH_LOG
695static void
696count_collision(const struct st_hash_type *type)
697{
698 collision.all++;
699 if (type == &type_numhash) {
700 collision.num++;
701 }
702 else if (type == &type_strhash) {
703 collision.strcase++;
704 }
705 else if (type == &type_strcasehash) {
706 collision.str++;
707 }
708}
709
710#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
711#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
712#define collision_check 0
713#else
714#define COLLISION
715#define FOUND_BIN
716#endif
717
718/* If the number of entries in the table is at least REBUILD_THRESHOLD
719 times less than the entry array length, decrease the table
720 size. */
721#define REBUILD_THRESHOLD 4
722
723#if REBUILD_THRESHOLD < 2
724#error "REBUILD_THRESHOLD should be >= 2"
725#endif
726
727static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
728static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
729static void rebuild_cleanup(st_table *const tab);
730
731/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
732 and can change size of the table entries and bins arrays.
733 Rebuilding is implemented by creation of a new table or by
734 compaction of the existing one. */
735static void
736rebuild_table(st_table *tab)
737{
738 if ((2 * tab->num_entries <= get_allocated_entries(tab)
739 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
740 || tab->num_entries < (1 << MINIMAL_POWER2)) {
741 /* Compaction: */
742 tab->num_entries = 0;
743 if (tab->bins != NULL)
744 initialize_bins(tab);
745 rebuild_table_with(tab, tab);
746 }
747 else {
748 st_table *new_tab;
749 /* This allocation could trigger GC and compaction. If tab is the
750 * gen_iv_tbl, then tab could have changed in size due to objects being
751 * freed and/or moved. Do not store attributes of tab before this line. */
752 new_tab = st_init_table_with_size(tab->type,
753 2 * tab->num_entries - 1);
754 rebuild_table_with(new_tab, tab);
755 rebuild_move_table(new_tab, tab);
756 }
757 rebuild_cleanup(tab);
758}
759
760static void
761rebuild_table_with(st_table *const new_tab, st_table *const tab)
762{
763 st_index_t i, ni;
764 unsigned int size_ind;
765 st_table_entry *new_entries;
766 st_table_entry *curr_entry_ptr;
767 st_index_t *bins;
768 st_index_t bin_ind;
769
770 new_entries = new_tab->entries;
771
772 ni = 0;
773 bins = new_tab->bins;
774 size_ind = get_size_ind(new_tab);
775 st_index_t bound = tab->entries_bound;
776 st_table_entry *entries = tab->entries;
777
778 for (i = tab->entries_start; i < bound; i++) {
779 curr_entry_ptr = &entries[i];
780 PREFETCH(entries + i + 1, 0);
781 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
782 continue;
783 if (&new_entries[ni] != curr_entry_ptr)
784 new_entries[ni] = *curr_entry_ptr;
785 if (EXPECT(bins != NULL, 1)) {
786 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
787 curr_entry_ptr->key);
788 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
789 }
790 new_tab->num_entries++;
791 ni++;
792 }
793
794 assert(new_tab->num_entries == tab->num_entries);
795}
796
797static void
798rebuild_move_table(st_table *const new_tab, st_table *const tab)
799{
800 tab->entry_power = new_tab->entry_power;
801 tab->bin_power = new_tab->bin_power;
802 tab->size_ind = new_tab->size_ind;
803 free(tab->bins);
804 tab->bins = new_tab->bins;
805 free(tab->entries);
806 tab->entries = new_tab->entries;
807 free(new_tab);
808}
809
810static void
811rebuild_cleanup(st_table *const tab)
812{
813 tab->entries_start = 0;
814 tab->entries_bound = tab->num_entries;
815 tab->rebuilds_num++;
816}
817
818/* Return the next secondary hash index for table TAB using previous
819 index IND and PERTURB. Finally modulo of the function becomes a
820 full *cycle linear congruential generator*, in other words it
821 guarantees traversing all table bins in extreme case.
822
823 According the Hull-Dobell theorem a generator
824 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
825 o m and c are relatively prime
826 o a-1 is divisible by all prime factors of m
827 o a-1 is divisible by 4 if m is divisible by 4.
828
829 For our case a is 5, c is 1, and m is a power of two. */
830static inline st_index_t
831secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
832{
833 *perturb >>= 11;
834 ind = (ind << 2) + ind + *perturb + 1;
835 return hash_bin(ind, tab);
836}
837
838/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
839 search. Return the index of the found entry in array `entries`.
840 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
841 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
842static inline st_index_t
843find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
844{
845 int eq_p, rebuilt_p;
846 st_index_t i, bound;
847 st_table_entry *entries;
848
849 bound = tab->entries_bound;
850 entries = tab->entries;
851 for (i = tab->entries_start; i < bound; i++) {
852 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
853 if (EXPECT(rebuilt_p, 0))
854 return REBUILT_TABLE_ENTRY_IND;
855 if (eq_p)
856 return i;
857 }
858 return UNDEFINED_ENTRY_IND;
859}
860
861/* Use the quadratic probing. The method has a better data locality
862 but more collisions than the current approach. In average it
863 results in a bit slower search. */
864/*#define QUADRATIC_PROBE*/
865
866/* Return index of entry with HASH_VALUE and KEY in table TAB. If
867 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
868 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
869static st_index_t
870find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
871{
872 int eq_p, rebuilt_p;
873 st_index_t ind;
874#ifdef QUADRATIC_PROBE
875 st_index_t d;
876#else
877 st_index_t perturb;
878#endif
879 st_index_t bin;
880 st_table_entry *entries = tab->entries;
881
882 ind = hash_bin(hash_value, tab);
883#ifdef QUADRATIC_PROBE
884 d = 1;
885#else
886 perturb = hash_value;
887#endif
888 FOUND_BIN;
889 for (;;) {
890 bin = get_bin(tab->bins, get_size_ind(tab), ind);
891 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
892 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
893 if (EXPECT(rebuilt_p, 0))
894 return REBUILT_TABLE_ENTRY_IND;
895 if (eq_p)
896 break;
897 }
898 else if (EMPTY_BIN_P(bin))
899 return UNDEFINED_ENTRY_IND;
900#ifdef QUADRATIC_PROBE
901 ind = hash_bin(ind + d, tab);
902 d++;
903#else
904 ind = secondary_hash(ind, tab, &perturb);
905#endif
906 COLLISION;
907 }
908 return bin;
909}
910
911/* Find and return index of table TAB bin corresponding to an entry
912 with HASH_VALUE and KEY. If there is no such bin, return
913 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
914 return REBUILT_TABLE_BIN_IND. */
915static st_index_t
916find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
917{
918 int eq_p, rebuilt_p;
919 st_index_t ind;
920#ifdef QUADRATIC_PROBE
921 st_index_t d;
922#else
923 st_index_t perturb;
924#endif
925 st_index_t bin;
926 st_table_entry *entries = tab->entries;
927
928 ind = hash_bin(hash_value, tab);
929#ifdef QUADRATIC_PROBE
930 d = 1;
931#else
932 perturb = hash_value;
933#endif
934 FOUND_BIN;
935 for (;;) {
936 bin = get_bin(tab->bins, get_size_ind(tab), ind);
937 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
938 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
939 if (EXPECT(rebuilt_p, 0))
940 return REBUILT_TABLE_BIN_IND;
941 if (eq_p)
942 break;
943 }
944 else if (EMPTY_BIN_P(bin))
945 return UNDEFINED_BIN_IND;
946#ifdef QUADRATIC_PROBE
947 ind = hash_bin(ind + d, tab);
948 d++;
949#else
950 ind = secondary_hash(ind, tab, &perturb);
951#endif
952 COLLISION;
953 }
954 return ind;
955}
956
957/* Find and return index of table TAB bin corresponding to an entry
958 with HASH_VALUE and KEY. The entry should be in the table
959 already. */
960static st_index_t
961find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
962{
963 st_index_t ind;
964#ifdef QUADRATIC_PROBE
965 st_index_t d;
966#else
967 st_index_t perturb;
968#endif
969 st_index_t bin;
970
971 ind = hash_bin(hash_value, tab);
972#ifdef QUADRATIC_PROBE
973 d = 1;
974#else
975 perturb = hash_value;
976#endif
977 FOUND_BIN;
978 for (;;) {
979 bin = get_bin(tab->bins, get_size_ind(tab), ind);
980 if (EMPTY_OR_DELETED_BIN_P(bin))
981 return ind;
982#ifdef QUADRATIC_PROBE
983 ind = hash_bin(ind + d, tab);
984 d++;
985#else
986 ind = secondary_hash(ind, tab, &perturb);
987#endif
988 COLLISION;
989 }
990}
991
992/* Return index of table TAB bin for HASH_VALUE and KEY through
993 BIN_IND and the pointed value as the function result. Reserve the
994 bin for inclusion of the corresponding entry into the table if it
995 is not there yet. We always find such bin as bins array length is
996 bigger entries array. Although we can reuse a deleted bin, the
997 result bin value is always empty if the table has no entry with
998 KEY. Return the entries array index of the found entry or
999 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1000 during the search, return REBUILT_TABLE_ENTRY_IND. */
1001static st_index_t
1002find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1003 st_data_t key, st_index_t *bin_ind)
1004{
1005 int eq_p, rebuilt_p;
1006 st_index_t ind;
1007 st_hash_t curr_hash_value = *hash_value;
1008#ifdef QUADRATIC_PROBE
1009 st_index_t d;
1010#else
1011 st_index_t perturb;
1012#endif
1013 st_index_t entry_index;
1014 st_index_t first_deleted_bin_ind;
1015 st_table_entry *entries;
1016
1017 ind = hash_bin(curr_hash_value, tab);
1018#ifdef QUADRATIC_PROBE
1019 d = 1;
1020#else
1021 perturb = curr_hash_value;
1022#endif
1023 FOUND_BIN;
1024 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1025 entries = tab->entries;
1026 for (;;) {
1027 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1028 if (EMPTY_BIN_P(entry_index)) {
1029 tab->num_entries++;
1030 entry_index = UNDEFINED_ENTRY_IND;
1031 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1032 /* We can reuse bin of a deleted entry. */
1033 ind = first_deleted_bin_ind;
1034 MARK_BIN_EMPTY(tab, ind);
1035 }
1036 break;
1037 }
1038 else if (! DELETED_BIN_P(entry_index)) {
1039 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1040 if (EXPECT(rebuilt_p, 0))
1041 return REBUILT_TABLE_ENTRY_IND;
1042 if (eq_p)
1043 break;
1044 }
1045 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1046 first_deleted_bin_ind = ind;
1047#ifdef QUADRATIC_PROBE
1048 ind = hash_bin(ind + d, tab);
1049 d++;
1050#else
1051 ind = secondary_hash(ind, tab, &perturb);
1052#endif
1053 COLLISION;
1054 }
1055 *bin_ind = ind;
1056 return entry_index;
1057}
1058
1059/* Find an entry with KEY in table TAB. Return non-zero if we found
1060 it. Set up *RECORD to the found entry record. */
1061int
1062st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1063{
1064 st_index_t bin;
1065 st_hash_t hash = do_hash(key, tab);
1066
1067 retry:
1068 if (tab->bins == NULL) {
1069 bin = find_entry(tab, hash, key);
1070 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1071 goto retry;
1072 if (bin == UNDEFINED_ENTRY_IND)
1073 return 0;
1074 }
1075 else {
1076 bin = find_table_entry_ind(tab, hash, key);
1077 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1078 goto retry;
1079 if (bin == UNDEFINED_ENTRY_IND)
1080 return 0;
1081 bin -= ENTRY_BASE;
1082 }
1083 if (value != 0)
1084 *value = tab->entries[bin].record;
1085 return 1;
1086}
1087
1088/* Find an entry with KEY in table TAB. Return non-zero if we found
1089 it. Set up *RESULT to the found table entry key. */
1090int
1091st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1092{
1093 st_index_t bin;
1094 st_hash_t hash = do_hash(key, tab);
1095
1096 retry:
1097 if (tab->bins == NULL) {
1098 bin = find_entry(tab, hash, key);
1099 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1100 goto retry;
1101 if (bin == UNDEFINED_ENTRY_IND)
1102 return 0;
1103 }
1104 else {
1105 bin = find_table_entry_ind(tab, hash, key);
1106 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1107 goto retry;
1108 if (bin == UNDEFINED_ENTRY_IND)
1109 return 0;
1110 bin -= ENTRY_BASE;
1111 }
1112 if (result != 0)
1113 *result = tab->entries[bin].key;
1114 return 1;
1115}
1116
1117/* Check the table and rebuild it if it is necessary. */
1118static inline void
1119rebuild_table_if_necessary (st_table *tab)
1120{
1121 st_index_t bound = tab->entries_bound;
1122
1123 if (bound == get_allocated_entries(tab))
1124 rebuild_table(tab);
1125}
1126
1127/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1128 already entry with KEY in the table, return nonzero and update
1129 the value of the found entry. */
1130int
1131st_insert(st_table *tab, st_data_t key, st_data_t value)
1132{
1133 st_table_entry *entry;
1134 st_index_t bin;
1135 st_index_t ind;
1136 st_hash_t hash_value;
1137 st_index_t bin_ind;
1138 int new_p;
1139
1140 hash_value = do_hash(key, tab);
1141 retry:
1142 rebuild_table_if_necessary(tab);
1143 if (tab->bins == NULL) {
1144 bin = find_entry(tab, hash_value, key);
1145 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1146 goto retry;
1147 new_p = bin == UNDEFINED_ENTRY_IND;
1148 if (new_p)
1149 tab->num_entries++;
1150 bin_ind = UNDEFINED_BIN_IND;
1151 }
1152 else {
1153 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1154 key, &bin_ind);
1155 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1156 goto retry;
1157 new_p = bin == UNDEFINED_ENTRY_IND;
1158 bin -= ENTRY_BASE;
1159 }
1160 if (new_p) {
1161 ind = tab->entries_bound++;
1162 entry = &tab->entries[ind];
1163 entry->hash = hash_value;
1164 entry->key = key;
1165 entry->record = value;
1166 if (bin_ind != UNDEFINED_BIN_IND)
1167 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1168 return 0;
1169 }
1170 tab->entries[bin].record = value;
1171 return 1;
1172}
1173
1174/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1175 entry with KEY before the insertion. */
1176static inline void
1177st_add_direct_with_hash(st_table *tab,
1178 st_data_t key, st_data_t value, st_hash_t hash)
1179{
1180 st_table_entry *entry;
1181 st_index_t ind;
1182 st_index_t bin_ind;
1183
1184 assert(hash != RESERVED_HASH_VAL);
1185
1186 rebuild_table_if_necessary(tab);
1187 ind = tab->entries_bound++;
1188 entry = &tab->entries[ind];
1189 entry->hash = hash;
1190 entry->key = key;
1191 entry->record = value;
1192 tab->num_entries++;
1193 if (tab->bins != NULL) {
1194 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1195 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1196 }
1197}
1198
1199void
1200rb_st_add_direct_with_hash(st_table *tab,
1201 st_data_t key, st_data_t value, st_hash_t hash)
1202{
1203 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1204}
1205
1206/* Insert (KEY, VALUE) into table TAB. The table should not have
1207 entry with KEY before the insertion. */
1208void
1209st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1210{
1211 st_hash_t hash_value;
1212
1213 hash_value = do_hash(key, tab);
1214 st_add_direct_with_hash(tab, key, value, hash_value);
1215}
1216
1217/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1218 there is already entry with KEY in the table, return nonzero and
1219 update the value of the found entry. */
1220int
1221st_insert2(st_table *tab, st_data_t key, st_data_t value,
1222 st_data_t (*func)(st_data_t))
1223{
1224 st_table_entry *entry;
1225 st_index_t bin;
1226 st_index_t ind;
1227 st_hash_t hash_value;
1228 st_index_t bin_ind;
1229 int new_p;
1230
1231 hash_value = do_hash(key, tab);
1232 retry:
1233 rebuild_table_if_necessary (tab);
1234 if (tab->bins == NULL) {
1235 bin = find_entry(tab, hash_value, key);
1236 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1237 goto retry;
1238 new_p = bin == UNDEFINED_ENTRY_IND;
1239 if (new_p)
1240 tab->num_entries++;
1241 bin_ind = UNDEFINED_BIN_IND;
1242 }
1243 else {
1244 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1245 key, &bin_ind);
1246 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1247 goto retry;
1248 new_p = bin == UNDEFINED_ENTRY_IND;
1249 bin -= ENTRY_BASE;
1250 }
1251 if (new_p) {
1252 key = (*func)(key);
1253 ind = tab->entries_bound++;
1254 entry = &tab->entries[ind];
1255 entry->hash = hash_value;
1256 entry->key = key;
1257 entry->record = value;
1258 if (bin_ind != UNDEFINED_BIN_IND)
1259 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1260 return 0;
1261 }
1262 tab->entries[bin].record = value;
1263 return 1;
1264}
1265
1266/* Create a copy of old_tab into new_tab. */
1267st_table *
1268st_replace(st_table *new_tab, st_table *old_tab)
1269{
1270 *new_tab = *old_tab;
1271 if (old_tab->bins == NULL)
1272 new_tab->bins = NULL;
1273 else {
1274 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1275#ifndef RUBY
1276 if (new_tab->bins == NULL) {
1277 return NULL;
1278 }
1279#endif
1280 }
1281 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1282 * sizeof(st_table_entry));
1283#ifndef RUBY
1284 if (new_tab->entries == NULL) {
1285 return NULL;
1286 }
1287#endif
1288 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1289 get_allocated_entries(old_tab));
1290 if (old_tab->bins != NULL)
1291 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1292
1293 return new_tab;
1294}
1295
1296/* Create and return a copy of table OLD_TAB. */
1297st_table *
1298st_copy(st_table *old_tab)
1299{
1300 st_table *new_tab;
1301
1302 new_tab = (st_table *) malloc(sizeof(st_table));
1303#ifndef RUBY
1304 if (new_tab == NULL)
1305 return NULL;
1306#endif
1307
1308 if (st_replace(new_tab, old_tab) == NULL) {
1309 st_free_table(new_tab);
1310 return NULL;
1311 }
1312
1313 return new_tab;
1314}
1315
1316/* Update the entries start of table TAB after removing an entry
1317 with index N in the array entries. */
1318static inline void
1319update_range_for_deleted(st_table *tab, st_index_t n)
1320{
1321 /* Do not update entries_bound here. Otherwise, we can fill all
1322 bins by deleted entry value before rebuilding the table. */
1323 if (tab->entries_start == n) {
1324 st_index_t start = n + 1;
1325 st_index_t bound = tab->entries_bound;
1326 st_table_entry *entries = tab->entries;
1327 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1328 tab->entries_start = start;
1329 }
1330}
1331
1332/* Delete entry with KEY from table TAB, set up *VALUE (unless
1333 VALUE is zero) from deleted table entry, and return non-zero. If
1334 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1335 is zero), and return zero. */
1336static int
1337st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1338{
1339 st_table_entry *entry;
1340 st_index_t bin;
1341 st_index_t bin_ind;
1342 st_hash_t hash;
1343
1344 hash = do_hash(*key, tab);
1345 retry:
1346 if (tab->bins == NULL) {
1347 bin = find_entry(tab, hash, *key);
1348 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1349 goto retry;
1350 if (bin == UNDEFINED_ENTRY_IND) {
1351 if (value != 0) *value = 0;
1352 return 0;
1353 }
1354 }
1355 else {
1356 bin_ind = find_table_bin_ind(tab, hash, *key);
1357 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1358 goto retry;
1359 if (bin_ind == UNDEFINED_BIN_IND) {
1360 if (value != 0) *value = 0;
1361 return 0;
1362 }
1363 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1364 MARK_BIN_DELETED(tab, bin_ind);
1365 }
1366 entry = &tab->entries[bin];
1367 *key = entry->key;
1368 if (value != 0) *value = entry->record;
1369 MARK_ENTRY_DELETED(entry);
1370 tab->num_entries--;
1371 update_range_for_deleted(tab, bin);
1372 return 1;
1373}
1374
1375int
1376st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1377{
1378 return st_general_delete(tab, key, value);
1379}
1380
1381/* The function and other functions with suffix '_safe' or '_check'
1382 are originated from the previous implementation of the hash tables.
1383 It was necessary for correct deleting entries during traversing
1384 tables. The current implementation permits deletion during
1385 traversing without a specific way to do this. */
1386int
1387st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1388 st_data_t never ATTRIBUTE_UNUSED)
1389{
1390 return st_general_delete(tab, key, value);
1391}
1392
1393/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1394 return zero. Otherwise, remove the first entry in the table.
1395 Return its key through KEY and its record through VALUE (unless
1396 VALUE is zero). */
1397int
1398st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1399{
1400 st_index_t i, bound;
1401 st_index_t bin;
1402 st_table_entry *entries, *curr_entry_ptr;
1403 st_index_t bin_ind;
1404
1405 entries = tab->entries;
1406 bound = tab->entries_bound;
1407 for (i = tab->entries_start; i < bound; i++) {
1408 curr_entry_ptr = &entries[i];
1409 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1410 st_hash_t entry_hash = curr_entry_ptr->hash;
1411 st_data_t entry_key = curr_entry_ptr->key;
1412
1413 if (value != 0) *value = curr_entry_ptr->record;
1414 *key = entry_key;
1415 retry:
1416 if (tab->bins == NULL) {
1417 bin = find_entry(tab, entry_hash, entry_key);
1418 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1419 entries = tab->entries;
1420 goto retry;
1421 }
1422 curr_entry_ptr = &entries[bin];
1423 }
1424 else {
1425 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1426 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1427 entries = tab->entries;
1428 goto retry;
1429 }
1430 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1431 - ENTRY_BASE];
1432 MARK_BIN_DELETED(tab, bin_ind);
1433 }
1434 MARK_ENTRY_DELETED(curr_entry_ptr);
1435 tab->num_entries--;
1436 update_range_for_deleted(tab, i);
1437 return 1;
1438 }
1439 }
1440 if (value != 0) *value = 0;
1441 return 0;
1442}
1443
1444/* See comments for function st_delete_safe. */
1445void
1446st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1447 st_data_t never ATTRIBUTE_UNUSED)
1448{
1449}
1450
1451/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1452 of the key and the value of the found entry, and non-zero as the
1453 3rd argument. If the entry is not found, call FUNC with a pointer
1454 to KEY, a pointer to zero, and a zero argument. If the call
1455 returns ST_CONTINUE, the table will have an entry with key and
1456 value returned by FUNC through the 1st and 2nd parameters. If the
1457 call of FUNC returns ST_DELETE, the table will not have entry with
1458 KEY. The function returns flag of that the entry with KEY was in
1459 the table before the call. */
1460int
1461st_update(st_table *tab, st_data_t key,
1462 st_update_callback_func *func, st_data_t arg)
1463{
1464 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1465 st_index_t bin = 0; /* Ditto */
1466 st_table_entry *entries;
1467 st_index_t bin_ind;
1468 st_data_t value = 0, old_key;
1469 int retval, existing;
1470 st_hash_t hash = do_hash(key, tab);
1471
1472 retry:
1473 entries = tab->entries;
1474 if (tab->bins == NULL) {
1475 bin = find_entry(tab, hash, key);
1476 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1477 goto retry;
1478 existing = bin != UNDEFINED_ENTRY_IND;
1479 entry = &entries[bin];
1480 bin_ind = UNDEFINED_BIN_IND;
1481 }
1482 else {
1483 bin_ind = find_table_bin_ind(tab, hash, key);
1484 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1485 goto retry;
1486 existing = bin_ind != UNDEFINED_BIN_IND;
1487 if (existing) {
1488 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1489 entry = &entries[bin];
1490 }
1491 }
1492 if (existing) {
1493 key = entry->key;
1494 value = entry->record;
1495 }
1496 old_key = key;
1497 retval = (*func)(&key, &value, arg, existing);
1498 switch (retval) {
1499 case ST_CONTINUE:
1500 if (! existing) {
1501 st_add_direct_with_hash(tab, key, value, hash);
1502 break;
1503 }
1504 if (old_key != key) {
1505 entry->key = key;
1506 }
1507 entry->record = value;
1508 break;
1509 case ST_DELETE:
1510 if (existing) {
1511 if (bin_ind != UNDEFINED_BIN_IND)
1512 MARK_BIN_DELETED(tab, bin_ind);
1513 MARK_ENTRY_DELETED(entry);
1514 tab->num_entries--;
1515 update_range_for_deleted(tab, bin);
1516 }
1517 break;
1518 }
1519 return existing;
1520}
1521
1522/* Traverse all entries in table TAB calling FUNC with current entry
1523 key and value and zero. If the call returns ST_STOP, stop
1524 traversing. If the call returns ST_DELETE, delete the current
1525 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1526 traversing. The function returns zero unless an error is found.
1527 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1528 different for ST_CHECK and when the current element is removed
1529 during traversing. */
1530static inline int
1531st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1532 int check_p)
1533{
1534 st_index_t bin;
1535 st_index_t bin_ind;
1536 st_table_entry *entries, *curr_entry_ptr;
1537 enum st_retval retval;
1538 st_index_t i, rebuilds_num;
1539 st_hash_t hash;
1540 st_data_t key;
1541 int error_p, packed_p = tab->bins == NULL;
1542
1543 entries = tab->entries;
1544 /* The bound can change inside the loop even without rebuilding
1545 the table, e.g. by an entry insertion. */
1546 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1547 curr_entry_ptr = &entries[i];
1548 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1549 continue;
1550 key = curr_entry_ptr->key;
1551 rebuilds_num = tab->rebuilds_num;
1552 hash = curr_entry_ptr->hash;
1553 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1554
1555 if (retval == ST_REPLACE && replace) {
1556 st_data_t value;
1557 value = curr_entry_ptr->record;
1558 retval = (*replace)(&key, &value, arg, TRUE);
1559 curr_entry_ptr->key = key;
1560 curr_entry_ptr->record = value;
1561 }
1562
1563 if (rebuilds_num != tab->rebuilds_num) {
1564 retry:
1565 entries = tab->entries;
1566 packed_p = tab->bins == NULL;
1567 if (packed_p) {
1568 i = find_entry(tab, hash, key);
1569 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1570 goto retry;
1571 error_p = i == UNDEFINED_ENTRY_IND;
1572 }
1573 else {
1574 i = find_table_entry_ind(tab, hash, key);
1575 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1576 goto retry;
1577 error_p = i == UNDEFINED_ENTRY_IND;
1578 i -= ENTRY_BASE;
1579 }
1580 if (error_p && check_p) {
1581 /* call func with error notice */
1582 retval = (*func)(0, 0, arg, 1);
1583 return 1;
1584 }
1585 curr_entry_ptr = &entries[i];
1586 }
1587 switch (retval) {
1588 case ST_REPLACE:
1589 break;
1590 case ST_CONTINUE:
1591 break;
1592 case ST_CHECK:
1593 if (check_p)
1594 break;
1595 case ST_STOP:
1596 return 0;
1597 case ST_DELETE: {
1598 st_data_t key = curr_entry_ptr->key;
1599
1600 again:
1601 if (packed_p) {
1602 bin = find_entry(tab, hash, key);
1603 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1604 goto again;
1605 if (bin == UNDEFINED_ENTRY_IND)
1606 break;
1607 }
1608 else {
1609 bin_ind = find_table_bin_ind(tab, hash, key);
1610 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1611 goto again;
1612 if (bin_ind == UNDEFINED_BIN_IND)
1613 break;
1614 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1615 MARK_BIN_DELETED(tab, bin_ind);
1616 }
1617 curr_entry_ptr = &entries[bin];
1618 MARK_ENTRY_DELETED(curr_entry_ptr);
1619 tab->num_entries--;
1620 update_range_for_deleted(tab, bin);
1621 break;
1622 }
1623 }
1624 }
1625 return 0;
1626}
1627
1628int
1629st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1630{
1631 return st_general_foreach(tab, func, replace, arg, TRUE);
1632}
1633
1634struct functor {
1635 st_foreach_callback_func *func;
1636 st_data_t arg;
1637};
1638
1639static int
1640apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1641{
1642 const struct functor *f = (void *)d;
1643 return f->func(k, v, f->arg);
1644}
1645
1646int
1647st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1648{
1649 const struct functor f = { func, arg };
1650 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1651}
1652
1653/* See comments for function st_delete_safe. */
1654int
1655st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1656 st_data_t never ATTRIBUTE_UNUSED)
1657{
1658 return st_general_foreach(tab, func, 0, arg, TRUE);
1659}
1660
1661/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1662 Return the number of keys set up in array KEYS. */
1663static inline st_index_t
1664st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1665{
1666 st_index_t i, bound;
1667 st_data_t key, *keys_start, *keys_end;
1668 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1669
1670 bound = tab->entries_bound;
1671 keys_start = keys;
1672 keys_end = keys + size;
1673 for (i = tab->entries_start; i < bound; i++) {
1674 if (keys == keys_end)
1675 break;
1676 curr_entry_ptr = &entries[i];
1677 key = curr_entry_ptr->key;
1678 if (! DELETED_ENTRY_P(curr_entry_ptr))
1679 *keys++ = key;
1680 }
1681
1682 return keys - keys_start;
1683}
1684
1685st_index_t
1686st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1687{
1688 return st_general_keys(tab, keys, size);
1689}
1690
1691/* See comments for function st_delete_safe. */
1692st_index_t
1693st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1694 st_data_t never ATTRIBUTE_UNUSED)
1695{
1696 return st_general_keys(tab, keys, size);
1697}
1698
1699/* Set up array VALUES by at most SIZE values of head table TAB
1700 entries. Return the number of values set up in array VALUES. */
1701static inline st_index_t
1702st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1703{
1704 st_index_t i, bound;
1705 st_data_t *values_start, *values_end;
1706 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1707
1708 values_start = values;
1709 values_end = values + size;
1710 bound = tab->entries_bound;
1711 for (i = tab->entries_start; i < bound; i++) {
1712 if (values == values_end)
1713 break;
1714 curr_entry_ptr = &entries[i];
1715 if (! DELETED_ENTRY_P(curr_entry_ptr))
1716 *values++ = curr_entry_ptr->record;
1717 }
1718
1719 return values - values_start;
1720}
1721
1722st_index_t
1723st_values(st_table *tab, st_data_t *values, st_index_t size)
1724{
1725 return st_general_values(tab, values, size);
1726}
1727
1728/* See comments for function st_delete_safe. */
1729st_index_t
1730st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1731 st_data_t never ATTRIBUTE_UNUSED)
1732{
1733 return st_general_values(tab, values, size);
1734}
1735
1736#define FNV1_32A_INIT 0x811c9dc5
1737
1738/*
1739 * 32 bit magic FNV-1a prime
1740 */
1741#define FNV_32_PRIME 0x01000193
1742
1743/* __POWERPC__ added to accommodate Darwin case. */
1744#ifndef UNALIGNED_WORD_ACCESS
1745# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1746 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1747 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1748 defined(__mc68020__)
1749# define UNALIGNED_WORD_ACCESS 1
1750# endif
1751#endif
1752#ifndef UNALIGNED_WORD_ACCESS
1753# define UNALIGNED_WORD_ACCESS 0
1754#endif
1755
1756/* This hash function is quite simplified MurmurHash3
1757 * Simplification is legal, cause most of magic still happens in finalizator.
1758 * And finalizator is almost the same as in MurmurHash3 */
1759#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1760#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1761
1762#if ST_INDEX_BITS <= 32
1763#define C1 (st_index_t)0xcc9e2d51
1764#define C2 (st_index_t)0x1b873593
1765#else
1766#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1767#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1768#endif
1769NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1770NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1771NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1772
1773static inline st_index_t
1774murmur_step(st_index_t h, st_index_t k)
1775{
1776#if ST_INDEX_BITS <= 32
1777#define r1 (17)
1778#define r2 (11)
1779#else
1780#define r1 (33)
1781#define r2 (24)
1782#endif
1783 k *= C1;
1784 h ^= ROTL(k, r1);
1785 h *= C2;
1786 h = ROTL(h, r2);
1787 return h;
1788}
1789#undef r1
1790#undef r2
1791
1792static inline st_index_t
1793murmur_finish(st_index_t h)
1794{
1795#if ST_INDEX_BITS <= 32
1796#define r1 (16)
1797#define r2 (13)
1798#define r3 (16)
1799 const st_index_t c1 = 0x85ebca6b;
1800 const st_index_t c2 = 0xc2b2ae35;
1801#else
1802/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1803#define r1 (30)
1804#define r2 (27)
1805#define r3 (31)
1806 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1807 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1808#endif
1809#if ST_INDEX_BITS > 64
1810 h ^= h >> 64;
1811 h *= c2;
1812 h ^= h >> 65;
1813#endif
1814 h ^= h >> r1;
1815 h *= c1;
1816 h ^= h >> r2;
1817 h *= c2;
1818 h ^= h >> r3;
1819 return h;
1820}
1821#undef r1
1822#undef r2
1823#undef r3
1824
1825st_index_t
1826st_hash(const void *ptr, size_t len, st_index_t h)
1827{
1828 const char *data = ptr;
1829 st_index_t t = 0;
1830 size_t l = len;
1831
1832#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1833#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1834#if SIZEOF_ST_INDEX_T > 4
1835#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1836#if SIZEOF_ST_INDEX_T > 8
1837#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1838 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1839#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1840#endif
1841#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1842#else
1843#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1844#endif
1845#undef SKIP_TAIL
1846 if (len >= sizeof(st_index_t)) {
1847#if !UNALIGNED_WORD_ACCESS
1848 int align = (int)((st_data_t)data % sizeof(st_index_t));
1849 if (align) {
1850 st_index_t d = 0;
1851 int sl, sr, pack;
1852
1853 switch (align) {
1854#ifdef WORDS_BIGENDIAN
1855# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1856 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1857#else
1858# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1859 t |= data_at(n) << CHAR_BIT*(n)
1860#endif
1861 UNALIGNED_ADD_ALL;
1862#undef UNALIGNED_ADD
1863 }
1864
1865#ifdef WORDS_BIGENDIAN
1866 t >>= (CHAR_BIT * align) - CHAR_BIT;
1867#else
1868 t <<= (CHAR_BIT * align);
1869#endif
1870
1871 data += sizeof(st_index_t)-align;
1872 len -= sizeof(st_index_t)-align;
1873
1874 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1875 sr = CHAR_BIT * align;
1876
1877 while (len >= sizeof(st_index_t)) {
1878 d = *(st_index_t *)data;
1879#ifdef WORDS_BIGENDIAN
1880 t = (t << sr) | (d >> sl);
1881#else
1882 t = (t >> sr) | (d << sl);
1883#endif
1884 h = murmur_step(h, t);
1885 t = d;
1886 data += sizeof(st_index_t);
1887 len -= sizeof(st_index_t);
1888 }
1889
1890 pack = len < (size_t)align ? (int)len : align;
1891 d = 0;
1892 switch (pack) {
1893#ifdef WORDS_BIGENDIAN
1894# define UNALIGNED_ADD(n) case (n) + 1: \
1895 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1896#else
1897# define UNALIGNED_ADD(n) case (n) + 1: \
1898 d |= data_at(n) << CHAR_BIT*(n)
1899#endif
1900 UNALIGNED_ADD_ALL;
1901#undef UNALIGNED_ADD
1902 }
1903#ifdef WORDS_BIGENDIAN
1904 t = (t << sr) | (d >> sl);
1905#else
1906 t = (t >> sr) | (d << sl);
1907#endif
1908
1909 if (len < (size_t)align) goto skip_tail;
1910# define SKIP_TAIL 1
1911 h = murmur_step(h, t);
1912 data += pack;
1913 len -= pack;
1914 }
1915 else
1916#endif
1917#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1918#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1919#else
1920#define aligned_data data
1921#endif
1922 {
1923 do {
1924 h = murmur_step(h, *(st_index_t *)aligned_data);
1925 data += sizeof(st_index_t);
1926 len -= sizeof(st_index_t);
1927 } while (len >= sizeof(st_index_t));
1928 }
1929 }
1930
1931 t = 0;
1932 switch (len) {
1933#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1934 /* in this case byteorder doesn't really matter */
1935#if SIZEOF_ST_INDEX_T > 4
1936 case 7: t |= data_at(6) << 48;
1937 case 6: t |= data_at(5) << 40;
1938 case 5: t |= data_at(4) << 32;
1939 case 4:
1940 t |= (st_index_t)*(uint32_t*)aligned_data;
1941 goto skip_tail;
1942# define SKIP_TAIL 1
1943#endif
1944 case 3: t |= data_at(2) << 16;
1945 case 2: t |= data_at(1) << 8;
1946 case 1: t |= data_at(0);
1947#else
1948#ifdef WORDS_BIGENDIAN
1949# define UNALIGNED_ADD(n) case (n) + 1: \
1950 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1951#else
1952# define UNALIGNED_ADD(n) case (n) + 1: \
1953 t |= data_at(n) << CHAR_BIT*(n)
1954#endif
1955 UNALIGNED_ADD_ALL;
1956#undef UNALIGNED_ADD
1957#endif
1958#ifdef SKIP_TAIL
1959 skip_tail:
1960#endif
1961 h ^= t; h -= ROTL(t, 7);
1962 h *= C2;
1963 }
1964 h ^= l;
1965#undef aligned_data
1966
1967 return murmur_finish(h);
1968}
1969
1970st_index_t
1971st_hash_uint32(st_index_t h, uint32_t i)
1972{
1973 return murmur_step(h, i);
1974}
1975
1976NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1977st_index_t
1978st_hash_uint(st_index_t h, st_index_t i)
1979{
1980 i += h;
1981/* no matter if it is BigEndian or LittleEndian,
1982 * we hash just integers */
1983#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1984 h = murmur_step(h, i >> 8*8);
1985#endif
1986 h = murmur_step(h, i);
1987 return h;
1988}
1989
1990st_index_t
1991st_hash_end(st_index_t h)
1992{
1993 h = murmur_finish(h);
1994 return h;
1995}
1996
1997#undef st_hash_start
1998st_index_t
1999rb_st_hash_start(st_index_t h)
2000{
2001 return h;
2002}
2003
2004static st_index_t
2005strhash(st_data_t arg)
2006{
2007 register const char *string = (const char *)arg;
2008 return st_hash(string, strlen(string), FNV1_32A_INIT);
2009}
2010
2011int
2012st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2013{
2014 char c1, c2;
2015
2016 while (1) {
2017 c1 = *s1++;
2018 c2 = *s2++;
2019 if (c1 == '\0' || c2 == '\0') {
2020 if (c1 != '\0') return 1;
2021 if (c2 != '\0') return -1;
2022 return 0;
2023 }
2024 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2025 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2026 if (c1 != c2) {
2027 if (c1 > c2)
2028 return 1;
2029 else
2030 return -1;
2031 }
2032 }
2033}
2034
2035int
2036st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2037{
2038 char c1, c2;
2039 size_t i;
2040
2041 for (i = 0; i < n; i++) {
2042 c1 = *s1++;
2043 c2 = *s2++;
2044 if (c1 == '\0' || c2 == '\0') {
2045 if (c1 != '\0') return 1;
2046 if (c2 != '\0') return -1;
2047 return 0;
2048 }
2049 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2050 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2051 if (c1 != c2) {
2052 if (c1 > c2)
2053 return 1;
2054 else
2055 return -1;
2056 }
2057 }
2058 return 0;
2059}
2060
2061static int
2062st_strcmp(st_data_t lhs, st_data_t rhs)
2063{
2064 const char *s1 = (char *)lhs;
2065 const char *s2 = (char *)rhs;
2066 return strcmp(s1, s2);
2067}
2068
2069static int
2070st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2071{
2072 const char *s1 = (char *)lhs;
2073 const char *s2 = (char *)rhs;
2074 return st_locale_insensitive_strcasecmp(s1, s2);
2075}
2076
2077NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2078static st_index_t
2079strcasehash(st_data_t arg)
2080{
2081 register const char *string = (const char *)arg;
2082 register st_index_t hval = FNV1_32A_INIT;
2083
2084 /*
2085 * FNV-1a hash each octet in the buffer
2086 */
2087 while (*string) {
2088 unsigned int c = (unsigned char)*string++;
2089 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2090 hval ^= c;
2091
2092 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2093 hval *= FNV_32_PRIME;
2094 }
2095 return hval;
2096}
2097
2098int
2099st_numcmp(st_data_t x, st_data_t y)
2100{
2101 return x != y;
2102}
2103
2104st_index_t
2105st_numhash(st_data_t n)
2106{
2107 enum {s1 = 11, s2 = 3};
2108 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2109}
2110
2111#ifdef RUBY
2112/* Expand TAB to be suitable for holding SIZ entries in total.
2113 Pre-existing entries remain not deleted inside of TAB, but its bins
2114 are cleared to expect future reconstruction. See rehash below. */
2115static void
2116st_expand_table(st_table *tab, st_index_t siz)
2117{
2118 st_table *tmp;
2119 st_index_t n;
2120
2121 if (siz <= get_allocated_entries(tab))
2122 return; /* enough room already */
2123
2124 tmp = st_init_table_with_size(tab->type, siz);
2125 n = get_allocated_entries(tab);
2126 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2127 free(tab->entries);
2128 free(tab->bins);
2129 free(tmp->bins);
2130 tab->entry_power = tmp->entry_power;
2131 tab->bin_power = tmp->bin_power;
2132 tab->size_ind = tmp->size_ind;
2133 tab->entries = tmp->entries;
2134 tab->bins = NULL;
2135 tab->rebuilds_num++;
2136 free(tmp);
2137}
2138
2139/* Rehash using linear search. Return TRUE if we found that the table
2140 was rebuilt. */
2141static int
2142st_rehash_linear(st_table *tab)
2143{
2144 int eq_p, rebuilt_p;
2145 st_index_t i, j;
2146 st_table_entry *p, *q;
2147
2148 free(tab->bins);
2149 tab->bins = NULL;
2150
2151 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2152 p = &tab->entries[i];
2153 if (DELETED_ENTRY_P(p))
2154 continue;
2155 for (j = i + 1; j < tab->entries_bound; j++) {
2156 q = &tab->entries[j];
2157 if (DELETED_ENTRY_P(q))
2158 continue;
2159 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2160 if (EXPECT(rebuilt_p, 0))
2161 return TRUE;
2162 if (eq_p) {
2163 *p = *q;
2164 MARK_ENTRY_DELETED(q);
2165 tab->num_entries--;
2166 update_range_for_deleted(tab, j);
2167 }
2168 }
2169 }
2170 return FALSE;
2171}
2172
2173/* Rehash using index. Return TRUE if we found that the table was
2174 rebuilt. */
2175static int
2176st_rehash_indexed(st_table *tab)
2177{
2178 int eq_p, rebuilt_p;
2179 st_index_t i;
2180 st_index_t const n = bins_size(tab);
2181 unsigned int const size_ind = get_size_ind(tab);
2182 st_index_t *bins = realloc(tab->bins, n);
2183 tab->bins = bins;
2184 initialize_bins(tab);
2185 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2186 st_table_entry *p = &tab->entries[i];
2187 st_index_t ind;
2188#ifdef QUADRATIC_PROBE
2189 st_index_t d = 1;
2190#else
2191 st_index_t perturb = p->hash;
2192#endif
2193
2194 if (DELETED_ENTRY_P(p))
2195 continue;
2196
2197 ind = hash_bin(p->hash, tab);
2198 for (;;) {
2199 st_index_t bin = get_bin(bins, size_ind, ind);
2200 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2201 /* ok, new room */
2202 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2203 break;
2204 }
2205 else {
2206 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2207 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2208 if (EXPECT(rebuilt_p, 0))
2209 return TRUE;
2210 if (eq_p) {
2211 /* duplicated key; delete it */
2212 q->record = p->record;
2213 MARK_ENTRY_DELETED(p);
2214 tab->num_entries--;
2215 update_range_for_deleted(tab, bin);
2216 break;
2217 }
2218 else {
2219 /* hash collision; skip it */
2220#ifdef QUADRATIC_PROBE
2221 ind = hash_bin(ind + d, tab);
2222 d++;
2223#else
2224 ind = secondary_hash(ind, tab, &perturb);
2225#endif
2226 }
2227 }
2228 }
2229 }
2230 return FALSE;
2231}
2232
2233/* Reconstruct TAB's bins according to TAB's entries. This function
2234 permits conflicting keys inside of entries. No errors are reported
2235 then. All but one of them are discarded silently. */
2236static void
2237st_rehash(st_table *tab)
2238{
2239 int rebuilt_p;
2240
2241 do {
2242 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2243 rebuilt_p = st_rehash_linear(tab);
2244 else
2245 rebuilt_p = st_rehash_indexed(tab);
2246 } while (rebuilt_p);
2247}
2248
2249static st_data_t
2250st_stringify(VALUE key)
2251{
2252 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2253 rb_hash_key_str(key) : key;
2254}
2255
2256static void
2257st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2258{
2259 st_data_t k = st_stringify(key);
2261 e.hash = do_hash(k, tab);
2262 e.key = k;
2263 e.record = val;
2264
2265 tab->entries[tab->entries_bound++] = e;
2266 tab->num_entries++;
2267 RB_OBJ_WRITTEN(hash, Qundef, k);
2268 RB_OBJ_WRITTEN(hash, Qundef, val);
2269}
2270
2271static void
2272st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2273{
2274 long i;
2275
2276 for (i = 0; i < argc; /* */) {
2277 st_data_t k = st_stringify(argv[i++]);
2278 st_data_t v = argv[i++];
2279 st_insert(tab, k, v);
2280 RB_OBJ_WRITTEN(hash, Qundef, k);
2281 RB_OBJ_WRITTEN(hash, Qundef, v);
2282 }
2283}
2284
2285static void
2286st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2287{
2288 long i;
2289
2290 /* push elems */
2291 for (i = 0; i < argc; /* */) {
2292 VALUE key = argv[i++];
2293 VALUE val = argv[i++];
2294 st_insert_single(tab, hash, key, val);
2295 }
2296
2297 /* reindex */
2298 st_rehash(tab);
2299}
2300
2301/* Mimics ruby's { foo => bar } syntax. This function is subpart
2302 of rb_hash_bulk_insert. */
2303void
2304rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2305{
2306 st_index_t n, size = argc / 2;
2307 st_table *tab = RHASH_ST_TABLE(hash);
2308
2309 tab = RHASH_TBL_RAW(hash);
2310 n = tab->entries_bound + size;
2311 st_expand_table(tab, n);
2312 if (UNLIKELY(tab->num_entries))
2313 st_insert_generic(tab, argc, argv, hash);
2314 else if (argc <= 2)
2315 st_insert_single(tab, hash, argv[0], argv[1]);
2316 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2317 st_insert_linear(tab, argc, argv, hash);
2318 else
2319 st_insert_generic(tab, argc, argv, hash);
2320}
2321
2322void
2323rb_st_compact_table(st_table *tab)
2324{
2325 st_index_t num = tab->num_entries;
2326 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2327 /* Compaction: */
2328 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2329 rebuild_table_with(new_tab, tab);
2330 rebuild_move_table(new_tab, tab);
2331 rebuild_cleanup(tab);
2332 }
2333}
2334
2335#endif
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:898
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1428
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:247
VALUE rb_cString
String class.
Definition string.c:79
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:615
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
Definition st.c:135
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40