107#elif defined RUBY_EXPORT
109#include "internal/bits.h"
110#include "internal/gc.h"
111#include "internal/hash.h"
112#include "internal/sanitizers.h"
113#include "internal/set_table.h"
114#include "internal/st.h"
115#include "ruby_assert.h"
125#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
126#define EXPECT(expr, val) __builtin_expect(expr, val)
127#define ATTRIBUTE_UNUSED __attribute__((unused))
129#define PREFETCH(addr, write_p)
130#define EXPECT(expr, val) (expr)
131#define ATTRIBUTE_UNUSED
135typedef st_index_t st_hash_t;
143#define type_numhash st_hashtype_num
149static int st_strcmp(st_data_t, st_data_t);
150static st_index_t strhash(st_data_t);
156static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
157static st_index_t strcasehash(st_data_t);
159 st_locale_insensitive_strcasecmp_i,
/* Initialisation patterns.  ST_INIT_VAL is a 64-bit constant made of the
   byte 0xaf repeated eight times; ST_INIT_VAL_BYTE is that same pattern
   expressed as a single byte, so a byte-wise fill and a word-wise compare
   agree with each other.  The byte form must fit in an unsigned char:
   a wider constant would be silently truncated by byte-oriented fills.
   NOTE(review): the users of these macros are not visible in this part
   of the file -- confirm nothing depends on the previous value.  */
#define ST_INIT_VAL 0xafafafafafafafaf
#define ST_INIT_VAL_BYTE 0xaf
174#define malloc ruby_xmalloc
175#define calloc ruby_xcalloc
176#define realloc ruby_xrealloc
177#define sized_realloc ruby_xrealloc_sized
178#define free ruby_xfree
179#define sized_free ruby_xfree_sized
180#define free_fixed_ptr(v) ruby_xfree_sized((v), sizeof(*(v)))
182#define sized_realloc(ptr, new_size, old_size) realloc(ptr, new_size)
183#define sized_free(v, s) free(v)
184#define free_fixed_ptr(v) free(v)
/* Key equality for table TAB: X and Y are equal when they are the same
   value, or when the table type's compare callback returns 0 for them.
   The callback is only invoked when the cheap identity test fails.  */
187#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
/* True when entry PTR matches (HASH_VAL, KEY_): the stored hash is compared
   first, and the keys are compared with EQUAL only if the hashes match.  */
188#define PTR_EQUAL(tab, ptr, hash_val, key_) \
189 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
193#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
195 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
196 res = PTR_EQUAL(tab, ptr, hash_val, key); \
197 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
203 unsigned char entry_power;
207 unsigned char bin_power;
209 unsigned char size_ind;
212 st_index_t bins_words;
216#if SIZEOF_ST_INDEX_T == 8
235 {16, 17, 2, 0x10000},
236 {17, 18, 2, 0x20000},
237 {18, 19, 2, 0x40000},
238 {19, 20, 2, 0x80000},
239 {20, 21, 2, 0x100000},
240 {21, 22, 2, 0x200000},
241 {22, 23, 2, 0x400000},
242 {23, 24, 2, 0x800000},
243 {24, 25, 2, 0x1000000},
244 {25, 26, 2, 0x2000000},
245 {26, 27, 2, 0x4000000},
246 {27, 28, 2, 0x8000000},
247 {28, 29, 2, 0x10000000},
248 {29, 30, 2, 0x20000000},
249 {30, 31, 2, 0x40000000},
250 {31, 32, 2, 0x80000000},
251 {32, 33, 3, 0x200000000},
252 {33, 34, 3, 0x400000000},
253 {34, 35, 3, 0x800000000},
254 {35, 36, 3, 0x1000000000},
255 {36, 37, 3, 0x2000000000},
256 {37, 38, 3, 0x4000000000},
257 {38, 39, 3, 0x8000000000},
258 {39, 40, 3, 0x10000000000},
259 {40, 41, 3, 0x20000000000},
260 {41, 42, 3, 0x40000000000},
261 {42, 43, 3, 0x80000000000},
262 {43, 44, 3, 0x100000000000},
263 {44, 45, 3, 0x200000000000},
264 {45, 46, 3, 0x400000000000},
265 {46, 47, 3, 0x800000000000},
266 {47, 48, 3, 0x1000000000000},
267 {48, 49, 3, 0x2000000000000},
268 {49, 50, 3, 0x4000000000000},
269 {50, 51, 3, 0x8000000000000},
270 {51, 52, 3, 0x10000000000000},
271 {52, 53, 3, 0x20000000000000},
272 {53, 54, 3, 0x40000000000000},
273 {54, 55, 3, 0x80000000000000},
274 {55, 56, 3, 0x100000000000000},
275 {56, 57, 3, 0x200000000000000},
276 {57, 58, 3, 0x400000000000000},
277 {58, 59, 3, 0x800000000000000},
278 {59, 60, 3, 0x1000000000000000},
279 {60, 61, 3, 0x2000000000000000},
280 {61, 62, 3, 0x4000000000000000},
281 {62, 63, 3, 0x8000000000000000},
304 {16, 17, 2, 0x20000},
305 {17, 18, 2, 0x40000},
306 {18, 19, 2, 0x80000},
307 {19, 20, 2, 0x100000},
308 {20, 21, 2, 0x200000},
309 {21, 22, 2, 0x400000},
310 {22, 23, 2, 0x800000},
311 {23, 24, 2, 0x1000000},
312 {24, 25, 2, 0x2000000},
313 {25, 26, 2, 0x4000000},
314 {26, 27, 2, 0x8000000},
315 {27, 28, 2, 0x10000000},
316 {28, 29, 2, 0x20000000},
317 {29, 30, 2, 0x40000000},
318 {30, 31, 2, 0x80000000},
/* RESERVED_HASH_VAL (all bits set) is the hash value stored in an entry by
   MARK_ENTRY_DELETED, so it must never be used as the hash of a live key.
   normalize_hash_value replaces a computed hash equal to it with
   RESERVED_HASH_SUBSTITUTION_VAL (zero).  */
324#define RESERVED_HASH_VAL (~(st_hash_t) 0)
325#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
327static inline st_hash_t
328normalize_hash_value(st_hash_t hash)
332 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
336static inline st_hash_t
337do_hash(st_data_t key,
st_table *tab)
339 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
340 return normalize_hash_value(hash);
344#define MINIMAL_POWER2 2
346#if MINIMAL_POWER2 < 2
347#error "MINIMAL_POWER2 should be >= 2"
352#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
356get_power2(st_index_t size)
358 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
360 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
371static inline st_index_t
372get_bin(st_index_t *bins,
int s, st_index_t n)
374 return (s == 0 ? ((
unsigned char *) bins)[n]
375 : s == 1 ? ((unsigned short *) bins)[n]
376 : s == 2 ? ((unsigned int *) bins)[n]
377 : ((st_index_t *) bins)[n]);
383set_bin(st_index_t *bins,
int s, st_index_t n, st_index_t v)
385 if (s == 0) ((
unsigned char *) bins)[n] = (
unsigned char) v;
386 else if (s == 1) ((
unsigned short *) bins)[n] = (
unsigned short) v;
387 else if (s == 2) ((
unsigned int *) bins)[n] = (
unsigned int) v;
388 else ((st_index_t *) bins)[n] = v;
401#define MARK_BIN_EMPTY(tab, i) (set_bin(st_bins_ptr(tab), get_size_ind(tab), i, EMPTY_BIN))
/* Sentinel index meaning "not found" (all bits set).  The lookup helpers
   return it when the search ends without a matching entry or bin.  */
405#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
406#define UNDEFINED_BIN_IND (~(st_index_t) 0)
/* Sentinel index (all bits set except the lowest) returned by the lookup
   helpers when the table's rebuilds_num changed while keys were being
   compared; callers test for it before using the result.  */
410#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
411#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
416#define MARK_BIN_DELETED(tab, i) \
418 set_bin(st_bins_ptr(tab), get_size_ind(tab), i, DELETED_BIN); \
423#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
424#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
425#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
429#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
430#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
431#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
/* An entry is flagged as deleted by overwriting its hash field with
   RESERVED_HASH_VAL; DELETED_ENTRY_P tests for that marker.  The key and
   record fields of the entry are not touched by these macros.  */
435#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
436#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
439static inline st_index_t
440get_allocated_entries(
const st_table *tab)
442 return ((st_index_t) 1)<<tab->entry_power;
446static inline unsigned int
449 return tab->size_ind;
453static inline st_index_t
456 return ((st_index_t) 1)<<tab->bin_power;
460static inline st_index_t
463 return get_bins_num(tab) - 1;
469 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
473st_allocated_entries_size(
const st_table *tab)
478static inline st_index_t *
481 if (st_has_bins(tab)) {
482 return (st_index_t *)(((
char *)tab->entries) + st_allocated_entries_size(tab));
490static inline st_index_t
491hash_bin(st_hash_t hash_value,
st_table *tab)
493 return hash_value & bins_mask(tab);
497static inline st_index_t
500 if (st_has_bins(tab)) {
501 return features[tab->entry_power].bins_words *
sizeof (st_index_t);
510 memset(st_bins_ptr(tab), 0, bins_size(tab));
517 tab->num_entries = 0;
518 tab->entries_start = tab->entries_bound = 0;
519 if (st_bins_ptr(tab) != NULL)
520 initialize_bins(tab);
528 int all, total, num, str, strcase;
533static int init_st = 0;
540 char fname[10+
sizeof(long)*3];
542 if (!collision.total)
return;
543 f = fopen((snprintf(fname,
sizeof(fname),
"/tmp/col%ld", (
long)getpid()), fname),
"w");
546 fprintf(f,
"collision: %d / %d (%6.2f)\n", collision.all, collision.total,
547 ((
double)collision.all / (collision.total)) * 100);
548 fprintf(f,
"num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
554st_init_existing_table_with_size(
st_table *tab,
const struct st_hash_type *type, st_index_t size)
561 const char *e = getenv(
"ST_HASH_LOG");
562 if (!e || !*e) init_st = 1;
571 n = get_power2(size);
578 tab->entry_power = n;
579 tab->bin_power = features[n].bin_power;
580 tab->size_ind = features[n].size_ind;
582 size_t memsize = get_allocated_entries(tab) *
sizeof(
st_table_entry);
583 if (tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS) {
584 memsize += bins_size(tab);
588 if (tab->entries == NULL) {
594 tab->rebuilds_num = 0;
599st_init_existing_numtable_with_size(
st_table *tab, st_index_t size)
601 return st_init_existing_table_with_size(tab, &type_numhash, size);
608st_init_table_with_size(
const struct st_hash_type *type, st_index_t size)
617 st_init_existing_table_with_size(tab, type, size);
619 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
629st_table_size(
const struct st_table *tbl)
631 return tbl->num_entries;
639 return st_init_table_with_size(type, 0);
645st_init_numtable(
void)
647 return st_init_table(&type_numhash);
652st_init_numtable_with_size(st_index_t size)
654 return st_init_table_with_size(&type_numhash, size);
660st_init_strtable(
void)
662 return st_init_table(&type_strhash);
667st_init_strtable_with_size(st_index_t size)
669 return st_init_table_with_size(&type_strhash, size);
673st_init_existing_strtable_with_size(
st_table *tab, st_index_t size)
675 return st_init_existing_table_with_size(tab, &type_strhash, size);
682st_init_strcasetable(
void)
684 return st_init_table(&type_strcasehash);
690st_init_strcasetable_with_size(st_index_t size)
692 return st_init_table_with_size(&type_strcasehash, size);
704st_entries_memsize(
const st_table *tab)
712 sized_free(tab->entries, st_entries_memsize(tab) + bins_size(tab));
716st_free_embedded_table(
st_table *tab)
718 st_free_entries(tab);
725 st_free_embedded_table(tab);
736 + st_entries_memsize(tab));
740find_table_entry_ind(
st_table *tab, st_hash_t hash_value, st_data_t key);
743find_table_bin_ind(
st_table *tab, st_hash_t hash_value, st_data_t key);
746find_table_bin_ind_direct(
st_table *table, st_hash_t hash_value, st_data_t key);
749find_table_bin_ptr_and_reserve(
st_table *tab, st_hash_t *hash_value,
750 st_data_t key, st_index_t *bin_ind);
757 if (type == &type_numhash) {
760 else if (type == &type_strhash) {
763 else if (type == &type_strcasehash) {
/* Collision-statistics hooks.  Each expands to a conditional expression
   that does its bookkeeping only when collision_check is non-zero.
   Both arms are cast to void: the conditional operator requires its
   second and third operands to have compatible types, and an int result
   (collision.total++) paired with (void)0 violates that constraint.
   COLLISION relies on a variable named `tab' being in scope at the point
   of use.  collision_check is defined as 0 here, so both hooks are
   currently inert.  */
#define COLLISION (collision_check ? (void)count_collision(tab->type) : (void)0)
#define FOUND_BIN (collision_check ? (void)collision.total++ : (void)0)
#define collision_check 0
779#define REBUILD_THRESHOLD 4
781#if REBUILD_THRESHOLD < 2
782#error "REBUILD_THRESHOLD should be >= 2"
785static void rebuild_table_with(
st_table *
const new_tab,
st_table *
const tab);
786static void rebuild_move_table(
st_table *
const new_tab,
st_table *
const tab);
787static void rebuild_cleanup(
st_table *
const tab);
796 if ((2 * tab->num_entries <= get_allocated_entries(tab)
797 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
798 || tab->num_entries < (1 << MINIMAL_POWER2)) {
800 tab->num_entries = 0;
801 if (st_has_bins(tab))
802 initialize_bins(tab);
803 rebuild_table_with(tab, tab);
810 new_tab = st_init_table_with_size(tab->type,
811 2 * tab->num_entries - 1);
812 rebuild_table_with(new_tab, tab);
813 rebuild_move_table(new_tab, tab);
815 rebuild_cleanup(tab);
822 unsigned int size_ind;
828 new_entries = new_tab->entries;
831 bins = st_bins_ptr(new_tab);
832 size_ind = get_size_ind(new_tab);
833 st_index_t bound = tab->entries_bound;
836 for (i = tab->entries_start; i < bound; i++) {
837 curr_entry_ptr = &entries[i];
838 PREFETCH(entries + i + 1, 0);
839 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
841 if (&new_entries[ni] != curr_entry_ptr)
842 new_entries[ni] = *curr_entry_ptr;
843 if (EXPECT(bins != NULL, 1)) {
844 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
845 curr_entry_ptr->key);
846 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
848 new_tab->num_entries++;
852 assert(new_tab->num_entries == tab->num_entries);
858 st_free_entries(tab);
859 tab->entry_power = new_tab->entry_power;
860 tab->bin_power = new_tab->bin_power;
861 tab->size_ind = new_tab->size_ind;
862 tab->entries = new_tab->entries;
863 free_fixed_ptr(new_tab);
869 tab->entries_start = 0;
870 tab->entries_bound = tab->num_entries;
886static inline st_index_t
887secondary_hash(st_index_t ind,
st_table *tab, st_index_t *perturb)
890 ind = (ind << 2) + ind + *perturb + 1;
891 return hash_bin(ind, tab);
898static inline st_index_t
899find_entry(
st_table *tab, st_hash_t hash_value, st_data_t key)
905 bound = tab->entries_bound;
906 entries = tab->entries;
907 for (i = tab->entries_start; i < bound; i++) {
908 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
909 if (EXPECT(rebuilt_p, 0))
910 return REBUILT_TABLE_ENTRY_IND;
914 return UNDEFINED_ENTRY_IND;
926find_table_entry_ind(
st_table *tab, st_hash_t hash_value, st_data_t key)
930#ifdef QUADRATIC_PROBE
938 ind = hash_bin(hash_value, tab);
939#ifdef QUADRATIC_PROBE
942 perturb = hash_value;
946 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
947 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
948 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
949 if (EXPECT(rebuilt_p, 0))
950 return REBUILT_TABLE_ENTRY_IND;
954 else if (EMPTY_BIN_P(bin))
955 return UNDEFINED_ENTRY_IND;
956#ifdef QUADRATIC_PROBE
957 ind = hash_bin(ind + d, tab);
960 ind = secondary_hash(ind, tab, &perturb);
972find_table_bin_ind(
st_table *tab, st_hash_t hash_value, st_data_t key)
976#ifdef QUADRATIC_PROBE
984 ind = hash_bin(hash_value, tab);
985#ifdef QUADRATIC_PROBE
988 perturb = hash_value;
992 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
993 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
994 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
995 if (EXPECT(rebuilt_p, 0))
996 return REBUILT_TABLE_BIN_IND;
1000 else if (EMPTY_BIN_P(bin))
1001 return UNDEFINED_BIN_IND;
1002#ifdef QUADRATIC_PROBE
1003 ind = hash_bin(ind + d, tab);
1006 ind = secondary_hash(ind, tab, &perturb);
1017find_table_bin_ind_direct(
st_table *tab, st_hash_t hash_value, st_data_t key)
1020#ifdef QUADRATIC_PROBE
1027 ind = hash_bin(hash_value, tab);
1028#ifdef QUADRATIC_PROBE
1031 perturb = hash_value;
1035 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
1036 if (EMPTY_OR_DELETED_BIN_P(bin))
1038#ifdef QUADRATIC_PROBE
1039 ind = hash_bin(ind + d, tab);
1042 ind = secondary_hash(ind, tab, &perturb);
1058find_table_bin_ptr_and_reserve(
st_table *tab, st_hash_t *hash_value,
1059 st_data_t key, st_index_t *bin_ind)
1061 int eq_p, rebuilt_p;
1063 st_hash_t curr_hash_value = *hash_value;
1064#ifdef QUADRATIC_PROBE
1069 st_index_t entry_index;
1070 st_index_t first_deleted_bin_ind;
1073 ind = hash_bin(curr_hash_value, tab);
1074#ifdef QUADRATIC_PROBE
1077 perturb = curr_hash_value;
1080 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1081 entries = tab->entries;
1083 entry_index = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
1084 if (EMPTY_BIN_P(entry_index)) {
1086 entry_index = UNDEFINED_ENTRY_IND;
1087 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1089 ind = first_deleted_bin_ind;
1090 MARK_BIN_EMPTY(tab, ind);
1094 else if (! DELETED_BIN_P(entry_index)) {
1095 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1096 if (EXPECT(rebuilt_p, 0))
1097 return REBUILT_TABLE_ENTRY_IND;
1101 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1102 first_deleted_bin_ind = ind;
1103#ifdef QUADRATIC_PROBE
1104 ind = hash_bin(ind + d, tab);
1107 ind = secondary_hash(ind, tab, &perturb);
1118st_lookup(
st_table *tab, st_data_t key, st_data_t *value)
1121 st_hash_t hash = do_hash(key, tab);
1124 if (!st_has_bins(tab)) {
1125 bin = find_entry(tab, hash, key);
1126 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1128 if (bin == UNDEFINED_ENTRY_IND)
1132 bin = find_table_entry_ind(tab, hash, key);
1133 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1135 if (bin == UNDEFINED_ENTRY_IND)
1140 *value = tab->entries[bin].record;
1147st_get_key(
st_table *tab, st_data_t key, st_data_t *result)
1150 st_hash_t hash = do_hash(key, tab);
1153 if (!st_has_bins(tab)) {
1154 bin = find_entry(tab, hash, key);
1155 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1157 if (bin == UNDEFINED_ENTRY_IND)
1161 bin = find_table_entry_ind(tab, hash, key);
1162 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1164 if (bin == UNDEFINED_ENTRY_IND)
1169 *result = tab->entries[bin].key;
1175rebuild_table_if_necessary (
st_table *tab)
1177 st_index_t bound = tab->entries_bound;
1179 if (bound == get_allocated_entries(tab))
1187st_insert(
st_table *tab, st_data_t key, st_data_t value)
1192 st_hash_t hash_value;
1196 hash_value = do_hash(key, tab);
1198 rebuild_table_if_necessary(tab);
1199 if (!st_has_bins(tab)) {
1200 bin = find_entry(tab, hash_value, key);
1201 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1203 new_p = bin == UNDEFINED_ENTRY_IND;
1206 bin_ind = UNDEFINED_BIN_IND;
1209 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1211 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1213 new_p = bin == UNDEFINED_ENTRY_IND;
1217 ind = tab->entries_bound++;
1218 entry = &tab->entries[ind];
1219 entry->hash = hash_value;
1221 entry->record = value;
1222 if (bin_ind != UNDEFINED_BIN_IND)
1223 set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1226 tab->entries[bin].record = value;
1233st_add_direct_with_hash(
st_table *tab,
1234 st_data_t key, st_data_t value, st_hash_t hash)
1240 assert(hash != RESERVED_HASH_VAL);
1242 rebuild_table_if_necessary(tab);
1243 ind = tab->entries_bound++;
1244 entry = &tab->entries[ind];
1247 entry->record = value;
1249 if (st_has_bins(tab)) {
1250 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1251 set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1256rb_st_add_direct_with_hash(
st_table *tab,
1257 st_data_t key, st_data_t value, st_hash_t hash)
1259 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1265st_add_direct(
st_table *tab, st_data_t key, st_data_t value)
1267 st_hash_t hash_value;
1269 hash_value = do_hash(key, tab);
1270 st_add_direct_with_hash(tab, key, value, hash_value);
1277st_insert2(
st_table *tab, st_data_t key, st_data_t value,
1278 st_data_t (*func)(st_data_t))
1283 st_hash_t hash_value;
1287 hash_value = do_hash(key, tab);
1289 rebuild_table_if_necessary (tab);
1290 if (!st_has_bins(tab)) {
1291 bin = find_entry(tab, hash_value, key);
1292 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1294 new_p = bin == UNDEFINED_ENTRY_IND;
1297 bin_ind = UNDEFINED_BIN_IND;
1300 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1302 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1304 new_p = bin == UNDEFINED_ENTRY_IND;
1309 ind = tab->entries_bound++;
1310 entry = &tab->entries[ind];
1311 entry->hash = hash_value;
1313 entry->record = value;
1314 if (bin_ind != UNDEFINED_BIN_IND)
1315 set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1318 tab->entries[bin].record = value;
1326 *new_tab = *old_tab;
1327 size_t memsize = get_allocated_entries(old_tab) *
sizeof(
st_table_entry);
1328 memsize += bins_size(old_tab);
1331 if (new_tab->entries == NULL) {
1335 MEMCPY(new_tab->entries, old_tab->entries,
char, memsize);
1348 if (new_tab == NULL)
1352 if (st_replace(new_tab, old_tab) == NULL) {
1353 st_free_table(new_tab);
1363update_range_for_deleted(
st_table *tab, st_index_t n)
1367 if (tab->entries_start == n) {
1368 st_index_t start = n + 1;
1369 st_index_t bound = tab->entries_bound;
1371 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1372 tab->entries_start = start;
1381st_general_delete(
st_table *tab, st_data_t *key, st_data_t *value)
1388 hash = do_hash(*key, tab);
1390 if (!st_has_bins(tab)) {
1391 bin = find_entry(tab, hash, *key);
1392 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1394 if (bin == UNDEFINED_ENTRY_IND) {
1395 if (value != 0) *value = 0;
1400 bin_ind = find_table_bin_ind(tab, hash, *key);
1401 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1403 if (bin_ind == UNDEFINED_BIN_IND) {
1404 if (value != 0) *value = 0;
1407 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
1408 MARK_BIN_DELETED(tab, bin_ind);
1410 entry = &tab->entries[bin];
1412 if (value != 0) *value = entry->record;
1413 MARK_ENTRY_DELETED(entry);
1415 update_range_for_deleted(tab, bin);
1420st_delete(
st_table *tab, st_data_t *key, st_data_t *value)
1422 return st_general_delete(tab, key, value);
1431st_delete_safe(
st_table *tab, st_data_t *key, st_data_t *value,
1432 st_data_t never ATTRIBUTE_UNUSED)
1434 return st_general_delete(tab, key, value);
1442st_shift(
st_table *tab, st_data_t *key, st_data_t *value)
1444 st_index_t i, bound;
1449 entries = tab->entries;
1450 bound = tab->entries_bound;
1451 for (i = tab->entries_start; i < bound; i++) {
1452 curr_entry_ptr = &entries[i];
1453 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1454 st_hash_t entry_hash = curr_entry_ptr->hash;
1455 st_data_t entry_key = curr_entry_ptr->key;
1457 if (value != 0) *value = curr_entry_ptr->record;
1460 if (!st_has_bins(tab)) {
1461 bin = find_entry(tab, entry_hash, entry_key);
1462 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1463 entries = tab->entries;
1466 curr_entry_ptr = &entries[bin];
1469 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1470 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1471 entries = tab->entries;
1474 curr_entry_ptr = &entries[get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind)
1476 MARK_BIN_DELETED(tab, bin_ind);
1478 MARK_ENTRY_DELETED(curr_entry_ptr);
1480 update_range_for_deleted(tab, i);
1484 if (value != 0) *value = 0;
1490st_cleanup_safe(
st_table *tab ATTRIBUTE_UNUSED,
1491 st_data_t never ATTRIBUTE_UNUSED)
1505st_update(
st_table *tab, st_data_t key,
1506 st_update_callback_func *func, st_data_t arg)
1512 st_data_t value = 0, old_key;
1513 int retval, existing;
1514 st_hash_t hash = do_hash(key, tab);
1517 entries = tab->entries;
1518 if (!st_has_bins(tab)) {
1519 bin = find_entry(tab, hash, key);
1520 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1522 existing = bin != UNDEFINED_ENTRY_IND;
1523 entry = &entries[bin];
1524 bin_ind = UNDEFINED_BIN_IND;
1527 bin_ind = find_table_bin_ind(tab, hash, key);
1528 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1530 existing = bin_ind != UNDEFINED_BIN_IND;
1532 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
1533 entry = &entries[bin];
1538 value = entry->record;
1542 unsigned int rebuilds_num = tab->rebuilds_num;
1544 retval = (*func)(&key, &value, arg, existing);
1548 assert(rebuilds_num == tab->rebuilds_num);
1554 st_add_direct_with_hash(tab, key, value, hash);
1557 if (old_key != key) {
1560 entry->record = value;
1564 if (bin_ind != UNDEFINED_BIN_IND)
1565 MARK_BIN_DELETED(tab, bin_ind);
1566 MARK_ENTRY_DELETED(entry);
1568 update_range_for_deleted(tab, bin);
1584st_general_foreach(
st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1590 enum st_retval retval;
1591 st_index_t i, rebuilds_num;
1594 int error_p, packed_p = !st_has_bins(tab);
1596 entries = tab->entries;
1599 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1600 curr_entry_ptr = &entries[i];
1601 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1603 key = curr_entry_ptr->key;
1604 rebuilds_num = tab->rebuilds_num;
1605 hash = curr_entry_ptr->hash;
1606 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1608 if (retval == ST_REPLACE && replace) {
1610 value = curr_entry_ptr->record;
1611 retval = (*replace)(&key, &value, arg, TRUE);
1612 curr_entry_ptr->key = key;
1613 curr_entry_ptr->record = value;
1616 if (rebuilds_num != tab->rebuilds_num) {
1618 entries = tab->entries;
1619 packed_p = !st_has_bins(tab);
1621 i = find_entry(tab, hash, key);
1622 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1624 error_p = i == UNDEFINED_ENTRY_IND;
1627 i = find_table_entry_ind(tab, hash, key);
1628 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1630 error_p = i == UNDEFINED_ENTRY_IND;
1633 if (error_p && check_p) {
1635 retval = (*func)(0, 0, arg, 1);
1638 curr_entry_ptr = &entries[i];
1651 st_data_t key = curr_entry_ptr->key;
1655 bin = find_entry(tab, hash, key);
1656 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1658 if (bin == UNDEFINED_ENTRY_IND)
1662 bin_ind = find_table_bin_ind(tab, hash, key);
1663 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1665 if (bin_ind == UNDEFINED_BIN_IND)
1667 bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
1668 MARK_BIN_DELETED(tab, bin_ind);
1670 curr_entry_ptr = &entries[bin];
1671 MARK_ENTRY_DELETED(curr_entry_ptr);
1673 update_range_for_deleted(tab, bin);
1682st_foreach_with_replace(
st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1684 return st_general_foreach(tab, func, replace, arg, TRUE);
1688 st_foreach_callback_func *func;
1693apply_functor(st_data_t k, st_data_t v, st_data_t d,
int _)
1695 const struct functor *f = (
void *)d;
1696 return f->func(k, v, f->arg);
1700st_foreach(
st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1702 const struct functor f = { func, arg };
1703 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1708st_foreach_check(
st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1709 st_data_t never ATTRIBUTE_UNUSED)
1711 return st_general_foreach(tab, func, 0, arg, TRUE);
1716static inline st_index_t
1717st_general_keys(
st_table *tab, st_data_t *keys, st_index_t size)
1719 st_index_t i, bound;
1720 st_data_t key, *keys_start, *keys_end;
1723 bound = tab->entries_bound;
1725 keys_end = keys + size;
1726 for (i = tab->entries_start; i < bound; i++) {
1727 if (keys == keys_end)
1729 curr_entry_ptr = &entries[i];
1730 key = curr_entry_ptr->key;
1731 if (! DELETED_ENTRY_P(curr_entry_ptr))
1735 return keys - keys_start;
1739st_keys(
st_table *tab, st_data_t *keys, st_index_t size)
1741 return st_general_keys(tab, keys, size);
1746st_keys_check(
st_table *tab, st_data_t *keys, st_index_t size,
1747 st_data_t never ATTRIBUTE_UNUSED)
1749 return st_general_keys(tab, keys, size);
1754static inline st_index_t
1755st_general_values(
st_table *tab, st_data_t *values, st_index_t size)
1757 st_index_t i, bound;
1758 st_data_t *values_start, *values_end;
1761 values_start = values;
1762 values_end = values + size;
1763 bound = tab->entries_bound;
1764 for (i = tab->entries_start; i < bound; i++) {
1765 if (values == values_end)
1767 curr_entry_ptr = &entries[i];
1768 if (! DELETED_ENTRY_P(curr_entry_ptr))
1769 *values++ = curr_entry_ptr->record;
1772 return values - values_start;
1776st_values(
st_table *tab, st_data_t *values, st_index_t size)
1778 return st_general_values(tab, values, size);
1783st_values_check(
st_table *tab, st_data_t *values, st_index_t size,
1784 st_data_t never ATTRIBUTE_UNUSED)
1786 return st_general_values(tab, values, size);
1789#define FNV1_32A_INIT 0x811c9dc5
1794#define FNV_32_PRIME 0x01000193
1797#ifndef UNALIGNED_WORD_ACCESS
1798# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1799 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1800 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1801 defined(__mc68020__)
1802# define UNALIGNED_WORD_ACCESS 1
1805#ifndef UNALIGNED_WORD_ACCESS
1806# define UNALIGNED_WORD_ACCESS 0
1812#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1813#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1815#if ST_INDEX_BITS <= 32
1816#define C1 (st_index_t)0xcc9e2d51
1817#define C2 (st_index_t)0x1b873593
/* Multiplier constants built from two 32-bit halves with BIG_CONSTANT.
   They expand to plain expressions with no trailing semicolon, so they can
   be used inside larger expressions and do not leave an empty statement
   behind when used as `x *= C1;'.
   NOTE(review): these look like the wide-index alternatives to the 32-bit
   C1/C2 defined just before them; the preprocessor branch that selects
   them is not visible in this part of the file.  */
#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5)
#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f)
1822NO_SANITIZE(
"unsigned-integer-overflow",
static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1823NO_SANITIZE(
"unsigned-integer-overflow",
static inline st_index_t murmur_finish(st_index_t h));
1824NO_SANITIZE(
"unsigned-integer-overflow",
extern st_index_t st_hash(
const void *ptr,
size_t len, st_index_t h));
1826static inline st_index_t
1827murmur_step(st_index_t h, st_index_t k)
1829#if ST_INDEX_BITS <= 32
1845static inline st_index_t
1846murmur_finish(st_index_t h)
1848#if ST_INDEX_BITS <= 32
1852 const st_index_t c1 = 0x85ebca6b;
1853 const st_index_t c2 = 0xc2b2ae35;
1859 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1860 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1862#if ST_INDEX_BITS > 64
1879st_hash(
const void *ptr,
size_t len, st_index_t h)
1881 const char *data = ptr;
1885#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1886#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1887#if SIZEOF_ST_INDEX_T > 4
1888#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1889#if SIZEOF_ST_INDEX_T > 8
1890#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1891 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1892#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1894#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1896#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1899 if (
len >=
sizeof(st_index_t)) {
1900#if !UNALIGNED_WORD_ACCESS
1901 int align = (int)((st_data_t)data %
sizeof(st_index_t));
1907#ifdef WORDS_BIGENDIAN
1908# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1909 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1911# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1912 t |= data_at(n) << CHAR_BIT*(n)
1918#ifdef WORDS_BIGENDIAN
1919 t >>= (CHAR_BIT * align) - CHAR_BIT;
1921 t <<= (CHAR_BIT * align);
1924 data +=
sizeof(st_index_t)-align;
1925 len -=
sizeof(st_index_t)-align;
1927 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1928 sr = CHAR_BIT * align;
1930 while (
len >=
sizeof(st_index_t)) {
1931 d = *(st_index_t *)data;
1932#ifdef WORDS_BIGENDIAN
1933 t = (t << sr) | (d >> sl);
1935 t = (t >> sr) | (d << sl);
1937 h = murmur_step(h, t);
1939 data +=
sizeof(st_index_t);
1940 len -=
sizeof(st_index_t);
1943 pack =
len < (size_t)align ? (
int)
len : align;
1946#ifdef WORDS_BIGENDIAN
1947# define UNALIGNED_ADD(n) case (n) + 1: \
1948 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1950# define UNALIGNED_ADD(n) case (n) + 1: \
1951 d |= data_at(n) << CHAR_BIT*(n)
1956#ifdef WORDS_BIGENDIAN
1957 t = (t << sr) | (d >> sl);
1959 t = (t >> sr) | (d << sl);
1962 if (
len < (
size_t)align)
goto skip_tail;
1964 h = murmur_step(h, t);
1970#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1971#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1973#define aligned_data data
1977 h = murmur_step(h, *(st_index_t *)aligned_data);
1978 data +=
sizeof(st_index_t);
1979 len -=
sizeof(st_index_t);
1980 }
while (
len >=
sizeof(st_index_t));
1986#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1988#if SIZEOF_ST_INDEX_T > 4
1989 case 7: t |= data_at(6) << 48;
1990 case 6: t |= data_at(5) << 40;
1991 case 5: t |= data_at(4) << 32;
1993 t |= (st_index_t)*(uint32_t*)aligned_data;
1997 case 3: t |= data_at(2) << 16;
1998 case 2: t |= data_at(1) << 8;
1999 case 1: t |= data_at(0);
2001#ifdef WORDS_BIGENDIAN
2002# define UNALIGNED_ADD(n) case (n) + 1: \
2003 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
2005# define UNALIGNED_ADD(n) case (n) + 1: \
2006 t |= data_at(n) << CHAR_BIT*(n)
2014 h ^= t; h -= ROTL(t, 7);
2020 return murmur_finish(h);
2024st_hash_uint32(st_index_t h, uint32_t i)
2026 return murmur_step(h, i);
2029NO_SANITIZE(
"unsigned-integer-overflow",
extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
2031st_hash_uint(st_index_t h, st_index_t i)
2036#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
2037 h = murmur_step(h, i >> 8*8);
2039 h = murmur_step(h, i);
2044st_hash_end(st_index_t h)
2046 h = murmur_finish(h);
2052rb_st_hash_start(st_index_t h)
2058strhash(st_data_t arg)
2060 register const char *
string = (
const char *)arg;
2061 return st_hash(
string, strlen(
string), FNV1_32A_INIT);
2065st_locale_insensitive_strcasecmp(
const char *s1,
const char *s2)
2072 if (c1 ==
'\0' || c2 ==
'\0') {
2073 if (c1 !=
'\0')
return 1;
2074 if (c2 !=
'\0')
return -1;
2077 if ((
'A' <= c1) && (c1 <=
'Z')) c1 +=
'a' -
'A';
2078 if ((
'A' <= c2) && (c2 <=
'Z')) c2 +=
'a' -
'A';
2089st_locale_insensitive_strncasecmp(
const char *s1,
const char *s2,
size_t n)
2094 for (i = 0; i < n; i++) {
2097 if (c1 ==
'\0' || c2 ==
'\0') {
2098 if (c1 !=
'\0')
return 1;
2099 if (c2 !=
'\0')
return -1;
2102 if ((
'A' <= c1) && (c1 <=
'Z')) c1 +=
'a' -
'A';
2103 if ((
'A' <= c2) && (c2 <=
'Z')) c2 +=
'a' -
'A';
2115st_strcmp(st_data_t lhs, st_data_t rhs)
2117 const char *s1 = (
char *)lhs;
2118 const char *s2 = (
char *)rhs;
2119 return strcmp(s1, s2);
2123st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2125 const char *s1 = (
char *)lhs;
2126 const char *s2 = (
char *)rhs;
2127 return st_locale_insensitive_strcasecmp(s1, s2);
2130NO_SANITIZE(
"unsigned-integer-overflow", PUREFUNC(
static st_index_t strcasehash(st_data_t)));
2132strcasehash(st_data_t arg)
2134 register const char *
string = (
const char *)arg;
2135 register st_index_t hval = FNV1_32A_INIT;
2141 unsigned int c = (
unsigned char)*
string++;
2142 if ((
unsigned int)(c -
'A') <= (
'Z' -
'A')) c +=
'a' -
'A';
2146 hval *= FNV_32_PRIME;
2152st_numcmp(st_data_t x, st_data_t y)
2158st_numhash(st_data_t n)
2160 enum {s1 = 11, s2 = 3};
2161 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2169st_expand_table(
st_table *tab, st_index_t siz)
2174 if (siz <= get_allocated_entries(tab))
2177 tmp = st_init_table_with_size(tab->type, siz);
2178 n = get_allocated_entries(tab);
2180 st_free_entries(tab);
2182 tab->entry_power = tmp->entry_power;
2183 tab->bin_power = tmp->bin_power;
2184 tab->size_ind = tmp->size_ind;
2185 tab->entries = tmp->entries;
2186 tab->rebuilds_num++;
2187 free_fixed_ptr(tmp);
2195 int eq_p, rebuilt_p;
2199 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2200 p = &tab->entries[i];
2201 if (DELETED_ENTRY_P(p))
2203 for (j = i + 1; j < tab->entries_bound; j++) {
2204 q = &tab->entries[j];
2205 if (DELETED_ENTRY_P(q))
2207 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2208 if (EXPECT(rebuilt_p, 0))
2212 MARK_ENTRY_DELETED(q);
2214 update_range_for_deleted(tab, j);
2226 int eq_p, rebuilt_p;
2229 unsigned int const size_ind = get_size_ind(tab);
2230 initialize_bins(tab);
2231 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2234#ifdef QUADRATIC_PROBE
2237 st_index_t perturb = p->hash;
2240 if (DELETED_ENTRY_P(p))
2243 ind = hash_bin(p->hash, tab);
2245 st_index_t bin = get_bin(st_bins_ptr(tab), size_ind, ind);
2246 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2248 set_bin(st_bins_ptr(tab), size_ind, ind, i + ENTRY_BASE);
2253 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2254 if (EXPECT(rebuilt_p, 0))
2258 q->record = p->record;
2259 MARK_ENTRY_DELETED(p);
2261 update_range_for_deleted(tab, bin);
2266#ifdef QUADRATIC_PROBE
2267 ind = hash_bin(ind + d, tab);
2270 ind = secondary_hash(ind, tab, &perturb);
2288 if (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2289 rebuilt_p = st_rehash_linear(tab);
2291 rebuilt_p = st_rehash_indexed(tab);
2292 }
while (rebuilt_p);
2296st_stringify(
VALUE key)
2299 rb_hash_key_str(key) : key;
2305 st_data_t k = st_stringify(key);
2307 e.hash = do_hash(k, tab);
2311 tab->entries[tab->entries_bound++] = e;
2322 for (i = 0; i < argc; ) {
2323 st_data_t k = st_stringify(argv[i++]);
2324 st_data_t v = argv[i++];
2325 st_insert(tab, k, v);
2337 for (i = 0; i < argc; ) {
2338 VALUE key = argv[i++];
2339 VALUE val = argv[i++];
2340 st_insert_single(tab, hash, key, val);
2350rb_hash_bulk_insert_into_st_table(
long argc,
const VALUE *argv,
VALUE hash)
2352 st_index_t n, size = argc / 2;
2353 st_table *tab = RHASH_ST_TABLE(hash);
2355 tab = RHASH_TBL_RAW(hash);
2356 n = tab->entries_bound + size;
2357 st_expand_table(tab, n);
2358 if (UNLIKELY(tab->num_entries))
2359 st_insert_generic(tab, argc, argv, hash);
2361 st_insert_single(tab, hash, argv[0], argv[1]);
2362 else if (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2363 st_insert_linear(tab, argc, argv, hash);
2365 st_insert_generic(tab, argc, argv, hash);
2371 st_index_t num = tab->num_entries;
2372 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2374 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2375 rebuild_table_with(new_tab, tab);
2376 rebuild_move_table(new_tab, tab);
2377 rebuild_cleanup(tab);
2385struct set_table_entry {
2391static inline st_hash_t
2392set_do_hash(st_data_t key,
set_table *tab)
2394 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2395 return normalize_hash_value(hash);
2399static inline unsigned int
2402 return tab->size_ind;
2406static inline st_index_t
2409 return ((st_index_t) 1)<<tab->bin_power;
2413static inline st_index_t
2416 return set_get_bins_num(tab) - 1;
2421static inline st_index_t
2422set_hash_bin(st_hash_t hash_value,
set_table *tab)
2424 return hash_value & set_bins_mask(tab);
2428static inline st_index_t
2429set_get_allocated_entries(
const set_table *tab)
2431 return ((st_index_t) 1)<<tab->entry_power;
2435set_allocated_entries_size(
const set_table *tab)
2437 return set_get_allocated_entries(tab) *
sizeof(set_table_entry);
2443 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
2447static inline st_index_t
2450 if (set_has_bins(tab)) {
2451 return features[tab->entry_power].bins_words *
sizeof (st_index_t);
2457static inline st_index_t *
2460 if (set_has_bins(tab)) {
2461 return (st_index_t *)(((
char *)tab->
entries) + set_allocated_entries_size(tab));
2471 memset(set_bins_ptr(tab), 0, set_bins_size(tab));
2478 tab->num_entries = 0;
2479 tab->entries_start = tab->entries_bound = 0;
2480 if (set_bins_ptr(tab) != NULL)
2481 set_initialize_bins(tab);
2487 size_t memsize = set_get_allocated_entries(tab) *
sizeof(set_table_entry);
2488 if (set_has_bins(tab)) {
2489 memsize += set_bins_size(tab);
2502 const char *e = getenv(
"ST_HASH_LOG");
2503 if (!e || !*e) init_st = 1;
2512 n = get_power2(size);
2515 tab->entry_power = n;
2516 tab->bin_power = features[n].bin_power;
2517 tab->size_ind = features[n].size_ind;
2519 tab->
entries = (set_table_entry *)malloc(set_entries_memsize(tab));
2520 set_make_tab_empty(tab);
2521 tab->rebuilds_num = 0;
2531 if (tab == NULL) tab = malloc(
sizeof(
set_table));
2533 set_init_existing_table_with_size(tab, type, size);
2539set_init_numtable(
void)
2541 return set_init_table_with_size(NULL, &type_numhash, 0);
2545set_init_numtable_with_size(st_index_t size)
2547 return set_init_table_with_size(NULL, &type_numhash, size);
2551set_init_embedded_numtable_with_size(
set_table *tab, st_index_t size)
2553 return set_init_existing_table_with_size(tab, &type_numhash, size);
2557set_table_size(
const struct set_table *tbl)
2559 return tbl->num_entries;
2566 set_make_tab_empty(tab);
2567 tab->rebuilds_num++;
2573 sized_free(tab->
entries, set_entries_memsize(tab));
2581 set_free_embedded_table(tab);
2582 free_fixed_ptr(tab);
2590 + (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
2591 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2595set_find_table_entry_ind(
set_table *tab, st_hash_t hash_value, st_data_t key);
2598set_find_table_bin_ind(
set_table *tab, st_hash_t hash_value, st_data_t key);
2601set_find_table_bin_ind_direct(
set_table *table, st_hash_t hash_value, st_data_t key);
2604set_find_table_bin_ptr_and_reserve(
set_table *tab, st_hash_t *hash_value,
2605 st_data_t key, st_index_t *bin_ind);
2609static void set_rebuild_cleanup(
set_table *
const tab);
2618 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2619 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2620 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2622 tab->num_entries = 0;
2623 if (set_has_bins(tab))
2624 set_initialize_bins(tab);
2625 set_rebuild_table_with(tab, tab);
2632 new_tab = set_init_table_with_size(NULL, tab->type,
2633 2 * tab->num_entries - 1);
2634 set_rebuild_table_with(new_tab, tab);
2635 set_rebuild_move_table(new_tab, tab);
2637 set_rebuild_cleanup(tab);
2644 unsigned int size_ind;
2645 set_table_entry *new_entries;
2646 set_table_entry *curr_entry_ptr;
2650 new_entries = new_tab->
entries;
2653 bins = set_bins_ptr(new_tab);
2654 size_ind = set_get_size_ind(new_tab);
2655 st_index_t bound = tab->entries_bound;
2656 set_table_entry *entries = tab->
entries;
2658 for (i = tab->entries_start; i < bound; i++) {
2659 curr_entry_ptr = &entries[i];
2660 PREFETCH(entries + i + 1, 0);
2661 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2663 if (&new_entries[ni] != curr_entry_ptr)
2664 new_entries[ni] = *curr_entry_ptr;
2665 if (EXPECT(bins != NULL, 1)) {
2666 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2667 curr_entry_ptr->key);
2668 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2670 new_tab->num_entries++;
2674 assert(new_tab->num_entries == tab->num_entries);
2680 sized_free(tab->
entries, set_entries_memsize(tab));
2683 tab->entry_power = new_tab->entry_power;
2684 tab->bin_power = new_tab->bin_power;
2685 tab->size_ind = new_tab->size_ind;
2687 free_fixed_ptr(new_tab);
2691set_rebuild_cleanup(
set_table *
const tab)
2693 tab->entries_start = 0;
2694 tab->entries_bound = tab->num_entries;
2695 tab->rebuilds_num++;
2710static inline st_index_t
2711set_secondary_hash(st_index_t ind,
set_table *tab, st_index_t *perturb)
2714 ind = (ind << 2) + ind + *perturb + 1;
2715 return set_hash_bin(ind, tab);
2722static inline st_index_t
2723set_find_entry(
set_table *tab, st_hash_t hash_value, st_data_t key)
2725 int eq_p, rebuilt_p;
2726 st_index_t i, bound;
2727 set_table_entry *entries;
2729 bound = tab->entries_bound;
2731 for (i = tab->entries_start; i < bound; i++) {
2732 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2733 if (EXPECT(rebuilt_p, 0))
2734 return REBUILT_TABLE_ENTRY_IND;
2738 return UNDEFINED_ENTRY_IND;
2750set_find_table_entry_ind(
set_table *tab, st_hash_t hash_value, st_data_t key)
2752 int eq_p, rebuilt_p;
2754#ifdef QUADRATIC_PROBE
2760 set_table_entry *entries = tab->
entries;
2762 ind = set_hash_bin(hash_value, tab);
2763#ifdef QUADRATIC_PROBE
2766 perturb = hash_value;
2769 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2770 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2771 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2772 if (EXPECT(rebuilt_p, 0))
2773 return REBUILT_TABLE_ENTRY_IND;
2777 else if (EMPTY_BIN_P(bin))
2778 return UNDEFINED_ENTRY_IND;
2779#ifdef QUADRATIC_PROBE
2780 ind = set_hash_bin(ind + d, tab);
2783 ind = set_secondary_hash(ind, tab, &perturb);
2794set_find_table_bin_ind(
set_table *tab, st_hash_t hash_value, st_data_t key)
2796 int eq_p, rebuilt_p;
2798#ifdef QUADRATIC_PROBE
2804 set_table_entry *entries = tab->
entries;
2806 ind = set_hash_bin(hash_value, tab);
2807#ifdef QUADRATIC_PROBE
2810 perturb = hash_value;
2813 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2814 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2815 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2816 if (EXPECT(rebuilt_p, 0))
2817 return REBUILT_TABLE_BIN_IND;
2821 else if (EMPTY_BIN_P(bin))
2822 return UNDEFINED_BIN_IND;
2823#ifdef QUADRATIC_PROBE
2824 ind = set_hash_bin(ind + d, tab);
2827 ind = set_secondary_hash(ind, tab, &perturb);
2837set_find_table_bin_ind_direct(
set_table *tab, st_hash_t hash_value, st_data_t key)
2840#ifdef QUADRATIC_PROBE
2847 ind = set_hash_bin(hash_value, tab);
2848#ifdef QUADRATIC_PROBE
2851 perturb = hash_value;
2854 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2855 if (EMPTY_OR_DELETED_BIN_P(bin))
2857#ifdef QUADRATIC_PROBE
2858 ind = set_hash_bin(ind + d, tab);
2861 ind = set_secondary_hash(ind, tab, &perturb);
2868#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
2880set_find_table_bin_ptr_and_reserve(
set_table *tab, st_hash_t *hash_value,
2881 st_data_t key, st_index_t *bin_ind)
2883 int eq_p, rebuilt_p;
2885 st_hash_t curr_hash_value = *hash_value;
2886#ifdef QUADRATIC_PROBE
2891 st_index_t entry_index;
2892 st_index_t firset_deleted_bin_ind;
2893 set_table_entry *entries;
2895 ind = set_hash_bin(curr_hash_value, tab);
2896#ifdef QUADRATIC_PROBE
2899 perturb = curr_hash_value;
2901 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2904 entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2905 if (EMPTY_BIN_P(entry_index)) {
2907 entry_index = UNDEFINED_ENTRY_IND;
2908 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2910 ind = firset_deleted_bin_ind;
2911 MARK_SET_BIN_EMPTY(tab, ind);
2915 else if (! DELETED_BIN_P(entry_index)) {
2916 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2917 if (EXPECT(rebuilt_p, 0))
2918 return REBUILT_TABLE_ENTRY_IND;
2922 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2923 firset_deleted_bin_ind = ind;
2924#ifdef QUADRATIC_PROBE
2925 ind = set_hash_bin(ind + d, tab);
2928 ind = set_secondary_hash(ind, tab, &perturb);
2938set_table_lookup(
set_table *tab, st_data_t key)
2941 st_hash_t hash = set_do_hash(key, tab);
2944 if (!set_has_bins(tab)) {
2945 bin = set_find_entry(tab, hash, key);
2946 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2948 if (bin == UNDEFINED_ENTRY_IND)
2952 bin = set_find_table_entry_ind(tab, hash, key);
2953 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2955 if (bin == UNDEFINED_ENTRY_IND)
2964set_rebuild_table_if_necessary (
set_table *tab)
2966 st_index_t bound = tab->entries_bound;
2968 if (bound == set_get_allocated_entries(tab))
2969 set_rebuild_table(tab);
2976set_insert(
set_table *tab, st_data_t key)
2978 set_table_entry *entry;
2981 st_hash_t hash_value;
2985 hash_value = set_do_hash(key, tab);
2987 set_rebuild_table_if_necessary(tab);
2988 if (!set_has_bins(tab)) {
2989 bin = set_find_entry(tab, hash_value, key);
2990 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2992 new_p = bin == UNDEFINED_ENTRY_IND;
2995 bin_ind = UNDEFINED_BIN_IND;
2998 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
3000 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3002 new_p = bin == UNDEFINED_ENTRY_IND;
3006 ind = tab->entries_bound++;
3008 entry->hash = hash_value;
3010 if (bin_ind != UNDEFINED_BIN_IND)
3011 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
3021 *new_tab = *old_tab;
3022 size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
3023 new_tab->
entries = (set_table_entry *)malloc(memsize);
3034 if (set_replace(new_tab, old_tab) == NULL) {
3035 set_free_table(new_tab);
3045set_update_range_for_deleted(
set_table *tab, st_index_t n)
3049 if (tab->entries_start == n) {
3050 st_index_t start = n + 1;
3051 st_index_t bound = tab->entries_bound;
3052 set_table_entry *entries = tab->
entries;
3053 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
3054 tab->entries_start = start;
3061#define MARK_SET_BIN_DELETED(tab, i) \
3063 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
3069set_table_delete(
set_table *tab, st_data_t *key)
3071 set_table_entry *entry;
3076 hash = set_do_hash(*key, tab);
3078 if (!set_has_bins(tab)) {
3079 bin = set_find_entry(tab, hash, *key);
3080 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3082 if (bin == UNDEFINED_ENTRY_IND) {
3087 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3088 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3090 if (bin_ind == UNDEFINED_BIN_IND) {
3093 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3094 MARK_SET_BIN_DELETED(tab, bin_ind);
3098 MARK_ENTRY_DELETED(entry);
3100 set_update_range_for_deleted(tab, bin);
3113set_general_foreach(
set_table *tab, set_foreach_check_callback_func *func,
3114 set_update_callback_func *replace, st_data_t arg,
3119 set_table_entry *entries, *curr_entry_ptr;
3120 enum st_retval retval;
3121 st_index_t i, rebuilds_num;
3124 int error_p, packed_p = !set_has_bins(tab);
3129 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3130 curr_entry_ptr = &entries[i];
3131 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3133 key = curr_entry_ptr->key;
3134 rebuilds_num = tab->rebuilds_num;
3135 hash = curr_entry_ptr->hash;
3136 retval = (*func)(key, arg, 0);
3138 if (retval == ST_REPLACE && replace) {
3139 retval = (*replace)(&key, arg, TRUE);
3140 curr_entry_ptr->key = key;
3143 if (rebuilds_num != tab->rebuilds_num) {
3146 packed_p = !set_has_bins(tab);
3148 i = set_find_entry(tab, hash, key);
3149 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3151 error_p = i == UNDEFINED_ENTRY_IND;
3154 i = set_find_table_entry_ind(tab, hash, key);
3155 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3157 error_p = i == UNDEFINED_ENTRY_IND;
3160 if (error_p && check_p) {
3162 retval = (*func)(0, arg, 1);
3165 curr_entry_ptr = &entries[i];
3178 st_data_t key = curr_entry_ptr->key;
3182 bin = set_find_entry(tab, hash, key);
3183 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3185 if (bin == UNDEFINED_ENTRY_IND)
3189 bin_ind = set_find_table_bin_ind(tab, hash, key);
3190 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3192 if (bin_ind == UNDEFINED_BIN_IND)
3194 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3195 MARK_SET_BIN_DELETED(tab, bin_ind);
3197 curr_entry_ptr = &entries[bin];
3198 MARK_ENTRY_DELETED(curr_entry_ptr);
3200 set_update_range_for_deleted(tab, bin);
3209set_foreach_with_replace(
set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3211 return set_general_foreach(tab, func, replace, arg, TRUE);
3215 set_foreach_callback_func *func;
3220set_apply_functor(st_data_t k, st_data_t d,
int _)
3222 const struct set_functor *f = (
void *)d;
3223 return f->func(k, f->arg);
3227set_table_foreach(
set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3229 const struct set_functor f = { func, arg };
3230 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3235set_foreach_check(
set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3236 st_data_t never ATTRIBUTE_UNUSED)
3238 return set_general_foreach(tab, func, NULL, arg, TRUE);
3244set_keys(
set_table *tab, st_data_t *keys, st_index_t size)
3246 st_index_t i, bound;
3247 st_data_t key, *keys_start, *keys_end;
3248 set_table_entry *curr_entry_ptr, *entries = tab->
entries;
3250 bound = tab->entries_bound;
3252 keys_end = keys + size;
3253 for (i = tab->entries_start; i < bound; i++) {
3254 if (keys == keys_end)
3256 curr_entry_ptr = &entries[i];
3257 key = curr_entry_ptr->key;
3258 if (! DELETED_ENTRY_P(curr_entry_ptr))
3262 return keys - keys_start;
3268 st_index_t num = tab->num_entries;
3269 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3271 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3272 set_rebuild_table_with(new_tab, tab);
3273 set_rebuild_move_table(new_tab, tab);
3274 set_rebuild_cleanup(tab);
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_cString
String class.
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
int len
Length of the buffer.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
set_table_entry * entries
Array of size 2^entry_power.
uintptr_t VALUE
Type that represents a Ruby object.