107#elif defined RUBY_EXPORT
109#include "internal/bits.h"
110#include "internal/hash.h"
111#include "internal/sanitizers.h"
112#include "internal/st.h"
113#include "ruby_assert.h"
123#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
124#define EXPECT(expr, val) __builtin_expect(expr, val)
125#define ATTRIBUTE_UNUSED __attribute__((unused))
127#define PREFETCH(addr, write_p)
128#define EXPECT(expr, val) (expr)
129#define ATTRIBUTE_UNUSED
133typedef st_index_t st_hash_t;
141#define type_numhash st_hashtype_num
147static int st_strcmp(st_data_t, st_data_t);
148static st_index_t strhash(st_data_t);
154static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
155static st_index_t strcasehash(st_data_t);
157 st_locale_insensitive_strcasecmp_i,
164#define ST_INIT_VAL 0xafafafafafafafaf
165#define ST_INIT_VAL_BYTE 0xafa
172#define malloc ruby_xmalloc
173#define calloc ruby_xcalloc
174#define realloc ruby_xrealloc
175#define free ruby_xfree
178#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
179#define PTR_EQUAL(tab, ptr, hash_val, key_) \
180 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
184#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
186 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
187 res = PTR_EQUAL(tab, ptr, hash_val, key); \
188 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
194 unsigned char entry_power;
198 unsigned char bin_power;
200 unsigned char size_ind;
203 st_index_t bins_words;
207#if SIZEOF_ST_INDEX_T == 8
226 {16, 17, 2, 0x10000},
227 {17, 18, 2, 0x20000},
228 {18, 19, 2, 0x40000},
229 {19, 20, 2, 0x80000},
230 {20, 21, 2, 0x100000},
231 {21, 22, 2, 0x200000},
232 {22, 23, 2, 0x400000},
233 {23, 24, 2, 0x800000},
234 {24, 25, 2, 0x1000000},
235 {25, 26, 2, 0x2000000},
236 {26, 27, 2, 0x4000000},
237 {27, 28, 2, 0x8000000},
238 {28, 29, 2, 0x10000000},
239 {29, 30, 2, 0x20000000},
240 {30, 31, 2, 0x40000000},
241 {31, 32, 2, 0x80000000},
242 {32, 33, 3, 0x200000000},
243 {33, 34, 3, 0x400000000},
244 {34, 35, 3, 0x800000000},
245 {35, 36, 3, 0x1000000000},
246 {36, 37, 3, 0x2000000000},
247 {37, 38, 3, 0x4000000000},
248 {38, 39, 3, 0x8000000000},
249 {39, 40, 3, 0x10000000000},
250 {40, 41, 3, 0x20000000000},
251 {41, 42, 3, 0x40000000000},
252 {42, 43, 3, 0x80000000000},
253 {43, 44, 3, 0x100000000000},
254 {44, 45, 3, 0x200000000000},
255 {45, 46, 3, 0x400000000000},
256 {46, 47, 3, 0x800000000000},
257 {47, 48, 3, 0x1000000000000},
258 {48, 49, 3, 0x2000000000000},
259 {49, 50, 3, 0x4000000000000},
260 {50, 51, 3, 0x8000000000000},
261 {51, 52, 3, 0x10000000000000},
262 {52, 53, 3, 0x20000000000000},
263 {53, 54, 3, 0x40000000000000},
264 {54, 55, 3, 0x80000000000000},
265 {55, 56, 3, 0x100000000000000},
266 {56, 57, 3, 0x200000000000000},
267 {57, 58, 3, 0x400000000000000},
268 {58, 59, 3, 0x800000000000000},
269 {59, 60, 3, 0x1000000000000000},
270 {60, 61, 3, 0x2000000000000000},
271 {61, 62, 3, 0x4000000000000000},
272 {62, 63, 3, 0x8000000000000000},
295 {16, 17, 2, 0x20000},
296 {17, 18, 2, 0x40000},
297 {18, 19, 2, 0x80000},
298 {19, 20, 2, 0x100000},
299 {20, 21, 2, 0x200000},
300 {21, 22, 2, 0x400000},
301 {22, 23, 2, 0x800000},
302 {23, 24, 2, 0x1000000},
303 {24, 25, 2, 0x2000000},
304 {25, 26, 2, 0x4000000},
305 {26, 27, 2, 0x8000000},
306 {27, 28, 2, 0x10000000},
307 {28, 29, 2, 0x20000000},
308 {29, 30, 2, 0x40000000},
309 {30, 31, 2, 0x80000000},
315#define RESERVED_HASH_VAL (~(st_hash_t) 0)
316#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
318static inline st_hash_t
319normalize_hash_value(st_hash_t hash)
323 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
327static inline st_hash_t
328do_hash(st_data_t key,
st_table *tab)
330 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
331 return normalize_hash_value(hash);
335#define MINIMAL_POWER2 2
337#if MINIMAL_POWER2 < 2
338#error "MINIMAL_POWER2 should be >= 2"
343#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
347get_power2(st_index_t size)
349 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
351 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
362static inline st_index_t
363get_bin(st_index_t *bins,
int s, st_index_t n)
365 return (s == 0 ? ((
unsigned char *) bins)[n]
366 : s == 1 ? ((unsigned short *) bins)[n]
367 : s == 2 ? ((unsigned int *) bins)[n]
368 : ((st_index_t *) bins)[n]);
374set_bin(st_index_t *bins,
int s, st_index_t n, st_index_t v)
376 if (s == 0) ((
unsigned char *) bins)[n] = (
unsigned char) v;
377 else if (s == 1) ((
unsigned short *) bins)[n] = (
unsigned short) v;
378 else if (s == 2) ((
unsigned int *) bins)[n] = (
unsigned int) v;
379 else ((st_index_t *) bins)[n] = v;
392#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
396#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
397#define UNDEFINED_BIN_IND (~(st_index_t) 0)
401#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
402#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
407#define MARK_BIN_DELETED(tab, i) \
409 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
414#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
415#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
416#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
420#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
421#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
426#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
427#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
430static inline unsigned int
433 return tab->size_ind;
437static inline st_index_t
440 return ((st_index_t) 1)<<tab->bin_power;
444static inline st_index_t
447 return get_bins_num(tab) - 1;
452static inline st_index_t
453hash_bin(st_hash_t hash_value,
st_table *tab)
455 return hash_value & bins_mask(tab);
459static inline st_index_t
460get_allocated_entries(
const st_table *tab)
462 return ((st_index_t) 1)<<tab->entry_power;
466static inline st_index_t
469 return features[tab->entry_power].bins_words *
sizeof (st_index_t);
476 memset(tab->bins, 0, bins_size(tab));
483 tab->num_entries = 0;
484 tab->entries_start = tab->entries_bound = 0;
485 if (tab->bins != NULL)
486 initialize_bins(tab);
494 int all, total, num, str, strcase;
499static int init_st = 0;
506 char fname[10+
sizeof(long)*3];
508 if (!collision.total)
return;
509 f = fopen((snprintf(fname,
sizeof(fname),
"/tmp/col%ld", (
long)getpid()), fname),
"w");
512 fprintf(f,
"collision: %d / %d (%6.2f)\n", collision.all, collision.total,
513 ((
double)collision.all / (collision.total)) * 100);
514 fprintf(f,
"num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
520st_init_existing_table_with_size(
st_table *tab,
const struct st_hash_type *type, st_index_t size)
527 const char *e = getenv(
"ST_HASH_LOG");
528 if (!e || !*e) init_st = 1;
537 n = get_power2(size);
544 tab->entry_power = n;
545 tab->bin_power = features[n].bin_power;
546 tab->size_ind = features[n].size_ind;
547 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
550 tab->bins = (st_index_t *) malloc(bins_size(tab));
552 if (tab->bins == NULL) {
558 tab->entries = (
st_table_entry *) malloc(get_allocated_entries(tab)
561 if (tab->entries == NULL) {
567 tab->rebuilds_num = 0;
575st_init_table_with_size(
const struct st_hash_type *type, st_index_t size)
584 st_init_existing_table_with_size(tab, type, size);
586 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
596st_table_size(
const struct st_table *tbl)
598 return tbl->num_entries;
606 return st_init_table_with_size(type, 0);
612st_init_numtable(
void)
614 return st_init_table(&type_numhash);
619st_init_numtable_with_size(st_index_t size)
621 return st_init_table_with_size(&type_numhash, size);
627st_init_strtable(
void)
629 return st_init_table(&type_strhash);
634st_init_strtable_with_size(st_index_t size)
636 return st_init_table_with_size(&type_strhash, size);
642st_init_strcasetable(
void)
644 return st_init_table(&type_strcasehash);
650st_init_strcasetable_with_size(st_index_t size)
652 return st_init_table_with_size(&type_strcasehash, size);
677 + (tab->bins == NULL ? 0 : bins_size(tab))
682find_table_entry_ind(
st_table *tab, st_hash_t hash_value, st_data_t key);
685find_table_bin_ind(
st_table *tab, st_hash_t hash_value, st_data_t key);
688find_table_bin_ind_direct(
st_table *table, st_hash_t hash_value, st_data_t key);
691find_table_bin_ptr_and_reserve(
st_table *tab, st_hash_t *hash_value,
692 st_data_t key, st_index_t *bin_ind);
699 if (type == &type_numhash) {
702 else if (type == &type_strhash) {
705 else if (type == &type_strcasehash) {
710#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
711#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
712#define collision_check 0
721#define REBUILD_THRESHOLD 4
723#if REBUILD_THRESHOLD < 2
724#error "REBUILD_THRESHOLD should be >= 2"
727static void rebuild_table_with(
st_table *
const new_tab,
st_table *
const tab);
728static void rebuild_move_table(
st_table *
const new_tab,
st_table *
const tab);
729static void rebuild_cleanup(
st_table *
const tab);
738 if ((2 * tab->num_entries <= get_allocated_entries(tab)
739 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
740 || tab->num_entries < (1 << MINIMAL_POWER2)) {
742 tab->num_entries = 0;
743 if (tab->bins != NULL)
744 initialize_bins(tab);
745 rebuild_table_with(tab, tab);
752 new_tab = st_init_table_with_size(tab->type,
753 2 * tab->num_entries - 1);
754 rebuild_table_with(new_tab, tab);
755 rebuild_move_table(new_tab, tab);
757 rebuild_cleanup(tab);
764 unsigned int size_ind;
770 new_entries = new_tab->entries;
773 bins = new_tab->bins;
774 size_ind = get_size_ind(new_tab);
775 st_index_t bound = tab->entries_bound;
778 for (i = tab->entries_start; i < bound; i++) {
779 curr_entry_ptr = &entries[i];
780 PREFETCH(entries + i + 1, 0);
781 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
783 if (&new_entries[ni] != curr_entry_ptr)
784 new_entries[ni] = *curr_entry_ptr;
785 if (EXPECT(bins != NULL, 1)) {
786 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
787 curr_entry_ptr->key);
788 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
790 new_tab->num_entries++;
794 assert(new_tab->num_entries == tab->num_entries);
800 tab->entry_power = new_tab->entry_power;
801 tab->bin_power = new_tab->bin_power;
802 tab->size_ind = new_tab->size_ind;
804 tab->bins = new_tab->bins;
806 tab->entries = new_tab->entries;
813 tab->entries_start = 0;
814 tab->entries_bound = tab->num_entries;
830static inline st_index_t
831secondary_hash(st_index_t ind,
st_table *tab, st_index_t *perturb)
834 ind = (ind << 2) + ind + *perturb + 1;
835 return hash_bin(ind, tab);
842static inline st_index_t
843find_entry(
st_table *tab, st_hash_t hash_value, st_data_t key)
849 bound = tab->entries_bound;
850 entries = tab->entries;
851 for (i = tab->entries_start; i < bound; i++) {
852 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
853 if (EXPECT(rebuilt_p, 0))
854 return REBUILT_TABLE_ENTRY_IND;
858 return UNDEFINED_ENTRY_IND;
870find_table_entry_ind(
st_table *tab, st_hash_t hash_value, st_data_t key)
874#ifdef QUADRATIC_PROBE
882 ind = hash_bin(hash_value, tab);
883#ifdef QUADRATIC_PROBE
886 perturb = hash_value;
890 bin = get_bin(tab->bins, get_size_ind(tab), ind);
891 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
892 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
893 if (EXPECT(rebuilt_p, 0))
894 return REBUILT_TABLE_ENTRY_IND;
898 else if (EMPTY_BIN_P(bin))
899 return UNDEFINED_ENTRY_IND;
900#ifdef QUADRATIC_PROBE
901 ind = hash_bin(ind + d, tab);
904 ind = secondary_hash(ind, tab, &perturb);
916find_table_bin_ind(
st_table *tab, st_hash_t hash_value, st_data_t key)
920#ifdef QUADRATIC_PROBE
928 ind = hash_bin(hash_value, tab);
929#ifdef QUADRATIC_PROBE
932 perturb = hash_value;
936 bin = get_bin(tab->bins, get_size_ind(tab), ind);
937 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
938 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
939 if (EXPECT(rebuilt_p, 0))
940 return REBUILT_TABLE_BIN_IND;
944 else if (EMPTY_BIN_P(bin))
945 return UNDEFINED_BIN_IND;
946#ifdef QUADRATIC_PROBE
947 ind = hash_bin(ind + d, tab);
950 ind = secondary_hash(ind, tab, &perturb);
961find_table_bin_ind_direct(
st_table *tab, st_hash_t hash_value, st_data_t key)
964#ifdef QUADRATIC_PROBE
971 ind = hash_bin(hash_value, tab);
972#ifdef QUADRATIC_PROBE
975 perturb = hash_value;
979 bin = get_bin(tab->bins, get_size_ind(tab), ind);
980 if (EMPTY_OR_DELETED_BIN_P(bin))
982#ifdef QUADRATIC_PROBE
983 ind = hash_bin(ind + d, tab);
986 ind = secondary_hash(ind, tab, &perturb);
1002find_table_bin_ptr_and_reserve(
st_table *tab, st_hash_t *hash_value,
1003 st_data_t key, st_index_t *bin_ind)
1005 int eq_p, rebuilt_p;
1007 st_hash_t curr_hash_value = *hash_value;
1008#ifdef QUADRATIC_PROBE
1013 st_index_t entry_index;
1014 st_index_t first_deleted_bin_ind;
1017 ind = hash_bin(curr_hash_value, tab);
1018#ifdef QUADRATIC_PROBE
1021 perturb = curr_hash_value;
1024 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1025 entries = tab->entries;
1027 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1028 if (EMPTY_BIN_P(entry_index)) {
1030 entry_index = UNDEFINED_ENTRY_IND;
1031 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1033 ind = first_deleted_bin_ind;
1034 MARK_BIN_EMPTY(tab, ind);
1038 else if (! DELETED_BIN_P(entry_index)) {
1039 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1040 if (EXPECT(rebuilt_p, 0))
1041 return REBUILT_TABLE_ENTRY_IND;
1045 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1046 first_deleted_bin_ind = ind;
1047#ifdef QUADRATIC_PROBE
1048 ind = hash_bin(ind + d, tab);
1051 ind = secondary_hash(ind, tab, &perturb);
1062st_lookup(
st_table *tab, st_data_t key, st_data_t *value)
1065 st_hash_t hash = do_hash(key, tab);
1068 if (tab->bins == NULL) {
1069 bin = find_entry(tab, hash, key);
1070 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1072 if (bin == UNDEFINED_ENTRY_IND)
1076 bin = find_table_entry_ind(tab, hash, key);
1077 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1079 if (bin == UNDEFINED_ENTRY_IND)
1084 *value = tab->entries[bin].record;
1091st_get_key(
st_table *tab, st_data_t key, st_data_t *result)
1094 st_hash_t hash = do_hash(key, tab);
1097 if (tab->bins == NULL) {
1098 bin = find_entry(tab, hash, key);
1099 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1101 if (bin == UNDEFINED_ENTRY_IND)
1105 bin = find_table_entry_ind(tab, hash, key);
1106 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1108 if (bin == UNDEFINED_ENTRY_IND)
1113 *result = tab->entries[bin].key;
1119rebuild_table_if_necessary (
st_table *tab)
1121 st_index_t bound = tab->entries_bound;
1123 if (bound == get_allocated_entries(tab))
1131st_insert(
st_table *tab, st_data_t key, st_data_t value)
1136 st_hash_t hash_value;
1140 hash_value = do_hash(key, tab);
1142 rebuild_table_if_necessary(tab);
1143 if (tab->bins == NULL) {
1144 bin = find_entry(tab, hash_value, key);
1145 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1147 new_p = bin == UNDEFINED_ENTRY_IND;
1150 bin_ind = UNDEFINED_BIN_IND;
1153 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1155 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1157 new_p = bin == UNDEFINED_ENTRY_IND;
1161 ind = tab->entries_bound++;
1162 entry = &tab->entries[ind];
1163 entry->hash = hash_value;
1165 entry->record = value;
1166 if (bin_ind != UNDEFINED_BIN_IND)
1167 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1170 tab->entries[bin].record = value;
1177st_add_direct_with_hash(
st_table *tab,
1178 st_data_t key, st_data_t value, st_hash_t hash)
1184 assert(hash != RESERVED_HASH_VAL);
1186 rebuild_table_if_necessary(tab);
1187 ind = tab->entries_bound++;
1188 entry = &tab->entries[ind];
1191 entry->record = value;
1193 if (tab->bins != NULL) {
1194 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1195 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1200rb_st_add_direct_with_hash(
st_table *tab,
1201 st_data_t key, st_data_t value, st_hash_t hash)
1203 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1209st_add_direct(
st_table *tab, st_data_t key, st_data_t value)
1211 st_hash_t hash_value;
1213 hash_value = do_hash(key, tab);
1214 st_add_direct_with_hash(tab, key, value, hash_value);
1221st_insert2(
st_table *tab, st_data_t key, st_data_t value,
1222 st_data_t (*func)(st_data_t))
1227 st_hash_t hash_value;
1231 hash_value = do_hash(key, tab);
1233 rebuild_table_if_necessary (tab);
1234 if (tab->bins == NULL) {
1235 bin = find_entry(tab, hash_value, key);
1236 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1238 new_p = bin == UNDEFINED_ENTRY_IND;
1241 bin_ind = UNDEFINED_BIN_IND;
1244 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1246 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1248 new_p = bin == UNDEFINED_ENTRY_IND;
1253 ind = tab->entries_bound++;
1254 entry = &tab->entries[ind];
1255 entry->hash = hash_value;
1257 entry->record = value;
1258 if (bin_ind != UNDEFINED_BIN_IND)
1259 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1262 tab->entries[bin].record = value;
1270 *new_tab = *old_tab;
1271 if (old_tab->bins == NULL)
1272 new_tab->bins = NULL;
1274 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1276 if (new_tab->bins == NULL) {
1281 new_tab->entries = (
st_table_entry *) malloc(get_allocated_entries(old_tab)
1284 if (new_tab->entries == NULL) {
1289 get_allocated_entries(old_tab));
1290 if (old_tab->bins != NULL)
1291 MEMCPY(new_tab->bins, old_tab->bins,
char, bins_size(old_tab));
1304 if (new_tab == NULL)
1308 if (st_replace(new_tab, old_tab) == NULL) {
1309 st_free_table(new_tab);
1319update_range_for_deleted(
st_table *tab, st_index_t n)
1323 if (tab->entries_start == n) {
1324 st_index_t start = n + 1;
1325 st_index_t bound = tab->entries_bound;
1327 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1328 tab->entries_start = start;
1337st_general_delete(
st_table *tab, st_data_t *key, st_data_t *value)
1344 hash = do_hash(*key, tab);
1346 if (tab->bins == NULL) {
1347 bin = find_entry(tab, hash, *key);
1348 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1350 if (bin == UNDEFINED_ENTRY_IND) {
1351 if (value != 0) *value = 0;
1356 bin_ind = find_table_bin_ind(tab, hash, *key);
1357 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1359 if (bin_ind == UNDEFINED_BIN_IND) {
1360 if (value != 0) *value = 0;
1363 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1364 MARK_BIN_DELETED(tab, bin_ind);
1366 entry = &tab->entries[bin];
1368 if (value != 0) *value = entry->record;
1369 MARK_ENTRY_DELETED(entry);
1371 update_range_for_deleted(tab, bin);
1376st_delete(
st_table *tab, st_data_t *key, st_data_t *value)
1378 return st_general_delete(tab, key, value);
1387st_delete_safe(
st_table *tab, st_data_t *key, st_data_t *value,
1388 st_data_t never ATTRIBUTE_UNUSED)
1390 return st_general_delete(tab, key, value);
1398st_shift(
st_table *tab, st_data_t *key, st_data_t *value)
1400 st_index_t i, bound;
1405 entries = tab->entries;
1406 bound = tab->entries_bound;
1407 for (i = tab->entries_start; i < bound; i++) {
1408 curr_entry_ptr = &entries[i];
1409 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1410 st_hash_t entry_hash = curr_entry_ptr->hash;
1411 st_data_t entry_key = curr_entry_ptr->key;
1413 if (value != 0) *value = curr_entry_ptr->record;
1416 if (tab->bins == NULL) {
1417 bin = find_entry(tab, entry_hash, entry_key);
1418 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1419 entries = tab->entries;
1422 curr_entry_ptr = &entries[bin];
1425 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1426 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1427 entries = tab->entries;
1430 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1432 MARK_BIN_DELETED(tab, bin_ind);
1434 MARK_ENTRY_DELETED(curr_entry_ptr);
1436 update_range_for_deleted(tab, i);
1440 if (value != 0) *value = 0;
1446st_cleanup_safe(
st_table *tab ATTRIBUTE_UNUSED,
1447 st_data_t never ATTRIBUTE_UNUSED)
1461st_update(
st_table *tab, st_data_t key,
1462 st_update_callback_func *func, st_data_t arg)
1468 st_data_t value = 0, old_key;
1469 int retval, existing;
1470 st_hash_t hash = do_hash(key, tab);
1473 entries = tab->entries;
1474 if (tab->bins == NULL) {
1475 bin = find_entry(tab, hash, key);
1476 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1478 existing = bin != UNDEFINED_ENTRY_IND;
1479 entry = &entries[bin];
1480 bin_ind = UNDEFINED_BIN_IND;
1483 bin_ind = find_table_bin_ind(tab, hash, key);
1484 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1486 existing = bin_ind != UNDEFINED_BIN_IND;
1488 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1489 entry = &entries[bin];
1494 value = entry->record;
1497 retval = (*func)(&key, &value, arg, existing);
1501 st_add_direct_with_hash(tab, key, value, hash);
1504 if (old_key != key) {
1507 entry->record = value;
1511 if (bin_ind != UNDEFINED_BIN_IND)
1512 MARK_BIN_DELETED(tab, bin_ind);
1513 MARK_ENTRY_DELETED(entry);
1515 update_range_for_deleted(tab, bin);
1531st_general_foreach(
st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1537 enum st_retval retval;
1538 st_index_t i, rebuilds_num;
1541 int error_p, packed_p = tab->bins == NULL;
1543 entries = tab->entries;
1546 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1547 curr_entry_ptr = &entries[i];
1548 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1550 key = curr_entry_ptr->key;
1551 rebuilds_num = tab->rebuilds_num;
1552 hash = curr_entry_ptr->hash;
1553 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1555 if (retval == ST_REPLACE && replace) {
1557 value = curr_entry_ptr->record;
1558 retval = (*replace)(&key, &value, arg, TRUE);
1559 curr_entry_ptr->key = key;
1560 curr_entry_ptr->record = value;
1563 if (rebuilds_num != tab->rebuilds_num) {
1565 entries = tab->entries;
1566 packed_p = tab->bins == NULL;
1568 i = find_entry(tab, hash, key);
1569 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1571 error_p = i == UNDEFINED_ENTRY_IND;
1574 i = find_table_entry_ind(tab, hash, key);
1575 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1577 error_p = i == UNDEFINED_ENTRY_IND;
1580 if (error_p && check_p) {
1582 retval = (*func)(0, 0, arg, 1);
1585 curr_entry_ptr = &entries[i];
1598 st_data_t key = curr_entry_ptr->key;
1602 bin = find_entry(tab, hash, key);
1603 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1605 if (bin == UNDEFINED_ENTRY_IND)
1609 bin_ind = find_table_bin_ind(tab, hash, key);
1610 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1612 if (bin_ind == UNDEFINED_BIN_IND)
1614 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1615 MARK_BIN_DELETED(tab, bin_ind);
1617 curr_entry_ptr = &entries[bin];
1618 MARK_ENTRY_DELETED(curr_entry_ptr);
1620 update_range_for_deleted(tab, bin);
1629st_foreach_with_replace(
st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1631 return st_general_foreach(tab, func, replace, arg, TRUE);
1635 st_foreach_callback_func *func;
1640apply_functor(st_data_t k, st_data_t v, st_data_t d,
int _)
1642 const struct functor *f = (
void *)d;
1643 return f->func(k, v, f->arg);
1647st_foreach(
st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1649 const struct functor f = { func, arg };
1650 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1655st_foreach_check(
st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1656 st_data_t never ATTRIBUTE_UNUSED)
1658 return st_general_foreach(tab, func, 0, arg, TRUE);
1663static inline st_index_t
1664st_general_keys(
st_table *tab, st_data_t *keys, st_index_t size)
1666 st_index_t i, bound;
1667 st_data_t key, *keys_start, *keys_end;
1670 bound = tab->entries_bound;
1672 keys_end = keys + size;
1673 for (i = tab->entries_start; i < bound; i++) {
1674 if (keys == keys_end)
1676 curr_entry_ptr = &entries[i];
1677 key = curr_entry_ptr->key;
1678 if (! DELETED_ENTRY_P(curr_entry_ptr))
1682 return keys - keys_start;
1686st_keys(
st_table *tab, st_data_t *keys, st_index_t size)
1688 return st_general_keys(tab, keys, size);
1693st_keys_check(
st_table *tab, st_data_t *keys, st_index_t size,
1694 st_data_t never ATTRIBUTE_UNUSED)
1696 return st_general_keys(tab, keys, size);
1701static inline st_index_t
1702st_general_values(
st_table *tab, st_data_t *values, st_index_t size)
1704 st_index_t i, bound;
1705 st_data_t *values_start, *values_end;
1708 values_start = values;
1709 values_end = values + size;
1710 bound = tab->entries_bound;
1711 for (i = tab->entries_start; i < bound; i++) {
1712 if (values == values_end)
1714 curr_entry_ptr = &entries[i];
1715 if (! DELETED_ENTRY_P(curr_entry_ptr))
1716 *values++ = curr_entry_ptr->record;
1719 return values - values_start;
1723st_values(
st_table *tab, st_data_t *values, st_index_t size)
1725 return st_general_values(tab, values, size);
1730st_values_check(
st_table *tab, st_data_t *values, st_index_t size,
1731 st_data_t never ATTRIBUTE_UNUSED)
1733 return st_general_values(tab, values, size);
1736#define FNV1_32A_INIT 0x811c9dc5
1741#define FNV_32_PRIME 0x01000193
1744#ifndef UNALIGNED_WORD_ACCESS
1745# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1746 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1747 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1748 defined(__mc68020__)
1749# define UNALIGNED_WORD_ACCESS 1
1752#ifndef UNALIGNED_WORD_ACCESS
1753# define UNALIGNED_WORD_ACCESS 0
1759#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1760#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1762#if ST_INDEX_BITS <= 32
1763#define C1 (st_index_t)0xcc9e2d51
1764#define C2 (st_index_t)0x1b873593
1766#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1767#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1769NO_SANITIZE(
"unsigned-integer-overflow",
static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1770NO_SANITIZE(
"unsigned-integer-overflow",
static inline st_index_t murmur_finish(st_index_t h));
1771NO_SANITIZE(
"unsigned-integer-overflow",
extern st_index_t st_hash(
const void *ptr,
size_t len, st_index_t h));
1773static inline st_index_t
1774murmur_step(st_index_t h, st_index_t k)
1776#if ST_INDEX_BITS <= 32
1792static inline st_index_t
1793murmur_finish(st_index_t h)
1795#if ST_INDEX_BITS <= 32
1799 const st_index_t c1 = 0x85ebca6b;
1800 const st_index_t c2 = 0xc2b2ae35;
1806 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1807 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1809#if ST_INDEX_BITS > 64
1826st_hash(
const void *ptr,
size_t len, st_index_t h)
1828 const char *data = ptr;
1832#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1833#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1834#if SIZEOF_ST_INDEX_T > 4
1835#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1836#if SIZEOF_ST_INDEX_T > 8
1837#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1838 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1839#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1841#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1843#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1846 if (
len >=
sizeof(st_index_t)) {
1847#if !UNALIGNED_WORD_ACCESS
1848 int align = (int)((st_data_t)data %
sizeof(st_index_t));
1854#ifdef WORDS_BIGENDIAN
1855# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1856 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1858# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1859 t |= data_at(n) << CHAR_BIT*(n)
1865#ifdef WORDS_BIGENDIAN
1866 t >>= (CHAR_BIT * align) - CHAR_BIT;
1868 t <<= (CHAR_BIT * align);
1871 data +=
sizeof(st_index_t)-align;
1872 len -=
sizeof(st_index_t)-align;
1874 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1875 sr = CHAR_BIT * align;
1877 while (
len >=
sizeof(st_index_t)) {
1878 d = *(st_index_t *)data;
1879#ifdef WORDS_BIGENDIAN
1880 t = (t << sr) | (d >> sl);
1882 t = (t >> sr) | (d << sl);
1884 h = murmur_step(h, t);
1886 data +=
sizeof(st_index_t);
1887 len -=
sizeof(st_index_t);
1890 pack =
len < (size_t)align ? (
int)
len : align;
1893#ifdef WORDS_BIGENDIAN
1894# define UNALIGNED_ADD(n) case (n) + 1: \
1895 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1897# define UNALIGNED_ADD(n) case (n) + 1: \
1898 d |= data_at(n) << CHAR_BIT*(n)
1903#ifdef WORDS_BIGENDIAN
1904 t = (t << sr) | (d >> sl);
1906 t = (t >> sr) | (d << sl);
1909 if (
len < (
size_t)align)
goto skip_tail;
1911 h = murmur_step(h, t);
1917#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1918#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1920#define aligned_data data
1924 h = murmur_step(h, *(st_index_t *)aligned_data);
1925 data +=
sizeof(st_index_t);
1926 len -=
sizeof(st_index_t);
1927 }
while (
len >=
sizeof(st_index_t));
1933#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1935#if SIZEOF_ST_INDEX_T > 4
1936 case 7: t |= data_at(6) << 48;
1937 case 6: t |= data_at(5) << 40;
1938 case 5: t |= data_at(4) << 32;
1940 t |= (st_index_t)*(uint32_t*)aligned_data;
1944 case 3: t |= data_at(2) << 16;
1945 case 2: t |= data_at(1) << 8;
1946 case 1: t |= data_at(0);
1948#ifdef WORDS_BIGENDIAN
1949# define UNALIGNED_ADD(n) case (n) + 1: \
1950 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1952# define UNALIGNED_ADD(n) case (n) + 1: \
1953 t |= data_at(n) << CHAR_BIT*(n)
1961 h ^= t; h -= ROTL(t, 7);
1967 return murmur_finish(h);
1971st_hash_uint32(st_index_t h, uint32_t i)
1973 return murmur_step(h, i);
1976NO_SANITIZE(
"unsigned-integer-overflow",
extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1978st_hash_uint(st_index_t h, st_index_t i)
1983#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1984 h = murmur_step(h, i >> 8*8);
1986 h = murmur_step(h, i);
1991st_hash_end(st_index_t h)
1993 h = murmur_finish(h);
1999rb_st_hash_start(st_index_t h)
2005strhash(st_data_t arg)
2007 register const char *
string = (
const char *)arg;
2008 return st_hash(
string, strlen(
string), FNV1_32A_INIT);
2012st_locale_insensitive_strcasecmp(
const char *s1,
const char *s2)
2019 if (c1 ==
'\0' || c2 ==
'\0') {
2020 if (c1 !=
'\0')
return 1;
2021 if (c2 !=
'\0')
return -1;
2024 if ((
'A' <= c1) && (c1 <=
'Z')) c1 +=
'a' -
'A';
2025 if ((
'A' <= c2) && (c2 <=
'Z')) c2 +=
'a' -
'A';
2036st_locale_insensitive_strncasecmp(
const char *s1,
const char *s2,
size_t n)
2041 for (i = 0; i < n; i++) {
2044 if (c1 ==
'\0' || c2 ==
'\0') {
2045 if (c1 !=
'\0')
return 1;
2046 if (c2 !=
'\0')
return -1;
2049 if ((
'A' <= c1) && (c1 <=
'Z')) c1 +=
'a' -
'A';
2050 if ((
'A' <= c2) && (c2 <=
'Z')) c2 +=
'a' -
'A';
2062st_strcmp(st_data_t lhs, st_data_t rhs)
2064 const char *s1 = (
char *)lhs;
2065 const char *s2 = (
char *)rhs;
2066 return strcmp(s1, s2);
2070st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2072 const char *s1 = (
char *)lhs;
2073 const char *s2 = (
char *)rhs;
2074 return st_locale_insensitive_strcasecmp(s1, s2);
2077NO_SANITIZE(
"unsigned-integer-overflow", PUREFUNC(
static st_index_t strcasehash(st_data_t)));
2079strcasehash(st_data_t arg)
2081 register const char *
string = (
const char *)arg;
2082 register st_index_t hval = FNV1_32A_INIT;
2088 unsigned int c = (
unsigned char)*
string++;
2089 if ((
unsigned int)(c -
'A') <= (
'Z' -
'A')) c +=
'a' -
'A';
2093 hval *= FNV_32_PRIME;
2099st_numcmp(st_data_t x, st_data_t y)
2105st_numhash(st_data_t n)
2107 enum {s1 = 11, s2 = 3};
2108 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2116st_expand_table(
st_table *tab, st_index_t siz)
2121 if (siz <= get_allocated_entries(tab))
2124 tmp = st_init_table_with_size(tab->type, siz);
2125 n = get_allocated_entries(tab);
2130 tab->entry_power = tmp->entry_power;
2131 tab->bin_power = tmp->bin_power;
2132 tab->size_ind = tmp->size_ind;
2133 tab->entries = tmp->entries;
2135 tab->rebuilds_num++;
2144 int eq_p, rebuilt_p;
2151 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2152 p = &tab->entries[i];
2153 if (DELETED_ENTRY_P(p))
2155 for (j = i + 1; j < tab->entries_bound; j++) {
2156 q = &tab->entries[j];
2157 if (DELETED_ENTRY_P(q))
2159 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2160 if (EXPECT(rebuilt_p, 0))
2164 MARK_ENTRY_DELETED(q);
2166 update_range_for_deleted(tab, j);
2178 int eq_p, rebuilt_p;
2180 st_index_t
const n = bins_size(tab);
2181 unsigned int const size_ind = get_size_ind(tab);
2182 st_index_t *bins = realloc(tab->bins, n);
2184 initialize_bins(tab);
2185 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2188#ifdef QUADRATIC_PROBE
2191 st_index_t perturb = p->hash;
2194 if (DELETED_ENTRY_P(p))
2197 ind = hash_bin(p->hash, tab);
2199 st_index_t bin = get_bin(bins, size_ind, ind);
2200 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2202 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2207 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2208 if (EXPECT(rebuilt_p, 0))
2212 q->record = p->record;
2213 MARK_ENTRY_DELETED(p);
2215 update_range_for_deleted(tab, bin);
2220#ifdef QUADRATIC_PROBE
2221 ind = hash_bin(ind + d, tab);
2224 ind = secondary_hash(ind, tab, &perturb);
2242 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2243 rebuilt_p = st_rehash_linear(tab);
2245 rebuilt_p = st_rehash_indexed(tab);
2246 }
while (rebuilt_p);
2250st_stringify(
VALUE key)
2253 rb_hash_key_str(key) : key;
2259 st_data_t k = st_stringify(key);
2261 e.hash = do_hash(k, tab);
2265 tab->entries[tab->entries_bound++] = e;
2276 for (i = 0; i < argc; ) {
2277 st_data_t k = st_stringify(argv[i++]);
2278 st_data_t v = argv[i++];
2279 st_insert(tab, k, v);
2291 for (i = 0; i < argc; ) {
2292 VALUE key = argv[i++];
2293 VALUE val = argv[i++];
2294 st_insert_single(tab, hash, key, val);
2304rb_hash_bulk_insert_into_st_table(
long argc,
const VALUE *argv,
VALUE hash)
2306 st_index_t n, size = argc / 2;
2307 st_table *tab = RHASH_ST_TABLE(hash);
2309 tab = RHASH_TBL_RAW(hash);
2310 n = tab->entries_bound + size;
2311 st_expand_table(tab, n);
2312 if (UNLIKELY(tab->num_entries))
2313 st_insert_generic(tab, argc, argv, hash);
2315 st_insert_single(tab, hash, argv[0], argv[1]);
2316 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2317 st_insert_linear(tab, argc, argv, hash);
2319 st_insert_generic(tab, argc, argv, hash);
2325 st_index_t num = tab->num_entries;
2326 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2328 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2329 rebuild_table_with(new_tab, tab);
2330 rebuild_move_table(new_tab, tab);
2331 rebuild_cleanup(tab);
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_cString
String class.
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
int len
Length of the buffer.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
uintptr_t VALUE
Type that represents a Ruby object.