107#elif defined RUBY_EXPORT
109#include "internal/bits.h"
110#include "internal/hash.h"
111#include "internal/sanitizers.h"
112#include "internal/set_table.h"
113#include "internal/st.h"
114#include "ruby_assert.h"
124#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
125#define EXPECT(expr, val) __builtin_expect(expr, val)
126#define ATTRIBUTE_UNUSED __attribute__((unused))
128#define PREFETCH(addr, write_p)
129#define EXPECT(expr, val) (expr)
130#define ATTRIBUTE_UNUSED
134typedef st_index_t st_hash_t;
142#define type_numhash st_hashtype_num
148static int st_strcmp(st_data_t, st_data_t);
149static st_index_t strhash(st_data_t);
155static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
156static st_index_t strcasehash(st_data_t);
158 st_locale_insensitive_strcasecmp_i,
/* Fill patterns: ST_INIT_VAL is a 64-bit word made of the byte 0xaf
   repeated eight times, and ST_INIT_VAL_BYTE is that single byte.  The
   byte form must fit in an unsigned char so that filling memory with it
   (e.g. via memset, which converts its value argument to unsigned char)
   produces words equal to ST_INIT_VAL.  */
#define ST_INIT_VAL 0xafafafafafafafaf
#define ST_INIT_VAL_BYTE 0xaf
173#define malloc ruby_xmalloc
174#define calloc ruby_xcalloc
175#define realloc ruby_xrealloc
176#define free ruby_xfree
/* EQUAL: true when keys x and y are the same value, or when the table
   type's compare callback returns 0 for them.  The identity test comes
   first, so the callback is not invoked for identical keys.  x and y
   may be evaluated more than once.  */
179#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
/* PTR_EQUAL: true when the entry at ptr stores hash_val and its key
   matches key_ under EQUAL.  The hash comparison runs first, so the
   key comparison is only reached for entries with an equal hash.  */
180#define PTR_EQUAL(tab, ptr, hash_val, key_) \
181 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
185#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
187 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
188 res = PTR_EQUAL(tab, ptr, hash_val, key); \
189 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
195 unsigned char entry_power;
199 unsigned char bin_power;
201 unsigned char size_ind;
204 st_index_t bins_words;
208#if SIZEOF_ST_INDEX_T == 8
227 {16, 17, 2, 0x10000},
228 {17, 18, 2, 0x20000},
229 {18, 19, 2, 0x40000},
230 {19, 20, 2, 0x80000},
231 {20, 21, 2, 0x100000},
232 {21, 22, 2, 0x200000},
233 {22, 23, 2, 0x400000},
234 {23, 24, 2, 0x800000},
235 {24, 25, 2, 0x1000000},
236 {25, 26, 2, 0x2000000},
237 {26, 27, 2, 0x4000000},
238 {27, 28, 2, 0x8000000},
239 {28, 29, 2, 0x10000000},
240 {29, 30, 2, 0x20000000},
241 {30, 31, 2, 0x40000000},
242 {31, 32, 2, 0x80000000},
243 {32, 33, 3, 0x200000000},
244 {33, 34, 3, 0x400000000},
245 {34, 35, 3, 0x800000000},
246 {35, 36, 3, 0x1000000000},
247 {36, 37, 3, 0x2000000000},
248 {37, 38, 3, 0x4000000000},
249 {38, 39, 3, 0x8000000000},
250 {39, 40, 3, 0x10000000000},
251 {40, 41, 3, 0x20000000000},
252 {41, 42, 3, 0x40000000000},
253 {42, 43, 3, 0x80000000000},
254 {43, 44, 3, 0x100000000000},
255 {44, 45, 3, 0x200000000000},
256 {45, 46, 3, 0x400000000000},
257 {46, 47, 3, 0x800000000000},
258 {47, 48, 3, 0x1000000000000},
259 {48, 49, 3, 0x2000000000000},
260 {49, 50, 3, 0x4000000000000},
261 {50, 51, 3, 0x8000000000000},
262 {51, 52, 3, 0x10000000000000},
263 {52, 53, 3, 0x20000000000000},
264 {53, 54, 3, 0x40000000000000},
265 {54, 55, 3, 0x80000000000000},
266 {55, 56, 3, 0x100000000000000},
267 {56, 57, 3, 0x200000000000000},
268 {57, 58, 3, 0x400000000000000},
269 {58, 59, 3, 0x800000000000000},
270 {59, 60, 3, 0x1000000000000000},
271 {60, 61, 3, 0x2000000000000000},
272 {61, 62, 3, 0x4000000000000000},
273 {62, 63, 3, 0x8000000000000000},
296 {16, 17, 2, 0x20000},
297 {17, 18, 2, 0x40000},
298 {18, 19, 2, 0x80000},
299 {19, 20, 2, 0x100000},
300 {20, 21, 2, 0x200000},
301 {21, 22, 2, 0x400000},
302 {22, 23, 2, 0x800000},
303 {23, 24, 2, 0x1000000},
304 {24, 25, 2, 0x2000000},
305 {25, 26, 2, 0x4000000},
306 {26, 27, 2, 0x8000000},
307 {27, 28, 2, 0x10000000},
308 {28, 29, 2, 0x20000000},
309 {29, 30, 2, 0x40000000},
310 {30, 31, 2, 0x80000000},
/* Hash value with all bits set.  It is set aside as the marker that
   MARK_ENTRY_DELETED stores into an entry and DELETED_ENTRY_P tests
   for, so it must never be stored as the hash of a live entry.  */
316#define RESERVED_HASH_VAL (~(st_hash_t) 0)
/* Value that normalize_hash_value() returns in place of a computed
   hash that happens to equal RESERVED_HASH_VAL.  */
317#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
319static inline st_hash_t
320normalize_hash_value(st_hash_t hash)
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
328static inline st_hash_t
329do_hash(st_data_t key,
st_table *tab)
331 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
332 return normalize_hash_value(hash);
336#define MINIMAL_POWER2 2
338#if MINIMAL_POWER2 < 2
339#error "MINIMAL_POWER2 should be >= 2"
344#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
348get_power2(st_index_t size)
350 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
352 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
363static inline st_index_t
364get_bin(st_index_t *bins,
int s, st_index_t n)
366 return (s == 0 ? ((
unsigned char *) bins)[n]
367 : s == 1 ? ((unsigned short *) bins)[n]
368 : s == 2 ? ((unsigned int *) bins)[n]
369 : ((st_index_t *) bins)[n]);
375set_bin(st_index_t *bins,
int s, st_index_t n, st_index_t v)
377 if (s == 0) ((
unsigned char *) bins)[n] = (
unsigned char) v;
378 else if (s == 1) ((
unsigned short *) bins)[n] = (
unsigned short) v;
379 else if (s == 2) ((
unsigned int *) bins)[n] = (
unsigned int) v;
380 else ((st_index_t *) bins)[n] = v;
393#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
/* Sentinel index (all bits set) returned by the lookup helpers when no
   matching entry / bin was found.  */
397#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
398#define UNDEFINED_BIN_IND (~(st_index_t) 0)
/* Sentinel index (all bits set except the lowest) returned by the
   lookup helpers when the table's rebuilds_num changed during a key
   comparison, i.e. the table was rebuilt underneath the search.  */
402#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
403#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
408#define MARK_BIN_DELETED(tab, i) \
410 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
/* Predicates on a raw bin value b.  EMPTY_OR_DELETED_BIN_P uses a
   single `<=` comparison; NOTE(review): this presumably relies on
   EMPTY_BIN and DELETED_BIN (defined outside this excerpt) being the
   two smallest bin values -- confirm against their definitions.  */
415#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
416#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
417#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
/* The same predicates applied to bin number i of table tab; the bin is
   read with get_bin() using the table's size index.  */
421#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
423#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
/* An entry is marked deleted by overwriting its hash field with
   RESERVED_HASH_VAL; DELETED_ENTRY_P tests for that marker.  */
427#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
428#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
431static inline unsigned int
434 return tab->size_ind;
438static inline st_index_t
441 return ((st_index_t) 1)<<tab->bin_power;
445static inline st_index_t
448 return get_bins_num(tab) - 1;
453static inline st_index_t
454hash_bin(st_hash_t hash_value,
st_table *tab)
456 return hash_value & bins_mask(tab);
460static inline st_index_t
461get_allocated_entries(
const st_table *tab)
463 return ((st_index_t) 1)<<tab->entry_power;
467static inline st_index_t
470 return features[tab->entry_power].bins_words *
sizeof (st_index_t);
477 memset(tab->bins, 0, bins_size(tab));
484 tab->num_entries = 0;
485 tab->entries_start = tab->entries_bound = 0;
486 if (tab->bins != NULL)
487 initialize_bins(tab);
495 int all, total, num, str, strcase;
500static int init_st = 0;
507 char fname[10+
sizeof(long)*3];
509 if (!collision.total)
return;
510 f = fopen((snprintf(fname,
sizeof(fname),
"/tmp/col%ld", (
long)getpid()), fname),
"w");
513 fprintf(f,
"collision: %d / %d (%6.2f)\n", collision.all, collision.total,
514 ((
double)collision.all / (collision.total)) * 100);
515 fprintf(f,
"num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
521st_init_existing_table_with_size(
st_table *tab,
const struct st_hash_type *type, st_index_t size)
528 const char *e = getenv(
"ST_HASH_LOG");
529 if (!e || !*e) init_st = 1;
538 n = get_power2(size);
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
553 if (tab->bins == NULL) {
559 tab->entries = (
st_table_entry *) malloc(get_allocated_entries(tab)
562 if (tab->entries == NULL) {
568 tab->rebuilds_num = 0;
576st_init_table_with_size(
const struct st_hash_type *type, st_index_t size)
585 st_init_existing_table_with_size(tab, type, size);
587 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
597st_table_size(
const struct st_table *tbl)
599 return tbl->num_entries;
607 return st_init_table_with_size(type, 0);
613st_init_numtable(
void)
615 return st_init_table(&type_numhash);
620st_init_numtable_with_size(st_index_t size)
622 return st_init_table_with_size(&type_numhash, size);
628st_init_strtable(
void)
630 return st_init_table(&type_strhash);
635st_init_strtable_with_size(st_index_t size)
637 return st_init_table_with_size(&type_strhash, size);
643st_init_strcasetable(
void)
645 return st_init_table(&type_strcasehash);
651st_init_strcasetable_with_size(st_index_t size)
653 return st_init_table_with_size(&type_strcasehash, size);
679 + (tab->bins == NULL ? 0 : bins_size(tab))
684find_table_entry_ind(
st_table *tab, st_hash_t hash_value, st_data_t key);
687find_table_bin_ind(
st_table *tab, st_hash_t hash_value, st_data_t key);
690find_table_bin_ind_direct(
st_table *table, st_hash_t hash_value, st_data_t key);
693find_table_bin_ptr_and_reserve(
st_table *tab, st_hash_t *hash_value,
694 st_data_t key, st_index_t *bin_ind);
701 if (type == &type_numhash) {
704 else if (type == &type_strhash) {
707 else if (type == &type_strcasehash) {
712#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
713#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
714#define collision_check 0
723#define REBUILD_THRESHOLD 4
725#if REBUILD_THRESHOLD < 2
726#error "REBUILD_THRESHOLD should be >= 2"
729static void rebuild_table_with(
st_table *
const new_tab,
st_table *
const tab);
730static void rebuild_move_table(
st_table *
const new_tab,
st_table *
const tab);
731static void rebuild_cleanup(
st_table *
const tab);
740 if ((2 * tab->num_entries <= get_allocated_entries(tab)
741 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
742 || tab->num_entries < (1 << MINIMAL_POWER2)) {
744 tab->num_entries = 0;
745 if (tab->bins != NULL)
746 initialize_bins(tab);
747 rebuild_table_with(tab, tab);
754 new_tab = st_init_table_with_size(tab->type,
755 2 * tab->num_entries - 1);
756 rebuild_table_with(new_tab, tab);
757 rebuild_move_table(new_tab, tab);
759 rebuild_cleanup(tab);
766 unsigned int size_ind;
772 new_entries = new_tab->entries;
775 bins = new_tab->bins;
776 size_ind = get_size_ind(new_tab);
777 st_index_t bound = tab->entries_bound;
780 for (i = tab->entries_start; i < bound; i++) {
781 curr_entry_ptr = &entries[i];
782 PREFETCH(entries + i + 1, 0);
783 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
785 if (&new_entries[ni] != curr_entry_ptr)
786 new_entries[ni] = *curr_entry_ptr;
787 if (EXPECT(bins != NULL, 1)) {
788 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
789 curr_entry_ptr->key);
790 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
792 new_tab->num_entries++;
796 assert(new_tab->num_entries == tab->num_entries);
802 tab->entry_power = new_tab->entry_power;
803 tab->bin_power = new_tab->bin_power;
804 tab->size_ind = new_tab->size_ind;
806 tab->bins = new_tab->bins;
808 tab->entries = new_tab->entries;
815 tab->entries_start = 0;
816 tab->entries_bound = tab->num_entries;
832static inline st_index_t
833secondary_hash(st_index_t ind,
st_table *tab, st_index_t *perturb)
836 ind = (ind << 2) + ind + *perturb + 1;
837 return hash_bin(ind, tab);
844static inline st_index_t
845find_entry(
st_table *tab, st_hash_t hash_value, st_data_t key)
851 bound = tab->entries_bound;
852 entries = tab->entries;
853 for (i = tab->entries_start; i < bound; i++) {
854 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
855 if (EXPECT(rebuilt_p, 0))
856 return REBUILT_TABLE_ENTRY_IND;
860 return UNDEFINED_ENTRY_IND;
872find_table_entry_ind(
st_table *tab, st_hash_t hash_value, st_data_t key)
876#ifdef QUADRATIC_PROBE
884 ind = hash_bin(hash_value, tab);
885#ifdef QUADRATIC_PROBE
888 perturb = hash_value;
892 bin = get_bin(tab->bins, get_size_ind(tab), ind);
893 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
894 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
895 if (EXPECT(rebuilt_p, 0))
896 return REBUILT_TABLE_ENTRY_IND;
900 else if (EMPTY_BIN_P(bin))
901 return UNDEFINED_ENTRY_IND;
902#ifdef QUADRATIC_PROBE
903 ind = hash_bin(ind + d, tab);
906 ind = secondary_hash(ind, tab, &perturb);
918find_table_bin_ind(
st_table *tab, st_hash_t hash_value, st_data_t key)
922#ifdef QUADRATIC_PROBE
930 ind = hash_bin(hash_value, tab);
931#ifdef QUADRATIC_PROBE
934 perturb = hash_value;
938 bin = get_bin(tab->bins, get_size_ind(tab), ind);
939 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
940 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
941 if (EXPECT(rebuilt_p, 0))
942 return REBUILT_TABLE_BIN_IND;
946 else if (EMPTY_BIN_P(bin))
947 return UNDEFINED_BIN_IND;
948#ifdef QUADRATIC_PROBE
949 ind = hash_bin(ind + d, tab);
952 ind = secondary_hash(ind, tab, &perturb);
963find_table_bin_ind_direct(
st_table *tab, st_hash_t hash_value, st_data_t key)
966#ifdef QUADRATIC_PROBE
973 ind = hash_bin(hash_value, tab);
974#ifdef QUADRATIC_PROBE
977 perturb = hash_value;
981 bin = get_bin(tab->bins, get_size_ind(tab), ind);
982 if (EMPTY_OR_DELETED_BIN_P(bin))
984#ifdef QUADRATIC_PROBE
985 ind = hash_bin(ind + d, tab);
988 ind = secondary_hash(ind, tab, &perturb);
1004find_table_bin_ptr_and_reserve(
st_table *tab, st_hash_t *hash_value,
1005 st_data_t key, st_index_t *bin_ind)
1007 int eq_p, rebuilt_p;
1009 st_hash_t curr_hash_value = *hash_value;
1010#ifdef QUADRATIC_PROBE
1015 st_index_t entry_index;
1016 st_index_t first_deleted_bin_ind;
1019 ind = hash_bin(curr_hash_value, tab);
1020#ifdef QUADRATIC_PROBE
1023 perturb = curr_hash_value;
1026 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1027 entries = tab->entries;
1029 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1030 if (EMPTY_BIN_P(entry_index)) {
1032 entry_index = UNDEFINED_ENTRY_IND;
1033 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1035 ind = first_deleted_bin_ind;
1036 MARK_BIN_EMPTY(tab, ind);
1040 else if (! DELETED_BIN_P(entry_index)) {
1041 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1042 if (EXPECT(rebuilt_p, 0))
1043 return REBUILT_TABLE_ENTRY_IND;
1047 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1048 first_deleted_bin_ind = ind;
1049#ifdef QUADRATIC_PROBE
1050 ind = hash_bin(ind + d, tab);
1053 ind = secondary_hash(ind, tab, &perturb);
1064st_lookup(
st_table *tab, st_data_t key, st_data_t *value)
1067 st_hash_t hash = do_hash(key, tab);
1070 if (tab->bins == NULL) {
1071 bin = find_entry(tab, hash, key);
1072 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1074 if (bin == UNDEFINED_ENTRY_IND)
1078 bin = find_table_entry_ind(tab, hash, key);
1079 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1081 if (bin == UNDEFINED_ENTRY_IND)
1086 *value = tab->entries[bin].record;
1093st_get_key(
st_table *tab, st_data_t key, st_data_t *result)
1096 st_hash_t hash = do_hash(key, tab);
1099 if (tab->bins == NULL) {
1100 bin = find_entry(tab, hash, key);
1101 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1103 if (bin == UNDEFINED_ENTRY_IND)
1107 bin = find_table_entry_ind(tab, hash, key);
1108 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1110 if (bin == UNDEFINED_ENTRY_IND)
1115 *result = tab->entries[bin].key;
1121rebuild_table_if_necessary (
st_table *tab)
1123 st_index_t bound = tab->entries_bound;
1125 if (bound == get_allocated_entries(tab))
1133st_insert(
st_table *tab, st_data_t key, st_data_t value)
1138 st_hash_t hash_value;
1142 hash_value = do_hash(key, tab);
1144 rebuild_table_if_necessary(tab);
1145 if (tab->bins == NULL) {
1146 bin = find_entry(tab, hash_value, key);
1147 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1149 new_p = bin == UNDEFINED_ENTRY_IND;
1152 bin_ind = UNDEFINED_BIN_IND;
1155 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1157 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1159 new_p = bin == UNDEFINED_ENTRY_IND;
1163 ind = tab->entries_bound++;
1164 entry = &tab->entries[ind];
1165 entry->hash = hash_value;
1167 entry->record = value;
1168 if (bin_ind != UNDEFINED_BIN_IND)
1169 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1172 tab->entries[bin].record = value;
1179st_add_direct_with_hash(
st_table *tab,
1180 st_data_t key, st_data_t value, st_hash_t hash)
1186 assert(hash != RESERVED_HASH_VAL);
1188 rebuild_table_if_necessary(tab);
1189 ind = tab->entries_bound++;
1190 entry = &tab->entries[ind];
1193 entry->record = value;
1195 if (tab->bins != NULL) {
1196 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1197 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1202rb_st_add_direct_with_hash(
st_table *tab,
1203 st_data_t key, st_data_t value, st_hash_t hash)
1205 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1211st_add_direct(
st_table *tab, st_data_t key, st_data_t value)
1213 st_hash_t hash_value;
1215 hash_value = do_hash(key, tab);
1216 st_add_direct_with_hash(tab, key, value, hash_value);
1223st_insert2(
st_table *tab, st_data_t key, st_data_t value,
1224 st_data_t (*func)(st_data_t))
1229 st_hash_t hash_value;
1233 hash_value = do_hash(key, tab);
1235 rebuild_table_if_necessary (tab);
1236 if (tab->bins == NULL) {
1237 bin = find_entry(tab, hash_value, key);
1238 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1240 new_p = bin == UNDEFINED_ENTRY_IND;
1243 bin_ind = UNDEFINED_BIN_IND;
1246 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1248 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1250 new_p = bin == UNDEFINED_ENTRY_IND;
1255 ind = tab->entries_bound++;
1256 entry = &tab->entries[ind];
1257 entry->hash = hash_value;
1259 entry->record = value;
1260 if (bin_ind != UNDEFINED_BIN_IND)
1261 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1264 tab->entries[bin].record = value;
1272 *new_tab = *old_tab;
1273 if (old_tab->bins == NULL)
1274 new_tab->bins = NULL;
1276 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1278 if (new_tab->bins == NULL) {
1283 new_tab->entries = (
st_table_entry *) malloc(get_allocated_entries(old_tab)
1286 if (new_tab->entries == NULL) {
1291 get_allocated_entries(old_tab));
1292 if (old_tab->bins != NULL)
1293 MEMCPY(new_tab->bins, old_tab->bins,
char, bins_size(old_tab));
1306 if (new_tab == NULL)
1310 if (st_replace(new_tab, old_tab) == NULL) {
1311 st_free_table(new_tab);
1321update_range_for_deleted(
st_table *tab, st_index_t n)
1325 if (tab->entries_start == n) {
1326 st_index_t start = n + 1;
1327 st_index_t bound = tab->entries_bound;
1329 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1330 tab->entries_start = start;
1339st_general_delete(
st_table *tab, st_data_t *key, st_data_t *value)
1346 hash = do_hash(*key, tab);
1348 if (tab->bins == NULL) {
1349 bin = find_entry(tab, hash, *key);
1350 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1352 if (bin == UNDEFINED_ENTRY_IND) {
1353 if (value != 0) *value = 0;
1358 bin_ind = find_table_bin_ind(tab, hash, *key);
1359 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1361 if (bin_ind == UNDEFINED_BIN_IND) {
1362 if (value != 0) *value = 0;
1365 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1366 MARK_BIN_DELETED(tab, bin_ind);
1368 entry = &tab->entries[bin];
1370 if (value != 0) *value = entry->record;
1371 MARK_ENTRY_DELETED(entry);
1373 update_range_for_deleted(tab, bin);
1378st_delete(
st_table *tab, st_data_t *key, st_data_t *value)
1380 return st_general_delete(tab, key, value);
1389st_delete_safe(
st_table *tab, st_data_t *key, st_data_t *value,
1390 st_data_t never ATTRIBUTE_UNUSED)
1392 return st_general_delete(tab, key, value);
1400st_shift(
st_table *tab, st_data_t *key, st_data_t *value)
1402 st_index_t i, bound;
1407 entries = tab->entries;
1408 bound = tab->entries_bound;
1409 for (i = tab->entries_start; i < bound; i++) {
1410 curr_entry_ptr = &entries[i];
1411 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1412 st_hash_t entry_hash = curr_entry_ptr->hash;
1413 st_data_t entry_key = curr_entry_ptr->key;
1415 if (value != 0) *value = curr_entry_ptr->record;
1418 if (tab->bins == NULL) {
1419 bin = find_entry(tab, entry_hash, entry_key);
1420 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1421 entries = tab->entries;
1424 curr_entry_ptr = &entries[bin];
1427 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1428 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1429 entries = tab->entries;
1432 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1434 MARK_BIN_DELETED(tab, bin_ind);
1436 MARK_ENTRY_DELETED(curr_entry_ptr);
1438 update_range_for_deleted(tab, i);
1442 if (value != 0) *value = 0;
1448st_cleanup_safe(
st_table *tab ATTRIBUTE_UNUSED,
1449 st_data_t never ATTRIBUTE_UNUSED)
1463st_update(
st_table *tab, st_data_t key,
1464 st_update_callback_func *func, st_data_t arg)
1470 st_data_t value = 0, old_key;
1471 int retval, existing;
1472 st_hash_t hash = do_hash(key, tab);
1475 entries = tab->entries;
1476 if (tab->bins == NULL) {
1477 bin = find_entry(tab, hash, key);
1478 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1480 existing = bin != UNDEFINED_ENTRY_IND;
1481 entry = &entries[bin];
1482 bin_ind = UNDEFINED_BIN_IND;
1485 bin_ind = find_table_bin_ind(tab, hash, key);
1486 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1488 existing = bin_ind != UNDEFINED_BIN_IND;
1490 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1491 entry = &entries[bin];
1496 value = entry->record;
1500 unsigned int rebuilds_num = tab->rebuilds_num;
1502 retval = (*func)(&key, &value, arg, existing);
1506 assert(rebuilds_num == tab->rebuilds_num);
1512 st_add_direct_with_hash(tab, key, value, hash);
1515 if (old_key != key) {
1518 entry->record = value;
1522 if (bin_ind != UNDEFINED_BIN_IND)
1523 MARK_BIN_DELETED(tab, bin_ind);
1524 MARK_ENTRY_DELETED(entry);
1526 update_range_for_deleted(tab, bin);
1542st_general_foreach(
st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1548 enum st_retval retval;
1549 st_index_t i, rebuilds_num;
1552 int error_p, packed_p = tab->bins == NULL;
1554 entries = tab->entries;
1557 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1558 curr_entry_ptr = &entries[i];
1559 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1561 key = curr_entry_ptr->key;
1562 rebuilds_num = tab->rebuilds_num;
1563 hash = curr_entry_ptr->hash;
1564 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1566 if (retval == ST_REPLACE && replace) {
1568 value = curr_entry_ptr->record;
1569 retval = (*replace)(&key, &value, arg, TRUE);
1570 curr_entry_ptr->key = key;
1571 curr_entry_ptr->record = value;
1574 if (rebuilds_num != tab->rebuilds_num) {
1576 entries = tab->entries;
1577 packed_p = tab->bins == NULL;
1579 i = find_entry(tab, hash, key);
1580 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1582 error_p = i == UNDEFINED_ENTRY_IND;
1585 i = find_table_entry_ind(tab, hash, key);
1586 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1588 error_p = i == UNDEFINED_ENTRY_IND;
1591 if (error_p && check_p) {
1593 retval = (*func)(0, 0, arg, 1);
1596 curr_entry_ptr = &entries[i];
1609 st_data_t key = curr_entry_ptr->key;
1613 bin = find_entry(tab, hash, key);
1614 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1616 if (bin == UNDEFINED_ENTRY_IND)
1620 bin_ind = find_table_bin_ind(tab, hash, key);
1621 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1623 if (bin_ind == UNDEFINED_BIN_IND)
1625 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1626 MARK_BIN_DELETED(tab, bin_ind);
1628 curr_entry_ptr = &entries[bin];
1629 MARK_ENTRY_DELETED(curr_entry_ptr);
1631 update_range_for_deleted(tab, bin);
1640st_foreach_with_replace(
st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1642 return st_general_foreach(tab, func, replace, arg, TRUE);
1646 st_foreach_callback_func *func;
1651apply_functor(st_data_t k, st_data_t v, st_data_t d,
int _)
1653 const struct functor *f = (
void *)d;
1654 return f->func(k, v, f->arg);
1658st_foreach(
st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1660 const struct functor f = { func, arg };
1661 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1666st_foreach_check(
st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1667 st_data_t never ATTRIBUTE_UNUSED)
1669 return st_general_foreach(tab, func, 0, arg, TRUE);
1674static inline st_index_t
1675st_general_keys(
st_table *tab, st_data_t *keys, st_index_t size)
1677 st_index_t i, bound;
1678 st_data_t key, *keys_start, *keys_end;
1681 bound = tab->entries_bound;
1683 keys_end = keys + size;
1684 for (i = tab->entries_start; i < bound; i++) {
1685 if (keys == keys_end)
1687 curr_entry_ptr = &entries[i];
1688 key = curr_entry_ptr->key;
1689 if (! DELETED_ENTRY_P(curr_entry_ptr))
1693 return keys - keys_start;
1697st_keys(
st_table *tab, st_data_t *keys, st_index_t size)
1699 return st_general_keys(tab, keys, size);
1704st_keys_check(
st_table *tab, st_data_t *keys, st_index_t size,
1705 st_data_t never ATTRIBUTE_UNUSED)
1707 return st_general_keys(tab, keys, size);
1712static inline st_index_t
1713st_general_values(
st_table *tab, st_data_t *values, st_index_t size)
1715 st_index_t i, bound;
1716 st_data_t *values_start, *values_end;
1719 values_start = values;
1720 values_end = values + size;
1721 bound = tab->entries_bound;
1722 for (i = tab->entries_start; i < bound; i++) {
1723 if (values == values_end)
1725 curr_entry_ptr = &entries[i];
1726 if (! DELETED_ENTRY_P(curr_entry_ptr))
1727 *values++ = curr_entry_ptr->record;
1730 return values - values_start;
1734st_values(
st_table *tab, st_data_t *values, st_index_t size)
1736 return st_general_values(tab, values, size);
1741st_values_check(
st_table *tab, st_data_t *values, st_index_t size,
1742 st_data_t never ATTRIBUTE_UNUSED)
1744 return st_general_values(tab, values, size);
1747#define FNV1_32A_INIT 0x811c9dc5
1752#define FNV_32_PRIME 0x01000193
1755#ifndef UNALIGNED_WORD_ACCESS
1756# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1757 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1758 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1759 defined(__mc68020__)
1760# define UNALIGNED_WORD_ACCESS 1
1763#ifndef UNALIGNED_WORD_ACCESS
1764# define UNALIGNED_WORD_ACCESS 0
/* Build an st_index_t whose bits above bit 31 come from x and whose low
   bits come from y (x is shifted left by 32, then OR-ed with y).  Only
   meaningful when st_index_t is wider than 32 bits.  */
1770#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
/* Rotate x left by n bits within a word of SIZEOF_ST_INDEX_T*CHAR_BIT
   bits.  n must be greater than 0 and less than that width, otherwise
   one of the two shift counts reaches the full width.  Both x and n
   are evaluated twice.  */
1771#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1773#if ST_INDEX_BITS <= 32
1774#define C1 (st_index_t)0xcc9e2d51
1775#define C2 (st_index_t)0x1b873593
/* Wide (more than 32-bit) variants of the C1/C2 constants, each built
   from two 32-bit halves with BIG_CONSTANT; they follow the pair that
   is selected when ST_INDEX_BITS <= 32.  Like that pair, they are plain
   expressions with no trailing ';', so they can be used inside larger
   expressions as well as at the end of a statement.  */
#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5)
#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f)
1780NO_SANITIZE(
"unsigned-integer-overflow",
static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1781NO_SANITIZE(
"unsigned-integer-overflow",
static inline st_index_t murmur_finish(st_index_t h));
1782NO_SANITIZE(
"unsigned-integer-overflow",
extern st_index_t st_hash(
const void *ptr,
size_t len, st_index_t h));
1784static inline st_index_t
1785murmur_step(st_index_t h, st_index_t k)
1787#if ST_INDEX_BITS <= 32
1803static inline st_index_t
1804murmur_finish(st_index_t h)
1806#if ST_INDEX_BITS <= 32
1810 const st_index_t c1 = 0x85ebca6b;
1811 const st_index_t c2 = 0xc2b2ae35;
1817 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1818 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1820#if ST_INDEX_BITS > 64
1837st_hash(
const void *ptr,
size_t len, st_index_t h)
1839 const char *data = ptr;
1843#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1844#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1845#if SIZEOF_ST_INDEX_T > 4
1846#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1847#if SIZEOF_ST_INDEX_T > 8
1848#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1849 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1850#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1852#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1854#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1857 if (
len >=
sizeof(st_index_t)) {
1858#if !UNALIGNED_WORD_ACCESS
1859 int align = (int)((st_data_t)data %
sizeof(st_index_t));
1865#ifdef WORDS_BIGENDIAN
1866# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1867 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1869# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1870 t |= data_at(n) << CHAR_BIT*(n)
1876#ifdef WORDS_BIGENDIAN
1877 t >>= (CHAR_BIT * align) - CHAR_BIT;
1879 t <<= (CHAR_BIT * align);
1882 data +=
sizeof(st_index_t)-align;
1883 len -=
sizeof(st_index_t)-align;
1885 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1886 sr = CHAR_BIT * align;
1888 while (
len >=
sizeof(st_index_t)) {
1889 d = *(st_index_t *)data;
1890#ifdef WORDS_BIGENDIAN
1891 t = (t << sr) | (d >> sl);
1893 t = (t >> sr) | (d << sl);
1895 h = murmur_step(h, t);
1897 data +=
sizeof(st_index_t);
1898 len -=
sizeof(st_index_t);
1901 pack =
len < (size_t)align ? (
int)
len : align;
1904#ifdef WORDS_BIGENDIAN
1905# define UNALIGNED_ADD(n) case (n) + 1: \
1906 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1908# define UNALIGNED_ADD(n) case (n) + 1: \
1909 d |= data_at(n) << CHAR_BIT*(n)
1914#ifdef WORDS_BIGENDIAN
1915 t = (t << sr) | (d >> sl);
1917 t = (t >> sr) | (d << sl);
1920 if (
len < (
size_t)align)
goto skip_tail;
1922 h = murmur_step(h, t);
1928#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1929#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1931#define aligned_data data
1935 h = murmur_step(h, *(st_index_t *)aligned_data);
1936 data +=
sizeof(st_index_t);
1937 len -=
sizeof(st_index_t);
1938 }
while (
len >=
sizeof(st_index_t));
1944#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1946#if SIZEOF_ST_INDEX_T > 4
1947 case 7: t |= data_at(6) << 48;
1948 case 6: t |= data_at(5) << 40;
1949 case 5: t |= data_at(4) << 32;
1951 t |= (st_index_t)*(uint32_t*)aligned_data;
1955 case 3: t |= data_at(2) << 16;
1956 case 2: t |= data_at(1) << 8;
1957 case 1: t |= data_at(0);
1959#ifdef WORDS_BIGENDIAN
1960# define UNALIGNED_ADD(n) case (n) + 1: \
1961 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1963# define UNALIGNED_ADD(n) case (n) + 1: \
1964 t |= data_at(n) << CHAR_BIT*(n)
1972 h ^= t; h -= ROTL(t, 7);
1978 return murmur_finish(h);
1982st_hash_uint32(st_index_t h, uint32_t i)
1984 return murmur_step(h, i);
1987NO_SANITIZE(
"unsigned-integer-overflow",
extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1989st_hash_uint(st_index_t h, st_index_t i)
1994#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1995 h = murmur_step(h, i >> 8*8);
1997 h = murmur_step(h, i);
2002st_hash_end(st_index_t h)
2004 h = murmur_finish(h);
2010rb_st_hash_start(st_index_t h)
2016strhash(st_data_t arg)
2018 register const char *
string = (
const char *)arg;
2019 return st_hash(
string, strlen(
string), FNV1_32A_INIT);
2023st_locale_insensitive_strcasecmp(
const char *s1,
const char *s2)
2030 if (c1 ==
'\0' || c2 ==
'\0') {
2031 if (c1 !=
'\0')
return 1;
2032 if (c2 !=
'\0')
return -1;
2035 if ((
'A' <= c1) && (c1 <=
'Z')) c1 +=
'a' -
'A';
2036 if ((
'A' <= c2) && (c2 <=
'Z')) c2 +=
'a' -
'A';
2047st_locale_insensitive_strncasecmp(
const char *s1,
const char *s2,
size_t n)
2052 for (i = 0; i < n; i++) {
2055 if (c1 ==
'\0' || c2 ==
'\0') {
2056 if (c1 !=
'\0')
return 1;
2057 if (c2 !=
'\0')
return -1;
2060 if ((
'A' <= c1) && (c1 <=
'Z')) c1 +=
'a' -
'A';
2061 if ((
'A' <= c2) && (c2 <=
'Z')) c2 +=
'a' -
'A';
2073st_strcmp(st_data_t lhs, st_data_t rhs)
2075 const char *s1 = (
char *)lhs;
2076 const char *s2 = (
char *)rhs;
2077 return strcmp(s1, s2);
2081st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2083 const char *s1 = (
char *)lhs;
2084 const char *s2 = (
char *)rhs;
2085 return st_locale_insensitive_strcasecmp(s1, s2);
2088NO_SANITIZE(
"unsigned-integer-overflow", PUREFUNC(
static st_index_t strcasehash(st_data_t)));
2090strcasehash(st_data_t arg)
2092 register const char *
string = (
const char *)arg;
2093 register st_index_t hval = FNV1_32A_INIT;
2099 unsigned int c = (
unsigned char)*
string++;
2100 if ((
unsigned int)(c -
'A') <= (
'Z' -
'A')) c +=
'a' -
'A';
2104 hval *= FNV_32_PRIME;
2110st_numcmp(st_data_t x, st_data_t y)
2116st_numhash(st_data_t n)
2118 enum {s1 = 11, s2 = 3};
2119 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2127st_expand_table(
st_table *tab, st_index_t siz)
2132 if (siz <= get_allocated_entries(tab))
2135 tmp = st_init_table_with_size(tab->type, siz);
2136 n = get_allocated_entries(tab);
2141 tab->entry_power = tmp->entry_power;
2142 tab->bin_power = tmp->bin_power;
2143 tab->size_ind = tmp->size_ind;
2144 tab->entries = tmp->entries;
2146 tab->rebuilds_num++;
2155 int eq_p, rebuilt_p;
2162 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2163 p = &tab->entries[i];
2164 if (DELETED_ENTRY_P(p))
2166 for (j = i + 1; j < tab->entries_bound; j++) {
2167 q = &tab->entries[j];
2168 if (DELETED_ENTRY_P(q))
2170 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2171 if (EXPECT(rebuilt_p, 0))
2175 MARK_ENTRY_DELETED(q);
2177 update_range_for_deleted(tab, j);
2189 int eq_p, rebuilt_p;
2191 st_index_t
const n = bins_size(tab);
2192 unsigned int const size_ind = get_size_ind(tab);
2193 st_index_t *bins = realloc(tab->bins, n);
2195 initialize_bins(tab);
2196 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2199#ifdef QUADRATIC_PROBE
2202 st_index_t perturb = p->hash;
2205 if (DELETED_ENTRY_P(p))
2208 ind = hash_bin(p->hash, tab);
2210 st_index_t bin = get_bin(bins, size_ind, ind);
2211 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2213 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2218 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2219 if (EXPECT(rebuilt_p, 0))
2223 q->record = p->record;
2224 MARK_ENTRY_DELETED(p);
2226 update_range_for_deleted(tab, bin);
2231#ifdef QUADRATIC_PROBE
2232 ind = hash_bin(ind + d, tab);
2235 ind = secondary_hash(ind, tab, &perturb);
2253 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2254 rebuilt_p = st_rehash_linear(tab);
2256 rebuilt_p = st_rehash_indexed(tab);
2257 }
while (rebuilt_p);
2261st_stringify(
VALUE key)
2264 rb_hash_key_str(key) : key;
2270 st_data_t k = st_stringify(key);
2272 e.hash = do_hash(k, tab);
2276 tab->entries[tab->entries_bound++] = e;
2287 for (i = 0; i < argc; ) {
2288 st_data_t k = st_stringify(argv[i++]);
2289 st_data_t v = argv[i++];
2290 st_insert(tab, k, v);
2302 for (i = 0; i < argc; ) {
2303 VALUE key = argv[i++];
2304 VALUE val = argv[i++];
2305 st_insert_single(tab, hash, key, val);
2315rb_hash_bulk_insert_into_st_table(
long argc,
const VALUE *argv,
VALUE hash)
2317 st_index_t n, size = argc / 2;
2318 st_table *tab = RHASH_ST_TABLE(hash);
2320 tab = RHASH_TBL_RAW(hash);
2321 n = tab->entries_bound + size;
2322 st_expand_table(tab, n);
2323 if (UNLIKELY(tab->num_entries))
2324 st_insert_generic(tab, argc, argv, hash);
2326 st_insert_single(tab, hash, argv[0], argv[1]);
2327 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2328 st_insert_linear(tab, argc, argv, hash);
2330 st_insert_generic(tab, argc, argv, hash);
2336 st_index_t num = tab->num_entries;
2337 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2339 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2340 rebuild_table_with(new_tab, tab);
2341 rebuild_move_table(new_tab, tab);
2342 rebuild_cleanup(tab);
2350struct set_table_entry {
2356static inline st_hash_t
2357set_do_hash(st_data_t key,
set_table *tab)
2359 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2360 return normalize_hash_value(hash);
2364static inline unsigned int
2367 return tab->size_ind;
2371static inline st_index_t
2374 return ((st_index_t) 1)<<tab->bin_power;
2378static inline st_index_t
2381 return set_get_bins_num(tab) - 1;
2386static inline st_index_t
2387set_hash_bin(st_hash_t hash_value,
set_table *tab)
2389 return hash_value & set_bins_mask(tab);
2393static inline st_index_t
2394set_get_allocated_entries(
const set_table *tab)
2396 return ((st_index_t) 1)<<tab->entry_power;
2400set_allocated_entries_size(
const set_table *tab)
2402 return set_get_allocated_entries(tab) *
sizeof(set_table_entry);
2408 return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
2412static inline st_index_t
2415 if (set_has_bins(tab)) {
2416 return features[tab->entry_power].bins_words *
sizeof (st_index_t);
2422static inline st_index_t *
2425 if (set_has_bins(tab)) {
2426 return (st_index_t *)(((
char *)tab->
entries) + set_allocated_entries_size(tab));
2436 memset(set_bins_ptr(tab), 0, set_bins_size(tab));
2443 tab->num_entries = 0;
2444 tab->entries_start = tab->entries_bound = 0;
2445 if (set_bins_ptr(tab) != NULL)
2446 set_initialize_bins(tab);
2457 const char *e = getenv(
"ST_HASH_LOG");
2458 if (!e || !*e) init_st = 1;
2467 n = get_power2(size);
2470 tab->entry_power = n;
2471 tab->bin_power = features[n].bin_power;
2472 tab->size_ind = features[n].size_ind;
2475 if (set_has_bins(tab)) {
2476 memsize += set_bins_size(tab);
2478 memsize += set_get_allocated_entries(tab) *
sizeof(set_table_entry);
2479 tab->
entries = (set_table_entry *)malloc(memsize);
2480 set_make_tab_empty(tab);
2481 tab->rebuilds_num = 0;
2491 if (tab == NULL) tab = malloc(
sizeof(
set_table));
2493 set_init_existing_table_with_size(tab, type, size);
2499set_init_numtable(
void)
2501 return set_init_table_with_size(NULL, &type_numhash, 0);
2505set_init_numtable_with_size(st_index_t size)
2507 return set_init_table_with_size(NULL, &type_numhash, size);
2511set_table_size(
const struct set_table *tbl)
2513 return tbl->num_entries;
2520 set_make_tab_empty(tab);
2521 tab->rebuilds_num++;
2538 + (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
2539 + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2543set_find_table_entry_ind(
set_table *tab, st_hash_t hash_value, st_data_t key);
2546set_find_table_bin_ind(
set_table *tab, st_hash_t hash_value, st_data_t key);
2549set_find_table_bin_ind_direct(
set_table *table, st_hash_t hash_value, st_data_t key);
2552set_find_table_bin_ptr_and_reserve(
set_table *tab, st_hash_t *hash_value,
2553 st_data_t key, st_index_t *bin_ind);
2557static void set_rebuild_cleanup(
set_table *
const tab);
2566 if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2567 && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2568 || tab->num_entries < (1 << MINIMAL_POWER2)) {
2570 tab->num_entries = 0;
2571 if (set_has_bins(tab))
2572 set_initialize_bins(tab);
2573 set_rebuild_table_with(tab, tab);
2580 new_tab = set_init_table_with_size(NULL, tab->type,
2581 2 * tab->num_entries - 1);
2582 set_rebuild_table_with(new_tab, tab);
2583 set_rebuild_move_table(new_tab, tab);
2585 set_rebuild_cleanup(tab);
2592 unsigned int size_ind;
2593 set_table_entry *new_entries;
2594 set_table_entry *curr_entry_ptr;
2598 new_entries = new_tab->
entries;
2601 bins = set_bins_ptr(new_tab);
2602 size_ind = set_get_size_ind(new_tab);
2603 st_index_t bound = tab->entries_bound;
2604 set_table_entry *entries = tab->
entries;
2606 for (i = tab->entries_start; i < bound; i++) {
2607 curr_entry_ptr = &entries[i];
2608 PREFETCH(entries + i + 1, 0);
2609 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2611 if (&new_entries[ni] != curr_entry_ptr)
2612 new_entries[ni] = *curr_entry_ptr;
2613 if (EXPECT(bins != NULL, 1)) {
2614 bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2615 curr_entry_ptr->key);
2616 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2618 new_tab->num_entries++;
2622 assert(new_tab->num_entries == tab->num_entries);
2628 tab->entry_power = new_tab->entry_power;
2629 tab->bin_power = new_tab->bin_power;
2630 tab->size_ind = new_tab->size_ind;
2637set_rebuild_cleanup(
set_table *
const tab)
2639 tab->entries_start = 0;
2640 tab->entries_bound = tab->num_entries;
2641 tab->rebuilds_num++;
2656static inline st_index_t
2657set_secondary_hash(st_index_t ind,
set_table *tab, st_index_t *perturb)
2660 ind = (ind << 2) + ind + *perturb + 1;
2661 return set_hash_bin(ind, tab);
2668static inline st_index_t
2669set_find_entry(
set_table *tab, st_hash_t hash_value, st_data_t key)
2671 int eq_p, rebuilt_p;
2672 st_index_t i, bound;
2673 set_table_entry *entries;
2675 bound = tab->entries_bound;
2677 for (i = tab->entries_start; i < bound; i++) {
2678 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2679 if (EXPECT(rebuilt_p, 0))
2680 return REBUILT_TABLE_ENTRY_IND;
2684 return UNDEFINED_ENTRY_IND;
2696set_find_table_entry_ind(
set_table *tab, st_hash_t hash_value, st_data_t key)
2698 int eq_p, rebuilt_p;
2700#ifdef QUADRATIC_PROBE
2706 set_table_entry *entries = tab->
entries;
2708 ind = set_hash_bin(hash_value, tab);
2709#ifdef QUADRATIC_PROBE
2712 perturb = hash_value;
2715 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2716 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2717 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2718 if (EXPECT(rebuilt_p, 0))
2719 return REBUILT_TABLE_ENTRY_IND;
2723 else if (EMPTY_BIN_P(bin))
2724 return UNDEFINED_ENTRY_IND;
2725#ifdef QUADRATIC_PROBE
2726 ind = set_hash_bin(ind + d, tab);
2729 ind = set_secondary_hash(ind, tab, &perturb);
2740set_find_table_bin_ind(
set_table *tab, st_hash_t hash_value, st_data_t key)
2742 int eq_p, rebuilt_p;
2744#ifdef QUADRATIC_PROBE
2750 set_table_entry *entries = tab->
entries;
2752 ind = set_hash_bin(hash_value, tab);
2753#ifdef QUADRATIC_PROBE
2756 perturb = hash_value;
2759 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2760 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2761 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2762 if (EXPECT(rebuilt_p, 0))
2763 return REBUILT_TABLE_BIN_IND;
2767 else if (EMPTY_BIN_P(bin))
2768 return UNDEFINED_BIN_IND;
2769#ifdef QUADRATIC_PROBE
2770 ind = set_hash_bin(ind + d, tab);
2773 ind = set_secondary_hash(ind, tab, &perturb);
2783set_find_table_bin_ind_direct(
set_table *tab, st_hash_t hash_value, st_data_t key)
2786#ifdef QUADRATIC_PROBE
2793 ind = set_hash_bin(hash_value, tab);
2794#ifdef QUADRATIC_PROBE
2797 perturb = hash_value;
2800 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2801 if (EMPTY_OR_DELETED_BIN_P(bin))
2803#ifdef QUADRATIC_PROBE
2804 ind = set_hash_bin(ind + d, tab);
2807 ind = set_secondary_hash(ind, tab, &perturb);
2814#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
2826set_find_table_bin_ptr_and_reserve(
set_table *tab, st_hash_t *hash_value,
2827 st_data_t key, st_index_t *bin_ind)
2829 int eq_p, rebuilt_p;
2831 st_hash_t curr_hash_value = *hash_value;
2832#ifdef QUADRATIC_PROBE
2837 st_index_t entry_index;
2838 st_index_t firset_deleted_bin_ind;
2839 set_table_entry *entries;
2841 ind = set_hash_bin(curr_hash_value, tab);
2842#ifdef QUADRATIC_PROBE
2845 perturb = curr_hash_value;
2847 firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2850 entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2851 if (EMPTY_BIN_P(entry_index)) {
2853 entry_index = UNDEFINED_ENTRY_IND;
2854 if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2856 ind = firset_deleted_bin_ind;
2857 MARK_SET_BIN_EMPTY(tab, ind);
2861 else if (! DELETED_BIN_P(entry_index)) {
2862 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2863 if (EXPECT(rebuilt_p, 0))
2864 return REBUILT_TABLE_ENTRY_IND;
2868 else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2869 firset_deleted_bin_ind = ind;
2870#ifdef QUADRATIC_PROBE
2871 ind = set_hash_bin(ind + d, tab);
2874 ind = set_secondary_hash(ind, tab, &perturb);
2884set_table_lookup(
set_table *tab, st_data_t key)
2887 st_hash_t hash = set_do_hash(key, tab);
2890 if (!set_has_bins(tab)) {
2891 bin = set_find_entry(tab, hash, key);
2892 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2894 if (bin == UNDEFINED_ENTRY_IND)
2898 bin = set_find_table_entry_ind(tab, hash, key);
2899 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2901 if (bin == UNDEFINED_ENTRY_IND)
2910set_rebuild_table_if_necessary (
set_table *tab)
2912 st_index_t bound = tab->entries_bound;
2914 if (bound == set_get_allocated_entries(tab))
2915 set_rebuild_table(tab);
2922set_insert(
set_table *tab, st_data_t key)
2924 set_table_entry *entry;
2927 st_hash_t hash_value;
2931 hash_value = set_do_hash(key, tab);
2933 set_rebuild_table_if_necessary(tab);
2934 if (!set_has_bins(tab)) {
2935 bin = set_find_entry(tab, hash_value, key);
2936 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2938 new_p = bin == UNDEFINED_ENTRY_IND;
2941 bin_ind = UNDEFINED_BIN_IND;
2944 bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
2946 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2948 new_p = bin == UNDEFINED_ENTRY_IND;
2952 ind = tab->entries_bound++;
2954 entry->hash = hash_value;
2956 if (bin_ind != UNDEFINED_BIN_IND)
2957 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
2967 *new_tab = *old_tab;
2968 size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
2969 new_tab->
entries = (set_table_entry *)malloc(memsize);
2980 if (set_replace(new_tab, old_tab) == NULL) {
2981 set_free_table(new_tab);
2991set_update_range_for_deleted(
set_table *tab, st_index_t n)
2995 if (tab->entries_start == n) {
2996 st_index_t start = n + 1;
2997 st_index_t bound = tab->entries_bound;
2998 set_table_entry *entries = tab->
entries;
2999 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
3000 tab->entries_start = start;
3007#define MARK_SET_BIN_DELETED(tab, i) \
3009 set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
3015set_table_delete(
set_table *tab, st_data_t *key)
3017 set_table_entry *entry;
3022 hash = set_do_hash(*key, tab);
3024 if (!set_has_bins(tab)) {
3025 bin = set_find_entry(tab, hash, *key);
3026 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3028 if (bin == UNDEFINED_ENTRY_IND) {
3033 bin_ind = set_find_table_bin_ind(tab, hash, *key);
3034 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3036 if (bin_ind == UNDEFINED_BIN_IND) {
3039 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3040 MARK_SET_BIN_DELETED(tab, bin_ind);
3044 MARK_ENTRY_DELETED(entry);
3046 set_update_range_for_deleted(tab, bin);
3059set_general_foreach(
set_table *tab, set_foreach_check_callback_func *func,
3060 set_update_callback_func *replace, st_data_t arg,
3065 set_table_entry *entries, *curr_entry_ptr;
3066 enum st_retval retval;
3067 st_index_t i, rebuilds_num;
3070 int error_p, packed_p = !set_has_bins(tab);
3075 for (i = tab->entries_start; i < tab->entries_bound; i++) {
3076 curr_entry_ptr = &entries[i];
3077 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3079 key = curr_entry_ptr->key;
3080 rebuilds_num = tab->rebuilds_num;
3081 hash = curr_entry_ptr->hash;
3082 retval = (*func)(key, arg, 0);
3084 if (retval == ST_REPLACE && replace) {
3085 retval = (*replace)(&key, arg, TRUE);
3086 curr_entry_ptr->key = key;
3089 if (rebuilds_num != tab->rebuilds_num) {
3092 packed_p = !set_has_bins(tab);
3094 i = set_find_entry(tab, hash, key);
3095 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3097 error_p = i == UNDEFINED_ENTRY_IND;
3100 i = set_find_table_entry_ind(tab, hash, key);
3101 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3103 error_p = i == UNDEFINED_ENTRY_IND;
3106 if (error_p && check_p) {
3108 retval = (*func)(0, arg, 1);
3111 curr_entry_ptr = &entries[i];
3124 st_data_t key = curr_entry_ptr->key;
3128 bin = set_find_entry(tab, hash, key);
3129 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3131 if (bin == UNDEFINED_ENTRY_IND)
3135 bin_ind = set_find_table_bin_ind(tab, hash, key);
3136 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3138 if (bin_ind == UNDEFINED_BIN_IND)
3140 bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3141 MARK_SET_BIN_DELETED(tab, bin_ind);
3143 curr_entry_ptr = &entries[bin];
3144 MARK_ENTRY_DELETED(curr_entry_ptr);
3146 set_update_range_for_deleted(tab, bin);
3155set_foreach_with_replace(
set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3157 return set_general_foreach(tab, func, replace, arg, TRUE);
3161 set_foreach_callback_func *func;
3166set_apply_functor(st_data_t k, st_data_t d,
int _)
3168 const struct set_functor *f = (
void *)d;
3169 return f->func(k, f->arg);
3173set_table_foreach(
set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3175 const struct set_functor f = { func, arg };
3176 return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3181set_foreach_check(
set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3182 st_data_t never ATTRIBUTE_UNUSED)
3184 return set_general_foreach(tab, func, NULL, arg, TRUE);
3190set_keys(
set_table *tab, st_data_t *keys, st_index_t size)
3192 st_index_t i, bound;
3193 st_data_t key, *keys_start, *keys_end;
3194 set_table_entry *curr_entry_ptr, *entries = tab->
entries;
3196 bound = tab->entries_bound;
3198 keys_end = keys + size;
3199 for (i = tab->entries_start; i < bound; i++) {
3200 if (keys == keys_end)
3202 curr_entry_ptr = &entries[i];
3203 key = curr_entry_ptr->key;
3204 if (! DELETED_ENTRY_P(curr_entry_ptr))
3208 return keys - keys_start;
3214 st_index_t num = tab->num_entries;
3215 if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3217 set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3218 set_rebuild_table_with(new_tab, tab);
3219 set_rebuild_move_table(new_tab, tab);
3220 set_rebuild_cleanup(tab);
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_cString
String class.
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
int len
Length of the buffer.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
set_table_entry * entries
Array of size 2^entry_power.
uintptr_t VALUE
Type that represents a Ruby object.