Ruby 4.1.0dev (2026-03-23 revision f8459601271ebbc5e1efb101387da955ed1faabb)
constant_pool.c
1#include "prism/internal/constant_pool.h"
2
5#include "prism/internal/arena.h"
6
7#include <assert.h>
8#include <stdbool.h>
9
13void
14pm_constant_id_list_init(pm_constant_id_list_t *list) {
15 list->ids = NULL;
16 list->size = 0;
17 list->capacity = 0;
18}
19
23void
24pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity) {
25 if (capacity) {
26 list->ids = (pm_constant_id_t *) pm_arena_zalloc(arena, capacity * sizeof(pm_constant_id_t), PRISM_ALIGNOF(pm_constant_id_t));
27 } else {
28 list->ids = NULL;
29 }
30
31 list->size = 0;
32 list->capacity = capacity;
33}
34
38void
39pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id) {
40 if (list->size >= list->capacity) {
41 size_t new_capacity = list->capacity == 0 ? 8 : list->capacity * 2;
42 pm_constant_id_t *new_ids = (pm_constant_id_t *) pm_arena_alloc(arena, sizeof(pm_constant_id_t) * new_capacity, PRISM_ALIGNOF(pm_constant_id_t));
43
44 if (list->size > 0) {
45 memcpy(new_ids, list->ids, list->size * sizeof(pm_constant_id_t));
46 }
47
48 list->ids = new_ids;
49 list->capacity = new_capacity;
50 }
51
52 list->ids[list->size++] = id;
53}
54
58void
59pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id) {
60 assert(index < list->capacity);
61 assert(list->ids[index] == PM_CONSTANT_ID_UNSET);
62
63 list->ids[index] = id;
64 list->size++;
65}
66
70bool
71pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
72 for (size_t index = 0; index < list->size; index++) {
73 if (list->ids[index] == id) return true;
74 }
75 return false;
76}
77
85static PRISM_INLINE uint32_t
86pm_constant_pool_hash(const uint8_t *start, size_t length) {
87 // This constant is borrowed from wyhash. It is a 64-bit odd integer with
88 // roughly equal 0/1 bits, chosen for good avalanche behavior when used in
89 // multiply-xorshift sequences.
90 static const uint64_t secret = 0x517cc1b727220a95ULL;
91 uint64_t hash = (uint64_t) length;
92
93 if (length <= 8) {
94 // Short strings: read first and last 4 bytes (overlapping for len < 8).
95 // This covers the majority of Ruby identifiers with a single multiply.
96 if (length >= 4) {
97 uint32_t a, b;
98 memcpy(&a, start, 4);
99 memcpy(&b, start + length - 4, 4);
100 hash ^= (uint64_t) a | ((uint64_t) b << 32);
101 } else if (length > 0) {
102 hash ^= (uint64_t) start[0] | ((uint64_t) start[length >> 1] << 8) | ((uint64_t) start[length - 1] << 16);
103 }
104 hash *= secret;
105 } else if (length <= 16) {
106 // Medium strings: read first and last 8 bytes (overlapping).
107 // Two multiplies instead of the three the loop-based approach needs.
108 uint64_t word;
109 memcpy(&word, start, 8);
110 hash ^= word;
111 hash *= secret;
112 memcpy(&word, start + length - 8, 8);
113 hash ^= word;
114 hash *= secret;
115 } else {
116 const uint8_t *ptr = start;
117 size_t remaining = length;
118
119 while (remaining >= 8) {
120 uint64_t word;
121 memcpy(&word, ptr, 8);
122 hash ^= word;
123 hash *= secret;
124 ptr += 8;
125 remaining -= 8;
126 }
127
128 if (remaining > 0) {
129 // Read the last 8 bytes (overlapping with already-processed data).
130 uint64_t word;
131 memcpy(&word, start + length - 8, 8);
132 hash ^= word;
133 hash *= secret;
134 }
135 }
136
137 hash ^= hash >> 32;
138 return (uint32_t) hash;
139}
140
144static uint32_t
145next_power_of_two(uint32_t v) {
146 // Avoid underflow in subtraction on next line.
147 if (v == 0) {
148 // 1 is the nearest power of 2 to 0 (2^0)
149 return 1;
150 }
151 v--;
152 v |= v >> 1;
153 v |= v >> 2;
154 v |= v >> 4;
155 v |= v >> 8;
156 v |= v >> 16;
157 v++;
158 return v;
159}
160
161#ifndef NDEBUG
162static bool
163is_power_of_two(uint32_t size) {
164 return (size & (size - 1)) == 0;
165}
166#endif
167
171static PRISM_INLINE void
172pm_constant_pool_resize(pm_arena_t *arena, pm_constant_pool_t *pool) {
173 assert(is_power_of_two(pool->capacity));
174
175 uint32_t next_capacity = pool->capacity * 2;
176 const uint32_t mask = next_capacity - 1;
177
178 pm_constant_pool_bucket_t *next_buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, next_capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t));
179 pm_constant_t *next_constants = (pm_constant_t *) pm_arena_alloc(arena, next_capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t));
180
181 // For each bucket in the current constant pool, find the index in the
182 // next constant pool, and insert it.
183 for (uint32_t index = 0; index < pool->capacity; index++) {
184 pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
185
186 // If an id is set on this constant, then we know we have content here.
187 // In this case we need to insert it into the next constant pool.
188 if (bucket->id != PM_CONSTANT_ID_UNSET) {
189 uint32_t next_index = bucket->hash & mask;
190
191 // This implements linear scanning to find the next available slot
192 // in case this index is already taken. We don't need to bother
193 // comparing the values since we know that the hash is unique.
194 while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
195 next_index = (next_index + 1) & mask;
196 }
197
198 // Here we copy over the entire bucket, which includes the id so
199 // that they are consistent between resizes.
200 next_buckets[next_index] = *bucket;
201 }
202 }
203
204 // The constants are stable with respect to hash table resizes.
205 memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
206
207 pool->constants = next_constants;
208 pool->buckets = next_buckets;
209 pool->capacity = next_capacity;
210}
211
215void
216pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity) {
217 capacity = next_power_of_two(capacity);
218
219 pool->buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t));
220 pool->constants = (pm_constant_t *) pm_arena_alloc(arena, capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t));
221 pool->size = 0;
222 pool->capacity = capacity;
223}
224
229pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
230 assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
231 return &pool->constants[constant_id - 1];
232}
233
239pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
240 assert(is_power_of_two(pool->capacity));
241 const uint32_t mask = pool->capacity - 1;
242
243 uint32_t hash = pm_constant_pool_hash(start, length);
244 uint32_t index = hash & mask;
246
247 while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
248 if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) {
249 return bucket->id;
250 }
251
252 index = (index + 1) & mask;
253 }
254
255 return PM_CONSTANT_ID_UNSET;
256}
257
262pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
263 if (pool->size >= (pool->capacity / 4 * 3)) {
264 pm_constant_pool_resize(arena, pool);
265 }
266
267 assert(is_power_of_two(pool->capacity));
268 const uint32_t mask = pool->capacity - 1;
269
270 uint32_t hash = pm_constant_pool_hash(start, length);
271 uint32_t index = hash & mask;
273
274 while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
275 // If there is a collision, then we need to check if the content is the
276 // same as the content we are trying to insert. If it is, then we can
277 // return the id of the existing constant.
278 if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) {
279 // Since we have found a match, we need to check if this is
280 // attempting to insert a shared or an owned constant. We want to
281 // prefer shared constants since they don't require allocations.
282 if (type != PM_CONSTANT_POOL_BUCKET_OWNED && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
283 // If we're attempting to insert a shared constant and the
284 // existing constant is owned, then we can replace it with the
285 // shared constant to prefer non-owned references.
286 bucket->start = start;
287 bucket->type = (unsigned int) (type & 0x3);
288 pool->constants[bucket->id - 1].start = start;
289 }
290
291 return bucket->id;
292 }
293
294 index = (index + 1) & mask;
295 }
296
297 // IDs are allocated starting at 1, since the value 0 denotes a non-existent
298 // constant.
299 uint32_t id = ++pool->size;
300 assert(pool->size < ((uint32_t) (1 << 30)));
301
302 *bucket = (pm_constant_pool_bucket_t) {
303 .id = (unsigned int) (id & 0x3fffffff),
304 .type = (unsigned int) (type & 0x3),
305 .hash = hash,
306 .start = start,
307 .length = length
308 };
309
310 pool->constants[id - 1] = (pm_constant_t) {
311 .start = start,
312 .length = length,
313 };
314
315 return id;
316}
317
323pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
324 return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
325}
326
333pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length) {
334 return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
335}
336
343pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
344 return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
345}
346
350const uint8_t *
351pm_constant_start(const pm_constant_t *constant) {
352 return constant->start;
353}
354
358size_t pm_constant_length(const pm_constant_t *constant) {
359 return constant->length;
360}
#define PRISM_ALIGNOF
Get the alignment requirement of a type.
Definition align.h:15
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
#define PRISM_INLINE
Old Visual Studio versions do not support the inline keyword, so we need to define it to be __inline.
Definition inline.h:12
C99 shim for <stdbool.h>
A list of constant IDs.
size_t size
The number of constant ids in the list.
size_t capacity
The number of constant ids that have been allocated in the list.
pm_constant_id_t * ids
The constant ids in the list.