Ruby  3.4.0dev (2024-11-22 revision 0989400a925cd201defdca9eb28eb87200b30785)
pm_constant_pool.c
2 
6 void
8  list->ids = NULL;
9  list->size = 0;
10  list->capacity = 0;
11 }
12 
16 void
18  list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
19  if (list->ids == NULL) abort();
20 
21  list->size = 0;
22  list->capacity = capacity;
23 }
24 
29 bool
31  if (list->size >= list->capacity) {
32  list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
33  list->ids = (pm_constant_id_t *) xrealloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
34  if (list->ids == NULL) return false;
35  }
36 
37  list->ids[list->size++] = id;
38  return true;
39 }
40 
44 void
46  assert(index < list->capacity);
47  assert(list->ids[index] == PM_CONSTANT_ID_UNSET);
48 
49  list->ids[index] = id;
50  list->size++;
51 }
52 
56 bool
58  for (size_t index = 0; index < list->size; index++) {
59  if (list->ids[index] == id) return true;
60  }
61  return false;
62 }
63 
67 void
69  if (list->ids != NULL) {
70  xfree(list->ids);
71  }
72 }
73 
78 static inline uint32_t
79 pm_constant_pool_hash(const uint8_t *start, size_t length) {
80  // This is a prime number used as the initial value for the hash function.
81  uint32_t value = 5381;
82 
83  for (size_t index = 0; index < length; index++) {
84  value = ((value << 5) + value) + start[index];
85  }
86 
87  return value;
88 }
89 
93 static uint32_t
94 next_power_of_two(uint32_t v) {
95  // Avoid underflow in subtraction on next line.
96  if (v == 0) {
97  // 1 is the nearest power of 2 to 0 (2^0)
98  return 1;
99  }
100  v--;
101  v |= v >> 1;
102  v |= v >> 2;
103  v |= v >> 4;
104  v |= v >> 8;
105  v |= v >> 16;
106  v++;
107  return v;
108 }
109 
110 #ifndef NDEBUG
111 static bool
112 is_power_of_two(uint32_t size) {
113  return (size & (size - 1)) == 0;
114 }
115 #endif
116 
120 static inline bool
121 pm_constant_pool_resize(pm_constant_pool_t *pool) {
122  assert(is_power_of_two(pool->capacity));
123 
124  uint32_t next_capacity = pool->capacity * 2;
125  if (next_capacity < pool->capacity) return false;
126 
127  const uint32_t mask = next_capacity - 1;
128  const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
129 
130  void *next = xcalloc(next_capacity, element_size);
131  if (next == NULL) return false;
132 
133  pm_constant_pool_bucket_t *next_buckets = next;
134  pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
135 
136  // For each bucket in the current constant pool, find the index in the
137  // next constant pool, and insert it.
138  for (uint32_t index = 0; index < pool->capacity; index++) {
139  pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
140 
141  // If an id is set on this constant, then we know we have content here.
142  // In this case we need to insert it into the next constant pool.
143  if (bucket->id != PM_CONSTANT_ID_UNSET) {
144  uint32_t next_index = bucket->hash & mask;
145 
146  // This implements linear scanning to find the next available slot
147  // in case this index is already taken. We don't need to bother
148  // comparing the values since we know that the hash is unique.
149  while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
150  next_index = (next_index + 1) & mask;
151  }
152 
153  // Here we copy over the entire bucket, which includes the id so
154  // that they are consistent between resizes.
155  next_buckets[next_index] = *bucket;
156  }
157  }
158 
159  // The constants are stable with respect to hash table resizes.
160  memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
161 
162  // pool->constants and pool->buckets are allocated out of the same chunk
163  // of memory, with the buckets coming first.
164  xfree(pool->buckets);
165  pool->constants = next_constants;
166  pool->buckets = next_buckets;
167  pool->capacity = next_capacity;
168  return true;
169 }
170 
174 bool
175 pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
176  const uint32_t maximum = (~((uint32_t) 0));
177  if (capacity >= ((maximum / 2) + 1)) return false;
178 
179  capacity = next_power_of_two(capacity);
180  const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
181  void *memory = xcalloc(capacity, element_size);
182  if (memory == NULL) return false;
183 
184  pool->buckets = memory;
185  pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
186  pool->size = 0;
187  pool->capacity = capacity;
188  return true;
189 }
190 
196  assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
197  return &pool->constants[constant_id - 1];
198 }
199 
205 pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
206  assert(is_power_of_two(pool->capacity));
207  const uint32_t mask = pool->capacity - 1;
208 
209  uint32_t hash = pm_constant_pool_hash(start, length);
210  uint32_t index = hash & mask;
212 
213  while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
214  pm_constant_t *constant = &pool->constants[bucket->id - 1];
215  if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
216  return bucket->id;
217  }
218 
219  index = (index + 1) & mask;
220  }
221 
222  return PM_CONSTANT_ID_UNSET;
223 }
224 
228 static inline pm_constant_id_t
229 pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
230  if (pool->size >= (pool->capacity / 4 * 3)) {
231  if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
232  }
233 
234  assert(is_power_of_two(pool->capacity));
235  const uint32_t mask = pool->capacity - 1;
236 
237  uint32_t hash = pm_constant_pool_hash(start, length);
238  uint32_t index = hash & mask;
240 
241  while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
242  // If there is a collision, then we need to check if the content is the
243  // same as the content we are trying to insert. If it is, then we can
244  // return the id of the existing constant.
245  pm_constant_t *constant = &pool->constants[bucket->id - 1];
246 
247  if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
248  // Since we have found a match, we need to check if this is
249  // attempting to insert a shared or an owned constant. We want to
250  // prefer shared constants since they don't require allocations.
252  // If we're attempting to insert an owned constant and we have
253  // an existing constant, then either way we don't want the given
254  // memory. Either it's duplicated with the existing constant or
255  // it's not necessary because we have a shared version.
256  xfree((void *) start);
257  } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
258  // If we're attempting to insert a shared constant and the
259  // existing constant is owned, then we can free the owned
260  // constant and replace it with the shared constant.
261  xfree((void *) constant->start);
262  constant->start = start;
263  bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
264  }
265 
266  return bucket->id;
267  }
268 
269  index = (index + 1) & mask;
270  }
271 
272  // IDs are allocated starting at 1, since the value 0 denotes a non-existent
273  // constant.
274  uint32_t id = ++pool->size;
275  assert(pool->size < ((uint32_t) (1 << 30)));
276 
277  *bucket = (pm_constant_pool_bucket_t) {
278  .id = (unsigned int) (id & 0x3fffffff),
279  .type = (unsigned int) (type & 0x3),
280  .hash = hash
281  };
282 
283  pool->constants[id - 1] = (pm_constant_t) {
284  .start = start,
285  .length = length,
286  };
287 
288  return id;
289 }
290 
296 pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
297  return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
298 }
299 
306 pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) {
307  return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
308 }
309 
316 pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
317  return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
318 }
319 
323 void
325  // For each constant in the current constant pool, free the contents if the
326  // contents are owned.
327  for (uint32_t index = 0; index < pool->capacity; index++) {
328  pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
329 
330  // If an id is set on this constant, then we know we have content here.
331  if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
332  pm_constant_t *constant = &pool->constants[bucket->id - 1];
333  xfree((void *) constant->start);
334  }
335  }
336 
337  xfree(pool->buckets);
338 }
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition: xmalloc.h:56
#define xcalloc
Old name of ruby_xcalloc.
Definition: xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
A data structure that stores a set of strings.
pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Find a constant in a constant pool.
static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT
By default, each constant is a slice of the source.
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity)
Initialize a new constant pool with a given capacity.
pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Insert a constant into a constant pool that is a slice of a source string.
void pm_constant_id_list_init(pm_constant_id_list_t *list)
Initialize a list of constant ids.
unsigned int pm_constant_pool_bucket_type_t
The type of bucket in the constant pool hash map.
void pm_constant_id_list_free(pm_constant_id_list_t *list)
Free the memory associated with a list of constant ids.
pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Insert a constant into a constant pool from memory that is constant.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id)
Insert a constant id into a list of constant ids at the specified index.
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id)
Append a constant id to a list of constant ids.
pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length)
Insert a constant into a constant pool from memory that is now owned by the constant pool.
static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED
An owned constant is one for which memory has been allocated.
void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity)
Initialize a list of constant ids with a given capacity.
pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id)
Return a pointer to the constant indicated by the given constant id.
static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT
A constant constant is known at compile time.
bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id)
Checks if the current constant id list includes the given constant id.
void pm_constant_pool_free(pm_constant_pool_t *pool)
Free the memory associated with a constant pool.
A list of constant IDs.
size_t size
The number of constant ids in the list.
size_t capacity
The number of constant ids that have been allocated in the list.
pm_constant_id_t * ids
The constant ids in the list.
A bucket in the hash map.
uint32_t hash
The hash of the bucket.
unsigned int id
The incremental ID used for indexing back into the pool.
pm_constant_pool_bucket_type_t type
The type of the bucket, which determines how to free it.
The overall constant pool, which stores constants found while parsing.
uint32_t capacity
The number of buckets that have been allocated in the hash map.
pm_constant_pool_bucket_t * buckets
The buckets in the hash map.
uint32_t size
The number of buckets in the hash map.
pm_constant_t * constants
The constants that are stored in the buckets.
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.