Ruby 4.0.0dev (2025-12-09 revision 90a9b64238eca659df9d59d35f6cf59fa3851119)
shape.h (90a9b64238eca659df9d59d35f6cf59fa3851119)
1#ifndef RUBY_SHAPE_H
2#define RUBY_SHAPE_H
3
4#include "internal/gc.h"
5
6typedef uint16_t attr_index_t;
7typedef uint32_t shape_id_t;
8#define SHAPE_ID_NUM_BITS 32
9#define SHAPE_ID_OFFSET_NUM_BITS 19
10
11STATIC_ASSERT(shape_id_num_bits, SHAPE_ID_NUM_BITS == sizeof(shape_id_t) * CHAR_BIT);
12
13#define SHAPE_BUFFER_SIZE (1 << SHAPE_ID_OFFSET_NUM_BITS)
14#define SHAPE_ID_OFFSET_MASK (SHAPE_BUFFER_SIZE - 1)
15
16#define SHAPE_ID_HEAP_INDEX_BITS 3
17#define SHAPE_ID_HEAP_INDEX_MAX ((1 << SHAPE_ID_HEAP_INDEX_BITS) - 1)
18
19#define SHAPE_ID_FL_USHIFT (SHAPE_ID_OFFSET_NUM_BITS + SHAPE_ID_HEAP_INDEX_BITS)
20#define SHAPE_ID_HEAP_INDEX_OFFSET SHAPE_ID_FL_USHIFT
21
22// shape_id_t bits:
23// 0-18 SHAPE_ID_OFFSET_MASK
24// index in rb_shape_tree.shape_list. Allow to access `rb_shape_t *`.
25// 19-21 SHAPE_ID_HEAP_INDEX_MASK
26// index in rb_shape_tree.capacities. Allow to access slot size.
27// Always 0 except for T_OBJECT.
28// 22 SHAPE_ID_FL_FROZEN
29// Whether the object is frozen or not.
30// 23 SHAPE_ID_FL_HAS_OBJECT_ID
31// Whether the object has an `SHAPE_OBJ_ID` transition.
32// 24 SHAPE_ID_FL_TOO_COMPLEX
33// The object is backed by a `st_table`.
34
35enum shape_id_fl_type {
36#define RBIMPL_SHAPE_ID_FL(n) (1<<(SHAPE_ID_FL_USHIFT+n))
37
38 SHAPE_ID_HEAP_INDEX_MASK = RBIMPL_SHAPE_ID_FL(0) | RBIMPL_SHAPE_ID_FL(1) | RBIMPL_SHAPE_ID_FL(2),
39
40 SHAPE_ID_FL_FROZEN = RBIMPL_SHAPE_ID_FL(3),
41 SHAPE_ID_FL_HAS_OBJECT_ID = RBIMPL_SHAPE_ID_FL(4),
42 SHAPE_ID_FL_TOO_COMPLEX = RBIMPL_SHAPE_ID_FL(5),
43
44 SHAPE_ID_FL_NON_CANONICAL_MASK = SHAPE_ID_FL_FROZEN | SHAPE_ID_FL_HAS_OBJECT_ID,
45 SHAPE_ID_FLAGS_MASK = SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_NON_CANONICAL_MASK | SHAPE_ID_FL_TOO_COMPLEX,
46
47#undef RBIMPL_SHAPE_ID_FL
48};
49
50// This mask allows to check if a shape_id contains any ivar.
51// It relies on ROOT_SHAPE_WITH_OBJ_ID==1.
52enum shape_id_mask {
53 SHAPE_ID_HAS_IVAR_MASK = SHAPE_ID_FL_TOO_COMPLEX | (SHAPE_ID_OFFSET_MASK - 1),
54};
55
56// The interpreter doesn't care about frozen status or slot size when reading ivars.
57// So we normalize shape_id by clearing these bits to improve cache hits.
58// JITs however might care about it.
59#define SHAPE_ID_READ_ONLY_MASK (~(SHAPE_ID_FL_FROZEN | SHAPE_ID_HEAP_INDEX_MASK))
60
61typedef uint32_t redblack_id_t;
62
63#define SHAPE_MAX_FIELDS (attr_index_t)(-1)
64#define SHAPE_FLAG_SHIFT ((SIZEOF_VALUE * CHAR_BIT) - SHAPE_ID_NUM_BITS)
65#define SHAPE_FLAG_MASK (((VALUE)-1) >> SHAPE_ID_NUM_BITS)
66
67#define SHAPE_MAX_VARIATIONS 8
68
69#define INVALID_SHAPE_ID ((shape_id_t)-1)
70#define ATTR_INDEX_NOT_SET ((attr_index_t)-1)
71
72#define ROOT_SHAPE_ID 0x0
73#define ROOT_SHAPE_WITH_OBJ_ID 0x1
74#define ROOT_TOO_COMPLEX_SHAPE_ID (ROOT_SHAPE_ID | SHAPE_ID_FL_TOO_COMPLEX)
75#define ROOT_TOO_COMPLEX_WITH_OBJ_ID (ROOT_SHAPE_WITH_OBJ_ID | SHAPE_ID_FL_TOO_COMPLEX | SHAPE_ID_FL_HAS_OBJECT_ID)
76
77typedef struct redblack_node redblack_node_t;
78
79struct rb_shape {
80 VALUE edges; // id_table from ID (ivar) to next shape
81 ID edge_name; // ID (ivar) for transition from parent to rb_shape
82 redblack_node_t *ancestor_index;
83 shape_id_t parent_id;
84 attr_index_t next_field_index; // Fields are either ivars or internal properties like `object_id`
85 attr_index_t capacity; // Total capacity of the object with this shape
86 uint8_t type;
87};
88
89typedef struct rb_shape rb_shape_t;
90
92 ID key;
93 rb_shape_t *value;
94 redblack_id_t l;
95 redblack_id_t r;
96};
97
98enum shape_type {
99 SHAPE_ROOT,
100 SHAPE_IVAR,
101 SHAPE_OBJ_ID,
102};
103
104enum shape_flags {
105 SHAPE_FL_FROZEN = 1 << 0,
106 SHAPE_FL_HAS_OBJECT_ID = 1 << 1,
107 SHAPE_FL_TOO_COMPLEX = 1 << 2,
108
109 SHAPE_FL_NON_CANONICAL_MASK = SHAPE_FL_FROZEN | SHAPE_FL_HAS_OBJECT_ID,
110};
111
112typedef struct {
113 /* object shapes */
114 rb_shape_t *shape_list;
115 rb_shape_t *root_shape;
116 const attr_index_t *capacities;
117 rb_atomic_t next_shape_id;
118
119 redblack_node_t *shape_cache;
120 unsigned int cache_size;
122
123RUBY_SYMBOL_EXPORT_BEGIN
124RUBY_EXTERN rb_shape_tree_t rb_shape_tree;
125RUBY_SYMBOL_EXPORT_END
126
127static inline shape_id_t
128rb_shapes_count(void)
129{
130 return (shape_id_t)RUBY_ATOMIC_LOAD(rb_shape_tree.next_shape_id);
131}
132
134 uint64_t pack;
135 struct {
136 shape_id_t shape_id;
137 attr_index_t index;
138 } unpack;
139};
140
141static inline shape_id_t
142RBASIC_SHAPE_ID(VALUE obj)
143{
145 RUBY_ASSERT(!RB_TYPE_P(obj, T_IMEMO) || IMEMO_TYPE_P(obj, imemo_fields));
146#if RBASIC_SHAPE_ID_FIELD
147 return (shape_id_t)((RBASIC(obj)->shape_id));
148#else
149 return (shape_id_t)((RBASIC(obj)->flags) >> SHAPE_FLAG_SHIFT);
150#endif
151}
152
153// Same as RBASIC_SHAPE_ID but with flags that have no impact
154// on reads removed. e.g. Remove FL_FROZEN.
155static inline shape_id_t
156RBASIC_SHAPE_ID_FOR_READ(VALUE obj)
157{
158 return RBASIC_SHAPE_ID(obj) & SHAPE_ID_READ_ONLY_MASK;
159}
160
161#if RUBY_DEBUG
162bool rb_shape_verify_consistency(VALUE obj, shape_id_t shape_id);
163#endif
164
165static inline void
166RBASIC_SET_SHAPE_ID_NO_CHECKS(VALUE obj, shape_id_t shape_id)
167{
168#if RBASIC_SHAPE_ID_FIELD
169 RBASIC(obj)->shape_id = (VALUE)shape_id;
170#else
171 // Object shapes are occupying top bits
172 RBASIC(obj)->flags &= SHAPE_FLAG_MASK;
173 RBASIC(obj)->flags |= ((VALUE)(shape_id) << SHAPE_FLAG_SHIFT);
174#endif
175}
176
177static inline void
178RBASIC_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
179{
181 RUBY_ASSERT(!RB_TYPE_P(obj, T_IMEMO) || IMEMO_TYPE_P(obj, imemo_fields));
182
183 RBASIC_SET_SHAPE_ID_NO_CHECKS(obj, shape_id);
184
185 RUBY_ASSERT(rb_shape_verify_consistency(obj, shape_id));
186}
187
188void rb_set_boxed_class_shape_id(VALUE obj, shape_id_t shape_id);
189
190static inline void
191RB_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
192{
193 switch (BUILTIN_TYPE(obj)) {
194 case T_CLASS:
195 case T_MODULE:
196 rb_set_boxed_class_shape_id(obj, shape_id);
197 break;
198 default:
199 RBASIC_SET_SHAPE_ID(obj, shape_id);
200 break;
201 }
202}
203
204static inline rb_shape_t *
205RSHAPE(shape_id_t shape_id)
206{
207 uint32_t offset = (shape_id & SHAPE_ID_OFFSET_MASK);
208 RUBY_ASSERT(offset != INVALID_SHAPE_ID);
209
210 return &rb_shape_tree.shape_list[offset];
211}
212
213int32_t rb_shape_id_offset(void);
214
215RUBY_FUNC_EXPORTED shape_id_t rb_obj_shape_id(VALUE obj);
216shape_id_t rb_shape_get_next_iv_shape(shape_id_t shape_id, ID id);
217bool rb_shape_get_iv_index(shape_id_t shape_id, ID id, attr_index_t *value);
218bool rb_shape_get_iv_index_with_hint(shape_id_t shape_id, ID id, attr_index_t *value, shape_id_t *shape_id_hint);
219bool rb_shape_find_ivar(shape_id_t shape_id, ID id, shape_id_t *ivar_shape);
220
221typedef int rb_shape_foreach_transition_callback(shape_id_t shape_id, void *data);
222bool rb_shape_foreach_field(shape_id_t shape_id, rb_shape_foreach_transition_callback func, void *data);
223
224shape_id_t rb_shape_transition_frozen(VALUE obj);
225shape_id_t rb_shape_transition_complex(VALUE obj);
226shape_id_t rb_shape_transition_remove_ivar(VALUE obj, ID id, shape_id_t *removed_shape_id);
227shape_id_t rb_shape_transition_add_ivar(VALUE obj, ID id);
228shape_id_t rb_shape_transition_add_ivar_no_warnings(VALUE klass, shape_id_t original_shape_id, ID id);
229shape_id_t rb_shape_transition_object_id(VALUE obj);
230shape_id_t rb_shape_transition_heap(VALUE obj, size_t heap_index);
231shape_id_t rb_shape_object_id(shape_id_t original_shape_id);
232
233void rb_shape_free_all(void);
234
235shape_id_t rb_shape_rebuild(shape_id_t initial_shape_id, shape_id_t dest_shape_id);
236void rb_shape_copy_fields(VALUE dest, VALUE *dest_buf, shape_id_t dest_shape_id, VALUE *src_buf, shape_id_t src_shape_id);
237void rb_shape_copy_complex_ivars(VALUE dest, VALUE obj, shape_id_t src_shape_id, st_table *fields_table);
238
239static inline bool
240rb_shape_too_complex_p(shape_id_t shape_id)
241{
242 return shape_id & SHAPE_ID_FL_TOO_COMPLEX;
243}
244
245static inline bool
246rb_shape_obj_too_complex_p(VALUE obj)
247{
248 return !RB_SPECIAL_CONST_P(obj) && rb_shape_too_complex_p(RBASIC_SHAPE_ID(obj));
249}
250
251static inline bool
252rb_shape_has_object_id(shape_id_t shape_id)
253{
254 return shape_id & SHAPE_ID_FL_HAS_OBJECT_ID;
255}
256
257static inline bool
258rb_shape_canonical_p(shape_id_t shape_id)
259{
260 return !(shape_id & SHAPE_ID_FL_NON_CANONICAL_MASK);
261}
262
263static inline uint8_t
264rb_shape_heap_index(shape_id_t shape_id)
265{
266 return (uint8_t)((shape_id & SHAPE_ID_HEAP_INDEX_MASK) >> SHAPE_ID_HEAP_INDEX_OFFSET);
267}
268
269static inline shape_id_t
270rb_shape_root(size_t heap_id)
271{
272 shape_id_t heap_index = (shape_id_t)(heap_id + 1);
273 shape_id_t heap_flags = heap_index << SHAPE_ID_HEAP_INDEX_OFFSET;
274
275 RUBY_ASSERT((heap_flags & SHAPE_ID_HEAP_INDEX_MASK) == heap_flags);
276 RUBY_ASSERT(rb_shape_heap_index(heap_flags) == heap_index);
277
278 return ROOT_SHAPE_ID | heap_flags;
279}
280
281static inline shape_id_t
282RSHAPE_PARENT_RAW_ID(shape_id_t shape_id)
283{
284 return RSHAPE(shape_id)->parent_id;
285}
286
287static inline bool
288RSHAPE_DIRECT_CHILD_P(shape_id_t parent_id, shape_id_t child_id)
289{
290 return (parent_id & SHAPE_ID_FLAGS_MASK) == (child_id & SHAPE_ID_FLAGS_MASK) &&
291 RSHAPE(child_id)->parent_id == (parent_id & SHAPE_ID_OFFSET_MASK);
292}
293
294static inline enum shape_type
295RSHAPE_TYPE(shape_id_t shape_id)
296{
297 return RSHAPE(shape_id)->type;
298}
299
300static inline bool
301RSHAPE_TYPE_P(shape_id_t shape_id, enum shape_type type)
302{
303 return RSHAPE_TYPE(shape_id) == type;
304}
305
306static inline attr_index_t
307RSHAPE_EMBEDDED_CAPACITY(shape_id_t shape_id)
308{
309 uint8_t heap_index = rb_shape_heap_index(shape_id);
310 if (heap_index) {
311 return rb_shape_tree.capacities[heap_index - 1];
312 }
313 return 0;
314}
315
316static inline attr_index_t
317RSHAPE_CAPACITY(shape_id_t shape_id)
318{
319 attr_index_t embedded_capacity = RSHAPE_EMBEDDED_CAPACITY(shape_id);
320
321 if (embedded_capacity > RSHAPE(shape_id)->capacity) {
322 return embedded_capacity;
323 }
324 else {
325 return RSHAPE(shape_id)->capacity;
326 }
327}
328
329static inline attr_index_t
330RSHAPE_LEN(shape_id_t shape_id)
331{
332 return RSHAPE(shape_id)->next_field_index;
333}
334
335static inline attr_index_t
336RSHAPE_INDEX(shape_id_t shape_id)
337{
338 RUBY_ASSERT(RSHAPE_LEN(shape_id) > 0);
339 return RSHAPE_LEN(shape_id) - 1;
340}
341
342static inline ID
343RSHAPE_EDGE_NAME(shape_id_t shape_id)
344{
345 return RSHAPE(shape_id)->edge_name;
346}
347
348static inline uint32_t
349ROBJECT_FIELDS_CAPACITY(VALUE obj)
350{
351 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
352 // Asking for capacity doesn't make sense when the object is using
353 // a hash table for storing instance variables
354 RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj));
355 return RSHAPE_CAPACITY(RBASIC_SHAPE_ID(obj));
356}
357
358static inline st_table *
359ROBJECT_FIELDS_HASH(VALUE obj)
360{
361 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
362 RUBY_ASSERT(rb_shape_obj_too_complex_p(obj));
363 RUBY_ASSERT(FL_TEST_RAW(obj, ROBJECT_HEAP));
364
365 return (st_table *)ROBJECT(obj)->as.heap.fields;
366}
367
368static inline void
369ROBJECT_SET_FIELDS_HASH(VALUE obj, const st_table *tbl)
370{
371 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
372 RUBY_ASSERT(rb_shape_obj_too_complex_p(obj));
373 RUBY_ASSERT(FL_TEST_RAW(obj, ROBJECT_HEAP));
374
375 ROBJECT(obj)->as.heap.fields = (VALUE *)tbl;
376}
377
378static inline uint32_t
379ROBJECT_FIELDS_COUNT_COMPLEX(VALUE obj)
380{
381 return (uint32_t)rb_st_table_size(ROBJECT_FIELDS_HASH(obj));
382}
383
384static inline uint32_t
385ROBJECT_FIELDS_COUNT_NOT_COMPLEX(VALUE obj)
386{
387 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
388 RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj));
389 return RSHAPE(RBASIC_SHAPE_ID(obj))->next_field_index;
390}
391
392static inline uint32_t
393ROBJECT_FIELDS_COUNT(VALUE obj)
394{
395 if (rb_shape_obj_too_complex_p(obj)) {
396 return ROBJECT_FIELDS_COUNT_COMPLEX(obj);
397 }
398 else {
399 return ROBJECT_FIELDS_COUNT_NOT_COMPLEX(obj);
400 }
401}
402
403static inline uint32_t
404RBASIC_FIELDS_COUNT(VALUE obj)
405{
406 return RSHAPE(RBASIC_SHAPE_ID(obj))->next_field_index;
407}
408
409static inline bool
410rb_shape_obj_has_id(VALUE obj)
411{
412 return rb_shape_has_object_id(RBASIC_SHAPE_ID(obj));
413}
414
415static inline bool
416rb_shape_has_ivars(shape_id_t shape_id)
417{
418 return shape_id & SHAPE_ID_HAS_IVAR_MASK;
419}
420
421static inline bool
422rb_shape_obj_has_ivars(VALUE obj)
423{
424 return rb_shape_has_ivars(RBASIC_SHAPE_ID(obj));
425}
426
427static inline bool
428rb_shape_has_fields(shape_id_t shape_id)
429{
430 return shape_id & (SHAPE_ID_OFFSET_MASK | SHAPE_ID_FL_TOO_COMPLEX);
431}
432
433static inline bool
434rb_shape_obj_has_fields(VALUE obj)
435{
436 return rb_shape_has_fields(RBASIC_SHAPE_ID(obj));
437}
438
439static inline bool
440rb_obj_gen_fields_p(VALUE obj)
441{
442 switch (TYPE(obj)) {
443 case T_NONE:
444 case T_OBJECT:
445 case T_CLASS:
446 case T_MODULE:
447 case T_IMEMO:
448 return false;
449 default:
450 break;
451 }
452 return rb_shape_obj_has_fields(obj);
453}
454
455// For ext/objspace
456RUBY_SYMBOL_EXPORT_BEGIN
457typedef void each_shape_callback(shape_id_t shape_id, void *data);
458void rb_shape_each_shape_id(each_shape_callback callback, void *data);
459size_t rb_shape_memsize(shape_id_t shape);
460size_t rb_shape_edges_count(shape_id_t shape_id);
461size_t rb_shape_depth(shape_id_t shape_id);
462RUBY_SYMBOL_EXPORT_END
463
464#endif
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
std::atomic< unsigned > rb_atomic_t
Type that is eligible for atomic operations.
Definition atomic.h:69
#define RUBY_ATOMIC_LOAD(var)
Atomic load.
Definition atomic.h:175
#define RUBY_EXTERN
Declaration of externally visible global variables.
Definition dllexport.h:45
#define TYPE(_)
Old name of rb_type.
Definition value_type.h:108
#define T_IMEMO
Old name of RUBY_T_IMEMO.
Definition value_type.h:67
#define T_NONE
Old name of RUBY_T_NONE.
Definition value_type.h:74
#define T_MODULE
Old name of RUBY_T_MODULE.
Definition value_type.h:70
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
Definition fl_type.h:131
#define T_OBJECT
Old name of RUBY_T_OBJECT.
Definition value_type.h:75
#define T_CLASS
Old name of RUBY_T_CLASS.
Definition value_type.h:58
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
Definition value_type.h:85
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RBASIC(obj)
Convenient casting macro.
Definition rbasic.h:40
#define ROBJECT(obj)
Convenient casting macro.
Definition robject.h:43
static bool RB_SPECIAL_CONST_P(VALUE obj)
Checks if the given object is of enum ruby_special_consts.
Definition st.h:79
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376
@ RUBY_T_OBJECT
Definition value_type.h:116