Ruby 4.1.0dev (2026-04-18 revision 3803204fd3a863c4936b3580b28940dd4ce22236)
shape.h (3803204fd3a863c4936b3580b28940dd4ce22236)
1#ifndef RUBY_SHAPE_H
2#define RUBY_SHAPE_H
3
4#include "internal/gc.h"
5#include "internal/struct.h"
6
7typedef uint16_t attr_index_t;
8typedef uint32_t shape_id_t;
9#define SHAPE_ID_NUM_BITS 32
10#define SHAPE_ID_OFFSET_NUM_BITS 19
11
12STATIC_ASSERT(shape_id_num_bits, SHAPE_ID_NUM_BITS == sizeof(shape_id_t) * CHAR_BIT);
13
14#define SHAPE_BUFFER_SIZE (1 << SHAPE_ID_OFFSET_NUM_BITS)
15#define SHAPE_ID_OFFSET_MASK (SHAPE_BUFFER_SIZE - 1)
16
17#define SHAPE_ID_HEAP_INDEX_BITS 5
18#define SHAPE_ID_HEAP_INDEX_MAX ((1 << SHAPE_ID_HEAP_INDEX_BITS) - 1)
19
20#define SHAPE_ID_HEAP_INDEX_OFFSET SHAPE_ID_OFFSET_NUM_BITS
21#define SHAPE_ID_FL_USHIFT (SHAPE_ID_OFFSET_NUM_BITS + SHAPE_ID_HEAP_INDEX_BITS)
22
23// shape_id_t bits:
24// 0-18 SHAPE_ID_OFFSET_MASK
25// index in rb_shape_tree.shape_list. Allow to access `rb_shape_t *`.
26// 19-23 SHAPE_ID_HEAP_INDEX_MASK
27// index in rb_shape_tree.capacities. Allow to access slot size.
28// Always 0 except for T_OBJECT.
29// 24 SHAPE_ID_FL_FROZEN
30// Whether the object is frozen or not.
31// 25 SHAPE_ID_FL_HAS_OBJECT_ID
32// Whether the object has an `SHAPE_OBJ_ID` transition.
33// 26 SHAPE_ID_FL_TOO_COMPLEX
34// The object is backed by a `st_table`.
35
36enum shape_id_fl_type {
37#define RBIMPL_SHAPE_ID_FL(n) (1<<(SHAPE_ID_FL_USHIFT+n))
38
39 SHAPE_ID_HEAP_INDEX_MASK = ((1 << SHAPE_ID_HEAP_INDEX_BITS) - 1) << SHAPE_ID_HEAP_INDEX_OFFSET,
40
41 SHAPE_ID_FL_FROZEN = RBIMPL_SHAPE_ID_FL(0),
42 SHAPE_ID_FL_HAS_OBJECT_ID = RBIMPL_SHAPE_ID_FL(1),
43 SHAPE_ID_FL_TOO_COMPLEX = RBIMPL_SHAPE_ID_FL(2),
44
45 SHAPE_ID_FL_NON_CANONICAL_MASK = SHAPE_ID_FL_FROZEN | SHAPE_ID_FL_HAS_OBJECT_ID,
46 SHAPE_ID_FLAGS_MASK = SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_NON_CANONICAL_MASK | SHAPE_ID_FL_TOO_COMPLEX,
47
48#undef RBIMPL_SHAPE_ID_FL
49};
50
51// This mask allows to check if a shape_id contains any ivar.
52// It relies on ROOT_SHAPE_WITH_OBJ_ID==1.
53enum shape_id_mask {
54 SHAPE_ID_HAS_IVAR_MASK = SHAPE_ID_FL_TOO_COMPLEX | (SHAPE_ID_OFFSET_MASK - 1),
55};
56
57// The interpreter doesn't care about frozen status or slot size when reading ivars.
58// So we normalize shape_id by clearing these bits to improve cache hits.
59// JITs however might care about it.
60#define SHAPE_ID_READ_ONLY_MASK (~(SHAPE_ID_FL_FROZEN | SHAPE_ID_HEAP_INDEX_MASK))
61
62typedef uint32_t redblack_id_t;
63
64#define SHAPE_MAX_FIELDS (attr_index_t)(-1)
65#define SHAPE_FLAG_SHIFT ((SIZEOF_VALUE * CHAR_BIT) - SHAPE_ID_NUM_BITS)
66#define SHAPE_FLAG_MASK (((VALUE)-1) >> SHAPE_ID_NUM_BITS)
67
68#define SHAPE_MAX_VARIATIONS 8
69
70#define INVALID_SHAPE_ID ((shape_id_t)-1)
71#define ATTR_INDEX_NOT_SET ((attr_index_t)-1)
72
73#define ROOT_SHAPE_ID 0x0
74#define ROOT_SHAPE_WITH_OBJ_ID 0x1
75#define ROOT_TOO_COMPLEX_SHAPE_ID (ROOT_SHAPE_ID | SHAPE_ID_FL_TOO_COMPLEX)
76#define ROOT_TOO_COMPLEX_WITH_OBJ_ID (ROOT_SHAPE_WITH_OBJ_ID | SHAPE_ID_FL_TOO_COMPLEX | SHAPE_ID_FL_HAS_OBJECT_ID)
77
78typedef struct redblack_node redblack_node_t;
79
80struct rb_shape {
81 VALUE edges; // id_table from ID (ivar) to next shape
82 ID edge_name; // ID (ivar) for transition from parent to rb_shape
83 redblack_node_t *ancestor_index;
84 shape_id_t parent_id;
85 attr_index_t next_field_index; // Fields are either ivars or internal properties like `object_id`
86 attr_index_t capacity; // Total capacity of the object with this shape
87 uint8_t type;
88};
89
90typedef struct rb_shape rb_shape_t;
91
93 ID key;
94 rb_shape_t *value;
95 redblack_id_t l;
96 redblack_id_t r;
97};
98
99enum shape_type {
100 SHAPE_ROOT,
101 SHAPE_IVAR,
102 SHAPE_OBJ_ID,
103};
104
105enum shape_flags {
106 SHAPE_FL_FROZEN = 1 << 0,
107 SHAPE_FL_HAS_OBJECT_ID = 1 << 1,
108 SHAPE_FL_TOO_COMPLEX = 1 << 2,
109
110 SHAPE_FL_NON_CANONICAL_MASK = SHAPE_FL_FROZEN | SHAPE_FL_HAS_OBJECT_ID,
111};
112
113typedef struct {
114 /* object shapes */
115 rb_shape_t *shape_list;
116 rb_shape_t *root_shape;
117 const attr_index_t *capacities;
118 size_t heaps_count;
119 rb_atomic_t next_shape_id;
120
121 redblack_node_t *shape_cache;
122 unsigned int cache_size;
124
125RUBY_SYMBOL_EXPORT_BEGIN
126RUBY_EXTERN rb_shape_tree_t rb_shape_tree;
127RUBY_SYMBOL_EXPORT_END
128
129static inline shape_id_t
130rb_shapes_count(void)
131{
132 return (shape_id_t)RUBY_ATOMIC_LOAD(rb_shape_tree.next_shape_id);
133}
134
136 uint64_t pack;
137 struct {
138 shape_id_t shape_id;
139 attr_index_t index;
140 } unpack;
141};
142
143static inline shape_id_t
144RBASIC_SHAPE_ID(VALUE obj)
145{
147 RUBY_ASSERT(!RB_TYPE_P(obj, T_IMEMO) || IMEMO_TYPE_P(obj, imemo_fields));
148#if RBASIC_SHAPE_ID_FIELD
149 return (shape_id_t)((RBASIC(obj)->shape_id));
150#else
151 return (shape_id_t)((RBASIC(obj)->flags) >> SHAPE_FLAG_SHIFT);
152#endif
153}
154
155// Same as RBASIC_SHAPE_ID but with flags that have no impact
156// on reads removed. e.g. Remove FL_FROZEN.
157static inline shape_id_t
158RBASIC_SHAPE_ID_FOR_READ(VALUE obj)
159{
160 return RBASIC_SHAPE_ID(obj) & SHAPE_ID_READ_ONLY_MASK;
161}
162
163#if RUBY_DEBUG
164bool rb_shape_verify_consistency(VALUE obj, shape_id_t shape_id);
165#endif
166
167static inline void
168RBASIC_SET_SHAPE_ID_NO_CHECKS(VALUE obj, shape_id_t shape_id)
169{
170#if RBASIC_SHAPE_ID_FIELD
171 RBASIC(obj)->shape_id = (VALUE)shape_id;
172#else
173 // Object shapes are occupying top bits
174 RBASIC(obj)->flags &= SHAPE_FLAG_MASK;
175 RBASIC(obj)->flags |= ((VALUE)(shape_id) << SHAPE_FLAG_SHIFT);
176#endif
177}
178
179static inline void
180RBASIC_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
181{
183 RUBY_ASSERT(!RB_TYPE_P(obj, T_IMEMO) || IMEMO_TYPE_P(obj, imemo_fields));
184
185 RBASIC_SET_SHAPE_ID_NO_CHECKS(obj, shape_id);
186
187 RUBY_ASSERT(rb_shape_verify_consistency(obj, shape_id));
188}
189
190void rb_set_boxed_class_shape_id(VALUE obj, shape_id_t shape_id);
191
192static inline void
193RB_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
194{
195 switch (BUILTIN_TYPE(obj)) {
196 case T_CLASS:
197 case T_MODULE:
198 rb_set_boxed_class_shape_id(obj, shape_id);
199 break;
200 default:
201 RBASIC_SET_SHAPE_ID(obj, shape_id);
202 break;
203 }
204}
205
206static inline rb_shape_t *
207RSHAPE(shape_id_t shape_id)
208{
209 uint32_t offset = (shape_id & SHAPE_ID_OFFSET_MASK);
210 RUBY_ASSERT(offset != INVALID_SHAPE_ID);
211
212 return &rb_shape_tree.shape_list[offset];
213}
214
215int32_t rb_shape_id_offset(void);
216
217RUBY_FUNC_EXPORTED shape_id_t rb_obj_shape_id(VALUE obj);
218shape_id_t rb_shape_get_next_iv_shape(shape_id_t shape_id, ID id);
219bool rb_shape_get_iv_index(shape_id_t shape_id, ID id, attr_index_t *value);
220bool rb_shape_get_iv_index_with_hint(shape_id_t shape_id, ID id, attr_index_t *value, shape_id_t *shape_id_hint);
221bool rb_shape_find_ivar(shape_id_t shape_id, ID id, shape_id_t *ivar_shape);
222
223typedef int rb_shape_foreach_transition_callback(shape_id_t shape_id, void *data);
224bool rb_shape_foreach_field(shape_id_t shape_id, rb_shape_foreach_transition_callback func, void *data);
225
226shape_id_t rb_shape_transition_frozen(VALUE obj);
227shape_id_t rb_shape_transition_complex(VALUE obj);
228shape_id_t rb_shape_transition_remove_ivar(VALUE obj, ID id, shape_id_t *removed_shape_id);
229shape_id_t rb_shape_transition_add_ivar(VALUE obj, ID id);
230shape_id_t rb_shape_transition_add_ivar_no_warnings(VALUE klass, shape_id_t original_shape_id, ID id);
231shape_id_t rb_shape_transition_object_id(VALUE obj);
232shape_id_t rb_shape_transition_heap(VALUE obj, size_t heap_index);
233shape_id_t rb_shape_object_id(shape_id_t original_shape_id);
234
235void rb_shape_free_all(void);
236
237shape_id_t rb_shape_rebuild(shape_id_t initial_shape_id, shape_id_t dest_shape_id);
238void rb_shape_copy_fields(VALUE dest, VALUE *dest_buf, shape_id_t dest_shape_id, VALUE *src_buf, shape_id_t src_shape_id);
239void rb_shape_copy_complex_ivars(VALUE dest, VALUE obj, shape_id_t src_shape_id, st_table *fields_table);
240
241static inline bool
242rb_shape_too_complex_p(shape_id_t shape_id)
243{
244 return shape_id & SHAPE_ID_FL_TOO_COMPLEX;
245}
246
247static inline bool
248rb_shape_obj_too_complex_p(VALUE obj)
249{
250 return !RB_SPECIAL_CONST_P(obj) && rb_shape_too_complex_p(RBASIC_SHAPE_ID(obj));
251}
252
253static inline bool
254rb_shape_has_object_id(shape_id_t shape_id)
255{
256 return shape_id & SHAPE_ID_FL_HAS_OBJECT_ID;
257}
258
259static inline bool
260rb_shape_canonical_p(shape_id_t shape_id)
261{
262 return !(shape_id & SHAPE_ID_FL_NON_CANONICAL_MASK);
263}
264
265static inline uint8_t
266rb_shape_heap_index(shape_id_t shape_id)
267{
268 return (uint8_t)((shape_id & SHAPE_ID_HEAP_INDEX_MASK) >> SHAPE_ID_HEAP_INDEX_OFFSET);
269}
270
271static inline shape_id_t
272rb_shape_root(size_t heap_id)
273{
274 shape_id_t heap_index = (shape_id_t)(heap_id + 1);
275 shape_id_t heap_flags = heap_index << SHAPE_ID_HEAP_INDEX_OFFSET;
276
277 RUBY_ASSERT((heap_flags & SHAPE_ID_HEAP_INDEX_MASK) == heap_flags);
278 RUBY_ASSERT(rb_shape_heap_index(heap_flags) == heap_index);
279
280 return ROOT_SHAPE_ID | heap_flags;
281}
282
283static inline shape_id_t
284RSHAPE_PARENT_RAW_ID(shape_id_t shape_id)
285{
286 return RSHAPE(shape_id)->parent_id;
287}
288
289static inline bool
290RSHAPE_DIRECT_CHILD_P(shape_id_t parent_id, shape_id_t child_id)
291{
292 return (parent_id & SHAPE_ID_FLAGS_MASK) == (child_id & SHAPE_ID_FLAGS_MASK) &&
293 RSHAPE(child_id)->parent_id == (parent_id & SHAPE_ID_OFFSET_MASK);
294}
295
296static inline enum shape_type
297RSHAPE_TYPE(shape_id_t shape_id)
298{
299 return RSHAPE(shape_id)->type;
300}
301
302static inline bool
303RSHAPE_TYPE_P(shape_id_t shape_id, enum shape_type type)
304{
305 return RSHAPE_TYPE(shape_id) == type;
306}
307
308static inline attr_index_t
309RSHAPE_EMBEDDED_CAPACITY(shape_id_t shape_id)
310{
311 uint8_t heap_index = rb_shape_heap_index(shape_id);
312 if (heap_index) {
313 return rb_shape_tree.capacities[heap_index - 1];
314 }
315 return 0;
316}
317
318static inline attr_index_t
319RSHAPE_CAPACITY(shape_id_t shape_id)
320{
321 attr_index_t embedded_capacity = RSHAPE_EMBEDDED_CAPACITY(shape_id);
322
323 if (embedded_capacity > RSHAPE(shape_id)->capacity) {
324 return embedded_capacity;
325 }
326 else {
327 return RSHAPE(shape_id)->capacity;
328 }
329}
330
331static inline attr_index_t
332RSHAPE_LEN(shape_id_t shape_id)
333{
334 return RSHAPE(shape_id)->next_field_index;
335}
336
337static inline attr_index_t
338RSHAPE_INDEX(shape_id_t shape_id)
339{
340 RUBY_ASSERT(RSHAPE_LEN(shape_id) > 0);
341 return RSHAPE_LEN(shape_id) - 1;
342}
343
344static inline ID
345RSHAPE_EDGE_NAME(shape_id_t shape_id)
346{
347 return RSHAPE(shape_id)->edge_name;
348}
349
350static inline uint32_t
351ROBJECT_FIELDS_CAPACITY(VALUE obj)
352{
353 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
354 // Asking for capacity doesn't make sense when the object is using
355 // a hash table for storing instance variables
356 RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj));
357 return RSHAPE_CAPACITY(RBASIC_SHAPE_ID(obj));
358}
359
360static inline st_table *
361ROBJECT_FIELDS_HASH(VALUE obj)
362{
363 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
364 RUBY_ASSERT(rb_shape_obj_too_complex_p(obj));
365 RUBY_ASSERT(FL_TEST_RAW(obj, ROBJECT_HEAP));
366
367 return (st_table *)ROBJECT(obj)->as.heap.fields;
368}
369
370static inline void
371ROBJECT_SET_FIELDS_HASH(VALUE obj, const st_table *tbl)
372{
373 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
374 RUBY_ASSERT(rb_shape_obj_too_complex_p(obj));
375 RUBY_ASSERT(FL_TEST_RAW(obj, ROBJECT_HEAP));
376
377 ROBJECT(obj)->as.heap.fields = (VALUE *)tbl;
378}
379
380static inline uint32_t
381ROBJECT_FIELDS_COUNT_COMPLEX(VALUE obj)
382{
383 return (uint32_t)rb_st_table_size(ROBJECT_FIELDS_HASH(obj));
384}
385
386static inline uint32_t
387ROBJECT_FIELDS_COUNT_NOT_COMPLEX(VALUE obj)
388{
389 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
390 RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj));
391 return RSHAPE(RBASIC_SHAPE_ID(obj))->next_field_index;
392}
393
394static inline uint32_t
395ROBJECT_FIELDS_COUNT(VALUE obj)
396{
397 if (rb_shape_obj_too_complex_p(obj)) {
398 return ROBJECT_FIELDS_COUNT_COMPLEX(obj);
399 }
400 else {
401 return ROBJECT_FIELDS_COUNT_NOT_COMPLEX(obj);
402 }
403}
404
405static inline uint32_t
406RBASIC_FIELDS_COUNT(VALUE obj)
407{
408 return RSHAPE(RBASIC_SHAPE_ID(obj))->next_field_index;
409}
410
411static inline bool
412rb_shape_obj_has_id(VALUE obj)
413{
414 return rb_shape_has_object_id(RBASIC_SHAPE_ID(obj));
415}
416
417static inline bool
418rb_shape_has_ivars(shape_id_t shape_id)
419{
420 return shape_id & SHAPE_ID_HAS_IVAR_MASK;
421}
422
423static inline bool
424rb_shape_obj_has_ivars(VALUE obj)
425{
426 return rb_shape_has_ivars(RBASIC_SHAPE_ID(obj));
427}
428
429static inline bool
430rb_shape_has_fields(shape_id_t shape_id)
431{
432 return shape_id & (SHAPE_ID_OFFSET_MASK | SHAPE_ID_FL_TOO_COMPLEX);
433}
434
435static inline bool
436rb_shape_obj_has_fields(VALUE obj)
437{
438 return rb_shape_has_fields(RBASIC_SHAPE_ID(obj));
439}
440
441static inline bool
442rb_obj_gen_fields_p(VALUE obj)
443{
444 switch (TYPE(obj)) {
445 case T_NONE:
446 case T_OBJECT:
447 case T_CLASS:
448 case T_MODULE:
449 case T_IMEMO:
450 return false;
451 default:
452 break;
453 }
454 return rb_shape_obj_has_fields(obj);
455}
456
457static inline bool
458rb_obj_using_gen_fields_table_p(VALUE obj)
459{
460 switch (BUILTIN_TYPE(obj)) {
461 case T_DATA:
462 if (RTYPEDDATA_P(obj)) return false;
463 break;
464
465 case T_STRUCT:
466 if (!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS)) return false;
467 break;
468
469 default:
470 break;
471 }
472
473 return rb_obj_gen_fields_p(obj);
474}
475
476// For ext/objspace
477RUBY_SYMBOL_EXPORT_BEGIN
478typedef void each_shape_callback(shape_id_t shape_id, void *data);
479void rb_shape_each_shape_id(each_shape_callback callback, void *data);
480size_t rb_shape_memsize(shape_id_t shape);
481size_t rb_shape_edges_count(shape_id_t shape_id);
482size_t rb_shape_depth(shape_id_t shape_id);
483RUBY_SYMBOL_EXPORT_END
484
485#endif
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
std::atomic< unsigned > rb_atomic_t
Type that is eligible for atomic operations.
Definition atomic.h:69
#define RUBY_ATOMIC_LOAD(var)
Atomic load.
Definition atomic.h:175
#define RUBY_EXTERN
Declaration of externally visible global variables.
Definition dllexport.h:45
#define TYPE(_)
Old name of rb_type.
Definition value_type.h:108
#define T_IMEMO
Old name of RUBY_T_IMEMO.
Definition value_type.h:67
#define T_STRUCT
Old name of RUBY_T_STRUCT.
Definition value_type.h:79
#define T_DATA
Old name of RUBY_T_DATA.
Definition value_type.h:60
#define T_NONE
Old name of RUBY_T_NONE.
Definition value_type.h:74
#define T_MODULE
Old name of RUBY_T_MODULE.
Definition value_type.h:70
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
Definition fl_type.h:128
#define T_OBJECT
Old name of RUBY_T_OBJECT.
Definition value_type.h:75
#define T_CLASS
Old name of RUBY_T_CLASS.
Definition value_type.h:58
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
Definition value_type.h:85
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RBASIC(obj)
Convenient casting macro.
Definition rbasic.h:40
#define ROBJECT(obj)
Convenient casting macro.
Definition robject.h:43
static bool RTYPEDDATA_P(VALUE obj)
Checks whether the passed object is RTypedData or RData.
Definition rtypeddata.h:669
static bool RB_SPECIAL_CONST_P(VALUE obj)
Checks if the given object is of enum ruby_special_consts.
Definition st.h:79
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376
@ RUBY_T_OBJECT
Definition value_type.h:116