Ruby 3.5.0dev (2025-06-27 revision 8bba087ae567421cd574b5b5772fd3039ff39b4d)
shape.h (8bba087ae567421cd574b5b5772fd3039ff39b4d)
1#ifndef RUBY_SHAPE_H
2#define RUBY_SHAPE_H
3
4#include "internal/gc.h"
5
6typedef uint16_t attr_index_t;
7typedef uint32_t shape_id_t;
8#define SHAPE_ID_NUM_BITS 32
9#define SHAPE_ID_OFFSET_NUM_BITS 19
10
11STATIC_ASSERT(shape_id_num_bits, SHAPE_ID_NUM_BITS == sizeof(shape_id_t) * CHAR_BIT);
12
13#define SHAPE_BUFFER_SIZE (1 << SHAPE_ID_OFFSET_NUM_BITS)
14#define SHAPE_ID_OFFSET_MASK (SHAPE_BUFFER_SIZE - 1)
15
16#define SHAPE_ID_HEAP_INDEX_BITS 3
17#define SHAPE_ID_HEAP_INDEX_MAX ((1 << SHAPE_ID_HEAP_INDEX_BITS) - 1)
18
19#define SHAPE_ID_FL_USHIFT (SHAPE_ID_OFFSET_NUM_BITS + SHAPE_ID_HEAP_INDEX_BITS)
20#define SHAPE_ID_HEAP_INDEX_OFFSET SHAPE_ID_FL_USHIFT
21
22// shape_id_t bits:
23// 0-18 SHAPE_ID_OFFSET_MASK
24// index in rb_shape_tree.shape_list. Allow to access `rb_shape_t *`.
25// 19-21 SHAPE_ID_HEAP_INDEX_MASK
26// index in rb_shape_tree.capacities. Allow to access slot size.
27// 22 SHAPE_ID_FL_FROZEN
28// Whether the object is frozen or not.
29// 23 SHAPE_ID_FL_HAS_OBJECT_ID
30// Whether the object has an `SHAPE_OBJ_ID` transition.
31// 24 SHAPE_ID_FL_TOO_COMPLEX
32// The object is backed by a `st_table`.
33
34enum shape_id_fl_type {
35#define RBIMPL_SHAPE_ID_FL(n) (1<<(SHAPE_ID_FL_USHIFT+n))
36
37 SHAPE_ID_HEAP_INDEX_MASK = RBIMPL_SHAPE_ID_FL(0) | RBIMPL_SHAPE_ID_FL(1) | RBIMPL_SHAPE_ID_FL(2),
38
39 SHAPE_ID_FL_FROZEN = RBIMPL_SHAPE_ID_FL(3),
40 SHAPE_ID_FL_HAS_OBJECT_ID = RBIMPL_SHAPE_ID_FL(4),
41 SHAPE_ID_FL_TOO_COMPLEX = RBIMPL_SHAPE_ID_FL(5),
42
43 SHAPE_ID_FL_NON_CANONICAL_MASK = SHAPE_ID_FL_FROZEN | SHAPE_ID_FL_HAS_OBJECT_ID,
44 SHAPE_ID_FLAGS_MASK = SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_NON_CANONICAL_MASK | SHAPE_ID_FL_TOO_COMPLEX,
45
46#undef RBIMPL_SHAPE_ID_FL
47};
48
49// This masks allows to check if a shape_id contains any ivar.
50// It rely on ROOT_SHAPE_WITH_OBJ_ID==1.
51#define SHAPE_ID_HAS_IVAR_MASK (SHAPE_ID_FL_TOO_COMPLEX | (SHAPE_ID_OFFSET_MASK - 1))
52
53// The interpreter doesn't care about frozen status or slot size when reading ivars.
54// So we normalize shape_id by clearing these bits to improve cache hits.
55// JITs however might care about it.
56#define SHAPE_ID_READ_ONLY_MASK (~(SHAPE_ID_FL_FROZEN | SHAPE_ID_HEAP_INDEX_MASK))
57
58typedef uint32_t redblack_id_t;
59
60#define SHAPE_MAX_FIELDS (attr_index_t)(-1)
61#define SHAPE_FLAG_SHIFT ((SIZEOF_VALUE * CHAR_BIT) - SHAPE_ID_NUM_BITS)
62#define SHAPE_FLAG_MASK (((VALUE)-1) >> SHAPE_ID_NUM_BITS)
63
64#define SHAPE_MAX_VARIATIONS 8
65
66#define INVALID_SHAPE_ID ((shape_id_t)-1)
67#define ATTR_INDEX_NOT_SET ((attr_index_t)-1)
68
69#define ROOT_SHAPE_ID 0x0
70#define ROOT_SHAPE_WITH_OBJ_ID 0x1
71#define ROOT_TOO_COMPLEX_SHAPE_ID (ROOT_SHAPE_ID | SHAPE_ID_FL_TOO_COMPLEX)
72#define ROOT_TOO_COMPLEX_WITH_OBJ_ID (ROOT_SHAPE_WITH_OBJ_ID | SHAPE_ID_FL_TOO_COMPLEX | SHAPE_ID_FL_HAS_OBJECT_ID)
73#define SPECIAL_CONST_SHAPE_ID (ROOT_SHAPE_ID | SHAPE_ID_FL_FROZEN)
74
75typedef struct redblack_node redblack_node_t;
76
77struct rb_shape {
78 VALUE edges; // id_table from ID (ivar) to next shape
79 ID edge_name; // ID (ivar) for transition from parent to rb_shape
80 redblack_node_t *ancestor_index;
81 shape_id_t parent_id;
82 attr_index_t next_field_index; // Fields are either ivars or internal properties like `object_id`
83 attr_index_t capacity; // Total capacity of the object with this shape
84 uint8_t type;
85};
86
87typedef struct rb_shape rb_shape_t;
88
90 ID key;
91 rb_shape_t *value;
92 redblack_id_t l;
93 redblack_id_t r;
94};
95
96enum shape_type {
97 SHAPE_ROOT,
98 SHAPE_IVAR,
99 SHAPE_OBJ_ID,
100};
101
102enum shape_flags {
103 SHAPE_FL_FROZEN = 1 << 0,
104 SHAPE_FL_HAS_OBJECT_ID = 1 << 1,
105 SHAPE_FL_TOO_COMPLEX = 1 << 2,
106
107 SHAPE_FL_NON_CANONICAL_MASK = SHAPE_FL_FROZEN | SHAPE_FL_HAS_OBJECT_ID,
108};
109
110typedef struct {
111 /* object shapes */
112 rb_shape_t *shape_list;
113 rb_shape_t *root_shape;
114 const attr_index_t *capacities;
115 rb_atomic_t next_shape_id;
116
117 redblack_node_t *shape_cache;
118 unsigned int cache_size;
120
121RUBY_SYMBOL_EXPORT_BEGIN
122RUBY_EXTERN rb_shape_tree_t rb_shape_tree;
123RUBY_SYMBOL_EXPORT_END
124
126 uint64_t pack;
127 struct {
128 shape_id_t shape_id;
129 attr_index_t index;
130 } unpack;
131};
132
133static inline shape_id_t
134RBASIC_SHAPE_ID(VALUE obj)
135{
137 RUBY_ASSERT(!RB_TYPE_P(obj, T_IMEMO) || IMEMO_TYPE_P(obj, imemo_fields));
138#if RBASIC_SHAPE_ID_FIELD
139 return (shape_id_t)((RBASIC(obj)->shape_id));
140#else
141 return (shape_id_t)((RBASIC(obj)->flags) >> SHAPE_FLAG_SHIFT);
142#endif
143}
144
145// Same as RBASIC_SHAPE_ID but with flags that have no impact
146// on reads removed. e.g. Remove FL_FROZEN.
147static inline shape_id_t
148RBASIC_SHAPE_ID_FOR_READ(VALUE obj)
149{
150 return RBASIC_SHAPE_ID(obj) & SHAPE_ID_READ_ONLY_MASK;
151}
152
153#if RUBY_DEBUG
154bool rb_shape_verify_consistency(VALUE obj, shape_id_t shape_id);
155#endif
156
157static inline void
158RBASIC_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
159{
161 RUBY_ASSERT(!RB_TYPE_P(obj, T_IMEMO) || IMEMO_TYPE_P(obj, imemo_fields));
162#if RBASIC_SHAPE_ID_FIELD
163 RBASIC(obj)->shape_id = (VALUE)shape_id;
164#else
165 // Object shapes are occupying top bits
166 RBASIC(obj)->flags &= SHAPE_FLAG_MASK;
167 RBASIC(obj)->flags |= ((VALUE)(shape_id) << SHAPE_FLAG_SHIFT);
168#endif
169 RUBY_ASSERT(rb_shape_verify_consistency(obj, shape_id));
170}
171
172void rb_set_namespaced_class_shape_id(VALUE obj, shape_id_t shape_id);
173
174static inline void
175RB_SET_SHAPE_ID(VALUE obj, shape_id_t shape_id)
176{
177 switch (BUILTIN_TYPE(obj)) {
178 case T_CLASS:
179 case T_MODULE:
180 rb_set_namespaced_class_shape_id(obj, shape_id);
181 break;
182 default:
183 RBASIC_SET_SHAPE_ID(obj, shape_id);
184 break;
185 }
186}
187
188static inline rb_shape_t *
189RSHAPE(shape_id_t shape_id)
190{
191 uint32_t offset = (shape_id & SHAPE_ID_OFFSET_MASK);
192 RUBY_ASSERT(offset != INVALID_SHAPE_ID);
193
194 return &rb_shape_tree.shape_list[offset];
195}
196
197int32_t rb_shape_id_offset(void);
198
199RUBY_FUNC_EXPORTED shape_id_t rb_obj_shape_id(VALUE obj);
200shape_id_t rb_shape_get_next_iv_shape(shape_id_t shape_id, ID id);
201bool rb_shape_get_iv_index(shape_id_t shape_id, ID id, attr_index_t *value);
202bool rb_shape_get_iv_index_with_hint(shape_id_t shape_id, ID id, attr_index_t *value, shape_id_t *shape_id_hint);
203bool rb_shape_find_ivar(shape_id_t shape_id, ID id, shape_id_t *ivar_shape);
204
205typedef int rb_shape_foreach_transition_callback(shape_id_t shape_id, void *data);
206bool rb_shape_foreach_field(shape_id_t shape_id, rb_shape_foreach_transition_callback func, void *data);
207
208shape_id_t rb_shape_transition_frozen(VALUE obj);
209shape_id_t rb_shape_transition_complex(VALUE obj);
210shape_id_t rb_shape_transition_remove_ivar(VALUE obj, ID id, shape_id_t *removed_shape_id);
211shape_id_t rb_shape_transition_add_ivar(VALUE obj, ID id);
212shape_id_t rb_shape_transition_add_ivar_no_warnings(VALUE obj, ID id);
213shape_id_t rb_shape_transition_object_id(VALUE obj);
214shape_id_t rb_shape_transition_heap(VALUE obj, size_t heap_index);
215shape_id_t rb_shape_object_id(shape_id_t original_shape_id);
216
217void rb_shape_free_all(void);
218
219shape_id_t rb_shape_rebuild(shape_id_t initial_shape_id, shape_id_t dest_shape_id);
220void rb_shape_copy_fields(VALUE dest, VALUE *dest_buf, shape_id_t dest_shape_id, VALUE src, VALUE *src_buf, shape_id_t src_shape_id);
221void rb_shape_copy_complex_ivars(VALUE dest, VALUE obj, shape_id_t src_shape_id, st_table *fields_table);
222
223static inline bool
224rb_shape_too_complex_p(shape_id_t shape_id)
225{
226 return shape_id & SHAPE_ID_FL_TOO_COMPLEX;
227}
228
229static inline bool
230rb_shape_obj_too_complex_p(VALUE obj)
231{
232 return !RB_SPECIAL_CONST_P(obj) && rb_shape_too_complex_p(RBASIC_SHAPE_ID(obj));
233}
234
235static inline bool
236rb_shape_has_object_id(shape_id_t shape_id)
237{
238 return shape_id & SHAPE_ID_FL_HAS_OBJECT_ID;
239}
240
241static inline bool
242rb_shape_canonical_p(shape_id_t shape_id)
243{
244 return !(shape_id & SHAPE_ID_FL_NON_CANONICAL_MASK);
245}
246
247static inline uint8_t
248rb_shape_heap_index(shape_id_t shape_id)
249{
250 return (uint8_t)((shape_id & SHAPE_ID_HEAP_INDEX_MASK) >> SHAPE_ID_HEAP_INDEX_OFFSET);
251}
252
253static inline shape_id_t
254rb_shape_root(size_t heap_id)
255{
256 shape_id_t heap_index = (shape_id_t)(heap_id + 1);
257 shape_id_t heap_flags = heap_index << SHAPE_ID_HEAP_INDEX_OFFSET;
258
259 RUBY_ASSERT((heap_flags & SHAPE_ID_HEAP_INDEX_MASK) == heap_flags);
260 RUBY_ASSERT(rb_shape_heap_index(heap_flags) == heap_index);
261
262 return ROOT_SHAPE_ID | heap_flags;
263}
264
265static inline shape_id_t
266RSHAPE_PARENT(shape_id_t shape_id)
267{
268 return RSHAPE(shape_id)->parent_id;
269}
270
271static inline enum shape_type
272RSHAPE_TYPE(shape_id_t shape_id)
273{
274 return RSHAPE(shape_id)->type;
275}
276
277static inline bool
278RSHAPE_TYPE_P(shape_id_t shape_id, enum shape_type type)
279{
280 return RSHAPE_TYPE(shape_id) == type;
281}
282
283static inline attr_index_t
284RSHAPE_EMBEDDED_CAPACITY(shape_id_t shape_id)
285{
286 uint8_t heap_index = rb_shape_heap_index(shape_id);
287 if (heap_index) {
288 return rb_shape_tree.capacities[heap_index - 1];
289 }
290 return 0;
291}
292
293static inline attr_index_t
294RSHAPE_CAPACITY(shape_id_t shape_id)
295{
296 attr_index_t embedded_capacity = RSHAPE_EMBEDDED_CAPACITY(shape_id);
297
298 if (embedded_capacity > RSHAPE(shape_id)->capacity) {
299 return embedded_capacity;
300 }
301 else {
302 return RSHAPE(shape_id)->capacity;
303 }
304}
305
306static inline attr_index_t
307RSHAPE_LEN(shape_id_t shape_id)
308{
309 return RSHAPE(shape_id)->next_field_index;
310}
311
312static inline attr_index_t
313RSHAPE_INDEX(shape_id_t shape_id)
314{
315 return RSHAPE_LEN(shape_id) - 1;
316}
317
318static inline ID
319RSHAPE_EDGE_NAME(shape_id_t shape_id)
320{
321 return RSHAPE(shape_id)->edge_name;
322}
323
324static inline uint32_t
325ROBJECT_FIELDS_CAPACITY(VALUE obj)
326{
327 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
328 // Asking for capacity doesn't make sense when the object is using
329 // a hash table for storing instance variables
330 RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj));
331 return RSHAPE_CAPACITY(RBASIC_SHAPE_ID(obj));
332}
333
334static inline st_table *
335ROBJECT_FIELDS_HASH(VALUE obj)
336{
337 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
338 RUBY_ASSERT(rb_shape_obj_too_complex_p(obj));
339 return (st_table *)ROBJECT(obj)->as.heap.fields;
340}
341
342static inline void
343ROBJECT_SET_FIELDS_HASH(VALUE obj, const st_table *tbl)
344{
345 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
346 RUBY_ASSERT(rb_shape_obj_too_complex_p(obj));
347 ROBJECT(obj)->as.heap.fields = (VALUE *)tbl;
348}
349
350static inline uint32_t
351ROBJECT_FIELDS_COUNT(VALUE obj)
352{
353 if (rb_shape_obj_too_complex_p(obj)) {
354 return (uint32_t)rb_st_table_size(ROBJECT_FIELDS_HASH(obj));
355 }
356 else {
357 RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
358 RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj));
359 return RSHAPE(RBASIC_SHAPE_ID(obj))->next_field_index;
360 }
361}
362
363static inline uint32_t
364RBASIC_FIELDS_COUNT(VALUE obj)
365{
366 return RSHAPE(rb_obj_shape_id(obj))->next_field_index;
367}
368
369bool rb_obj_set_shape_id(VALUE obj, shape_id_t shape_id);
370
371static inline bool
372rb_shape_obj_has_id(VALUE obj)
373{
374 return rb_shape_has_object_id(RBASIC_SHAPE_ID(obj));
375}
376
377static inline bool
378rb_shape_has_ivars(shape_id_t shape_id)
379{
380 return shape_id & SHAPE_ID_HAS_IVAR_MASK;
381}
382
383static inline bool
384rb_shape_obj_has_ivars(VALUE obj)
385{
386 return rb_shape_has_ivars(RBASIC_SHAPE_ID(obj));
387}
388
389static inline bool
390rb_shape_has_fields(shape_id_t shape_id)
391{
392 return shape_id & (SHAPE_ID_OFFSET_MASK | SHAPE_ID_FL_TOO_COMPLEX);
393}
394
395static inline bool
396rb_shape_obj_has_fields(VALUE obj)
397{
398 return rb_shape_has_fields(RBASIC_SHAPE_ID(obj));
399}
400
401static inline bool
402rb_obj_exivar_p(VALUE obj)
403{
404 switch (TYPE(obj)) {
405 case T_NONE:
406 case T_OBJECT:
407 case T_CLASS:
408 case T_MODULE:
409 case T_IMEMO:
410 return false;
411 default:
412 break;
413 }
414 return rb_shape_obj_has_fields(obj);
415}
416
417// For ext/objspace
418RUBY_SYMBOL_EXPORT_BEGIN
419typedef void each_shape_callback(shape_id_t shape_id, void *data);
420void rb_shape_each_shape_id(each_shape_callback callback, void *data);
421size_t rb_shape_memsize(shape_id_t shape);
422size_t rb_shape_edges_count(shape_id_t shape_id);
423size_t rb_shape_depth(shape_id_t shape_id);
424RUBY_SYMBOL_EXPORT_END
425
426#endif
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
std::atomic< unsigned > rb_atomic_t
Type that is eligible for atomic operations.
Definition atomic.h:69
#define RUBY_EXTERN
Declaration of externally visible global variables.
Definition dllexport.h:45
#define TYPE(_)
Old name of rb_type.
Definition value_type.h:108
#define T_IMEMO
Old name of RUBY_T_IMEMO.
Definition value_type.h:67
#define T_NONE
Old name of RUBY_T_NONE.
Definition value_type.h:74
#define T_MODULE
Old name of RUBY_T_MODULE.
Definition value_type.h:70
#define T_OBJECT
Old name of RUBY_T_OBJECT.
Definition value_type.h:75
#define T_CLASS
Old name of RUBY_T_CLASS.
Definition value_type.h:58
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
Definition value_type.h:85
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RBASIC(obj)
Convenient casting macro.
Definition rbasic.h:40
#define ROBJECT(obj)
Convenient casting macro.
Definition robject.h:43
static bool RB_SPECIAL_CONST_P(VALUE obj)
Checks if the given object is of enum ruby_special_consts.
Definition st.h:79
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376
@ RUBY_T_OBJECT
Definition value_type.h:116