Ruby 3.5.0dev (2025-08-27 revision d9e9a667a8c8fb6f57611c68b45eaf1f2c39fca1)
symbol.c (d9e9a667a8c8fb6f57611c68b45eaf1f2c39fca1)
1/**********************************************************************
2
3 symbol.c -
4
5 $Author$
6 created at: Tue Jul 8 15:49:54 JST 2014
7
8 Copyright (C) 2014 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "internal.h"
13#include "internal/concurrent_set.h"
14#include "internal/error.h"
15#include "internal/gc.h"
16#include "internal/hash.h"
17#include "internal/object.h"
18#include "internal/symbol.h"
19#include "internal/vm.h"
20#include "probes.h"
21#include "ruby/encoding.h"
22#include "ruby/st.h"
23#include "symbol.h"
24#include "vm_sync.h"
25#include "builtin.h"
27
/*
 * Normalize the build-time switches below to exactly 0 or 1.
 *
 * USE_SYMBOL_GC ends up 0 only when it was predefined with a zero (or
 * empty) value; in every other case, including "not defined", it is 1.
 */
28#if defined(USE_SYMBOL_GC) && !(USE_SYMBOL_GC+0)
29# undef USE_SYMBOL_GC
30# define USE_SYMBOL_GC 0
31#else
32# undef USE_SYMBOL_GC
33# define USE_SYMBOL_GC 1
34#endif
/* SYMBOL_DEBUG ends up 1 only when it was predefined with a non-zero value. */
35#if defined(SYMBOL_DEBUG) && (SYMBOL_DEBUG+0)
36# undef SYMBOL_DEBUG
37# define SYMBOL_DEBUG 1
38#else
39# undef SYMBOL_DEBUG
40# define SYMBOL_DEBUG 0
41#endif
/* CHECK_ID_SERIAL follows SYMBOL_DEBUG unless it was set explicitly. */
42#ifndef CHECK_ID_SERIAL
43# define CHECK_ID_SERIAL SYMBOL_DEBUG
44#endif
45
/* Bit set (one bit per ID type) with only ID_LOCAL and ID_CONST set;
 * rb_enc_symname2_p() passes it as allowed_attrset. */
46#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
/* The low (1<<ID_SCOPE_SHIFT) bits set, with the ID_ATTRSET bit cleared;
 * sym_set_create() passes it as allowed_attrset. */
47#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
48
/* Non-zero when the dynamic symbol's id has any bit set outside
 * ID_SCOPE_MASK. */
49#define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)
50
/* Converts a static symbol VALUE to its ID by shifting right by
 * RUBY_SPECIAL_SHIFT. */
51#define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT)
52
53static ID register_static_symid(ID, const char *, long, rb_encoding *);
/* Registers a NUL-terminated name; expects a variable named `enc` to be in
 * scope where the macro is expanded. */
54#define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
55#include "id.c"
56
/* True for an ASCII alphanumeric, '_', or any non-ASCII byte.  The `e` and
 * `enc` arguments are accepted but not used. */
57#define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
58
59#define op_tbl_count numberof(op_tbl)
60STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
/* Length (1..3) of the i-th operator name: names are 3-byte arrays and a
 * zero byte at index 1 or 2 marks a shorter name. */
61#define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
62
63
/*
 * Statement prefix: runs the following block once, with `symbols` declared
 * as a pointer to ruby_global_symbols, and with the block placed under
 * RB_VM_LOCKING().  The `locking` variable only serves to make the outer
 * `for` execute exactly one iteration.
 */
64#define GLOBAL_SYMBOLS_LOCKING(symbols) \
65 for (rb_symbols_t *symbols = &ruby_global_symbols, **locking = &symbols; \
66 locking; \
67 locking = NULL) \
68 RB_VM_LOCKING()
69
70static void
71Init_op_tbl(void)
72{
73 int i;
74 rb_encoding *const enc = rb_usascii_encoding();
75
76 for (i = '!'; i <= '~'; ++i) {
77 if (!ISALNUM(i) && i != '_') {
78 char c = (char)i;
79 register_static_symid(i, &c, 1, enc);
80 }
81 }
82 for (i = 0; i < op_tbl_count; ++i) {
83 register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
84 }
85}
86
/* Number of ID serials covered by one chunk array of the `ids` table
 * (see set_id_entry / get_id_serial_entry). */
87static const int ID_ENTRY_UNIT = 512;
88
/* Slot offsets inside the record kept for one serial in a chunk array;
 * ID_ENTRY_SIZE is the number of slots per record. */
89enum id_entry_type {
90 ID_ENTRY_STR,
91 ID_ENTRY_SYM,
92 ID_ENTRY_SIZE
93};
94
95typedef struct {
96 rb_atomic_t next_id;
97 VALUE sym_set;
98
99 VALUE ids;
101
/* The single global symbol table; serial allocation starts at tNEXT_ID.
 * sym_set and ids are filled in by Init_sym(). */
102rb_symbols_t ruby_global_symbols = {
103 .next_id = tNEXT_ID,
104};
105
107 VALUE sym;
108 VALUE str;
109};
110
111#define SYM_SET_SYM_STATIC_TAG 1
112
113static bool
114sym_set_sym_static_p(VALUE sym)
115{
116 return sym & SYM_SET_SYM_STATIC_TAG;
117}
118
119static VALUE
120sym_set_static_sym_tag(struct sym_set_static_sym_entry *sym)
121{
122 VALUE value = (VALUE)sym | SYM_SET_SYM_STATIC_TAG;
123 RUBY_ASSERT(IMMEDIATE_P(value));
124 RUBY_ASSERT(sym_set_sym_static_p(value));
125
126 return value;
127}
128
129static struct sym_set_static_sym_entry *
130sym_set_static_sym_untag(VALUE sym)
131{
132 RUBY_ASSERT(sym_set_sym_static_p(sym));
133
134 return (struct sym_set_static_sym_entry *)(sym & ~((VALUE)SYM_SET_SYM_STATIC_TAG));
135}
136
137static VALUE
138sym_set_sym_get_str(VALUE sym)
139{
140 VALUE str;
141 if (sym_set_sym_static_p(sym)) {
142 str = sym_set_static_sym_untag(sym)->str;
143 }
144 else {
146 str = RSYMBOL(sym)->fstr;
147 }
148
150
151 return str;
152}
153
154static VALUE
155sym_set_hash(VALUE sym)
156{
157 if (sym_set_sym_static_p(sym)) {
158 return (VALUE)rb_str_hash(sym_set_static_sym_untag(sym)->str);
159 }
160 else {
161 return (VALUE)RSYMBOL(sym)->hashval;
162 }
163}
164
165static bool
166sym_set_cmp(VALUE a, VALUE b)
167{
168 return rb_str_hash_cmp(sym_set_sym_get_str(a), sym_set_sym_get_str(b)) == false;
169}
170
171
172static int
173sym_check_asciionly(VALUE str, bool fake_str)
174{
175 if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
176 switch (rb_enc_str_coderange(str)) {
178 if (fake_str) {
179 str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str));
180 }
181 rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
182 rb_enc_name(rb_enc_get(str)), str);
184 return TRUE;
185 }
186 return FALSE;
187}
188
189static VALUE
190dup_string_for_create(VALUE str)
191{
192 rb_encoding *enc = rb_enc_get(str);
193
194 str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), enc);
195
196 rb_encoding *ascii = rb_usascii_encoding();
197 if (enc != ascii && sym_check_asciionly(str, false)) {
198 rb_enc_associate(str, ascii);
199 }
200 OBJ_FREEZE(str);
201
202 str = rb_fstring(str);
203
204 return str;
205}
206
207static int
208rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
209{
210 const char *ptr = StringValuePtr(name);
211 long len = RSTRING_LEN(name);
212 int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
213 RB_GC_GUARD(name);
214 return type;
215}
216
217static ID
218next_id_base(void)
219{
220 rb_atomic_t serial = RUBY_ATOMIC_FETCH_ADD(ruby_global_symbols.next_id, 1);
221
222 return (ID)serial << ID_SCOPE_SHIFT;
223}
224
225static void
226set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym)
227{
228 ASSERT_vm_locking();
231
232 size_t idx = num / ID_ENTRY_UNIT;
233
234 VALUE ary, ids = symbols->ids;
235 if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
236 ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
237 rb_ary_store(ids, (long)idx, ary);
238 }
239 idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
240 rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
241 rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
242}
243
244static VALUE
245sym_set_create(VALUE sym, void *data)
246{
247 bool create_dynamic_symbol = (bool)data;
248
249 struct sym_set_static_sym_entry *static_sym_entry = sym_set_static_sym_untag(sym);
250
251 VALUE str = dup_string_for_create(static_sym_entry->str);
252
253 if (create_dynamic_symbol) {
254 NEWOBJ_OF(obj, struct RSymbol, rb_cSymbol, T_SYMBOL | FL_WB_PROTECTED, sizeof(struct RSymbol), 0);
255
256 rb_encoding *enc = rb_enc_get(str);
257 rb_enc_set_index((VALUE)obj, rb_enc_to_index(enc));
258 OBJ_FREEZE((VALUE)obj);
259 RB_OBJ_WRITE((VALUE)obj, &obj->fstr, str);
260
261 int id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
262 if (id < 0) id = ID_INTERNAL;
263 obj->id = id;
264
265 obj->hashval = rb_str_hash(str);
266 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(obj->fstr));
267
268 return (VALUE)obj;
269 }
270 else {
271 struct sym_set_static_sym_entry *new_static_sym_entry = xmalloc(sizeof(struct sym_set_static_sym_entry));
272 new_static_sym_entry->str = str;
273
274 VALUE static_sym = static_sym_entry->sym;
275 if (static_sym == 0) {
276 ID id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
277 if (id == (ID)-1) id = ID_INTERNAL;
278
279 id |= next_id_base();
280 id |= ID_STATIC_SYM;
281
282 static_sym = STATIC_ID2SYM(id);
283 }
284 new_static_sym_entry->sym = static_sym;
285
286 RB_VM_LOCKING() {
287 set_id_entry(&ruby_global_symbols, rb_id_to_serial(STATIC_SYM2ID(static_sym)), str, static_sym);
288 }
289
290 return sym_set_static_sym_tag(new_static_sym_entry);
291 }
292}
293
294static void
295sym_set_free(VALUE sym)
296{
297 if (sym_set_sym_static_p(sym)) {
298 xfree(sym_set_static_sym_untag(sym));
299 }
300}
301
/* Callback table handed to rb_concurrent_set_new() in Init_sym(). */
302static const struct rb_concurrent_set_funcs sym_set_funcs = {
303 .hash = sym_set_hash,
304 .cmp = sym_set_cmp,
305 .create = sym_set_create,
306 .free = sym_set_free,
307};
308
309static VALUE
310sym_set_entry_to_sym(VALUE entry)
311{
312 if (sym_set_sym_static_p(entry)) {
313 RUBY_ASSERT(STATIC_SYM_P(sym_set_static_sym_untag(entry)->sym));
314
315 if (!STATIC_SYM_P(sym_set_static_sym_untag(entry)->sym)) rb_bug("not sym");
316
317 return sym_set_static_sym_untag(entry)->sym;
318 }
319 else {
321 if (!DYNAMIC_SYM_P(entry)) rb_bug("not sym");
322
323 return entry;
324 }
325}
326
327static VALUE
328sym_find_or_insert_dynamic_symbol(rb_symbols_t *symbols, const VALUE str)
329{
330 struct sym_set_static_sym_entry static_sym = {
331 .str = str
332 };
333 return sym_set_entry_to_sym(
334 rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)true)
335 );
336}
337
338static VALUE
339sym_find_or_insert_static_symbol(rb_symbols_t *symbols, const VALUE str)
340{
341 struct sym_set_static_sym_entry static_sym = {
342 .str = str
343 };
344 return sym_set_entry_to_sym(
345 rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)false)
346 );
347}
348
349static VALUE
350sym_find_or_insert_static_symbol_id(rb_symbols_t *symbols, const VALUE str, ID id)
351{
352 struct sym_set_static_sym_entry static_sym = {
353 .sym = STATIC_ID2SYM(id),
354 .str = str,
355 };
356 return sym_set_entry_to_sym(
357 rb_concurrent_set_find_or_insert(&symbols->sym_set, sym_set_static_sym_tag(&static_sym), (void *)false)
358 );
359}
360
361void
362Init_sym(void)
363{
364 rb_symbols_t *symbols = &ruby_global_symbols;
365
366 symbols->sym_set = rb_concurrent_set_new(&sym_set_funcs, 1024);
367 symbols->ids = rb_ary_hidden_new(0);
368
369 Init_op_tbl();
370 Init_id();
371}
372
373void
374rb_sym_global_symbols_mark_and_move(void)
375{
376 rb_symbols_t *symbols = &ruby_global_symbols;
377
378 rb_gc_mark_and_move(&symbols->sym_set);
379 rb_gc_mark_and_move(&symbols->ids);
380}
381
382static int
383rb_free_global_symbol_table_i(VALUE *sym_ptr, void *data)
384{
385 sym_set_free(*sym_ptr);
386
387 return ST_DELETE;
388}
389
390void
391rb_free_global_symbol_table(void)
392{
393 rb_concurrent_set_foreach_with_replace(ruby_global_symbols.sym_set, rb_free_global_symbol_table_i, NULL);
394}
395
396WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
397WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
398
399ID
400rb_id_attrset(ID id)
401{
402 int scope;
403
404 if (!is_notop_id(id)) {
405 switch (id) {
406 case tAREF: case tASET:
407 return tASET; /* only exception */
408 }
409 rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
410 rb_id2str(id));
411 }
412 else {
413 scope = id_type(id);
414 switch (scope) {
415 case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
416 case ID_CONST: case ID_CLASS: case ID_INTERNAL:
417 break;
418 case ID_ATTRSET:
419 return id;
420 default:
421 {
422 VALUE str = lookup_id_str(id);
423 if (str != 0) {
424 rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
425 scope, str);
426 }
427 else {
428 rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
429 scope, (VALUE)id);
430 }
431 }
432 }
433 }
434
435 bool error = false;
436 /* make new symbol and ID */
437 VALUE str = lookup_id_str(id);
438 if (str) {
439 str = rb_str_dup(str);
440 rb_str_cat(str, "=", 1);
441 if (sym_check_asciionly(str, false)) {
442 rb_enc_associate(str, rb_usascii_encoding());
443 }
444
445 VALUE sym = sym_find_or_insert_static_symbol(&ruby_global_symbols, str);
446 id = rb_sym2id(sym);
447 }
448 else {
449 error = true;
450 }
451
452 if (error) {
453 RBIMPL_ATTR_NONSTRING_ARRAY() static const char id_types[][8] = {
454 "local",
455 "instance",
456 "invalid",
457 "global",
458 "attrset",
459 "const",
460 "class",
461 "internal",
462 };
463 rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
464 (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
465 }
466
467 return id;
468}
469
470static int
471is_special_global_name(const char *m, const char *e, rb_encoding *enc)
472{
473 int mb = 0;
474
475 if (m >= e) return 0;
476 if (is_global_name_punct(*m)) {
477 ++m;
478 }
479 else if (*m == '-') {
480 if (++m >= e) return 0;
481 if (is_identchar(m, e, enc)) {
482 if (!ISASCII(*m)) mb = 1;
483 m += rb_enc_mbclen(m, e, enc);
484 }
485 }
486 else {
487 if (!ISDIGIT(*m)) return 0;
488 do {
489 if (!ISASCII(*m)) mb = 1;
490 ++m;
491 } while (m < e && ISDIGIT(*m));
492 }
493 return m == e ? mb + 1 : 0;
494}
495
496int
497rb_symname_p(const char *name)
498{
499 return rb_enc_symname_p(name, rb_ascii8bit_encoding());
500}
501
502int
503rb_enc_symname_p(const char *name, rb_encoding *enc)
504{
505 return rb_enc_symname2_p(name, strlen(name), enc);
506}
507
508static int
509rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
510{
511 int c, len;
512 const char *end = name + nlen;
513
514 if (nlen < 1) return FALSE;
515 if (ISASCII(*name)) return ISUPPER(*name);
516 c = rb_enc_precise_mbclen(name, end, enc);
517 if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
519 c = rb_enc_mbc_to_codepoint(name, end, enc);
520 if (rb_enc_isupper(c, enc)) return TRUE;
521 if (rb_enc_islower(c, enc)) return FALSE;
522 if (ONIGENC_IS_UNICODE(enc)) {
523 static int ctype_titlecase = 0;
524 if (!ctype_titlecase) {
525 static const UChar cname[] = "titlecaseletter";
526 static const UChar *const end = cname + sizeof(cname) - 1;
527 ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
528 }
529 if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
530 }
531 else {
532 /* fallback to case-folding */
533 OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
534 const OnigUChar *beg = (const OnigUChar *)name;
535 int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
536 &beg, (const OnigUChar *)end,
537 fold, enc);
538 if (r > 0 && (r != len || memcmp(fold, name, r)))
539 return TRUE;
540 }
541 return FALSE;
542}
543
545 const enum { invalid, stophere, needmore, } kind;
546 const enum ruby_id_types type;
547 const long nread;
548};
549
550#define t struct enc_synmane_type_leading_chars_tag
551
553enc_synmane_type_leading_chars(const char *name, long len, rb_encoding *enc, int allowed_attrset)
554{
555 const char *m = name;
556 const char *e = m + len;
557
558 if (! rb_enc_asciicompat(enc)) {
559 return (t) { invalid, 0, 0, };
560 }
561 else if (! m) {
562 return (t) { invalid, 0, 0, };
563 }
564 else if ( len <= 0 ) {
565 return (t) { invalid, 0, 0, };
566 }
567 switch (*m) {
568 case '\0':
569 return (t) { invalid, 0, 0, };
570
571 case '$':
572 if (is_special_global_name(++m, e, enc)) {
573 return (t) { stophere, ID_GLOBAL, len, };
574 }
575 else {
576 return (t) { needmore, ID_GLOBAL, 1, };
577 }
578
579 case '@':
580 switch (*++m) {
581 default: return (t) { needmore, ID_INSTANCE, 1, };
582 case '@': return (t) { needmore, ID_CLASS, 2, };
583 }
584
585 case '<':
586 switch (*++m) {
587 default: return (t) { stophere, ID_INTERNAL, 1, };
588 case '<': return (t) { stophere, ID_INTERNAL, 2, };
589 case '=':
590 switch (*++m) {
591 default: return (t) { stophere, ID_INTERNAL, 2, };
592 case '>': return (t) { stophere, ID_INTERNAL, 3, };
593 }
594 }
595
596 case '>':
597 switch (*++m) {
598 default: return (t) { stophere, ID_INTERNAL, 1, };
599 case '>': case '=': return (t) { stophere, ID_INTERNAL, 2, };
600 }
601
602 case '=':
603 switch (*++m) {
604 default: return (t) { invalid, 0, 1, };
605 case '~': return (t) { stophere, ID_INTERNAL, 2, };
606 case '=':
607 switch (*++m) {
608 default: return (t) { stophere, ID_INTERNAL, 2, };
609 case '=': return (t) { stophere, ID_INTERNAL, 3, };
610 }
611 }
612
613 case '*':
614 switch (*++m) {
615 default: return (t) { stophere, ID_INTERNAL, 1, };
616 case '*': return (t) { stophere, ID_INTERNAL, 2, };
617 }
618
619 case '+': case '-':
620 switch (*++m) {
621 default: return (t) { stophere, ID_INTERNAL, 1, };
622 case '@': return (t) { stophere, ID_INTERNAL, 2, };
623 }
624
625 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
626 return (t) { stophere, ID_INTERNAL, 1, };
627
628 case '[':
629 switch (*++m) {
630 default: return (t) { needmore, ID_INTERNAL, 0, };
631 case ']':
632 switch (*++m) {
633 default: return (t) { stophere, ID_INTERNAL, 2, };
634 case '=': return (t) { stophere, ID_INTERNAL, 3, };
635 }
636 }
637
638 case '!':
639 switch (*++m) {
640 case '=': case '~': return (t) { stophere, ID_INTERNAL, 2, };
641 default:
642 if (allowed_attrset & (1U << ID_INTERNAL)) {
643 return (t) { needmore, ID_INTERNAL, 1, };
644 }
645 else {
646 return (t) { stophere, ID_INTERNAL, 1, };
647 }
648 }
649
650 default:
651 if (rb_sym_constant_char_p(name, len, enc)) {
652 return (t) { needmore, ID_CONST, 0, };
653 }
654 else {
655 return (t) { needmore, ID_LOCAL, 0, };
656 }
657 }
658}
659#undef t
660
661int
662rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
663{
665 enc_synmane_type_leading_chars(name, len, enc, allowed_attrset);
666 const char *m = name + f.nread;
667 const char *e = name + len;
668 int type = (int)f.type;
669
670 switch (f.kind) {
671 case invalid: return -1;
672 case stophere: break;
673 case needmore:
674
675 if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
676 if (len > 1 && *(e-1) == '=') {
677 type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
678 if (allowed_attrset & (1U << type)) return ID_ATTRSET;
679 }
680 return -1;
681 }
682 while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
683 if (m >= e) break;
684 switch (*m) {
685 case '!': case '?':
686 if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
687 type = ID_INTERNAL;
688 ++m;
689 if (m + 1 < e || *m != '=') break;
690 /* fall through */
691 case '=':
692 if (!(allowed_attrset & (1U << type))) return -1;
693 type = ID_ATTRSET;
694 ++m;
695 break;
696 }
697 }
698
699 return m == e ? type : -1;
700}
701
702int
703rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
704{
705 return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
706}
707
/*
 * Fetches slot `t` (name string or symbol) of the record kept for serial
 * `num` in the global ids table, under GLOBAL_SYMBOLS_LOCKING.
 *
 * Returns 0 when `num` is 0 or not below next_id, when the chunk array
 * covering `num` is absent, or when the slot holds nil.  When
 * CHECK_ID_SERIAL is enabled and `id` is non-zero, also returns 0 if the
 * ID of the recorded symbol differs from `id`.
 */
708static VALUE
709get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
710{
711 VALUE result = 0;
712
713 GLOBAL_SYMBOLS_LOCKING(symbols) {
714 if (num && num < RUBY_ATOMIC_LOAD(symbols->next_id)) {
/* same chunk/slot arithmetic as set_id_entry() */
715 size_t idx = num / ID_ENTRY_UNIT;
716 VALUE ids = symbols->ids;
717 VALUE ary;
718 if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
719 long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
720 result = rb_ary_entry(ary, pos + t);
721
722 if (NIL_P(result)) {
723 result = 0;
724 }
725 else if (CHECK_ID_SERIAL) {
726 if (id) {
/* cross-check the caller's ID against the recorded symbol */
727 VALUE sym = result;
728 if (t != ID_ENTRY_SYM)
729 sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
730 if (STATIC_SYM_P(sym)) {
731 if (STATIC_SYM2ID(sym) != id) result = 0;
732 }
733 else {
734 if (RSYMBOL(sym)->id != id) result = 0;
735 }
736 }
737 }
738 }
739 }
740 }
741
742 if (result) {
743 switch (t) {
744 case ID_ENTRY_STR:
746 break;
747 case ID_ENTRY_SYM:
749 break;
750 default:
751 break;
752 }
753 }
754
755 return result;
756}
757
758static VALUE
759get_id_entry(ID id, const enum id_entry_type t)
760{
761 return get_id_serial_entry(rb_id_to_serial(id), id, t);
762}
763
764int
765rb_static_id_valid_p(ID id)
766{
767 return STATIC_ID2SYM(id) == get_id_entry(id, ID_ENTRY_SYM);
768}
769
770static inline ID
771rb_id_serial_to_id(rb_id_serial_t num)
772{
773 if (is_notop_id((ID)num)) {
774 VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
775 if (sym) return SYM2ID(sym);
776 return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM;
777 }
778 else {
779 return (ID)num;
780 }
781}
782
783static ID
784register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
785{
786 VALUE str = rb_enc_str_new(name, len, enc);
787 OBJ_FREEZE(str);
788 str = rb_fstring(str);
789
790 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
791
792 sym_find_or_insert_static_symbol_id(&ruby_global_symbols, str, id);
793
794 return id;
795}
796
797static VALUE
798sym_find(VALUE str)
799{
800 VALUE sym;
801
802 struct sym_set_static_sym_entry static_sym = {
803 .str = str
804 };
805 sym = rb_concurrent_set_find(&ruby_global_symbols.sym_set, sym_set_static_sym_tag(&static_sym));
806
807 if (sym) {
808 return sym_set_entry_to_sym(sym);
809 }
810 else {
811 return 0;
812 }
813}
814
815static ID
816lookup_str_id(VALUE str)
817{
818 VALUE sym = sym_find(str);
819
820 if (sym == 0) {
821 return (ID)0;
822 }
823
824 if (STATIC_SYM_P(sym)) {
825 return STATIC_SYM2ID(sym);
826 }
827 else if (DYNAMIC_SYM_P(sym)) {
828 ID id = RSYMBOL(sym)->id;
829 if (id & ~ID_SCOPE_MASK) return id;
830 }
831 else {
832 rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
833 rb_builtin_class_name(sym), sym, str);
834 }
835
836 return (ID)0;
837}
838
839static VALUE
840lookup_id_str(ID id)
841{
842 return get_id_entry(id, ID_ENTRY_STR);
843}
844
845ID
846rb_intern3(const char *name, long len, rb_encoding *enc)
847{
848 struct RString fake_str;
849 VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
850 OBJ_FREEZE(str);
851
852 VALUE sym = sym_find_or_insert_static_symbol(&ruby_global_symbols, str);
853 return rb_sym2id(sym);
854}
855
856ID
857rb_intern2(const char *name, long len)
858{
859 return rb_intern3(name, len, rb_usascii_encoding());
860}
861
862#undef rb_intern
863ID
864rb_intern(const char *name)
865{
866 return rb_intern2(name, strlen(name));
867}
868
869ID
870rb_intern_str(VALUE str)
871{
872 VALUE sym = sym_find_or_insert_static_symbol(&ruby_global_symbols, str);
873 return SYM2ID(sym);
874}
875
876bool
877rb_obj_is_symbol_table(VALUE obj)
878{
879 return obj == ruby_global_symbols.sym_set;
880}
881
883 int (*callback)(VALUE *key, void *data);
884 void *data;
885};
886
887static int
888rb_sym_global_symbol_table_foreach_weak_reference_i(VALUE *key, void *d)
889{
891 VALUE sym = *key;
892
893 if (sym_set_sym_static_p(sym)) {
894 struct sym_set_static_sym_entry *static_sym = sym_set_static_sym_untag(sym);
895
896 return data->callback(&static_sym->str, data->data);
897 }
898 else {
899 return data->callback(key, data->data);
900 }
901}
902
903void
904rb_sym_global_symbol_table_foreach_weak_reference(int (*callback)(VALUE *key, void *data), void *data)
905{
906 if (!ruby_global_symbols.sym_set) return;
907
909 .callback = callback,
910 .data = data,
911 };
912
913 rb_concurrent_set_foreach_with_replace(ruby_global_symbols.sym_set, rb_sym_global_symbol_table_foreach_weak_reference_i, &foreach_data);
914}
915
916void
917rb_gc_free_dsymbol(VALUE sym)
918{
919 VALUE str = RSYMBOL(sym)->fstr;
920
921 if (str) {
922 rb_concurrent_set_delete_by_identity(ruby_global_symbols.sym_set, sym);
923
924 RSYMBOL(sym)->fstr = 0;
925 }
926}
927
928/*
929 * call-seq:
930 * intern -> symbol
931 *
932 * :include: doc/string/intern.rdoc
933 *
934 */
935
936VALUE
938{
939 return sym_find_or_insert_dynamic_symbol(&ruby_global_symbols, str);
940}
941
942ID
944{
945 ID id = 0;
946 if (STATIC_SYM_P(sym)) {
947 id = STATIC_SYM2ID(sym);
948 }
949 else if (DYNAMIC_SYM_P(sym)) {
950 GLOBAL_SYMBOLS_LOCKING(symbols) {
951 RUBY_ASSERT(!rb_objspace_garbage_object_p(sym));
952 id = RSYMBOL(sym)->id;
953
954 if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
955 VALUE fstr = RSYMBOL(sym)->fstr;
956 ID num = next_id_base();
957
958 RSYMBOL(sym)->id = id |= num;
959 /* make it permanent object */
960
961 set_id_entry(symbols, rb_id_to_serial(num), fstr, sym);
962 }
963 }
964 }
965 else {
966 rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
967 rb_builtin_class_name(sym));
968 }
969 return id;
970}
971
972#undef rb_id2sym
973VALUE
975{
976 if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
977 return get_id_entry(x, ID_ENTRY_SYM);
978}
979
980/*
981 * call-seq:
982 * name -> string
983 *
984 * Returns a frozen string representation of +self+ (not including the leading colon):
985 *
986 * :foo.name # => "foo"
987 * :foo.name.frozen? # => true
988 *
989 * Related: Symbol#to_s, Symbol#inspect.
990 */
991
992VALUE
994{
995 VALUE str;
996 if (DYNAMIC_SYM_P(sym)) {
997 str = RSYMBOL(sym)->fstr;
999 }
1000 else {
1001 str = rb_id2str(STATIC_SYM2ID(sym));
1002 if (str) RUBY_ASSERT_BUILTIN_TYPE(str, T_STRING);
1003 }
1004
1005 return str;
1006}
1007
1008VALUE
1009rb_id2str(ID id)
1010{
1011 return lookup_id_str(id);
1012}
1013
1014const char *
1015rb_id2name(ID id)
1016{
1017 VALUE str = rb_id2str(id);
1018
1019 if (!str) return 0;
1020 return RSTRING_PTR(str);
1021}
1022
1023ID
1024rb_make_internal_id(void)
1025{
1026 return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
1027}
1028
1029ID
1030rb_make_temporary_id(size_t n)
1031{
1032 const ID max_id = RB_ID_SERIAL_MAX & ~0xffff;
1033 const ID id = max_id - (ID)n;
1034 if (id < RUBY_ATOMIC_LOAD(ruby_global_symbols.next_id)) {
1035 rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n);
1036 }
1037 return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL;
1038}
1039
1040static int
1041symbols_i(VALUE *key, void *data)
1042{
1043 VALUE ary = (VALUE)data;
1044 VALUE sym = (VALUE)*key;
1045
1046 if (sym_set_sym_static_p(sym)) {
1047 rb_ary_push(ary, sym_set_static_sym_untag(sym)->sym);
1048 }
1049 else if (rb_objspace_garbage_object_p(sym)) {
1050 return ST_DELETE;
1051 }
1052 else {
1053 rb_ary_push(ary, sym);
1054 }
1055
1056 return ST_CONTINUE;
1057}
1058
1059VALUE
1061{
1062 VALUE ary;
1063
1064 GLOBAL_SYMBOLS_LOCKING(symbols) {
1065 ary = rb_ary_new2(rb_concurrent_set_size(symbols->sym_set));
1066 rb_concurrent_set_foreach_with_replace(symbols->sym_set, symbols_i, (void *)ary);
1067 }
1068
1069 return ary;
1070}
1071
1072size_t
1073rb_sym_immortal_count(void)
1074{
1075 return (size_t)(RUBY_ATOMIC_LOAD(ruby_global_symbols.next_id) - 1);
1076}
1077
1078int
1080{
1081 return is_const_id(id);
1082}
1083
1084int
1086{
1087 return is_class_id(id);
1088}
1089
1090int
1092{
1093 return is_global_id(id);
1094}
1095
1096int
1098{
1099 return is_instance_id(id);
1100}
1101
1102int
1104{
1105 return is_attrset_id(id);
1106}
1107
1108int
1110{
1111 return is_local_id(id);
1112}
1113
1114int
1116{
1117 return is_internal_id(id);
1118}
1119
1120int
1121rb_is_const_sym(VALUE sym)
1122{
1123 return is_const_sym(sym);
1124}
1125
1126int
1127rb_is_attrset_sym(VALUE sym)
1128{
1129 return is_attrset_sym(sym);
1130}
1131
1132ID
1133rb_check_id(volatile VALUE *namep)
1134{
1135 VALUE tmp;
1136 VALUE name = *namep;
1137
1138 if (STATIC_SYM_P(name)) {
1139 return STATIC_SYM2ID(name);
1140 }
1141 else if (DYNAMIC_SYM_P(name)) {
1142 if (SYMBOL_PINNED_P(name)) {
1143 return RSYMBOL(name)->id;
1144 }
1145 else {
1146 *namep = RSYMBOL(name)->fstr;
1147 return 0;
1148 }
1149 }
1150 else if (!RB_TYPE_P(name, T_STRING)) {
1151 tmp = rb_check_string_type(name);
1152 if (NIL_P(tmp)) {
1153 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1154 name);
1155 }
1156 name = tmp;
1157 *namep = name;
1158 }
1159
1160 sym_check_asciionly(name, false);
1161
1162 return lookup_str_id(name);
1163}
1164
1165// Used by yjit for handling .send without throwing exceptions
1166ID
1167rb_get_symbol_id(VALUE name)
1168{
1169 if (STATIC_SYM_P(name)) {
1170 return STATIC_SYM2ID(name);
1171 }
1172 else if (DYNAMIC_SYM_P(name)) {
1173 if (SYMBOL_PINNED_P(name)) {
1174 return RSYMBOL(name)->id;
1175 }
1176 else {
1177 return 0;
1178 }
1179 }
1180 else if (RB_TYPE_P(name, T_STRING)) {
1181 return lookup_str_id(name);
1182 }
1183 else {
1184 return 0;
1185 }
1186}
1187
1188
1189VALUE
1190rb_check_symbol(volatile VALUE *namep)
1191{
1192 VALUE sym;
1193 VALUE tmp;
1194 VALUE name = *namep;
1195
1196 if (STATIC_SYM_P(name)) {
1197 return name;
1198 }
1199 else if (DYNAMIC_SYM_P(name)) {
1200 RUBY_ASSERT(!rb_objspace_garbage_object_p(name));
1201 return name;
1202 }
1203 else if (!RB_TYPE_P(name, T_STRING)) {
1204 tmp = rb_check_string_type(name);
1205 if (NIL_P(tmp)) {
1206 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1207 name);
1208 }
1209 name = tmp;
1210 *namep = name;
1211 }
1212
1213 sym_check_asciionly(name, false);
1214
1215 if ((sym = sym_find(name)) != 0) {
1216 return sym;
1217 }
1218
1219 return Qnil;
1220}
1221
1222ID
1223rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1224{
1225 struct RString fake_str;
1226 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1227
1228 sym_check_asciionly(name, true);
1229
1230 return lookup_str_id(name);
1231}
1232
1233VALUE
1234rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1235{
1236 VALUE sym;
1237 struct RString fake_str;
1238 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1239
1240 sym_check_asciionly(name, true);
1241
1242 if ((sym = sym_find(name)) != 0) {
1243 return sym;
1244 }
1245
1246 return Qnil;
1247}
1248
1249#undef rb_sym_intern_ascii_cstr
1250#ifdef __clang__
1251NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1252#else
1253FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1254FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
1255FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
1256#endif
1257
1258VALUE
1259rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1260{
1261 struct RString fake_str;
1262 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1263 return rb_str_intern(name);
1264}
1265
1266VALUE
1267rb_sym_intern_ascii(const char *ptr, long len)
1268{
1269 return rb_sym_intern(ptr, len, rb_usascii_encoding());
1270}
1271
1272VALUE
1273rb_sym_intern_ascii_cstr(const char *ptr)
1274{
1275 return rb_sym_intern_ascii(ptr, strlen(ptr));
1276}
1277
1278VALUE
1279rb_to_symbol_type(VALUE obj)
1280{
1281 return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1282}
1283
1284int
1285rb_is_const_name(VALUE name)
1286{
1287 return rb_str_symname_type(name, 0) == ID_CONST;
1288}
1289
1290int
1291rb_is_class_name(VALUE name)
1292{
1293 return rb_str_symname_type(name, 0) == ID_CLASS;
1294}
1295
1296int
1297rb_is_instance_name(VALUE name)
1298{
1299 return rb_str_symname_type(name, 0) == ID_INSTANCE;
1300}
1301
1302int
1303rb_is_local_name(VALUE name)
1304{
1305 return rb_str_symname_type(name, 0) == ID_LOCAL;
1306}
1307
1308#include "id_table.c"
1309#include "symbol.rbinc"
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
Definition assert.h:291
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
std::atomic< unsigned > rb_atomic_t
Type that is eligible for atomic operations.
Definition atomic.h:69
#define RUBY_ATOMIC_FETCH_ADD(var, val)
Atomically replaces the value pointed by var with the result of addition of val to the old value of v...
Definition atomic.h:118
#define RUBY_ATOMIC_LOAD(var)
Atomic load.
Definition atomic.h:175
static bool rb_enc_isupper(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isupper(), except it additionally takes an encoding.
Definition ctype.h:124
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
Definition ctype.h:63
static bool rb_enc_islower(OnigCodePoint c, rb_encoding *enc)
Identical to rb_islower(), except it additionally takes an encoding.
Definition ctype.h:110
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define ISUPPER
Old name of rb_isupper.
Definition ctype.h:89
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
Definition fl_type.h:134
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define STATIC_SYM_P
Old name of RB_STATIC_SYM_P.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
Definition encoding.h:517
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define DYNAMIC_SYM_P
Old name of RB_DYNAMIC_SYM_P.
Definition value_type.h:86
#define Qnil
Old name of RUBY_Qnil.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
Definition coderange.h:182
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
Definition encoding.h:516
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
Definition fl_type.h:59
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
Definition value_type.h:80
#define IMMEDIATE_P
Old name of RB_IMMEDIATE_P.
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:657
#define ISALNUM
Old name of rb_isalnum.
Definition ctype.h:91
void rb_name_error(ID id, const char *fmt,...)
Raises an instance of rb_eNameError.
Definition error.c:2344
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1430
void rb_name_error_str(VALUE str, const char *fmt,...)
Identical to rb_name_error(), except it takes a VALUE instead of ID.
Definition error.c:2359
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1428
VALUE rb_eEncodingError
EncodingError exception.
Definition error.c:1436
VALUE rb_cSymbol
Symbol class.
Definition string.c:84
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition gc.h:603
Encoding-related APIs.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
Definition string.c:932
int rb_enc_symname_p(const char *str, rb_encoding *enc)
Identical to rb_symname_p(), except it additionally takes an encoding.
Definition symbol.c:503
VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id_cstr(), except for the return type.
Definition symbol.c:1234
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
Definition symbol.c:703
ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id(), except it takes a pointer to a memory region instead of Ruby's string.
Definition symbol.c:1223
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_hidden_new(long capa)
Allocates a hidden (no class) empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
VALUE rb_ary_entry(VALUE ary, long off)
Queries an element of an array.
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
VALUE rb_sym_all_symbols(void)
Collects every single bit of the symbols that have ever been interned in the entire history of the current process.
Definition symbol.c:1060
int rb_is_global_id(ID id)
Classifies the given ID, then sees if it is a global variable.
Definition symbol.c:1091
int rb_is_instance_id(ID id)
Classifies the given ID, then sees if it is an instance variable.
Definition symbol.c:1097
int rb_is_const_id(ID id)
Classifies the given ID, then sees if it is a constant.
Definition symbol.c:1079
int rb_is_junk_id(ID)
Classifies the given ID, then sees if it is a junk ID.
Definition symbol.c:1115
int rb_symname_p(const char *str)
Sees if the passed C string constructs a valid syntactic symbol.
Definition symbol.c:497
int rb_is_class_id(ID id)
Classifies the given ID, then sees if it is a class variable.
Definition symbol.c:1085
int rb_is_attrset_id(ID id)
Classifies the given ID, then sees if it is an attribute writer.
Definition symbol.c:1103
int rb_is_local_id(ID id)
Classifies the given ID, then sees if it is a local variable.
Definition symbol.c:1109
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
Definition string.c:4121
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition string.c:1956
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
Definition string.c:4107
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
Definition string.c:3525
VALUE rb_check_string_type(VALUE obj)
Tries to convert an object to its stringised representation using its to_str method, if any.
Definition string.c:2910
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver is an instance of RString.
Definition symbol.c:937
VALUE rb_check_symbol(volatile VALUE *namep)
Identical to rb_check_id(), except it returns an instance of rb_cSymbol instead.
Definition symbol.c:1190
VALUE rb_id2sym(ID id)
Allocates an instance of rb_cSymbol that has the given id.
Definition symbol.c:974
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1133
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
Definition symbol.c:993
ID rb_sym2id(VALUE obj)
Converts an instance of rb_cSymbol into an ID.
Definition symbol.c:943
int len
Length of the buffer.
Definition io.h:8
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:167
VALUE type(ANYARGS)
ANYARGS-ed function type.
Defines RBIMPL_ATTR_NONSTRING.
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition rstring.h:76
Ruby's String.
Definition rstring.h:196
char * ptr
Pointer to the contents of the string.
Definition rstring.h:222
Definition symbol.c:106
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376