Ruby 3.5.0dev (2025-09-16 revision 1213adfe5526d65cce81a9fb127074130c8faea7)
iseq.c (1213adfe5526d65cce81a9fb127074130c8faea7)
1/**********************************************************************
2
3 iseq.c -
4
5 $Author$
6 created at: 2006-07-11(Tue) 09:00:03 +0900
7
8 Copyright (C) 2006 Koichi Sasada
9
10**********************************************************************/
11
12#define RUBY_VM_INSNS_INFO 1
13/* #define RUBY_MARK_FREE_DEBUG 1 */
14
15#include "ruby/internal/config.h"
16
17#ifdef HAVE_DLADDR
18# include <dlfcn.h>
19#endif
20
21#include "eval_intern.h"
22#include "id_table.h"
23#include "internal.h"
24#include "internal/bits.h"
25#include "internal/class.h"
26#include "internal/compile.h"
27#include "internal/error.h"
28#include "internal/file.h"
29#include "internal/gc.h"
30#include "internal/hash.h"
31#include "internal/io.h"
32#include "internal/ruby_parser.h"
33#include "internal/sanitizers.h"
34#include "internal/set_table.h"
35#include "internal/symbol.h"
36#include "internal/thread.h"
37#include "internal/variable.h"
38#include "iseq.h"
39#include "ruby/util.h"
40#include "vm_core.h"
41#include "vm_callinfo.h"
42#include "yjit.h"
43#include "ruby/ractor.h"
44#include "builtin.h"
45#include "insns.inc"
46#include "insns_info.inc"
47#include "zjit.h"
48
49VALUE rb_cISeq;
50static VALUE iseqw_new(const rb_iseq_t *iseq);
51static const rb_iseq_t *iseqw_check(VALUE iseqw);
52
53#if VM_INSN_INFO_TABLE_IMPL == 2
54static struct succ_index_table *succ_index_table_create(int max_pos, int *data, int size);
55static unsigned int *succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size);
56static int succ_index_lookup(const struct succ_index_table *sd, int x);
57#endif
58
/* Non-zero when obj is a heap object (not a special constant) whose klass field is 0.
 * Note: obj is evaluated twice. */
#define hidden_obj_p(obj) (!SPECIAL_CONST_P(obj) && RBASIC(obj)->klass == 0)
60
61static inline VALUE
62obj_resurrect(VALUE obj)
63{
64 if (hidden_obj_p(obj)) {
65 switch (BUILTIN_TYPE(obj)) {
66 case T_STRING:
67 obj = rb_str_resurrect(obj);
68 break;
69 case T_ARRAY:
70 obj = rb_ary_resurrect(obj);
71 break;
72 case T_HASH:
73 obj = rb_hash_resurrect(obj);
74 break;
75 default:
76 break;
77 }
78 }
79 return obj;
80}
81
82static void
83free_arena(struct iseq_compile_data_storage *cur)
84{
85 struct iseq_compile_data_storage *next;
86
87 while (cur) {
88 next = cur->next;
89 ruby_xfree(cur);
90 cur = next;
91 }
92}
93
94static void
95compile_data_free(struct iseq_compile_data *compile_data)
96{
97 if (compile_data) {
98 free_arena(compile_data->node.storage_head);
99 free_arena(compile_data->insn.storage_head);
100 if (compile_data->ivar_cache_table) {
101 rb_id_table_free(compile_data->ivar_cache_table);
102 }
103 ruby_xfree(compile_data);
104 }
105}
106
107static void
108remove_from_constant_cache(ID id, IC ic)
109{
110 rb_vm_t *vm = GET_VM();
111 VALUE lookup_result;
112 st_data_t ic_data = (st_data_t)ic;
113
114 if (rb_id_table_lookup(vm->constant_cache, id, &lookup_result)) {
115 set_table *ics = (set_table *)lookup_result;
116 set_table_delete(ics, &ic_data);
117
118 if (ics->num_entries == 0 &&
119 // See comment in vm_track_constant_cache on why we need this check
120 id != vm->inserting_constant_cache_id) {
121 rb_id_table_delete(vm->constant_cache, id);
122 set_free_table(ics);
123 }
124 }
125}
126
127// When an ISEQ is being freed, all of its associated ICs are going to go away
128// as well. Because of this, we need to iterate over the ICs, and clear them
129// from the VM's constant cache.
130static void
131iseq_clear_ic_references(const rb_iseq_t *iseq)
132{
133 // In some cases (when there is a compilation error), we end up with
134 // ic_size greater than 0, but no allocated is_entries buffer.
135 // If there's no is_entries buffer to loop through, return early.
136 // [Bug #19173]
137 if (!ISEQ_BODY(iseq)->is_entries) {
138 return;
139 }
140
141 for (unsigned int ic_idx = 0; ic_idx < ISEQ_BODY(iseq)->ic_size; ic_idx++) {
142 IC ic = &ISEQ_IS_IC_ENTRY(ISEQ_BODY(iseq), ic_idx);
143
144 // Iterate over the IC's constant path's segments and clean any references to
145 // the ICs out of the VM's constant cache table.
146 const ID *segments = ic->segments;
147
148 // It's possible that segments is NULL if we overallocated an IC but
149 // optimizations removed the instruction using it
150 if (segments == NULL)
151 continue;
152
153 for (int i = 0; segments[i]; i++) {
154 ID id = segments[i];
155 if (id == idNULL) continue;
156 remove_from_constant_cache(id, ic);
157 }
158
159 ruby_xfree((void *)segments);
160 }
161}
162
163void
164rb_iseq_free(const rb_iseq_t *iseq)
165{
166 RUBY_FREE_ENTER("iseq");
167
168 if (iseq && ISEQ_BODY(iseq)) {
169 iseq_clear_ic_references(iseq);
170 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
171#if USE_YJIT
172 rb_yjit_iseq_free(iseq);
173 if (FL_TEST_RAW((VALUE)iseq, ISEQ_TRANSLATED)) {
174 RUBY_ASSERT(rb_yjit_live_iseq_count > 0);
175 rb_yjit_live_iseq_count--;
176 }
177#endif
178 ruby_xfree((void *)body->iseq_encoded);
179 ruby_xfree((void *)body->insns_info.body);
180 ruby_xfree((void *)body->insns_info.positions);
181#if VM_INSN_INFO_TABLE_IMPL == 2
182 ruby_xfree(body->insns_info.succ_index_table);
183#endif
184 ruby_xfree((void *)body->is_entries);
185 ruby_xfree(body->call_data);
186 ruby_xfree((void *)body->catch_table);
187 ruby_xfree((void *)body->param.opt_table);
188 if (ISEQ_MBITS_BUFLEN(body->iseq_size) > 1 && body->mark_bits.list) {
189 ruby_xfree((void *)body->mark_bits.list);
190 }
191
192 ruby_xfree(body->variable.original_iseq);
193
194 if (body->param.keyword != NULL) {
195 if (body->param.keyword->table != &body->local_table[body->param.keyword->bits_start - body->param.keyword->num])
196 ruby_xfree((void *)body->param.keyword->table);
197 if (body->param.keyword->default_values) {
198 ruby_xfree((void *)body->param.keyword->default_values);
199 }
200 ruby_xfree((void *)body->param.keyword);
201 }
202 if (LIKELY(body->local_table != rb_iseq_shared_exc_local_tbl))
203 ruby_xfree((void *)body->local_table);
204 compile_data_free(ISEQ_COMPILE_DATA(iseq));
205 if (body->outer_variables) rb_id_table_free(body->outer_variables);
206 ruby_xfree(body);
207 }
208
209 if (iseq && ISEQ_EXECUTABLE_P(iseq) && iseq->aux.exec.local_hooks) {
210 rb_hook_list_free(iseq->aux.exec.local_hooks);
211 }
212
213 RUBY_FREE_LEAVE("iseq");
214}
215
216typedef VALUE iseq_value_itr_t(void *ctx, VALUE obj);
217
218static inline void
219iseq_scan_bits(unsigned int page, iseq_bits_t bits, VALUE *code, VALUE *original_iseq)
220{
221 unsigned int offset;
222 unsigned int page_offset = (page * ISEQ_MBITS_BITLENGTH);
223
224 while (bits) {
225 offset = ntz_intptr(bits);
226 VALUE op = code[page_offset + offset];
227 rb_gc_mark_and_move(&code[page_offset + offset]);
228 VALUE newop = code[page_offset + offset];
229 if (original_iseq && newop != op) {
230 original_iseq[page_offset + offset] = newop;
231 }
232 bits &= bits - 1; // Reset Lowest Set Bit (BLSR)
233 }
234}
235
236static void
237rb_iseq_mark_and_move_each_compile_data_value(const rb_iseq_t *iseq, VALUE *original_iseq)
238{
239 unsigned int size;
240 VALUE *code;
241 const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq);
242
243 size = compile_data->iseq_size;
244 code = compile_data->iseq_encoded;
245
246 // Embedded VALUEs
247 if (compile_data->mark_bits.list) {
248 if(compile_data->is_single_mark_bit) {
249 iseq_scan_bits(0, compile_data->mark_bits.single, code, original_iseq);
250 }
251 else {
252 for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) {
253 iseq_bits_t bits = compile_data->mark_bits.list[i];
254 iseq_scan_bits(i, bits, code, original_iseq);
255 }
256 }
257 }
258}
259static void
260rb_iseq_mark_and_move_each_body_value(const rb_iseq_t *iseq, VALUE *original_iseq)
261{
262 unsigned int size;
263 VALUE *code;
264 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
265
266 size = body->iseq_size;
267 code = body->iseq_encoded;
268
269 union iseq_inline_storage_entry *is_entries = body->is_entries;
270
271 if (body->is_entries) {
272 // Skip iterating over ivc caches
273 is_entries += body->ivc_size;
274
275 // ICVARC entries
276 for (unsigned int i = 0; i < body->icvarc_size; i++, is_entries++) {
277 ICVARC icvarc = (ICVARC)is_entries;
278 if (icvarc->entry) {
279 RUBY_ASSERT(!RB_TYPE_P(icvarc->entry->class_value, T_NONE));
280
281 rb_gc_mark_and_move(&icvarc->entry->class_value);
282 }
283 }
284
285 // ISE entries
286 for (unsigned int i = 0; i < body->ise_size; i++, is_entries++) {
287 union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)is_entries;
288 if (is->once.value) {
289 rb_gc_mark_and_move(&is->once.value);
290 }
291 }
292
293 // IC Entries
294 for (unsigned int i = 0; i < body->ic_size; i++, is_entries++) {
295 IC ic = (IC)is_entries;
296 if (ic->entry) {
297 rb_gc_mark_and_move_ptr(&ic->entry);
298 }
299 }
300 }
301
302 // Embedded VALUEs
303 if (body->mark_bits.list) {
304 if (ISEQ_MBITS_BUFLEN(size) == 1) {
305 iseq_scan_bits(0, body->mark_bits.single, code, original_iseq);
306 }
307 else {
308 for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) {
309 iseq_bits_t bits = body->mark_bits.list[i];
310 iseq_scan_bits(i, bits, code, original_iseq);
311 }
312 }
313 }
314}
315
316static bool
317cc_is_active(const struct rb_callcache *cc, bool reference_updating)
318{
319 if (cc) {
320 if (cc == rb_vm_empty_cc() || rb_vm_empty_cc_for_super()) {
321 return false;
322 }
323
324 if (reference_updating) {
325 cc = (const struct rb_callcache *)rb_gc_location((VALUE)cc);
326 }
327
328 if (vm_cc_markable(cc) && vm_cc_valid(cc)) {
329 const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc);
330 if (reference_updating) {
331 cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cme);
332 }
333 if (!METHOD_ENTRY_INVALIDATED(cme)) {
334 return true;
335 }
336 }
337 }
338 return false;
339}
340
341void
342rb_iseq_mark_and_move(rb_iseq_t *iseq, bool reference_updating)
343{
344 RUBY_MARK_ENTER("iseq");
345
346 rb_gc_mark_and_move(&iseq->wrapper);
347
348 if (ISEQ_BODY(iseq)) {
349 struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
350
351 rb_iseq_mark_and_move_each_body_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL);
352
353 rb_gc_mark_and_move(&body->variable.coverage);
354 rb_gc_mark_and_move(&body->variable.pc2branchindex);
355 rb_gc_mark_and_move(&body->variable.script_lines);
356 rb_gc_mark_and_move(&body->location.label);
357 rb_gc_mark_and_move(&body->location.base_label);
358 rb_gc_mark_and_move(&body->location.pathobj);
359 if (body->local_iseq) rb_gc_mark_and_move_ptr(&body->local_iseq);
360 if (body->parent_iseq) rb_gc_mark_and_move_ptr(&body->parent_iseq);
361 if (body->mandatory_only_iseq) rb_gc_mark_and_move_ptr(&body->mandatory_only_iseq);
362
363 if (body->call_data) {
364 for (unsigned int i = 0; i < body->ci_size; i++) {
365 struct rb_call_data *cds = body->call_data;
366
367 if (cds[i].ci) rb_gc_mark_and_move_ptr(&cds[i].ci);
368
369 if (cc_is_active(cds[i].cc, reference_updating)) {
370 rb_gc_mark_and_move_ptr(&cds[i].cc);
371 }
372 else if (cds[i].cc != rb_vm_empty_cc()) {
373 cds[i].cc = rb_vm_empty_cc();
374 }
375 }
376 }
377
378 if (body->param.flags.has_kw && body->param.keyword != NULL) {
379 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
380
381 if (keyword->default_values != NULL) {
382 for (int j = 0, i = keyword->required_num; i < keyword->num; i++, j++) {
383 rb_gc_mark_and_move(&keyword->default_values[j]);
384 }
385 }
386 }
387
388 if (body->catch_table) {
389 struct iseq_catch_table *table = body->catch_table;
390
391 for (unsigned int i = 0; i < table->size; i++) {
392 struct iseq_catch_table_entry *entry;
393 entry = UNALIGNED_MEMBER_PTR(table, entries[i]);
394 if (entry->iseq) {
395 rb_gc_mark_and_move_ptr(&entry->iseq);
396 }
397 }
398 }
399
400 if (reference_updating) {
401#if USE_YJIT
402 rb_yjit_iseq_update_references(iseq);
403#endif
404#if USE_ZJIT
405 rb_zjit_iseq_update_references(body->zjit_payload);
406#endif
407 }
408 else {
409#if USE_YJIT
410 rb_yjit_iseq_mark(body->yjit_payload);
411#endif
412#if USE_ZJIT
413 rb_zjit_iseq_mark(body->zjit_payload);
414#endif
415 }
416 }
417
418 if (FL_TEST_RAW((VALUE)iseq, ISEQ_NOT_LOADED_YET)) {
419 rb_gc_mark_and_move(&iseq->aux.loader.obj);
420 }
421 else if (FL_TEST_RAW((VALUE)iseq, ISEQ_USE_COMPILE_DATA)) {
422 const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq);
423
424 rb_iseq_mark_and_move_insn_storage(compile_data->insn.storage_head);
425 rb_iseq_mark_and_move_each_compile_data_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL);
426
427 rb_gc_mark_and_move((VALUE *)&compile_data->err_info);
428 rb_gc_mark_and_move((VALUE *)&compile_data->catch_table_ary);
429 }
430 else {
431 /* executable */
432 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
433
434 if (iseq->aux.exec.local_hooks) {
435 rb_hook_list_mark_and_move(iseq->aux.exec.local_hooks);
436 }
437 }
438
439 RUBY_MARK_LEAVE("iseq");
440}
441
442static size_t
443param_keyword_size(const struct rb_iseq_param_keyword *pkw)
444{
445 size_t size = 0;
446
447 if (!pkw) return size;
448
449 size += sizeof(struct rb_iseq_param_keyword);
450 size += sizeof(VALUE) * (pkw->num - pkw->required_num);
451
452 return size;
453}
454
455size_t
456rb_iseq_memsize(const rb_iseq_t *iseq)
457{
458 size_t size = 0; /* struct already counted as RVALUE size */
459 const struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
460 const struct iseq_compile_data *compile_data;
461
462 /* TODO: should we count original_iseq? */
463
464 if (ISEQ_EXECUTABLE_P(iseq) && body) {
465 size += sizeof(struct rb_iseq_constant_body);
466 size += body->iseq_size * sizeof(VALUE);
467 size += body->insns_info.size * (sizeof(struct iseq_insn_info_entry) + sizeof(unsigned int));
468 size += body->local_table_size * sizeof(ID);
469 size += ISEQ_MBITS_BUFLEN(body->iseq_size) * ISEQ_MBITS_SIZE;
470 if (body->catch_table) {
471 size += iseq_catch_table_bytes(body->catch_table->size);
472 }
473 size += (body->param.opt_num + 1) * sizeof(VALUE);
474 size += param_keyword_size(body->param.keyword);
475
476 /* body->is_entries */
477 size += ISEQ_IS_SIZE(body) * sizeof(union iseq_inline_storage_entry);
478
479 if (ISEQ_BODY(iseq)->is_entries) {
480 /* IC entries constant segments */
481 for (unsigned int ic_idx = 0; ic_idx < body->ic_size; ic_idx++) {
482 IC ic = &ISEQ_IS_IC_ENTRY(body, ic_idx);
483 const ID *ids = ic->segments;
484 if (!ids) continue;
485 while (*ids++) {
486 size += sizeof(ID);
487 }
488 size += sizeof(ID); // null terminator
489 }
490 }
491
492 /* body->call_data */
493 size += body->ci_size * sizeof(struct rb_call_data);
494 // TODO: should we count imemo_callinfo?
495 }
496
497 compile_data = ISEQ_COMPILE_DATA(iseq);
498 if (compile_data) {
499 struct iseq_compile_data_storage *cur;
500
501 size += sizeof(struct iseq_compile_data);
502
503 cur = compile_data->node.storage_head;
504 while (cur) {
505 size += cur->size + offsetof(struct iseq_compile_data_storage, buff);
506 cur = cur->next;
507 }
508 }
509
510 return size;
511}
512
514rb_iseq_constant_body_alloc(void)
515{
516 struct rb_iseq_constant_body *iseq_body;
517 iseq_body = ZALLOC(struct rb_iseq_constant_body);
518 return iseq_body;
519}
520
521static rb_iseq_t *
522iseq_alloc(void)
523{
524 rb_iseq_t *iseq = iseq_imemo_alloc();
525 ISEQ_BODY(iseq) = rb_iseq_constant_body_alloc();
526 return iseq;
527}
528
529VALUE
530rb_iseq_pathobj_new(VALUE path, VALUE realpath)
531{
532 VALUE pathobj;
533 VM_ASSERT(RB_TYPE_P(path, T_STRING));
534 VM_ASSERT(NIL_P(realpath) || RB_TYPE_P(realpath, T_STRING));
535
536 if (path == realpath ||
537 (!NIL_P(realpath) && rb_str_cmp(path, realpath) == 0)) {
538 pathobj = rb_fstring(path);
539 }
540 else {
541 if (!NIL_P(realpath)) realpath = rb_fstring(realpath);
542 pathobj = rb_ary_new_from_args(2, rb_fstring(path), realpath);
543 rb_ary_freeze(pathobj);
544 }
545 return pathobj;
546}
547
548void
549rb_iseq_pathobj_set(const rb_iseq_t *iseq, VALUE path, VALUE realpath)
550{
551 RB_OBJ_WRITE(iseq, &ISEQ_BODY(iseq)->location.pathobj,
552 rb_iseq_pathobj_new(path, realpath));
553}
554
555// Make a dummy iseq for a dummy frame that exposes a path for profilers to inspect
556rb_iseq_t *
557rb_iseq_alloc_with_dummy_path(VALUE fname)
558{
559 rb_iseq_t *dummy_iseq = iseq_alloc();
560
561 ISEQ_BODY(dummy_iseq)->type = ISEQ_TYPE_TOP;
562 RB_OBJ_WRITE(dummy_iseq, &ISEQ_BODY(dummy_iseq)->location.pathobj, fname);
563 RB_OBJ_WRITE(dummy_iseq, &ISEQ_BODY(dummy_iseq)->location.label, fname);
564
565 return dummy_iseq;
566}
567
568static rb_iseq_location_t *
569iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id)
570{
571 rb_iseq_location_t *loc = &ISEQ_BODY(iseq)->location;
572
573 rb_iseq_pathobj_set(iseq, path, realpath);
574 RB_OBJ_WRITE(iseq, &loc->label, name);
575 RB_OBJ_WRITE(iseq, &loc->base_label, name);
576 loc->first_lineno = first_lineno;
577
578 if (ISEQ_BODY(iseq)->local_iseq == iseq && strcmp(RSTRING_PTR(name), "initialize") == 0) {
579 ISEQ_BODY(iseq)->param.flags.use_block = 1;
580 }
581
582 if (code_location) {
583 loc->node_id = node_id;
584 loc->code_location = *code_location;
585 }
586 else {
587 loc->code_location.beg_pos.lineno = 0;
588 loc->code_location.beg_pos.column = 0;
589 loc->code_location.end_pos.lineno = -1;
590 loc->code_location.end_pos.column = -1;
591 }
592
593 return loc;
594}
595
596static void
597set_relation(rb_iseq_t *iseq, const rb_iseq_t *piseq)
598{
599 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
600 const VALUE type = body->type;
601
602 /* set class nest stack */
603 if (type == ISEQ_TYPE_TOP) {
604 body->local_iseq = iseq;
605 }
606 else if (type == ISEQ_TYPE_METHOD || type == ISEQ_TYPE_CLASS) {
607 body->local_iseq = iseq;
608 }
609 else if (piseq) {
610 RB_OBJ_WRITE(iseq, &body->local_iseq, ISEQ_BODY(piseq)->local_iseq);
611 }
612
613 if (piseq) {
614 RB_OBJ_WRITE(iseq, &body->parent_iseq, piseq);
615 }
616
617 if (type == ISEQ_TYPE_MAIN) {
618 body->local_iseq = iseq;
619 }
620}
621
622static struct iseq_compile_data_storage *
623new_arena(void)
624{
625 struct iseq_compile_data_storage * new_arena =
627 ALLOC_N(char, INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE +
628 offsetof(struct iseq_compile_data_storage, buff));
629
630 new_arena->pos = 0;
631 new_arena->next = 0;
632 new_arena->size = INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE;
633
634 return new_arena;
635}
636
637static int
638prepare_node_id(const NODE *node)
639{
640 if (!node) return -1;
641
642 if (nd_type(node) == NODE_SCOPE && RNODE_SCOPE(node)->nd_parent) {
643 return nd_node_id(RNODE_SCOPE(node)->nd_parent);
644 }
645
646 return nd_node_id(node);
647}
648
649static VALUE
650prepare_iseq_build(rb_iseq_t *iseq,
651 VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id,
652 const rb_iseq_t *parent, int isolated_depth, enum rb_iseq_type type,
653 VALUE script_lines, const rb_compile_option_t *option)
654{
655 VALUE coverage = Qfalse;
656 VALUE err_info = Qnil;
657 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
658
659 if (parent && (type == ISEQ_TYPE_MAIN || type == ISEQ_TYPE_TOP))
660 err_info = Qfalse;
661
662 body->type = type;
663 set_relation(iseq, parent);
664
665 name = rb_fstring(name);
666 iseq_location_setup(iseq, name, path, realpath, first_lineno, code_location, node_id);
667 if (iseq != body->local_iseq) {
668 RB_OBJ_WRITE(iseq, &body->location.base_label, ISEQ_BODY(body->local_iseq)->location.label);
669 }
670 ISEQ_COVERAGE_SET(iseq, Qnil);
671 ISEQ_ORIGINAL_ISEQ_CLEAR(iseq);
672 body->variable.flip_count = 0;
673
674 if (NIL_P(script_lines)) {
675 RB_OBJ_WRITE(iseq, &body->variable.script_lines, Qnil);
676 }
677 else {
678 RB_OBJ_WRITE(iseq, &body->variable.script_lines, rb_ractor_make_shareable(script_lines));
679 }
680
681 ISEQ_COMPILE_DATA_ALLOC(iseq);
682 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->err_info, err_info);
683 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->catch_table_ary, Qnil);
684
685 ISEQ_COMPILE_DATA(iseq)->node.storage_head = ISEQ_COMPILE_DATA(iseq)->node.storage_current = new_arena();
686 ISEQ_COMPILE_DATA(iseq)->insn.storage_head = ISEQ_COMPILE_DATA(iseq)->insn.storage_current = new_arena();
687 ISEQ_COMPILE_DATA(iseq)->isolated_depth = isolated_depth;
688 ISEQ_COMPILE_DATA(iseq)->option = option;
689 ISEQ_COMPILE_DATA(iseq)->ivar_cache_table = NULL;
690 ISEQ_COMPILE_DATA(iseq)->builtin_function_table = GET_VM()->builtin_function_table;
691
692 if (option->coverage_enabled) {
693 VALUE coverages = rb_get_coverages();
694 if (RTEST(coverages)) {
695 coverage = rb_hash_lookup(coverages, rb_iseq_path(iseq));
696 if (NIL_P(coverage)) coverage = Qfalse;
697 }
698 }
699 ISEQ_COVERAGE_SET(iseq, coverage);
700 if (coverage && ISEQ_BRANCH_COVERAGE(iseq))
701 ISEQ_PC2BRANCHINDEX_SET(iseq, rb_ary_hidden_new(0));
702
703 return Qtrue;
704}
705
#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
/* Debug-build check, called from finish_iseq_build(); defined later in the file. */
static void validate_get_insn_info(const rb_iseq_t *iseq);
#endif
709
710void
711rb_iseq_insns_info_encode_positions(const rb_iseq_t *iseq)
712{
713#if VM_INSN_INFO_TABLE_IMPL == 2
714 /* create succ_index_table */
715 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
716 int size = body->insns_info.size;
717 int max_pos = body->iseq_size;
718 int *data = (int *)body->insns_info.positions;
719 if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table);
720 body->insns_info.succ_index_table = succ_index_table_create(max_pos, data, size);
721#if VM_CHECK_MODE == 0
722 ruby_xfree(body->insns_info.positions);
723 body->insns_info.positions = NULL;
724#endif
725#endif
726}
727
#if VM_INSN_INFO_TABLE_IMPL == 2
/*
 * Recover a positions array from the body's succ_index_table by calling
 * succ_index_table_invert() and return its result.
 */
unsigned int *
rb_iseq_insns_info_decode_positions(const struct rb_iseq_constant_body *body)
{
    const int entry_count = body->insns_info.size;
    const int max_pos = body->iseq_size;
    struct succ_index_table *const table = body->insns_info.succ_index_table;

    return succ_index_table_invert(max_pos, table, entry_count);
}
#endif
738
739void
740rb_iseq_init_trace(rb_iseq_t *iseq)
741{
742 iseq->aux.exec.global_trace_events = 0;
743 if (ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS) {
744 rb_iseq_trace_set(iseq, ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS);
745 }
746}
747
748static VALUE
749finish_iseq_build(rb_iseq_t *iseq)
750{
751 struct iseq_compile_data *data = ISEQ_COMPILE_DATA(iseq);
752 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
753 VALUE err = data->err_info;
754 ISEQ_COMPILE_DATA_CLEAR(iseq);
755 compile_data_free(data);
756
757#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
758 validate_get_insn_info(iseq);
759#endif
760
761 if (RTEST(err)) {
762 VALUE path = pathobj_path(body->location.pathobj);
763 if (err == Qtrue) err = rb_exc_new_cstr(rb_eSyntaxError, "compile error");
764 rb_funcallv(err, rb_intern("set_backtrace"), 1, &path);
765 rb_exc_raise(err);
766 }
767
768 RB_DEBUG_COUNTER_INC(iseq_num);
769 RB_DEBUG_COUNTER_ADD(iseq_cd_num, ISEQ_BODY(iseq)->ci_size);
770
771 rb_iseq_init_trace(iseq);
772 return Qtrue;
773}
774
775static rb_compile_option_t COMPILE_OPTION_DEFAULT = {
776 .inline_const_cache = OPT_INLINE_CONST_CACHE,
777 .peephole_optimization = OPT_PEEPHOLE_OPTIMIZATION,
778 .tailcall_optimization = OPT_TAILCALL_OPTIMIZATION,
779 .specialized_instruction = OPT_SPECIALISED_INSTRUCTION,
780 .operands_unification = OPT_OPERANDS_UNIFICATION,
781 .instructions_unification = OPT_INSTRUCTIONS_UNIFICATION,
782 .frozen_string_literal = OPT_FROZEN_STRING_LITERAL,
783 .debug_frozen_string_literal = OPT_DEBUG_FROZEN_STRING_LITERAL,
784 .coverage_enabled = TRUE,
785};
786
787static const rb_compile_option_t COMPILE_OPTION_FALSE = {
788 .frozen_string_literal = -1, // unspecified
789};
790
791int
792rb_iseq_opt_frozen_string_literal(void)
793{
794 return COMPILE_OPTION_DEFAULT.frozen_string_literal;
795}
796
797static void
798set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt)
799{
800#define SET_COMPILE_OPTION(o, h, mem) \
801 { VALUE flag = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \
802 if (flag == Qtrue) { (o)->mem = 1; } \
803 else if (flag == Qfalse) { (o)->mem = 0; } \
804 }
805#define SET_COMPILE_OPTION_NUM(o, h, mem) \
806 { VALUE num = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \
807 if (!NIL_P(num)) (o)->mem = NUM2INT(num); \
808 }
809 SET_COMPILE_OPTION(option, opt, inline_const_cache);
810 SET_COMPILE_OPTION(option, opt, peephole_optimization);
811 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
812 SET_COMPILE_OPTION(option, opt, specialized_instruction);
813 SET_COMPILE_OPTION(option, opt, operands_unification);
814 SET_COMPILE_OPTION(option, opt, instructions_unification);
815 SET_COMPILE_OPTION(option, opt, frozen_string_literal);
816 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
817 SET_COMPILE_OPTION(option, opt, coverage_enabled);
818 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
819#undef SET_COMPILE_OPTION
820#undef SET_COMPILE_OPTION_NUM
821}
822
823static rb_compile_option_t *
824set_compile_option_from_ast(rb_compile_option_t *option, const rb_ast_body_t *ast)
825{
826#define SET_COMPILE_OPTION(o, a, mem) \
827 ((a)->mem < 0 ? 0 : ((o)->mem = (a)->mem > 0))
828 SET_COMPILE_OPTION(option, ast, coverage_enabled);
829#undef SET_COMPILE_OPTION
830 if (ast->frozen_string_literal >= 0) {
831 option->frozen_string_literal = ast->frozen_string_literal;
832 }
833 return option;
834}
835
836static void
837make_compile_option(rb_compile_option_t *option, VALUE opt)
838{
839 if (NIL_P(opt)) {
840 *option = COMPILE_OPTION_DEFAULT;
841 }
842 else if (opt == Qfalse) {
843 *option = COMPILE_OPTION_FALSE;
844 }
845 else if (opt == Qtrue) {
846 int i;
847 for (i = 0; i < (int)(sizeof(rb_compile_option_t) / sizeof(int)); ++i)
848 ((int *)option)[i] = 1;
849 }
850 else if (RB_TYPE_P(opt, T_HASH)) {
851 *option = COMPILE_OPTION_DEFAULT;
852 set_compile_option_from_hash(option, opt);
853 }
854 else {
855 rb_raise(rb_eTypeError, "Compile option must be Hash/true/false/nil");
856 }
857}
858
859static VALUE
860make_compile_option_value(rb_compile_option_t *option)
861{
862 VALUE opt = rb_hash_new_with_size(11);
863#define SET_COMPILE_OPTION(o, h, mem) \
864 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), RBOOL((o)->mem))
865#define SET_COMPILE_OPTION_NUM(o, h, mem) \
866 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), INT2NUM((o)->mem))
867 {
868 SET_COMPILE_OPTION(option, opt, inline_const_cache);
869 SET_COMPILE_OPTION(option, opt, peephole_optimization);
870 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
871 SET_COMPILE_OPTION(option, opt, specialized_instruction);
872 SET_COMPILE_OPTION(option, opt, operands_unification);
873 SET_COMPILE_OPTION(option, opt, instructions_unification);
874 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
875 SET_COMPILE_OPTION(option, opt, coverage_enabled);
876 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
877 }
878#undef SET_COMPILE_OPTION
879#undef SET_COMPILE_OPTION_NUM
880 VALUE frozen_string_literal = option->frozen_string_literal == -1 ? Qnil : RBOOL(option->frozen_string_literal);
881 rb_hash_aset(opt, ID2SYM(rb_intern("frozen_string_literal")), frozen_string_literal);
882 return opt;
883}
884
885rb_iseq_t *
886rb_iseq_new(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath,
887 const rb_iseq_t *parent, enum rb_iseq_type type)
888{
889 return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent,
890 0, type, &COMPILE_OPTION_DEFAULT,
891 Qnil);
892}
893
894static int
895ast_line_count(const VALUE ast_value)
896{
897 rb_ast_t *ast = rb_ruby_ast_data_get(ast_value);
898 return ast->body.line_count;
899}
900
901static VALUE
902iseq_setup_coverage(VALUE coverages, VALUE path, int line_count)
903{
904 if (line_count >= 0) {
905 int len = (rb_get_coverage_mode() & COVERAGE_TARGET_ONESHOT_LINES) ? 0 : line_count;
906
907 VALUE coverage = rb_default_coverage(len);
908 rb_hash_aset(coverages, path, coverage);
909
910 return coverage;
911 }
912
913 return Qnil;
914}
915
916static inline void
917iseq_new_setup_coverage(VALUE path, int line_count)
918{
919 VALUE coverages = rb_get_coverages();
920
921 if (RTEST(coverages)) {
922 iseq_setup_coverage(coverages, path, line_count);
923 }
924}
925
926rb_iseq_t *
927rb_iseq_new_top(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent)
928{
929 iseq_new_setup_coverage(path, ast_line_count(ast_value));
930
931 return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent, 0,
932 ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT,
933 Qnil);
934}
935
939rb_iseq_t *
940pm_iseq_new_top(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent, int *error_state)
941{
942 iseq_new_setup_coverage(path, (int) (node->parser->newline_list.size - 1));
943
944 return pm_iseq_new_with_opt(node, name, path, realpath, 0, parent, 0,
945 ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT, error_state);
946}
947
948rb_iseq_t *
949rb_iseq_new_main(const VALUE ast_value, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt)
950{
951 iseq_new_setup_coverage(path, ast_line_count(ast_value));
952
953 return rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"),
954 path, realpath, 0,
955 parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE,
956 Qnil);
957}
958
963rb_iseq_t *
964pm_iseq_new_main(pm_scope_node_t *node, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt, int *error_state)
965{
966 iseq_new_setup_coverage(path, (int) (node->parser->newline_list.size - 1));
967
968 return pm_iseq_new_with_opt(node, rb_fstring_lit("<main>"),
969 path, realpath, 0,
970 parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE, error_state);
971}
972
973rb_iseq_t *
974rb_iseq_new_eval(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth)
975{
976 if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) {
977 VALUE coverages = rb_get_coverages();
978 if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) {
979 iseq_setup_coverage(coverages, path, ast_line_count(ast_value) + first_lineno - 1);
980 }
981 }
982
983 return rb_iseq_new_with_opt(ast_value, name, path, realpath, first_lineno,
984 parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT,
985 Qnil);
986}
987
988rb_iseq_t *
989pm_iseq_new_eval(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath,
990 int first_lineno, const rb_iseq_t *parent, int isolated_depth, int *error_state)
991{
992 if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) {
993 VALUE coverages = rb_get_coverages();
994 if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) {
995 iseq_setup_coverage(coverages, path, ((int) (node->parser->newline_list.size - 1)) + first_lineno - 1);
996 }
997 }
998
999 return pm_iseq_new_with_opt(node, name, path, realpath, first_lineno,
1000 parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT, error_state);
1001}
1002
1003static inline rb_iseq_t *
1004iseq_translate(rb_iseq_t *iseq)
1005{
1006 if (rb_respond_to(rb_cISeq, rb_intern("translate"))) {
1007 VALUE v1 = iseqw_new(iseq);
1008 VALUE v2 = rb_funcall(rb_cISeq, rb_intern("translate"), 1, v1);
1009 if (v1 != v2 && CLASS_OF(v2) == rb_cISeq) {
1010 iseq = (rb_iseq_t *)iseqw_check(v2);
1011 }
1012 }
1013
1014 return iseq;
1015}
1016
1017rb_iseq_t *
1018rb_iseq_new_with_opt(VALUE ast_value, VALUE name, VALUE path, VALUE realpath,
1019 int first_lineno, const rb_iseq_t *parent, int isolated_depth,
1020 enum rb_iseq_type type, const rb_compile_option_t *option,
1021 VALUE script_lines)
1022{
1023 rb_ast_t *ast = rb_ruby_ast_data_get(ast_value);
1024 rb_ast_body_t *body = ast ? &ast->body : NULL;
1025 const NODE *node = body ? body->root : 0;
1026 /* TODO: argument check */
1027 rb_iseq_t *iseq = iseq_alloc();
1028 rb_compile_option_t new_opt;
1029
1030 if (!option) option = &COMPILE_OPTION_DEFAULT;
1031 if (body) {
1032 new_opt = *option;
1033 option = set_compile_option_from_ast(&new_opt, body);
1034 }
1035
1036 if (!NIL_P(script_lines)) {
1037 // noop
1038 }
1039 else if (body && body->script_lines) {
1040 script_lines = rb_parser_build_script_lines_from(body->script_lines);
1041 }
1042 else if (parent) {
1043 script_lines = ISEQ_BODY(parent)->variable.script_lines;
1044 }
1045
1046 prepare_iseq_build(iseq, name, path, realpath, first_lineno, node ? &node->nd_loc : NULL, prepare_node_id(node),
1047 parent, isolated_depth, type, script_lines, option);
1048
1049 rb_iseq_compile_node(iseq, node);
1050 finish_iseq_build(iseq);
1051 RB_GC_GUARD(ast_value);
1052
1053 return iseq_translate(iseq);
1054}
1055
1057 rb_iseq_t *iseq;
1058 pm_scope_node_t *node;
1059};
1060
1061VALUE
1062pm_iseq_new_with_opt_try(VALUE d)
1063{
1064 struct pm_iseq_new_with_opt_data *data = (struct pm_iseq_new_with_opt_data *)d;
1065
1066 // This can compile child iseqs, which can raise syntax errors
1067 pm_iseq_compile_node(data->iseq, data->node);
1068
1069 // This raises an exception if there is a syntax error
1070 finish_iseq_build(data->iseq);
1071
1072 return Qundef;
1073}
1074
1087rb_iseq_t *
1088pm_iseq_new_with_opt(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath,
1089 int first_lineno, const rb_iseq_t *parent, int isolated_depth,
1090 enum rb_iseq_type type, const rb_compile_option_t *option, int *error_state)
1091{
1092 rb_iseq_t *iseq = iseq_alloc();
1093 ISEQ_BODY(iseq)->prism = true;
1094
1095 rb_compile_option_t next_option;
1096 if (!option) option = &COMPILE_OPTION_DEFAULT;
1097
1098 next_option = *option;
1099 next_option.coverage_enabled = node->coverage_enabled < 0 ? 0 : node->coverage_enabled > 0;
1100 option = &next_option;
1101
1102 pm_location_t *location = &node->base.location;
1103 int32_t start_line = node->parser->start_line;
1104
1105 pm_line_column_t start = pm_newline_list_line_column(&node->parser->newline_list, location->start, start_line);
1106 pm_line_column_t end = pm_newline_list_line_column(&node->parser->newline_list, location->end, start_line);
1107
1108 rb_code_location_t code_location = (rb_code_location_t) {
1109 .beg_pos = { .lineno = (int) start.line, .column = (int) start.column },
1110 .end_pos = { .lineno = (int) end.line, .column = (int) end.column }
1111 };
1112
1113 prepare_iseq_build(iseq, name, path, realpath, first_lineno, &code_location, node->ast_node->node_id,
1114 parent, isolated_depth, type, node->script_lines == NULL ? Qnil : *node->script_lines, option);
1115
1116 struct pm_iseq_new_with_opt_data data = {
1117 .iseq = iseq,
1118 .node = node
1119 };
1120 rb_protect(pm_iseq_new_with_opt_try, (VALUE)&data, error_state);
1121
1122 if (*error_state) return NULL;
1123
1124 return iseq_translate(iseq);
1125}
1126
1127rb_iseq_t *
1128rb_iseq_new_with_callback(
1129 const struct rb_iseq_new_with_callback_callback_func * ifunc,
1130 VALUE name, VALUE path, VALUE realpath,
1131 int first_lineno, const rb_iseq_t *parent,
1132 enum rb_iseq_type type, const rb_compile_option_t *option)
1133{
1134 /* TODO: argument check */
1135 rb_iseq_t *iseq = iseq_alloc();
1136
1137 if (!option) option = &COMPILE_OPTION_DEFAULT;
1138 prepare_iseq_build(iseq, name, path, realpath, first_lineno, NULL, -1, parent, 0, type, Qnil, option);
1139
1140 rb_iseq_compile_callback(iseq, ifunc);
1141 finish_iseq_build(iseq);
1142
1143 return iseq;
1144}
1145
1146const rb_iseq_t *
1147rb_iseq_load_iseq(VALUE fname)
1148{
1149 VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("load_iseq"), 1, &fname);
1150
1151 if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) {
1152 return iseqw_check(iseqv);
1153 }
1154
1155 return NULL;
1156}
1157
1158#define CHECK_ARRAY(v) rb_to_array_type(v)
1159#define CHECK_HASH(v) rb_to_hash_type(v)
1160#define CHECK_STRING(v) rb_str_to_str(v)
1161#define CHECK_SYMBOL(v) rb_to_symbol_type(v)
1162static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;}
1163
1164static enum rb_iseq_type
1165iseq_type_from_sym(VALUE type)
1166{
1167 const ID id_top = rb_intern("top");
1168 const ID id_method = rb_intern("method");
1169 const ID id_block = rb_intern("block");
1170 const ID id_class = rb_intern("class");
1171 const ID id_rescue = rb_intern("rescue");
1172 const ID id_ensure = rb_intern("ensure");
1173 const ID id_eval = rb_intern("eval");
1174 const ID id_main = rb_intern("main");
1175 const ID id_plain = rb_intern("plain");
1176 /* ensure all symbols are static or pinned down before
1177 * conversion */
1178 const ID typeid = rb_check_id(&type);
1179 if (typeid == id_top) return ISEQ_TYPE_TOP;
1180 if (typeid == id_method) return ISEQ_TYPE_METHOD;
1181 if (typeid == id_block) return ISEQ_TYPE_BLOCK;
1182 if (typeid == id_class) return ISEQ_TYPE_CLASS;
1183 if (typeid == id_rescue) return ISEQ_TYPE_RESCUE;
1184 if (typeid == id_ensure) return ISEQ_TYPE_ENSURE;
1185 if (typeid == id_eval) return ISEQ_TYPE_EVAL;
1186 if (typeid == id_main) return ISEQ_TYPE_MAIN;
1187 if (typeid == id_plain) return ISEQ_TYPE_PLAIN;
1188 return (enum rb_iseq_type)-1;
1189}
1190
1191static VALUE
1192iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt)
1193{
1194 rb_iseq_t *iseq = iseq_alloc();
1195
1196 VALUE magic, version1, version2, format_type, misc;
1197 VALUE name, path, realpath, code_location, node_id;
1198 VALUE type, body, locals, params, exception;
1199
1200 st_data_t iseq_type;
1201 rb_compile_option_t option;
1202 int i = 0;
1203 rb_code_location_t tmp_loc = { {0, 0}, {-1, -1} };
1204
1205 /* [magic, major_version, minor_version, format_type, misc,
1206 * label, path, first_lineno,
1207 * type, locals, args, exception_table, body]
1208 */
1209
1210 data = CHECK_ARRAY(data);
1211
1212 magic = CHECK_STRING(rb_ary_entry(data, i++));
1213 version1 = CHECK_INTEGER(rb_ary_entry(data, i++));
1214 version2 = CHECK_INTEGER(rb_ary_entry(data, i++));
1215 format_type = CHECK_INTEGER(rb_ary_entry(data, i++));
1216 misc = CHECK_HASH(rb_ary_entry(data, i++));
1217 ((void)magic, (void)version1, (void)version2, (void)format_type);
1218
1219 name = CHECK_STRING(rb_ary_entry(data, i++));
1220 path = CHECK_STRING(rb_ary_entry(data, i++));
1221 realpath = rb_ary_entry(data, i++);
1222 realpath = NIL_P(realpath) ? Qnil : CHECK_STRING(realpath);
1223 int first_lineno = RB_NUM2INT(rb_ary_entry(data, i++));
1224
1225 type = CHECK_SYMBOL(rb_ary_entry(data, i++));
1226 locals = CHECK_ARRAY(rb_ary_entry(data, i++));
1227 params = CHECK_HASH(rb_ary_entry(data, i++));
1228 exception = CHECK_ARRAY(rb_ary_entry(data, i++));
1229 body = CHECK_ARRAY(rb_ary_entry(data, i++));
1230
1231 ISEQ_BODY(iseq)->local_iseq = iseq;
1232
1233 iseq_type = iseq_type_from_sym(type);
1234 if (iseq_type == (enum rb_iseq_type)-1) {
1235 rb_raise(rb_eTypeError, "unsupported type: :%"PRIsVALUE, rb_sym2str(type));
1236 }
1237
1238 node_id = rb_hash_aref(misc, ID2SYM(rb_intern("node_id")));
1239
1240 code_location = rb_hash_aref(misc, ID2SYM(rb_intern("code_location")));
1241 if (RB_TYPE_P(code_location, T_ARRAY) && RARRAY_LEN(code_location) == 4) {
1242 tmp_loc.beg_pos.lineno = NUM2INT(rb_ary_entry(code_location, 0));
1243 tmp_loc.beg_pos.column = NUM2INT(rb_ary_entry(code_location, 1));
1244 tmp_loc.end_pos.lineno = NUM2INT(rb_ary_entry(code_location, 2));
1245 tmp_loc.end_pos.column = NUM2INT(rb_ary_entry(code_location, 3));
1246 }
1247
1248 if (SYM2ID(rb_hash_aref(misc, ID2SYM(rb_intern("parser")))) == rb_intern("prism")) {
1249 ISEQ_BODY(iseq)->prism = true;
1250 }
1251
1252 make_compile_option(&option, opt);
1253 option.peephole_optimization = FALSE; /* because peephole optimization can modify original iseq */
1254 prepare_iseq_build(iseq, name, path, realpath, first_lineno, &tmp_loc, NUM2INT(node_id),
1255 parent, 0, (enum rb_iseq_type)iseq_type, Qnil, &option);
1256
1257 rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body);
1258
1259 finish_iseq_build(iseq);
1260
1261 return iseqw_new(iseq);
1262}
1263
1264/*
1265 * :nodoc:
1266 */
1267static VALUE
1268iseq_s_load(int argc, VALUE *argv, VALUE self)
1269{
1270 VALUE data, opt=Qnil;
1271 rb_scan_args(argc, argv, "11", &data, &opt);
1272 return iseq_load(data, NULL, opt);
1273}
1274
1275VALUE
1276rb_iseq_load(VALUE data, VALUE parent, VALUE opt)
1277{
1278 return iseq_load(data, RTEST(parent) ? (rb_iseq_t *)parent : NULL, opt);
1279}
1280
1281static rb_iseq_t *
1282rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt)
1283{
1284 rb_iseq_t *iseq = NULL;
1285 rb_compile_option_t option;
1286#if !defined(__GNUC__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 8)
1287# define INITIALIZED volatile /* suppress warnings by gcc 4.8 */
1288#else
1289# define INITIALIZED /* volatile */
1290#endif
1291 VALUE (*parse)(VALUE vparser, VALUE fname, VALUE file, int start);
1292 int ln;
1293 VALUE INITIALIZED ast_value;
1294 rb_ast_t *ast;
1295 VALUE name = rb_fstring_lit("<compiled>");
1296
1297 /* safe results first */
1298 make_compile_option(&option, opt);
1299 ln = NUM2INT(line);
1300 StringValueCStr(file);
1301 if (RB_TYPE_P(src, T_FILE)) {
1302 parse = rb_parser_compile_file_path;
1303 }
1304 else {
1305 parse = rb_parser_compile_string_path;
1306 StringValue(src);
1307 }
1308 {
1309 const VALUE parser = rb_parser_new();
1310 const rb_iseq_t *outer_scope = rb_iseq_new(Qnil, name, name, Qnil, 0, ISEQ_TYPE_TOP);
1311 VALUE outer_scope_v = (VALUE)outer_scope;
1312 rb_parser_set_context(parser, outer_scope, FALSE);
1313 if (ruby_vm_keep_script_lines) rb_parser_set_script_lines(parser);
1314 RB_GC_GUARD(outer_scope_v);
1315 ast_value = (*parse)(parser, file, src, ln);
1316 }
1317
1318 ast = rb_ruby_ast_data_get(ast_value);
1319
1320 if (!ast || !ast->body.root) {
1321 rb_ast_dispose(ast);
1322 rb_exc_raise(GET_EC()->errinfo);
1323 }
1324 else {
1325 iseq = rb_iseq_new_with_opt(ast_value, name, file, realpath, ln,
1326 NULL, 0, ISEQ_TYPE_TOP, &option,
1327 Qnil);
1328 rb_ast_dispose(ast);
1329 }
1330
1331 return iseq;
1332}
1333
1334static rb_iseq_t *
1335pm_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt)
1336{
1337 rb_iseq_t *iseq = NULL;
1338 rb_compile_option_t option;
1339 int ln;
1340 VALUE name = rb_fstring_lit("<compiled>");
1341
1342 /* safe results first */
1343 make_compile_option(&option, opt);
1344 ln = NUM2INT(line);
1345 StringValueCStr(file);
1346
1347 bool parse_file = false;
1348 if (RB_TYPE_P(src, T_FILE)) {
1349 parse_file = true;
1350 src = rb_io_path(src);
1351 }
1352 else {
1353 src = StringValue(src);
1354 }
1355
1356 pm_parse_result_t result = { 0 };
1357 pm_options_line_set(&result.options, NUM2INT(line));
1358 pm_options_scopes_init(&result.options, 1);
1359 result.node.coverage_enabled = 1;
1360
1361 switch (option.frozen_string_literal) {
1362 case ISEQ_FROZEN_STRING_LITERAL_UNSET:
1363 break;
1364 case ISEQ_FROZEN_STRING_LITERAL_DISABLED:
1366 break;
1367 case ISEQ_FROZEN_STRING_LITERAL_ENABLED:
1369 break;
1370 default:
1371 rb_bug("pm_iseq_compile_with_option: invalid frozen_string_literal=%d", option.frozen_string_literal);
1372 break;
1373 }
1374
1375 VALUE script_lines;
1376 VALUE error;
1377
1378 if (parse_file) {
1379 error = pm_load_parse_file(&result, src, ruby_vm_keep_script_lines ? &script_lines : NULL);
1380 }
1381 else {
1382 error = pm_parse_string(&result, src, file, ruby_vm_keep_script_lines ? &script_lines : NULL);
1383 }
1384
1385 RB_GC_GUARD(src);
1386
1387 if (error == Qnil) {
1388 int error_state;
1389 iseq = pm_iseq_new_with_opt(&result.node, name, file, realpath, ln, NULL, 0, ISEQ_TYPE_TOP, &option, &error_state);
1390
1391 pm_parse_result_free(&result);
1392
1393 if (error_state) {
1394 RUBY_ASSERT(iseq == NULL);
1395 rb_jump_tag(error_state);
1396 }
1397 }
1398 else {
1399 pm_parse_result_free(&result);
1400 rb_exc_raise(error);
1401 }
1402
1403 return iseq;
1404}
1405
1406VALUE
1407rb_iseq_path(const rb_iseq_t *iseq)
1408{
1409 return pathobj_path(ISEQ_BODY(iseq)->location.pathobj);
1410}
1411
1412VALUE
1413rb_iseq_realpath(const rb_iseq_t *iseq)
1414{
1415 return pathobj_realpath(ISEQ_BODY(iseq)->location.pathobj);
1416}
1417
1418VALUE
1419rb_iseq_absolute_path(const rb_iseq_t *iseq)
1420{
1421 return rb_iseq_realpath(iseq);
1422}
1423
1424int
1425rb_iseq_from_eval_p(const rb_iseq_t *iseq)
1426{
1427 return NIL_P(rb_iseq_realpath(iseq));
1428}
1429
1430VALUE
1431rb_iseq_label(const rb_iseq_t *iseq)
1432{
1433 return ISEQ_BODY(iseq)->location.label;
1434}
1435
1436VALUE
1437rb_iseq_base_label(const rb_iseq_t *iseq)
1438{
1439 return ISEQ_BODY(iseq)->location.base_label;
1440}
1441
1442VALUE
1443rb_iseq_first_lineno(const rb_iseq_t *iseq)
1444{
1445 return RB_INT2NUM(ISEQ_BODY(iseq)->location.first_lineno);
1446}
1447
1448VALUE
1449rb_iseq_method_name(const rb_iseq_t *iseq)
1450{
1451 struct rb_iseq_constant_body *const body = ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq);
1452
1453 if (body->type == ISEQ_TYPE_METHOD) {
1454 return body->location.base_label;
1455 }
1456 else {
1457 return Qnil;
1458 }
1459}
1460
1461void
1462rb_iseq_code_location(const rb_iseq_t *iseq, int *beg_pos_lineno, int *beg_pos_column, int *end_pos_lineno, int *end_pos_column)
1463{
1464 const rb_code_location_t *loc = &ISEQ_BODY(iseq)->location.code_location;
1465 if (beg_pos_lineno) *beg_pos_lineno = loc->beg_pos.lineno;
1466 if (beg_pos_column) *beg_pos_column = loc->beg_pos.column;
1467 if (end_pos_lineno) *end_pos_lineno = loc->end_pos.lineno;
1468 if (end_pos_column) *end_pos_column = loc->end_pos.column;
1469}
1470
1471static ID iseq_type_id(enum rb_iseq_type type);
1472
1473VALUE
1474rb_iseq_type(const rb_iseq_t *iseq)
1475{
1476 return ID2SYM(iseq_type_id(ISEQ_BODY(iseq)->type));
1477}
1478
1479VALUE
1480rb_iseq_coverage(const rb_iseq_t *iseq)
1481{
1482 return ISEQ_COVERAGE(iseq);
1483}
1484
1485static int
1486remove_coverage_i(void *vstart, void *vend, size_t stride, void *data)
1487{
1488 VALUE v = (VALUE)vstart;
1489 for (; v != (VALUE)vend; v += stride) {
1490 void *ptr = rb_asan_poisoned_object_p(v);
1491 rb_asan_unpoison_object(v, false);
1492
1493 if (rb_obj_is_iseq(v)) {
1494 rb_iseq_t *iseq = (rb_iseq_t *)v;
1495 ISEQ_COVERAGE_SET(iseq, Qnil);
1496 }
1497
1498 asan_poison_object_if(ptr, v);
1499 }
1500 return 0;
1501}
1502
1503void
1504rb_iseq_remove_coverage_all(void)
1505{
1506 rb_objspace_each_objects(remove_coverage_i, NULL);
1507}
1508
1509/* define wrapper class methods (RubyVM::InstructionSequence) */
1510
1511static void
1512iseqw_mark_and_move(void *ptr)
1513{
1514 rb_gc_mark_and_move((VALUE *)ptr);
1515}
1516
1517static size_t
1518iseqw_memsize(const void *ptr)
1519{
1520 return rb_iseq_memsize(*(const rb_iseq_t **)ptr);
1521}
1522
1523static const rb_data_type_t iseqw_data_type = {
1524 "T_IMEMO/iseq",
1525 {
1526 iseqw_mark_and_move,
1528 iseqw_memsize,
1529 iseqw_mark_and_move,
1530 },
1531 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
1532};
1533
1534static VALUE
1535iseqw_new(const rb_iseq_t *iseq)
1536{
1537 if (iseq->wrapper) {
1538 if (*(const rb_iseq_t **)rb_check_typeddata(iseq->wrapper, &iseqw_data_type) != iseq) {
1539 rb_raise(rb_eTypeError, "wrong iseq wrapper: %" PRIsVALUE " for %p",
1540 iseq->wrapper, (void *)iseq);
1541 }
1542 return iseq->wrapper;
1543 }
1544 else {
1545 rb_iseq_t **ptr;
1546 VALUE obj = TypedData_Make_Struct(rb_cISeq, rb_iseq_t *, &iseqw_data_type, ptr);
1547 RB_OBJ_WRITE(obj, ptr, iseq);
1548
1549 /* cache a wrapper object */
1550 RB_OBJ_WRITE((VALUE)iseq, &iseq->wrapper, obj);
1551
1552 return obj;
1553 }
1554}
1555
1556VALUE
1557rb_iseqw_new(const rb_iseq_t *iseq)
1558{
1559 return iseqw_new(iseq);
1560}
1561
1567static VALUE
1568iseqw_s_compile_parser(int argc, VALUE *argv, VALUE self, bool prism)
1569{
1570 VALUE src, file = Qnil, path = Qnil, line = Qnil, opt = Qnil;
1571 int i;
1572
1573 i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt);
1574 if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5);
1575 switch (i) {
1576 case 5: opt = argv[--i];
1577 case 4: line = argv[--i];
1578 case 3: path = argv[--i];
1579 case 2: file = argv[--i];
1580 }
1581
1582 if (NIL_P(file)) file = rb_fstring_lit("<compiled>");
1583 if (NIL_P(path)) path = file;
1584 if (NIL_P(line)) line = INT2FIX(1);
1585
1586 Check_Type(path, T_STRING);
1587 Check_Type(file, T_STRING);
1588
1589 rb_iseq_t *iseq;
1590 if (prism) {
1591 iseq = pm_iseq_compile_with_option(src, file, path, line, opt);
1592 }
1593 else {
1594 iseq = rb_iseq_compile_with_option(src, file, path, line, opt);
1595 }
1596
1597 return iseqw_new(iseq);
1598}
1599
1600/*
1601 * call-seq:
1602 * InstructionSequence.compile(source[, file[, path[, line[, options]]]]) -> iseq
1603 * InstructionSequence.new(source[, file[, path[, line[, options]]]]) -> iseq
1604 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code.
1607 *
1608 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1609 * real path and first line number of the ruby code in +source+ which are
1610 * metadata attached to the returned +iseq+.
1611 *
1612 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1613 * +require_relative+ base. It is recommended these should be the same full
1614 * path.
1615 *
1616 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1617 * modify the default behavior of the Ruby iseq compiler.
1618 *
1619 * For details regarding valid compile options see ::compile_option=.
1620 *
1621 * RubyVM::InstructionSequence.compile("a = 1 + 2")
1622 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1623 *
1624 * path = "test.rb"
1625 * RubyVM::InstructionSequence.compile(File.read(path), path, File.expand_path(path))
1626 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1627 *
1628 * file = File.open("test.rb")
1629 * RubyVM::InstructionSequence.compile(file)
1630 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1631 *
1632 * path = File.expand_path("test.rb")
1633 * RubyVM::InstructionSequence.compile(File.read(path), path, path)
1634 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1635 *
1636 */
1637static VALUE
1638iseqw_s_compile(int argc, VALUE *argv, VALUE self)
1639{
1640 return iseqw_s_compile_parser(argc, argv, self, rb_ruby_prism_p());
1641}
1642
1643/*
1644 * call-seq:
1645 * InstructionSequence.compile_parsey(source[, file[, path[, line[, options]]]]) -> iseq
1646 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code. It parses and compiles using parse.y.
1649 *
1650 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1651 * real path and first line number of the ruby code in +source+ which are
1652 * metadata attached to the returned +iseq+.
1653 *
1654 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1655 * +require_relative+ base. It is recommended these should be the same full
1656 * path.
1657 *
1658 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1659 * modify the default behavior of the Ruby iseq compiler.
1660 *
1661 * For details regarding valid compile options see ::compile_option=.
1662 *
1663 * RubyVM::InstructionSequence.compile_parsey("a = 1 + 2")
1664 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1665 *
1666 * path = "test.rb"
1667 * RubyVM::InstructionSequence.compile_parsey(File.read(path), path, File.expand_path(path))
1668 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1669 *
1670 * file = File.open("test.rb")
1671 * RubyVM::InstructionSequence.compile_parsey(file)
1672 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1673 *
1674 * path = File.expand_path("test.rb")
1675 * RubyVM::InstructionSequence.compile_parsey(File.read(path), path, path)
1676 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1677 *
1678 */
1679static VALUE
1680iseqw_s_compile_parsey(int argc, VALUE *argv, VALUE self)
1681{
1682 return iseqw_s_compile_parser(argc, argv, self, false);
1683}
1684
1685/*
1686 * call-seq:
1687 * InstructionSequence.compile_prism(source[, file[, path[, line[, options]]]]) -> iseq
1688 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code. It parses and compiles using prism.
1691 *
1692 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1693 * real path and first line number of the ruby code in +source+ which are
1694 * metadata attached to the returned +iseq+.
1695 *
1696 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1697 * +require_relative+ base. It is recommended these should be the same full
1698 * path.
1699 *
1700 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1701 * modify the default behavior of the Ruby iseq compiler.
1702 *
1703 * For details regarding valid compile options see ::compile_option=.
1704 *
1705 * RubyVM::InstructionSequence.compile_prism("a = 1 + 2")
1706 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1707 *
1708 * path = "test.rb"
1709 * RubyVM::InstructionSequence.compile_prism(File.read(path), path, File.expand_path(path))
1710 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1711 *
1712 * file = File.open("test.rb")
1713 * RubyVM::InstructionSequence.compile_prism(file)
1714 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1715 *
1716 * path = File.expand_path("test.rb")
1717 * RubyVM::InstructionSequence.compile_prism(File.read(path), path, path)
1718 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1719 *
1720 */
1721static VALUE
1722iseqw_s_compile_prism(int argc, VALUE *argv, VALUE self)
1723{
1724 return iseqw_s_compile_parser(argc, argv, self, true);
1725}
1726
1727/*
1728 * call-seq:
1729 * InstructionSequence.compile_file(file[, options]) -> iseq
1730 *
1731 * Takes +file+, a String with the location of a Ruby source file, reads,
1732 * parses and compiles the file, and returns +iseq+, the compiled
1733 * InstructionSequence with source location metadata set.
1734 *
1735 * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to
1736 * modify the default behavior of the Ruby iseq compiler.
1737 *
1738 * For details regarding valid compile options see ::compile_option=.
1739 *
1740 * # /tmp/hello.rb
1741 * puts "Hello, world!"
1742 *
1743 * # elsewhere
1744 * RubyVM::InstructionSequence.compile_file("/tmp/hello.rb")
1745 * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb>
1746 */
1747static VALUE
1748iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
1749{
1750 VALUE file, opt = Qnil;
1751 VALUE parser, f, exc = Qnil, ret;
1752 rb_ast_t *ast;
1753 VALUE ast_value;
1754 rb_compile_option_t option;
1755 int i;
1756
1757 i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt);
1758 if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2);
1759 switch (i) {
1760 case 2: opt = argv[--i];
1761 }
1762 FilePathValue(file);
1763 file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */
1764
1765 f = rb_file_open_str(file, "r");
1766
1767 rb_execution_context_t *ec = GET_EC();
1768 VALUE v = rb_vm_push_frame_fname(ec, file);
1769
1770 parser = rb_parser_new();
1771 rb_parser_set_context(parser, NULL, FALSE);
1772 ast_value = rb_parser_load_file(parser, file);
1773 ast = rb_ruby_ast_data_get(ast_value);
1774 if (!ast->body.root) exc = GET_EC()->errinfo;
1775
1776 rb_io_close(f);
1777 if (!ast->body.root) {
1778 rb_ast_dispose(ast);
1779 rb_exc_raise(exc);
1780 }
1781
1782 make_compile_option(&option, opt);
1783
1784 ret = iseqw_new(rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"),
1785 file,
1786 rb_realpath_internal(Qnil, file, 1),
1787 1, NULL, 0, ISEQ_TYPE_TOP, &option,
1788 Qnil));
1789 rb_ast_dispose(ast);
1790 RB_GC_GUARD(ast_value);
1791
1792 rb_vm_pop_frame(ec);
1793 RB_GC_GUARD(v);
1794 return ret;
1795}
1796
1797/*
1798 * call-seq:
1799 * InstructionSequence.compile_file_prism(file[, options]) -> iseq
1800 *
1801 * Takes +file+, a String with the location of a Ruby source file, reads,
1802 * parses and compiles the file, and returns +iseq+, the compiled
1803 * InstructionSequence with source location metadata set. It parses and
1804 * compiles using prism.
1805 *
1806 * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to
1807 * modify the default behavior of the Ruby iseq compiler.
1808 *
1809 * For details regarding valid compile options see ::compile_option=.
1810 *
1811 * # /tmp/hello.rb
1812 * puts "Hello, world!"
1813 *
1814 * # elsewhere
1815 * RubyVM::InstructionSequence.compile_file_prism("/tmp/hello.rb")
1816 * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb>
1817 */
1818static VALUE
1819iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self)
1820{
1821 VALUE file, opt = Qnil, ret;
1822 rb_compile_option_t option;
1823 int i;
1824
1825 i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt);
1826 if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2);
1827 switch (i) {
1828 case 2: opt = argv[--i];
1829 }
1830 FilePathValue(file);
1831 file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */
1832
1833 rb_execution_context_t *ec = GET_EC();
1834 VALUE v = rb_vm_push_frame_fname(ec, file);
1835
1836 pm_parse_result_t result = { 0 };
1837 result.options.line = 1;
1838 result.node.coverage_enabled = 1;
1839
1840 VALUE script_lines;
1841 VALUE error = pm_load_parse_file(&result, file, ruby_vm_keep_script_lines ? &script_lines : NULL);
1842
1843 if (error == Qnil) {
1844 make_compile_option(&option, opt);
1845
1846 int error_state;
1847 rb_iseq_t *iseq = pm_iseq_new_with_opt(&result.node, rb_fstring_lit("<main>"),
1848 file,
1849 rb_realpath_internal(Qnil, file, 1),
1850 1, NULL, 0, ISEQ_TYPE_TOP, &option, &error_state);
1851
1852 pm_parse_result_free(&result);
1853
1854 if (error_state) {
1855 RUBY_ASSERT(iseq == NULL);
1856 rb_jump_tag(error_state);
1857 }
1858
1859 ret = iseqw_new(iseq);
1860 rb_vm_pop_frame(ec);
1861 RB_GC_GUARD(v);
1862 return ret;
1863 }
1864 else {
1865 pm_parse_result_free(&result);
1866 rb_vm_pop_frame(ec);
1867 RB_GC_GUARD(v);
1868 rb_exc_raise(error);
1869 }
1870}
1871
1872/*
1873 * call-seq:
1874 * InstructionSequence.compile_option = options
1875 *
1876 * Sets the default values for various optimizations in the Ruby iseq
1877 * compiler.
1878 *
1879 * Possible values for +options+ include +true+, which enables all options,
1880 * +false+ which disables all options, and +nil+ which leaves all options
1881 * unchanged.
1882 *
1883 * You can also pass a +Hash+ of +options+ that you want to change, any
1884 * options not present in the hash will be left unchanged.
1885 *
1886 * Possible option names (which are keys in +options+) which can be set to
1887 * +true+ or +false+ include:
1888 *
1889 * * +:inline_const_cache+
1890 * * +:instructions_unification+
1891 * * +:operands_unification+
1892 * * +:peephole_optimization+
1893 * * +:specialized_instruction+
1894 * * +:tailcall_optimization+
1895 *
1896 * Additionally, +:debug_level+ can be set to an integer.
1897 *
1898 * These default options can be overwritten for a single run of the iseq
1899 * compiler by passing any of the above values as the +options+ parameter to
1900 * ::new, ::compile and ::compile_file.
1901 */
1902static VALUE
1903iseqw_s_compile_option_set(VALUE self, VALUE opt)
1904{
1905 rb_compile_option_t option;
1906 make_compile_option(&option, opt);
1907 COMPILE_OPTION_DEFAULT = option;
1908 return opt;
1909}
1910
1911/*
1912 * call-seq:
1913 * InstructionSequence.compile_option -> options
1914 *
1915 * Returns a hash of default options used by the Ruby iseq compiler.
1916 *
1917 * For details, see InstructionSequence.compile_option=.
1918 */
1919static VALUE
1920iseqw_s_compile_option_get(VALUE self)
1921{
1922 return make_compile_option_value(&COMPILE_OPTION_DEFAULT);
1923}
1924
1925static const rb_iseq_t *
1926iseqw_check(VALUE iseqw)
1927{
1928 rb_iseq_t **iseq_ptr;
1929 TypedData_Get_Struct(iseqw, rb_iseq_t *, &iseqw_data_type, iseq_ptr);
1930 rb_iseq_t *iseq = *iseq_ptr;
1931
1932 if (!ISEQ_BODY(iseq)) {
1933 rb_ibf_load_iseq_complete(iseq);
1934 }
1935
1936 if (!ISEQ_BODY(iseq)->location.label) {
1937 rb_raise(rb_eTypeError, "uninitialized InstructionSequence");
1938 }
1939 return iseq;
1940}
1941
1942const rb_iseq_t *
1943rb_iseqw_to_iseq(VALUE iseqw)
1944{
1945 return iseqw_check(iseqw);
1946}
1947
1948/*
1949 * call-seq:
1950 * iseq.eval -> obj
1951 *
1952 * Evaluates the instruction sequence and returns the result.
1953 *
1954 * RubyVM::InstructionSequence.compile("1 + 2").eval #=> 3
1955 */
1956static VALUE
1957iseqw_eval(VALUE self)
1958{
1959 const rb_iseq_t *iseq = iseqw_check(self);
1960 if (0 == ISEQ_BODY(iseq)->iseq_size) {
1961 rb_raise(rb_eTypeError, "attempt to evaluate dummy InstructionSequence");
1962 }
1963 return rb_iseq_eval(iseq);
1964}
1965
1966/*
1967 * Returns a human-readable string representation of this instruction
1968 * sequence, including the #label and #path.
1969 */
1970static VALUE
1971iseqw_inspect(VALUE self)
1972{
1973 const rb_iseq_t *iseq = iseqw_check(self);
1974 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
1975 VALUE klass = rb_class_name(rb_obj_class(self));
1976
1977 if (!body->location.label) {
1978 return rb_sprintf("#<%"PRIsVALUE": uninitialized>", klass);
1979 }
1980 else {
1981 return rb_sprintf("<%"PRIsVALUE":%"PRIsVALUE"@%"PRIsVALUE":%d>",
1982 klass,
1983 body->location.label, rb_iseq_path(iseq),
1984 FIX2INT(rb_iseq_first_lineno(iseq)));
1985 }
1986}
1987
1988/*
1989 * Returns the path of this instruction sequence.
1990 *
1991 * <code><compiled></code> if the iseq was evaluated from a string.
1992 *
1993 * For example, using irb:
1994 *
1995 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
1996 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1997 * iseq.path
1998 * #=> "<compiled>"
1999 *
2000 * Using ::compile_file:
2001 *
2002 * # /tmp/method.rb
2003 * def hello
2004 * puts "hello, world"
2005 * end
2006 *
2007 * # in irb
2008 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2009 * > iseq.path #=> /tmp/method.rb
2010 */
2011static VALUE
2012iseqw_path(VALUE self)
2013{
2014 return rb_iseq_path(iseqw_check(self));
2015}
2016
2017/*
2018 * Returns the absolute path of this instruction sequence.
2019 *
2020 * +nil+ if the iseq was evaluated from a string.
2021 *
2022 * For example, using ::compile_file:
2023 *
2024 * # /tmp/method.rb
2025 * def hello
2026 * puts "hello, world"
2027 * end
2028 *
2029 * # in irb
2030 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2031 * > iseq.absolute_path #=> /tmp/method.rb
2032 */
2033static VALUE
2034iseqw_absolute_path(VALUE self)
2035{
2036 return rb_iseq_realpath(iseqw_check(self));
2037}
2038
2039/* Returns the label of this instruction sequence.
2040 *
2041 * <code><main></code> if it's at the top level, <code><compiled></code> if it
2042 * was evaluated from a string.
2043 *
2044 * For example, using irb:
2045 *
2046 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2047 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
2048 * iseq.label
2049 * #=> "<compiled>"
2050 *
2051 * Using ::compile_file:
2052 *
2053 * # /tmp/method.rb
2054 * def hello
2055 * puts "hello, world"
2056 * end
2057 *
2058 * # in irb
2059 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2060 * > iseq.label #=> <main>
2061 */
2062static VALUE
2063iseqw_label(VALUE self)
2064{
2065 return rb_iseq_label(iseqw_check(self));
2066}
2067
2068/* Returns the base label of this instruction sequence.
2069 *
2070 * For example, using irb:
2071 *
2072 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2073 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
2074 * iseq.base_label
2075 * #=> "<compiled>"
2076 *
2077 * Using ::compile_file:
2078 *
2079 * # /tmp/method.rb
2080 * def hello
2081 * puts "hello, world"
2082 * end
2083 *
2084 * # in irb
2085 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2086 * > iseq.base_label #=> <main>
2087 */
2088static VALUE
2089iseqw_base_label(VALUE self)
2090{
2091 return rb_iseq_base_label(iseqw_check(self));
2092}
2093
2094/* Returns the number of the first source line where the instruction sequence
2095 * was loaded from.
2096 *
2097 * For example, using irb:
2098 *
2099 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2100 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
2101 * iseq.first_lineno
2102 * #=> 1
2103 */
2104static VALUE
2105iseqw_first_lineno(VALUE self)
2106{
2107 return rb_iseq_first_lineno(iseqw_check(self));
2108}
2109
2110static VALUE iseq_data_to_ary(const rb_iseq_t *iseq);
2111
2112/*
2113 * call-seq:
2114 * iseq.to_a -> ary
2115 *
2116 * Returns an Array with 14 elements representing the instruction sequence
2117 * with the following data:
2118 *
2119 * [magic]
2120 * A string identifying the data format. <b>Always
2121 * +YARVInstructionSequence/SimpleDataFormat+.</b>
2122 *
2123 * [major_version]
2124 * The major version of the instruction sequence.
2125 *
2126 * [minor_version]
2127 * The minor version of the instruction sequence.
2128 *
2129 * [format_type]
2130 * A number identifying the data format. <b>Always 1</b>.
2131 *
2132 * [misc]
2133 * A hash containing:
2134 *
2135 * [+:arg_size+]
2136 * the total number of arguments taken by the method or the block (0 if
2137 * _iseq_ doesn't represent a method or block)
2138 * [+:local_size+]
2139 * the number of local variables + 1
2140 * [+:stack_max+]
2141 * used in calculating the stack depth at which a SystemStackError is
2142 * thrown.
2143 *
2144 * [#label]
2145 * The name of the context (block, method, class, module, etc.) that this
2146 * instruction sequence belongs to.
2147 *
2148 * <code><main></code> if it's at the top level, <code><compiled></code> if
2149 * it was evaluated from a string.
2150 *
2151 * [#path]
2152 * The relative path to the Ruby file where the instruction sequence was
2153 * loaded from.
2154 *
2155 * <code><compiled></code> if the iseq was evaluated from a string.
2156 *
2157 * [#absolute_path]
2158 * The absolute path to the Ruby file where the instruction sequence was
2159 * loaded from.
2160 *
2161 * +nil+ if the iseq was evaluated from a string.
2162 *
2163 * [#first_lineno]
2164 * The number of the first source line where the instruction sequence was
2165 * loaded from.
2166 *
2167 * [type]
2168 * The type of the instruction sequence.
2169 *
2170 * Valid values are +:top+, +:method+, +:block+, +:class+, +:rescue+,
 *    +:ensure+, +:eval+, +:main+, and +:plain+.
2172 *
2173 * [locals]
2174 * An array containing the names of all arguments and local variables as
2175 * symbols.
2176 *
2177 * [params]
 *    A Hash object containing parameter information.
2179 *
2180 * More info about these values can be found in +vm_core.h+.
2181 *
2182 * [catch_table]
2183 * A list of exceptions and control flow operators (rescue, next, redo,
2184 * break, etc.).
2185 *
2186 * [bytecode]
2187 * An array of arrays containing the instruction names and operands that
2188 * make up the body of the instruction sequence.
2189 *
2190 * Note that this format is MRI specific and version dependent.
2191 *
2192 */
2193static VALUE
2194iseqw_to_a(VALUE self)
2195{
2196 const rb_iseq_t *iseq = iseqw_check(self);
2197 return iseq_data_to_ary(iseq);
2198}
2199
#if VM_INSN_INFO_TABLE_IMPL == 1 /* binary search */
/*
 * Finds the insn_info entry for instruction position `pos` by binary search
 * over body->insns_info.positions (assumed to be sorted in ascending order,
 * as the search requires).
 *
 * Returns NULL when the table is empty and the sole entry when it holds
 * exactly one.  Otherwise returns the entry whose position equals `pos`; if
 * there is none, the entry preceding the first larger position, or the last
 * entry when the search runs off the end of the table.
 */
static const struct iseq_insn_info_entry *
get_insn_info_binary_search(const rb_iseq_t *iseq, size_t pos)
{
    const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
    const struct iseq_insn_info_entry *const entries = body->insns_info.body;
    const unsigned int *const positions = body->insns_info.positions;
    const size_t size = body->insns_info.size;
    const int debug = 0;
    size_t lo, hi;

    if (debug) {
        printf("size: %"PRIuSIZE"\n", size);
        printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
               (size_t)0, positions[0], entries[0].line_no, pos);
    }

    if (size == 0) return NULL;
    if (size == 1) return &entries[0];

    /* Index 0 is never probed; it is only reachable as the "previous" entry. */
    lo = 1;
    hi = size - 1;
    while (lo <= hi) {
        const size_t mid = lo + (hi - lo) / 2;

        if (positions[mid] == pos) {
            return &entries[mid];
        }
        else if (positions[mid] < pos) {
            lo = mid + 1;
        }
        else {
            hi = mid - 1;
        }
    }

    /* No exact match was found. */
    if (lo >= size) {
        return &entries[size - 1];
    }
    return (positions[lo] > pos) ? &entries[lo - 1] : &entries[lo];
}

/* Lookup entry point for this build configuration: binary search. */
static const struct iseq_insn_info_entry *
get_insn_info(const rb_iseq_t *iseq, size_t pos)
{
    const struct iseq_insn_info_entry *const entry = get_insn_info_binary_search(iseq, pos);

    return entry;
}
#endif
2252
#if VM_INSN_INFO_TABLE_IMPL == 2 /* succinct bitvector */
/*
 * Finds the insn_info entry for instruction position `pos` through the
 * iseq's succ_index_table.
 *
 * Returns NULL when the table is empty and the sole entry when it holds
 * exactly one; otherwise the entry at (succ_index_lookup(...) - 1).
 */
static const struct iseq_insn_info_entry *
get_insn_info_succinct_bitvector(const rb_iseq_t *iseq, size_t pos)
{
    const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
    const struct iseq_insn_info_entry *const entries = body->insns_info.body;
    const size_t size = body->insns_info.size;
    const int debug = 0;

    if (debug) {
#if VM_CHECK_MODE > 0
        const unsigned int *positions = body->insns_info.positions;
        printf("size: %"PRIuSIZE"\n", size);
        printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
               (size_t)0, positions[0], entries[0].line_no, pos);
#else
        printf("size: %"PRIuSIZE"\n", size);
        printf("insns_info[%"PRIuSIZE"]: line: %d, pos: %"PRIuSIZE"\n",
               (size_t)0, entries[0].line_no, pos);
#endif
    }

    if (size == 0) return NULL;
    if (size == 1) return &entries[0];

    VM_ASSERT(body->insns_info.succ_index_table != NULL);
    {
        /* The lookup result is one greater than the entry index. */
        const int index = succ_index_lookup(body->insns_info.succ_index_table, (int)pos);

        return &entries[index - 1];
    }
}

/* Lookup entry point for this build configuration: succinct bitvector. */
static const struct iseq_insn_info_entry *
get_insn_info(const rb_iseq_t *iseq, size_t pos)
{
    const struct iseq_insn_info_entry *const entry = get_insn_info_succinct_bitvector(iseq, pos);

    return entry;
}
#endif
2295
2296#if VM_CHECK_MODE > 0 || VM_INSN_INFO_TABLE_IMPL == 0
2297static const struct iseq_insn_info_entry *
2298get_insn_info_linear_search(const rb_iseq_t *iseq, size_t pos)
2299{
2300 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2301 size_t i = 0, size = body->insns_info.size;
2302 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
2303 const unsigned int *positions = body->insns_info.positions;
2304 const int debug = 0;
2305
2306 if (debug) {
2307 printf("size: %"PRIuSIZE"\n", size);
2308 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2309 i, positions[i], insns_info[i].line_no, pos);
2310 }
2311
2312 if (size == 0) {
2313 return NULL;
2314 }
2315 else if (size == 1) {
2316 return &insns_info[0];
2317 }
2318 else {
2319 for (i=1; i<size; i++) {
2320 if (debug) printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2321 i, positions[i], insns_info[i].line_no, pos);
2322
2323 if (positions[i] == pos) {
2324 return &insns_info[i];
2325 }
2326 if (positions[i] > pos) {
2327 return &insns_info[i-1];
2328 }
2329 }
2330 }
2331 return &insns_info[i-1];
2332}
2333#endif
2334
2335#if VM_INSN_INFO_TABLE_IMPL == 0 /* linear search */
2336static const struct iseq_insn_info_entry *
2337get_insn_info(const rb_iseq_t *iseq, size_t pos)
2338{
2339 return get_insn_info_linear_search(iseq, pos);
2340}
2341#endif
2342
#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
/*
 * Cross-checks get_insn_info() against the linear-search reference for every
 * position below body->iseq_size, and reports the first mismatch via rb_bug().
 */
static void
validate_get_insn_info(const rb_iseq_t *iseq)
{
    const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
    size_t pos = 0;

    while (pos < body->iseq_size) {
        if (get_insn_info_linear_search(iseq, pos) != get_insn_info(iseq, pos)) {
            rb_bug("validate_get_insn_info: get_insn_info_linear_search(iseq, %"PRIuSIZE") != get_insn_info(iseq, %"PRIuSIZE")", pos, pos);
        }
        pos++;
    }
}
#endif
2356
2357unsigned int
2358rb_iseq_line_no(const rb_iseq_t *iseq, size_t pos)
2359{
2360 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
2361
2362 if (entry) {
2363 return entry->line_no;
2364 }
2365 else {
2366 return 0;
2367 }
2368}
2369
#ifdef USE_ISEQ_NODE_ID
/*
 * Returns the node id recorded for the instruction at `pos`,
 * or 0 when no insn_info entry is found for it.
 */
int
rb_iseq_node_id(const rb_iseq_t *iseq, size_t pos)
{
    const struct iseq_insn_info_entry *const entry = get_insn_info(iseq, pos);

    return entry ? entry->node_id : 0;
}
#endif
2384
2386rb_iseq_event_flags(const rb_iseq_t *iseq, size_t pos)
2387{
2388 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
2389 if (entry) {
2390 return entry->events;
2391 }
2392 else {
2393 return 0;
2394 }
2395}
2396
2397// Clear tracing event flags and turn off tracing for a given instruction as needed.
2398// This is currently used after updating a one-shot line coverage for the current instruction.
2399void
2400rb_iseq_clear_event_flags(const rb_iseq_t *iseq, size_t pos, rb_event_flag_t reset)
2401{
2402 struct iseq_insn_info_entry *entry = (struct iseq_insn_info_entry *)get_insn_info(iseq, pos);
2403 if (entry) {
2404 entry->events &= ~reset;
2405 if (!(entry->events & iseq->aux.exec.global_trace_events)) {
2406 void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos);
2407 rb_iseq_trace_flag_cleared(iseq, pos);
2408 }
2409 }
2410}
2411
2412static VALUE
2413local_var_name(const rb_iseq_t *diseq, VALUE level, VALUE op)
2414{
2415 VALUE i;
2416 VALUE name;
2417 ID lid;
2418 int idx;
2419
2420 for (i = 0; i < level; i++) {
2421 diseq = ISEQ_BODY(diseq)->parent_iseq;
2422 }
2423 idx = ISEQ_BODY(diseq)->local_table_size - (int)op - 1;
2424 lid = ISEQ_BODY(diseq)->local_table[idx];
2425 name = rb_id2str(lid);
2426 if (!name) {
2427 name = rb_str_new_cstr("?");
2428 }
2429 else if (!rb_is_local_id(lid)) {
2430 name = rb_str_inspect(name);
2431 }
2432 else {
2433 name = rb_str_dup(name);
2434 }
2435 rb_str_catf(name, "@%d", idx);
2436 return name;
2437}
2438
2439int rb_insn_unified_local_var_level(VALUE);
2440VALUE rb_dump_literal(VALUE lit);
2441
/*
 * Formats one operand of an instruction as a String for disassembly output.
 *
 * iseq  - the instruction sequence the instruction belongs to.
 * insn  - the instruction; its operand type string selects the formatting.
 * op_no - index of the operand within the instruction.
 * op    - the raw operand value.
 * len   - length of the instruction; with `pos` it resolves TS_OFFSET operands.
 * pos   - position of the instruction.
 * pnop  - when non-NULL, points at the operand following `op`; it is read as
 *         the level of a TS_LINDEX operand that is followed by a TS_NUM one.
 * child - when truthy, an Array onto which iseqs found among the operands
 *         are pushed.
 *
 * Returns the formatted String.  For a non-null TS_ISEQ operand the iseq's
 * label object itself is returned.  An unknown operand type is reported
 * through rb_bug().
 */
VALUE
rb_insn_operand_intern(const rb_iseq_t *iseq,
                       VALUE insn, int op_no, VALUE op,
                       int len, size_t pos, const VALUE *pnop, VALUE child)
{
    const char *types = insn_op_types(insn);
    char type = types[op_no];
    VALUE ret = Qundef;

    switch (type) {
      case TS_OFFSET: /* LONG */
        /* Printed as pos + len + op rather than as the raw offset. */
        ret = rb_sprintf("%"PRIdVALUE, (VALUE)(pos + len + op));
        break;

      case TS_NUM: /* ULONG */
        if (insn == BIN(defined) && op_no == 0) {
            /* First operand of `defined`: print the defined-type by name. */
            enum defined_type deftype = (enum defined_type)op;
            switch (deftype) {
              case DEFINED_FUNC:
                ret = rb_fstring_lit("func");
                break;
              case DEFINED_REF:
                ret = rb_fstring_lit("ref");
                break;
              case DEFINED_CONST_FROM:
                ret = rb_fstring_lit("constant-from");
                break;
              default:
                ret = rb_iseq_defined_string(deftype);
                break;
            }
            /* A false result falls through to the plain numeric form below. */
            if (ret) break;
        }
        else if (insn == BIN(checktype) && op_no == 0) {
            /* First operand of `checktype`: print the value type by name when known. */
            const char *type_str = rb_type_str((enum ruby_value_type)op);
            if (type_str) {
                ret = rb_str_new_cstr(type_str); break;
            }
        }
        ret = rb_sprintf("%"PRIuVALUE, op);
        break;

      case TS_LINDEX:{
        int level;
        if (types[op_no+1] == TS_NUM && pnop) {
            /* The level is given by the next operand. */
            ret = local_var_name(iseq, *pnop, op - VM_ENV_DATA_SIZE);
        }
        else if ((level = rb_insn_unified_local_var_level(insn)) >= 0) {
            /* The level is implied by the instruction itself. */
            ret = local_var_name(iseq, (VALUE)level, op - VM_ENV_DATA_SIZE);
        }
        else {
            ret = rb_inspect(INT2FIX(op));
        }
        break;
      }
      case TS_ID: /* ID (symbol) */
        ret = rb_inspect(ID2SYM(op));
        break;

      case TS_VALUE: /* VALUE */
        op = obj_resurrect(op);
        if (insn == BIN(defined) && op_no == 1 && FIXNUM_P(op)) {
            /* should be DEFINED_REF */
            /* Note: this `type` shadows the operand-type `type` declared above. */
            int type = NUM2INT(op);
            if (type) {
                /* Odd values print (type >> 1) as a character, even ones as a number. */
                if (type & 1) {
                    ret = rb_sprintf(":$%c", (type >> 1));
                }
                else {
                    ret = rb_sprintf(":$%d", (type >> 1));
                }
                break;
            }
        }
        ret = rb_dump_literal(op);
        if (CLASS_OF(op) == rb_cISeq) {
            if (child) {
                rb_ary_push(child, op);
            }
        }
        break;

      case TS_ISEQ: /* iseq */
        {
            if (op) {
                /* Note: this `iseq` shadows the parameter; it is the operand's iseq. */
                const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op);
                ret = ISEQ_BODY(iseq)->location.label;
                if (child) {
                    rb_ary_push(child, (VALUE)iseq);
                }
            }
            else {
                ret = rb_str_new2("nil");
            }
            break;
        }

      case TS_IC:
        {
            /* Index of the cache within is_entries, then the segment names joined by "::". */
            ret = rb_sprintf("<ic:%"PRIdPTRDIFF" ", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries);
            const ID *segments = ((IC)op)->segments;
            rb_str_cat2(ret, rb_id2name(*segments++));
            /* The segment list ends at the first zero ID. */
            while (*segments) {
                rb_str_catf(ret, "::%s", rb_id2name(*segments++));
            }
            rb_str_cat2(ret, ">");
        }
        break;
      case TS_IVC:
      case TS_ICVARC:
      case TS_ISE:
        /* Printed as the index of the entry within is_entries. */
        ret = rb_sprintf("<is:%"PRIdPTRDIFF">", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries);
        break;

      case TS_CALLDATA:
        {
            struct rb_call_data *cd = (struct rb_call_data *)op;
            const struct rb_callinfo *ci = cd->ci;
            /* `ary` collects the parts that are joined with ", " at the end. */
            VALUE ary = rb_ary_new();
            ID mid = vm_ci_mid(ci);

            if (mid) {
                rb_ary_push(ary, rb_sprintf("mid:%"PRIsVALUE, rb_id2str(mid)));
            }

            rb_ary_push(ary, rb_sprintf("argc:%d", vm_ci_argc(ci)));

            if (vm_ci_flag(ci) & VM_CALL_KWARG) {
                const struct rb_callinfo_kwarg *kw_args = vm_ci_kwarg(ci);
                VALUE kw_ary = rb_ary_new_from_values(kw_args->keyword_len, kw_args->keywords);
                rb_ary_push(ary, rb_sprintf("kw:[%"PRIsVALUE"]", rb_ary_join(kw_ary, rb_str_new2(","))));
            }

            if (vm_ci_flag(ci)) {
                /* Names of the set call flags, joined with "|". */
                VALUE flags = rb_ary_new();
# define CALL_FLAG(n) if (vm_ci_flag(ci) & VM_CALL_##n) rb_ary_push(flags, rb_str_new2(#n))
                CALL_FLAG(ARGS_SPLAT);
                CALL_FLAG(ARGS_SPLAT_MUT);
                CALL_FLAG(ARGS_BLOCKARG);
                CALL_FLAG(FCALL);
                CALL_FLAG(VCALL);
                CALL_FLAG(ARGS_SIMPLE);
                CALL_FLAG(TAILCALL);
                CALL_FLAG(SUPER);
                CALL_FLAG(ZSUPER);
                CALL_FLAG(KWARG);
                CALL_FLAG(KW_SPLAT);
                CALL_FLAG(KW_SPLAT_MUT);
                CALL_FLAG(FORWARDING);
                CALL_FLAG(OPT_SEND); /* maybe not reachable */
                rb_ary_push(ary, rb_ary_join(flags, rb_str_new2("|")));
            }

            ret = rb_sprintf("<calldata!%"PRIsVALUE">", rb_ary_join(ary, rb_str_new2(", ")));
        }
        break;

      case TS_CDHASH:
        ret = rb_str_new2("<cdhash>");
        break;

      case TS_FUNCPTR:
        {
#ifdef HAVE_DLADDR
            /* Use the symbol name when dladdr() can resolve the pointer. */
            Dl_info info;
            if (dladdr((void *)op, &info) && info.dli_sname) {
                ret = rb_str_new_cstr(info.dli_sname);
                break;
            }
#endif
            ret = rb_str_new2("<funcptr>");
        }
        break;

      case TS_BUILTIN:
        {
            const struct rb_builtin_function *bf = (const struct rb_builtin_function *)op;
            ret = rb_sprintf("<builtin!%s/%d>",
                             bf->name, bf->argc);
        }
        break;

      default:
        rb_bug("unknown operand type: %c", type);
    }
    return ret;
}
2629
2630static VALUE
2631right_strip(VALUE str)
2632{
2633 const char *beg = RSTRING_PTR(str), *end = RSTRING_END(str);
2634 while (end-- > beg && *end == ' ');
2635 rb_str_set_len(str, end - beg + 1);
2636 return str;
2637}
2638
/*
 * Disassembles the single instruction at code[pos] into one line of text.
 *
 * ret   - when truthy, a String to which the line (plus "\n") is appended;
 *         otherwise the line is printed with printf().
 * code  - the instruction array; operands follow the instruction at `pos`.
 * pos   - position of the instruction within `code`.
 * iseq  - the instruction sequence, used for operand, line and event lookups.
 * child - passed on to rb_insn_operand_intern() for each operand.
 *
 * Returns the length of the instruction as given by insn_len().
 */
int
rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos,
                    const rb_iseq_t *iseq, VALUE child)
{
    VALUE insn = code[pos];
    int len = insn_len(insn);
    int j;
    const char *types = insn_op_types(insn);
    VALUE str = rb_str_new(0, 0);
    const char *insn_name_buff;

    insn_name_buff = insn_name(insn);
    /* Only the first branch is ever taken; the else branch is an unused alternative format. */
    if (1) {
        extern const int rb_vm_max_insn_name_size;
        rb_str_catf(str, "%04"PRIuSIZE" %-*s ", pos, rb_vm_max_insn_name_size, insn_name_buff);
    }
    else {
        rb_str_catf(str, "%04"PRIuSIZE" %-28.*s ", pos,
                    (int)strcspn(insn_name_buff, "_"), insn_name_buff);
    }

    /* Operands, separated by ", ". */
    for (j = 0; types[j]; j++) {
        VALUE opstr = rb_insn_operand_intern(iseq, insn, j, code[pos + j + 1],
                                             len, pos, &code[pos + j + 2],
                                             child);
        rb_str_concat(str, opstr);

        if (types[j + 1]) {
            rb_str_cat2(str, ", ");
        }
    }

    {
        /* The line number is shown only when it is non-zero and differs from
         * the one looked up for the preceding position. */
        unsigned int line_no = rb_iseq_line_no(iseq, pos);
        unsigned int prev = pos == 0 ? 0 : rb_iseq_line_no(iseq, pos - 1);
        if (line_no && line_no != prev) {
            /* Pad with spaces up to column 70 before the line number. */
            long slen = RSTRING_LEN(str);
            slen = (slen > 70) ? 0 : (70 - slen);
            str = rb_str_catf(str, "%*s(%4d)", (int)slen, "", line_no);
        }
    }

    {
        /* Event flags are shown as a bracketed list of short tags. */
        rb_event_flag_t events = rb_iseq_event_flags(iseq, pos);
        if (events) {
            str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s%s]",
                              events & RUBY_EVENT_LINE ? "Li" : "",
                              events & RUBY_EVENT_CLASS ? "Cl" : "",
                              events & RUBY_EVENT_END ? "En" : "",
                              events & RUBY_EVENT_CALL ? "Ca" : "",
                              events & RUBY_EVENT_RETURN ? "Re" : "",
                              events & RUBY_EVENT_C_CALL ? "Cc" : "",
                              events & RUBY_EVENT_C_RETURN ? "Cr" : "",
                              events & RUBY_EVENT_B_CALL ? "Bc" : "",
                              events & RUBY_EVENT_B_RETURN ? "Br" : "",
                              events & RUBY_EVENT_RESCUE ? "Rs" : "",
                              events & RUBY_EVENT_COVERAGE_LINE ? "Cli" : "",
                              events & RUBY_EVENT_COVERAGE_BRANCH ? "Cbr" : "");
        }
    }

    right_strip(str);
    if (ret) {
        rb_str_cat2(str, "\n");
        rb_str_concat(ret, str);
    }
    else {
        printf("%.*s\n", (int)RSTRING_LEN(str), RSTRING_PTR(str));
    }
    return len;
}
2714
2715static const char *
2716catch_type(int type)
2717{
2718 switch (type) {
2719 case CATCH_TYPE_RESCUE:
2720 return "rescue";
2721 case CATCH_TYPE_ENSURE:
2722 return "ensure";
2723 case CATCH_TYPE_RETRY:
2724 return "retry";
2725 case CATCH_TYPE_BREAK:
2726 return "break";
2727 case CATCH_TYPE_REDO:
2728 return "redo";
2729 case CATCH_TYPE_NEXT:
2730 return "next";
2731 default:
2732 rb_bug("unknown catch type: %d", type);
2733 return 0;
2734 }
2735}
2736
2737static VALUE
2738iseq_inspect(const rb_iseq_t *iseq)
2739{
2740 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2741 if (!body->location.label) {
2742 return rb_sprintf("#<ISeq: uninitialized>");
2743 }
2744 else {
2745 const rb_code_location_t *loc = &body->location.code_location;
2746 return rb_sprintf("#<ISeq:%"PRIsVALUE"@%"PRIsVALUE":%d (%d,%d)-(%d,%d)>",
2747 body->location.label, rb_iseq_path(iseq),
2748 loc->beg_pos.lineno,
2749 loc->beg_pos.lineno,
2750 loc->beg_pos.column,
2751 loc->end_pos.lineno,
2752 loc->end_pos.column);
2753 }
2754}
2755
/*
 * Typed-data descriptor named "tmpset", used by rb_iseq_disasm_recursive() to
 * wrap its temporary st_table of already-disassembled iseqs.
 * The two callbacks supplied are rb_mark_set and st_free_table, both cast to
 * the void (*)(void *) callback type; the remaining function slots are 0.
 */
static const rb_data_type_t tmp_set = {
    "tmpset",
    {(void (*)(void *))rb_mark_set, (void (*)(void *))st_free_table, 0, 0,},
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
2761
/*
 * Disassembles `iseq` and, recursively, the iseqs it refers to.
 *
 * iseq   - the instruction sequence to disassemble.
 * indent - a String prepended to every output line.  It is extended with
 *          "| " while catch-table iseqs are disassembled and resized back to
 *          its original length afterwards.
 *
 * The output consists of a header line, the catch table (if any) with the
 * disassembly of each catch entry's iseq, the local table (if any), one line
 * per instruction, and finally the disassembly of every child iseq collected
 * from the operands that was not already shown under the catch table.
 *
 * Returns the text as a new String.
 */
static VALUE
rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent)
{
    const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
    VALUE *code;
    VALUE str = rb_str_new(0, 0);
    /* Collects iseqs found among operands while instructions are disassembled. */
    VALUE child = rb_ary_hidden_new(3);
    unsigned int size;
    unsigned int i;
    long l;
    size_t n;
    enum {header_minlen = 72};
    /* Iseqs already disassembled from the catch table; created on first use. */
    st_table *done_iseq = 0;
    VALUE done_iseq_wrapper = Qnil;
    const char *indent_str;
    long indent_len;

    size = body->iseq_size;

    indent_len = RSTRING_LEN(indent);
    indent_str = RSTRING_PTR(indent);

    rb_str_cat(str, indent_str, indent_len);
    rb_str_cat2(str, "== disasm: ");

    rb_str_append(str, iseq_inspect(iseq));
    if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) {
        /* NOTE(review): the '=' bytes are written past the current end of the
         * string and nothing visible here advances the length over them, so
         * the appends below appear to overwrite them -- confirm whether the
         * padding is still intended. */
        rb_str_modify_expand(str, header_minlen - l);
        memset(RSTRING_END(str), '=', header_minlen - l);
    }
    if (iseq->body->builtin_attrs) {
        /* Appends " NAME" for each builtin attribute that is set. */
#define disasm_builtin_attr(str, iseq, attr) \
        if (iseq->body->builtin_attrs & BUILTIN_ATTR_ ## attr) { \
            rb_str_cat2(str, " " #attr); \
        }
        disasm_builtin_attr(str, iseq, LEAF);
        disasm_builtin_attr(str, iseq, SINGLE_NOARG_LEAF);
        disasm_builtin_attr(str, iseq, INLINE_BLOCK);
        disasm_builtin_attr(str, iseq, C_TRACE);
    }
    rb_str_cat2(str, "\n");

    /* show catch table information */
    if (body->catch_table) {
        rb_str_cat(str, indent_str, indent_len);
        rb_str_cat2(str, "== catch table\n");
    }
    if (body->catch_table) {
        /* Nested iseqs get the longer indent; the entry lines written here
         * still use only the first indent_len bytes of it. */
        rb_str_cat_cstr(indent, "| ");
        /* indent_str is re-read after every operation on `indent`,
         * presumably because its buffer may have moved. */
        indent_str = RSTRING_PTR(indent);
        for (i = 0; i < body->catch_table->size; i++) {
            const struct iseq_catch_table_entry *entry =
                UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]);
            rb_str_cat(str, indent_str, indent_len);
            rb_str_catf(str,
                        "| catch type: %-6s st: %04d ed: %04d sp: %04d cont: %04d\n",
                        catch_type((int)entry->type), (int)entry->start,
                        (int)entry->end, (int)entry->sp, (int)entry->cont);
            /* Disassemble each catch entry's iseq only once. */
            if (entry->iseq && !(done_iseq && st_is_member(done_iseq, (st_data_t)entry->iseq))) {
                rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check(entry->iseq), indent));
                if (!done_iseq) {
                    done_iseq = st_init_numtable();
                    /* Wrapped in a typed-data object whose free callback is st_free_table. */
                    done_iseq_wrapper = TypedData_Wrap_Struct(0, &tmp_set, done_iseq);
                }
                st_insert(done_iseq, (st_data_t)entry->iseq, (st_data_t)0);
                indent_str = RSTRING_PTR(indent);
            }
        }
        /* Drop the "| " that was added above. */
        rb_str_resize(indent, indent_len);
        indent_str = RSTRING_PTR(indent);
    }
    if (body->catch_table) {
        rb_str_cat(str, indent_str, indent_len);
        rb_str_cat2(str, "|-------------------------------------"
                    "-----------------------------------\n");
    }

    /* show local table information */
    if (body->local_table) {
        const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
        rb_str_cat(str, indent_str, indent_len);
        /* Parameters that are absent are printed as -1. */
        rb_str_catf(str,
                    "local table (size: %d, argc: %d "
                    "[opts: %d, rest: %d, post: %d, block: %d, kw: %d@%d, kwrest: %d])\n",
                    body->local_table_size,
                    body->param.lead_num,
                    body->param.opt_num,
                    body->param.flags.has_rest ? body->param.rest_start : -1,
                    body->param.post_num,
                    body->param.flags.has_block ? body->param.block_start : -1,
                    body->param.flags.has_kw ? keyword->num : -1,
                    body->param.flags.has_kw ? keyword->required_num : -1,
                    body->param.flags.has_kwrest ? keyword->rest_start : -1);

        /* i counts down from local_table_size - 1 to 0 while li, the
         * local-table index, counts up from 0. */
        for (i = body->local_table_size; i > 0;) {
            int li = body->local_table_size - --i - 1;
            long width;
            VALUE name = local_var_name(iseq, 0, i);
            char argi[0x100];
            char opti[0x100];

            opti[0] = '\0';
            if (body->param.flags.has_opt) {
                int argc = body->param.lead_num;
                int opts = body->param.opt_num;
                /* Optional parameters are annotated with their opt_table entry. */
                if (li >= argc && li < argc + opts) {
                    snprintf(opti, sizeof(opti), "Opt=%"PRIdVALUE,
                             body->param.opt_table[li - argc]);
                }
            }

            snprintf(argi, sizeof(argi), "%s%s%s%s%s%s", /* arg, opts, rest, post, kwrest, block */
                     (body->param.lead_num > li) ? (body->param.flags.ambiguous_param0 ? "AmbiguousArg" : "Arg") : "",
                     opti,
                     (body->param.flags.has_rest && body->param.rest_start == li) ? (body->param.flags.anon_rest ? "AnonRest" : "Rest") : "",
                     (body->param.flags.has_post && body->param.post_start <= li && li < body->param.post_start + body->param.post_num) ? "Post" : "",
                     (body->param.flags.has_kwrest && keyword->rest_start == li) ? (body->param.flags.anon_kwrest ? "AnonKwrest" : "Kwrest") : "",
                     (body->param.flags.has_block && body->param.block_start == li) ? "Block" : "");

            rb_str_cat(str, indent_str, indent_len);
            rb_str_catf(str, "[%2d] ", i + 1);
            /* Pad the name (plus any <...> annotation) to 11 columns. */
            width = RSTRING_LEN(str) + 11;
            rb_str_append(str, name);
            if (*argi) rb_str_catf(str, "<%s>", argi);
            if ((width -= RSTRING_LEN(str)) > 0) rb_str_catf(str, "%*s", (int)width, "");
        }
        rb_str_cat_cstr(right_strip(str), "\n");
    }

    /* show each line */
    code = rb_iseq_original_iseq(iseq);
    for (n = 0; n < size;) {
        rb_str_cat(str, indent_str, indent_len);
        /* The return value is the instruction length, so n moves to the next instruction. */
        n += rb_iseq_disasm_insn(str, code, n, iseq, child);
    }

    /* Child iseqs found among the operands, except those already shown above. */
    for (l = 0; l < RARRAY_LEN(child); l++) {
        VALUE isv = rb_ary_entry(child, l);
        if (done_iseq && st_is_member(done_iseq, (st_data_t)isv)) continue;
        rb_str_cat_cstr(str, "\n");
        rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check((rb_iseq_t *)isv), indent));
        indent_str = RSTRING_PTR(indent);
    }
    /* Keeps the wrapper object (and with it done_iseq) referenced up to this point. */
    RB_GC_GUARD(done_iseq_wrapper);

    return str;
}
2909
2910VALUE
2911rb_iseq_disasm(const rb_iseq_t *iseq)
2912{
2913 VALUE str = rb_iseq_disasm_recursive(iseq, rb_str_new(0, 0));
2914 rb_str_resize(str, RSTRING_LEN(str));
2915 return str;
2916}
2917
2918/*
2919 * Estimates the number of instance variables that will be set on
2920 * a given `class` with the initialize method defined in
2921 * `initialize_iseq`
2922 */
2923attr_index_t
2924rb_estimate_iv_count(VALUE klass, const rb_iseq_t * initialize_iseq)
2925{
2926 struct rb_id_table * iv_names = rb_id_table_create(0);
2927
2928 for (unsigned int i = 0; i < ISEQ_BODY(initialize_iseq)->ivc_size; i++) {
2929 IVC cache = (IVC)&ISEQ_BODY(initialize_iseq)->is_entries[i];
2930
2931 if (cache->iv_set_name) {
2932 rb_id_table_insert(iv_names, cache->iv_set_name, Qtrue);
2933 }
2934 }
2935
2936 attr_index_t count = (attr_index_t)rb_id_table_size(iv_names);
2937
2938 VALUE superclass = rb_class_superclass(klass);
2939 count += RCLASS_MAX_IV_COUNT(superclass);
2940
2941 rb_id_table_free(iv_names);
2942
2943 return count;
2944}
2945
2946/*
2947 * call-seq:
2948 * iseq.disasm -> str
2949 * iseq.disassemble -> str
2950 *
2951 * Returns the instruction sequence as a +String+ in human readable form.
2952 *
2953 * puts RubyVM::InstructionSequence.compile('1 + 2').disasm
2954 *
2955 * Produces:
2956 *
2957 * == disasm: <RubyVM::InstructionSequence:<compiled>@<compiled>>==========
2958 * 0000 trace 1 ( 1)
2959 * 0002 putobject 1
2960 * 0004 putobject 2
2961 * 0006 opt_plus <ic:1>
2962 * 0008 leave
2963 */
2964static VALUE
2965iseqw_disasm(VALUE self)
2966{
2967 return rb_iseq_disasm(iseqw_check(self));
2968}
2969
2970static int
2971iseq_iterate_children(const rb_iseq_t *iseq, void (*iter_func)(const rb_iseq_t *child_iseq, void *data), void *data)
2972{
2973 unsigned int i;
2974 VALUE *code = rb_iseq_original_iseq(iseq);
2975 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2976 const rb_iseq_t *child;
2977 VALUE all_children = rb_obj_hide(rb_ident_hash_new());
2978
2979 if (body->catch_table) {
2980 for (i = 0; i < body->catch_table->size; i++) {
2981 const struct iseq_catch_table_entry *entry =
2982 UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]);
2983 child = entry->iseq;
2984 if (child) {
2985 if (NIL_P(rb_hash_aref(all_children, (VALUE)child))) {
2986 rb_hash_aset(all_children, (VALUE)child, Qtrue);
2987 (*iter_func)(child, data);
2988 }
2989 }
2990 }
2991 }
2992
2993 for (i=0; i<body->iseq_size;) {
2994 VALUE insn = code[i];
2995 int len = insn_len(insn);
2996 const char *types = insn_op_types(insn);
2997 int j;
2998
2999 for (j=0; types[j]; j++) {
3000 switch (types[j]) {
3001 case TS_ISEQ:
3002 child = (const rb_iseq_t *)code[i+j+1];
3003 if (child) {
3004 if (NIL_P(rb_hash_aref(all_children, (VALUE)child))) {
3005 rb_hash_aset(all_children, (VALUE)child, Qtrue);
3006 (*iter_func)(child, data);
3007 }
3008 }
3009 break;
3010 default:
3011 break;
3012 }
3013 }
3014 i += len;
3015 }
3016
3017 return (int)RHASH_SIZE(all_children);
3018}
3019
3020static void
3021yield_each_children(const rb_iseq_t *child_iseq, void *data)
3022{
3023 rb_yield(iseqw_new(child_iseq));
3024}
3025
3026/*
3027 * call-seq:
3028 * iseq.each_child{|child_iseq| ...} -> iseq
3029 *
 *  Iterates over all direct child instruction sequences.
 *  The iteration order is implementation- and version-defined,
 *  so callers should not rely on it.
3033 */
3034static VALUE
3035iseqw_each_child(VALUE self)
3036{
3037 const rb_iseq_t *iseq = iseqw_check(self);
3038 iseq_iterate_children(iseq, yield_each_children, NULL);
3039 return self;
3040}
3041
/*
 * Appends a [line, event_symbol] pair to `ary` for each of the events listed
 * below that is set in `events`.  "class", "call" and "b_call" are reported
 * with the iseq's first line number; the others with `line`.
 */
static void
push_event_info(const rb_iseq_t *iseq, rb_event_flag_t events, int line, VALUE ary)
{
    /* C(ev, cstr, l): when `ev` is set in `events`, push [l, :cstr] onto `ary`. */
#define C(ev, cstr, l) if (events & ev) rb_ary_push(ary, rb_ary_new_from_args(2, l, ID2SYM(rb_intern(cstr))));
    C(RUBY_EVENT_CLASS, "class", rb_iseq_first_lineno(iseq));
    C(RUBY_EVENT_CALL, "call", rb_iseq_first_lineno(iseq));
    C(RUBY_EVENT_B_CALL, "b_call", rb_iseq_first_lineno(iseq));
    C(RUBY_EVENT_LINE, "line", INT2FIX(line));
    C(RUBY_EVENT_END, "end", INT2FIX(line));
    C(RUBY_EVENT_RETURN, "return", INT2FIX(line));
    C(RUBY_EVENT_B_RETURN, "b_return", INT2FIX(line));
    C(RUBY_EVENT_RESCUE, "rescue", INT2FIX(line));
#undef C
}
3056
3057/*
3058 * call-seq:
3059 * iseq.trace_points -> ary
3060 *
 *  Returns the trace points in the instruction sequence
 *  as an array of [line, event_symbol] pairs.
3063 */
3064static VALUE
3065iseqw_trace_points(VALUE self)
3066{
3067 const rb_iseq_t *iseq = iseqw_check(self);
3068 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3069 unsigned int i;
3070 VALUE ary = rb_ary_new();
3071
3072 for (i=0; i<body->insns_info.size; i++) {
3073 const struct iseq_insn_info_entry *entry = &body->insns_info.body[i];
3074 if (entry->events) {
3075 push_event_info(iseq, entry->events, entry->line_no, ary);
3076 }
3077 }
3078 return ary;
3079}
3080
3081/*
3082 * Returns the instruction sequence containing the given proc or method.
3083 *
3084 * For example, using irb:
3085 *
3086 * # a proc
3087 * > p = proc { num = 1 + 2 }
3088 * > RubyVM::InstructionSequence.of(p)
3089 * > #=> <RubyVM::InstructionSequence:block in irb_binding@(irb)>
3090 *
3091 * # for a method
3092 * > def foo(bar); puts bar; end
3093 * > RubyVM::InstructionSequence.of(method(:foo))
3094 * > #=> <RubyVM::InstructionSequence:foo@(irb)>
3095 *
3096 * Using ::compile_file:
3097 *
3098 * # /tmp/iseq_of.rb
3099 * def hello
3100 * puts "hello, world"
3101 * end
3102 *
3103 * $a_global_proc = proc { str = 'a' + 'b' }
3104 *
3105 * # in irb
3106 * > require '/tmp/iseq_of.rb'
3107 *
3108 * # first the method hello
3109 * > RubyVM::InstructionSequence.of(method(:hello))
3110 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7cb1d0>
3111 *
3112 * # then the global proc
3113 * > RubyVM::InstructionSequence.of($a_global_proc)
3114 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7caf78>
3115 */
3116static VALUE
3117iseqw_s_of(VALUE klass, VALUE body)
3118{
3119 const rb_iseq_t *iseq = NULL;
3120
3121 if (rb_frame_info_p(body)) {
3122 iseq = rb_get_iseq_from_frame_info(body);
3123 }
3124 else if (rb_obj_is_proc(body)) {
3125 iseq = vm_proc_iseq(body);
3126
3127 if (!rb_obj_is_iseq((VALUE)iseq)) {
3128 iseq = NULL;
3129 }
3130 }
3131 else if (rb_obj_is_method(body)) {
3132 iseq = rb_method_iseq(body);
3133 }
3134 else if (rb_typeddata_is_instance_of(body, &iseqw_data_type)) {
3135 return body;
3136 }
3137
3138 return iseq ? iseqw_new(iseq) : Qnil;
3139}
3140
3141/*
3142 * call-seq:
3143 * InstructionSequence.disasm(body) -> str
3144 * InstructionSequence.disassemble(body) -> str
3145 *
3146 * Takes +body+, a +Method+ or +Proc+ object, and returns a +String+
3147 * with the human readable instructions for +body+.
3148 *
3149 * For a +Method+ object:
3150 *
3151 * # /tmp/method.rb
3152 * def hello
3153 * puts "hello, world"
3154 * end
3155 *
3156 * puts RubyVM::InstructionSequence.disasm(method(:hello))
3157 *
3158 * Produces:
3159 *
3160 * == disasm: <RubyVM::InstructionSequence:hello@/tmp/method.rb>============
3161 * 0000 trace 8 ( 1)
3162 * 0002 trace 1 ( 2)
3163 * 0004 putself
3164 * 0005 putstring "hello, world"
3165 * 0007 send :puts, 1, nil, 8, <ic:0>
3166 * 0013 trace 16 ( 3)
3167 * 0015 leave ( 2)
3168 *
3169 * For a +Proc+ object:
3170 *
3171 * # /tmp/proc.rb
3172 * p = proc { num = 1 + 2 }
3173 * puts RubyVM::InstructionSequence.disasm(p)
3174 *
3175 * Produces:
3176 *
3177 * == disasm: <RubyVM::InstructionSequence:block in <main>@/tmp/proc.rb>===
3178 * == catch table
3179 * | catch type: redo st: 0000 ed: 0012 sp: 0000 cont: 0000
3180 * | catch type: next st: 0000 ed: 0012 sp: 0000 cont: 0012
3181 * |------------------------------------------------------------------------
3182 * local table (size: 2, argc: 0 [opts: 0, rest: -1, post: 0, block: -1] s1)
3183 * [ 2] num
3184 * 0000 trace 1 ( 1)
3185 * 0002 putobject 1
3186 * 0004 putobject 2
3187 * 0006 opt_plus <ic:1>
3188 * 0008 dup
3189 * 0009 setlocal num, 0
3190 * 0012 leave
3191 *
3192 */
3193static VALUE
3194iseqw_s_disasm(VALUE klass, VALUE body)
3195{
3196 VALUE iseqw = iseqw_s_of(klass, body);
3197 return NIL_P(iseqw) ? Qnil : rb_iseq_disasm(iseqw_check(iseqw));
3198}
3199
3200static VALUE
3201register_label(struct st_table *table, unsigned long idx)
3202{
3203 VALUE sym = rb_str_intern(rb_sprintf("label_%lu", idx));
3204 st_insert(table, idx, sym);
3205 return sym;
3206}
3207
3208static VALUE
3209exception_type2symbol(VALUE type)
3210{
3211 ID id;
3212 switch (type) {
3213 case CATCH_TYPE_RESCUE: CONST_ID(id, "rescue"); break;
3214 case CATCH_TYPE_ENSURE: CONST_ID(id, "ensure"); break;
3215 case CATCH_TYPE_RETRY: CONST_ID(id, "retry"); break;
3216 case CATCH_TYPE_BREAK: CONST_ID(id, "break"); break;
3217 case CATCH_TYPE_REDO: CONST_ID(id, "redo"); break;
3218 case CATCH_TYPE_NEXT: CONST_ID(id, "next"); break;
3219 default:
3220 rb_bug("unknown exception type: %d", (int)type);
3221 }
3222 return ID2SYM(id);
3223}
3224
3225static int
3226cdhash_each(VALUE key, VALUE value, VALUE ary)
3227{
3228 rb_ary_push(ary, obj_resurrect(key));
3229 rb_ary_push(ary, value);
3230 return ST_CONTINUE;
3231}
3232
3233static const rb_data_type_t label_wrapper = {
3234 "label_wrapper",
3235 {(void (*)(void *))rb_mark_tbl, (void (*)(void *))st_free_table, 0, 0,},
3236 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
3237};
3238
/* Declare a static ID variable named id_<name>. */
#define DECL_ID(name) static ID id_##name

/* Intern the string "<name>" and store the result in id_<name>. */
#define INIT_ID(name) id_##name = rb_intern(#name)
3244
3245static VALUE
3246iseq_type_id(enum rb_iseq_type type)
3247{
3248 DECL_ID(top);
3249 DECL_ID(method);
3250 DECL_ID(block);
3251 DECL_ID(class);
3252 DECL_ID(rescue);
3253 DECL_ID(ensure);
3254 DECL_ID(eval);
3255 DECL_ID(main);
3256 DECL_ID(plain);
3257
3258 if (id_top == 0) {
3259 INIT_ID(top);
3260 INIT_ID(method);
3261 INIT_ID(block);
3262 INIT_ID(class);
3263 INIT_ID(rescue);
3264 INIT_ID(ensure);
3265 INIT_ID(eval);
3266 INIT_ID(main);
3267 INIT_ID(plain);
3268 }
3269
3270 switch (type) {
3271 case ISEQ_TYPE_TOP: return id_top;
3272 case ISEQ_TYPE_METHOD: return id_method;
3273 case ISEQ_TYPE_BLOCK: return id_block;
3274 case ISEQ_TYPE_CLASS: return id_class;
3275 case ISEQ_TYPE_RESCUE: return id_rescue;
3276 case ISEQ_TYPE_ENSURE: return id_ensure;
3277 case ISEQ_TYPE_EVAL: return id_eval;
3278 case ISEQ_TYPE_MAIN: return id_main;
3279 case ISEQ_TYPE_PLAIN: return id_plain;
3280 };
3281
3282 rb_bug("unsupported iseq type: %d", (int)type);
3283}
3284
3285static VALUE
3286iseq_data_to_ary(const rb_iseq_t *iseq)
3287{
3288 unsigned int i;
3289 long l;
3290 const struct rb_iseq_constant_body *const iseq_body = ISEQ_BODY(iseq);
3291 const struct iseq_insn_info_entry *prev_insn_info;
3292 unsigned int pos;
3293 int last_line = 0;
3294 VALUE *seq, *iseq_original;
3295
3296 VALUE val = rb_ary_new();
3297 ID type; /* Symbol */
3298 VALUE locals = rb_ary_new();
3299 VALUE params = rb_hash_new();
3300 VALUE body = rb_ary_new(); /* [[:insn1, ...], ...] */
3301 VALUE nbody;
3302 VALUE exception = rb_ary_new(); /* [[....]] */
3303 VALUE misc = rb_hash_new();
3304
3305 static ID insn_syms[VM_BARE_INSTRUCTION_SIZE]; /* w/o-trace only */
3306 struct st_table *labels_table = st_init_numtable();
3307 VALUE labels_wrapper = TypedData_Wrap_Struct(0, &label_wrapper, labels_table);
3308
3309 if (insn_syms[0] == 0) {
3310 int i;
3311 for (i=0; i<numberof(insn_syms); i++) {
3312 insn_syms[i] = rb_intern(insn_name(i));
3313 }
3314 }
3315
3316 /* type */
3317 type = iseq_type_id(iseq_body->type);
3318
3319 /* locals */
3320 for (i=0; i<iseq_body->local_table_size; i++) {
3321 ID lid = iseq_body->local_table[i];
3322 if (lid) {
3323 if (rb_id2str(lid)) {
3324 rb_ary_push(locals, ID2SYM(lid));
3325 }
3326 else { /* hidden variable from id_internal() */
3327 rb_ary_push(locals, ULONG2NUM(iseq_body->local_table_size-i+1));
3328 }
3329 }
3330 else {
3331 rb_ary_push(locals, ID2SYM(rb_intern("#arg_rest")));
3332 }
3333 }
3334
3335 /* params */
3336 {
3337 const struct rb_iseq_param_keyword *const keyword = iseq_body->param.keyword;
3338 int j;
3339
3340 if (iseq_body->param.flags.has_opt) {
3341 int len = iseq_body->param.opt_num + 1;
3342 VALUE arg_opt_labels = rb_ary_new2(len);
3343
3344 for (j = 0; j < len; j++) {
3345 VALUE l = register_label(labels_table, iseq_body->param.opt_table[j]);
3346 rb_ary_push(arg_opt_labels, l);
3347 }
3348 rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels);
3349 }
3350
3351 /* commit */
3352 if (iseq_body->param.flags.has_lead) rb_hash_aset(params, ID2SYM(rb_intern("lead_num")), INT2FIX(iseq_body->param.lead_num));
3353 if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_num")), INT2FIX(iseq_body->param.post_num));
3354 if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_start")), INT2FIX(iseq_body->param.post_start));
3355 if (iseq_body->param.flags.has_rest) rb_hash_aset(params, ID2SYM(rb_intern("rest_start")), INT2FIX(iseq_body->param.rest_start));
3356 if (iseq_body->param.flags.has_block) rb_hash_aset(params, ID2SYM(rb_intern("block_start")), INT2FIX(iseq_body->param.block_start));
3357 if (iseq_body->param.flags.has_kw) {
3358 VALUE keywords = rb_ary_new();
3359 int i, j;
3360 for (i=0; i<keyword->required_num; i++) {
3361 rb_ary_push(keywords, ID2SYM(keyword->table[i]));
3362 }
3363 for (j=0; i<keyword->num; i++, j++) {
3364 VALUE key = rb_ary_new_from_args(1, ID2SYM(keyword->table[i]));
3365 if (!UNDEF_P(keyword->default_values[j])) {
3366 rb_ary_push(key, keyword->default_values[j]);
3367 }
3368 rb_ary_push(keywords, key);
3369 }
3370
3371 rb_hash_aset(params, ID2SYM(rb_intern("kwbits")),
3372 INT2FIX(keyword->bits_start));
3373 rb_hash_aset(params, ID2SYM(rb_intern("keyword")), keywords);
3374 }
3375 if (iseq_body->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(keyword->rest_start));
3376 if (iseq_body->param.flags.ambiguous_param0) rb_hash_aset(params, ID2SYM(rb_intern("ambiguous_param0")), Qtrue);
3377 if (iseq_body->param.flags.use_block) rb_hash_aset(params, ID2SYM(rb_intern("use_block")), Qtrue);
3378 }
3379
3380 /* body */
3381 iseq_original = rb_iseq_original_iseq((rb_iseq_t *)iseq);
3382
3383 for (seq = iseq_original; seq < iseq_original + iseq_body->iseq_size; ) {
3384 VALUE insn = *seq++;
3385 int j, len = insn_len(insn);
3386 VALUE *nseq = seq + len - 1;
3387 VALUE ary = rb_ary_new2(len);
3388
3389 rb_ary_push(ary, ID2SYM(insn_syms[insn%numberof(insn_syms)]));
3390 for (j=0; j<len-1; j++, seq++) {
3391 enum ruby_insn_type_chars op_type = insn_op_type(insn, j);
3392
3393 switch (op_type) {
3394 case TS_OFFSET: {
3395 unsigned long idx = nseq - iseq_original + *seq;
3396 rb_ary_push(ary, register_label(labels_table, idx));
3397 break;
3398 }
3399 case TS_LINDEX:
3400 case TS_NUM:
3401 rb_ary_push(ary, INT2FIX(*seq));
3402 break;
3403 case TS_VALUE:
3404 rb_ary_push(ary, obj_resurrect(*seq));
3405 break;
3406 case TS_ISEQ:
3407 {
3408 const rb_iseq_t *iseq = (rb_iseq_t *)*seq;
3409 if (iseq) {
3410 VALUE val = iseq_data_to_ary(rb_iseq_check(iseq));
3411 rb_ary_push(ary, val);
3412 }
3413 else {
3414 rb_ary_push(ary, Qnil);
3415 }
3416 }
3417 break;
3418 case TS_IC:
3419 {
3420 VALUE list = rb_ary_new();
3421 const ID *ids = ((IC)*seq)->segments;
3422 while (*ids) {
3423 rb_ary_push(list, ID2SYM(*ids++));
3424 }
3425 rb_ary_push(ary, list);
3426 }
3427 break;
3428 case TS_IVC:
3429 case TS_ICVARC:
3430 case TS_ISE:
3431 {
3432 union iseq_inline_storage_entry *is = (union iseq_inline_storage_entry *)*seq;
3433 rb_ary_push(ary, INT2FIX(is - ISEQ_IS_ENTRY_START(ISEQ_BODY(iseq), op_type)));
3434 }
3435 break;
3436 case TS_CALLDATA:
3437 {
3438 struct rb_call_data *cd = (struct rb_call_data *)*seq;
3439 const struct rb_callinfo *ci = cd->ci;
3440 VALUE e = rb_hash_new();
3441 int argc = vm_ci_argc(ci);
3442
3443 ID mid = vm_ci_mid(ci);
3444 rb_hash_aset(e, ID2SYM(rb_intern("mid")), mid ? ID2SYM(mid) : Qnil);
3445 rb_hash_aset(e, ID2SYM(rb_intern("flag")), UINT2NUM(vm_ci_flag(ci)));
3446
3447 if (vm_ci_flag(ci) & VM_CALL_KWARG) {
3448 const struct rb_callinfo_kwarg *kwarg = vm_ci_kwarg(ci);
3449 int i;
3450 VALUE kw = rb_ary_new2((long)kwarg->keyword_len);
3451
3452 argc -= kwarg->keyword_len;
3453 for (i = 0; i < kwarg->keyword_len; i++) {
3454 rb_ary_push(kw, kwarg->keywords[i]);
3455 }
3456 rb_hash_aset(e, ID2SYM(rb_intern("kw_arg")), kw);
3457 }
3458
3459 rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")),
3460 INT2FIX(argc));
3461 rb_ary_push(ary, e);
3462 }
3463 break;
3464 case TS_ID:
3465 rb_ary_push(ary, ID2SYM(*seq));
3466 break;
3467 case TS_CDHASH:
3468 {
3469 VALUE hash = *seq;
3470 VALUE val = rb_ary_new();
3471 int i;
3472
3473 rb_hash_foreach(hash, cdhash_each, val);
3474
3475 for (i=0; i<RARRAY_LEN(val); i+=2) {
3476 VALUE pos = FIX2INT(rb_ary_entry(val, i+1));
3477 unsigned long idx = nseq - iseq_original + pos;
3478
3479 rb_ary_store(val, i+1,
3480 register_label(labels_table, idx));
3481 }
3482 rb_ary_push(ary, val);
3483 }
3484 break;
3485 case TS_FUNCPTR:
3486 {
3487#if SIZEOF_VALUE <= SIZEOF_LONG
3488 VALUE val = LONG2NUM((SIGNED_VALUE)*seq);
3489#else
3490 VALUE val = LL2NUM((SIGNED_VALUE)*seq);
3491#endif
3492 rb_ary_push(ary, val);
3493 }
3494 break;
3495 case TS_BUILTIN:
3496 {
3497 VALUE val = rb_hash_new();
3498#if SIZEOF_VALUE <= SIZEOF_LONG
3499 VALUE func_ptr = LONG2NUM((SIGNED_VALUE)((RB_BUILTIN)*seq)->func_ptr);
3500#else
3501 VALUE func_ptr = LL2NUM((SIGNED_VALUE)((RB_BUILTIN)*seq)->func_ptr);
3502#endif
3503 rb_hash_aset(val, ID2SYM(rb_intern("func_ptr")), func_ptr);
3504 rb_hash_aset(val, ID2SYM(rb_intern("argc")), INT2NUM(((RB_BUILTIN)*seq)->argc));
3505 rb_hash_aset(val, ID2SYM(rb_intern("index")), INT2NUM(((RB_BUILTIN)*seq)->index));
3506 rb_hash_aset(val, ID2SYM(rb_intern("name")), rb_str_new_cstr(((RB_BUILTIN)*seq)->name));
3507 rb_ary_push(ary, val);
3508 }
3509 break;
3510 default:
3511 rb_bug("unknown operand: %c", insn_op_type(insn, j));
3512 }
3513 }
3514 rb_ary_push(body, ary);
3515 }
3516
3517 nbody = body;
3518
3519 /* exception */
3520 if (iseq_body->catch_table) for (i=0; i<iseq_body->catch_table->size; i++) {
3521 VALUE ary = rb_ary_new();
3522 const struct iseq_catch_table_entry *entry =
3523 UNALIGNED_MEMBER_PTR(iseq_body->catch_table, entries[i]);
3524 rb_ary_push(ary, exception_type2symbol(entry->type));
3525 if (entry->iseq) {
3526 rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq)));
3527 }
3528 else {
3529 rb_ary_push(ary, Qnil);
3530 }
3531 rb_ary_push(ary, register_label(labels_table, entry->start));
3532 rb_ary_push(ary, register_label(labels_table, entry->end));
3533 rb_ary_push(ary, register_label(labels_table, entry->cont));
3534 rb_ary_push(ary, UINT2NUM(entry->sp));
3535 rb_ary_push(exception, ary);
3536 }
3537
3538 /* make body with labels and insert line number */
3539 body = rb_ary_new();
3540 prev_insn_info = NULL;
3541#ifdef USE_ISEQ_NODE_ID
3542 VALUE node_ids = rb_ary_new();
3543#endif
3544
3545 for (l=0, pos=0; l<RARRAY_LEN(nbody); l++) {
3546 const struct iseq_insn_info_entry *info;
3547 VALUE ary = RARRAY_AREF(nbody, l);
3548 st_data_t label;
3549
3550 if (st_lookup(labels_table, pos, &label)) {
3551 rb_ary_push(body, (VALUE)label);
3552 }
3553
3554 info = get_insn_info(iseq, pos);
3555#ifdef USE_ISEQ_NODE_ID
3556 rb_ary_push(node_ids, INT2FIX(info->node_id));
3557#endif
3558
3559 if (prev_insn_info != info) {
3560 int line = info->line_no;
3561 rb_event_flag_t events = info->events;
3562
3563 if (line > 0 && last_line != line) {
3564 rb_ary_push(body, INT2FIX(line));
3565 last_line = line;
3566 }
3567#define CHECK_EVENT(ev) if (events & ev) rb_ary_push(body, ID2SYM(rb_intern(#ev)));
3568 CHECK_EVENT(RUBY_EVENT_LINE);
3569 CHECK_EVENT(RUBY_EVENT_CLASS);
3570 CHECK_EVENT(RUBY_EVENT_END);
3571 CHECK_EVENT(RUBY_EVENT_CALL);
3572 CHECK_EVENT(RUBY_EVENT_RETURN);
3573 CHECK_EVENT(RUBY_EVENT_B_CALL);
3574 CHECK_EVENT(RUBY_EVENT_B_RETURN);
3575 CHECK_EVENT(RUBY_EVENT_RESCUE);
3576#undef CHECK_EVENT
3577 prev_insn_info = info;
3578 }
3579
3580 rb_ary_push(body, ary);
3581 pos += RARRAY_LENINT(ary); /* reject too huge data */
3582 }
3583 RB_GC_GUARD(nbody);
3584 RB_GC_GUARD(labels_wrapper);
3585
3586 rb_hash_aset(misc, ID2SYM(rb_intern("arg_size")), INT2FIX(iseq_body->param.size));
3587 rb_hash_aset(misc, ID2SYM(rb_intern("local_size")), INT2FIX(iseq_body->local_table_size));
3588 rb_hash_aset(misc, ID2SYM(rb_intern("stack_max")), INT2FIX(iseq_body->stack_max));
3589 rb_hash_aset(misc, ID2SYM(rb_intern("node_id")), INT2FIX(iseq_body->location.node_id));
3590 rb_hash_aset(misc, ID2SYM(rb_intern("code_location")),
3591 rb_ary_new_from_args(4,
3592 INT2FIX(iseq_body->location.code_location.beg_pos.lineno),
3593 INT2FIX(iseq_body->location.code_location.beg_pos.column),
3594 INT2FIX(iseq_body->location.code_location.end_pos.lineno),
3595 INT2FIX(iseq_body->location.code_location.end_pos.column)));
3596#ifdef USE_ISEQ_NODE_ID
3597 rb_hash_aset(misc, ID2SYM(rb_intern("node_ids")), node_ids);
3598#endif
3599 rb_hash_aset(misc, ID2SYM(rb_intern("parser")), iseq_body->prism ? ID2SYM(rb_intern("prism")) : ID2SYM(rb_intern("parse.y")));
3600
3601 /*
3602 * [:magic, :major_version, :minor_version, :format_type, :misc,
3603 * :name, :path, :absolute_path, :start_lineno, :type, :locals, :args,
3604 * :catch_table, :bytecode]
3605 */
3606 rb_ary_push(val, rb_str_new2("YARVInstructionSequence/SimpleDataFormat"));
3607 rb_ary_push(val, INT2FIX(ISEQ_MAJOR_VERSION)); /* major */
3608 rb_ary_push(val, INT2FIX(ISEQ_MINOR_VERSION)); /* minor */
3609 rb_ary_push(val, INT2FIX(1));
3610 rb_ary_push(val, misc);
3611 rb_ary_push(val, iseq_body->location.label);
3612 rb_ary_push(val, rb_iseq_path(iseq));
3613 rb_ary_push(val, rb_iseq_realpath(iseq));
3614 rb_ary_push(val, RB_INT2NUM(iseq_body->location.first_lineno));
3615 rb_ary_push(val, ID2SYM(type));
3616 rb_ary_push(val, locals);
3617 rb_ary_push(val, params);
3618 rb_ary_push(val, exception);
3619 rb_ary_push(val, body);
3620 return val;
3621}
3622
3623VALUE
3624rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc)
3625{
3626 int i, r;
3627 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3628 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
3629 VALUE a, args = rb_ary_new2(body->param.size);
3630 ID req, opt, rest, block, key, keyrest;
3631#define PARAM_TYPE(type) rb_ary_push(a = rb_ary_new2(2), ID2SYM(type))
3632#define PARAM_ID(i) body->local_table[(i)]
3633#define PARAM(i, type) ( \
3634 PARAM_TYPE(type), \
3635 rb_id2str(PARAM_ID(i)) ? \
3636 rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \
3637 a)
3638
3639 CONST_ID(req, "req");
3640 CONST_ID(opt, "opt");
3641
3642 if (body->param.flags.forwardable) {
3643 // [[:rest, :*], [:keyrest, :**], [:block, :&]]
3644 CONST_ID(rest, "rest");
3645 CONST_ID(keyrest, "keyrest");
3646 CONST_ID(block, "block");
3647 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(rest), ID2SYM(idMULT)));
3648 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(keyrest), ID2SYM(idPow)));
3649 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(block), ID2SYM(idAnd)));
3650 }
3651
3652 if (is_proc) {
3653 for (i = 0; i < body->param.lead_num; i++) {
3654 PARAM_TYPE(opt);
3655 if (rb_id2str(PARAM_ID(i))) {
3656 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3657 }
3658 rb_ary_push(args, a);
3659 }
3660 }
3661 else {
3662 for (i = 0; i < body->param.lead_num; i++) {
3663 rb_ary_push(args, PARAM(i, req));
3664 }
3665 }
3666 r = body->param.lead_num + body->param.opt_num;
3667 for (; i < r; i++) {
3668 PARAM_TYPE(opt);
3669 if (rb_id2str(PARAM_ID(i))) {
3670 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3671 }
3672 rb_ary_push(args, a);
3673 }
3674 if (body->param.flags.has_rest) {
3675 CONST_ID(rest, "rest");
3676 rb_ary_push(args, PARAM(body->param.rest_start, rest));
3677 }
3678 r = body->param.post_start + body->param.post_num;
3679 if (is_proc) {
3680 for (i = body->param.post_start; i < r; i++) {
3681 PARAM_TYPE(opt);
3682 if (rb_id2str(PARAM_ID(i))) {
3683 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3684 }
3685 rb_ary_push(args, a);
3686 }
3687 }
3688 else {
3689 for (i = body->param.post_start; i < r; i++) {
3690 rb_ary_push(args, PARAM(i, req));
3691 }
3692 }
3693 if (body->param.flags.accepts_no_kwarg) {
3694 ID nokey;
3695 CONST_ID(nokey, "nokey");
3696 PARAM_TYPE(nokey);
3697 rb_ary_push(args, a);
3698 }
3699 if (body->param.flags.has_kw) {
3700 i = 0;
3701 if (keyword->required_num > 0) {
3702 ID keyreq;
3703 CONST_ID(keyreq, "keyreq");
3704 for (; i < keyword->required_num; i++) {
3705 PARAM_TYPE(keyreq);
3706 if (rb_id2str(keyword->table[i])) {
3707 rb_ary_push(a, ID2SYM(keyword->table[i]));
3708 }
3709 rb_ary_push(args, a);
3710 }
3711 }
3712 CONST_ID(key, "key");
3713 for (; i < keyword->num; i++) {
3714 PARAM_TYPE(key);
3715 if (rb_id2str(keyword->table[i])) {
3716 rb_ary_push(a, ID2SYM(keyword->table[i]));
3717 }
3718 rb_ary_push(args, a);
3719 }
3720 }
3721 if (body->param.flags.has_kwrest || body->param.flags.ruby2_keywords) {
3722 ID param;
3723 CONST_ID(keyrest, "keyrest");
3724 PARAM_TYPE(keyrest);
3725 if (body->param.flags.has_kwrest &&
3726 rb_id2str(param = PARAM_ID(keyword->rest_start))) {
3727 rb_ary_push(a, ID2SYM(param));
3728 }
3729 else if (body->param.flags.ruby2_keywords) {
3730 rb_ary_push(a, ID2SYM(idPow));
3731 }
3732 rb_ary_push(args, a);
3733 }
3734 if (body->param.flags.has_block) {
3735 CONST_ID(block, "block");
3736 rb_ary_push(args, PARAM(body->param.block_start, block));
3737 }
3738 return args;
3739}
3740
3741VALUE
3742rb_iseq_defined_string(enum defined_type type)
3743{
3744 static const char expr_names[][18] = {
3745 "nil",
3746 "instance-variable",
3747 "local-variable",
3748 "global-variable",
3749 "class variable",
3750 "constant",
3751 "method",
3752 "yield",
3753 "super",
3754 "self",
3755 "true",
3756 "false",
3757 "assignment",
3758 "expression",
3759 };
3760 const char *estr;
3761
3762 if ((unsigned)(type - 1) >= (unsigned)numberof(expr_names)) rb_bug("unknown defined type %d", type);
3763 estr = expr_names[type - 1];
3764 return rb_fstring_cstr(estr);
3765}
3766
3767// A map from encoded_insn to insn_data: decoded insn number, its len,
3768// decoded ZJIT insn number, non-trace version of encoded insn,
3769// trace version, and zjit version.
3770static st_table *encoded_insn_data;
3771typedef struct insn_data_struct {
3772 int insn;
3773 int insn_len;
3774 void *notrace_encoded_insn;
3775 void *trace_encoded_insn;
3776#if USE_ZJIT
3777 int zjit_insn;
3778 void *zjit_encoded_insn;
3779#endif
3780} insn_data_t;
3781static insn_data_t insn_data[VM_BARE_INSTRUCTION_SIZE];
3782
3783void
3784rb_free_encoded_insn_data(void)
3785{
3786 st_free_table(encoded_insn_data);
3787}
3788
3789// Initialize a table to decode bare, trace, and zjit instructions.
3790// This function also determines which instructions are used when TracePoint is enabled.
3791void
3792rb_vm_encoded_insn_data_table_init(void)
3793{
3794#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
3795 const void * const *table = rb_vm_get_insns_address_table();
3796#define INSN_CODE(insn) ((VALUE)table[insn])
3797#else
3798#define INSN_CODE(insn) ((VALUE)(insn))
3799#endif
3800 encoded_insn_data = st_init_numtable_with_size(VM_BARE_INSTRUCTION_SIZE);
3801
3802 for (int insn = 0; insn < VM_BARE_INSTRUCTION_SIZE; insn++) {
3803 insn_data[insn].insn = insn;
3804 insn_data[insn].insn_len = insn_len(insn);
3805
3806 // When tracing :return events, we convert opt_invokebuiltin_delegate_leave + leave into
3807 // opt_invokebuiltin_delegate + trace_leave, presumably because we don't want to fire
3808 // :return events before invokebuiltin. https://github.com/ruby/ruby/pull/3256
3809 int notrace_insn = (insn != BIN(opt_invokebuiltin_delegate_leave)) ? insn : BIN(opt_invokebuiltin_delegate);
3810 insn_data[insn].notrace_encoded_insn = (void *)INSN_CODE(notrace_insn);
3811 insn_data[insn].trace_encoded_insn = (void *)INSN_CODE(notrace_insn + VM_BARE_INSTRUCTION_SIZE);
3812
3813 st_data_t key1 = (st_data_t)INSN_CODE(insn);
3814 st_data_t key2 = (st_data_t)INSN_CODE(insn + VM_BARE_INSTRUCTION_SIZE);
3815 st_add_direct(encoded_insn_data, key1, (st_data_t)&insn_data[insn]);
3816 st_add_direct(encoded_insn_data, key2, (st_data_t)&insn_data[insn]);
3817
3818#if USE_ZJIT
3819 int zjit_insn = vm_bare_insn_to_zjit_insn(insn);
3820 insn_data[insn].zjit_insn = zjit_insn;
3821 insn_data[insn].zjit_encoded_insn = (insn != zjit_insn) ? (void *)INSN_CODE(zjit_insn) : 0;
3822
3823 if (insn != zjit_insn) {
3824 st_data_t key3 = (st_data_t)INSN_CODE(zjit_insn);
3825 st_add_direct(encoded_insn_data, key3, (st_data_t)&insn_data[insn]);
3826 }
3827#endif
3828 }
3829}
3830
3831// Decode an insn address to an insn. This returns bare instructions
3832// even if they're trace/zjit instructions. Use rb_vm_insn_addr2opcode
3833// to decode trace/zjit instructions as is.
3834int
3835rb_vm_insn_addr2insn(const void *addr)
3836{
3837 st_data_t key = (st_data_t)addr;
3838 st_data_t val;
3839
3840 if (st_lookup(encoded_insn_data, key, &val)) {
3841 insn_data_t *e = (insn_data_t *)val;
3842 return (int)e->insn;
3843 }
3844
3845 rb_bug("rb_vm_insn_addr2insn: invalid insn address: %p", addr);
3846}
3847
3848// Decode an insn address to an insn. Unlike rb_vm_insn_addr2insn,
3849// this function can return trace/zjit opcode variants.
3850int
3851rb_vm_insn_addr2opcode(const void *addr)
3852{
3853 st_data_t key = (st_data_t)addr;
3854 st_data_t val;
3855
3856 if (st_lookup(encoded_insn_data, key, &val)) {
3857 insn_data_t *e = (insn_data_t *)val;
3858 int opcode = e->insn;
3859 if (addr == e->trace_encoded_insn) {
3860 opcode += VM_BARE_INSTRUCTION_SIZE;
3861 }
3862#if USE_ZJIT
3863 else if (addr == e->zjit_encoded_insn) {
3864 opcode = e->zjit_insn;
3865 }
3866#endif
3867 return opcode;
3868 }
3869
3870 rb_bug("rb_vm_insn_addr2opcode: invalid insn address: %p", addr);
3871}
3872
3873// Decode `ISEQ_BODY(iseq)->iseq_encoded[i]` to an insn. This returns
3874// bare instructions even if they're trace/zjit instructions. Use
3875// rb_vm_insn_addr2opcode to decode trace/zjit instructions as is.
3876int
3877rb_vm_insn_decode(const VALUE encoded)
3878{
3879#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
3880 int insn = rb_vm_insn_addr2insn((void *)encoded);
3881#else
3882 int insn = (int)encoded;
3883#endif
3884 return insn;
3885}
3886
3887// Turn on or off tracing for a given instruction address
3888static inline int
3889encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, bool remain_current_trace)
3890{
3891 st_data_t key = (st_data_t)*iseq_encoded_insn;
3892 st_data_t val;
3893
3894 if (st_lookup(encoded_insn_data, key, &val)) {
3895 insn_data_t *e = (insn_data_t *)val;
3896 if (remain_current_trace && key == (st_data_t)e->trace_encoded_insn) {
3897 turnon = 1;
3898 }
3899 *iseq_encoded_insn = (VALUE) (turnon ? e->trace_encoded_insn : e->notrace_encoded_insn);
3900 return e->insn_len;
3901 }
3902
3903 rb_bug("trace_instrument: invalid insn address: %p", (void *)*iseq_encoded_insn);
3904}
3905
3906// Turn off tracing for an instruction at pos after tracing event flags are cleared
3907void
3908rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos)
3909{
3910 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3911 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3912 encoded_iseq_trace_instrument(&iseq_encoded[pos], 0, false);
3913}
3914
3915// We need to fire call events on instructions with b_call events if the block
3916// is running as a method. So, if we are listening for call events, then
3917// instructions that have b_call events need to become trace variants.
3918// Use this function when making decisions about recompiling to trace variants.
3919static inline rb_event_flag_t
3920add_bmethod_events(rb_event_flag_t events)
3921{
3922 if (events & RUBY_EVENT_CALL) {
3923 events |= RUBY_EVENT_B_CALL;
3924 }
3925 if (events & RUBY_EVENT_RETURN) {
3926 events |= RUBY_EVENT_B_RETURN;
3927 }
3928 return events;
3929}
3930
3931// Note, to support call/return events for bmethods, turnon_event can have more events than tpval.
3932static int
3933iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line)
3934{
3935 unsigned int pc;
3936 int n = 0;
3937 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3938 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3939
3940 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
3941
3942 for (pc=0; pc<body->iseq_size;) {
3943 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pc);
3944 rb_event_flag_t pc_events = entry->events;
3945 rb_event_flag_t target_events = turnon_events;
3946 unsigned int line = (int)entry->line_no;
3947
3948 if (target_line == 0 || target_line == line) {
3949 /* ok */
3950 }
3951 else {
3952 target_events &= ~RUBY_EVENT_LINE;
3953 }
3954
3955 if (pc_events & target_events) {
3956 n++;
3957 }
3958 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (target_events | iseq->aux.exec.global_trace_events), true);
3959 }
3960
3961 if (n > 0) {
3962 if (iseq->aux.exec.local_hooks == NULL) {
3963 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = RB_ZALLOC(rb_hook_list_t);
3964 iseq->aux.exec.local_hooks->is_local = true;
3965 }
3966 rb_hook_list_connect_tracepoint((VALUE)iseq, iseq->aux.exec.local_hooks, tpval, target_line);
3967 }
3968
3969 return n;
3970}
3971
3973 rb_event_flag_t turnon_events;
3974 VALUE tpval;
3975 unsigned int target_line;
3976 int n;
3977};
3978
3979static void
3980iseq_add_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
3981{
3983 data->n += iseq_add_local_tracepoint(iseq, data->turnon_events, data->tpval, data->target_line);
3984 iseq_iterate_children(iseq, iseq_add_local_tracepoint_i, p);
3985}
3986
3987int
3988rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line, bool target_bmethod)
3989{
3991 if (target_bmethod) {
3992 turnon_events = add_bmethod_events(turnon_events);
3993 }
3994 data.turnon_events = turnon_events;
3995 data.tpval = tpval;
3996 data.target_line = target_line;
3997 data.n = 0;
3998
3999 iseq_add_local_tracepoint_i(iseq, (void *)&data);
4000 if (0) rb_funcall(Qnil, rb_intern("puts"), 1, rb_iseq_disasm(iseq)); /* for debug */
4001 return data.n;
4002}
4003
4004static int
4005iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval)
4006{
4007 int n = 0;
4008
4009 if (iseq->aux.exec.local_hooks) {
4010 unsigned int pc;
4011 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
4012 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
4013 rb_event_flag_t local_events = 0;
4014
4015 rb_hook_list_remove_tracepoint(iseq->aux.exec.local_hooks, tpval);
4016 local_events = iseq->aux.exec.local_hooks->events;
4017
4018 if (local_events == 0) {
4019 rb_hook_list_free(iseq->aux.exec.local_hooks);
4020 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = NULL;
4021 }
4022
4023 local_events = add_bmethod_events(local_events);
4024 for (pc = 0; pc<body->iseq_size;) {
4025 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
4026 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (local_events | iseq->aux.exec.global_trace_events), false);
4027 }
4028 }
4029 return n;
4030}
4031
4033 VALUE tpval;
4034 int n;
4035};
4036
4037static void
4038iseq_remove_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
4039{
4041 data->n += iseq_remove_local_tracepoint(iseq, data->tpval);
4042 iseq_iterate_children(iseq, iseq_remove_local_tracepoint_i, p);
4043}
4044
4045int
4046rb_iseq_remove_local_tracepoint_recursively(const rb_iseq_t *iseq, VALUE tpval)
4047{
4049 data.tpval = tpval;
4050 data.n = 0;
4051
4052 iseq_remove_local_tracepoint_i(iseq, (void *)&data);
4053 return data.n;
4054}
4055
4056void
4057rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events)
4058{
4059 if (iseq->aux.exec.global_trace_events == turnon_events) {
4060 return;
4061 }
4062
4063 if (!ISEQ_EXECUTABLE_P(iseq)) {
4064 /* this is building ISeq */
4065 return;
4066 }
4067 else {
4068 unsigned int pc;
4069 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
4070 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
4071 rb_event_flag_t enabled_events;
4072 rb_event_flag_t local_events = iseq->aux.exec.local_hooks ? iseq->aux.exec.local_hooks->events : 0;
4073 ((rb_iseq_t *)iseq)->aux.exec.global_trace_events = turnon_events;
4074 enabled_events = add_bmethod_events(turnon_events | local_events);
4075
4076 for (pc=0; pc<body->iseq_size;) {
4077 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
4078 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & enabled_events, true);
4079 }
4080 }
4081}
4082
4083void rb_vm_cc_general(const struct rb_callcache *cc);
4084
4085static bool
4086clear_attr_cc(VALUE v)
4087{
4088 if (imemo_type_p(v, imemo_callcache) && vm_cc_ivar_p((const struct rb_callcache *)v)) {
4089 rb_vm_cc_general((struct rb_callcache *)v);
4090 return true;
4091 }
4092 else {
4093 return false;
4094 }
4095}
4096
4097static bool
4098clear_bf_cc(VALUE v)
4099{
4100 if (imemo_type_p(v, imemo_callcache) && vm_cc_bf_p((const struct rb_callcache *)v)) {
4101 rb_vm_cc_general((struct rb_callcache *)v);
4102 return true;
4103 }
4104 else {
4105 return false;
4106 }
4107}
4108
4109static int
4110clear_attr_ccs_i(void *vstart, void *vend, size_t stride, void *data)
4111{
4112 VALUE v = (VALUE)vstart;
4113 for (; v != (VALUE)vend; v += stride) {
4114 void *ptr = rb_asan_poisoned_object_p(v);
4115 rb_asan_unpoison_object(v, false);
4116 clear_attr_cc(v);
4117 asan_poison_object_if(ptr, v);
4118 }
4119 return 0;
4120}
4121
4122void
4123rb_clear_attr_ccs(void)
4124{
4125 rb_objspace_each_objects(clear_attr_ccs_i, NULL);
4126}
4127
4128static int
4129clear_bf_ccs_i(void *vstart, void *vend, size_t stride, void *data)
4130{
4131 VALUE v = (VALUE)vstart;
4132 for (; v != (VALUE)vend; v += stride) {
4133 void *ptr = rb_asan_poisoned_object_p(v);
4134 rb_asan_unpoison_object(v, false);
4135 clear_bf_cc(v);
4136 asan_poison_object_if(ptr, v);
4137 }
4138 return 0;
4139}
4140
4141void
4142rb_clear_bf_ccs(void)
4143{
4144 rb_objspace_each_objects(clear_bf_ccs_i, NULL);
4145}
4146
4147static int
4148trace_set_i(void *vstart, void *vend, size_t stride, void *data)
4149{
4150 rb_event_flag_t turnon_events = *(rb_event_flag_t *)data;
4151
4152 VALUE v = (VALUE)vstart;
4153 for (; v != (VALUE)vend; v += stride) {
4154 void *ptr = rb_asan_poisoned_object_p(v);
4155 rb_asan_unpoison_object(v, false);
4156
4157 if (rb_obj_is_iseq(v)) {
4158 rb_iseq_trace_set(rb_iseq_check((rb_iseq_t *)v), turnon_events);
4159 }
4160 else if (clear_attr_cc(v)) {
4161 }
4162 else if (clear_bf_cc(v)) {
4163 }
4164
4165 asan_poison_object_if(ptr, v);
4166 }
4167 return 0;
4168}
4169
4170void
4171rb_iseq_trace_set_all(rb_event_flag_t turnon_events)
4172{
4173 rb_objspace_each_objects(trace_set_i, &turnon_events);
4174}
4175
4176VALUE
4177rb_iseqw_local_variables(VALUE iseqval)
4178{
4179 return rb_iseq_local_variables(iseqw_check(iseqval));
4180}
4181
4182/*
4183 * call-seq:
4184 * iseq.to_binary(extra_data = nil) -> binary str
4185 *
4186 * Returns serialized iseq binary format data as a String object.
4187 * A corresponding iseq object is created by
4188 * RubyVM::InstructionSequence.load_from_binary() method.
4189 *
4190 * String extra_data will be saved with binary data.
4191 * You can access this data with
4192 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary).
4193 *
4194 * Note that the translated binary data is not portable.
4195 * You can not move this binary data to another machine.
4196 * You can not use the binary data which is created by another
4197 * version/another architecture of Ruby.
4198 */
4199static VALUE
4200iseqw_to_binary(int argc, VALUE *argv, VALUE self)
4201{
4202 VALUE opt = !rb_check_arity(argc, 0, 1) ? Qnil : argv[0];
4203 return rb_iseq_ibf_dump(iseqw_check(self), opt);
4204}
4205
4206/*
4207 * call-seq:
4208 * RubyVM::InstructionSequence.load_from_binary(binary) -> iseq
4209 *
4210 * Load an iseq object from binary format String object
4211 * created by RubyVM::InstructionSequence.to_binary.
4212 *
4213 * This loader does not have a verifier, so that loading broken/modified
4214 * binary causes critical problem.
4215 *
4216 * You should not load binary data provided by others.
4217 * You should use binary data translated by yourself.
4218 */
4219static VALUE
4220iseqw_s_load_from_binary(VALUE self, VALUE str)
4221{
4222 return iseqw_new(rb_iseq_ibf_load(str));
4223}
4224
4225/*
4226 * call-seq:
4227 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary) -> str
4228 *
4229 * Load extra data embed into binary format String object.
4230 */
4231static VALUE
4232iseqw_s_load_from_binary_extra_data(VALUE self, VALUE str)
4233{
4234 return rb_iseq_ibf_load_extra_data(str);
4235}
4236
4237#if VM_INSN_INFO_TABLE_IMPL == 2
4238
4239/* An implementation of succinct bit-vector for insn_info table.
4240 *
4241 * A succinct bit-vector is a small and efficient data structure that provides
4242 * a bit-vector augmented with an index for O(1) rank operation:
4243 *
4244 * rank(bv, n): the number of 1's within a range from index 0 to index n
4245 *
4246 * This can be used to lookup insn_info table from PC.
4247 * For example, consider the following iseq and insn_info_table:
4248 *
4249 * iseq insn_info_table
4250 * PC insn+operand position lineno event
4251 * 0: insn1 0: 1 [Li]
4252 * 2: insn2 2: 2 [Li] <= (A)
4253 * 5: insn3 8: 3 [Li] <= (B)
4254 * 8: insn4
4255 *
4256 * In this case, a succinct bit-vector whose bits at indexes 0, 2, and 8
4257 * are "1" and whose other bits are "0", i.e., "101000001", is created.
4258 * To look up the lineno of insn2, calculate rank("101000001", 2) = 2, so
4259 * the line (A) is the entry in question.
4260 * To look up the lineno of insn4, calculate rank("101000001", 8) = 3, so
4261 * the line (B) is the entry in question.
4262 *
4263 * A naive implementation of a succinct bit-vector works really well
4264 * not only for large sizes but also for small sizes. However, it has a
4265 * tiny overhead for very small sizes. So, this implementation consists
4266 * of two parts: one part is the "immediate" table that keeps rank results
4267 * as a raw table, and the other part is a normal succinct bit-vector.
4268 */
4269
4270#define IMMEDIATE_TABLE_SIZE 54 /* a multiple of 9, and < 128 */
4271
4272struct succ_index_table {
4273 uint64_t imm_part[IMMEDIATE_TABLE_SIZE / 9];
4274 struct succ_dict_block {
4275 unsigned int rank;
4276 uint64_t small_block_ranks; /* 9 bits * 7 = 63 bits */
4277 uint64_t bits[512/64];
4278 } succ_part[FLEX_ARY_LEN];
4279};
4280
/*
 * Packed-rank accessors.
 *
 * imm_block_rank_set/get:   entry i (0..8) of a word holding nine 7-bit ranks.
 * small_block_rank_set/get: entry i (1..7) of a word holding seven 9-bit
 *                           ranks; entry 0 is implicitly 0 and is never
 *                           stored, so small_block_rank_set must not be
 *                           used with i == 0.
 *
 * The setters OR the value into a word that is expected to be zero at that
 * entry; `r` is not masked and must fit in the entry's width.
 *
 * Each expansion is fully parenthesized so the macros can be used inside
 * larger expressions, and the getters mask the 64-bit value before narrowing
 * it to int so the conversion never sees an out-of-range value.
 */
#define imm_block_rank_set(v, i, r) ((v) |= (uint64_t)(r) << (7 * (i)))
#define imm_block_rank_get(v, i) ((int)(((v) >> ((i) * 7)) & 0x7f))
#define small_block_rank_set(v, i, r) ((v) |= (uint64_t)(r) << (9 * ((i) - 1)))
#define small_block_rank_get(v, i) ((i) == 0 ? 0 : (int)(((v) >> (((i) - 1) * 9)) & 0x1ff))
4285
4286static struct succ_index_table *
4287succ_index_table_create(int max_pos, int *data, int size)
4288{
4289 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
4290 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
4291 struct succ_index_table *sd =
4292 rb_xcalloc_mul_add_mul(
4293 imm_size, sizeof(uint64_t),
4294 succ_size, sizeof(struct succ_dict_block));
4295 int i, j, k, r;
4296
4297 r = 0;
4298 for (j = 0; j < imm_size; j++) {
4299 for (i = 0; i < 9; i++) {
4300 if (r < size && data[r] == j * 9 + i) r++;
4301 imm_block_rank_set(sd->imm_part[j], i, r);
4302 }
4303 }
4304 for (k = 0; k < succ_size; k++) {
4305 struct succ_dict_block *sd_block = &sd->succ_part[k];
4306 int small_rank = 0;
4307 sd_block->rank = r;
4308 for (j = 0; j < 8; j++) {
4309 uint64_t bits = 0;
4310 if (j) small_block_rank_set(sd_block->small_block_ranks, j, small_rank);
4311 for (i = 0; i < 64; i++) {
4312 if (r < size && data[r] == k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE) {
4313 bits |= ((uint64_t)1) << i;
4314 r++;
4315 }
4316 }
4317 sd_block->bits[j] = bits;
4318 small_rank += rb_popcount64(bits);
4319 }
4320 }
4321 return sd;
4322}
4323
4324static unsigned int *
4325succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size)
4326{
4327 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
4328 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
4329 unsigned int *positions = ALLOC_N(unsigned int, size), *p;
4330 int i, j, k, r = -1;
4331 p = positions;
4332 for (j = 0; j < imm_size; j++) {
4333 for (i = 0; i < 9; i++) {
4334 int nr = imm_block_rank_get(sd->imm_part[j], i);
4335 if (r != nr) *p++ = j * 9 + i;
4336 r = nr;
4337 }
4338 }
4339 for (k = 0; k < succ_size; k++) {
4340 for (j = 0; j < 8; j++) {
4341 for (i = 0; i < 64; i++) {
4342 if (sd->succ_part[k].bits[j] & (((uint64_t)1) << i)) {
4343 *p++ = k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE;
4344 }
4345 }
4346 }
4347 }
4348 return positions;
4349}
4350
4351static int
4352succ_index_lookup(const struct succ_index_table *sd, int x)
4353{
4354 if (x < IMMEDIATE_TABLE_SIZE) {
4355 const int i = x / 9;
4356 const int j = x % 9;
4357 return imm_block_rank_get(sd->imm_part[i], j);
4358 }
4359 else {
4360 const int block_index = (x - IMMEDIATE_TABLE_SIZE) / 512;
4361 const struct succ_dict_block *block = &sd->succ_part[block_index];
4362 const int block_bit_index = (x - IMMEDIATE_TABLE_SIZE) % 512;
4363 const int small_block_index = block_bit_index / 64;
4364 const int small_block_popcount = small_block_rank_get(block->small_block_ranks, small_block_index);
4365 const int popcnt = rb_popcount64(block->bits[small_block_index] << (63 - block_bit_index % 64));
4366
4367 return block->rank + small_block_popcount + popcnt;
4368 }
4369}
4370#endif
4371
4372
4373/*
4374 * call-seq:
4375 * iseq.script_lines -> array or nil
4376 *
4377 * It returns recorded script lines if it is available.
4378 * The script lines are not limited to the iseq range, but
4379 * are entire lines of the source file.
4380 *
4381 * Note that this is an API for ruby internal use, debugging,
4382 * and research. Do not use this for any other purpose.
4383 * The compatibility is not guaranteed.
4384 */
4385static VALUE
4386iseqw_script_lines(VALUE self)
4387{
4388 const rb_iseq_t *iseq = iseqw_check(self);
4389 return ISEQ_BODY(iseq)->variable.script_lines;
4390}
4391
4392/*
4393 * Document-class: RubyVM::InstructionSequence
4394 *
4395 * The InstructionSequence class represents a compiled sequence of
4396 * instructions for the Virtual Machine used in MRI. Not all implementations of Ruby
4397 * may implement this class, and for the implementations that implement it,
4398 * the methods defined and behavior of the methods can change in any version.
4399 *
4400 * With it, you can get a handle to the instructions that make up a method or
4401 * a proc, compile strings of Ruby code down to VM instructions, and
4402 * disassemble instruction sequences to strings for easy inspection. It is
4403 * mostly useful if you want to learn how YARV works, but it also lets
4404 * you control various settings for the Ruby iseq compiler.
4405 *
4406 * You can find the source for the VM instructions in +insns.def+ in the Ruby
4407 * source.
4408 *
4409 * The instruction sequence results will almost certainly change as Ruby
4410 * changes, so example output in this documentation may be different from what
4411 * you see.
4412 *
4413 * Of course, this class is MRI specific.
4414 */
4415
/*
 * Defines the class InstructionSequence under rb_cRubyVM (superclass
 * rb_cObject), stores it in rb_cISeq, and registers its instance and
 * singleton methods.
 */
4416void
4417Init_ISeq(void)
4418{
4419 /* declare ::RubyVM::InstructionSequence */
4420 rb_cISeq = rb_define_class_under(rb_cRubyVM, "InstructionSequence", rb_cObject);
    /* remove the class's allocator function */
4421 rb_undef_alloc_func(rb_cISeq);
4422 rb_define_method(rb_cISeq, "inspect", iseqw_inspect, 0);
    /* "disasm" and "disassemble" share one implementation */
4423 rb_define_method(rb_cISeq, "disasm", iseqw_disasm, 0);
4424 rb_define_method(rb_cISeq, "disassemble", iseqw_disasm, 0);
4425 rb_define_method(rb_cISeq, "to_a", iseqw_to_a, 0);
4426 rb_define_method(rb_cISeq, "eval", iseqw_eval, 0);
4427
    /* binary serialization; to_binary is registered with arity -1 (argc/argv) */
4428 rb_define_method(rb_cISeq, "to_binary", iseqw_to_binary, -1);
4429 rb_define_singleton_method(rb_cISeq, "load_from_binary", iseqw_s_load_from_binary, 1);
4430 rb_define_singleton_method(rb_cISeq, "load_from_binary_extra_data", iseqw_s_load_from_binary_extra_data, 1);
4431
4432 /* location APIs */
4433 rb_define_method(rb_cISeq, "path", iseqw_path, 0);
4434 rb_define_method(rb_cISeq, "absolute_path", iseqw_absolute_path, 0);
4435 rb_define_method(rb_cISeq, "label", iseqw_label, 0);
4436 rb_define_method(rb_cISeq, "base_label", iseqw_base_label, 0);
4437 rb_define_method(rb_cISeq, "first_lineno", iseqw_first_lineno, 0);
4438 rb_define_method(rb_cISeq, "trace_points", iseqw_trace_points, 0);
4439 rb_define_method(rb_cISeq, "each_child", iseqw_each_child, 0);
4440
4441#if 0 /* TBD */
4442 rb_define_private_method(rb_cISeq, "marshal_dump", iseqw_marshal_dump, 0);
4443 rb_define_private_method(rb_cISeq, "marshal_load", iseqw_marshal_load, 1);
4444 /* disable this feature because there is no verifier. */
4445 rb_define_singleton_method(rb_cISeq, "load", iseq_s_load, -1);
4446#endif
    /* NOTE(review): keeps iseq_s_load referenced while its registration above
     * is compiled out, presumably to avoid an unused-function warning -- confirm */
4447 (void)iseq_s_load;
4448
    /* compilation entry points; "new" is registered with the same function as "compile" */
4449 rb_define_singleton_method(rb_cISeq, "compile", iseqw_s_compile, -1);
4450 rb_define_singleton_method(rb_cISeq, "compile_parsey", iseqw_s_compile_parsey, -1);
4451 rb_define_singleton_method(rb_cISeq, "compile_prism", iseqw_s_compile_prism, -1);
4452 rb_define_singleton_method(rb_cISeq, "compile_file_prism", iseqw_s_compile_file_prism, -1);
4453 rb_define_singleton_method(rb_cISeq, "new", iseqw_s_compile, -1);
4454 rb_define_singleton_method(rb_cISeq, "compile_file", iseqw_s_compile_file, -1);
4455 rb_define_singleton_method(rb_cISeq, "compile_option", iseqw_s_compile_option_get, 0);
4456 rb_define_singleton_method(rb_cISeq, "compile_option=", iseqw_s_compile_option_set, 1);
4457 rb_define_singleton_method(rb_cISeq, "disasm", iseqw_s_disasm, 1);
4458 rb_define_singleton_method(rb_cISeq, "disassemble", iseqw_s_disasm, 1);
4459 rb_define_singleton_method(rb_cISeq, "of", iseqw_s_of, 1);
4460
4461 // script lines
4462 rb_define_method(rb_cISeq, "script_lines", iseqw_script_lines, 0);
4463
    /* mark "translate" and "load_iseq" as undefined on the class's own class
     * (CLASS_OF(rb_cISeq)).
     * NOTE(review): presumably so they exist only when user code defines
     * them -- confirm against the callers that look these names up */
4464 rb_undef_method(CLASS_OF(rb_cISeq), "translate");
4465 rb_undef_method(CLASS_OF(rb_cISeq), "load_iseq");
4466}
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
#define rb_define_private_method(klass, mid, func, arity)
Defines klass#mid and makes it private.
#define RUBY_EVENT_END
Encountered an end of a class clause.
Definition event.h:40
#define RUBY_EVENT_C_CALL
A method, written in C, is called.
Definition event.h:43
#define RUBY_EVENT_B_RETURN
Encountered a next statement.
Definition event.h:56
#define RUBY_EVENT_CLASS
Encountered a new class.
Definition event.h:39
#define RUBY_EVENT_LINE
Encountered a new line.
Definition event.h:38
#define RUBY_EVENT_RETURN
Encountered a return statement.
Definition event.h:42
#define RUBY_EVENT_C_RETURN
Return from a method, written in C.
Definition event.h:44
#define RUBY_EVENT_B_CALL
Encountered a yield statement.
Definition event.h:55
uint32_t rb_event_flag_t
Represents event(s).
Definition event.h:108
#define RUBY_EVENT_CALL
A method, written in Ruby, is called.
Definition event.h:41
#define RUBY_EVENT_RESCUE
Encountered a rescue statement.
Definition event.h:61
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition class.c:1512
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
Definition class.c:2665
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
Definition class.c:3135
#define rb_str_new2
Old name of rb_str_new_cstr.
Definition string.h:1674
#define T_FILE
Old name of RUBY_T_FILE.
Definition value_type.h:62
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
Definition long.h:48
#define rb_str_cat2
Old name of rb_str_cat_cstr.
Definition string.h:1682
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define SPECIAL_CONST_P
Old name of RB_SPECIAL_CONST_P.
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define ZALLOC
Old name of RB_ZALLOC.
Definition memory.h:402
#define LL2NUM
Old name of RB_LL2NUM.
Definition long_long.h:30
#define CLASS_OF
Old name of rb_class_of.
Definition globals.h:206
#define T_NONE
Old name of RUBY_T_NONE.
Definition value_type.h:74
#define FIX2INT
Old name of RB_FIX2INT.
Definition int.h:41
#define T_HASH
Old name of RUBY_T_HASH.
Definition value_type.h:65
#define ALLOC_N
Old name of RB_ALLOC_N.
Definition memory.h:399
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
Definition fl_type.h:131
#define LONG2NUM
Old name of RB_LONG2NUM.
Definition long.h:50
#define Qtrue
Old name of RUBY_Qtrue.
#define NUM2INT
Old name of RB_NUM2INT.
Definition int.h:44
#define INT2NUM
Old name of RB_INT2NUM.
Definition int.h:43
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
Definition value_type.h:56
#define NIL_P
Old name of RB_NIL_P.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
Definition value_type.h:85
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition long.h:51
#define UINT2NUM
Old name of RB_UINT2NUM.
Definition int.h:46
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
Definition symbol.h:47
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:657
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
Definition eval.c:682
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1430
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
Identical to rb_typeddata_is_kind_of(), except it raises exceptions instead of returning false.
Definition error.c:1397
VALUE rb_eSyntaxError
SyntaxError exception.
Definition error.c:1447
VALUE rb_class_superclass(VALUE klass)
Queries the parent of the given class.
Definition object.c:2208
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
Definition object.c:100
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:262
VALUE rb_inspect(VALUE obj)
Generates a human-readable textual representation of the given object.
Definition object.c:684
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition gc.h:603
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
Definition vm_eval.c:1117
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new_from_values(long n, const VALUE *elts)
Identical to rb_ary_new_from_args(), except how objects are passed.
VALUE rb_ary_resurrect(VALUE ary)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_hidden_new(long capa)
Allocates a hidden (no class) empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
VALUE rb_ary_entry(VALUE ary, long off)
Queries an element of an array.
VALUE rb_ary_join(VALUE ary, VALUE sep)
Recursively stringises the elements of the passed array, flattens that result, then joins the sequenc...
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
Definition error.h:284
VALUE rb_file_open_str(VALUE fname, const char *fmode)
Identical to rb_file_open(), except it takes the pathname as a Ruby's string instead of C's.
Definition io.c:7264
VALUE rb_io_close(VALUE io)
Closes the IO.
Definition io.c:5753
int rb_is_local_id(ID id)
Classifies the given ID, then sees if it is a local variable.
Definition symbol.c:1109
VALUE rb_obj_is_method(VALUE recv)
Queries if the given object is a method.
Definition proc.c:1678
VALUE rb_obj_is_proc(VALUE recv)
Queries if the given object is a proc.
Definition proc.c:120
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
Definition string.c:3760
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1497
#define rb_exc_new_cstr(exc, str)
Identical to rb_exc_new(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1669
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition string.c:1959
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
Definition string.c:3528
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
Definition string.c:1977
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3350
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
Definition string.c:7315
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
Definition string.c:4178
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
Definition string.c:3997
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1655
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition string.c:2707
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1513
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
Definition symbol.c:937
VALUE rb_class_name(VALUE obj)
Queries the name of the given object's class.
Definition variable.c:498
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
Definition vm_method.c:3340
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
Definition vm_method.c:1603
VALUE rb_check_funcall(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcallv(), except it returns RUBY_Qundef instead of raising rb_eNoMethodError.
Definition vm_eval.c:686
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1133
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
Definition symbol.c:993
VALUE rb_io_path(VALUE io)
Returns the path for the given IO.
Definition io.c:2973
int len
Length of the buffer.
Definition io.h:8
VALUE rb_ractor_make_shareable(VALUE obj)
Destructively transforms the passed object so that multiple Ractors can share it.
Definition ractor.c:1429
#define RB_NUM2INT
Just another name of rb_num2int_inline.
Definition int.h:38
#define RB_INT2NUM
Just another name of rb_int2num_inline.
Definition int.h:37
VALUE rb_yield(VALUE val)
Yields the block.
Definition vm_eval.c:1372
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:167
#define RB_ZALLOC(type)
Shorthand of RB_ZALLOC_N with n=1.
Definition memory.h:249
VALUE type(ANYARGS)
ANYARGS-ed function type.
void rb_hash_foreach(VALUE q, int_type *w, VALUE e)
Iteration over the given hash.
PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line)
Set the line option on the given options struct.
Definition options.c:40
PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal)
Set the frozen string literal option on the given options struct.
Definition options.c:48
PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count)
Allocate and zero out the scopes array on the given options struct.
Definition options.c:162
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
Definition rarray.h:281
#define RARRAY_AREF(a, i)
Definition rarray.h:403
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
Definition rbasic.h:163
#define RHASH_SIZE(h)
Queries the size of the hash.
Definition rhash.h:69
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:66
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
Definition rstring.h:442
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
Definition rstring.h:89
#define RUBY_TYPED_DEFAULT_FREE
This is a value you can set to rb_data_type_struct::dfree.
Definition rtypeddata.h:80
#define TypedData_Get_Struct(obj, type, data_type, sval)
Obtains a C struct from inside of a wrapper Ruby object.
Definition rtypeddata.h:521
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
Definition rtypeddata.h:456
#define TypedData_Make_Struct(klass, type, data_type, sval)
Identical to TypedData_Wrap_Struct, except it allocates a new data region internally instead of takin...
Definition rtypeddata.h:503
#define FilePathValue(v)
Ensures that the parameter object is a path.
Definition ruby.h:90
#define RTEST
This is an old name of RB_TEST.
Definition iseq.h:280
const ID * segments
A null-terminated list of ids, used to represent a constant's path. idNULL is used to represent the ::...
Definition vm_core.h:285
Definition vm_core.h:293
Definition vm_core.h:288
Definition iseq.h:251
A line and column in a string.
uint32_t column
The column number.
int32_t line
The line number.
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
size_t size
The number of offsets in the list.
uint32_t node_id
The unique identifier for this node, which is deterministic based on the source.
Definition ast.h:1085
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1091
int32_t line
The line within the file that the parse starts on.
Definition options.h:124
pm_scope_node_t node
The resulting scope node that will hold the generated AST.
pm_options_t options
The options that will be passed to the parser.
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
VALUE * script_lines
This is a pointer to the list of script lines for the ISEQs that will be associated with this scope n...
Definition method.h:63
This is the struct that holds necessary info for a struct.
Definition rtypeddata.h:202
struct rb_iseq_constant_body::@154 param
parameter information
Definition st.h:79
Definition vm_core.h:297
intptr_t SIGNED_VALUE
A signed integer type that has the same width with VALUE.
Definition value.h:63
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
Definition value_type.h:433
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376
ruby_value_type
C-level type of an object.
Definition value_type.h:113