Ruby 3.5.0dev (2025-06-12 revision d55c463d563800311d6dab23edeec16abd45068d)
iseq.c (d55c463d563800311d6dab23edeec16abd45068d)
1/**********************************************************************
2
3 iseq.c -
4
5 $Author$
6 created at: 2006-07-11(Tue) 09:00:03 +0900
7
8 Copyright (C) 2006 Koichi Sasada
9
10**********************************************************************/
11
12#define RUBY_VM_INSNS_INFO 1
13/* #define RUBY_MARK_FREE_DEBUG 1 */
14
15#include "ruby/internal/config.h"
16
17#ifdef HAVE_DLADDR
18# include <dlfcn.h>
19#endif
20
21#include "eval_intern.h"
22#include "id_table.h"
23#include "internal.h"
24#include "internal/bits.h"
25#include "internal/class.h"
26#include "internal/compile.h"
27#include "internal/error.h"
28#include "internal/file.h"
29#include "internal/gc.h"
30#include "internal/hash.h"
31#include "internal/io.h"
32#include "internal/ruby_parser.h"
33#include "internal/sanitizers.h"
34#include "internal/set_table.h"
35#include "internal/symbol.h"
36#include "internal/thread.h"
37#include "internal/variable.h"
38#include "iseq.h"
39#include "ruby/util.h"
40#include "vm_core.h"
41#include "vm_callinfo.h"
42#include "yjit.h"
43#include "ruby/ractor.h"
44#include "builtin.h"
45#include "insns.inc"
46#include "insns_info.inc"
47
48VALUE rb_cISeq;
49static VALUE iseqw_new(const rb_iseq_t *iseq);
50static const rb_iseq_t *iseqw_check(VALUE iseqw);
51
52#if VM_INSN_INFO_TABLE_IMPL == 2
53static struct succ_index_table *succ_index_table_create(int max_pos, int *data, int size);
54static unsigned int *succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size);
55static int succ_index_lookup(const struct succ_index_table *sd, int x);
56#endif
57
58#define hidden_obj_p(obj) (!SPECIAL_CONST_P(obj) && !RBASIC(obj)->klass)
59
60static inline VALUE
61obj_resurrect(VALUE obj)
62{
63 if (hidden_obj_p(obj)) {
64 switch (BUILTIN_TYPE(obj)) {
65 case T_STRING:
66 obj = rb_str_resurrect(obj);
67 break;
68 case T_ARRAY:
69 obj = rb_ary_resurrect(obj);
70 break;
71 case T_HASH:
72 obj = rb_hash_resurrect(obj);
73 break;
74 default:
75 break;
76 }
77 }
78 return obj;
79}
80
81static void
82free_arena(struct iseq_compile_data_storage *cur)
83{
84 struct iseq_compile_data_storage *next;
85
86 while (cur) {
87 next = cur->next;
88 ruby_xfree(cur);
89 cur = next;
90 }
91}
92
93static void
94compile_data_free(struct iseq_compile_data *compile_data)
95{
96 if (compile_data) {
97 free_arena(compile_data->node.storage_head);
98 free_arena(compile_data->insn.storage_head);
99 if (compile_data->ivar_cache_table) {
100 rb_id_table_free(compile_data->ivar_cache_table);
101 }
102 ruby_xfree(compile_data);
103 }
104}
105
106static void
107remove_from_constant_cache(ID id, IC ic)
108{
109 rb_vm_t *vm = GET_VM();
110 VALUE lookup_result;
111 st_data_t ic_data = (st_data_t)ic;
112
113 if (rb_id_table_lookup(vm->constant_cache, id, &lookup_result)) {
114 set_table *ics = (set_table *)lookup_result;
115 set_delete(ics, &ic_data);
116
117 if (ics->num_entries == 0 &&
118 // See comment in vm_track_constant_cache on why we need this check
119 id != vm->inserting_constant_cache_id) {
120 rb_id_table_delete(vm->constant_cache, id);
121 set_free_table(ics);
122 }
123 }
124}
125
126// When an ISEQ is being freed, all of its associated ICs are going to go away
127// as well. Because of this, we need to iterate over the ICs, and clear them
128// from the VM's constant cache.
129static void
130iseq_clear_ic_references(const rb_iseq_t *iseq)
131{
132 // In some cases (when there is a compilation error), we end up with
133 // ic_size greater than 0, but no allocated is_entries buffer.
134 // If there's no is_entries buffer to loop through, return early.
135 // [Bug #19173]
136 if (!ISEQ_BODY(iseq)->is_entries) {
137 return;
138 }
139
140 for (unsigned int ic_idx = 0; ic_idx < ISEQ_BODY(iseq)->ic_size; ic_idx++) {
141 IC ic = &ISEQ_IS_IC_ENTRY(ISEQ_BODY(iseq), ic_idx);
142
143 // Iterate over the IC's constant path's segments and clean any references to
144 // the ICs out of the VM's constant cache table.
145 const ID *segments = ic->segments;
146
147 // It's possible that segments is NULL if we overallocated an IC but
148 // optimizations removed the instruction using it
149 if (segments == NULL)
150 continue;
151
152 for (int i = 0; segments[i]; i++) {
153 ID id = segments[i];
154 if (id == idNULL) continue;
155 remove_from_constant_cache(id, ic);
156 }
157
158 ruby_xfree((void *)segments);
159 }
160}
161
162void
163rb_iseq_free(const rb_iseq_t *iseq)
164{
165 RUBY_FREE_ENTER("iseq");
166
167 if (iseq && ISEQ_BODY(iseq)) {
168 iseq_clear_ic_references(iseq);
169 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
170#if USE_YJIT
171 rb_yjit_iseq_free(iseq);
172 if (FL_TEST_RAW((VALUE)iseq, ISEQ_TRANSLATED)) {
173 RUBY_ASSERT(rb_yjit_live_iseq_count > 0);
174 rb_yjit_live_iseq_count--;
175 }
176#endif
177 ruby_xfree((void *)body->iseq_encoded);
178 ruby_xfree((void *)body->insns_info.body);
179 ruby_xfree((void *)body->insns_info.positions);
180#if VM_INSN_INFO_TABLE_IMPL == 2
181 ruby_xfree(body->insns_info.succ_index_table);
182#endif
183 ruby_xfree((void *)body->is_entries);
184 ruby_xfree(body->call_data);
185 ruby_xfree((void *)body->catch_table);
186 ruby_xfree((void *)body->param.opt_table);
187 if (ISEQ_MBITS_BUFLEN(body->iseq_size) > 1 && body->mark_bits.list) {
188 ruby_xfree((void *)body->mark_bits.list);
189 }
190
191 ruby_xfree(body->variable.original_iseq);
192
193 if (body->param.keyword != NULL) {
194 if (body->param.keyword->table != &body->local_table[body->param.keyword->bits_start - body->param.keyword->num])
195 ruby_xfree((void *)body->param.keyword->table);
196 if (body->param.keyword->default_values) {
197 ruby_xfree((void *)body->param.keyword->default_values);
198 }
199 ruby_xfree((void *)body->param.keyword);
200 }
201 if (LIKELY(body->local_table != rb_iseq_shared_exc_local_tbl))
202 ruby_xfree((void *)body->local_table);
203 compile_data_free(ISEQ_COMPILE_DATA(iseq));
204 if (body->outer_variables) rb_id_table_free(body->outer_variables);
205 ruby_xfree(body);
206 }
207
208 if (iseq && ISEQ_EXECUTABLE_P(iseq) && iseq->aux.exec.local_hooks) {
209 rb_hook_list_free(iseq->aux.exec.local_hooks);
210 }
211
212 RUBY_FREE_LEAVE("iseq");
213}
214
215typedef VALUE iseq_value_itr_t(void *ctx, VALUE obj);
216
217static inline void
218iseq_scan_bits(unsigned int page, iseq_bits_t bits, VALUE *code, VALUE *original_iseq)
219{
220 unsigned int offset;
221 unsigned int page_offset = (page * ISEQ_MBITS_BITLENGTH);
222
223 while (bits) {
224 offset = ntz_intptr(bits);
225 VALUE op = code[page_offset + offset];
226 rb_gc_mark_and_move(&code[page_offset + offset]);
227 VALUE newop = code[page_offset + offset];
228 if (original_iseq && newop != op) {
229 original_iseq[page_offset + offset] = newop;
230 }
231 bits &= bits - 1; // Reset Lowest Set Bit (BLSR)
232 }
233}
234
235static void
236rb_iseq_mark_and_move_each_compile_data_value(const rb_iseq_t *iseq, VALUE *original_iseq)
237{
238 unsigned int size;
239 VALUE *code;
240 const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq);
241
242 size = compile_data->iseq_size;
243 code = compile_data->iseq_encoded;
244
245 // Embedded VALUEs
246 if (compile_data->mark_bits.list) {
247 if(compile_data->is_single_mark_bit) {
248 iseq_scan_bits(0, compile_data->mark_bits.single, code, original_iseq);
249 }
250 else {
251 for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) {
252 iseq_bits_t bits = compile_data->mark_bits.list[i];
253 iseq_scan_bits(i, bits, code, original_iseq);
254 }
255 }
256 }
257}
258static void
259rb_iseq_mark_and_move_each_body_value(const rb_iseq_t *iseq, VALUE *original_iseq)
260{
261 unsigned int size;
262 VALUE *code;
263 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
264
265 size = body->iseq_size;
266 code = body->iseq_encoded;
267
268 union iseq_inline_storage_entry *is_entries = body->is_entries;
269
270 if (body->is_entries) {
271 // Skip iterating over ivc caches
272 is_entries += body->ivc_size;
273
274 // ICVARC entries
275 for (unsigned int i = 0; i < body->icvarc_size; i++, is_entries++) {
276 ICVARC icvarc = (ICVARC)is_entries;
277 if (icvarc->entry) {
278 RUBY_ASSERT(!RB_TYPE_P(icvarc->entry->class_value, T_NONE));
279
280 rb_gc_mark_and_move(&icvarc->entry->class_value);
281 }
282 }
283
284 // ISE entries
285 for (unsigned int i = 0; i < body->ise_size; i++, is_entries++) {
286 union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)is_entries;
287 if (is->once.value) {
288 rb_gc_mark_and_move(&is->once.value);
289 }
290 }
291
292 // IC Entries
293 for (unsigned int i = 0; i < body->ic_size; i++, is_entries++) {
294 IC ic = (IC)is_entries;
295 if (ic->entry) {
296 rb_gc_mark_and_move_ptr(&ic->entry);
297 }
298 }
299 }
300
301 // Embedded VALUEs
302 if (body->mark_bits.list) {
303 if (ISEQ_MBITS_BUFLEN(size) == 1) {
304 iseq_scan_bits(0, body->mark_bits.single, code, original_iseq);
305 }
306 else {
307 for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) {
308 iseq_bits_t bits = body->mark_bits.list[i];
309 iseq_scan_bits(i, bits, code, original_iseq);
310 }
311 }
312 }
313}
314
315static bool
316cc_is_active(const struct rb_callcache *cc, bool reference_updating)
317{
318 if (cc) {
319 if (cc == rb_vm_empty_cc() || rb_vm_empty_cc_for_super()) {
320 return false;
321 }
322
323 if (reference_updating) {
324 cc = (const struct rb_callcache *)rb_gc_location((VALUE)cc);
325 }
326
327 if (vm_cc_markable(cc)) {
328 if (cc->klass) { // cc is not invalidated
329 const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc);
330 if (reference_updating) {
331 cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cme);
332 }
333 if (!METHOD_ENTRY_INVALIDATED(cme)) {
334 return true;
335 }
336 }
337 }
338 }
339 return false;
340}
341
342void
343rb_iseq_mark_and_move(rb_iseq_t *iseq, bool reference_updating)
344{
345 RUBY_MARK_ENTER("iseq");
346
347 rb_gc_mark_and_move(&iseq->wrapper);
348
349 if (ISEQ_BODY(iseq)) {
350 struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
351
352 rb_iseq_mark_and_move_each_body_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL);
353
354 rb_gc_mark_and_move(&body->variable.coverage);
355 rb_gc_mark_and_move(&body->variable.pc2branchindex);
356 rb_gc_mark_and_move(&body->variable.script_lines);
357 rb_gc_mark_and_move(&body->location.label);
358 rb_gc_mark_and_move(&body->location.base_label);
359 rb_gc_mark_and_move(&body->location.pathobj);
360 if (body->local_iseq) rb_gc_mark_and_move_ptr(&body->local_iseq);
361 if (body->parent_iseq) rb_gc_mark_and_move_ptr(&body->parent_iseq);
362 if (body->mandatory_only_iseq) rb_gc_mark_and_move_ptr(&body->mandatory_only_iseq);
363
364 if (body->call_data) {
365 for (unsigned int i = 0; i < body->ci_size; i++) {
366 struct rb_call_data *cds = body->call_data;
367
368 if (cds[i].ci) rb_gc_mark_and_move_ptr(&cds[i].ci);
369
370 if (cc_is_active(cds[i].cc, reference_updating)) {
371 rb_gc_mark_and_move_ptr(&cds[i].cc);
372 }
373 else if (cds[i].cc != rb_vm_empty_cc()) {
374 cds[i].cc = rb_vm_empty_cc();
375 }
376 }
377 }
378
379 if (body->param.flags.has_kw && body->param.keyword != NULL) {
380 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
381
382 if (keyword->default_values != NULL) {
383 for (int j = 0, i = keyword->required_num; i < keyword->num; i++, j++) {
384 rb_gc_mark_and_move(&keyword->default_values[j]);
385 }
386 }
387 }
388
389 if (body->catch_table) {
390 struct iseq_catch_table *table = body->catch_table;
391
392 for (unsigned int i = 0; i < table->size; i++) {
393 struct iseq_catch_table_entry *entry;
394 entry = UNALIGNED_MEMBER_PTR(table, entries[i]);
395 if (entry->iseq) {
396 rb_gc_mark_and_move_ptr(&entry->iseq);
397 }
398 }
399 }
400
401 if (reference_updating) {
402#if USE_YJIT
403 rb_yjit_iseq_update_references(iseq);
404#endif
405 }
406 else {
407#if USE_YJIT
408 rb_yjit_iseq_mark(body->yjit_payload);
409#endif
410 }
411 }
412
413 if (FL_TEST_RAW((VALUE)iseq, ISEQ_NOT_LOADED_YET)) {
414 rb_gc_mark_and_move(&iseq->aux.loader.obj);
415 }
416 else if (FL_TEST_RAW((VALUE)iseq, ISEQ_USE_COMPILE_DATA)) {
417 const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq);
418
419 rb_iseq_mark_and_move_insn_storage(compile_data->insn.storage_head);
420 rb_iseq_mark_and_move_each_compile_data_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL);
421
422 rb_gc_mark_and_move((VALUE *)&compile_data->err_info);
423 rb_gc_mark_and_move((VALUE *)&compile_data->catch_table_ary);
424 }
425 else {
426 /* executable */
427 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
428
429 if (iseq->aux.exec.local_hooks) {
430 rb_hook_list_mark_and_update(iseq->aux.exec.local_hooks);
431 }
432 }
433
434 RUBY_MARK_LEAVE("iseq");
435}
436
437static size_t
438param_keyword_size(const struct rb_iseq_param_keyword *pkw)
439{
440 size_t size = 0;
441
442 if (!pkw) return size;
443
444 size += sizeof(struct rb_iseq_param_keyword);
445 size += sizeof(VALUE) * (pkw->num - pkw->required_num);
446
447 return size;
448}
449
450size_t
451rb_iseq_memsize(const rb_iseq_t *iseq)
452{
453 size_t size = 0; /* struct already counted as RVALUE size */
454 const struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
455 const struct iseq_compile_data *compile_data;
456
457 /* TODO: should we count original_iseq? */
458
459 if (ISEQ_EXECUTABLE_P(iseq) && body) {
460 size += sizeof(struct rb_iseq_constant_body);
461 size += body->iseq_size * sizeof(VALUE);
462 size += body->insns_info.size * (sizeof(struct iseq_insn_info_entry) + sizeof(unsigned int));
463 size += body->local_table_size * sizeof(ID);
464 size += ISEQ_MBITS_BUFLEN(body->iseq_size) * ISEQ_MBITS_SIZE;
465 if (body->catch_table) {
466 size += iseq_catch_table_bytes(body->catch_table->size);
467 }
468 size += (body->param.opt_num + 1) * sizeof(VALUE);
469 size += param_keyword_size(body->param.keyword);
470
471 /* body->is_entries */
472 size += ISEQ_IS_SIZE(body) * sizeof(union iseq_inline_storage_entry);
473
474 if (ISEQ_BODY(iseq)->is_entries) {
475 /* IC entries constant segments */
476 for (unsigned int ic_idx = 0; ic_idx < body->ic_size; ic_idx++) {
477 IC ic = &ISEQ_IS_IC_ENTRY(body, ic_idx);
478 const ID *ids = ic->segments;
479 if (!ids) continue;
480 while (*ids++) {
481 size += sizeof(ID);
482 }
483 size += sizeof(ID); // null terminator
484 }
485 }
486
487 /* body->call_data */
488 size += body->ci_size * sizeof(struct rb_call_data);
489 // TODO: should we count imemo_callinfo?
490 }
491
492 compile_data = ISEQ_COMPILE_DATA(iseq);
493 if (compile_data) {
494 struct iseq_compile_data_storage *cur;
495
496 size += sizeof(struct iseq_compile_data);
497
498 cur = compile_data->node.storage_head;
499 while (cur) {
500 size += cur->size + offsetof(struct iseq_compile_data_storage, buff);
501 cur = cur->next;
502 }
503 }
504
505 return size;
506}
507
509rb_iseq_constant_body_alloc(void)
510{
511 struct rb_iseq_constant_body *iseq_body;
512 iseq_body = ZALLOC(struct rb_iseq_constant_body);
513 return iseq_body;
514}
515
516static rb_iseq_t *
517iseq_alloc(void)
518{
519 rb_iseq_t *iseq = iseq_imemo_alloc();
520 ISEQ_BODY(iseq) = rb_iseq_constant_body_alloc();
521 return iseq;
522}
523
524VALUE
525rb_iseq_pathobj_new(VALUE path, VALUE realpath)
526{
527 VALUE pathobj;
528 VM_ASSERT(RB_TYPE_P(path, T_STRING));
529 VM_ASSERT(NIL_P(realpath) || RB_TYPE_P(realpath, T_STRING));
530
531 if (path == realpath ||
532 (!NIL_P(realpath) && rb_str_cmp(path, realpath) == 0)) {
533 pathobj = rb_fstring(path);
534 }
535 else {
536 if (!NIL_P(realpath)) realpath = rb_fstring(realpath);
537 pathobj = rb_ary_new_from_args(2, rb_fstring(path), realpath);
538 rb_ary_freeze(pathobj);
539 }
540 return pathobj;
541}
542
543void
544rb_iseq_pathobj_set(const rb_iseq_t *iseq, VALUE path, VALUE realpath)
545{
546 RB_OBJ_WRITE(iseq, &ISEQ_BODY(iseq)->location.pathobj,
547 rb_iseq_pathobj_new(path, realpath));
548}
549
550// Make a dummy iseq for a dummy frame that exposes a path for profilers to inspect
551rb_iseq_t *
552rb_iseq_alloc_with_dummy_path(VALUE fname)
553{
554 rb_iseq_t *dummy_iseq = iseq_alloc();
555
556 ISEQ_BODY(dummy_iseq)->type = ISEQ_TYPE_TOP;
557 RB_OBJ_WRITE(dummy_iseq, &ISEQ_BODY(dummy_iseq)->location.pathobj, fname);
558 RB_OBJ_WRITE(dummy_iseq, &ISEQ_BODY(dummy_iseq)->location.label, fname);
559
560 return dummy_iseq;
561}
562
563static rb_iseq_location_t *
564iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id)
565{
566 rb_iseq_location_t *loc = &ISEQ_BODY(iseq)->location;
567
568 rb_iseq_pathobj_set(iseq, path, realpath);
569 RB_OBJ_WRITE(iseq, &loc->label, name);
570 RB_OBJ_WRITE(iseq, &loc->base_label, name);
571 loc->first_lineno = first_lineno;
572
573 if (ISEQ_BODY(iseq)->local_iseq == iseq && strcmp(RSTRING_PTR(name), "initialize") == 0) {
574 ISEQ_BODY(iseq)->param.flags.use_block = 1;
575 }
576
577 if (code_location) {
578 loc->node_id = node_id;
579 loc->code_location = *code_location;
580 }
581 else {
582 loc->code_location.beg_pos.lineno = 0;
583 loc->code_location.beg_pos.column = 0;
584 loc->code_location.end_pos.lineno = -1;
585 loc->code_location.end_pos.column = -1;
586 }
587
588 return loc;
589}
590
591static void
592set_relation(rb_iseq_t *iseq, const rb_iseq_t *piseq)
593{
594 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
595 const VALUE type = body->type;
596
597 /* set class nest stack */
598 if (type == ISEQ_TYPE_TOP) {
599 body->local_iseq = iseq;
600 }
601 else if (type == ISEQ_TYPE_METHOD || type == ISEQ_TYPE_CLASS) {
602 body->local_iseq = iseq;
603 }
604 else if (piseq) {
605 body->local_iseq = ISEQ_BODY(piseq)->local_iseq;
606 }
607
608 if (piseq) {
609 body->parent_iseq = piseq;
610 }
611
612 if (type == ISEQ_TYPE_MAIN) {
613 body->local_iseq = iseq;
614 }
615}
616
617static struct iseq_compile_data_storage *
618new_arena(void)
619{
620 struct iseq_compile_data_storage * new_arena =
622 ALLOC_N(char, INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE +
623 offsetof(struct iseq_compile_data_storage, buff));
624
625 new_arena->pos = 0;
626 new_arena->next = 0;
627 new_arena->size = INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE;
628
629 return new_arena;
630}
631
632static VALUE
633prepare_iseq_build(rb_iseq_t *iseq,
634 VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id,
635 const rb_iseq_t *parent, int isolated_depth, enum rb_iseq_type type,
636 VALUE script_lines, const rb_compile_option_t *option)
637{
638 VALUE coverage = Qfalse;
639 VALUE err_info = Qnil;
640 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
641
642 if (parent && (type == ISEQ_TYPE_MAIN || type == ISEQ_TYPE_TOP))
643 err_info = Qfalse;
644
645 body->type = type;
646 set_relation(iseq, parent);
647
648 name = rb_fstring(name);
649 iseq_location_setup(iseq, name, path, realpath, first_lineno, code_location, node_id);
650 if (iseq != body->local_iseq) {
651 RB_OBJ_WRITE(iseq, &body->location.base_label, ISEQ_BODY(body->local_iseq)->location.label);
652 }
653 ISEQ_COVERAGE_SET(iseq, Qnil);
654 ISEQ_ORIGINAL_ISEQ_CLEAR(iseq);
655 body->variable.flip_count = 0;
656
657 if (NIL_P(script_lines)) {
658 RB_OBJ_WRITE(iseq, &body->variable.script_lines, Qnil);
659 }
660 else {
661 RB_OBJ_WRITE(iseq, &body->variable.script_lines, rb_ractor_make_shareable(script_lines));
662 }
663
664 ISEQ_COMPILE_DATA_ALLOC(iseq);
665 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->err_info, err_info);
666 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->catch_table_ary, Qnil);
667
668 ISEQ_COMPILE_DATA(iseq)->node.storage_head = ISEQ_COMPILE_DATA(iseq)->node.storage_current = new_arena();
669 ISEQ_COMPILE_DATA(iseq)->insn.storage_head = ISEQ_COMPILE_DATA(iseq)->insn.storage_current = new_arena();
670 ISEQ_COMPILE_DATA(iseq)->isolated_depth = isolated_depth;
671 ISEQ_COMPILE_DATA(iseq)->option = option;
672 ISEQ_COMPILE_DATA(iseq)->ivar_cache_table = NULL;
673 ISEQ_COMPILE_DATA(iseq)->builtin_function_table = GET_VM()->builtin_function_table;
674
675 if (option->coverage_enabled) {
676 VALUE coverages = rb_get_coverages();
677 if (RTEST(coverages)) {
678 coverage = rb_hash_lookup(coverages, rb_iseq_path(iseq));
679 if (NIL_P(coverage)) coverage = Qfalse;
680 }
681 }
682 ISEQ_COVERAGE_SET(iseq, coverage);
683 if (coverage && ISEQ_BRANCH_COVERAGE(iseq))
684 ISEQ_PC2BRANCHINDEX_SET(iseq, rb_ary_hidden_new(0));
685
686 return Qtrue;
687}
688
689#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
690static void validate_get_insn_info(const rb_iseq_t *iseq);
691#endif
692
693void
694rb_iseq_insns_info_encode_positions(const rb_iseq_t *iseq)
695{
696#if VM_INSN_INFO_TABLE_IMPL == 2
697 /* create succ_index_table */
698 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
699 int size = body->insns_info.size;
700 int max_pos = body->iseq_size;
701 int *data = (int *)body->insns_info.positions;
702 if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table);
703 body->insns_info.succ_index_table = succ_index_table_create(max_pos, data, size);
704#if VM_CHECK_MODE == 0
705 ruby_xfree(body->insns_info.positions);
706 body->insns_info.positions = NULL;
707#endif
708#endif
709}
710
#if VM_INSN_INFO_TABLE_IMPL == 2
/*
 * Return the result of inverting the body's succ_index_table, using the
 * body's iseq_size as the maximum position and insns_info.size as the count.
 */
unsigned int *
rb_iseq_insns_info_decode_positions(const struct rb_iseq_constant_body *body)
{
    const int count = body->insns_info.size;
    const int max_pos = body->iseq_size;

    return succ_index_table_invert(max_pos, body->insns_info.succ_index_table, count);
}
#endif
721
722void
723rb_iseq_init_trace(rb_iseq_t *iseq)
724{
725 iseq->aux.exec.global_trace_events = 0;
726 if (ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS) {
727 rb_iseq_trace_set(iseq, ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS);
728 }
729}
730
731static VALUE
732finish_iseq_build(rb_iseq_t *iseq)
733{
734 struct iseq_compile_data *data = ISEQ_COMPILE_DATA(iseq);
735 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
736 VALUE err = data->err_info;
737 ISEQ_COMPILE_DATA_CLEAR(iseq);
738 compile_data_free(data);
739
740#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
741 validate_get_insn_info(iseq);
742#endif
743
744 if (RTEST(err)) {
745 VALUE path = pathobj_path(body->location.pathobj);
746 if (err == Qtrue) err = rb_exc_new_cstr(rb_eSyntaxError, "compile error");
747 rb_funcallv(err, rb_intern("set_backtrace"), 1, &path);
748 rb_exc_raise(err);
749 }
750
751 RB_DEBUG_COUNTER_INC(iseq_num);
752 RB_DEBUG_COUNTER_ADD(iseq_cd_num, ISEQ_BODY(iseq)->ci_size);
753
754 rb_iseq_init_trace(iseq);
755 return Qtrue;
756}
757
758static rb_compile_option_t COMPILE_OPTION_DEFAULT = {
759 .inline_const_cache = OPT_INLINE_CONST_CACHE,
760 .peephole_optimization = OPT_PEEPHOLE_OPTIMIZATION,
761 .tailcall_optimization = OPT_TAILCALL_OPTIMIZATION,
762 .specialized_instruction = OPT_SPECIALISED_INSTRUCTION,
763 .operands_unification = OPT_OPERANDS_UNIFICATION,
764 .instructions_unification = OPT_INSTRUCTIONS_UNIFICATION,
765 .frozen_string_literal = OPT_FROZEN_STRING_LITERAL,
766 .debug_frozen_string_literal = OPT_DEBUG_FROZEN_STRING_LITERAL,
767 .coverage_enabled = TRUE,
768};
769
770static const rb_compile_option_t COMPILE_OPTION_FALSE = {
771 .frozen_string_literal = -1, // unspecified
772};
773
774int
775rb_iseq_opt_frozen_string_literal(void)
776{
777 return COMPILE_OPTION_DEFAULT.frozen_string_literal;
778}
779
780static void
781set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt)
782{
783#define SET_COMPILE_OPTION(o, h, mem) \
784 { VALUE flag = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \
785 if (flag == Qtrue) { (o)->mem = 1; } \
786 else if (flag == Qfalse) { (o)->mem = 0; } \
787 }
788#define SET_COMPILE_OPTION_NUM(o, h, mem) \
789 { VALUE num = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \
790 if (!NIL_P(num)) (o)->mem = NUM2INT(num); \
791 }
792 SET_COMPILE_OPTION(option, opt, inline_const_cache);
793 SET_COMPILE_OPTION(option, opt, peephole_optimization);
794 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
795 SET_COMPILE_OPTION(option, opt, specialized_instruction);
796 SET_COMPILE_OPTION(option, opt, operands_unification);
797 SET_COMPILE_OPTION(option, opt, instructions_unification);
798 SET_COMPILE_OPTION(option, opt, frozen_string_literal);
799 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
800 SET_COMPILE_OPTION(option, opt, coverage_enabled);
801 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
802#undef SET_COMPILE_OPTION
803#undef SET_COMPILE_OPTION_NUM
804}
805
806static rb_compile_option_t *
807set_compile_option_from_ast(rb_compile_option_t *option, const rb_ast_body_t *ast)
808{
809#define SET_COMPILE_OPTION(o, a, mem) \
810 ((a)->mem < 0 ? 0 : ((o)->mem = (a)->mem > 0))
811 SET_COMPILE_OPTION(option, ast, coverage_enabled);
812#undef SET_COMPILE_OPTION
813 if (ast->frozen_string_literal >= 0) {
814 option->frozen_string_literal = ast->frozen_string_literal;
815 }
816 return option;
817}
818
819static void
820make_compile_option(rb_compile_option_t *option, VALUE opt)
821{
822 if (NIL_P(opt)) {
823 *option = COMPILE_OPTION_DEFAULT;
824 }
825 else if (opt == Qfalse) {
826 *option = COMPILE_OPTION_FALSE;
827 }
828 else if (opt == Qtrue) {
829 int i;
830 for (i = 0; i < (int)(sizeof(rb_compile_option_t) / sizeof(int)); ++i)
831 ((int *)option)[i] = 1;
832 }
833 else if (RB_TYPE_P(opt, T_HASH)) {
834 *option = COMPILE_OPTION_DEFAULT;
835 set_compile_option_from_hash(option, opt);
836 }
837 else {
838 rb_raise(rb_eTypeError, "Compile option must be Hash/true/false/nil");
839 }
840}
841
842static VALUE
843make_compile_option_value(rb_compile_option_t *option)
844{
845 VALUE opt = rb_hash_new_with_size(11);
846#define SET_COMPILE_OPTION(o, h, mem) \
847 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), RBOOL((o)->mem))
848#define SET_COMPILE_OPTION_NUM(o, h, mem) \
849 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), INT2NUM((o)->mem))
850 {
851 SET_COMPILE_OPTION(option, opt, inline_const_cache);
852 SET_COMPILE_OPTION(option, opt, peephole_optimization);
853 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
854 SET_COMPILE_OPTION(option, opt, specialized_instruction);
855 SET_COMPILE_OPTION(option, opt, operands_unification);
856 SET_COMPILE_OPTION(option, opt, instructions_unification);
857 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
858 SET_COMPILE_OPTION(option, opt, coverage_enabled);
859 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
860 }
861#undef SET_COMPILE_OPTION
862#undef SET_COMPILE_OPTION_NUM
863 VALUE frozen_string_literal = option->frozen_string_literal == -1 ? Qnil : RBOOL(option->frozen_string_literal);
864 rb_hash_aset(opt, ID2SYM(rb_intern("frozen_string_literal")), frozen_string_literal);
865 return opt;
866}
867
868rb_iseq_t *
869rb_iseq_new(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath,
870 const rb_iseq_t *parent, enum rb_iseq_type type)
871{
872 return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent,
873 0, type, &COMPILE_OPTION_DEFAULT,
874 Qnil);
875}
876
877static int
878ast_line_count(const VALUE ast_value)
879{
880 rb_ast_t *ast = rb_ruby_ast_data_get(ast_value);
881 return ast->body.line_count;
882}
883
884static VALUE
885iseq_setup_coverage(VALUE coverages, VALUE path, int line_count)
886{
887 if (line_count >= 0) {
888 int len = (rb_get_coverage_mode() & COVERAGE_TARGET_ONESHOT_LINES) ? 0 : line_count;
889
890 VALUE coverage = rb_default_coverage(len);
891 rb_hash_aset(coverages, path, coverage);
892
893 return coverage;
894 }
895
896 return Qnil;
897}
898
899static inline void
900iseq_new_setup_coverage(VALUE path, int line_count)
901{
902 VALUE coverages = rb_get_coverages();
903
904 if (RTEST(coverages)) {
905 iseq_setup_coverage(coverages, path, line_count);
906 }
907}
908
909rb_iseq_t *
910rb_iseq_new_top(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent)
911{
912 iseq_new_setup_coverage(path, ast_line_count(ast_value));
913
914 return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent, 0,
915 ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT,
916 Qnil);
917}
918
922rb_iseq_t *
923pm_iseq_new_top(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent, int *error_state)
924{
925 iseq_new_setup_coverage(path, (int) (node->parser->newline_list.size - 1));
926
927 return pm_iseq_new_with_opt(node, name, path, realpath, 0, parent, 0,
928 ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT, error_state);
929}
930
931rb_iseq_t *
932rb_iseq_new_main(const VALUE ast_value, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt)
933{
934 iseq_new_setup_coverage(path, ast_line_count(ast_value));
935
936 return rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"),
937 path, realpath, 0,
938 parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE,
939 Qnil);
940}
941
946rb_iseq_t *
947pm_iseq_new_main(pm_scope_node_t *node, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt, int *error_state)
948{
949 iseq_new_setup_coverage(path, (int) (node->parser->newline_list.size - 1));
950
951 return pm_iseq_new_with_opt(node, rb_fstring_lit("<main>"),
952 path, realpath, 0,
953 parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE, error_state);
954}
955
956rb_iseq_t *
957rb_iseq_new_eval(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth)
958{
959 if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) {
960 VALUE coverages = rb_get_coverages();
961 if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) {
962 iseq_setup_coverage(coverages, path, ast_line_count(ast_value) + first_lineno - 1);
963 }
964 }
965
966 return rb_iseq_new_with_opt(ast_value, name, path, realpath, first_lineno,
967 parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT,
968 Qnil);
969}
970
971rb_iseq_t *
972pm_iseq_new_eval(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath,
973 int first_lineno, const rb_iseq_t *parent, int isolated_depth, int *error_state)
974{
975 if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) {
976 VALUE coverages = rb_get_coverages();
977 if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) {
978 iseq_setup_coverage(coverages, path, ((int) (node->parser->newline_list.size - 1)) + first_lineno - 1);
979 }
980 }
981
982 return pm_iseq_new_with_opt(node, name, path, realpath, first_lineno,
983 parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT, error_state);
984}
985
986static inline rb_iseq_t *
987iseq_translate(rb_iseq_t *iseq)
988{
989 if (rb_respond_to(rb_cISeq, rb_intern("translate"))) {
990 VALUE v1 = iseqw_new(iseq);
991 VALUE v2 = rb_funcall(rb_cISeq, rb_intern("translate"), 1, v1);
992 if (v1 != v2 && CLASS_OF(v2) == rb_cISeq) {
993 iseq = (rb_iseq_t *)iseqw_check(v2);
994 }
995 }
996
997 return iseq;
998}
999
1000rb_iseq_t *
1001rb_iseq_new_with_opt(VALUE ast_value, VALUE name, VALUE path, VALUE realpath,
1002 int first_lineno, const rb_iseq_t *parent, int isolated_depth,
1003 enum rb_iseq_type type, const rb_compile_option_t *option,
1004 VALUE script_lines)
1005{
1006 rb_ast_t *ast = rb_ruby_ast_data_get(ast_value);
1007 rb_ast_body_t *body = ast ? &ast->body : NULL;
1008 const NODE *node = body ? body->root : 0;
1009 /* TODO: argument check */
1010 rb_iseq_t *iseq = iseq_alloc();
1011 rb_compile_option_t new_opt;
1012
1013 if (!option) option = &COMPILE_OPTION_DEFAULT;
1014 if (body) {
1015 new_opt = *option;
1016 option = set_compile_option_from_ast(&new_opt, body);
1017 }
1018
1019 if (!NIL_P(script_lines)) {
1020 // noop
1021 }
1022 else if (body && body->script_lines) {
1023 script_lines = rb_parser_build_script_lines_from(body->script_lines);
1024 }
1025 else if (parent) {
1026 script_lines = ISEQ_BODY(parent)->variable.script_lines;
1027 }
1028
1029 prepare_iseq_build(iseq, name, path, realpath, first_lineno, node ? &node->nd_loc : NULL, node ? nd_node_id(node) : -1,
1030 parent, isolated_depth, type, script_lines, option);
1031
1032 rb_iseq_compile_node(iseq, node);
1033 finish_iseq_build(iseq);
1034 RB_GC_GUARD(ast_value);
1035
1036 return iseq_translate(iseq);
1037}
1038
1040 rb_iseq_t *iseq;
1041 pm_scope_node_t *node;
1042};
1043
1044VALUE
1045pm_iseq_new_with_opt_try(VALUE d)
1046{
1047 struct pm_iseq_new_with_opt_data *data = (struct pm_iseq_new_with_opt_data *)d;
1048
1049 // This can compile child iseqs, which can raise syntax errors
1050 pm_iseq_compile_node(data->iseq, data->node);
1051
1052 // This raises an exception if there is a syntax error
1053 finish_iseq_build(data->iseq);
1054
1055 return Qundef;
1056}
1057
1070rb_iseq_t *
1071pm_iseq_new_with_opt(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath,
1072 int first_lineno, const rb_iseq_t *parent, int isolated_depth,
1073 enum rb_iseq_type type, const rb_compile_option_t *option, int *error_state)
1074{
1075 rb_iseq_t *iseq = iseq_alloc();
1076 ISEQ_BODY(iseq)->prism = true;
1077
1078 rb_compile_option_t next_option;
1079 if (!option) option = &COMPILE_OPTION_DEFAULT;
1080
1081 next_option = *option;
1082 next_option.coverage_enabled = node->coverage_enabled < 0 ? 0 : node->coverage_enabled > 0;
1083 option = &next_option;
1084
1085 pm_location_t *location = &node->base.location;
1086 int32_t start_line = node->parser->start_line;
1087
1088 pm_line_column_t start = pm_newline_list_line_column(&node->parser->newline_list, location->start, start_line);
1089 pm_line_column_t end = pm_newline_list_line_column(&node->parser->newline_list, location->end, start_line);
1090
1091 rb_code_location_t code_location = (rb_code_location_t) {
1092 .beg_pos = { .lineno = (int) start.line, .column = (int) start.column },
1093 .end_pos = { .lineno = (int) end.line, .column = (int) end.column }
1094 };
1095
1096 prepare_iseq_build(iseq, name, path, realpath, first_lineno, &code_location, node->ast_node->node_id,
1097 parent, isolated_depth, type, node->script_lines == NULL ? Qnil : *node->script_lines, option);
1098
1099 struct pm_iseq_new_with_opt_data data = {
1100 .iseq = iseq,
1101 .node = node
1102 };
1103 rb_protect(pm_iseq_new_with_opt_try, (VALUE)&data, error_state);
1104
1105 if (*error_state) return NULL;
1106
1107 return iseq_translate(iseq);
1108}
1109
1110rb_iseq_t *
1111rb_iseq_new_with_callback(
1112 const struct rb_iseq_new_with_callback_callback_func * ifunc,
1113 VALUE name, VALUE path, VALUE realpath,
1114 int first_lineno, const rb_iseq_t *parent,
1115 enum rb_iseq_type type, const rb_compile_option_t *option)
1116{
1117 /* TODO: argument check */
1118 rb_iseq_t *iseq = iseq_alloc();
1119
1120 if (!option) option = &COMPILE_OPTION_DEFAULT;
1121 prepare_iseq_build(iseq, name, path, realpath, first_lineno, NULL, -1, parent, 0, type, Qnil, option);
1122
1123 rb_iseq_compile_callback(iseq, ifunc);
1124 finish_iseq_build(iseq);
1125
1126 return iseq;
1127}
1128
1129const rb_iseq_t *
1130rb_iseq_load_iseq(VALUE fname)
1131{
1132 VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("load_iseq"), 1, &fname);
1133
1134 if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) {
1135 return iseqw_check(iseqv);
1136 }
1137
1138 return NULL;
1139}
1140
1141#define CHECK_ARRAY(v) rb_to_array_type(v)
1142#define CHECK_HASH(v) rb_to_hash_type(v)
1143#define CHECK_STRING(v) rb_str_to_str(v)
1144#define CHECK_SYMBOL(v) rb_to_symbol_type(v)
1145static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;}
1146
1147static enum rb_iseq_type
1148iseq_type_from_sym(VALUE type)
1149{
1150 const ID id_top = rb_intern("top");
1151 const ID id_method = rb_intern("method");
1152 const ID id_block = rb_intern("block");
1153 const ID id_class = rb_intern("class");
1154 const ID id_rescue = rb_intern("rescue");
1155 const ID id_ensure = rb_intern("ensure");
1156 const ID id_eval = rb_intern("eval");
1157 const ID id_main = rb_intern("main");
1158 const ID id_plain = rb_intern("plain");
1159 /* ensure all symbols are static or pinned down before
1160 * conversion */
1161 const ID typeid = rb_check_id(&type);
1162 if (typeid == id_top) return ISEQ_TYPE_TOP;
1163 if (typeid == id_method) return ISEQ_TYPE_METHOD;
1164 if (typeid == id_block) return ISEQ_TYPE_BLOCK;
1165 if (typeid == id_class) return ISEQ_TYPE_CLASS;
1166 if (typeid == id_rescue) return ISEQ_TYPE_RESCUE;
1167 if (typeid == id_ensure) return ISEQ_TYPE_ENSURE;
1168 if (typeid == id_eval) return ISEQ_TYPE_EVAL;
1169 if (typeid == id_main) return ISEQ_TYPE_MAIN;
1170 if (typeid == id_plain) return ISEQ_TYPE_PLAIN;
1171 return (enum rb_iseq_type)-1;
1172}
1173
1174static VALUE
1175iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt)
1176{
1177 rb_iseq_t *iseq = iseq_alloc();
1178
1179 VALUE magic, version1, version2, format_type, misc;
1180 VALUE name, path, realpath, code_location, node_id;
1181 VALUE type, body, locals, params, exception;
1182
1183 st_data_t iseq_type;
1184 rb_compile_option_t option;
1185 int i = 0;
1186 rb_code_location_t tmp_loc = { {0, 0}, {-1, -1} };
1187
1188 /* [magic, major_version, minor_version, format_type, misc,
1189 * label, path, first_lineno,
1190 * type, locals, args, exception_table, body]
1191 */
1192
1193 data = CHECK_ARRAY(data);
1194
1195 magic = CHECK_STRING(rb_ary_entry(data, i++));
1196 version1 = CHECK_INTEGER(rb_ary_entry(data, i++));
1197 version2 = CHECK_INTEGER(rb_ary_entry(data, i++));
1198 format_type = CHECK_INTEGER(rb_ary_entry(data, i++));
1199 misc = CHECK_HASH(rb_ary_entry(data, i++));
1200 ((void)magic, (void)version1, (void)version2, (void)format_type);
1201
1202 name = CHECK_STRING(rb_ary_entry(data, i++));
1203 path = CHECK_STRING(rb_ary_entry(data, i++));
1204 realpath = rb_ary_entry(data, i++);
1205 realpath = NIL_P(realpath) ? Qnil : CHECK_STRING(realpath);
1206 int first_lineno = RB_NUM2INT(rb_ary_entry(data, i++));
1207
1208 type = CHECK_SYMBOL(rb_ary_entry(data, i++));
1209 locals = CHECK_ARRAY(rb_ary_entry(data, i++));
1210 params = CHECK_HASH(rb_ary_entry(data, i++));
1211 exception = CHECK_ARRAY(rb_ary_entry(data, i++));
1212 body = CHECK_ARRAY(rb_ary_entry(data, i++));
1213
1214 ISEQ_BODY(iseq)->local_iseq = iseq;
1215
1216 iseq_type = iseq_type_from_sym(type);
1217 if (iseq_type == (enum rb_iseq_type)-1) {
1218 rb_raise(rb_eTypeError, "unsupported type: :%"PRIsVALUE, rb_sym2str(type));
1219 }
1220
1221 node_id = rb_hash_aref(misc, ID2SYM(rb_intern("node_id")));
1222
1223 code_location = rb_hash_aref(misc, ID2SYM(rb_intern("code_location")));
1224 if (RB_TYPE_P(code_location, T_ARRAY) && RARRAY_LEN(code_location) == 4) {
1225 tmp_loc.beg_pos.lineno = NUM2INT(rb_ary_entry(code_location, 0));
1226 tmp_loc.beg_pos.column = NUM2INT(rb_ary_entry(code_location, 1));
1227 tmp_loc.end_pos.lineno = NUM2INT(rb_ary_entry(code_location, 2));
1228 tmp_loc.end_pos.column = NUM2INT(rb_ary_entry(code_location, 3));
1229 }
1230
1231 if (SYM2ID(rb_hash_aref(misc, ID2SYM(rb_intern("parser")))) == rb_intern("prism")) {
1232 ISEQ_BODY(iseq)->prism = true;
1233 }
1234
1235 make_compile_option(&option, opt);
1236 option.peephole_optimization = FALSE; /* because peephole optimization can modify original iseq */
1237 prepare_iseq_build(iseq, name, path, realpath, first_lineno, &tmp_loc, NUM2INT(node_id),
1238 parent, 0, (enum rb_iseq_type)iseq_type, Qnil, &option);
1239
1240 rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body);
1241
1242 finish_iseq_build(iseq);
1243
1244 return iseqw_new(iseq);
1245}
1246
1247/*
1248 * :nodoc:
1249 */
1250static VALUE
1251iseq_s_load(int argc, VALUE *argv, VALUE self)
1252{
1253 VALUE data, opt=Qnil;
1254 rb_scan_args(argc, argv, "11", &data, &opt);
1255 return iseq_load(data, NULL, opt);
1256}
1257
1258VALUE
1259rb_iseq_load(VALUE data, VALUE parent, VALUE opt)
1260{
1261 return iseq_load(data, RTEST(parent) ? (rb_iseq_t *)parent : NULL, opt);
1262}
1263
1264static rb_iseq_t *
1265rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt)
1266{
1267 rb_iseq_t *iseq = NULL;
1268 rb_compile_option_t option;
1269#if !defined(__GNUC__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 8)
1270# define INITIALIZED volatile /* suppress warnings by gcc 4.8 */
1271#else
1272# define INITIALIZED /* volatile */
1273#endif
1274 VALUE (*parse)(VALUE vparser, VALUE fname, VALUE file, int start);
1275 int ln;
1276 VALUE INITIALIZED ast_value;
1277 rb_ast_t *ast;
1278 VALUE name = rb_fstring_lit("<compiled>");
1279
1280 /* safe results first */
1281 make_compile_option(&option, opt);
1282 ln = NUM2INT(line);
1283 StringValueCStr(file);
1284 if (RB_TYPE_P(src, T_FILE)) {
1285 parse = rb_parser_compile_file_path;
1286 }
1287 else {
1288 parse = rb_parser_compile_string_path;
1289 StringValue(src);
1290 }
1291 {
1292 const VALUE parser = rb_parser_new();
1293 const rb_iseq_t *outer_scope = rb_iseq_new(Qnil, name, name, Qnil, 0, ISEQ_TYPE_TOP);
1294 VALUE outer_scope_v = (VALUE)outer_scope;
1295 rb_parser_set_context(parser, outer_scope, FALSE);
1296 if (ruby_vm_keep_script_lines) rb_parser_set_script_lines(parser);
1297 RB_GC_GUARD(outer_scope_v);
1298 ast_value = (*parse)(parser, file, src, ln);
1299 }
1300
1301 ast = rb_ruby_ast_data_get(ast_value);
1302
1303 if (!ast || !ast->body.root) {
1304 rb_ast_dispose(ast);
1305 rb_exc_raise(GET_EC()->errinfo);
1306 }
1307 else {
1308 iseq = rb_iseq_new_with_opt(ast_value, name, file, realpath, ln,
1309 NULL, 0, ISEQ_TYPE_TOP, &option,
1310 Qnil);
1311 rb_ast_dispose(ast);
1312 }
1313
1314 return iseq;
1315}
1316
1317static rb_iseq_t *
1318pm_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt)
1319{
1320 rb_iseq_t *iseq = NULL;
1321 rb_compile_option_t option;
1322 int ln;
1323 VALUE name = rb_fstring_lit("<compiled>");
1324
1325 /* safe results first */
1326 make_compile_option(&option, opt);
1327 ln = NUM2INT(line);
1328 StringValueCStr(file);
1329
1330 bool parse_file = false;
1331 if (RB_TYPE_P(src, T_FILE)) {
1332 parse_file = true;
1333 src = rb_io_path(src);
1334 }
1335 else {
1336 src = StringValue(src);
1337 }
1338
1339 pm_parse_result_t result = { 0 };
1340 pm_options_line_set(&result.options, NUM2INT(line));
1341 pm_options_scopes_init(&result.options, 1);
1342 result.node.coverage_enabled = 1;
1343
1344 switch (option.frozen_string_literal) {
1345 case ISEQ_FROZEN_STRING_LITERAL_UNSET:
1346 break;
1347 case ISEQ_FROZEN_STRING_LITERAL_DISABLED:
1348 pm_options_frozen_string_literal_set(&result.options, false);
1349 break;
1350 case ISEQ_FROZEN_STRING_LITERAL_ENABLED:
1351 pm_options_frozen_string_literal_set(&result.options, true);
1352 break;
1353 default:
1354 rb_bug("pm_iseq_compile_with_option: invalid frozen_string_literal=%d", option.frozen_string_literal);
1355 break;
1356 }
1357
1358 VALUE script_lines;
1359 VALUE error;
1360
1361 if (parse_file) {
1362 error = pm_load_parse_file(&result, src, ruby_vm_keep_script_lines ? &script_lines : NULL);
1363 }
1364 else {
1365 error = pm_parse_string(&result, src, file, ruby_vm_keep_script_lines ? &script_lines : NULL);
1366 }
1367
1368 RB_GC_GUARD(src);
1369
1370 if (error == Qnil) {
1371 int error_state;
1372 iseq = pm_iseq_new_with_opt(&result.node, name, file, realpath, ln, NULL, 0, ISEQ_TYPE_TOP, &option, &error_state);
1373
1374 pm_parse_result_free(&result);
1375
1376 if (error_state) {
1377 RUBY_ASSERT(iseq == NULL);
1378 rb_jump_tag(error_state);
1379 }
1380 }
1381 else {
1382 pm_parse_result_free(&result);
1383 rb_exc_raise(error);
1384 }
1385
1386 return iseq;
1387}
1388
1389VALUE
1390rb_iseq_path(const rb_iseq_t *iseq)
1391{
1392 return pathobj_path(ISEQ_BODY(iseq)->location.pathobj);
1393}
1394
1395VALUE
1396rb_iseq_realpath(const rb_iseq_t *iseq)
1397{
1398 return pathobj_realpath(ISEQ_BODY(iseq)->location.pathobj);
1399}
1400
1401VALUE
1402rb_iseq_absolute_path(const rb_iseq_t *iseq)
1403{
1404 return rb_iseq_realpath(iseq);
1405}
1406
1407int
1408rb_iseq_from_eval_p(const rb_iseq_t *iseq)
1409{
1410 return NIL_P(rb_iseq_realpath(iseq));
1411}
1412
1413VALUE
1414rb_iseq_label(const rb_iseq_t *iseq)
1415{
1416 return ISEQ_BODY(iseq)->location.label;
1417}
1418
1419VALUE
1420rb_iseq_base_label(const rb_iseq_t *iseq)
1421{
1422 return ISEQ_BODY(iseq)->location.base_label;
1423}
1424
1425VALUE
1426rb_iseq_first_lineno(const rb_iseq_t *iseq)
1427{
1428 return RB_INT2NUM(ISEQ_BODY(iseq)->location.first_lineno);
1429}
1430
1431VALUE
1432rb_iseq_method_name(const rb_iseq_t *iseq)
1433{
1434 struct rb_iseq_constant_body *const body = ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq);
1435
1436 if (body->type == ISEQ_TYPE_METHOD) {
1437 return body->location.base_label;
1438 }
1439 else {
1440 return Qnil;
1441 }
1442}
1443
1444void
1445rb_iseq_code_location(const rb_iseq_t *iseq, int *beg_pos_lineno, int *beg_pos_column, int *end_pos_lineno, int *end_pos_column)
1446{
1447 const rb_code_location_t *loc = &ISEQ_BODY(iseq)->location.code_location;
1448 if (beg_pos_lineno) *beg_pos_lineno = loc->beg_pos.lineno;
1449 if (beg_pos_column) *beg_pos_column = loc->beg_pos.column;
1450 if (end_pos_lineno) *end_pos_lineno = loc->end_pos.lineno;
1451 if (end_pos_column) *end_pos_column = loc->end_pos.column;
1452}
1453
1454static ID iseq_type_id(enum rb_iseq_type type);
1455
1456VALUE
1457rb_iseq_type(const rb_iseq_t *iseq)
1458{
1459 return ID2SYM(iseq_type_id(ISEQ_BODY(iseq)->type));
1460}
1461
1462VALUE
1463rb_iseq_coverage(const rb_iseq_t *iseq)
1464{
1465 return ISEQ_COVERAGE(iseq);
1466}
1467
1468static int
1469remove_coverage_i(void *vstart, void *vend, size_t stride, void *data)
1470{
1471 VALUE v = (VALUE)vstart;
1472 for (; v != (VALUE)vend; v += stride) {
1473 void *ptr = rb_asan_poisoned_object_p(v);
1474 rb_asan_unpoison_object(v, false);
1475
1476 if (rb_obj_is_iseq(v)) {
1477 rb_iseq_t *iseq = (rb_iseq_t *)v;
1478 ISEQ_COVERAGE_SET(iseq, Qnil);
1479 }
1480
1481 asan_poison_object_if(ptr, v);
1482 }
1483 return 0;
1484}
1485
1486void
1487rb_iseq_remove_coverage_all(void)
1488{
1489 rb_objspace_each_objects(remove_coverage_i, NULL);
1490}
1491
1492/* define wrapper class methods (RubyVM::InstructionSequence) */
1493
1494static void
1495iseqw_mark(void *ptr)
1496{
1497 rb_gc_mark_movable(*(VALUE *)ptr);
1498}
1499
1500static size_t
1501iseqw_memsize(const void *ptr)
1502{
1503 return rb_iseq_memsize(*(const rb_iseq_t **)ptr);
1504}
1505
1506static void
1507iseqw_ref_update(void *ptr)
1508{
1509 VALUE *vptr = ptr;
1510 *vptr = rb_gc_location(*vptr);
1511}
1512
1513static const rb_data_type_t iseqw_data_type = {
1514 "T_IMEMO/iseq",
1515 {
1516 iseqw_mark,
1518 iseqw_memsize,
1519 iseqw_ref_update,
1520 },
1521 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
1522};
1523
1524static VALUE
1525iseqw_new(const rb_iseq_t *iseq)
1526{
1527 if (iseq->wrapper) {
1528 if (*(const rb_iseq_t **)rb_check_typeddata(iseq->wrapper, &iseqw_data_type) != iseq) {
1529 rb_raise(rb_eTypeError, "wrong iseq wrapper: %" PRIsVALUE " for %p",
1530 iseq->wrapper, (void *)iseq);
1531 }
1532 return iseq->wrapper;
1533 }
1534 else {
1535 rb_iseq_t **ptr;
1536 VALUE obj = TypedData_Make_Struct(rb_cISeq, rb_iseq_t *, &iseqw_data_type, ptr);
1537 RB_OBJ_WRITE(obj, ptr, iseq);
1538
1539 /* cache a wrapper object */
1540 RB_OBJ_WRITE((VALUE)iseq, &iseq->wrapper, obj);
1541
1542 return obj;
1543 }
1544}
1545
1546VALUE
1547rb_iseqw_new(const rb_iseq_t *iseq)
1548{
1549 return iseqw_new(iseq);
1550}
1551
1557static VALUE
1558iseqw_s_compile_parser(int argc, VALUE *argv, VALUE self, bool prism)
1559{
1560 VALUE src, file = Qnil, path = Qnil, line = Qnil, opt = Qnil;
1561 int i;
1562
1563 i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt);
1564 if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5);
1565 switch (i) {
1566 case 5: opt = argv[--i];
1567 case 4: line = argv[--i];
1568 case 3: path = argv[--i];
1569 case 2: file = argv[--i];
1570 }
1571
1572 if (NIL_P(file)) file = rb_fstring_lit("<compiled>");
1573 if (NIL_P(path)) path = file;
1574 if (NIL_P(line)) line = INT2FIX(1);
1575
1576 Check_Type(path, T_STRING);
1577 Check_Type(file, T_STRING);
1578
1579 rb_iseq_t *iseq;
1580 if (prism) {
1581 iseq = pm_iseq_compile_with_option(src, file, path, line, opt);
1582 }
1583 else {
1584 iseq = rb_iseq_compile_with_option(src, file, path, line, opt);
1585 }
1586
1587 return iseqw_new(iseq);
1588}
1589
1590/*
1591 * call-seq:
1592 * InstructionSequence.compile(source[, file[, path[, line[, options]]]]) -> iseq
1593 * InstructionSequence.new(source[, file[, path[, line[, options]]]]) -> iseq
1594 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code.
1597 *
1598 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1599 * real path and first line number of the ruby code in +source+ which are
1600 * metadata attached to the returned +iseq+.
1601 *
1602 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1603 * +require_relative+ base. It is recommended these should be the same full
1604 * path.
1605 *
1606 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1607 * modify the default behavior of the Ruby iseq compiler.
1608 *
1609 * For details regarding valid compile options see ::compile_option=.
1610 *
1611 * RubyVM::InstructionSequence.compile("a = 1 + 2")
1612 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1613 *
1614 * path = "test.rb"
1615 * RubyVM::InstructionSequence.compile(File.read(path), path, File.expand_path(path))
1616 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1617 *
1618 * file = File.open("test.rb")
1619 * RubyVM::InstructionSequence.compile(file)
1620 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1621 *
1622 * path = File.expand_path("test.rb")
1623 * RubyVM::InstructionSequence.compile(File.read(path), path, path)
1624 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1625 *
1626 */
1627static VALUE
1628iseqw_s_compile(int argc, VALUE *argv, VALUE self)
1629{
1630 return iseqw_s_compile_parser(argc, argv, self, rb_ruby_prism_p());
1631}
1632
1633/*
1634 * call-seq:
1635 * InstructionSequence.compile_parsey(source[, file[, path[, line[, options]]]]) -> iseq
1636 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code. It parses and compiles using parse.y.
1639 *
1640 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1641 * real path and first line number of the ruby code in +source+ which are
1642 * metadata attached to the returned +iseq+.
1643 *
1644 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1645 * +require_relative+ base. It is recommended these should be the same full
1646 * path.
1647 *
1648 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1649 * modify the default behavior of the Ruby iseq compiler.
1650 *
1651 * For details regarding valid compile options see ::compile_option=.
1652 *
1653 * RubyVM::InstructionSequence.compile_parsey("a = 1 + 2")
1654 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1655 *
1656 * path = "test.rb"
1657 * RubyVM::InstructionSequence.compile_parsey(File.read(path), path, File.expand_path(path))
1658 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1659 *
1660 * file = File.open("test.rb")
1661 * RubyVM::InstructionSequence.compile_parsey(file)
1662 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1663 *
1664 * path = File.expand_path("test.rb")
1665 * RubyVM::InstructionSequence.compile_parsey(File.read(path), path, path)
1666 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1667 *
1668 */
1669static VALUE
1670iseqw_s_compile_parsey(int argc, VALUE *argv, VALUE self)
1671{
1672 return iseqw_s_compile_parser(argc, argv, self, false);
1673}
1674
1675/*
1676 * call-seq:
1677 * InstructionSequence.compile_prism(source[, file[, path[, line[, options]]]]) -> iseq
1678 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code. It parses and compiles using prism.
1681 *
1682 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1683 * real path and first line number of the ruby code in +source+ which are
1684 * metadata attached to the returned +iseq+.
1685 *
1686 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1687 * +require_relative+ base. It is recommended these should be the same full
1688 * path.
1689 *
1690 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1691 * modify the default behavior of the Ruby iseq compiler.
1692 *
1693 * For details regarding valid compile options see ::compile_option=.
1694 *
1695 * RubyVM::InstructionSequence.compile_prism("a = 1 + 2")
1696 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1697 *
1698 * path = "test.rb"
1699 * RubyVM::InstructionSequence.compile_prism(File.read(path), path, File.expand_path(path))
1700 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1701 *
1702 * file = File.open("test.rb")
1703 * RubyVM::InstructionSequence.compile_prism(file)
1704 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1705 *
1706 * path = File.expand_path("test.rb")
1707 * RubyVM::InstructionSequence.compile_prism(File.read(path), path, path)
1708 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1709 *
1710 */
1711static VALUE
1712iseqw_s_compile_prism(int argc, VALUE *argv, VALUE self)
1713{
1714 return iseqw_s_compile_parser(argc, argv, self, true);
1715}
1716
1717/*
1718 * call-seq:
1719 * InstructionSequence.compile_file(file[, options]) -> iseq
1720 *
1721 * Takes +file+, a String with the location of a Ruby source file, reads,
1722 * parses and compiles the file, and returns +iseq+, the compiled
1723 * InstructionSequence with source location metadata set.
1724 *
1725 * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to
1726 * modify the default behavior of the Ruby iseq compiler.
1727 *
1728 * For details regarding valid compile options see ::compile_option=.
1729 *
1730 * # /tmp/hello.rb
1731 * puts "Hello, world!"
1732 *
1733 * # elsewhere
 * RubyVM::InstructionSequence.compile_file("/tmp/hello.rb")
 * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb:1>
1736 */
1737static VALUE
1738iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
1739{
1740 VALUE file, opt = Qnil;
1741 VALUE parser, f, exc = Qnil, ret;
1742 rb_ast_t *ast;
1743 VALUE ast_value;
1744 rb_compile_option_t option;
1745 int i;
1746
1747 i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt);
1748 if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2);
1749 switch (i) {
1750 case 2: opt = argv[--i];
1751 }
1752 FilePathValue(file);
1753 file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */
1754
1755 f = rb_file_open_str(file, "r");
1756
1757 rb_execution_context_t *ec = GET_EC();
1758 VALUE v = rb_vm_push_frame_fname(ec, file);
1759
1760 parser = rb_parser_new();
1761 rb_parser_set_context(parser, NULL, FALSE);
1762 ast_value = rb_parser_load_file(parser, file);
1763 ast = rb_ruby_ast_data_get(ast_value);
1764 if (!ast->body.root) exc = GET_EC()->errinfo;
1765
1766 rb_io_close(f);
1767 if (!ast->body.root) {
1768 rb_ast_dispose(ast);
1769 rb_exc_raise(exc);
1770 }
1771
1772 make_compile_option(&option, opt);
1773
1774 ret = iseqw_new(rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"),
1775 file,
1776 rb_realpath_internal(Qnil, file, 1),
1777 1, NULL, 0, ISEQ_TYPE_TOP, &option,
1778 Qnil));
1779 rb_ast_dispose(ast);
1780 RB_GC_GUARD(ast_value);
1781
1782 rb_vm_pop_frame(ec);
1783 RB_GC_GUARD(v);
1784 return ret;
1785}
1786
1787/*
1788 * call-seq:
1789 * InstructionSequence.compile_file_prism(file[, options]) -> iseq
1790 *
1791 * Takes +file+, a String with the location of a Ruby source file, reads,
1792 * parses and compiles the file, and returns +iseq+, the compiled
1793 * InstructionSequence with source location metadata set. It parses and
1794 * compiles using prism.
1795 *
1796 * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to
1797 * modify the default behavior of the Ruby iseq compiler.
1798 *
1799 * For details regarding valid compile options see ::compile_option=.
1800 *
1801 * # /tmp/hello.rb
1802 * puts "Hello, world!"
1803 *
1804 * # elsewhere
 * RubyVM::InstructionSequence.compile_file_prism("/tmp/hello.rb")
 * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb:1>
1807 */
1808static VALUE
1809iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self)
1810{
1811 VALUE file, opt = Qnil, ret;
1812 rb_compile_option_t option;
1813 int i;
1814
1815 i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt);
1816 if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2);
1817 switch (i) {
1818 case 2: opt = argv[--i];
1819 }
1820 FilePathValue(file);
1821 file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */
1822
1823 rb_execution_context_t *ec = GET_EC();
1824 VALUE v = rb_vm_push_frame_fname(ec, file);
1825
1826 pm_parse_result_t result = { 0 };
1827 result.options.line = 1;
1828 result.node.coverage_enabled = 1;
1829
1830 VALUE script_lines;
1831 VALUE error = pm_load_parse_file(&result, file, ruby_vm_keep_script_lines ? &script_lines : NULL);
1832
1833 if (error == Qnil) {
1834 make_compile_option(&option, opt);
1835
1836 int error_state;
1837 rb_iseq_t *iseq = pm_iseq_new_with_opt(&result.node, rb_fstring_lit("<main>"),
1838 file,
1839 rb_realpath_internal(Qnil, file, 1),
1840 1, NULL, 0, ISEQ_TYPE_TOP, &option, &error_state);
1841
1842 pm_parse_result_free(&result);
1843
1844 if (error_state) {
1845 RUBY_ASSERT(iseq == NULL);
1846 rb_jump_tag(error_state);
1847 }
1848
1849 ret = iseqw_new(iseq);
1850 rb_vm_pop_frame(ec);
1851 RB_GC_GUARD(v);
1852 return ret;
1853 }
1854 else {
1855 pm_parse_result_free(&result);
1856 rb_vm_pop_frame(ec);
1857 RB_GC_GUARD(v);
1858 rb_exc_raise(error);
1859 }
1860}
1861
1862/*
1863 * call-seq:
1864 * InstructionSequence.compile_option = options
1865 *
1866 * Sets the default values for various optimizations in the Ruby iseq
1867 * compiler.
1868 *
1869 * Possible values for +options+ include +true+, which enables all options,
1870 * +false+ which disables all options, and +nil+ which leaves all options
1871 * unchanged.
1872 *
1873 * You can also pass a +Hash+ of +options+ that you want to change, any
1874 * options not present in the hash will be left unchanged.
1875 *
1876 * Possible option names (which are keys in +options+) which can be set to
1877 * +true+ or +false+ include:
1878 *
1879 * * +:inline_const_cache+
1880 * * +:instructions_unification+
1881 * * +:operands_unification+
1882 * * +:peephole_optimization+
1883 * * +:specialized_instruction+
1884 * * +:tailcall_optimization+
1885 *
1886 * Additionally, +:debug_level+ can be set to an integer.
1887 *
1888 * These default options can be overwritten for a single run of the iseq
1889 * compiler by passing any of the above values as the +options+ parameter to
1890 * ::new, ::compile and ::compile_file.
1891 */
1892static VALUE
1893iseqw_s_compile_option_set(VALUE self, VALUE opt)
1894{
1895 rb_compile_option_t option;
1896 make_compile_option(&option, opt);
1897 COMPILE_OPTION_DEFAULT = option;
1898 return opt;
1899}
1900
1901/*
1902 * call-seq:
1903 * InstructionSequence.compile_option -> options
1904 *
1905 * Returns a hash of default options used by the Ruby iseq compiler.
1906 *
1907 * For details, see InstructionSequence.compile_option=.
1908 */
1909static VALUE
1910iseqw_s_compile_option_get(VALUE self)
1911{
1912 return make_compile_option_value(&COMPILE_OPTION_DEFAULT);
1913}
1914
1915static const rb_iseq_t *
1916iseqw_check(VALUE iseqw)
1917{
1918 rb_iseq_t **iseq_ptr;
1919 TypedData_Get_Struct(iseqw, rb_iseq_t *, &iseqw_data_type, iseq_ptr);
1920 rb_iseq_t *iseq = *iseq_ptr;
1921
1922 if (!ISEQ_BODY(iseq)) {
1923 rb_ibf_load_iseq_complete(iseq);
1924 }
1925
1926 if (!ISEQ_BODY(iseq)->location.label) {
1927 rb_raise(rb_eTypeError, "uninitialized InstructionSequence");
1928 }
1929 return iseq;
1930}
1931
1932const rb_iseq_t *
1933rb_iseqw_to_iseq(VALUE iseqw)
1934{
1935 return iseqw_check(iseqw);
1936}
1937
1938/*
1939 * call-seq:
1940 * iseq.eval -> obj
1941 *
1942 * Evaluates the instruction sequence and returns the result.
1943 *
1944 * RubyVM::InstructionSequence.compile("1 + 2").eval #=> 3
1945 */
1946static VALUE
1947iseqw_eval(VALUE self)
1948{
1949 const rb_iseq_t *iseq = iseqw_check(self);
1950 if (0 == ISEQ_BODY(iseq)->iseq_size) {
1951 rb_raise(rb_eTypeError, "attempt to evaluate dummy InstructionSequence");
1952 }
1953 return rb_iseq_eval(iseq);
1954}
1955
1956/*
1957 * Returns a human-readable string representation of this instruction
1958 * sequence, including the #label and #path.
1959 */
1960static VALUE
1961iseqw_inspect(VALUE self)
1962{
1963 const rb_iseq_t *iseq = iseqw_check(self);
1964 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
1965 VALUE klass = rb_class_name(rb_obj_class(self));
1966
1967 if (!body->location.label) {
1968 return rb_sprintf("#<%"PRIsVALUE": uninitialized>", klass);
1969 }
1970 else {
1971 return rb_sprintf("<%"PRIsVALUE":%"PRIsVALUE"@%"PRIsVALUE":%d>",
1972 klass,
1973 body->location.label, rb_iseq_path(iseq),
1974 FIX2INT(rb_iseq_first_lineno(iseq)));
1975 }
1976}
1977
1978/*
1979 * Returns the path of this instruction sequence.
1980 *
1981 * <code><compiled></code> if the iseq was evaluated from a string.
1982 *
1983 * For example, using irb:
1984 *
1985 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
1986 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1987 * iseq.path
1988 * #=> "<compiled>"
1989 *
1990 * Using ::compile_file:
1991 *
1992 * # /tmp/method.rb
1993 * def hello
1994 * puts "hello, world"
1995 * end
1996 *
1997 * # in irb
1998 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1999 * > iseq.path #=> /tmp/method.rb
2000 */
2001static VALUE
2002iseqw_path(VALUE self)
2003{
2004 return rb_iseq_path(iseqw_check(self));
2005}
2006
2007/*
2008 * Returns the absolute path of this instruction sequence.
2009 *
2010 * +nil+ if the iseq was evaluated from a string.
2011 *
2012 * For example, using ::compile_file:
2013 *
2014 * # /tmp/method.rb
2015 * def hello
2016 * puts "hello, world"
2017 * end
2018 *
2019 * # in irb
2020 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2021 * > iseq.absolute_path #=> /tmp/method.rb
2022 */
2023static VALUE
2024iseqw_absolute_path(VALUE self)
2025{
2026 return rb_iseq_realpath(iseqw_check(self));
2027}
2028
2029/* Returns the label of this instruction sequence.
2030 *
2031 * <code><main></code> if it's at the top level, <code><compiled></code> if it
2032 * was evaluated from a string.
2033 *
2034 * For example, using irb:
2035 *
2036 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2037 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
2038 * iseq.label
2039 * #=> "<compiled>"
2040 *
2041 * Using ::compile_file:
2042 *
2043 * # /tmp/method.rb
2044 * def hello
2045 * puts "hello, world"
2046 * end
2047 *
2048 * # in irb
2049 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2050 * > iseq.label #=> <main>
2051 */
2052static VALUE
2053iseqw_label(VALUE self)
2054{
2055 return rb_iseq_label(iseqw_check(self));
2056}
2057
2058/* Returns the base label of this instruction sequence.
2059 *
2060 * For example, using irb:
2061 *
2062 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2063 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
2064 * iseq.base_label
2065 * #=> "<compiled>"
2066 *
2067 * Using ::compile_file:
2068 *
2069 * # /tmp/method.rb
2070 * def hello
2071 * puts "hello, world"
2072 * end
2073 *
2074 * # in irb
2075 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2076 * > iseq.base_label #=> <main>
2077 */
2078static VALUE
2079iseqw_base_label(VALUE self)
2080{
2081 return rb_iseq_base_label(iseqw_check(self));
2082}
2083
2084/* Returns the number of the first source line where the instruction sequence
2085 * was loaded from.
2086 *
2087 * For example, using irb:
2088 *
2089 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2090 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
2091 * iseq.first_lineno
2092 * #=> 1
2093 */
2094static VALUE
2095iseqw_first_lineno(VALUE self)
2096{
2097 return rb_iseq_first_lineno(iseqw_check(self));
2098}
2099
2100static VALUE iseq_data_to_ary(const rb_iseq_t *iseq);
2101
2102/*
2103 * call-seq:
2104 * iseq.to_a -> ary
2105 *
2106 * Returns an Array with 14 elements representing the instruction sequence
2107 * with the following data:
2108 *
2109 * [magic]
2110 * A string identifying the data format. <b>Always
2111 * +YARVInstructionSequence/SimpleDataFormat+.</b>
2112 *
2113 * [major_version]
2114 * The major version of the instruction sequence.
2115 *
2116 * [minor_version]
2117 * The minor version of the instruction sequence.
2118 *
2119 * [format_type]
2120 * A number identifying the data format. <b>Always 1</b>.
2121 *
2122 * [misc]
2123 * A hash containing:
2124 *
2125 * [+:arg_size+]
2126 * the total number of arguments taken by the method or the block (0 if
2127 * _iseq_ doesn't represent a method or block)
2128 * [+:local_size+]
2129 * the number of local variables + 1
2130 * [+:stack_max+]
2131 * used in calculating the stack depth at which a SystemStackError is
2132 * thrown.
2133 *
2134 * [#label]
2135 * The name of the context (block, method, class, module, etc.) that this
2136 * instruction sequence belongs to.
2137 *
2138 * <code><main></code> if it's at the top level, <code><compiled></code> if
2139 * it was evaluated from a string.
2140 *
2141 * [#path]
2142 * The relative path to the Ruby file where the instruction sequence was
2143 * loaded from.
2144 *
2145 * <code><compiled></code> if the iseq was evaluated from a string.
2146 *
2147 * [#absolute_path]
2148 * The absolute path to the Ruby file where the instruction sequence was
2149 * loaded from.
2150 *
2151 * +nil+ if the iseq was evaluated from a string.
2152 *
2153 * [#first_lineno]
2154 * The number of the first source line where the instruction sequence was
2155 * loaded from.
2156 *
2157 * [type]
2158 * The type of the instruction sequence.
2159 *
2160 * Valid values are +:top+, +:method+, +:block+, +:class+, +:rescue+,
2161 * +:ensure+, +:eval+, +:main+, and +:plain+.
2162 *
2163 * [locals]
2164 * An array containing the names of all arguments and local variables as
2165 * symbols.
2166 *
2167 * [params]
2168 * A Hash object containing parameter information.
2169 *
2170 * More info about these values can be found in +vm_core.h+.
2171 *
2172 * [catch_table]
2173 * A list of exceptions and control flow operators (rescue, next, redo,
2174 * break, etc.).
2175 *
2176 * [bytecode]
2177 * An array of arrays containing the instruction names and operands that
2178 * make up the body of the instruction sequence.
2179 *
2180 * Note that this format is MRI specific and version dependent.
2181 *
2182 */
2183static VALUE
2184iseqw_to_a(VALUE self)
2185{
2186 const rb_iseq_t *iseq = iseqw_check(self);
2187 return iseq_data_to_ary(iseq);
2188}
2189
2190#if VM_INSN_INFO_TABLE_IMPL == 1 /* binary search */
2191static const struct iseq_insn_info_entry *
2192get_insn_info_binary_search(const rb_iseq_t *iseq, size_t pos)
2193{
2194 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2195 size_t size = body->insns_info.size;
2196 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
2197 const unsigned int *positions = body->insns_info.positions;
2198 const int debug = 0;
2199
2200 if (debug) {
2201 printf("size: %"PRIuSIZE"\n", size);
2202 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2203 (size_t)0, positions[0], insns_info[0].line_no, pos);
2204 }
2205
2206 if (size == 0) {
2207 return NULL;
2208 }
2209 else if (size == 1) {
2210 return &insns_info[0];
2211 }
2212 else {
2213 size_t l = 1, r = size - 1;
2214 while (l <= r) {
2215 size_t m = l + (r - l) / 2;
2216 if (positions[m] == pos) {
2217 return &insns_info[m];
2218 }
2219 if (positions[m] < pos) {
2220 l = m + 1;
2221 }
2222 else {
2223 r = m - 1;
2224 }
2225 }
2226 if (l >= size) {
2227 return &insns_info[size-1];
2228 }
2229 if (positions[l] > pos) {
2230 return &insns_info[l-1];
2231 }
2232 return &insns_info[l];
2233 }
2234}
2235
2236static const struct iseq_insn_info_entry *
2237get_insn_info(const rb_iseq_t *iseq, size_t pos)
2238{
2239 return get_insn_info_binary_search(iseq, pos);
2240}
2241#endif
2242
2243#if VM_INSN_INFO_TABLE_IMPL == 2 /* succinct bitvector */
2244static const struct iseq_insn_info_entry *
2245get_insn_info_succinct_bitvector(const rb_iseq_t *iseq, size_t pos)
2246{
2247 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2248 size_t size = body->insns_info.size;
2249 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
2250 const int debug = 0;
2251
2252 if (debug) {
2253#if VM_CHECK_MODE > 0
2254 const unsigned int *positions = body->insns_info.positions;
2255 printf("size: %"PRIuSIZE"\n", size);
2256 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2257 (size_t)0, positions[0], insns_info[0].line_no, pos);
2258#else
2259 printf("size: %"PRIuSIZE"\n", size);
2260 printf("insns_info[%"PRIuSIZE"]: line: %d, pos: %"PRIuSIZE"\n",
2261 (size_t)0, insns_info[0].line_no, pos);
2262#endif
2263 }
2264
2265 if (size == 0) {
2266 return NULL;
2267 }
2268 else if (size == 1) {
2269 return &insns_info[0];
2270 }
2271 else {
2272 int index;
2273 VM_ASSERT(body->insns_info.succ_index_table != NULL);
2274 index = succ_index_lookup(body->insns_info.succ_index_table, (int)pos);
2275 return &insns_info[index-1];
2276 }
2277}
2278
2279static const struct iseq_insn_info_entry *
2280get_insn_info(const rb_iseq_t *iseq, size_t pos)
2281{
2282 return get_insn_info_succinct_bitvector(iseq, pos);
2283}
2284#endif
2285
2286#if VM_CHECK_MODE > 0 || VM_INSN_INFO_TABLE_IMPL == 0
2287static const struct iseq_insn_info_entry *
2288get_insn_info_linear_search(const rb_iseq_t *iseq, size_t pos)
2289{
2290 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2291 size_t i = 0, size = body->insns_info.size;
2292 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
2293 const unsigned int *positions = body->insns_info.positions;
2294 const int debug = 0;
2295
2296 if (debug) {
2297 printf("size: %"PRIuSIZE"\n", size);
2298 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2299 i, positions[i], insns_info[i].line_no, pos);
2300 }
2301
2302 if (size == 0) {
2303 return NULL;
2304 }
2305 else if (size == 1) {
2306 return &insns_info[0];
2307 }
2308 else {
2309 for (i=1; i<size; i++) {
2310 if (debug) printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2311 i, positions[i], insns_info[i].line_no, pos);
2312
2313 if (positions[i] == pos) {
2314 return &insns_info[i];
2315 }
2316 if (positions[i] > pos) {
2317 return &insns_info[i-1];
2318 }
2319 }
2320 }
2321 return &insns_info[i-1];
2322}
2323#endif
2324
2325#if VM_INSN_INFO_TABLE_IMPL == 0 /* linear search */
2326static const struct iseq_insn_info_entry *
2327get_insn_info(const rb_iseq_t *iseq, size_t pos)
2328{
2329 return get_insn_info_linear_search(iseq, pos);
2330}
2331#endif
2332
2333#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
2334static void
2335validate_get_insn_info(const rb_iseq_t *iseq)
2336{
2337 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2338 size_t i;
2339 for (i = 0; i < body->iseq_size; i++) {
2340 if (get_insn_info_linear_search(iseq, i) != get_insn_info(iseq, i)) {
2341 rb_bug("validate_get_insn_info: get_insn_info_linear_search(iseq, %"PRIuSIZE") != get_insn_info(iseq, %"PRIuSIZE")", i, i);
2342 }
2343 }
2344}
2345#endif
2346
2347unsigned int
2348rb_iseq_line_no(const rb_iseq_t *iseq, size_t pos)
2349{
2350 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
2351
2352 if (entry) {
2353 return entry->line_no;
2354 }
2355 else {
2356 return 0;
2357 }
2358}
2359
2360#ifdef USE_ISEQ_NODE_ID
2361int
2362rb_iseq_node_id(const rb_iseq_t *iseq, size_t pos)
2363{
2364 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
2365
2366 if (entry) {
2367 return entry->node_id;
2368 }
2369 else {
2370 return 0;
2371 }
2372}
2373#endif
2374
2376rb_iseq_event_flags(const rb_iseq_t *iseq, size_t pos)
2377{
2378 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
2379 if (entry) {
2380 return entry->events;
2381 }
2382 else {
2383 return 0;
2384 }
2385}
2386
2387// Clear tracing event flags and turn off tracing for a given instruction as needed.
2388// This is currently used after updating a one-shot line coverage for the current instruction.
2389void
2390rb_iseq_clear_event_flags(const rb_iseq_t *iseq, size_t pos, rb_event_flag_t reset)
2391{
2392 struct iseq_insn_info_entry *entry = (struct iseq_insn_info_entry *)get_insn_info(iseq, pos);
2393 if (entry) {
2394 entry->events &= ~reset;
2395 if (!(entry->events & iseq->aux.exec.global_trace_events)) {
2396 void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos);
2397 rb_iseq_trace_flag_cleared(iseq, pos);
2398 }
2399 }
2400}
2401
2402static VALUE
2403local_var_name(const rb_iseq_t *diseq, VALUE level, VALUE op)
2404{
2405 VALUE i;
2406 VALUE name;
2407 ID lid;
2408 int idx;
2409
2410 for (i = 0; i < level; i++) {
2411 diseq = ISEQ_BODY(diseq)->parent_iseq;
2412 }
2413 idx = ISEQ_BODY(diseq)->local_table_size - (int)op - 1;
2414 lid = ISEQ_BODY(diseq)->local_table[idx];
2415 name = rb_id2str(lid);
2416 if (!name) {
2417 name = rb_str_new_cstr("?");
2418 }
2419 else if (!rb_is_local_id(lid)) {
2420 name = rb_str_inspect(name);
2421 }
2422 else {
2423 name = rb_str_dup(name);
2424 }
2425 rb_str_catf(name, "@%d", idx);
2426 return name;
2427}
2428
2429int rb_insn_unified_local_var_level(VALUE);
2430VALUE rb_dump_literal(VALUE lit);
2431
/*
 * Builds the string shown by the disassembler for operand number `op_no`
 * (value `op`) of instruction `insn`.
 *
 * The operand's type character comes from insn_op_types(insn) and selects
 * the formatting below.  `len` and `pos` are only used to print a TS_OFFSET
 * operand as pos + len + op.  `pnop` is only dereferenced for a TS_LINDEX
 * operand that is followed by a TS_NUM operand, where *pnop supplies the
 * level passed to local_var_name().  When `child` is non-zero, TS_VALUE
 * operands whose class is rb_cISeq and non-zero TS_ISEQ operands are pushed
 * onto it.
 *
 * Returns the formatted value; an unknown type character ends in rb_bug().
 */
2432VALUE
2433rb_insn_operand_intern(const rb_iseq_t *iseq,
2434 VALUE insn, int op_no, VALUE op,
2435 int len, size_t pos, const VALUE *pnop, VALUE child)
2436{
2437 const char *types = insn_op_types(insn);
2438 char type = types[op_no];
2439 VALUE ret = Qundef;
2440
2441 switch (type) {
2442 case TS_OFFSET: /* LONG */
2443 ret = rb_sprintf("%"PRIdVALUE, (VALUE)(pos + len + op));
2444 break;
2445
2446 case TS_NUM: /* ULONG */
 /* Operand 0 of `defined` and of `checktype` is shown by name when one is
  * available; everything else falls through to the plain number below. */
2447 if (insn == BIN(defined) && op_no == 0) {
2448 enum defined_type deftype = (enum defined_type)op;
2449 switch (deftype) {
2450 case DEFINED_FUNC:
2451 ret = rb_fstring_lit("func");
2452 break;
2453 case DEFINED_REF:
2454 ret = rb_fstring_lit("ref");
2455 break;
2456 case DEFINED_CONST_FROM:
2457 ret = rb_fstring_lit("constant-from");
2458 break;
2459 default:
2460 ret = rb_iseq_defined_string(deftype);
2461 break;
2462 }
 /* a zero result means no name was produced: use the numeric form */
2463 if (ret) break;
2464 }
2465 else if (insn == BIN(checktype) && op_no == 0) {
2466 const char *type_str = rb_type_str((enum ruby_value_type)op);
2467 if (type_str) {
2468 ret = rb_str_new_cstr(type_str); break;
2469 }
2470 }
2471 ret = rb_sprintf("%"PRIuVALUE, op);
2472 break;
2473
2474 case TS_LINDEX:{
 /* Local index: shown as a variable name when the level is known, either
  * from the next operand (*pnop) or from the instruction itself;
  * otherwise the raw index is inspected. */
2475 int level;
2476 if (types[op_no+1] == TS_NUM && pnop) {
2477 ret = local_var_name(iseq, *pnop, op - VM_ENV_DATA_SIZE);
2478 }
2479 else if ((level = rb_insn_unified_local_var_level(insn)) >= 0) {
2480 ret = local_var_name(iseq, (VALUE)level, op - VM_ENV_DATA_SIZE);
2481 }
2482 else {
2483 ret = rb_inspect(INT2FIX(op));
2484 }
2485 break;
2486 }
2487 case TS_ID: /* ID (symbol) */
2488 ret = rb_inspect(ID2SYM(op));
2489 break;
2490
2491 case TS_VALUE: /* VALUE */
2492 op = obj_resurrect(op);
2493 if (insn == BIN(defined) && op_no == 1 && FIXNUM_P(op)) {
2494 /* should be DEFINED_REF */
 /* non-zero value: low bit set prints (type >> 1) with %c, otherwise
  * with %d; a zero value falls through to rb_dump_literal() */
2495 int type = NUM2INT(op);
2496 if (type) {
2497 if (type & 1) {
2498 ret = rb_sprintf(":$%c", (type >> 1));
2499 }
2500 else {
2501 ret = rb_sprintf(":$%d", (type >> 1));
2502 }
2503 break;
2504 }
2505 }
2506 ret = rb_dump_literal(op);
2507 if (CLASS_OF(op) == rb_cISeq) {
2508 if (child) {
2509 rb_ary_push(child, op);
2510 }
2511 }
2512 break;
2513
2514 case TS_ISEQ: /* iseq */
2515 {
 /* a zero operand prints as "nil"; otherwise the iseq's label is used and
  * the iseq is recorded in `child` (note: this `iseq` shadows the parameter) */
2516 if (op) {
2517 const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op);
2518 ret = ISEQ_BODY(iseq)->location.label;
2519 if (child) {
2520 rb_ary_push(child, (VALUE)iseq);
2521 }
2522 }
2523 else {
2524 ret = rb_str_new2("nil");
2525 }
2526 break;
2527 }
2528
2529 case TS_IC:
2530 {
 /* prints the entry's index within is_entries followed by the constant
  * path segments joined with "::"; the segment list is read up to a 0 ID */
2531 ret = rb_sprintf("<ic:%"PRIdPTRDIFF" ", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries);
2532 const ID *segments = ((IC)op)->segments;
2533 rb_str_cat2(ret, rb_id2name(*segments++));
2534 while (*segments) {
2535 rb_str_catf(ret, "::%s", rb_id2name(*segments++));
2536 }
2537 rb_str_cat2(ret, ">");
2538 }
2539 break;
2540 case TS_IVC:
2541 case TS_ICVARC:
2542 case TS_ISE:
2543 ret = rb_sprintf("<is:%"PRIdPTRDIFF">", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries);
2544 break;
2545
2546 case TS_CALLDATA:
2547 {
 /* collects "mid:", "argc:", optional "kw:[...]" and the set call flags
  * (joined with "|") into an array, then joins the parts with ", " */
2548 struct rb_call_data *cd = (struct rb_call_data *)op;
2549 const struct rb_callinfo *ci = cd->ci;
2550 VALUE ary = rb_ary_new();
2551 ID mid = vm_ci_mid(ci);
2552
2553 if (mid) {
2554 rb_ary_push(ary, rb_sprintf("mid:%"PRIsVALUE, rb_id2str(mid)));
2555 }
2556
2557 rb_ary_push(ary, rb_sprintf("argc:%d", vm_ci_argc(ci)));
2558
2559 if (vm_ci_flag(ci) & VM_CALL_KWARG) {
2560 const struct rb_callinfo_kwarg *kw_args = vm_ci_kwarg(ci);
2561 VALUE kw_ary = rb_ary_new_from_values(kw_args->keyword_len, kw_args->keywords);
2562 rb_ary_push(ary, rb_sprintf("kw:[%"PRIsVALUE"]", rb_ary_join(kw_ary, rb_str_new2(","))));
2563 }
2564
2565 if (vm_ci_flag(ci)) {
2566 VALUE flags = rb_ary_new();
2567# define CALL_FLAG(n) if (vm_ci_flag(ci) & VM_CALL_##n) rb_ary_push(flags, rb_str_new2(#n))
2568 CALL_FLAG(ARGS_SPLAT);
2569 CALL_FLAG(ARGS_SPLAT_MUT);
2570 CALL_FLAG(ARGS_BLOCKARG);
2571 CALL_FLAG(FCALL);
2572 CALL_FLAG(VCALL);
2573 CALL_FLAG(ARGS_SIMPLE);
2574 CALL_FLAG(TAILCALL);
2575 CALL_FLAG(SUPER);
2576 CALL_FLAG(ZSUPER);
2577 CALL_FLAG(KWARG);
2578 CALL_FLAG(KW_SPLAT);
2579 CALL_FLAG(KW_SPLAT_MUT);
2580 CALL_FLAG(FORWARDING);
2581 CALL_FLAG(OPT_SEND); /* maybe not reachable */
2582 rb_ary_push(ary, rb_ary_join(flags, rb_str_new2("|")));
2583 }
2584
2585 ret = rb_sprintf("<calldata!%"PRIsVALUE">", rb_ary_join(ary, rb_str_new2(", ")));
2586 }
2587 break;
2588
2589 case TS_CDHASH:
2590 ret = rb_str_new2("<cdhash>");
2591 break;
2592
2593 case TS_FUNCPTR:
2594 {
 /* with dladdr() available the symbol name is printed when it resolves;
  * otherwise (or without dladdr) the fixed placeholder is used */
2595#ifdef HAVE_DLADDR
2596 Dl_info info;
2597 if (dladdr((void *)op, &info) && info.dli_sname) {
2598 ret = rb_str_new_cstr(info.dli_sname);
2599 break;
2600 }
2601#endif
2602 ret = rb_str_new2("<funcptr>");
2603 }
2604 break;
2605
2606 case TS_BUILTIN:
2607 {
2608 const struct rb_builtin_function *bf = (const struct rb_builtin_function *)op;
2609 ret = rb_sprintf("<builtin!%s/%d>",
2610 bf->name, bf->argc);
2611 }
2612 break;
2613
2614 default:
2615 rb_bug("unknown operand type: %c", type);
2616 }
2617 return ret;
2618}
2619
2620static VALUE
2621right_strip(VALUE str)
2622{
2623 const char *beg = RSTRING_PTR(str), *end = RSTRING_END(str);
2624 while (end-- > beg && *end == ' ');
2625 rb_str_set_len(str, end - beg + 1);
2626 return str;
2627}
2628
2633int
2634rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos,
2635 const rb_iseq_t *iseq, VALUE child)
2636{
2637 VALUE insn = code[pos];
2638 int len = insn_len(insn);
2639 int j;
2640 const char *types = insn_op_types(insn);
2641 VALUE str = rb_str_new(0, 0);
2642 const char *insn_name_buff;
2643
2644 insn_name_buff = insn_name(insn);
2645 if (1) {
2646 extern const int rb_vm_max_insn_name_size;
2647 rb_str_catf(str, "%04"PRIuSIZE" %-*s ", pos, rb_vm_max_insn_name_size, insn_name_buff);
2648 }
2649 else {
2650 rb_str_catf(str, "%04"PRIuSIZE" %-28.*s ", pos,
2651 (int)strcspn(insn_name_buff, "_"), insn_name_buff);
2652 }
2653
2654 for (j = 0; types[j]; j++) {
2655 VALUE opstr = rb_insn_operand_intern(iseq, insn, j, code[pos + j + 1],
2656 len, pos, &code[pos + j + 2],
2657 child);
2658 rb_str_concat(str, opstr);
2659
2660 if (types[j + 1]) {
2661 rb_str_cat2(str, ", ");
2662 }
2663 }
2664
2665 {
2666 unsigned int line_no = rb_iseq_line_no(iseq, pos);
2667 unsigned int prev = pos == 0 ? 0 : rb_iseq_line_no(iseq, pos - 1);
2668 if (line_no && line_no != prev) {
2669 long slen = RSTRING_LEN(str);
2670 slen = (slen > 70) ? 0 : (70 - slen);
2671 str = rb_str_catf(str, "%*s(%4d)", (int)slen, "", line_no);
2672 }
2673 }
2674
2675 {
2676 rb_event_flag_t events = rb_iseq_event_flags(iseq, pos);
2677 if (events) {
2678 str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s%s]",
2679 events & RUBY_EVENT_LINE ? "Li" : "",
2680 events & RUBY_EVENT_CLASS ? "Cl" : "",
2681 events & RUBY_EVENT_END ? "En" : "",
2682 events & RUBY_EVENT_CALL ? "Ca" : "",
2683 events & RUBY_EVENT_RETURN ? "Re" : "",
2684 events & RUBY_EVENT_C_CALL ? "Cc" : "",
2685 events & RUBY_EVENT_C_RETURN ? "Cr" : "",
2686 events & RUBY_EVENT_B_CALL ? "Bc" : "",
2687 events & RUBY_EVENT_B_RETURN ? "Br" : "",
2688 events & RUBY_EVENT_RESCUE ? "Rs" : "",
2689 events & RUBY_EVENT_COVERAGE_LINE ? "Cli" : "",
2690 events & RUBY_EVENT_COVERAGE_BRANCH ? "Cbr" : "");
2691 }
2692 }
2693
2694 right_strip(str);
2695 if (ret) {
2696 rb_str_cat2(str, "\n");
2697 rb_str_concat(ret, str);
2698 }
2699 else {
2700 printf("%.*s\n", (int)RSTRING_LEN(str), RSTRING_PTR(str));
2701 }
2702 return len;
2703}
2704
2705static const char *
2706catch_type(int type)
2707{
2708 switch (type) {
2709 case CATCH_TYPE_RESCUE:
2710 return "rescue";
2711 case CATCH_TYPE_ENSURE:
2712 return "ensure";
2713 case CATCH_TYPE_RETRY:
2714 return "retry";
2715 case CATCH_TYPE_BREAK:
2716 return "break";
2717 case CATCH_TYPE_REDO:
2718 return "redo";
2719 case CATCH_TYPE_NEXT:
2720 return "next";
2721 default:
2722 rb_bug("unknown catch type: %d", type);
2723 return 0;
2724 }
2725}
2726
2727static VALUE
2728iseq_inspect(const rb_iseq_t *iseq)
2729{
2730 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2731 if (!body->location.label) {
2732 return rb_sprintf("#<ISeq: uninitialized>");
2733 }
2734 else {
2735 const rb_code_location_t *loc = &body->location.code_location;
2736 return rb_sprintf("#<ISeq:%"PRIsVALUE"@%"PRIsVALUE":%d (%d,%d)-(%d,%d)>",
2737 body->location.label, rb_iseq_path(iseq),
2738 loc->beg_pos.lineno,
2739 loc->beg_pos.lineno,
2740 loc->beg_pos.column,
2741 loc->end_pos.lineno,
2742 loc->end_pos.column);
2743 }
2744}
2745
/*
 * Typed-data descriptor ("tmpset") for the temporary st_table that
 * rb_iseq_disasm_recursive() wraps to remember already disassembled iseqs.
 * The wrapped table is marked with rb_mark_set and released with
 * st_free_table.
 */
2746static const rb_data_type_t tmp_set = {
2747 "tmpset",
2748 {(void (*)(void *))rb_mark_set, (void (*)(void *))st_free_table, 0, 0,},
2749 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
2750};
2751
2752static VALUE
2753rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent)
2754{
2755 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2756 VALUE *code;
2757 VALUE str = rb_str_new(0, 0);
2758 VALUE child = rb_ary_hidden_new(3);
2759 unsigned int size;
2760 unsigned int i;
2761 long l;
2762 size_t n;
2763 enum {header_minlen = 72};
2764 st_table *done_iseq = 0;
2765 VALUE done_iseq_wrapper = Qnil;
2766 const char *indent_str;
2767 long indent_len;
2768
2769 size = body->iseq_size;
2770
2771 indent_len = RSTRING_LEN(indent);
2772 indent_str = RSTRING_PTR(indent);
2773
2774 rb_str_cat(str, indent_str, indent_len);
2775 rb_str_cat2(str, "== disasm: ");
2776
2777 rb_str_append(str, iseq_inspect(iseq));
2778 if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) {
2779 rb_str_modify_expand(str, header_minlen - l);
2780 memset(RSTRING_END(str), '=', header_minlen - l);
2781 }
2782 if (iseq->body->builtin_attrs) {
2783#define disasm_builtin_attr(str, iseq, attr) \
2784 if (iseq->body->builtin_attrs & BUILTIN_ATTR_ ## attr) { \
2785 rb_str_cat2(str, " " #attr); \
2786 }
2787 disasm_builtin_attr(str, iseq, LEAF);
2788 disasm_builtin_attr(str, iseq, SINGLE_NOARG_LEAF);
2789 disasm_builtin_attr(str, iseq, INLINE_BLOCK);
2790 disasm_builtin_attr(str, iseq, C_TRACE);
2791 }
2792 rb_str_cat2(str, "\n");
2793
2794 /* show catch table information */
2795 if (body->catch_table) {
2796 rb_str_cat(str, indent_str, indent_len);
2797 rb_str_cat2(str, "== catch table\n");
2798 }
2799 if (body->catch_table) {
2800 rb_str_cat_cstr(indent, "| ");
2801 indent_str = RSTRING_PTR(indent);
2802 for (i = 0; i < body->catch_table->size; i++) {
2803 const struct iseq_catch_table_entry *entry =
2804 UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]);
2805 rb_str_cat(str, indent_str, indent_len);
2806 rb_str_catf(str,
2807 "| catch type: %-6s st: %04d ed: %04d sp: %04d cont: %04d\n",
2808 catch_type((int)entry->type), (int)entry->start,
2809 (int)entry->end, (int)entry->sp, (int)entry->cont);
2810 if (entry->iseq && !(done_iseq && st_is_member(done_iseq, (st_data_t)entry->iseq))) {
2811 rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check(entry->iseq), indent));
2812 if (!done_iseq) {
2813 done_iseq = st_init_numtable();
2814 done_iseq_wrapper = TypedData_Wrap_Struct(0, &tmp_set, done_iseq);
2815 }
2816 st_insert(done_iseq, (st_data_t)entry->iseq, (st_data_t)0);
2817 indent_str = RSTRING_PTR(indent);
2818 }
2819 }
2820 rb_str_resize(indent, indent_len);
2821 indent_str = RSTRING_PTR(indent);
2822 }
2823 if (body->catch_table) {
2824 rb_str_cat(str, indent_str, indent_len);
2825 rb_str_cat2(str, "|-------------------------------------"
2826 "-----------------------------------\n");
2827 }
2828
2829 /* show local table information */
2830 if (body->local_table) {
2831 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
2832 rb_str_cat(str, indent_str, indent_len);
2833 rb_str_catf(str,
2834 "local table (size: %d, argc: %d "
2835 "[opts: %d, rest: %d, post: %d, block: %d, kw: %d@%d, kwrest: %d])\n",
2836 body->local_table_size,
2837 body->param.lead_num,
2838 body->param.opt_num,
2839 body->param.flags.has_rest ? body->param.rest_start : -1,
2840 body->param.post_num,
2841 body->param.flags.has_block ? body->param.block_start : -1,
2842 body->param.flags.has_kw ? keyword->num : -1,
2843 body->param.flags.has_kw ? keyword->required_num : -1,
2844 body->param.flags.has_kwrest ? keyword->rest_start : -1);
2845
2846 for (i = body->local_table_size; i > 0;) {
2847 int li = body->local_table_size - --i - 1;
2848 long width;
2849 VALUE name = local_var_name(iseq, 0, i);
2850 char argi[0x100];
2851 char opti[0x100];
2852
2853 opti[0] = '\0';
2854 if (body->param.flags.has_opt) {
2855 int argc = body->param.lead_num;
2856 int opts = body->param.opt_num;
2857 if (li >= argc && li < argc + opts) {
2858 snprintf(opti, sizeof(opti), "Opt=%"PRIdVALUE,
2859 body->param.opt_table[li - argc]);
2860 }
2861 }
2862
2863 snprintf(argi, sizeof(argi), "%s%s%s%s%s%s", /* arg, opts, rest, post, kwrest, block */
2864 (body->param.lead_num > li) ? (body->param.flags.ambiguous_param0 ? "AmbiguousArg" : "Arg") : "",
2865 opti,
2866 (body->param.flags.has_rest && body->param.rest_start == li) ? (body->param.flags.anon_rest ? "AnonRest" : "Rest") : "",
2867 (body->param.flags.has_post && body->param.post_start <= li && li < body->param.post_start + body->param.post_num) ? "Post" : "",
2868 (body->param.flags.has_kwrest && keyword->rest_start == li) ? (body->param.flags.anon_kwrest ? "AnonKwrest" : "Kwrest") : "",
2869 (body->param.flags.has_block && body->param.block_start == li) ? "Block" : "");
2870
2871 rb_str_cat(str, indent_str, indent_len);
2872 rb_str_catf(str, "[%2d] ", i + 1);
2873 width = RSTRING_LEN(str) + 11;
2874 rb_str_append(str, name);
2875 if (*argi) rb_str_catf(str, "<%s>", argi);
2876 if ((width -= RSTRING_LEN(str)) > 0) rb_str_catf(str, "%*s", (int)width, "");
2877 }
2878 rb_str_cat_cstr(right_strip(str), "\n");
2879 }
2880
2881 /* show each line */
2882 code = rb_iseq_original_iseq(iseq);
2883 for (n = 0; n < size;) {
2884 rb_str_cat(str, indent_str, indent_len);
2885 n += rb_iseq_disasm_insn(str, code, n, iseq, child);
2886 }
2887
2888 for (l = 0; l < RARRAY_LEN(child); l++) {
2889 VALUE isv = rb_ary_entry(child, l);
2890 if (done_iseq && st_is_member(done_iseq, (st_data_t)isv)) continue;
2891 rb_str_cat_cstr(str, "\n");
2892 rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check((rb_iseq_t *)isv), indent));
2893 indent_str = RSTRING_PTR(indent);
2894 }
2895 RB_GC_GUARD(done_iseq_wrapper);
2896
2897 return str;
2898}
2899
2900VALUE
2901rb_iseq_disasm(const rb_iseq_t *iseq)
2902{
2903 VALUE str = rb_iseq_disasm_recursive(iseq, rb_str_new(0, 0));
2904 rb_str_resize(str, RSTRING_LEN(str));
2905 return str;
2906}
2907
2908/*
2909 * Estimates the number of instance variables that will be set on
2910 * a given `class` with the initialize method defined in
2911 * `initialize_iseq`
2912 */
2913attr_index_t
2914rb_estimate_iv_count(VALUE klass, const rb_iseq_t * initialize_iseq)
2915{
2916 struct rb_id_table * iv_names = rb_id_table_create(0);
2917
2918 for (unsigned int i = 0; i < ISEQ_BODY(initialize_iseq)->ivc_size; i++) {
2919 IVC cache = (IVC)&ISEQ_BODY(initialize_iseq)->is_entries[i];
2920
2921 if (cache->iv_set_name) {
2922 rb_id_table_insert(iv_names, cache->iv_set_name, Qtrue);
2923 }
2924 }
2925
2926 attr_index_t count = (attr_index_t)rb_id_table_size(iv_names);
2927
2928 VALUE superclass = rb_class_superclass(klass);
2929 count += RCLASS_MAX_IV_COUNT(superclass);
2930
2931 rb_id_table_free(iv_names);
2932
2933 return count;
2934}
2935
2936/*
2937 * call-seq:
2938 * iseq.disasm -> str
2939 * iseq.disassemble -> str
2940 *
2941 * Returns the instruction sequence as a +String+ in human readable form.
2942 *
2943 * puts RubyVM::InstructionSequence.compile('1 + 2').disasm
2944 *
2945 * Produces:
2946 *
2947 * == disasm: <RubyVM::InstructionSequence:<compiled>@<compiled>>==========
2948 * 0000 trace 1 ( 1)
2949 * 0002 putobject 1
2950 * 0004 putobject 2
2951 * 0006 opt_plus <ic:1>
2952 * 0008 leave
2953 */
2954static VALUE
2955iseqw_disasm(VALUE self)
2956{
2957 return rb_iseq_disasm(iseqw_check(self));
2958}
2959
2960static int
2961iseq_iterate_children(const rb_iseq_t *iseq, void (*iter_func)(const rb_iseq_t *child_iseq, void *data), void *data)
2962{
2963 unsigned int i;
2964 VALUE *code = rb_iseq_original_iseq(iseq);
2965 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2966 const rb_iseq_t *child;
2967 VALUE all_children = rb_obj_hide(rb_ident_hash_new());
2968
2969 if (body->catch_table) {
2970 for (i = 0; i < body->catch_table->size; i++) {
2971 const struct iseq_catch_table_entry *entry =
2972 UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]);
2973 child = entry->iseq;
2974 if (child) {
2975 if (NIL_P(rb_hash_aref(all_children, (VALUE)child))) {
2976 rb_hash_aset(all_children, (VALUE)child, Qtrue);
2977 (*iter_func)(child, data);
2978 }
2979 }
2980 }
2981 }
2982
2983 for (i=0; i<body->iseq_size;) {
2984 VALUE insn = code[i];
2985 int len = insn_len(insn);
2986 const char *types = insn_op_types(insn);
2987 int j;
2988
2989 for (j=0; types[j]; j++) {
2990 switch (types[j]) {
2991 case TS_ISEQ:
2992 child = (const rb_iseq_t *)code[i+j+1];
2993 if (child) {
2994 if (NIL_P(rb_hash_aref(all_children, (VALUE)child))) {
2995 rb_hash_aset(all_children, (VALUE)child, Qtrue);
2996 (*iter_func)(child, data);
2997 }
2998 }
2999 break;
3000 default:
3001 break;
3002 }
3003 }
3004 i += len;
3005 }
3006
3007 return (int)RHASH_SIZE(all_children);
3008}
3009
3010static void
3011yield_each_children(const rb_iseq_t *child_iseq, void *data)
3012{
3013 rb_yield(iseqw_new(child_iseq));
3014}
3015
3016/*
3017 * call-seq:
3018 * iseq.each_child{|child_iseq| ...} -> iseq
3019 *
3020 * Iterate all direct child instruction sequences.
3021 * The iteration order is implementation- and version-defined,
3022 * so callers should not rely on it.
3023 */
3024static VALUE
3025iseqw_each_child(VALUE self)
3026{
3027 const rb_iseq_t *iseq = iseqw_check(self);
3028 iseq_iterate_children(iseq, yield_each_children, NULL);
3029 return self;
3030}
3031
3032static void
3033push_event_info(const rb_iseq_t *iseq, rb_event_flag_t events, int line, VALUE ary)
3034{
3035#define C(ev, cstr, l) if (events & ev) rb_ary_push(ary, rb_ary_new_from_args(2, l, ID2SYM(rb_intern(cstr))));
3036 C(RUBY_EVENT_CLASS, "class", rb_iseq_first_lineno(iseq));
3037 C(RUBY_EVENT_CALL, "call", rb_iseq_first_lineno(iseq));
3038 C(RUBY_EVENT_B_CALL, "b_call", rb_iseq_first_lineno(iseq));
3039 C(RUBY_EVENT_LINE, "line", INT2FIX(line));
3040 C(RUBY_EVENT_END, "end", INT2FIX(line));
3041 C(RUBY_EVENT_RETURN, "return", INT2FIX(line));
3042 C(RUBY_EVENT_B_RETURN, "b_return", INT2FIX(line));
3043 C(RUBY_EVENT_RESCUE, "rescue", INT2FIX(line));
3044#undef C
3045}
3046
3047/*
3048 * call-seq:
3049 * iseq.trace_points -> ary
3050 *
3051 * Returns the trace points in the instruction sequence
3052 * as an array of [line, event_symbol] pairs.
3053 */
3054static VALUE
3055iseqw_trace_points(VALUE self)
3056{
3057 const rb_iseq_t *iseq = iseqw_check(self);
3058 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3059 unsigned int i;
3060 VALUE ary = rb_ary_new();
3061
3062 for (i=0; i<body->insns_info.size; i++) {
3063 const struct iseq_insn_info_entry *entry = &body->insns_info.body[i];
3064 if (entry->events) {
3065 push_event_info(iseq, entry->events, entry->line_no, ary);
3066 }
3067 }
3068 return ary;
3069}
3070
3071/*
3072 * Returns the instruction sequence containing the given proc or method.
3073 *
3074 * For example, using irb:
3075 *
3076 * # a proc
3077 * > p = proc { num = 1 + 2 }
3078 * > RubyVM::InstructionSequence.of(p)
3079 * > #=> <RubyVM::InstructionSequence:block in irb_binding@(irb)>
3080 *
3081 * # for a method
3082 * > def foo(bar); puts bar; end
3083 * > RubyVM::InstructionSequence.of(method(:foo))
3084 * > #=> <RubyVM::InstructionSequence:foo@(irb)>
3085 *
3086 * Using ::compile_file:
3087 *
3088 * # /tmp/iseq_of.rb
3089 * def hello
3090 * puts "hello, world"
3091 * end
3092 *
3093 * $a_global_proc = proc { str = 'a' + 'b' }
3094 *
3095 * # in irb
3096 * > require '/tmp/iseq_of.rb'
3097 *
3098 * # first the method hello
3099 * > RubyVM::InstructionSequence.of(method(:hello))
3100 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7cb1d0>
3101 *
3102 * # then the global proc
3103 * > RubyVM::InstructionSequence.of($a_global_proc)
3104 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7caf78>
3105 */
3106static VALUE
3107iseqw_s_of(VALUE klass, VALUE body)
3108{
3109 const rb_iseq_t *iseq = NULL;
3110
3111 if (rb_frame_info_p(body)) {
3112 iseq = rb_get_iseq_from_frame_info(body);
3113 }
3114 else if (rb_obj_is_proc(body)) {
3115 iseq = vm_proc_iseq(body);
3116
3117 if (!rb_obj_is_iseq((VALUE)iseq)) {
3118 iseq = NULL;
3119 }
3120 }
3121 else if (rb_obj_is_method(body)) {
3122 iseq = rb_method_iseq(body);
3123 }
3124 else if (rb_typeddata_is_instance_of(body, &iseqw_data_type)) {
3125 return body;
3126 }
3127
3128 return iseq ? iseqw_new(iseq) : Qnil;
3129}
3130
3131/*
3132 * call-seq:
3133 * InstructionSequence.disasm(body) -> str
3134 * InstructionSequence.disassemble(body) -> str
3135 *
3136 * Takes +body+, a +Method+ or +Proc+ object, and returns a +String+
3137 * with the human readable instructions for +body+.
3138 *
3139 * For a +Method+ object:
3140 *
3141 * # /tmp/method.rb
3142 * def hello
3143 * puts "hello, world"
3144 * end
3145 *
3146 * puts RubyVM::InstructionSequence.disasm(method(:hello))
3147 *
3148 * Produces:
3149 *
3150 * == disasm: <RubyVM::InstructionSequence:hello@/tmp/method.rb>============
3151 * 0000 trace 8 ( 1)
3152 * 0002 trace 1 ( 2)
3153 * 0004 putself
3154 * 0005 putstring "hello, world"
3155 * 0007 send :puts, 1, nil, 8, <ic:0>
3156 * 0013 trace 16 ( 3)
3157 * 0015 leave ( 2)
3158 *
3159 * For a +Proc+ object:
3160 *
3161 * # /tmp/proc.rb
3162 * p = proc { num = 1 + 2 }
3163 * puts RubyVM::InstructionSequence.disasm(p)
3164 *
3165 * Produces:
3166 *
3167 * == disasm: <RubyVM::InstructionSequence:block in <main>@/tmp/proc.rb>===
3168 * == catch table
3169 * | catch type: redo st: 0000 ed: 0012 sp: 0000 cont: 0000
3170 * | catch type: next st: 0000 ed: 0012 sp: 0000 cont: 0012
3171 * |------------------------------------------------------------------------
3172 * local table (size: 2, argc: 0 [opts: 0, rest: -1, post: 0, block: -1] s1)
3173 * [ 2] num
3174 * 0000 trace 1 ( 1)
3175 * 0002 putobject 1
3176 * 0004 putobject 2
3177 * 0006 opt_plus <ic:1>
3178 * 0008 dup
3179 * 0009 setlocal num, 0
3180 * 0012 leave
3181 *
3182 */
3183static VALUE
3184iseqw_s_disasm(VALUE klass, VALUE body)
3185{
3186 VALUE iseqw = iseqw_s_of(klass, body);
3187 return NIL_P(iseqw) ? Qnil : rb_iseq_disasm(iseqw_check(iseqw));
3188}
3189
3190static VALUE
3191register_label(struct st_table *table, unsigned long idx)
3192{
3193 VALUE sym = rb_str_intern(rb_sprintf("label_%lu", idx));
3194 st_insert(table, idx, sym);
3195 return sym;
3196}
3197
3198static VALUE
3199exception_type2symbol(VALUE type)
3200{
3201 ID id;
3202 switch (type) {
3203 case CATCH_TYPE_RESCUE: CONST_ID(id, "rescue"); break;
3204 case CATCH_TYPE_ENSURE: CONST_ID(id, "ensure"); break;
3205 case CATCH_TYPE_RETRY: CONST_ID(id, "retry"); break;
3206 case CATCH_TYPE_BREAK: CONST_ID(id, "break"); break;
3207 case CATCH_TYPE_REDO: CONST_ID(id, "redo"); break;
3208 case CATCH_TYPE_NEXT: CONST_ID(id, "next"); break;
3209 default:
3210 rb_bug("unknown exception type: %d", (int)type);
3211 }
3212 return ID2SYM(id);
3213}
3214
3215static int
3216cdhash_each(VALUE key, VALUE value, VALUE ary)
3217{
3218 rb_ary_push(ary, obj_resurrect(key));
3219 rb_ary_push(ary, value);
3220 return ST_CONTINUE;
3221}
3222
/*
 * Typed-data descriptor used to wrap the st_table of labels built by
 * iseq_data_to_ary(): rb_mark_tbl is installed as the mark function and
 * st_free_table as the free function, so the table is released together
 * with its wrapper object.
 */
static const rb_data_type_t label_wrapper = {
    "label_wrapper",
    {(void (*)(void *))rb_mark_tbl, (void (*)(void *))st_free_table, 0, 0,},
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
3228
/* Declares a static ID variable named id_<name>. */
#define DECL_ID(name) \
    static ID id_##name

/* Assigns id_<name> the ID obtained by interning the text <name>. */
#define INIT_ID(name) \
    id_##name = rb_intern(#name)
3234
3235static VALUE
3236iseq_type_id(enum rb_iseq_type type)
3237{
3238 DECL_ID(top);
3239 DECL_ID(method);
3240 DECL_ID(block);
3241 DECL_ID(class);
3242 DECL_ID(rescue);
3243 DECL_ID(ensure);
3244 DECL_ID(eval);
3245 DECL_ID(main);
3246 DECL_ID(plain);
3247
3248 if (id_top == 0) {
3249 INIT_ID(top);
3250 INIT_ID(method);
3251 INIT_ID(block);
3252 INIT_ID(class);
3253 INIT_ID(rescue);
3254 INIT_ID(ensure);
3255 INIT_ID(eval);
3256 INIT_ID(main);
3257 INIT_ID(plain);
3258 }
3259
3260 switch (type) {
3261 case ISEQ_TYPE_TOP: return id_top;
3262 case ISEQ_TYPE_METHOD: return id_method;
3263 case ISEQ_TYPE_BLOCK: return id_block;
3264 case ISEQ_TYPE_CLASS: return id_class;
3265 case ISEQ_TYPE_RESCUE: return id_rescue;
3266 case ISEQ_TYPE_ENSURE: return id_ensure;
3267 case ISEQ_TYPE_EVAL: return id_eval;
3268 case ISEQ_TYPE_MAIN: return id_main;
3269 case ISEQ_TYPE_PLAIN: return id_plain;
3270 };
3271
3272 rb_bug("unsupported iseq type: %d", (int)type);
3273}
3274
/*
 * Converts +iseq+ into its nested Array representation
 * ("YARVInstructionSequence/SimpleDataFormat").
 *
 * The result is an Array laid out as
 *   [magic, major, minor, format_type, misc, label, path, realpath,
 *    first_lineno, type, locals, params, catch_table, bytecode]
 * Child iseqs (TS_ISEQ operands and catch table entries) are converted
 * recursively by calling this function again.
 *
 * The work is done in passes:
 *   1. locals and params are read from the iseq body;
 *   2. every instruction is turned into [:insn_name, operand, ...] and
 *      every jump target seen is registered in labels_table;
 *   3. the catch table is converted, registering its positions as labels;
 *   4. the final body is built by interleaving labels, line numbers and
 *      event symbols with the instruction arrays from pass 2.
 */
static VALUE
iseq_data_to_ary(const rb_iseq_t *iseq)
{
    unsigned int i;
    long l;
    const struct rb_iseq_constant_body *const iseq_body = ISEQ_BODY(iseq);
    const struct iseq_insn_info_entry *prev_insn_info;
    unsigned int pos;
    int last_line = 0;
    VALUE *seq, *iseq_original;

    VALUE val = rb_ary_new();
    ID type; /* Symbol */
    VALUE locals = rb_ary_new();
    VALUE params = rb_hash_new();
    VALUE body = rb_ary_new(); /* [[:insn1, ...], ...] */
    VALUE nbody;
    VALUE exception = rb_ary_new(); /* [[....]] */
    VALUE misc = rb_hash_new();

    static ID insn_syms[VM_BARE_INSTRUCTION_SIZE]; /* w/o-trace only */
    /* position => label Symbol; owned by labels_wrapper, whose free function is st_free_table */
    struct st_table *labels_table = st_init_numtable();
    VALUE labels_wrapper = TypedData_Wrap_Struct(0, &label_wrapper, labels_table);

    /* intern the instruction names once; insn_syms is static and reused by later calls */
    if (insn_syms[0] == 0) {
        int i;
        for (i=0; i<numberof(insn_syms); i++) {
            insn_syms[i] = rb_intern(insn_name(i));
        }
    }

    /* type */
    type = iseq_type_id(iseq_body->type);

    /* locals */
    for (i=0; i<iseq_body->local_table_size; i++) {
        ID lid = iseq_body->local_table[i];
        if (lid) {
            if (rb_id2str(lid)) {
                rb_ary_push(locals, ID2SYM(lid));
            }
            else { /* hidden variable from id_internal() */
                rb_ary_push(locals, ULONG2NUM(iseq_body->local_table_size-i+1));
            }
        }
        else {
            /* a zero ID in the local table is emitted as the placeholder :"#arg_rest" */
            rb_ary_push(locals, ID2SYM(rb_intern("#arg_rest")));
        }
    }

    /* params */
    {
        const struct rb_iseq_param_keyword *const keyword = iseq_body->param.keyword;
        int j;

        if (iseq_body->param.flags.has_opt) {
            /* opt_table is read for opt_num + 1 entries; each becomes a label */
            int len = iseq_body->param.opt_num + 1;
            VALUE arg_opt_labels = rb_ary_new2(len);

            for (j = 0; j < len; j++) {
                VALUE l = register_label(labels_table, iseq_body->param.opt_table[j]);
                rb_ary_push(arg_opt_labels, l);
            }
            rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels);
        }

        /* commit */
        if (iseq_body->param.flags.has_lead) rb_hash_aset(params, ID2SYM(rb_intern("lead_num")), INT2FIX(iseq_body->param.lead_num));
        if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_num")), INT2FIX(iseq_body->param.post_num));
        if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_start")), INT2FIX(iseq_body->param.post_start));
        if (iseq_body->param.flags.has_rest) rb_hash_aset(params, ID2SYM(rb_intern("rest_start")), INT2FIX(iseq_body->param.rest_start));
        if (iseq_body->param.flags.has_block) rb_hash_aset(params, ID2SYM(rb_intern("block_start")), INT2FIX(iseq_body->param.block_start));
        if (iseq_body->param.flags.has_kw) {
            VALUE keywords = rb_ary_new();
            int i, j;
            /* required keywords are emitted as bare Symbols */
            for (i=0; i<keyword->required_num; i++) {
                rb_ary_push(keywords, ID2SYM(keyword->table[i]));
            }
            /* the remaining keywords become [name] or [name, default];
             * i keeps indexing keyword->table while j indexes default_values */
            for (j=0; i<keyword->num; i++, j++) {
                VALUE key = rb_ary_new_from_args(1, ID2SYM(keyword->table[i]));
                if (!UNDEF_P(keyword->default_values[j])) {
                    rb_ary_push(key, keyword->default_values[j]);
                }
                rb_ary_push(keywords, key);
            }

            rb_hash_aset(params, ID2SYM(rb_intern("kwbits")),
                         INT2FIX(keyword->bits_start));
            rb_hash_aset(params, ID2SYM(rb_intern("keyword")), keywords);
        }
        if (iseq_body->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(keyword->rest_start));
        if (iseq_body->param.flags.ambiguous_param0) rb_hash_aset(params, ID2SYM(rb_intern("ambiguous_param0")), Qtrue);
        if (iseq_body->param.flags.use_block) rb_hash_aset(params, ID2SYM(rb_intern("use_block")), Qtrue);
    }

    /* body */
    iseq_original = rb_iseq_original_iseq((rb_iseq_t *)iseq);

    for (seq = iseq_original; seq < iseq_original + iseq_body->iseq_size; ) {
        VALUE insn = *seq++;
        int j, len = insn_len(insn);
        /* seq is already past the opcode, so nseq points at the next instruction */
        VALUE *nseq = seq + len - 1;
        VALUE ary = rb_ary_new2(len);

        rb_ary_push(ary, ID2SYM(insn_syms[insn%numberof(insn_syms)]));
        /* exactly one array element is pushed per operand */
        for (j=0; j<len-1; j++, seq++) {
            enum ruby_insn_type_chars op_type = insn_op_type(insn, j);

            switch (op_type) {
              case TS_OFFSET: {
                /* the operand is added to the position of the next instruction */
                unsigned long idx = nseq - iseq_original + *seq;
                rb_ary_push(ary, register_label(labels_table, idx));
                break;
              }
              case TS_LINDEX:
              case TS_NUM:
                rb_ary_push(ary, INT2FIX(*seq));
                break;
              case TS_VALUE:
                rb_ary_push(ary, obj_resurrect(*seq));
                break;
              case TS_ISEQ:
                {
                    /* nested iseq operand: converted recursively (this iseq shadows the parameter) */
                    const rb_iseq_t *iseq = (rb_iseq_t *)*seq;
                    if (iseq) {
                        VALUE val = iseq_data_to_ary(rb_iseq_check(iseq));
                        rb_ary_push(ary, val);
                    }
                    else {
                        rb_ary_push(ary, Qnil);
                    }
                }
                break;
              case TS_IC:
                {
                    /* emitted as the list of segment IDs, read up to the terminating 0 */
                    VALUE list = rb_ary_new();
                    const ID *ids = ((IC)*seq)->segments;
                    while (*ids) {
                        rb_ary_push(list, ID2SYM(*ids++));
                    }
                    rb_ary_push(ary, list);
                }
                break;
              case TS_IVC:
              case TS_ICVARC:
              case TS_ISE:
                {
                    /* emitted as the entry's index relative to ISEQ_IS_ENTRY_START for this operand type */
                    union iseq_inline_storage_entry *is = (union iseq_inline_storage_entry *)*seq;
                    rb_ary_push(ary, INT2FIX(is - ISEQ_IS_ENTRY_START(ISEQ_BODY(iseq), op_type)));
                }
                break;
              case TS_CALLDATA:
                {
                    /* emitted as a Hash with :mid, :flag, :orig_argc and, for keyword calls, :kw_arg */
                    struct rb_call_data *cd = (struct rb_call_data *)*seq;
                    const struct rb_callinfo *ci = cd->ci;
                    VALUE e = rb_hash_new();
                    int argc = vm_ci_argc(ci);

                    ID mid = vm_ci_mid(ci);
                    rb_hash_aset(e, ID2SYM(rb_intern("mid")), mid ? ID2SYM(mid) : Qnil);
                    rb_hash_aset(e, ID2SYM(rb_intern("flag")), UINT2NUM(vm_ci_flag(ci)));

                    if (vm_ci_flag(ci) & VM_CALL_KWARG) {
                        const struct rb_callinfo_kwarg *kwarg = vm_ci_kwarg(ci);
                        int i;
                        VALUE kw = rb_ary_new2((long)kwarg->keyword_len);

                        /* orig_argc excludes the keyword arguments */
                        argc -= kwarg->keyword_len;
                        for (i = 0; i < kwarg->keyword_len; i++) {
                            rb_ary_push(kw, kwarg->keywords[i]);
                        }
                        rb_hash_aset(e, ID2SYM(rb_intern("kw_arg")), kw);
                    }

                    rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")),
                                 INT2FIX(argc));
                    rb_ary_push(ary, e);
                }
                break;
              case TS_ID:
                rb_ary_push(ary, ID2SYM(*seq));
                break;
              case TS_CDHASH:
                {
                    /* flattened to [key, label, key, label, ...] */
                    VALUE hash = *seq;
                    VALUE val = rb_ary_new();
                    int i;

                    rb_hash_foreach(hash, cdhash_each, val);

                    /* replace each stored offset (odd slots) with a label, resolved like TS_OFFSET */
                    for (i=0; i<RARRAY_LEN(val); i+=2) {
                        VALUE pos = FIX2INT(rb_ary_entry(val, i+1));
                        unsigned long idx = nseq - iseq_original + pos;

                        rb_ary_store(val, i+1,
                                     register_label(labels_table, idx));
                    }
                    rb_ary_push(ary, val);
                }
                break;
              case TS_FUNCPTR:
                {
                    /* the raw operand word is emitted as an Integer */
#if SIZEOF_VALUE <= SIZEOF_LONG
                    VALUE val = LONG2NUM((SIGNED_VALUE)*seq);
#else
                    VALUE val = LL2NUM((SIGNED_VALUE)*seq);
#endif
                    rb_ary_push(ary, val);
                }
                break;
              case TS_BUILTIN:
                {
                    /* emitted as a Hash with :func_ptr, :argc, :index and :name */
                    VALUE val = rb_hash_new();
#if SIZEOF_VALUE <= SIZEOF_LONG
                    VALUE func_ptr = LONG2NUM((SIGNED_VALUE)((RB_BUILTIN)*seq)->func_ptr);
#else
                    VALUE func_ptr = LL2NUM((SIGNED_VALUE)((RB_BUILTIN)*seq)->func_ptr);
#endif
                    rb_hash_aset(val, ID2SYM(rb_intern("func_ptr")), func_ptr);
                    rb_hash_aset(val, ID2SYM(rb_intern("argc")), INT2NUM(((RB_BUILTIN)*seq)->argc));
                    rb_hash_aset(val, ID2SYM(rb_intern("index")), INT2NUM(((RB_BUILTIN)*seq)->index));
                    rb_hash_aset(val, ID2SYM(rb_intern("name")), rb_str_new_cstr(((RB_BUILTIN)*seq)->name));
                    rb_ary_push(ary, val);
                }
                break;
              default:
                rb_bug("unknown operand: %c", insn_op_type(insn, j));
            }
        }
        rb_ary_push(body, ary);
    }

    /* keep the plain instruction list; body is rebuilt below with labels and line numbers */
    nbody = body;

    /* exception */
    if (iseq_body->catch_table) for (i=0; i<iseq_body->catch_table->size; i++) {
        /* each entry becomes [type, iseq-or-nil, start label, end label, cont label, sp] */
        VALUE ary = rb_ary_new();
        const struct iseq_catch_table_entry *entry =
            UNALIGNED_MEMBER_PTR(iseq_body->catch_table, entries[i]);
        rb_ary_push(ary, exception_type2symbol(entry->type));
        if (entry->iseq) {
            rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq)));
        }
        else {
            rb_ary_push(ary, Qnil);
        }
        rb_ary_push(ary, register_label(labels_table, entry->start));
        rb_ary_push(ary, register_label(labels_table, entry->end));
        rb_ary_push(ary, register_label(labels_table, entry->cont));
        rb_ary_push(ary, UINT2NUM(entry->sp));
        rb_ary_push(exception, ary);
    }

    /* make body with labels and insert line number */
    body = rb_ary_new();
    prev_insn_info = NULL;
#ifdef USE_ISEQ_NODE_ID
    VALUE node_ids = rb_ary_new();
#endif

    /* l walks the instruction arrays; pos is the matching position in the instruction sequence */
    for (l=0, pos=0; l<RARRAY_LEN(nbody); l++) {
        const struct iseq_insn_info_entry *info;
        VALUE ary = RARRAY_AREF(nbody, l);
        st_data_t label;

        /* a label registered for this position goes right before the instruction */
        if (st_lookup(labels_table, pos, &label)) {
            rb_ary_push(body, (VALUE)label);
        }

        info = get_insn_info(iseq, pos);
#ifdef USE_ISEQ_NODE_ID
        rb_ary_push(node_ids, INT2FIX(info->node_id));
#endif

        /* line number and events are emitted only when the insn info entry changes */
        if (prev_insn_info != info) {
            int line = info->line_no;
            rb_event_flag_t events = info->events;

            if (line > 0 && last_line != line) {
                rb_ary_push(body, INT2FIX(line));
                last_line = line;
            }
#define CHECK_EVENT(ev) if (events & ev) rb_ary_push(body, ID2SYM(rb_intern(#ev)));
            CHECK_EVENT(RUBY_EVENT_LINE);
            CHECK_EVENT(RUBY_EVENT_CLASS);
            CHECK_EVENT(RUBY_EVENT_END);
            CHECK_EVENT(RUBY_EVENT_CALL);
            CHECK_EVENT(RUBY_EVENT_RETURN);
            CHECK_EVENT(RUBY_EVENT_B_CALL);
            CHECK_EVENT(RUBY_EVENT_B_RETURN);
            CHECK_EVENT(RUBY_EVENT_RESCUE);
#undef CHECK_EVENT
            prev_insn_info = info;
        }

        rb_ary_push(body, ary);
        /* ary holds the opcode plus one element per operand, so its length is the instruction length */
        pos += RARRAY_LENINT(ary); /* reject too huge data */
    }
    RB_GC_GUARD(nbody);
    RB_GC_GUARD(labels_wrapper);

    rb_hash_aset(misc, ID2SYM(rb_intern("arg_size")), INT2FIX(iseq_body->param.size));
    rb_hash_aset(misc, ID2SYM(rb_intern("local_size")), INT2FIX(iseq_body->local_table_size));
    rb_hash_aset(misc, ID2SYM(rb_intern("stack_max")), INT2FIX(iseq_body->stack_max));
    rb_hash_aset(misc, ID2SYM(rb_intern("node_id")), INT2FIX(iseq_body->location.node_id));
    rb_hash_aset(misc, ID2SYM(rb_intern("code_location")),
                 rb_ary_new_from_args(4,
                                      INT2FIX(iseq_body->location.code_location.beg_pos.lineno),
                                      INT2FIX(iseq_body->location.code_location.beg_pos.column),
                                      INT2FIX(iseq_body->location.code_location.end_pos.lineno),
                                      INT2FIX(iseq_body->location.code_location.end_pos.column)));
#ifdef USE_ISEQ_NODE_ID
    rb_hash_aset(misc, ID2SYM(rb_intern("node_ids")), node_ids);
#endif
    rb_hash_aset(misc, ID2SYM(rb_intern("parser")), iseq_body->prism ? ID2SYM(rb_intern("prism")) : ID2SYM(rb_intern("parse.y")));

    /*
     * [:magic, :major_version, :minor_version, :format_type, :misc,
     *  :name, :path, :absolute_path, :start_lineno, :type, :locals, :args,
     *  :catch_table, :bytecode]
     */
    rb_ary_push(val, rb_str_new2("YARVInstructionSequence/SimpleDataFormat"));
    rb_ary_push(val, INT2FIX(ISEQ_MAJOR_VERSION)); /* major */
    rb_ary_push(val, INT2FIX(ISEQ_MINOR_VERSION)); /* minor */
    rb_ary_push(val, INT2FIX(1));
    rb_ary_push(val, misc);
    rb_ary_push(val, iseq_body->location.label);
    rb_ary_push(val, rb_iseq_path(iseq));
    rb_ary_push(val, rb_iseq_realpath(iseq));
    rb_ary_push(val, RB_INT2NUM(iseq_body->location.first_lineno));
    rb_ary_push(val, ID2SYM(type));
    rb_ary_push(val, locals);
    rb_ary_push(val, params);
    rb_ary_push(val, exception);
    rb_ary_push(val, body);
    return val;
}
3612
3613VALUE
3614rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc)
3615{
3616 int i, r;
3617 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3618 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
3619 VALUE a, args = rb_ary_new2(body->param.size);
3620 ID req, opt, rest, block, key, keyrest;
3621#define PARAM_TYPE(type) rb_ary_push(a = rb_ary_new2(2), ID2SYM(type))
3622#define PARAM_ID(i) body->local_table[(i)]
3623#define PARAM(i, type) ( \
3624 PARAM_TYPE(type), \
3625 rb_id2str(PARAM_ID(i)) ? \
3626 rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \
3627 a)
3628
3629 CONST_ID(req, "req");
3630 CONST_ID(opt, "opt");
3631
3632 if (body->param.flags.forwardable) {
3633 // [[:rest, :*], [:keyrest, :**], [:block, :&]]
3634 CONST_ID(rest, "rest");
3635 CONST_ID(keyrest, "keyrest");
3636 CONST_ID(block, "block");
3637 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(rest), ID2SYM(idMULT)));
3638 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(keyrest), ID2SYM(idPow)));
3639 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(block), ID2SYM(idAnd)));
3640 }
3641
3642 if (is_proc) {
3643 for (i = 0; i < body->param.lead_num; i++) {
3644 PARAM_TYPE(opt);
3645 if (rb_id2str(PARAM_ID(i))) {
3646 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3647 }
3648 rb_ary_push(args, a);
3649 }
3650 }
3651 else {
3652 for (i = 0; i < body->param.lead_num; i++) {
3653 rb_ary_push(args, PARAM(i, req));
3654 }
3655 }
3656 r = body->param.lead_num + body->param.opt_num;
3657 for (; i < r; i++) {
3658 PARAM_TYPE(opt);
3659 if (rb_id2str(PARAM_ID(i))) {
3660 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3661 }
3662 rb_ary_push(args, a);
3663 }
3664 if (body->param.flags.has_rest) {
3665 CONST_ID(rest, "rest");
3666 rb_ary_push(args, PARAM(body->param.rest_start, rest));
3667 }
3668 r = body->param.post_start + body->param.post_num;
3669 if (is_proc) {
3670 for (i = body->param.post_start; i < r; i++) {
3671 PARAM_TYPE(opt);
3672 if (rb_id2str(PARAM_ID(i))) {
3673 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3674 }
3675 rb_ary_push(args, a);
3676 }
3677 }
3678 else {
3679 for (i = body->param.post_start; i < r; i++) {
3680 rb_ary_push(args, PARAM(i, req));
3681 }
3682 }
3683 if (body->param.flags.accepts_no_kwarg) {
3684 ID nokey;
3685 CONST_ID(nokey, "nokey");
3686 PARAM_TYPE(nokey);
3687 rb_ary_push(args, a);
3688 }
3689 if (body->param.flags.has_kw) {
3690 i = 0;
3691 if (keyword->required_num > 0) {
3692 ID keyreq;
3693 CONST_ID(keyreq, "keyreq");
3694 for (; i < keyword->required_num; i++) {
3695 PARAM_TYPE(keyreq);
3696 if (rb_id2str(keyword->table[i])) {
3697 rb_ary_push(a, ID2SYM(keyword->table[i]));
3698 }
3699 rb_ary_push(args, a);
3700 }
3701 }
3702 CONST_ID(key, "key");
3703 for (; i < keyword->num; i++) {
3704 PARAM_TYPE(key);
3705 if (rb_id2str(keyword->table[i])) {
3706 rb_ary_push(a, ID2SYM(keyword->table[i]));
3707 }
3708 rb_ary_push(args, a);
3709 }
3710 }
3711 if (body->param.flags.has_kwrest || body->param.flags.ruby2_keywords) {
3712 ID param;
3713 CONST_ID(keyrest, "keyrest");
3714 PARAM_TYPE(keyrest);
3715 if (body->param.flags.has_kwrest &&
3716 rb_id2str(param = PARAM_ID(keyword->rest_start))) {
3717 rb_ary_push(a, ID2SYM(param));
3718 }
3719 else if (body->param.flags.ruby2_keywords) {
3720 rb_ary_push(a, ID2SYM(idPow));
3721 }
3722 rb_ary_push(args, a);
3723 }
3724 if (body->param.flags.has_block) {
3725 CONST_ID(block, "block");
3726 rb_ary_push(args, PARAM(body->param.block_start, block));
3727 }
3728 return args;
3729}
3730
3731VALUE
3732rb_iseq_defined_string(enum defined_type type)
3733{
3734 static const char expr_names[][18] = {
3735 "nil",
3736 "instance-variable",
3737 "local-variable",
3738 "global-variable",
3739 "class variable",
3740 "constant",
3741 "method",
3742 "yield",
3743 "super",
3744 "self",
3745 "true",
3746 "false",
3747 "assignment",
3748 "expression",
3749 };
3750 const char *estr;
3751
3752 if ((unsigned)(type - 1) >= (unsigned)numberof(expr_names)) rb_bug("unknown defined type %d", type);
3753 estr = expr_names[type - 1];
3754 return rb_fstring_cstr(estr);
3755}
3756
// A map from encoded_insn to insn_data: decoded insn number, its len,
// decoded ZJIT insn number, non-trace version of encoded insn,
// trace version, and zjit version.
// The table is created and filled by rb_vm_encoded_insn_data_table_init()
// and released by rb_free_encoded_insn_data(); its values point into the
// static insn_data array below.
static st_table *encoded_insn_data;
typedef struct insn_data_struct {
    int insn;                   // bare instruction number
    int insn_len;               // insn_len() of the bare instruction
    void *notrace_encoded_insn; // encoding to install when tracing is off
    void *trace_encoded_insn;   // encoding to install when tracing is on
#if USE_ZJIT
    int zjit_insn;              // result of vm_bare_insn_to_zjit_insn() for this insn
    void *zjit_encoded_insn;    // encoding of zjit_insn, or 0 when it equals insn
#endif
} insn_data_t;
// One entry per bare instruction, indexed by bare instruction number.
static insn_data_t insn_data[VM_BARE_INSTRUCTION_SIZE];
3772
3773void
3774rb_free_encoded_insn_data(void)
3775{
3776 st_free_table(encoded_insn_data);
3777}
3778
// Initialize a table to decode bare, trace, and zjit instructions.
// This function also determines which instructions are used when TracePoint is enabled.
//
// For every bare instruction this fills insn_data[insn] and registers the
// bare encoding, the trace encoding (insn + VM_BARE_INSTRUCTION_SIZE) and,
// when USE_ZJIT is set and a distinct zjit variant exists, the zjit encoding
// as keys of encoded_insn_data, all mapping to the same insn_data entry.
void
rb_vm_encoded_insn_data_table_init(void)
{
    // INSN_CODE(insn) is the value stored in iseq_encoded for insn: an entry
    // of the address table under threaded code, the plain number otherwise.
#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
    const void * const *table = rb_vm_get_insns_address_table();
#define INSN_CODE(insn) ((VALUE)table[insn])
#else
#define INSN_CODE(insn) ((VALUE)(insn))
#endif
    encoded_insn_data = st_init_numtable_with_size(VM_BARE_INSTRUCTION_SIZE);

    for (int insn = 0; insn < VM_BARE_INSTRUCTION_SIZE; insn++) {
        insn_data[insn].insn = insn;
        insn_data[insn].insn_len = insn_len(insn);

        // When tracing :return events, we convert opt_invokebuiltin_delegate_leave + leave into
        // opt_invokebuiltin_delegate + trace_leave, presumably because we don't want to fire
        // :return events before invokebuiltin. https://github.com/ruby/ruby/pull/3256
        int notrace_insn = (insn != BIN(opt_invokebuiltin_delegate_leave)) ? insn : BIN(opt_invokebuiltin_delegate);
        insn_data[insn].notrace_encoded_insn = (void *)INSN_CODE(notrace_insn);
        insn_data[insn].trace_encoded_insn = (void *)INSN_CODE(notrace_insn + VM_BARE_INSTRUCTION_SIZE);

        // Both the bare and the trace encoding of insn decode to this entry.
        st_data_t key1 = (st_data_t)INSN_CODE(insn);
        st_data_t key2 = (st_data_t)INSN_CODE(insn + VM_BARE_INSTRUCTION_SIZE);
        st_add_direct(encoded_insn_data, key1, (st_data_t)&insn_data[insn]);
        st_add_direct(encoded_insn_data, key2, (st_data_t)&insn_data[insn]);

#if USE_ZJIT
        int zjit_insn = vm_bare_insn_to_zjit_insn(insn);
        insn_data[insn].zjit_insn = zjit_insn;
        insn_data[insn].zjit_encoded_insn = (insn != zjit_insn) ? (void *)INSN_CODE(zjit_insn) : 0;

        // Register the zjit encoding only when it differs from the bare one,
        // which is already registered above as key1.
        if (insn != zjit_insn) {
            st_data_t key3 = (st_data_t)INSN_CODE(zjit_insn);
            st_add_direct(encoded_insn_data, key3, (st_data_t)&insn_data[insn]);
        }
#endif
    }
}
3820
3821// Decode an insn address to an insn. This returns bare instructions
3822// even if they're trace/zjit instructions. Use rb_vm_insn_addr2opcode
3823// to decode trace/zjit instructions as is.
3824int
3825rb_vm_insn_addr2insn(const void *addr)
3826{
3827 st_data_t key = (st_data_t)addr;
3828 st_data_t val;
3829
3830 if (st_lookup(encoded_insn_data, key, &val)) {
3831 insn_data_t *e = (insn_data_t *)val;
3832 return (int)e->insn;
3833 }
3834
3835 rb_bug("rb_vm_insn_addr2insn: invalid insn address: %p", addr);
3836}
3837
3838// Decode an insn address to an insn. Unlike rb_vm_insn_addr2insn,
3839// this function can return trace/zjit opcode variants.
3840int
3841rb_vm_insn_addr2opcode(const void *addr)
3842{
3843 st_data_t key = (st_data_t)addr;
3844 st_data_t val;
3845
3846 if (st_lookup(encoded_insn_data, key, &val)) {
3847 insn_data_t *e = (insn_data_t *)val;
3848 int opcode = e->insn;
3849 if (addr == e->trace_encoded_insn) {
3850 opcode += VM_BARE_INSTRUCTION_SIZE;
3851 }
3852#if USE_ZJIT
3853 else if (addr == e->zjit_encoded_insn) {
3854 opcode = e->zjit_insn;
3855 }
3856#endif
3857 return opcode;
3858 }
3859
3860 rb_bug("rb_vm_insn_addr2opcode: invalid insn address: %p", addr);
3861}
3862
3863// Decode `ISEQ_BODY(iseq)->iseq_encoded[i]` to an insn. This returns
3864// bare instructions even if they're trace/zjit instructions. Use
3865// rb_vm_insn_addr2opcode to decode trace/zjit instructions as is.
3866int
3867rb_vm_insn_decode(const VALUE encoded)
3868{
3869#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
3870 int insn = rb_vm_insn_addr2insn((void *)encoded);
3871#else
3872 int insn = (int)encoded;
3873#endif
3874 return insn;
3875}
3876
3877// Turn on or off tracing for a given instruction address
3878static inline int
3879encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, bool remain_current_trace)
3880{
3881 st_data_t key = (st_data_t)*iseq_encoded_insn;
3882 st_data_t val;
3883
3884 if (st_lookup(encoded_insn_data, key, &val)) {
3885 insn_data_t *e = (insn_data_t *)val;
3886 if (remain_current_trace && key == (st_data_t)e->trace_encoded_insn) {
3887 turnon = 1;
3888 }
3889 *iseq_encoded_insn = (VALUE) (turnon ? e->trace_encoded_insn : e->notrace_encoded_insn);
3890 return e->insn_len;
3891 }
3892
3893 rb_bug("trace_instrument: invalid insn address: %p", (void *)*iseq_encoded_insn);
3894}
3895
3896// Turn off tracing for an instruction at pos after tracing event flags are cleared
3897void
3898rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos)
3899{
3900 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3901 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3902 encoded_iseq_trace_instrument(&iseq_encoded[pos], 0, false);
3903}
3904
3905// We need to fire call events on instructions with b_call events if the block
3906// is running as a method. So, if we are listening for call events, then
3907// instructions that have b_call events need to become trace variants.
3908// Use this function when making decisions about recompiling to trace variants.
3909static inline rb_event_flag_t
3910add_bmethod_events(rb_event_flag_t events)
3911{
3912 if (events & RUBY_EVENT_CALL) {
3913 events |= RUBY_EVENT_B_CALL;
3914 }
3915 if (events & RUBY_EVENT_RETURN) {
3916 events |= RUBY_EVENT_B_RETURN;
3917 }
3918 return events;
3919}
3920
3921// Note, to support call/return events for bmethods, turnon_event can have more events than tpval.
3922static int
3923iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line)
3924{
3925 unsigned int pc;
3926 int n = 0;
3927 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3928 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3929
3930 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
3931
3932 for (pc=0; pc<body->iseq_size;) {
3933 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pc);
3934 rb_event_flag_t pc_events = entry->events;
3935 rb_event_flag_t target_events = turnon_events;
3936 unsigned int line = (int)entry->line_no;
3937
3938 if (target_line == 0 || target_line == line) {
3939 /* ok */
3940 }
3941 else {
3942 target_events &= ~RUBY_EVENT_LINE;
3943 }
3944
3945 if (pc_events & target_events) {
3946 n++;
3947 }
3948 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (target_events | iseq->aux.exec.global_trace_events), true);
3949 }
3950
3951 if (n > 0) {
3952 if (iseq->aux.exec.local_hooks == NULL) {
3953 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = RB_ZALLOC(rb_hook_list_t);
3954 iseq->aux.exec.local_hooks->is_local = true;
3955 }
3956 rb_hook_list_connect_tracepoint((VALUE)iseq, iseq->aux.exec.local_hooks, tpval, target_line);
3957 }
3958
3959 return n;
3960}
3961
3963 rb_event_flag_t turnon_events;
3964 VALUE tpval;
3965 unsigned int target_line;
3966 int n;
3967};
3968
3969static void
3970iseq_add_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
3971{
3973 data->n += iseq_add_local_tracepoint(iseq, data->turnon_events, data->tpval, data->target_line);
3974 iseq_iterate_children(iseq, iseq_add_local_tracepoint_i, p);
3975}
3976
3977int
3978rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line, bool target_bmethod)
3979{
3981 if (target_bmethod) {
3982 turnon_events = add_bmethod_events(turnon_events);
3983 }
3984 data.turnon_events = turnon_events;
3985 data.tpval = tpval;
3986 data.target_line = target_line;
3987 data.n = 0;
3988
3989 iseq_add_local_tracepoint_i(iseq, (void *)&data);
3990 if (0) rb_funcall(Qnil, rb_intern("puts"), 1, rb_iseq_disasm(iseq)); /* for debug */
3991 return data.n;
3992}
3993
3994static int
3995iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval)
3996{
3997 int n = 0;
3998
3999 if (iseq->aux.exec.local_hooks) {
4000 unsigned int pc;
4001 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
4002 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
4003 rb_event_flag_t local_events = 0;
4004
4005 rb_hook_list_remove_tracepoint(iseq->aux.exec.local_hooks, tpval);
4006 local_events = iseq->aux.exec.local_hooks->events;
4007
4008 if (local_events == 0) {
4009 rb_hook_list_free(iseq->aux.exec.local_hooks);
4010 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = NULL;
4011 }
4012
4013 local_events = add_bmethod_events(local_events);
4014 for (pc = 0; pc<body->iseq_size;) {
4015 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
4016 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (local_events | iseq->aux.exec.global_trace_events), false);
4017 }
4018 }
4019 return n;
4020}
4021
4023 VALUE tpval;
4024 int n;
4025};
4026
4027static void
4028iseq_remove_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
4029{
4031 data->n += iseq_remove_local_tracepoint(iseq, data->tpval);
4032 iseq_iterate_children(iseq, iseq_remove_local_tracepoint_i, p);
4033}
4034
4035int
4036rb_iseq_remove_local_tracepoint_recursively(const rb_iseq_t *iseq, VALUE tpval)
4037{
4039 data.tpval = tpval;
4040 data.n = 0;
4041
4042 iseq_remove_local_tracepoint_i(iseq, (void *)&data);
4043 return data.n;
4044}
4045
4046void
4047rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events)
4048{
4049 if (iseq->aux.exec.global_trace_events == turnon_events) {
4050 return;
4051 }
4052
4053 if (!ISEQ_EXECUTABLE_P(iseq)) {
4054 /* this is building ISeq */
4055 return;
4056 }
4057 else {
4058 unsigned int pc;
4059 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
4060 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
4061 rb_event_flag_t enabled_events;
4062 rb_event_flag_t local_events = iseq->aux.exec.local_hooks ? iseq->aux.exec.local_hooks->events : 0;
4063 ((rb_iseq_t *)iseq)->aux.exec.global_trace_events = turnon_events;
4064 enabled_events = add_bmethod_events(turnon_events | local_events);
4065
4066 for (pc=0; pc<body->iseq_size;) {
4067 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
4068 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & enabled_events, true);
4069 }
4070 }
4071}
4072
4073void rb_vm_cc_general(const struct rb_callcache *cc);
4074
4075static bool
4076clear_attr_cc(VALUE v)
4077{
4078 if (imemo_type_p(v, imemo_callcache) && vm_cc_ivar_p((const struct rb_callcache *)v)) {
4079 rb_vm_cc_general((struct rb_callcache *)v);
4080 return true;
4081 }
4082 else {
4083 return false;
4084 }
4085}
4086
4087static bool
4088clear_bf_cc(VALUE v)
4089{
4090 if (imemo_type_p(v, imemo_callcache) && vm_cc_bf_p((const struct rb_callcache *)v)) {
4091 rb_vm_cc_general((struct rb_callcache *)v);
4092 return true;
4093 }
4094 else {
4095 return false;
4096 }
4097}
4098
4099static int
4100clear_attr_ccs_i(void *vstart, void *vend, size_t stride, void *data)
4101{
4102 VALUE v = (VALUE)vstart;
4103 for (; v != (VALUE)vend; v += stride) {
4104 void *ptr = rb_asan_poisoned_object_p(v);
4105 rb_asan_unpoison_object(v, false);
4106 clear_attr_cc(v);
4107 asan_poison_object_if(ptr, v);
4108 }
4109 return 0;
4110}
4111
4112void
4113rb_clear_attr_ccs(void)
4114{
4115 rb_objspace_each_objects(clear_attr_ccs_i, NULL);
4116}
4117
4118static int
4119clear_bf_ccs_i(void *vstart, void *vend, size_t stride, void *data)
4120{
4121 VALUE v = (VALUE)vstart;
4122 for (; v != (VALUE)vend; v += stride) {
4123 void *ptr = rb_asan_poisoned_object_p(v);
4124 rb_asan_unpoison_object(v, false);
4125 clear_bf_cc(v);
4126 asan_poison_object_if(ptr, v);
4127 }
4128 return 0;
4129}
4130
4131void
4132rb_clear_bf_ccs(void)
4133{
4134 rb_objspace_each_objects(clear_bf_ccs_i, NULL);
4135}
4136
4137static int
4138trace_set_i(void *vstart, void *vend, size_t stride, void *data)
4139{
4140 rb_event_flag_t turnon_events = *(rb_event_flag_t *)data;
4141
4142 VALUE v = (VALUE)vstart;
4143 for (; v != (VALUE)vend; v += stride) {
4144 void *ptr = rb_asan_poisoned_object_p(v);
4145 rb_asan_unpoison_object(v, false);
4146
4147 if (rb_obj_is_iseq(v)) {
4148 rb_iseq_trace_set(rb_iseq_check((rb_iseq_t *)v), turnon_events);
4149 }
4150 else if (clear_attr_cc(v)) {
4151 }
4152 else if (clear_bf_cc(v)) {
4153 }
4154
4155 asan_poison_object_if(ptr, v);
4156 }
4157 return 0;
4158}
4159
4160void
4161rb_iseq_trace_set_all(rb_event_flag_t turnon_events)
4162{
4163 rb_objspace_each_objects(trace_set_i, &turnon_events);
4164}
4165
4166VALUE
4167rb_iseqw_local_variables(VALUE iseqval)
4168{
4169 return rb_iseq_local_variables(iseqw_check(iseqval));
4170}
4171
4172/*
4173 * call-seq:
4174 * iseq.to_binary(extra_data = nil) -> binary str
4175 *
4176 * Returns serialized iseq binary format data as a String object.
4177 * A corresponding iseq object is created by
4178 * RubyVM::InstructionSequence.load_from_binary() method.
4179 *
4180 * String extra_data will be saved with binary data.
4181 * You can access this data with
4182 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary).
4183 *
4184 * Note that the translated binary data is not portable.
4185 * You can not move this binary data to another machine.
4186 * You can not use the binary data which is created by another
4187 * version/another architecture of Ruby.
4188 */
4189static VALUE
4190iseqw_to_binary(int argc, VALUE *argv, VALUE self)
4191{
4192 VALUE opt = !rb_check_arity(argc, 0, 1) ? Qnil : argv[0];
4193 return rb_iseq_ibf_dump(iseqw_check(self), opt);
4194}
4195
4196/*
4197 * call-seq:
4198 * RubyVM::InstructionSequence.load_from_binary(binary) -> iseq
4199 *
4200 * Load an iseq object from binary format String object
4201 * created by RubyVM::InstructionSequence.to_binary.
4202 *
4203 * This loader does not have a verifier, so that loading broken/modified
4204 * binary causes critical problem.
4205 *
4206 * You should not load binary data provided by others.
4207 * You should use binary data translated by yourself.
4208 */
4209static VALUE
4210iseqw_s_load_from_binary(VALUE self, VALUE str)
4211{
4212 return iseqw_new(rb_iseq_ibf_load(str));
4213}
4214
4215/*
4216 * call-seq:
4217 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary) -> str
4218 *
4219 * Load extra data embed into binary format String object.
4220 */
4221static VALUE
4222iseqw_s_load_from_binary_extra_data(VALUE self, VALUE str)
4223{
4224 return rb_iseq_ibf_load_extra_data(str);
4225}
4226
4227#if VM_INSN_INFO_TABLE_IMPL == 2
4228
4229/* An implementation of succinct bit-vector for insn_info table.
4230 *
4231 * A succinct bit-vector is a small and efficient data structure that provides
4232 * a bit-vector augmented with an index for O(1) rank operation:
4233 *
4234 * rank(bv, n): the number of 1's within a range from index 0 to index n
4235 *
4236 * This can be used to lookup insn_info table from PC.
4237 * For example, consider the following iseq and insn_info_table:
4238 *
4239 * iseq insn_info_table
4240 * PC insn+operand position lineno event
4241 * 0: insn1 0: 1 [Li]
4242 * 2: insn2 2: 2 [Li] <= (A)
4243 * 5: insn3 8: 3 [Li] <= (B)
4244 * 8: insn4
4245 *
 * In this case, a succinct bit-vector whose bits at indexes 0, 2, and 8 are
 * "1" and whose other bits are "0", i.e., "101000001", is created.
 * To lookup the lineno of insn2, calculate rank("101000001", 2) = 2, so
 * the line (A) is the entry in question.
 * To lookup the lineno of insn4, calculate rank("101000001", 8) = 3, so
 * the line (B) is the entry in question.
4252 *
 * A naive implementation of succinct bit-vector works really well
 * not only for large sizes but also for small sizes. However, it has
 * a tiny overhead for very small sizes. So, this implementation consists
 * of two parts: one part is the "immediate" table that keeps rank results
 * as a raw table, and the other part is a normal succinct bit-vector.
4258 */
4259
4260#define IMMEDIATE_TABLE_SIZE 54 /* a multiple of 9, and < 128 */
4261
4262struct succ_index_table {
4263 uint64_t imm_part[IMMEDIATE_TABLE_SIZE / 9];
4264 struct succ_dict_block {
4265 unsigned int rank;
4266 uint64_t small_block_ranks; /* 9 bits * 7 = 63 bits */
4267 uint64_t bits[512/64];
4268 } succ_part[FLEX_ARY_LEN];
4269};
4270
/*
 * Packed rank accessors.
 *
 * imm_block_rank_{set,get}: slot i of a 64-bit word holds a 7-bit rank at
 * bit offset 7*i.
 * small_block_rank_{set,get}: small block i (i >= 1) holds a 9-bit rank at
 * bit offset 9*(i-1); small block 0 has no stored field and reads as 0.
 *
 * The set macros OR the value into v, so the target field must still be
 * zero.  small_block_rank_set must not be used with i == 0 (the shift
 * count would be negative).
 *
 * Each expansion is fully parenthesized so it can be used safely inside a
 * larger expression, and the get macros mask before narrowing to int so
 * the conversion never sees a value that does not fit.
 */
#define imm_block_rank_set(v, i, r) ((v) |= (uint64_t)(r) << (7 * (i)))
#define imm_block_rank_get(v, i) ((int)(((v) >> (7 * (i))) & 0x7f))
#define small_block_rank_set(v, i, r) ((v) |= (uint64_t)(r) << (9 * ((i) - 1)))
#define small_block_rank_get(v, i) ((i) == 0 ? 0 : (int)(((v) >> (9 * ((i) - 1))) & 0x1ff))
4275
4276static struct succ_index_table *
4277succ_index_table_create(int max_pos, int *data, int size)
4278{
4279 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
4280 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
4281 struct succ_index_table *sd =
4282 rb_xcalloc_mul_add_mul(
4283 imm_size, sizeof(uint64_t),
4284 succ_size, sizeof(struct succ_dict_block));
4285 int i, j, k, r;
4286
4287 r = 0;
4288 for (j = 0; j < imm_size; j++) {
4289 for (i = 0; i < 9; i++) {
4290 if (r < size && data[r] == j * 9 + i) r++;
4291 imm_block_rank_set(sd->imm_part[j], i, r);
4292 }
4293 }
4294 for (k = 0; k < succ_size; k++) {
4295 struct succ_dict_block *sd_block = &sd->succ_part[k];
4296 int small_rank = 0;
4297 sd_block->rank = r;
4298 for (j = 0; j < 8; j++) {
4299 uint64_t bits = 0;
4300 if (j) small_block_rank_set(sd_block->small_block_ranks, j, small_rank);
4301 for (i = 0; i < 64; i++) {
4302 if (r < size && data[r] == k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE) {
4303 bits |= ((uint64_t)1) << i;
4304 r++;
4305 }
4306 }
4307 sd_block->bits[j] = bits;
4308 small_rank += rb_popcount64(bits);
4309 }
4310 }
4311 return sd;
4312}
4313
4314static unsigned int *
4315succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size)
4316{
4317 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
4318 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
4319 unsigned int *positions = ALLOC_N(unsigned int, size), *p;
4320 int i, j, k, r = -1;
4321 p = positions;
4322 for (j = 0; j < imm_size; j++) {
4323 for (i = 0; i < 9; i++) {
4324 int nr = imm_block_rank_get(sd->imm_part[j], i);
4325 if (r != nr) *p++ = j * 9 + i;
4326 r = nr;
4327 }
4328 }
4329 for (k = 0; k < succ_size; k++) {
4330 for (j = 0; j < 8; j++) {
4331 for (i = 0; i < 64; i++) {
4332 if (sd->succ_part[k].bits[j] & (((uint64_t)1) << i)) {
4333 *p++ = k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE;
4334 }
4335 }
4336 }
4337 }
4338 return positions;
4339}
4340
4341static int
4342succ_index_lookup(const struct succ_index_table *sd, int x)
4343{
4344 if (x < IMMEDIATE_TABLE_SIZE) {
4345 const int i = x / 9;
4346 const int j = x % 9;
4347 return imm_block_rank_get(sd->imm_part[i], j);
4348 }
4349 else {
4350 const int block_index = (x - IMMEDIATE_TABLE_SIZE) / 512;
4351 const struct succ_dict_block *block = &sd->succ_part[block_index];
4352 const int block_bit_index = (x - IMMEDIATE_TABLE_SIZE) % 512;
4353 const int small_block_index = block_bit_index / 64;
4354 const int small_block_popcount = small_block_rank_get(block->small_block_ranks, small_block_index);
4355 const int popcnt = rb_popcount64(block->bits[small_block_index] << (63 - block_bit_index % 64));
4356
4357 return block->rank + small_block_popcount + popcnt;
4358 }
4359}
4360#endif
4361
4362
4363/*
4364 * call-seq:
4365 * iseq.script_lines -> array or nil
4366 *
4367 * It returns recorded script lines if it is available.
4368 * The script lines are not limited to the iseq range, but
4369 * are entire lines of the source file.
4370 *
4371 * Note that this is an API for ruby internal use, debugging,
4372 * and research. Do not use this for any other purpose.
4373 * The compatibility is not guaranteed.
4374 */
4375static VALUE
4376iseqw_script_lines(VALUE self)
4377{
4378 const rb_iseq_t *iseq = iseqw_check(self);
4379 return ISEQ_BODY(iseq)->variable.script_lines;
4380}
4381
4382/*
4383 * Document-class: RubyVM::InstructionSequence
4384 *
4385 * The InstructionSequence class represents a compiled sequence of
4386 * instructions for the Virtual Machine used in MRI. Not all implementations of Ruby
4387 * may implement this class, and for the implementations that implement it,
4388 * the methods defined and behavior of the methods can change in any version.
4389 *
4390 * With it, you can get a handle to the instructions that make up a method or
4391 * a proc, compile strings of Ruby code down to VM instructions, and
4392 * disassemble instruction sequences to strings for easy inspection. It is
4393 * mostly useful if you want to learn how YARV works, but it also lets
4394 * you control various settings for the Ruby iseq compiler.
4395 *
4396 * You can find the source for the VM instructions in +insns.def+ in the Ruby
4397 * source.
4398 *
4399 * The instruction sequence results will almost certainly change as Ruby
4400 * changes, so example output in this documentation may be different from what
4401 * you see.
4402 *
4403 * Of course, this class is MRI specific.
4404 */
4405
4406void
4407Init_ISeq(void)
4408{
4409 /* declare ::RubyVM::InstructionSequence */
4410 rb_cISeq = rb_define_class_under(rb_cRubyVM, "InstructionSequence", rb_cObject);
4411 rb_undef_alloc_func(rb_cISeq);
4412 rb_define_method(rb_cISeq, "inspect", iseqw_inspect, 0);
4413 rb_define_method(rb_cISeq, "disasm", iseqw_disasm, 0);
4414 rb_define_method(rb_cISeq, "disassemble", iseqw_disasm, 0);
4415 rb_define_method(rb_cISeq, "to_a", iseqw_to_a, 0);
4416 rb_define_method(rb_cISeq, "eval", iseqw_eval, 0);
4417
4418 rb_define_method(rb_cISeq, "to_binary", iseqw_to_binary, -1);
4419 rb_define_singleton_method(rb_cISeq, "load_from_binary", iseqw_s_load_from_binary, 1);
4420 rb_define_singleton_method(rb_cISeq, "load_from_binary_extra_data", iseqw_s_load_from_binary_extra_data, 1);
4421
4422 /* location APIs */
4423 rb_define_method(rb_cISeq, "path", iseqw_path, 0);
4424 rb_define_method(rb_cISeq, "absolute_path", iseqw_absolute_path, 0);
4425 rb_define_method(rb_cISeq, "label", iseqw_label, 0);
4426 rb_define_method(rb_cISeq, "base_label", iseqw_base_label, 0);
4427 rb_define_method(rb_cISeq, "first_lineno", iseqw_first_lineno, 0);
4428 rb_define_method(rb_cISeq, "trace_points", iseqw_trace_points, 0);
4429 rb_define_method(rb_cISeq, "each_child", iseqw_each_child, 0);
4430
4431#if 0 /* TBD */
4432 rb_define_private_method(rb_cISeq, "marshal_dump", iseqw_marshal_dump, 0);
4433 rb_define_private_method(rb_cISeq, "marshal_load", iseqw_marshal_load, 1);
4434 /* disable this feature because there is no verifier. */
4435 rb_define_singleton_method(rb_cISeq, "load", iseq_s_load, -1);
4436#endif
4437 (void)iseq_s_load;
4438
4439 rb_define_singleton_method(rb_cISeq, "compile", iseqw_s_compile, -1);
4440 rb_define_singleton_method(rb_cISeq, "compile_parsey", iseqw_s_compile_parsey, -1);
4441 rb_define_singleton_method(rb_cISeq, "compile_prism", iseqw_s_compile_prism, -1);
4442 rb_define_singleton_method(rb_cISeq, "compile_file_prism", iseqw_s_compile_file_prism, -1);
4443 rb_define_singleton_method(rb_cISeq, "new", iseqw_s_compile, -1);
4444 rb_define_singleton_method(rb_cISeq, "compile_file", iseqw_s_compile_file, -1);
4445 rb_define_singleton_method(rb_cISeq, "compile_option", iseqw_s_compile_option_get, 0);
4446 rb_define_singleton_method(rb_cISeq, "compile_option=", iseqw_s_compile_option_set, 1);
4447 rb_define_singleton_method(rb_cISeq, "disasm", iseqw_s_disasm, 1);
4448 rb_define_singleton_method(rb_cISeq, "disassemble", iseqw_s_disasm, 1);
4449 rb_define_singleton_method(rb_cISeq, "of", iseqw_s_of, 1);
4450
4451 // script lines
4452 rb_define_method(rb_cISeq, "script_lines", iseqw_script_lines, 0);
4453
4454 rb_undef_method(CLASS_OF(rb_cISeq), "translate");
4455 rb_undef_method(CLASS_OF(rb_cISeq), "load_iseq");
4456}
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
#define rb_define_private_method(klass, mid, func, arity)
Defines klass#mid and makes it private.
#define RUBY_EVENT_END
Encountered an end of a class clause.
Definition event.h:40
#define RUBY_EVENT_C_CALL
A method, written in C, is called.
Definition event.h:43
#define RUBY_EVENT_B_RETURN
Encountered a next statement.
Definition event.h:56
#define RUBY_EVENT_CLASS
Encountered a new class.
Definition event.h:39
#define RUBY_EVENT_LINE
Encountered a new line.
Definition event.h:38
#define RUBY_EVENT_RETURN
Encountered a return statement.
Definition event.h:42
#define RUBY_EVENT_C_RETURN
Return from a method, written in C.
Definition event.h:44
#define RUBY_EVENT_B_CALL
Encountered a yield statement.
Definition event.h:55
uint32_t rb_event_flag_t
Represents event(s).
Definition event.h:108
#define RUBY_EVENT_CALL
A method, written in Ruby, is called.
Definition event.h:41
#define RUBY_EVENT_RESCUE
Encountered a rescue statement.
Definition event.h:61
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition class.c:1493
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
Definition class.c:2641
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv into the given VALUE references according to the format string.
Definition class.c:3110
#define rb_str_new2
Old name of rb_str_new_cstr.
Definition string.h:1675
#define T_FILE
Old name of RUBY_T_FILE.
Definition value_type.h:62
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
Definition long.h:48
#define rb_str_cat2
Old name of rb_str_cat_cstr.
Definition string.h:1683
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define SPECIAL_CONST_P
Old name of RB_SPECIAL_CONST_P.
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define ZALLOC
Old name of RB_ZALLOC.
Definition memory.h:402
#define LL2NUM
Old name of RB_LL2NUM.
Definition long_long.h:30
#define CLASS_OF
Old name of rb_class_of.
Definition globals.h:205
#define T_NONE
Old name of RUBY_T_NONE.
Definition value_type.h:74
#define FIX2INT
Old name of RB_FIX2INT.
Definition int.h:41
#define T_HASH
Old name of RUBY_T_HASH.
Definition value_type.h:65
#define ALLOC_N
Old name of RB_ALLOC_N.
Definition memory.h:399
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
Definition fl_type.h:131
#define LONG2NUM
Old name of RB_LONG2NUM.
Definition long.h:50
#define Qtrue
Old name of RUBY_Qtrue.
#define NUM2INT
Old name of RB_NUM2INT.
Definition int.h:44
#define INT2NUM
Old name of RB_INT2NUM.
Definition int.h:43
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
Definition value_type.h:56
#define NIL_P
Old name of RB_NIL_P.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
Definition value_type.h:85
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition long.h:51
#define UINT2NUM
Old name of RB_UINT2NUM.
Definition int.h:46
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
Definition symbol.h:47
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:657
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
Definition eval.c:681
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1430
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
Identical to rb_typeddata_is_kind_of(), except it raises exceptions instead of returning false.
Definition error.c:1397
VALUE rb_eSyntaxError
SyntaxError exception.
Definition error.c:1447
VALUE rb_class_superclass(VALUE klass)
Queries the parent of the given class.
Definition object.c:2183
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
Definition object.c:101
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:243
VALUE rb_inspect(VALUE obj)
Generates a human-readable textual representation of the given object.
Definition object.c:659
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition gc.h:603
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
Definition vm_eval.c:1117
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new_from_values(long n, const VALUE *elts)
Identical to rb_ary_new_from_args(), except how objects are passed.
VALUE rb_ary_resurrect(VALUE ary)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_hidden_new(long capa)
Allocates a hidden (no class) empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
VALUE rb_ary_entry(VALUE ary, long off)
Queries an element of an array.
VALUE rb_ary_join(VALUE ary, VALUE sep)
Recursively stringises the elements of the passed array, flattens that result, then joins the sequenc...
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
Definition error.h:284
VALUE rb_file_open_str(VALUE fname, const char *fmode)
Identical to rb_file_open(), except it takes the pathname as a Ruby's string instead of C's.
Definition io.c:7264
VALUE rb_io_close(VALUE io)
Closes the IO.
Definition io.c:5753
int rb_is_local_id(ID id)
Classifies the given ID, then sees if it is a local variable.
Definition symbol.c:1093
VALUE rb_obj_is_method(VALUE recv)
Queries if the given object is a method.
Definition proc.c:1674
VALUE rb_obj_is_proc(VALUE recv)
Queries if the given object is a proc.
Definition proc.c:120
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
Definition string.c:4107
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1498
#define rb_exc_new_cstr(exc, str)
Identical to rb_exc_new(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1670
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition string.c:2309
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
Definition string.c:3875
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
Definition string.c:2327
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3697
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
Definition string.c:7702
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
Definition string.c:4539
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
Definition string.c:4356
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1656
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition string.c:3056
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1514
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
Definition symbol.c:884
VALUE rb_class_name(VALUE obj)
Queries the name of the given object's class.
Definition variable.c:493
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
Definition vm_method.c:3093
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
Definition vm_method.c:1418
VALUE rb_check_funcall(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcallv(), except it returns RUBY_Qundef instead of raising rb_eNoMethodError.
Definition vm_eval.c:686
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1117
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
Definition symbol.c:972
VALUE rb_io_path(VALUE io)
Returns the path for the given IO.
Definition io.c:2973
int len
Length of the buffer.
Definition io.h:8
VALUE rb_ractor_make_shareable(VALUE obj)
Destructively transforms the passed object so that multiple Ractors can share it.
Definition ractor.c:1387
#define RB_NUM2INT
Just another name of rb_num2int_inline.
Definition int.h:38
#define RB_INT2NUM
Just another name of rb_int2num_inline.
Definition int.h:37
VALUE rb_yield(VALUE val)
Yields the block.
Definition vm_eval.c:1372
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:167
#define RB_ZALLOC(type)
Shorthand of RB_ZALLOC_N with n=1.
Definition memory.h:249
VALUE type(ANYARGS)
ANYARGS-ed function type.
void rb_hash_foreach(VALUE q, int_type *w, VALUE e)
Iteration over the given hash.
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
Definition rarray.h:281
#define RARRAY_AREF(a, i)
Definition rarray.h:403
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
Definition rbasic.h:163
#define RHASH_SIZE(h)
Queries the size of the hash.
Definition rhash.h:69
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:66
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
Definition rstring.h:442
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
Definition rstring.h:89
#define RUBY_TYPED_DEFAULT_FREE
This is a value you can set to rb_data_type_struct::dfree.
Definition rtypeddata.h:79
#define TypedData_Get_Struct(obj, type, data_type, sval)
Obtains a C struct from inside of a wrapper Ruby object.
Definition rtypeddata.h:515
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
Definition rtypeddata.h:450
#define TypedData_Make_Struct(klass, type, data_type, sval)
Identical to TypedData_Wrap_Struct, except it allocates a new data region internally instead of takin...
Definition rtypeddata.h:497
#define FilePathValue(v)
Ensures that the parameter object is a path.
Definition ruby.h:90
#define RTEST
This is an old name of RB_TEST.
Definition iseq.h:280
const ID * segments
A null-terminated list of ids, used to represent a constant's path. idNULL is used to represent the ::...
Definition vm_core.h:287
Definition vm_core.h:295
Definition vm_core.h:290
Definition iseq.h:251
A line and column in a string.
uint32_t column
The column number.
int32_t line
The line number.
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
size_t size
The number of offsets in the list.
uint32_t node_id
The unique identifier for this node, which is deterministic based on the source.
Definition ast.h:1085
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1091
int32_t line
The line within the file that the parse starts on.
Definition options.h:118
pm_scope_node_t node
The resulting scope node that will hold the generated AST.
pm_options_t options
The options that will be passed to the parser.
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
VALUE * script_lines
This is a pointer to the list of script lines for the ISEQs that will be associated with this scope n...
Definition method.h:63
This is the struct that holds necessary info for a struct.
Definition rtypeddata.h:203
struct rb_iseq_constant_body::@157 param
parameter information
Definition st.h:79
Definition vm_core.h:299
intptr_t SIGNED_VALUE
A signed integer type that has the same width with VALUE.
Definition value.h:63
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
Definition value_type.h:433
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376
ruby_value_type
C-level type of an object.
Definition value_type.h:113