Ruby 3.5.0dev (2025-10-31 revision 57f76f62d5e12766465f11ebb0d0b0b0d4d549ce)
iseq.c (57f76f62d5e12766465f11ebb0d0b0b0d4d549ce)
1/**********************************************************************
2
3 iseq.c -
4
5 $Author$
6 created at: 2006-07-11(Tue) 09:00:03 +0900
7
8 Copyright (C) 2006 Koichi Sasada
9
10**********************************************************************/
11
12#define RUBY_VM_INSNS_INFO 1
13/* #define RUBY_MARK_FREE_DEBUG 1 */
14
15#include "ruby/internal/config.h"
16
17#ifdef HAVE_DLADDR
18# include <dlfcn.h>
19#endif
20
21#include "eval_intern.h"
22#include "id_table.h"
23#include "internal.h"
24#include "internal/bits.h"
25#include "internal/class.h"
26#include "internal/compile.h"
27#include "internal/error.h"
28#include "internal/file.h"
29#include "internal/gc.h"
30#include "internal/hash.h"
31#include "internal/io.h"
32#include "internal/ruby_parser.h"
33#include "internal/sanitizers.h"
34#include "internal/set_table.h"
35#include "internal/symbol.h"
36#include "internal/thread.h"
37#include "internal/variable.h"
38#include "iseq.h"
39#include "ruby/util.h"
40#include "vm_core.h"
41#include "vm_callinfo.h"
42#include "yjit.h"
43#include "ruby/ractor.h"
44#include "builtin.h"
45#include "insns.inc"
46#include "insns_info.inc"
47#include "zjit.h"
48
49VALUE rb_cISeq;
50static VALUE iseqw_new(const rb_iseq_t *iseq);
51static const rb_iseq_t *iseqw_check(VALUE iseqw);
52
53#if VM_INSN_INFO_TABLE_IMPL == 2
54static struct succ_index_table *succ_index_table_create(int max_pos, int *data, int size);
55static unsigned int *succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size);
56static int succ_index_lookup(const struct succ_index_table *sd, int x);
57#endif
58
59#define hidden_obj_p(obj) (!SPECIAL_CONST_P(obj) && !RBASIC(obj)->klass)
60
61static inline VALUE
62obj_resurrect(VALUE obj)
63{
64 if (hidden_obj_p(obj)) {
65 switch (BUILTIN_TYPE(obj)) {
66 case T_STRING:
67 obj = rb_str_resurrect(obj);
68 break;
69 case T_ARRAY:
70 obj = rb_ary_resurrect(obj);
71 break;
72 case T_HASH:
73 obj = rb_hash_resurrect(obj);
74 break;
75 default:
76 break;
77 }
78 }
79 return obj;
80}
81
82static void
83free_arena(struct iseq_compile_data_storage *cur)
84{
85 struct iseq_compile_data_storage *next;
86
87 while (cur) {
88 next = cur->next;
89 ruby_xfree(cur);
90 cur = next;
91 }
92}
93
94static void
95compile_data_free(struct iseq_compile_data *compile_data)
96{
97 if (compile_data) {
98 free_arena(compile_data->node.storage_head);
99 free_arena(compile_data->insn.storage_head);
100 if (compile_data->ivar_cache_table) {
101 rb_id_table_free(compile_data->ivar_cache_table);
102 }
103 ruby_xfree(compile_data);
104 }
105}
106
107static void
108remove_from_constant_cache(ID id, IC ic)
109{
110 rb_vm_t *vm = GET_VM();
111 VALUE lookup_result;
112 st_data_t ic_data = (st_data_t)ic;
113
114 if (rb_id_table_lookup(vm->constant_cache, id, &lookup_result)) {
115 set_table *ics = (set_table *)lookup_result;
116 set_table_delete(ics, &ic_data);
117
118 if (ics->num_entries == 0 &&
119 // See comment in vm_track_constant_cache on why we need this check
120 id != vm->inserting_constant_cache_id) {
121 rb_id_table_delete(vm->constant_cache, id);
122 set_free_table(ics);
123 }
124 }
125}
126
127// When an ISEQ is being freed, all of its associated ICs are going to go away
128// as well. Because of this, we need to iterate over the ICs, and clear them
129// from the VM's constant cache.
130static void
131iseq_clear_ic_references(const rb_iseq_t *iseq)
132{
133 // In some cases (when there is a compilation error), we end up with
134 // ic_size greater than 0, but no allocated is_entries buffer.
135 // If there's no is_entries buffer to loop through, return early.
136 // [Bug #19173]
137 if (!ISEQ_BODY(iseq)->is_entries) {
138 return;
139 }
140
141 for (unsigned int ic_idx = 0; ic_idx < ISEQ_BODY(iseq)->ic_size; ic_idx++) {
142 IC ic = &ISEQ_IS_IC_ENTRY(ISEQ_BODY(iseq), ic_idx);
143
144 // Iterate over the IC's constant path's segments and clean any references to
145 // the ICs out of the VM's constant cache table.
146 const ID *segments = ic->segments;
147
148 // It's possible that segments is NULL if we overallocated an IC but
149 // optimizations removed the instruction using it
150 if (segments == NULL)
151 continue;
152
153 for (int i = 0; segments[i]; i++) {
154 ID id = segments[i];
155 if (id == idNULL) continue;
156 remove_from_constant_cache(id, ic);
157 }
158
159 ruby_xfree((void *)segments);
160 }
161}
162
163void
164rb_iseq_free(const rb_iseq_t *iseq)
165{
166 RUBY_FREE_ENTER("iseq");
167
168 if (iseq && ISEQ_BODY(iseq)) {
169 iseq_clear_ic_references(iseq);
170 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
171#if USE_YJIT
172 rb_yjit_iseq_free(iseq);
173 if (FL_TEST_RAW((VALUE)iseq, ISEQ_TRANSLATED)) {
174 RUBY_ASSERT(rb_yjit_live_iseq_count > 0);
175 rb_yjit_live_iseq_count--;
176 }
177#endif
178#if USE_ZJIT
179 rb_zjit_iseq_free(iseq);
180#endif
181 ruby_xfree((void *)body->iseq_encoded);
182 ruby_xfree((void *)body->insns_info.body);
183 ruby_xfree((void *)body->insns_info.positions);
184#if VM_INSN_INFO_TABLE_IMPL == 2
185 ruby_xfree(body->insns_info.succ_index_table);
186#endif
187 ruby_xfree((void *)body->is_entries);
188 ruby_xfree(body->call_data);
189 ruby_xfree((void *)body->catch_table);
190 ruby_xfree((void *)body->param.opt_table);
191 if (ISEQ_MBITS_BUFLEN(body->iseq_size) > 1 && body->mark_bits.list) {
192 ruby_xfree((void *)body->mark_bits.list);
193 }
194
195 ruby_xfree(body->variable.original_iseq);
196
197 if (body->param.keyword != NULL) {
198 if (body->param.keyword->table != &body->local_table[body->param.keyword->bits_start - body->param.keyword->num])
199 ruby_xfree((void *)body->param.keyword->table);
200 if (body->param.keyword->default_values) {
201 ruby_xfree((void *)body->param.keyword->default_values);
202 }
203 ruby_xfree((void *)body->param.keyword);
204 }
205 if (LIKELY(body->local_table != rb_iseq_shared_exc_local_tbl)) {
206 ruby_xfree((void *)body->local_table);
207 }
208 ruby_xfree((void *)body->lvar_states);
209
210 compile_data_free(ISEQ_COMPILE_DATA(iseq));
211 if (body->outer_variables) rb_id_table_free(body->outer_variables);
212 ruby_xfree(body);
213 }
214
215 if (iseq && ISEQ_EXECUTABLE_P(iseq) && iseq->aux.exec.local_hooks) {
216 rb_hook_list_free(iseq->aux.exec.local_hooks);
217 }
218
219 RUBY_FREE_LEAVE("iseq");
220}
221
222typedef VALUE iseq_value_itr_t(void *ctx, VALUE obj);
223
224static inline void
225iseq_scan_bits(unsigned int page, iseq_bits_t bits, VALUE *code, VALUE *original_iseq)
226{
227 unsigned int offset;
228 unsigned int page_offset = (page * ISEQ_MBITS_BITLENGTH);
229
230 while (bits) {
231 offset = ntz_intptr(bits);
232 VALUE op = code[page_offset + offset];
233 rb_gc_mark_and_move(&code[page_offset + offset]);
234 VALUE newop = code[page_offset + offset];
235 if (original_iseq && newop != op) {
236 original_iseq[page_offset + offset] = newop;
237 }
238 bits &= bits - 1; // Reset Lowest Set Bit (BLSR)
239 }
240}
241
242static void
243rb_iseq_mark_and_move_each_compile_data_value(const rb_iseq_t *iseq, VALUE *original_iseq)
244{
245 unsigned int size;
246 VALUE *code;
247 const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq);
248
249 size = compile_data->iseq_size;
250 code = compile_data->iseq_encoded;
251
252 // Embedded VALUEs
253 if (compile_data->mark_bits.list) {
254 if(compile_data->is_single_mark_bit) {
255 iseq_scan_bits(0, compile_data->mark_bits.single, code, original_iseq);
256 }
257 else {
258 for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) {
259 iseq_bits_t bits = compile_data->mark_bits.list[i];
260 iseq_scan_bits(i, bits, code, original_iseq);
261 }
262 }
263 }
264}
265static void
266rb_iseq_mark_and_move_each_body_value(const rb_iseq_t *iseq, VALUE *original_iseq)
267{
268 unsigned int size;
269 VALUE *code;
270 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
271
272 size = body->iseq_size;
273 code = body->iseq_encoded;
274
275 union iseq_inline_storage_entry *is_entries = body->is_entries;
276
277 if (body->is_entries) {
278 // Skip iterating over ivc caches
279 is_entries += body->ivc_size;
280
281 // ICVARC entries
282 for (unsigned int i = 0; i < body->icvarc_size; i++, is_entries++) {
283 ICVARC icvarc = (ICVARC)is_entries;
284 if (icvarc->entry) {
285 RUBY_ASSERT(!RB_TYPE_P(icvarc->entry->class_value, T_NONE));
286
287 rb_gc_mark_and_move(&icvarc->entry->class_value);
288 }
289 }
290
291 // ISE entries
292 for (unsigned int i = 0; i < body->ise_size; i++, is_entries++) {
293 union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)is_entries;
294 if (is->once.value) {
295 rb_gc_mark_and_move(&is->once.value);
296 }
297 }
298
299 // IC Entries
300 for (unsigned int i = 0; i < body->ic_size; i++, is_entries++) {
301 IC ic = (IC)is_entries;
302 if (ic->entry) {
303 rb_gc_mark_and_move_ptr(&ic->entry);
304 }
305 }
306 }
307
308 // Embedded VALUEs
309 if (body->mark_bits.list) {
310 if (ISEQ_MBITS_BUFLEN(size) == 1) {
311 iseq_scan_bits(0, body->mark_bits.single, code, original_iseq);
312 }
313 else {
314 for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) {
315 iseq_bits_t bits = body->mark_bits.list[i];
316 iseq_scan_bits(i, bits, code, original_iseq);
317 }
318 }
319 }
320}
321
322static bool
323cc_is_active(const struct rb_callcache *cc, bool reference_updating)
324{
325 if (cc) {
326 if (cc == rb_vm_empty_cc() || rb_vm_empty_cc_for_super()) {
327 return false;
328 }
329
330 if (reference_updating) {
331 cc = (const struct rb_callcache *)rb_gc_location((VALUE)cc);
332 }
333
334 if (vm_cc_markable(cc) && vm_cc_valid(cc)) {
335 const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc);
336 if (reference_updating) {
337 cme = (const struct rb_callable_method_entry_struct *)rb_gc_location((VALUE)cme);
338 }
339 if (!METHOD_ENTRY_INVALIDATED(cme)) {
340 return true;
341 }
342 }
343 }
344 return false;
345}
346
347void
348rb_iseq_mark_and_move(rb_iseq_t *iseq, bool reference_updating)
349{
350 RUBY_MARK_ENTER("iseq");
351
352 rb_gc_mark_and_move(&iseq->wrapper);
353
354 if (ISEQ_BODY(iseq)) {
355 struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
356
357 rb_iseq_mark_and_move_each_body_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL);
358
359 rb_gc_mark_and_move(&body->variable.script_lines);
360 rb_gc_mark_and_move(&body->location.label);
361 rb_gc_mark_and_move(&body->location.base_label);
362 rb_gc_mark_and_move(&body->location.pathobj);
363 if (body->local_iseq) rb_gc_mark_and_move_ptr(&body->local_iseq);
364 if (body->parent_iseq) rb_gc_mark_and_move_ptr(&body->parent_iseq);
365 if (body->mandatory_only_iseq) rb_gc_mark_and_move_ptr(&body->mandatory_only_iseq);
366
367 if (body->call_data) {
368 for (unsigned int i = 0; i < body->ci_size; i++) {
369 struct rb_call_data *cds = body->call_data;
370
371 if (cds[i].ci) rb_gc_mark_and_move_ptr(&cds[i].ci);
372
373 if (cc_is_active(cds[i].cc, reference_updating)) {
374 rb_gc_mark_and_move_ptr(&cds[i].cc);
375 }
376 else if (cds[i].cc != rb_vm_empty_cc()) {
377 cds[i].cc = rb_vm_empty_cc();
378 }
379 }
380 }
381
382 if (body->param.flags.has_kw && body->param.keyword != NULL) {
383 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
384
385 if (keyword->default_values != NULL) {
386 for (int j = 0, i = keyword->required_num; i < keyword->num; i++, j++) {
387 rb_gc_mark_and_move(&keyword->default_values[j]);
388 }
389 }
390 }
391
392 if (body->catch_table) {
393 struct iseq_catch_table *table = body->catch_table;
394
395 for (unsigned int i = 0; i < table->size; i++) {
396 struct iseq_catch_table_entry *entry;
397 entry = UNALIGNED_MEMBER_PTR(table, entries[i]);
398 if (entry->iseq) {
399 rb_gc_mark_and_move_ptr(&entry->iseq);
400 }
401 }
402 }
403
404 if (reference_updating) {
405#if USE_YJIT
406 rb_yjit_iseq_update_references(iseq);
407#endif
408#if USE_ZJIT
409 rb_zjit_iseq_update_references(body->zjit_payload);
410#endif
411 }
412 else {
413 // TODO: check jit payload
414 if (!rb_gc_checking_shareable()) {
415#if USE_YJIT
416 rb_yjit_iseq_mark(body->yjit_payload);
417#endif
418#if USE_ZJIT
419 rb_zjit_iseq_mark(body->zjit_payload);
420#endif
421 }
422 }
423
424 // TODO: ractor aware coverage
425 if (!rb_gc_checking_shareable()) {
426 rb_gc_mark_and_move(&body->variable.coverage);
427 rb_gc_mark_and_move(&body->variable.pc2branchindex);
428 }
429 }
430
431 if (FL_TEST_RAW((VALUE)iseq, ISEQ_NOT_LOADED_YET)) {
432 if (!rb_gc_checking_shareable()) {
433 rb_gc_mark_and_move(&iseq->aux.loader.obj);
434 }
435 }
436 else if (FL_TEST_RAW((VALUE)iseq, ISEQ_USE_COMPILE_DATA)) {
437 if (!rb_gc_checking_shareable()) {
438 const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq);
439
440 rb_iseq_mark_and_move_insn_storage(compile_data->insn.storage_head);
441 rb_iseq_mark_and_move_each_compile_data_value(iseq, reference_updating ? ISEQ_ORIGINAL_ISEQ(iseq) : NULL);
442
443 rb_gc_mark_and_move((VALUE *)&compile_data->err_info);
444 rb_gc_mark_and_move((VALUE *)&compile_data->catch_table_ary);
445 }
446 }
447 else {
448 /* executable */
449 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
450
451 if (iseq->aux.exec.local_hooks) {
452 rb_hook_list_mark_and_move(iseq->aux.exec.local_hooks);
453 }
454 }
455
456 RUBY_MARK_LEAVE("iseq");
457}
458
459static size_t
460param_keyword_size(const struct rb_iseq_param_keyword *pkw)
461{
462 size_t size = 0;
463
464 if (!pkw) return size;
465
466 size += sizeof(struct rb_iseq_param_keyword);
467 size += sizeof(VALUE) * (pkw->num - pkw->required_num);
468
469 return size;
470}
471
472size_t
473rb_iseq_memsize(const rb_iseq_t *iseq)
474{
475 size_t size = 0; /* struct already counted as RVALUE size */
476 const struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
477 const struct iseq_compile_data *compile_data;
478
479 /* TODO: should we count original_iseq? */
480
481 if (ISEQ_EXECUTABLE_P(iseq) && body) {
482 size += sizeof(struct rb_iseq_constant_body);
483 size += body->iseq_size * sizeof(VALUE);
484 size += body->insns_info.size * (sizeof(struct iseq_insn_info_entry) + sizeof(unsigned int));
485 size += body->local_table_size * sizeof(ID);
486 size += ISEQ_MBITS_BUFLEN(body->iseq_size) * ISEQ_MBITS_SIZE;
487 if (body->catch_table) {
488 size += iseq_catch_table_bytes(body->catch_table->size);
489 }
490 size += (body->param.opt_num + 1) * sizeof(VALUE);
491 size += param_keyword_size(body->param.keyword);
492
493 /* body->is_entries */
494 size += ISEQ_IS_SIZE(body) * sizeof(union iseq_inline_storage_entry);
495
496 if (ISEQ_BODY(iseq)->is_entries) {
497 /* IC entries constant segments */
498 for (unsigned int ic_idx = 0; ic_idx < body->ic_size; ic_idx++) {
499 IC ic = &ISEQ_IS_IC_ENTRY(body, ic_idx);
500 const ID *ids = ic->segments;
501 if (!ids) continue;
502 while (*ids++) {
503 size += sizeof(ID);
504 }
505 size += sizeof(ID); // null terminator
506 }
507 }
508
509 /* body->call_data */
510 size += body->ci_size * sizeof(struct rb_call_data);
511 // TODO: should we count imemo_callinfo?
512 }
513
514 compile_data = ISEQ_COMPILE_DATA(iseq);
515 if (compile_data) {
516 struct iseq_compile_data_storage *cur;
517
518 size += sizeof(struct iseq_compile_data);
519
520 cur = compile_data->node.storage_head;
521 while (cur) {
522 size += cur->size + offsetof(struct iseq_compile_data_storage, buff);
523 cur = cur->next;
524 }
525 }
526
527 return size;
528}
529
531rb_iseq_constant_body_alloc(void)
532{
533 struct rb_iseq_constant_body *iseq_body;
534 iseq_body = ZALLOC(struct rb_iseq_constant_body);
535 return iseq_body;
536}
537
538static rb_iseq_t *
539iseq_alloc(void)
540{
541 rb_iseq_t *iseq = iseq_imemo_alloc();
542 ISEQ_BODY(iseq) = rb_iseq_constant_body_alloc();
543 return iseq;
544}
545
546VALUE
547rb_iseq_pathobj_new(VALUE path, VALUE realpath)
548{
549 VALUE pathobj;
550 VM_ASSERT(RB_TYPE_P(path, T_STRING));
551 VM_ASSERT(NIL_P(realpath) || RB_TYPE_P(realpath, T_STRING));
552
553 if (path == realpath ||
554 (!NIL_P(realpath) && rb_str_cmp(path, realpath) == 0)) {
555 pathobj = rb_fstring(path);
556 }
557 else {
558 if (!NIL_P(realpath)) {
559 realpath = rb_fstring(realpath);
560 }
561 VALUE fpath = rb_fstring(path);
562
563 pathobj = rb_ary_new_from_args(2, fpath, realpath);
564 rb_ary_freeze(pathobj);
565 RB_OBJ_SET_SHAREABLE(pathobj);
566 }
567 return pathobj;
568}
569
570void
571rb_iseq_pathobj_set(const rb_iseq_t *iseq, VALUE path, VALUE realpath)
572{
573 RB_OBJ_WRITE(iseq, &ISEQ_BODY(iseq)->location.pathobj,
574 rb_iseq_pathobj_new(path, realpath));
575}
576
577// Make a dummy iseq for a dummy frame that exposes a path for profilers to inspect
578rb_iseq_t *
579rb_iseq_alloc_with_dummy_path(VALUE fname)
580{
581 rb_iseq_t *dummy_iseq = iseq_alloc();
582
583 ISEQ_BODY(dummy_iseq)->type = ISEQ_TYPE_TOP;
584
585 if (!RB_OBJ_SHAREABLE_P(fname)) {
586 RB_OBJ_SET_FROZEN_SHAREABLE(fname);
587 }
588
589 RB_OBJ_WRITE(dummy_iseq, &ISEQ_BODY(dummy_iseq)->location.pathobj, fname);
590 RB_OBJ_WRITE(dummy_iseq, &ISEQ_BODY(dummy_iseq)->location.label, fname);
591
592 return dummy_iseq;
593}
594
595static rb_iseq_location_t *
596iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id)
597{
598 rb_iseq_location_t *loc = &ISEQ_BODY(iseq)->location;
599
600 rb_iseq_pathobj_set(iseq, path, realpath);
601 RB_OBJ_WRITE(iseq, &loc->label, name);
602 RB_OBJ_WRITE(iseq, &loc->base_label, name);
603 loc->first_lineno = first_lineno;
604
605 if (ISEQ_BODY(iseq)->local_iseq == iseq && strcmp(RSTRING_PTR(name), "initialize") == 0) {
606 ISEQ_BODY(iseq)->param.flags.use_block = 1;
607 }
608
609 if (code_location) {
610 loc->node_id = node_id;
611 loc->code_location = *code_location;
612 }
613 else {
614 loc->code_location.beg_pos.lineno = 0;
615 loc->code_location.beg_pos.column = 0;
616 loc->code_location.end_pos.lineno = -1;
617 loc->code_location.end_pos.column = -1;
618 }
619
620 return loc;
621}
622
623static void
624set_relation(rb_iseq_t *iseq, const rb_iseq_t *piseq)
625{
626 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
627 const VALUE type = body->type;
628
629 /* set class nest stack */
630 if (type == ISEQ_TYPE_TOP) {
631 body->local_iseq = iseq;
632 }
633 else if (type == ISEQ_TYPE_METHOD || type == ISEQ_TYPE_CLASS) {
634 body->local_iseq = iseq;
635 }
636 else if (piseq) {
637 RB_OBJ_WRITE(iseq, &body->local_iseq, ISEQ_BODY(piseq)->local_iseq);
638 }
639
640 if (piseq) {
641 RB_OBJ_WRITE(iseq, &body->parent_iseq, piseq);
642 }
643
644 if (type == ISEQ_TYPE_MAIN) {
645 body->local_iseq = iseq;
646 }
647}
648
649static struct iseq_compile_data_storage *
650new_arena(void)
651{
652 struct iseq_compile_data_storage * new_arena =
654 ALLOC_N(char, INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE +
655 offsetof(struct iseq_compile_data_storage, buff));
656
657 new_arena->pos = 0;
658 new_arena->next = 0;
659 new_arena->size = INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE;
660
661 return new_arena;
662}
663
664static int
665prepare_node_id(const NODE *node)
666{
667 if (!node) return -1;
668
669 if (nd_type(node) == NODE_SCOPE && RNODE_SCOPE(node)->nd_parent) {
670 return nd_node_id(RNODE_SCOPE(node)->nd_parent);
671 }
672
673 return nd_node_id(node);
674}
675
676static VALUE
677prepare_iseq_build(rb_iseq_t *iseq,
678 VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id,
679 const rb_iseq_t *parent, int isolated_depth, enum rb_iseq_type type,
680 VALUE script_lines, const rb_compile_option_t *option)
681{
682 VALUE coverage = Qfalse;
683 VALUE err_info = Qnil;
684 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
685
686 if (parent && (type == ISEQ_TYPE_MAIN || type == ISEQ_TYPE_TOP))
687 err_info = Qfalse;
688
689 body->type = type;
690 set_relation(iseq, parent);
691
692 name = rb_fstring(name);
693 iseq_location_setup(iseq, name, path, realpath, first_lineno, code_location, node_id);
694 if (iseq != body->local_iseq) {
695 RB_OBJ_WRITE(iseq, &body->location.base_label, ISEQ_BODY(body->local_iseq)->location.label);
696 }
697 ISEQ_COVERAGE_SET(iseq, Qnil);
698 ISEQ_ORIGINAL_ISEQ_CLEAR(iseq);
699 body->variable.flip_count = 0;
700
701 if (NIL_P(script_lines)) {
702 RB_OBJ_WRITE(iseq, &body->variable.script_lines, Qnil);
703 }
704 else {
705 RB_OBJ_WRITE(iseq, &body->variable.script_lines, rb_ractor_make_shareable(script_lines));
706 }
707
708 ISEQ_COMPILE_DATA_ALLOC(iseq);
709 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->err_info, err_info);
710 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->catch_table_ary, Qnil);
711
712 ISEQ_COMPILE_DATA(iseq)->node.storage_head = ISEQ_COMPILE_DATA(iseq)->node.storage_current = new_arena();
713 ISEQ_COMPILE_DATA(iseq)->insn.storage_head = ISEQ_COMPILE_DATA(iseq)->insn.storage_current = new_arena();
714 ISEQ_COMPILE_DATA(iseq)->isolated_depth = isolated_depth;
715 ISEQ_COMPILE_DATA(iseq)->option = option;
716 ISEQ_COMPILE_DATA(iseq)->ivar_cache_table = NULL;
717 ISEQ_COMPILE_DATA(iseq)->builtin_function_table = GET_VM()->builtin_function_table;
718
719 if (option->coverage_enabled) {
720 VALUE coverages = rb_get_coverages();
721 if (RTEST(coverages)) {
722 coverage = rb_hash_lookup(coverages, rb_iseq_path(iseq));
723 if (NIL_P(coverage)) coverage = Qfalse;
724 }
725 }
726 ISEQ_COVERAGE_SET(iseq, coverage);
727 if (coverage && ISEQ_BRANCH_COVERAGE(iseq))
728 ISEQ_PC2BRANCHINDEX_SET(iseq, rb_ary_hidden_new(0));
729
730 return Qtrue;
731}
732
733#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
734static void validate_get_insn_info(const rb_iseq_t *iseq);
735#endif
736
737void
738rb_iseq_insns_info_encode_positions(const rb_iseq_t *iseq)
739{
740#if VM_INSN_INFO_TABLE_IMPL == 2
741 /* create succ_index_table */
742 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
743 int size = body->insns_info.size;
744 int max_pos = body->iseq_size;
745 int *data = (int *)body->insns_info.positions;
746 if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table);
747 body->insns_info.succ_index_table = succ_index_table_create(max_pos, data, size);
748#if VM_CHECK_MODE == 0
749 ruby_xfree(body->insns_info.positions);
750 body->insns_info.positions = NULL;
751#endif
752#endif
753}
754
#if VM_INSN_INFO_TABLE_IMPL == 2
/*
 * Rebuild a positions array from the body's succ index table by calling
 * succ_index_table_invert() with the body's iseq_size and insns_info.size.
 */
unsigned int *
rb_iseq_insns_info_decode_positions(const struct rb_iseq_constant_body *body)
{
    const int max_pos = body->iseq_size;
    const int entry_count = body->insns_info.size;

    return succ_index_table_invert(max_pos, body->insns_info.succ_index_table, entry_count);
}
#endif
765
766void
767rb_iseq_init_trace(rb_iseq_t *iseq)
768{
769 iseq->aux.exec.global_trace_events = 0;
770 if (ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS) {
771 rb_iseq_trace_set(iseq, ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS);
772 }
773}
774
775static VALUE
776finish_iseq_build(rb_iseq_t *iseq)
777{
778 struct iseq_compile_data *data = ISEQ_COMPILE_DATA(iseq);
779 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
780 VALUE err = data->err_info;
781 ISEQ_COMPILE_DATA_CLEAR(iseq);
782 compile_data_free(data);
783
784#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
785 validate_get_insn_info(iseq);
786#endif
787
788 if (RTEST(err)) {
789 VALUE path = pathobj_path(body->location.pathobj);
790 if (err == Qtrue) err = rb_exc_new_cstr(rb_eSyntaxError, "compile error");
791 rb_funcallv(err, rb_intern("set_backtrace"), 1, &path);
792 rb_exc_raise(err);
793 }
794
795 RB_DEBUG_COUNTER_INC(iseq_num);
796 RB_DEBUG_COUNTER_ADD(iseq_cd_num, ISEQ_BODY(iseq)->ci_size);
797
798 rb_iseq_init_trace(iseq);
799 return Qtrue;
800}
801
802static rb_compile_option_t COMPILE_OPTION_DEFAULT = {
803 .inline_const_cache = OPT_INLINE_CONST_CACHE,
804 .peephole_optimization = OPT_PEEPHOLE_OPTIMIZATION,
805 .tailcall_optimization = OPT_TAILCALL_OPTIMIZATION,
806 .specialized_instruction = OPT_SPECIALISED_INSTRUCTION,
807 .operands_unification = OPT_OPERANDS_UNIFICATION,
808 .instructions_unification = OPT_INSTRUCTIONS_UNIFICATION,
809 .frozen_string_literal = OPT_FROZEN_STRING_LITERAL,
810 .debug_frozen_string_literal = OPT_DEBUG_FROZEN_STRING_LITERAL,
811 .coverage_enabled = TRUE,
812};
813
814static const rb_compile_option_t COMPILE_OPTION_FALSE = {
815 .frozen_string_literal = -1, // unspecified
816};
817
818int
819rb_iseq_opt_frozen_string_literal(void)
820{
821 return COMPILE_OPTION_DEFAULT.frozen_string_literal;
822}
823
824static void
825set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt)
826{
827#define SET_COMPILE_OPTION(o, h, mem) \
828 { VALUE flag = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \
829 if (flag == Qtrue) { (o)->mem = 1; } \
830 else if (flag == Qfalse) { (o)->mem = 0; } \
831 }
832#define SET_COMPILE_OPTION_NUM(o, h, mem) \
833 { VALUE num = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \
834 if (!NIL_P(num)) (o)->mem = NUM2INT(num); \
835 }
836 SET_COMPILE_OPTION(option, opt, inline_const_cache);
837 SET_COMPILE_OPTION(option, opt, peephole_optimization);
838 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
839 SET_COMPILE_OPTION(option, opt, specialized_instruction);
840 SET_COMPILE_OPTION(option, opt, operands_unification);
841 SET_COMPILE_OPTION(option, opt, instructions_unification);
842 SET_COMPILE_OPTION(option, opt, frozen_string_literal);
843 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
844 SET_COMPILE_OPTION(option, opt, coverage_enabled);
845 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
846#undef SET_COMPILE_OPTION
847#undef SET_COMPILE_OPTION_NUM
848}
849
850static rb_compile_option_t *
851set_compile_option_from_ast(rb_compile_option_t *option, const rb_ast_body_t *ast)
852{
853#define SET_COMPILE_OPTION(o, a, mem) \
854 ((a)->mem < 0 ? 0 : ((o)->mem = (a)->mem > 0))
855 SET_COMPILE_OPTION(option, ast, coverage_enabled);
856#undef SET_COMPILE_OPTION
857 if (ast->frozen_string_literal >= 0) {
858 option->frozen_string_literal = ast->frozen_string_literal;
859 }
860 return option;
861}
862
863static void
864make_compile_option(rb_compile_option_t *option, VALUE opt)
865{
866 if (NIL_P(opt)) {
867 *option = COMPILE_OPTION_DEFAULT;
868 }
869 else if (opt == Qfalse) {
870 *option = COMPILE_OPTION_FALSE;
871 }
872 else if (opt == Qtrue) {
873 int i;
874 for (i = 0; i < (int)(sizeof(rb_compile_option_t) / sizeof(int)); ++i)
875 ((int *)option)[i] = 1;
876 }
877 else if (RB_TYPE_P(opt, T_HASH)) {
878 *option = COMPILE_OPTION_DEFAULT;
879 set_compile_option_from_hash(option, opt);
880 }
881 else {
882 rb_raise(rb_eTypeError, "Compile option must be Hash/true/false/nil");
883 }
884}
885
886static VALUE
887make_compile_option_value(rb_compile_option_t *option)
888{
889 VALUE opt = rb_hash_new_with_size(11);
890#define SET_COMPILE_OPTION(o, h, mem) \
891 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), RBOOL((o)->mem))
892#define SET_COMPILE_OPTION_NUM(o, h, mem) \
893 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), INT2NUM((o)->mem))
894 {
895 SET_COMPILE_OPTION(option, opt, inline_const_cache);
896 SET_COMPILE_OPTION(option, opt, peephole_optimization);
897 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
898 SET_COMPILE_OPTION(option, opt, specialized_instruction);
899 SET_COMPILE_OPTION(option, opt, operands_unification);
900 SET_COMPILE_OPTION(option, opt, instructions_unification);
901 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
902 SET_COMPILE_OPTION(option, opt, coverage_enabled);
903 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
904 }
905#undef SET_COMPILE_OPTION
906#undef SET_COMPILE_OPTION_NUM
907 VALUE frozen_string_literal = option->frozen_string_literal == -1 ? Qnil : RBOOL(option->frozen_string_literal);
908 rb_hash_aset(opt, ID2SYM(rb_intern("frozen_string_literal")), frozen_string_literal);
909 return opt;
910}
911
912rb_iseq_t *
913rb_iseq_new(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath,
914 const rb_iseq_t *parent, enum rb_iseq_type type)
915{
916 return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent,
917 0, type, &COMPILE_OPTION_DEFAULT,
918 Qnil);
919}
920
921static int
922ast_line_count(const VALUE ast_value)
923{
924 rb_ast_t *ast = rb_ruby_ast_data_get(ast_value);
925 return ast->body.line_count;
926}
927
928static VALUE
929iseq_setup_coverage(VALUE coverages, VALUE path, int line_count)
930{
931 if (line_count >= 0) {
932 int len = (rb_get_coverage_mode() & COVERAGE_TARGET_ONESHOT_LINES) ? 0 : line_count;
933
934 VALUE coverage = rb_default_coverage(len);
935 rb_hash_aset(coverages, path, coverage);
936
937 return coverage;
938 }
939
940 return Qnil;
941}
942
943static inline void
944iseq_new_setup_coverage(VALUE path, int line_count)
945{
946 VALUE coverages = rb_get_coverages();
947
948 if (RTEST(coverages)) {
949 iseq_setup_coverage(coverages, path, line_count);
950 }
951}
952
953rb_iseq_t *
954rb_iseq_new_top(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent)
955{
956 iseq_new_setup_coverage(path, ast_line_count(ast_value));
957
958 return rb_iseq_new_with_opt(ast_value, name, path, realpath, 0, parent, 0,
959 ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT,
960 Qnil);
961}
962
966rb_iseq_t *
967pm_iseq_new_top(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent, int *error_state)
968{
969 iseq_new_setup_coverage(path, (int) (node->parser->newline_list.size - 1));
970
971 return pm_iseq_new_with_opt(node, name, path, realpath, 0, parent, 0,
972 ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT, error_state);
973}
974
975rb_iseq_t *
976rb_iseq_new_main(const VALUE ast_value, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt)
977{
978 iseq_new_setup_coverage(path, ast_line_count(ast_value));
979
980 return rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"),
981 path, realpath, 0,
982 parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE,
983 Qnil);
984}
985
990rb_iseq_t *
991pm_iseq_new_main(pm_scope_node_t *node, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt, int *error_state)
992{
993 iseq_new_setup_coverage(path, (int) (node->parser->newline_list.size - 1));
994
995 return pm_iseq_new_with_opt(node, rb_fstring_lit("<main>"),
996 path, realpath, 0,
997 parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE, error_state);
998}
999
1000rb_iseq_t *
1001rb_iseq_new_eval(const VALUE ast_value, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth)
1002{
1003 if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) {
1004 VALUE coverages = rb_get_coverages();
1005 if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) {
1006 iseq_setup_coverage(coverages, path, ast_line_count(ast_value) + first_lineno - 1);
1007 }
1008 }
1009
1010 return rb_iseq_new_with_opt(ast_value, name, path, realpath, first_lineno,
1011 parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT,
1012 Qnil);
1013}
1014
1015rb_iseq_t *
1016pm_iseq_new_eval(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath,
1017 int first_lineno, const rb_iseq_t *parent, int isolated_depth, int *error_state)
1018{
1019 if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) {
1020 VALUE coverages = rb_get_coverages();
1021 if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) {
1022 iseq_setup_coverage(coverages, path, ((int) (node->parser->newline_list.size - 1)) + first_lineno - 1);
1023 }
1024 }
1025
1026 return pm_iseq_new_with_opt(node, name, path, realpath, first_lineno,
1027 parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT, error_state);
1028}
1029
1030static inline rb_iseq_t *
1031iseq_translate(rb_iseq_t *iseq)
1032{
1033 if (rb_respond_to(rb_cISeq, rb_intern("translate"))) {
1034 VALUE v1 = iseqw_new(iseq);
1035 VALUE v2 = rb_funcall(rb_cISeq, rb_intern("translate"), 1, v1);
1036 if (v1 != v2 && CLASS_OF(v2) == rb_cISeq) {
1037 iseq = (rb_iseq_t *)iseqw_check(v2);
1038 }
1039 }
1040
1041 return iseq;
1042}
1043
1044rb_iseq_t *
1045rb_iseq_new_with_opt(VALUE ast_value, VALUE name, VALUE path, VALUE realpath,
1046 int first_lineno, const rb_iseq_t *parent, int isolated_depth,
1047 enum rb_iseq_type type, const rb_compile_option_t *option,
1048 VALUE script_lines)
1049{
1050 rb_ast_t *ast = rb_ruby_ast_data_get(ast_value);
1051 rb_ast_body_t *body = ast ? &ast->body : NULL;
1052 const NODE *node = body ? body->root : 0;
1053 /* TODO: argument check */
1054 rb_iseq_t *iseq = iseq_alloc();
1055 rb_compile_option_t new_opt;
1056
1057 if (!option) option = &COMPILE_OPTION_DEFAULT;
1058 if (body) {
1059 new_opt = *option;
1060 option = set_compile_option_from_ast(&new_opt, body);
1061 }
1062
1063 if (!NIL_P(script_lines)) {
1064 // noop
1065 }
1066 else if (body && body->script_lines) {
1067 script_lines = rb_parser_build_script_lines_from(body->script_lines);
1068 }
1069 else if (parent) {
1070 script_lines = ISEQ_BODY(parent)->variable.script_lines;
1071 }
1072
1073 prepare_iseq_build(iseq, name, path, realpath, first_lineno, node ? &node->nd_loc : NULL, prepare_node_id(node),
1074 parent, isolated_depth, type, script_lines, option);
1075
1076 rb_iseq_compile_node(iseq, node);
1077 finish_iseq_build(iseq);
1078 RB_GC_GUARD(ast_value);
1079
1080 return iseq_translate(iseq);
1081}
1082
1084 rb_iseq_t *iseq;
1085 pm_scope_node_t *node;
1086};
1087
1088VALUE
1089pm_iseq_new_with_opt_try(VALUE d)
1090{
1091 struct pm_iseq_new_with_opt_data *data = (struct pm_iseq_new_with_opt_data *)d;
1092
1093 // This can compile child iseqs, which can raise syntax errors
1094 pm_iseq_compile_node(data->iseq, data->node);
1095
1096 // This raises an exception if there is a syntax error
1097 finish_iseq_build(data->iseq);
1098
1099 return Qundef;
1100}
1101
1114rb_iseq_t *
1115pm_iseq_new_with_opt(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpath,
1116 int first_lineno, const rb_iseq_t *parent, int isolated_depth,
1117 enum rb_iseq_type type, const rb_compile_option_t *option, int *error_state)
1118{
1119 rb_iseq_t *iseq = iseq_alloc();
1120 ISEQ_BODY(iseq)->prism = true;
1121
1122 rb_compile_option_t next_option;
1123 if (!option) option = &COMPILE_OPTION_DEFAULT;
1124
1125 next_option = *option;
1126 next_option.coverage_enabled = node->coverage_enabled < 0 ? 0 : node->coverage_enabled > 0;
1127 option = &next_option;
1128
1129 pm_location_t *location = &node->base.location;
1130 int32_t start_line = node->parser->start_line;
1131
1132 pm_line_column_t start = pm_newline_list_line_column(&node->parser->newline_list, location->start, start_line);
1133 pm_line_column_t end = pm_newline_list_line_column(&node->parser->newline_list, location->end, start_line);
1134
1135 rb_code_location_t code_location = (rb_code_location_t) {
1136 .beg_pos = { .lineno = (int) start.line, .column = (int) start.column },
1137 .end_pos = { .lineno = (int) end.line, .column = (int) end.column }
1138 };
1139
1140 prepare_iseq_build(iseq, name, path, realpath, first_lineno, &code_location, node->ast_node->node_id,
1141 parent, isolated_depth, type, node->script_lines == NULL ? Qnil : *node->script_lines, option);
1142
1143 struct pm_iseq_new_with_opt_data data = {
1144 .iseq = iseq,
1145 .node = node
1146 };
1147 rb_protect(pm_iseq_new_with_opt_try, (VALUE)&data, error_state);
1148
1149 if (*error_state) return NULL;
1150
1151 return iseq_translate(iseq);
1152}
1153
1154rb_iseq_t *
1155rb_iseq_new_with_callback(
1156 const struct rb_iseq_new_with_callback_callback_func * ifunc,
1157 VALUE name, VALUE path, VALUE realpath,
1158 int first_lineno, const rb_iseq_t *parent,
1159 enum rb_iseq_type type, const rb_compile_option_t *option)
1160{
1161 /* TODO: argument check */
1162 rb_iseq_t *iseq = iseq_alloc();
1163
1164 if (!option) option = &COMPILE_OPTION_DEFAULT;
1165 prepare_iseq_build(iseq, name, path, realpath, first_lineno, NULL, -1, parent, 0, type, Qnil, option);
1166
1167 rb_iseq_compile_callback(iseq, ifunc);
1168 finish_iseq_build(iseq);
1169
1170 return iseq;
1171}
1172
1173const rb_iseq_t *
1174rb_iseq_load_iseq(VALUE fname)
1175{
1176 VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("load_iseq"), 1, &fname);
1177
1178 if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) {
1179 return iseqw_check(iseqv);
1180 }
1181
1182 return NULL;
1183}
1184
1185const rb_iseq_t *
1186rb_iseq_compile_iseq(VALUE str, VALUE fname)
1187{
1188 VALUE args[] = {
1189 str, fname
1190 };
1191 VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("compile"), 2, args);
1192
1193 if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) {
1194 return iseqw_check(iseqv);
1195 }
1196
1197 return NULL;
1198}
1199
1200#define CHECK_ARRAY(v) rb_to_array_type(v)
1201#define CHECK_HASH(v) rb_to_hash_type(v)
1202#define CHECK_STRING(v) rb_str_to_str(v)
1203#define CHECK_SYMBOL(v) rb_to_symbol_type(v)
1204static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;}
1205
1206static enum rb_iseq_type
1207iseq_type_from_sym(VALUE type)
1208{
1209 const ID id_top = rb_intern("top");
1210 const ID id_method = rb_intern("method");
1211 const ID id_block = rb_intern("block");
1212 const ID id_class = rb_intern("class");
1213 const ID id_rescue = rb_intern("rescue");
1214 const ID id_ensure = rb_intern("ensure");
1215 const ID id_eval = rb_intern("eval");
1216 const ID id_main = rb_intern("main");
1217 const ID id_plain = rb_intern("plain");
1218 /* ensure all symbols are static or pinned down before
1219 * conversion */
1220 const ID typeid = rb_check_id(&type);
1221 if (typeid == id_top) return ISEQ_TYPE_TOP;
1222 if (typeid == id_method) return ISEQ_TYPE_METHOD;
1223 if (typeid == id_block) return ISEQ_TYPE_BLOCK;
1224 if (typeid == id_class) return ISEQ_TYPE_CLASS;
1225 if (typeid == id_rescue) return ISEQ_TYPE_RESCUE;
1226 if (typeid == id_ensure) return ISEQ_TYPE_ENSURE;
1227 if (typeid == id_eval) return ISEQ_TYPE_EVAL;
1228 if (typeid == id_main) return ISEQ_TYPE_MAIN;
1229 if (typeid == id_plain) return ISEQ_TYPE_PLAIN;
1230 return (enum rb_iseq_type)-1;
1231}
1232
1233static VALUE
1234iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt)
1235{
1236 rb_iseq_t *iseq = iseq_alloc();
1237
1238 VALUE magic, version1, version2, format_type, misc;
1239 VALUE name, path, realpath, code_location, node_id;
1240 VALUE type, body, locals, params, exception;
1241
1242 st_data_t iseq_type;
1243 rb_compile_option_t option;
1244 int i = 0;
1245 rb_code_location_t tmp_loc = { {0, 0}, {-1, -1} };
1246
1247 /* [magic, major_version, minor_version, format_type, misc,
1248 * label, path, first_lineno,
1249 * type, locals, args, exception_table, body]
1250 */
1251
1252 data = CHECK_ARRAY(data);
1253
1254 magic = CHECK_STRING(rb_ary_entry(data, i++));
1255 version1 = CHECK_INTEGER(rb_ary_entry(data, i++));
1256 version2 = CHECK_INTEGER(rb_ary_entry(data, i++));
1257 format_type = CHECK_INTEGER(rb_ary_entry(data, i++));
1258 misc = CHECK_HASH(rb_ary_entry(data, i++));
1259 ((void)magic, (void)version1, (void)version2, (void)format_type);
1260
1261 name = CHECK_STRING(rb_ary_entry(data, i++));
1262 path = CHECK_STRING(rb_ary_entry(data, i++));
1263 realpath = rb_ary_entry(data, i++);
1264 realpath = NIL_P(realpath) ? Qnil : CHECK_STRING(realpath);
1265 int first_lineno = RB_NUM2INT(rb_ary_entry(data, i++));
1266
1267 type = CHECK_SYMBOL(rb_ary_entry(data, i++));
1268 locals = CHECK_ARRAY(rb_ary_entry(data, i++));
1269 params = CHECK_HASH(rb_ary_entry(data, i++));
1270 exception = CHECK_ARRAY(rb_ary_entry(data, i++));
1271 body = CHECK_ARRAY(rb_ary_entry(data, i++));
1272
1273 ISEQ_BODY(iseq)->local_iseq = iseq;
1274
1275 iseq_type = iseq_type_from_sym(type);
1276 if (iseq_type == (enum rb_iseq_type)-1) {
1277 rb_raise(rb_eTypeError, "unsupported type: :%"PRIsVALUE, rb_sym2str(type));
1278 }
1279
1280 node_id = rb_hash_aref(misc, ID2SYM(rb_intern("node_id")));
1281
1282 code_location = rb_hash_aref(misc, ID2SYM(rb_intern("code_location")));
1283 if (RB_TYPE_P(code_location, T_ARRAY) && RARRAY_LEN(code_location) == 4) {
1284 tmp_loc.beg_pos.lineno = NUM2INT(rb_ary_entry(code_location, 0));
1285 tmp_loc.beg_pos.column = NUM2INT(rb_ary_entry(code_location, 1));
1286 tmp_loc.end_pos.lineno = NUM2INT(rb_ary_entry(code_location, 2));
1287 tmp_loc.end_pos.column = NUM2INT(rb_ary_entry(code_location, 3));
1288 }
1289
1290 if (SYM2ID(rb_hash_aref(misc, ID2SYM(rb_intern("parser")))) == rb_intern("prism")) {
1291 ISEQ_BODY(iseq)->prism = true;
1292 }
1293
1294 make_compile_option(&option, opt);
1295 option.peephole_optimization = FALSE; /* because peephole optimization can modify original iseq */
1296 prepare_iseq_build(iseq, name, path, realpath, first_lineno, &tmp_loc, NUM2INT(node_id),
1297 parent, 0, (enum rb_iseq_type)iseq_type, Qnil, &option);
1298
1299 rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body);
1300
1301 finish_iseq_build(iseq);
1302
1303 return iseqw_new(iseq);
1304}
1305
1306/*
1307 * :nodoc:
1308 */
1309static VALUE
1310iseq_s_load(int argc, VALUE *argv, VALUE self)
1311{
1312 VALUE data, opt=Qnil;
1313 rb_scan_args(argc, argv, "11", &data, &opt);
1314 return iseq_load(data, NULL, opt);
1315}
1316
1317VALUE
1318rb_iseq_load(VALUE data, VALUE parent, VALUE opt)
1319{
1320 return iseq_load(data, RTEST(parent) ? (rb_iseq_t *)parent : NULL, opt);
1321}
1322
1323static rb_iseq_t *
1324rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt)
1325{
1326 rb_iseq_t *iseq = NULL;
1327 rb_compile_option_t option;
1328#if !defined(__GNUC__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 8)
1329# define INITIALIZED volatile /* suppress warnings by gcc 4.8 */
1330#else
1331# define INITIALIZED /* volatile */
1332#endif
1333 VALUE (*parse)(VALUE vparser, VALUE fname, VALUE file, int start);
1334 int ln;
1335 VALUE INITIALIZED ast_value;
1336 rb_ast_t *ast;
1337 VALUE name = rb_fstring_lit("<compiled>");
1338
1339 /* safe results first */
1340 make_compile_option(&option, opt);
1341 ln = NUM2INT(line);
1342 StringValueCStr(file);
1343 if (RB_TYPE_P(src, T_FILE)) {
1344 parse = rb_parser_compile_file_path;
1345 }
1346 else {
1347 parse = rb_parser_compile_string_path;
1348 StringValue(src);
1349 }
1350 {
1351 const VALUE parser = rb_parser_new();
1352 const rb_iseq_t *outer_scope = rb_iseq_new(Qnil, name, name, Qnil, 0, ISEQ_TYPE_TOP);
1353 VALUE outer_scope_v = (VALUE)outer_scope;
1354 rb_parser_set_context(parser, outer_scope, FALSE);
1355 if (ruby_vm_keep_script_lines) rb_parser_set_script_lines(parser);
1356 RB_GC_GUARD(outer_scope_v);
1357 ast_value = (*parse)(parser, file, src, ln);
1358 }
1359
1360 ast = rb_ruby_ast_data_get(ast_value);
1361
1362 if (!ast || !ast->body.root) {
1363 rb_ast_dispose(ast);
1364 rb_exc_raise(GET_EC()->errinfo);
1365 }
1366 else {
1367 iseq = rb_iseq_new_with_opt(ast_value, name, file, realpath, ln,
1368 NULL, 0, ISEQ_TYPE_TOP, &option,
1369 Qnil);
1370 rb_ast_dispose(ast);
1371 }
1372
1373 return iseq;
1374}
1375
1376static rb_iseq_t *
1377pm_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt)
1378{
1379 rb_iseq_t *iseq = NULL;
1380 rb_compile_option_t option;
1381 int ln;
1382 VALUE name = rb_fstring_lit("<compiled>");
1383
1384 /* safe results first */
1385 make_compile_option(&option, opt);
1386 ln = NUM2INT(line);
1387 StringValueCStr(file);
1388
1389 bool parse_file = false;
1390 if (RB_TYPE_P(src, T_FILE)) {
1391 parse_file = true;
1392 src = rb_io_path(src);
1393 }
1394 else {
1395 src = StringValue(src);
1396 }
1397
1398 pm_parse_result_t result = { 0 };
1399 pm_options_line_set(&result.options, NUM2INT(line));
1400 pm_options_scopes_init(&result.options, 1);
1401 result.node.coverage_enabled = 1;
1402
1403 switch (option.frozen_string_literal) {
1404 case ISEQ_FROZEN_STRING_LITERAL_UNSET:
1405 break;
1406 case ISEQ_FROZEN_STRING_LITERAL_DISABLED:
1408 break;
1409 case ISEQ_FROZEN_STRING_LITERAL_ENABLED:
1411 break;
1412 default:
1413 rb_bug("pm_iseq_compile_with_option: invalid frozen_string_literal=%d", option.frozen_string_literal);
1414 break;
1415 }
1416
1417 VALUE script_lines;
1418 VALUE error;
1419
1420 if (parse_file) {
1421 error = pm_load_parse_file(&result, src, ruby_vm_keep_script_lines ? &script_lines : NULL);
1422 }
1423 else {
1424 error = pm_parse_string(&result, src, file, ruby_vm_keep_script_lines ? &script_lines : NULL);
1425 }
1426
1427 RB_GC_GUARD(src);
1428
1429 if (error == Qnil) {
1430 int error_state;
1431 iseq = pm_iseq_new_with_opt(&result.node, name, file, realpath, ln, NULL, 0, ISEQ_TYPE_TOP, &option, &error_state);
1432
1433 pm_parse_result_free(&result);
1434
1435 if (error_state) {
1436 RUBY_ASSERT(iseq == NULL);
1437 rb_jump_tag(error_state);
1438 }
1439 }
1440 else {
1441 pm_parse_result_free(&result);
1442 rb_exc_raise(error);
1443 }
1444
1445 return iseq;
1446}
1447
1448VALUE
1449rb_iseq_path(const rb_iseq_t *iseq)
1450{
1451 return pathobj_path(ISEQ_BODY(iseq)->location.pathobj);
1452}
1453
1454VALUE
1455rb_iseq_realpath(const rb_iseq_t *iseq)
1456{
1457 return pathobj_realpath(ISEQ_BODY(iseq)->location.pathobj);
1458}
1459
1460VALUE
1461rb_iseq_absolute_path(const rb_iseq_t *iseq)
1462{
1463 return rb_iseq_realpath(iseq);
1464}
1465
1466int
1467rb_iseq_from_eval_p(const rb_iseq_t *iseq)
1468{
1469 return NIL_P(rb_iseq_realpath(iseq));
1470}
1471
1472VALUE
1473rb_iseq_label(const rb_iseq_t *iseq)
1474{
1475 return ISEQ_BODY(iseq)->location.label;
1476}
1477
1478VALUE
1479rb_iseq_base_label(const rb_iseq_t *iseq)
1480{
1481 return ISEQ_BODY(iseq)->location.base_label;
1482}
1483
1484VALUE
1485rb_iseq_first_lineno(const rb_iseq_t *iseq)
1486{
1487 return RB_INT2NUM(ISEQ_BODY(iseq)->location.first_lineno);
1488}
1489
1490VALUE
1491rb_iseq_method_name(const rb_iseq_t *iseq)
1492{
1493 struct rb_iseq_constant_body *const body = ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq);
1494
1495 if (body->type == ISEQ_TYPE_METHOD) {
1496 return body->location.base_label;
1497 }
1498 else {
1499 return Qnil;
1500 }
1501}
1502
1503void
1504rb_iseq_code_location(const rb_iseq_t *iseq, int *beg_pos_lineno, int *beg_pos_column, int *end_pos_lineno, int *end_pos_column)
1505{
1506 const rb_code_location_t *loc = &ISEQ_BODY(iseq)->location.code_location;
1507 if (beg_pos_lineno) *beg_pos_lineno = loc->beg_pos.lineno;
1508 if (beg_pos_column) *beg_pos_column = loc->beg_pos.column;
1509 if (end_pos_lineno) *end_pos_lineno = loc->end_pos.lineno;
1510 if (end_pos_column) *end_pos_column = loc->end_pos.column;
1511}
1512
1513static ID iseq_type_id(enum rb_iseq_type type);
1514
1515VALUE
1516rb_iseq_type(const rb_iseq_t *iseq)
1517{
1518 return ID2SYM(iseq_type_id(ISEQ_BODY(iseq)->type));
1519}
1520
1521VALUE
1522rb_iseq_coverage(const rb_iseq_t *iseq)
1523{
1524 return ISEQ_COVERAGE(iseq);
1525}
1526
1527static int
1528remove_coverage_i(void *vstart, void *vend, size_t stride, void *data)
1529{
1530 VALUE v = (VALUE)vstart;
1531 for (; v != (VALUE)vend; v += stride) {
1532 void *ptr = rb_asan_poisoned_object_p(v);
1533 rb_asan_unpoison_object(v, false);
1534
1535 if (rb_obj_is_iseq(v)) {
1536 rb_iseq_t *iseq = (rb_iseq_t *)v;
1537 ISEQ_COVERAGE_SET(iseq, Qnil);
1538 }
1539
1540 asan_poison_object_if(ptr, v);
1541 }
1542 return 0;
1543}
1544
1545void
1546rb_iseq_remove_coverage_all(void)
1547{
1548 rb_objspace_each_objects(remove_coverage_i, NULL);
1549}
1550
1551/* define wrapper class methods (RubyVM::InstructionSequence) */
1552
1553static void
1554iseqw_mark_and_move(void *ptr)
1555{
1556 rb_gc_mark_and_move((VALUE *)ptr);
1557}
1558
1559static size_t
1560iseqw_memsize(const void *ptr)
1561{
1562 return rb_iseq_memsize(*(const rb_iseq_t **)ptr);
1563}
1564
1565static const rb_data_type_t iseqw_data_type = {
1566 "T_IMEMO/iseq",
1567 {
1568 iseqw_mark_and_move,
1570 iseqw_memsize,
1571 iseqw_mark_and_move,
1572 },
1573 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
1574};
1575
1576static VALUE
1577iseqw_new(const rb_iseq_t *iseq)
1578{
1579 if (iseq->wrapper) {
1580 if (*(const rb_iseq_t **)rb_check_typeddata(iseq->wrapper, &iseqw_data_type) != iseq) {
1581 rb_raise(rb_eTypeError, "wrong iseq wrapper: %" PRIsVALUE " for %p",
1582 iseq->wrapper, (void *)iseq);
1583 }
1584 return iseq->wrapper;
1585 }
1586 else {
1587 rb_iseq_t **ptr;
1588 VALUE obj = TypedData_Make_Struct(rb_cISeq, rb_iseq_t *, &iseqw_data_type, ptr);
1589 RB_OBJ_WRITE(obj, ptr, iseq);
1590
1591 /* cache a wrapper object */
1592 RB_OBJ_SET_FROZEN_SHAREABLE((VALUE)obj);
1593 RB_OBJ_WRITE((VALUE)iseq, &iseq->wrapper, obj);
1594
1595 return obj;
1596 }
1597}
1598
1599VALUE
1600rb_iseqw_new(const rb_iseq_t *iseq)
1601{
1602 return iseqw_new(iseq);
1603}
1604
1610static VALUE
1611iseqw_s_compile_parser(int argc, VALUE *argv, VALUE self, bool prism)
1612{
1613 VALUE src, file = Qnil, path = Qnil, line = Qnil, opt = Qnil;
1614 int i;
1615
1616 i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt);
1617 if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5);
1618 switch (i) {
1619 case 5: opt = argv[--i];
1620 case 4: line = argv[--i];
1621 case 3: path = argv[--i];
1622 case 2: file = argv[--i];
1623 }
1624
1625 if (NIL_P(file)) file = rb_fstring_lit("<compiled>");
1626 if (NIL_P(path)) path = file;
1627 if (NIL_P(line)) line = INT2FIX(1);
1628
1629 Check_Type(path, T_STRING);
1630 Check_Type(file, T_STRING);
1631
1632 rb_iseq_t *iseq;
1633 if (prism) {
1634 iseq = pm_iseq_compile_with_option(src, file, path, line, opt);
1635 }
1636 else {
1637 iseq = rb_iseq_compile_with_option(src, file, path, line, opt);
1638 }
1639
1640 return iseqw_new(iseq);
1641}
1642
1643/*
1644 * call-seq:
1645 * InstructionSequence.compile(source[, file[, path[, line[, options]]]]) -> iseq
1646 * InstructionSequence.new(source[, file[, path[, line[, options]]]]) -> iseq
1647 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code.
1650 *
1651 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1652 * real path and first line number of the ruby code in +source+ which are
1653 * metadata attached to the returned +iseq+.
1654 *
1655 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1656 * +require_relative+ base. It is recommended these should be the same full
1657 * path.
1658 *
1659 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1660 * modify the default behavior of the Ruby iseq compiler.
1661 *
1662 * For details regarding valid compile options see ::compile_option=.
1663 *
1664 * RubyVM::InstructionSequence.compile("a = 1 + 2")
1665 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1666 *
1667 * path = "test.rb"
1668 * RubyVM::InstructionSequence.compile(File.read(path), path, File.expand_path(path))
1669 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1670 *
1671 * file = File.open("test.rb")
1672 * RubyVM::InstructionSequence.compile(file)
1673 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1674 *
1675 * path = File.expand_path("test.rb")
1676 * RubyVM::InstructionSequence.compile(File.read(path), path, path)
1677 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1678 *
1679 */
1680static VALUE
1681iseqw_s_compile(int argc, VALUE *argv, VALUE self)
1682{
1683 return iseqw_s_compile_parser(argc, argv, self, rb_ruby_prism_p());
1684}
1685
1686/*
1687 * call-seq:
1688 * InstructionSequence.compile_parsey(source[, file[, path[, line[, options]]]]) -> iseq
1689 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code. It parses and compiles using parse.y.
1692 *
1693 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1694 * real path and first line number of the ruby code in +source+ which are
1695 * metadata attached to the returned +iseq+.
1696 *
1697 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1698 * +require_relative+ base. It is recommended these should be the same full
1699 * path.
1700 *
1701 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1702 * modify the default behavior of the Ruby iseq compiler.
1703 *
1704 * For details regarding valid compile options see ::compile_option=.
1705 *
1706 * RubyVM::InstructionSequence.compile_parsey("a = 1 + 2")
1707 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1708 *
1709 * path = "test.rb"
1710 * RubyVM::InstructionSequence.compile_parsey(File.read(path), path, File.expand_path(path))
1711 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1712 *
1713 * file = File.open("test.rb")
1714 * RubyVM::InstructionSequence.compile_parsey(file)
1715 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1716 *
1717 * path = File.expand_path("test.rb")
1718 * RubyVM::InstructionSequence.compile_parsey(File.read(path), path, path)
1719 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1720 *
1721 */
1722static VALUE
1723iseqw_s_compile_parsey(int argc, VALUE *argv, VALUE self)
1724{
1725 return iseqw_s_compile_parser(argc, argv, self, false);
1726}
1727
1728/*
1729 * call-seq:
1730 * InstructionSequence.compile_prism(source[, file[, path[, line[, options]]]]) -> iseq
1731 *
 * Takes +source+, which can be a string of Ruby code, or an open +File+ object
 * that contains Ruby source code. It parses and compiles using prism.
1734 *
1735 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1736 * real path and first line number of the ruby code in +source+ which are
1737 * metadata attached to the returned +iseq+.
1738 *
1739 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1740 * +require_relative+ base. It is recommended these should be the same full
1741 * path.
1742 *
1743 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1744 * modify the default behavior of the Ruby iseq compiler.
1745 *
1746 * For details regarding valid compile options see ::compile_option=.
1747 *
1748 * RubyVM::InstructionSequence.compile_prism("a = 1 + 2")
1749 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1750 *
1751 * path = "test.rb"
1752 * RubyVM::InstructionSequence.compile_prism(File.read(path), path, File.expand_path(path))
1753 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1754 *
1755 * file = File.open("test.rb")
1756 * RubyVM::InstructionSequence.compile_prism(file)
1757 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1758 *
1759 * path = File.expand_path("test.rb")
1760 * RubyVM::InstructionSequence.compile_prism(File.read(path), path, path)
1761 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1762 *
1763 */
1764static VALUE
1765iseqw_s_compile_prism(int argc, VALUE *argv, VALUE self)
1766{
1767 return iseqw_s_compile_parser(argc, argv, self, true);
1768}
1769
1770/*
1771 * call-seq:
1772 * InstructionSequence.compile_file(file[, options]) -> iseq
1773 *
1774 * Takes +file+, a String with the location of a Ruby source file, reads,
1775 * parses and compiles the file, and returns +iseq+, the compiled
1776 * InstructionSequence with source location metadata set.
1777 *
1778 * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to
1779 * modify the default behavior of the Ruby iseq compiler.
1780 *
1781 * For details regarding valid compile options see ::compile_option=.
1782 *
1783 * # /tmp/hello.rb
1784 * puts "Hello, world!"
1785 *
1786 * # elsewhere
1787 * RubyVM::InstructionSequence.compile_file("/tmp/hello.rb")
1788 * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb>
1789 */
1790static VALUE
1791iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
1792{
1793 VALUE file, opt = Qnil;
1794 VALUE parser, f, exc = Qnil, ret;
1795 rb_ast_t *ast;
1796 VALUE ast_value;
1797 rb_compile_option_t option;
1798 int i;
1799
1800 i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt);
1801 if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2);
1802 switch (i) {
1803 case 2: opt = argv[--i];
1804 }
1805 FilePathValue(file);
1806 file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */
1807
1808 f = rb_file_open_str(file, "r");
1809
1810 rb_execution_context_t *ec = GET_EC();
1811 VALUE v = rb_vm_push_frame_fname(ec, file);
1812
1813 parser = rb_parser_new();
1814 rb_parser_set_context(parser, NULL, FALSE);
1815 ast_value = rb_parser_load_file(parser, file);
1816 ast = rb_ruby_ast_data_get(ast_value);
1817 if (!ast->body.root) exc = GET_EC()->errinfo;
1818
1819 rb_io_close(f);
1820 if (!ast->body.root) {
1821 rb_ast_dispose(ast);
1822 rb_exc_raise(exc);
1823 }
1824
1825 make_compile_option(&option, opt);
1826
1827 ret = iseqw_new(rb_iseq_new_with_opt(ast_value, rb_fstring_lit("<main>"),
1828 file,
1829 rb_realpath_internal(Qnil, file, 1),
1830 1, NULL, 0, ISEQ_TYPE_TOP, &option,
1831 Qnil));
1832 rb_ast_dispose(ast);
1833 RB_GC_GUARD(ast_value);
1834
1835 rb_vm_pop_frame(ec);
1836 RB_GC_GUARD(v);
1837 return ret;
1838}
1839
1840/*
1841 * call-seq:
1842 * InstructionSequence.compile_file_prism(file[, options]) -> iseq
1843 *
1844 * Takes +file+, a String with the location of a Ruby source file, reads,
1845 * parses and compiles the file, and returns +iseq+, the compiled
1846 * InstructionSequence with source location metadata set. It parses and
1847 * compiles using prism.
1848 *
1849 * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to
1850 * modify the default behavior of the Ruby iseq compiler.
1851 *
1852 * For details regarding valid compile options see ::compile_option=.
1853 *
1854 * # /tmp/hello.rb
1855 * puts "Hello, world!"
1856 *
1857 * # elsewhere
1858 * RubyVM::InstructionSequence.compile_file_prism("/tmp/hello.rb")
1859 * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb>
1860 */
1861static VALUE
1862iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self)
1863{
1864 VALUE file, opt = Qnil, ret;
1865 rb_compile_option_t option;
1866 int i;
1867
1868 i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt);
1869 if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2);
1870 switch (i) {
1871 case 2: opt = argv[--i];
1872 }
1873 FilePathValue(file);
1874 file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */
1875
1876 rb_execution_context_t *ec = GET_EC();
1877 VALUE v = rb_vm_push_frame_fname(ec, file);
1878
1879 pm_parse_result_t result = { 0 };
1880 result.options.line = 1;
1881 result.node.coverage_enabled = 1;
1882
1883 VALUE script_lines;
1884 VALUE error = pm_load_parse_file(&result, file, ruby_vm_keep_script_lines ? &script_lines : NULL);
1885
1886 if (error == Qnil) {
1887 make_compile_option(&option, opt);
1888
1889 int error_state;
1890 rb_iseq_t *iseq = pm_iseq_new_with_opt(&result.node, rb_fstring_lit("<main>"),
1891 file,
1892 rb_realpath_internal(Qnil, file, 1),
1893 1, NULL, 0, ISEQ_TYPE_TOP, &option, &error_state);
1894
1895 pm_parse_result_free(&result);
1896
1897 if (error_state) {
1898 RUBY_ASSERT(iseq == NULL);
1899 rb_jump_tag(error_state);
1900 }
1901
1902 ret = iseqw_new(iseq);
1903 rb_vm_pop_frame(ec);
1904 RB_GC_GUARD(v);
1905 return ret;
1906 }
1907 else {
1908 pm_parse_result_free(&result);
1909 rb_vm_pop_frame(ec);
1910 RB_GC_GUARD(v);
1911 rb_exc_raise(error);
1912 }
1913}
1914
1915/*
1916 * call-seq:
1917 * InstructionSequence.compile_option = options
1918 *
1919 * Sets the default values for various optimizations in the Ruby iseq
1920 * compiler.
1921 *
1922 * Possible values for +options+ include +true+, which enables all options,
1923 * +false+ which disables all options, and +nil+ which leaves all options
1924 * unchanged.
1925 *
1926 * You can also pass a +Hash+ of +options+ that you want to change, any
1927 * options not present in the hash will be left unchanged.
1928 *
1929 * Possible option names (which are keys in +options+) which can be set to
1930 * +true+ or +false+ include:
1931 *
1932 * * +:inline_const_cache+
1933 * * +:instructions_unification+
1934 * * +:operands_unification+
1935 * * +:peephole_optimization+
1936 * * +:specialized_instruction+
1937 * * +:tailcall_optimization+
1938 *
1939 * Additionally, +:debug_level+ can be set to an integer.
1940 *
1941 * These default options can be overwritten for a single run of the iseq
1942 * compiler by passing any of the above values as the +options+ parameter to
1943 * ::new, ::compile and ::compile_file.
1944 */
1945static VALUE
1946iseqw_s_compile_option_set(VALUE self, VALUE opt)
1947{
1948 rb_compile_option_t option;
1949 make_compile_option(&option, opt);
1950 COMPILE_OPTION_DEFAULT = option;
1951 return opt;
1952}
1953
1954/*
1955 * call-seq:
1956 * InstructionSequence.compile_option -> options
1957 *
1958 * Returns a hash of default options used by the Ruby iseq compiler.
1959 *
1960 * For details, see InstructionSequence.compile_option=.
1961 */
1962static VALUE
1963iseqw_s_compile_option_get(VALUE self)
1964{
1965 return make_compile_option_value(&COMPILE_OPTION_DEFAULT);
1966}
1967
1968static const rb_iseq_t *
1969iseqw_check(VALUE iseqw)
1970{
1971 rb_iseq_t **iseq_ptr;
1972 TypedData_Get_Struct(iseqw, rb_iseq_t *, &iseqw_data_type, iseq_ptr);
1973 rb_iseq_t *iseq = *iseq_ptr;
1974
1975 if (!ISEQ_BODY(iseq)) {
1976 rb_ibf_load_iseq_complete(iseq);
1977 }
1978
1979 if (!ISEQ_BODY(iseq)->location.label) {
1980 rb_raise(rb_eTypeError, "uninitialized InstructionSequence");
1981 }
1982 return iseq;
1983}
1984
1985const rb_iseq_t *
1986rb_iseqw_to_iseq(VALUE iseqw)
1987{
1988 return iseqw_check(iseqw);
1989}
1990
1991/*
1992 * call-seq:
1993 * iseq.eval -> obj
1994 *
1995 * Evaluates the instruction sequence and returns the result.
1996 *
1997 * RubyVM::InstructionSequence.compile("1 + 2").eval #=> 3
1998 */
1999static VALUE
2000iseqw_eval(VALUE self)
2001{
2002 const rb_iseq_t *iseq = iseqw_check(self);
2003 if (0 == ISEQ_BODY(iseq)->iseq_size) {
2004 rb_raise(rb_eTypeError, "attempt to evaluate dummy InstructionSequence");
2005 }
2006 return rb_iseq_eval(iseq, rb_current_namespace());
2007}
2008
2009/*
2010 * Returns a human-readable string representation of this instruction
2011 * sequence, including the #label and #path.
2012 */
2013static VALUE
2014iseqw_inspect(VALUE self)
2015{
2016 const rb_iseq_t *iseq = iseqw_check(self);
2017 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2018 VALUE klass = rb_class_name(rb_obj_class(self));
2019
2020 if (!body->location.label) {
2021 return rb_sprintf("#<%"PRIsVALUE": uninitialized>", klass);
2022 }
2023 else {
2024 return rb_sprintf("<%"PRIsVALUE":%"PRIsVALUE"@%"PRIsVALUE":%d>",
2025 klass,
2026 body->location.label, rb_iseq_path(iseq),
2027 FIX2INT(rb_iseq_first_lineno(iseq)));
2028 }
2029}
2030
2031/*
2032 * Returns the path of this instruction sequence.
2033 *
2034 * <code><compiled></code> if the iseq was evaluated from a string.
2035 *
2036 * For example, using irb:
2037 *
2038 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2039 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
2040 * iseq.path
2041 * #=> "<compiled>"
2042 *
2043 * Using ::compile_file:
2044 *
2045 * # /tmp/method.rb
2046 * def hello
2047 * puts "hello, world"
2048 * end
2049 *
2050 * # in irb
2051 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2052 * > iseq.path #=> /tmp/method.rb
2053 */
2054static VALUE
2055iseqw_path(VALUE self)
2056{
2057 return rb_iseq_path(iseqw_check(self));
2058}
2059
2060/*
2061 * Returns the absolute path of this instruction sequence.
2062 *
2063 * +nil+ if the iseq was evaluated from a string.
2064 *
2065 * For example, using ::compile_file:
2066 *
2067 * # /tmp/method.rb
2068 * def hello
2069 * puts "hello, world"
2070 * end
2071 *
2072 * # in irb
2073 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2074 * > iseq.absolute_path #=> /tmp/method.rb
2075 */
2076static VALUE
2077iseqw_absolute_path(VALUE self)
2078{
2079 return rb_iseq_realpath(iseqw_check(self));
2080}
2081
2082/* Returns the label of this instruction sequence.
2083 *
2084 * <code><main></code> if it's at the top level, <code><compiled></code> if it
2085 * was evaluated from a string.
2086 *
2087 * For example, using irb:
2088 *
2089 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2090 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
2091 * iseq.label
2092 * #=> "<compiled>"
2093 *
2094 * Using ::compile_file:
2095 *
2096 * # /tmp/method.rb
2097 * def hello
2098 * puts "hello, world"
2099 * end
2100 *
2101 * # in irb
2102 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2103 * > iseq.label #=> <main>
2104 */
2105static VALUE
2106iseqw_label(VALUE self)
2107{
2108 return rb_iseq_label(iseqw_check(self));
2109}
2110
2111/* Returns the base label of this instruction sequence.
2112 *
2113 * For example, using irb:
2114 *
2115 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2116 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
2117 * iseq.base_label
2118 * #=> "<compiled>"
2119 *
2120 * Using ::compile_file:
2121 *
2122 * # /tmp/method.rb
2123 * def hello
2124 * puts "hello, world"
2125 * end
2126 *
2127 * # in irb
2128 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
2129 * > iseq.base_label #=> <main>
2130 */
2131static VALUE
2132iseqw_base_label(VALUE self)
2133{
2134 return rb_iseq_base_label(iseqw_check(self));
2135}
2136
2137/* Returns the number of the first source line where the instruction sequence
2138 * was loaded from.
2139 *
2140 * For example, using irb:
2141 *
2142 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
2143 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
2144 * iseq.first_lineno
2145 * #=> 1
2146 */
2147static VALUE
2148iseqw_first_lineno(VALUE self)
2149{
2150 return rb_iseq_first_lineno(iseqw_check(self));
2151}
2152
2153static VALUE iseq_data_to_ary(const rb_iseq_t *iseq);
2154
2155/*
2156 * call-seq:
2157 * iseq.to_a -> ary
2158 *
2159 * Returns an Array with 14 elements representing the instruction sequence
2160 * with the following data:
2161 *
2162 * [magic]
2163 * A string identifying the data format. <b>Always
2164 * +YARVInstructionSequence/SimpleDataFormat+.</b>
2165 *
2166 * [major_version]
2167 * The major version of the instruction sequence.
2168 *
2169 * [minor_version]
2170 * The minor version of the instruction sequence.
2171 *
2172 * [format_type]
2173 * A number identifying the data format. <b>Always 1</b>.
2174 *
2175 * [misc]
2176 * A hash containing:
2177 *
2178 * [+:arg_size+]
2179 * the total number of arguments taken by the method or the block (0 if
2180 * _iseq_ doesn't represent a method or block)
2181 * [+:local_size+]
2182 * the number of local variables + 1
2183 * [+:stack_max+]
2184 * used in calculating the stack depth at which a SystemStackError is
2185 * thrown.
2186 *
2187 * [#label]
2188 * The name of the context (block, method, class, module, etc.) that this
2189 * instruction sequence belongs to.
2190 *
2191 * <code><main></code> if it's at the top level, <code><compiled></code> if
2192 * it was evaluated from a string.
2193 *
2194 * [#path]
2195 * The relative path to the Ruby file where the instruction sequence was
2196 * loaded from.
2197 *
2198 * <code><compiled></code> if the iseq was evaluated from a string.
2199 *
2200 * [#absolute_path]
2201 * The absolute path to the Ruby file where the instruction sequence was
2202 * loaded from.
2203 *
2204 * +nil+ if the iseq was evaluated from a string.
2205 *
2206 * [#first_lineno]
2207 * The number of the first source line where the instruction sequence was
2208 * loaded from.
2209 *
2210 * [type]
2211 * The type of the instruction sequence.
2212 *
2213 * Valid values are +:top+, +:method+, +:block+, +:class+, +:rescue+,
2214 * +:ensure+, +:eval+, +:main+, and +:plain+.
2215 *
2216 * [locals]
2217 * An array containing the names of all arguments and local variables as
2218 * symbols.
2219 *
2220 * [params]
2221 * A Hash object containing parameter information.
2222 *
2223 * More info about these values can be found in +vm_core.h+.
2224 *
2225 * [catch_table]
2226 * A list of exceptions and control flow operators (rescue, next, redo,
2227 * break, etc.).
2228 *
2229 * [bytecode]
2230 * An array of arrays containing the instruction names and operands that
2231 * make up the body of the instruction sequence.
2232 *
2233 * Note that this format is MRI specific and version dependent.
2234 *
2235 */
2236static VALUE
2237iseqw_to_a(VALUE self)
2238{
2239 const rb_iseq_t *iseq = iseqw_check(self);
2240 return iseq_data_to_ary(iseq);
2241}
2242
2243#if VM_INSN_INFO_TABLE_IMPL == 1 /* binary search */
2244static const struct iseq_insn_info_entry *
2245get_insn_info_binary_search(const rb_iseq_t *iseq, size_t pos)
2246{
2247 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2248 size_t size = body->insns_info.size;
2249 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
2250 const unsigned int *positions = body->insns_info.positions;
2251 const int debug = 0;
2252
2253 if (debug) {
2254 printf("size: %"PRIuSIZE"\n", size);
2255 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2256 (size_t)0, positions[0], insns_info[0].line_no, pos);
2257 }
2258
2259 if (size == 0) {
2260 return NULL;
2261 }
2262 else if (size == 1) {
2263 return &insns_info[0];
2264 }
2265 else {
2266 size_t l = 1, r = size - 1;
2267 while (l <= r) {
2268 size_t m = l + (r - l) / 2;
2269 if (positions[m] == pos) {
2270 return &insns_info[m];
2271 }
2272 if (positions[m] < pos) {
2273 l = m + 1;
2274 }
2275 else {
2276 r = m - 1;
2277 }
2278 }
2279 if (l >= size) {
2280 return &insns_info[size-1];
2281 }
2282 if (positions[l] > pos) {
2283 return &insns_info[l-1];
2284 }
2285 return &insns_info[l];
2286 }
2287}
2288
2289static const struct iseq_insn_info_entry *
2290get_insn_info(const rb_iseq_t *iseq, size_t pos)
2291{
2292 return get_insn_info_binary_search(iseq, pos);
2293}
2294#endif
2295
2296#if VM_INSN_INFO_TABLE_IMPL == 2 /* succinct bitvector */
2297static const struct iseq_insn_info_entry *
2298get_insn_info_succinct_bitvector(const rb_iseq_t *iseq, size_t pos)
2299{
2300 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2301 size_t size = body->insns_info.size;
2302 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
2303 const int debug = 0;
2304
2305 if (debug) {
2306#if VM_CHECK_MODE > 0
2307 const unsigned int *positions = body->insns_info.positions;
2308 printf("size: %"PRIuSIZE"\n", size);
2309 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2310 (size_t)0, positions[0], insns_info[0].line_no, pos);
2311#else
2312 printf("size: %"PRIuSIZE"\n", size);
2313 printf("insns_info[%"PRIuSIZE"]: line: %d, pos: %"PRIuSIZE"\n",
2314 (size_t)0, insns_info[0].line_no, pos);
2315#endif
2316 }
2317
2318 if (size == 0) {
2319 return NULL;
2320 }
2321 else if (size == 1) {
2322 return &insns_info[0];
2323 }
2324 else {
2325 int index;
2326 VM_ASSERT(body->insns_info.succ_index_table != NULL);
2327 index = succ_index_lookup(body->insns_info.succ_index_table, (int)pos);
2328 return &insns_info[index-1];
2329 }
2330}
2331
2332static const struct iseq_insn_info_entry *
2333get_insn_info(const rb_iseq_t *iseq, size_t pos)
2334{
2335 return get_insn_info_succinct_bitvector(iseq, pos);
2336}
2337#endif
2338
2339#if VM_CHECK_MODE > 0 || VM_INSN_INFO_TABLE_IMPL == 0
2340static const struct iseq_insn_info_entry *
2341get_insn_info_linear_search(const rb_iseq_t *iseq, size_t pos)
2342{
2343 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2344 size_t i = 0, size = body->insns_info.size;
2345 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
2346 const unsigned int *positions = body->insns_info.positions;
2347 const int debug = 0;
2348
2349 if (debug) {
2350 printf("size: %"PRIuSIZE"\n", size);
2351 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2352 i, positions[i], insns_info[i].line_no, pos);
2353 }
2354
2355 if (size == 0) {
2356 return NULL;
2357 }
2358 else if (size == 1) {
2359 return &insns_info[0];
2360 }
2361 else {
2362 for (i=1; i<size; i++) {
2363 if (debug) printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
2364 i, positions[i], insns_info[i].line_no, pos);
2365
2366 if (positions[i] == pos) {
2367 return &insns_info[i];
2368 }
2369 if (positions[i] > pos) {
2370 return &insns_info[i-1];
2371 }
2372 }
2373 }
2374 return &insns_info[i-1];
2375}
2376#endif
2377
2378#if VM_INSN_INFO_TABLE_IMPL == 0 /* linear search */
2379static const struct iseq_insn_info_entry *
2380get_insn_info(const rb_iseq_t *iseq, size_t pos)
2381{
2382 return get_insn_info_linear_search(iseq, pos);
2383}
2384#endif
2385
2386#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
2387static void
2388validate_get_insn_info(const rb_iseq_t *iseq)
2389{
2390 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2391 size_t i;
2392 for (i = 0; i < body->iseq_size; i++) {
2393 if (get_insn_info_linear_search(iseq, i) != get_insn_info(iseq, i)) {
2394 rb_bug("validate_get_insn_info: get_insn_info_linear_search(iseq, %"PRIuSIZE") != get_insn_info(iseq, %"PRIuSIZE")", i, i);
2395 }
2396 }
2397}
2398#endif
2399
2400unsigned int
2401rb_iseq_line_no(const rb_iseq_t *iseq, size_t pos)
2402{
2403 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
2404
2405 if (entry) {
2406 return entry->line_no;
2407 }
2408 else {
2409 return 0;
2410 }
2411}
2412
2413#ifdef USE_ISEQ_NODE_ID
2414int
2415rb_iseq_node_id(const rb_iseq_t *iseq, size_t pos)
2416{
2417 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
2418
2419 if (entry) {
2420 return entry->node_id;
2421 }
2422 else {
2423 return 0;
2424 }
2425}
2426#endif
2427
2429rb_iseq_event_flags(const rb_iseq_t *iseq, size_t pos)
2430{
2431 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
2432 if (entry) {
2433 return entry->events;
2434 }
2435 else {
2436 return 0;
2437 }
2438}
2439
2440// Clear tracing event flags and turn off tracing for a given instruction as needed.
2441// This is currently used after updating a one-shot line coverage for the current instruction.
2442void
2443rb_iseq_clear_event_flags(const rb_iseq_t *iseq, size_t pos, rb_event_flag_t reset)
2444{
2445 struct iseq_insn_info_entry *entry = (struct iseq_insn_info_entry *)get_insn_info(iseq, pos);
2446 if (entry) {
2447 entry->events &= ~reset;
2448 if (!(entry->events & iseq->aux.exec.global_trace_events)) {
2449 void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos);
2450 rb_iseq_trace_flag_cleared(iseq, pos);
2451 }
2452 }
2453}
2454
2455static VALUE
2456local_var_name(const rb_iseq_t *diseq, VALUE level, VALUE op)
2457{
2458 VALUE i;
2459 VALUE name;
2460 ID lid;
2461 int idx;
2462
2463 for (i = 0; i < level; i++) {
2464 diseq = ISEQ_BODY(diseq)->parent_iseq;
2465 }
2466 idx = ISEQ_BODY(diseq)->local_table_size - (int)op - 1;
2467 lid = ISEQ_BODY(diseq)->local_table[idx];
2468 name = rb_id2str(lid);
2469 if (!name) {
2470 name = rb_str_new_cstr("?");
2471 }
2472 else if (!rb_is_local_id(lid)) {
2473 name = rb_str_inspect(name);
2474 }
2475 else {
2476 name = rb_str_dup(name);
2477 }
2478 rb_str_catf(name, "@%d", idx);
2479 return name;
2480}
2481
2482int rb_insn_unified_local_var_level(VALUE);
2483VALUE rb_dump_literal(VALUE lit);
2484
/*
 * Renders one operand of an instruction as a String for disassembly.
 *
 * iseq  - instruction sequence the instruction belongs to; used to resolve
 *         local variable names and inline-storage indexes.
 * insn  - the instruction; its operand type table comes from insn_op_types().
 * op_no - index of the operand within the instruction.
 * op    - raw operand value, interpreted according to types[op_no].
 * len, pos - instruction length and position; a TS_OFFSET operand is
 *         printed as pos + len + op.
 * pnop  - pointer to the next operand, or NULL; for TS_LINDEX it supplies
 *         the level when the next operand type is TS_NUM.
 * child - when non-zero, an array that receives iseqs referenced by the
 *         operand (TS_ISEQ operands and TS_VALUE operands of class rb_cISeq).
 *
 * Returns the rendered operand. An unknown operand type is reported through
 * rb_bug().
 */
2485VALUE
2486rb_insn_operand_intern(const rb_iseq_t *iseq,
2487 VALUE insn, int op_no, VALUE op,
2488 int len, size_t pos, const VALUE *pnop, VALUE child)
2489{
2490 const char *types = insn_op_types(insn);
2491 char type = types[op_no];
2492 VALUE ret = Qundef;
2493
2494 switch (type) {
2495 case TS_OFFSET: /* LONG */
2496 ret = rb_sprintf("%"PRIdVALUE, (VALUE)(pos + len + op));
2497 break;
2498
2499 case TS_NUM: /* ULONG */
 /* The first operand of `defined` is shown by name rather than number. */
2500 if (insn == BIN(defined) && op_no == 0) {
2501 enum defined_type deftype = (enum defined_type)op;
2502 switch (deftype) {
2503 case DEFINED_FUNC:
2504 ret = rb_fstring_lit("func");
2505 break;
2506 case DEFINED_REF:
2507 ret = rb_fstring_lit("ref");
2508 break;
2509 case DEFINED_CONST_FROM:
2510 ret = rb_fstring_lit("constant-from");
2511 break;
2512 default:
2513 ret = rb_iseq_defined_string(deftype);
2514 break;
2515 }
 /* A zero result means no name was found: fall through to the plain number below. */
2516 if (ret) break;
2517 }
 /* The first operand of `checktype` is shown as a type name when one is known. */
2518 else if (insn == BIN(checktype) && op_no == 0) {
2519 const char *type_str = rb_type_str((enum ruby_value_type)op);
2520 if (type_str) {
2521 ret = rb_str_new_cstr(type_str); break;
2522 }
2523 }
 /* Default for TS_NUM: the operand as an unsigned number. */
2524 ret = rb_sprintf("%"PRIuVALUE, op);
2525 break;
2526
2527 case TS_LINDEX:{
2528 int level;
 /* Level comes from the next operand when it is a TS_NUM and was supplied, */
 /* otherwise from the instruction itself when it encodes a level (>= 0). */
2529 if (types[op_no+1] == TS_NUM && pnop) {
2530 ret = local_var_name(iseq, *pnop, op - VM_ENV_DATA_SIZE);
2531 }
2532 else if ((level = rb_insn_unified_local_var_level(insn)) >= 0) {
2533 ret = local_var_name(iseq, (VALUE)level, op - VM_ENV_DATA_SIZE);
2534 }
2535 else {
 /* No level available: show the raw index. */
2536 ret = rb_inspect(INT2FIX(op));
2537 }
2538 break;
2539 }
2540 case TS_ID: /* ID (symbol) */
2541 ret = rb_inspect(ID2SYM(op));
2542 break;
2543
2544 case TS_VALUE: /* VALUE */
2545 op = obj_resurrect(op);
2546 if (insn == BIN(defined) && op_no == 1 && FIXNUM_P(op)) {
2547 /* should be DEFINED_REF */
2548 int type = NUM2INT(op);
2549 if (type) {
 /* Low bit selects the form: set prints (type >> 1) as a character, clear as a number. */
2550 if (type & 1) {
2551 ret = rb_sprintf(":$%c", (type >> 1));
2552 }
2553 else {
2554 ret = rb_sprintf(":$%d", (type >> 1));
2555 }
2556 break;
2557 }
2558 }
2559 ret = rb_dump_literal(op);
 /* Operands whose class is rb_cISeq are also reported to the caller through child. */
2560 if (CLASS_OF(op) == rb_cISeq) {
2561 if (child) {
2562 rb_ary_push(child, op);
2563 }
2564 }
2565 break;
2566
2567 case TS_ISEQ: /* iseq */
2568 {
2569 if (op) {
 /* NOTE: this local `iseq` shadows the function parameter of the same name. */
2570 const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op);
 /* The label object itself is returned, not a copy. */
2571 ret = ISEQ_BODY(iseq)->location.label;
2572 if (child) {
2573 rb_ary_push(child, (VALUE)iseq);
2574 }
2575 }
2576 else {
2577 ret = rb_str_new2("nil");
2578 }
2579 break;
2580 }
2581
2582 case TS_IC:
2583 {
 /* Shown as the entry's index within is_entries, then its ID segments joined by "::". */
2584 ret = rb_sprintf("<ic:%"PRIdPTRDIFF" ", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries);
2585 const ID *segments = ((IC)op)->segments;
2586 rb_str_cat2(ret, rb_id2name(*segments++));
 /* The segment list ends at the first zero ID. */
2587 while (*segments) {
2588 rb_str_catf(ret, "::%s", rb_id2name(*segments++));
2589 }
2590 rb_str_cat2(ret, ">");
2591 }
2592 break;
2593 case TS_IVC:
2594 case TS_ICVARC:
2595 case TS_ISE:
 /* Other inline-storage operands show only their index within is_entries. */
2596 ret = rb_sprintf("<is:%"PRIdPTRDIFF">", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries);
2597 break;
2598
2599 case TS_CALLDATA:
2600 {
 /* Collect "mid:", "argc:", "kw:[...]" and the flag names, then join them with ", ". */
2601 struct rb_call_data *cd = (struct rb_call_data *)op;
2602 const struct rb_callinfo *ci = cd->ci;
2603 VALUE ary = rb_ary_new();
2604 ID mid = vm_ci_mid(ci);
2605
2606 if (mid) {
2607 rb_ary_push(ary, rb_sprintf("mid:%"PRIsVALUE, rb_id2str(mid)));
2608 }
2609
2610 rb_ary_push(ary, rb_sprintf("argc:%d", vm_ci_argc(ci)));
2611
2612 if (vm_ci_flag(ci) & VM_CALL_KWARG) {
2613 const struct rb_callinfo_kwarg *kw_args = vm_ci_kwarg(ci);
2614 VALUE kw_ary = rb_ary_new_from_values(kw_args->keyword_len, kw_args->keywords);
2615 rb_ary_push(ary, rb_sprintf("kw:[%"PRIsVALUE"]", rb_ary_join(kw_ary, rb_str_new2(","))));
2616 }
2617
2618 if (vm_ci_flag(ci)) {
2619 VALUE flags = rb_ary_new();
 /* CALL_FLAG(n) pushes the name n when VM_CALL_n is set; names are joined with "|". */
2620# define CALL_FLAG(n) if (vm_ci_flag(ci) & VM_CALL_##n) rb_ary_push(flags, rb_str_new2(#n))
2621 CALL_FLAG(ARGS_SPLAT);
2622 CALL_FLAG(ARGS_SPLAT_MUT);
2623 CALL_FLAG(ARGS_BLOCKARG);
2624 CALL_FLAG(FCALL);
2625 CALL_FLAG(VCALL);
2626 CALL_FLAG(ARGS_SIMPLE);
2627 CALL_FLAG(TAILCALL);
2628 CALL_FLAG(SUPER);
2629 CALL_FLAG(ZSUPER);
2630 CALL_FLAG(KWARG);
2631 CALL_FLAG(KW_SPLAT);
2632 CALL_FLAG(KW_SPLAT_MUT);
2633 CALL_FLAG(FORWARDING);
2634 CALL_FLAG(OPT_SEND); /* maybe not reachable */
2635 rb_ary_push(ary, rb_ary_join(flags, rb_str_new2("|")));
2636 }
2637
2638 ret = rb_sprintf("<calldata!%"PRIsVALUE">", rb_ary_join(ary, rb_str_new2(", ")));
2639 }
2640 break;
2641
2642 case TS_CDHASH:
2643 ret = rb_str_new2("<cdhash>");
2644 break;
2645
2646 case TS_FUNCPTR:
2647 {
2648#ifdef HAVE_DLADDR
 /* Where dladdr() is available, show the symbol name it resolves for the pointer. */
2649 Dl_info info;
2650 if (dladdr((void *)op, &info) && info.dli_sname) {
2651 ret = rb_str_new_cstr(info.dli_sname);
2652 break;
2653 }
2654#endif
2655 ret = rb_str_new2("<funcptr>");
2656 }
2657 break;
2658
2659 case TS_BUILTIN:
2660 {
2661 const struct rb_builtin_function *bf = (const struct rb_builtin_function *)op;
2662 ret = rb_sprintf("<builtin!%s/%d>",
2663 bf->name, bf->argc);
2664 }
2665 break;
2666
2667 default:
2668 rb_bug("unknown operand type: %c", type);
2669 }
2670 return ret;
2671}
2672
2673static VALUE
2674right_strip(VALUE str)
2675{
2676 const char *beg = RSTRING_PTR(str), *end = RSTRING_END(str);
2677 while (end-- > beg && *end == ' ');
2678 rb_str_set_len(str, end - beg + 1);
2679 return str;
2680}
2681
2686int
2687rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos,
2688 const rb_iseq_t *iseq, VALUE child)
2689{
2690 VALUE insn = code[pos];
2691 int len = insn_len(insn);
2692 int j;
2693 const char *types = insn_op_types(insn);
2694 VALUE str = rb_str_new(0, 0);
2695 const char *insn_name_buff;
2696
2697 insn_name_buff = insn_name(insn);
2698 if (1) {
2699 extern const int rb_vm_max_insn_name_size;
2700 rb_str_catf(str, "%04"PRIuSIZE" %-*s ", pos, rb_vm_max_insn_name_size, insn_name_buff);
2701 }
2702 else {
2703 rb_str_catf(str, "%04"PRIuSIZE" %-28.*s ", pos,
2704 (int)strcspn(insn_name_buff, "_"), insn_name_buff);
2705 }
2706
2707 for (j = 0; types[j]; j++) {
2708 VALUE opstr = rb_insn_operand_intern(iseq, insn, j, code[pos + j + 1],
2709 len, pos, &code[pos + j + 2],
2710 child);
2711 rb_str_concat(str, opstr);
2712
2713 if (types[j + 1]) {
2714 rb_str_cat2(str, ", ");
2715 }
2716 }
2717
2718 {
2719 unsigned int line_no = rb_iseq_line_no(iseq, pos);
2720 unsigned int prev = pos == 0 ? 0 : rb_iseq_line_no(iseq, pos - 1);
2721 if (line_no && line_no != prev) {
2722 long slen = RSTRING_LEN(str);
2723 slen = (slen > 70) ? 0 : (70 - slen);
2724 str = rb_str_catf(str, "%*s(%4d)", (int)slen, "", line_no);
2725 }
2726 }
2727
2728 {
2729 rb_event_flag_t events = rb_iseq_event_flags(iseq, pos);
2730 if (events) {
2731 str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s%s]",
2732 events & RUBY_EVENT_LINE ? "Li" : "",
2733 events & RUBY_EVENT_CLASS ? "Cl" : "",
2734 events & RUBY_EVENT_END ? "En" : "",
2735 events & RUBY_EVENT_CALL ? "Ca" : "",
2736 events & RUBY_EVENT_RETURN ? "Re" : "",
2737 events & RUBY_EVENT_C_CALL ? "Cc" : "",
2738 events & RUBY_EVENT_C_RETURN ? "Cr" : "",
2739 events & RUBY_EVENT_B_CALL ? "Bc" : "",
2740 events & RUBY_EVENT_B_RETURN ? "Br" : "",
2741 events & RUBY_EVENT_RESCUE ? "Rs" : "",
2742 events & RUBY_EVENT_COVERAGE_LINE ? "Cli" : "",
2743 events & RUBY_EVENT_COVERAGE_BRANCH ? "Cbr" : "");
2744 }
2745 }
2746
2747 right_strip(str);
2748 if (ret) {
2749 rb_str_cat2(str, "\n");
2750 rb_str_concat(ret, str);
2751 }
2752 else {
2753 printf("%.*s\n", (int)RSTRING_LEN(str), RSTRING_PTR(str));
2754 }
2755 return len;
2756}
2757
2758static const char *
2759catch_type(int type)
2760{
2761 switch (type) {
2762 case CATCH_TYPE_RESCUE:
2763 return "rescue";
2764 case CATCH_TYPE_ENSURE:
2765 return "ensure";
2766 case CATCH_TYPE_RETRY:
2767 return "retry";
2768 case CATCH_TYPE_BREAK:
2769 return "break";
2770 case CATCH_TYPE_REDO:
2771 return "redo";
2772 case CATCH_TYPE_NEXT:
2773 return "next";
2774 default:
2775 rb_bug("unknown catch type: %d", type);
2776 return 0;
2777 }
2778}
2779
2780static VALUE
2781iseq_inspect(const rb_iseq_t *iseq)
2782{
2783 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2784 if (!body->location.label) {
2785 return rb_sprintf("#<ISeq: uninitialized>");
2786 }
2787 else {
2788 const rb_code_location_t *loc = &body->location.code_location;
2789 return rb_sprintf("#<ISeq:%"PRIsVALUE"@%"PRIsVALUE":%d (%d,%d)-(%d,%d)>",
2790 body->location.label, rb_iseq_path(iseq),
2791 loc->beg_pos.lineno,
2792 loc->beg_pos.lineno,
2793 loc->beg_pos.column,
2794 loc->end_pos.lineno,
2795 loc->end_pos.column);
2796 }
2797}
2798
2799static const rb_data_type_t tmp_set = {
2800 "tmpset",
2801 {(void (*)(void *))rb_mark_set, (void (*)(void *))st_free_table, 0, 0,},
2802 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
2803};
2804
/*
 * Disassembles +iseq+, and recursively the iseqs it references, into a new
 * String.
 *
 * indent - a String prefixed to every emitted line.  It is modified in
 *          place: "| " is appended while catch-table iseqs are disassembled
 *          and it is resized back to its original length afterwards.
 *
 * Output order: header line, catch table (each entry followed by its iseq
 * the first time that iseq is seen), local table, one line per instruction,
 * then every child iseq collected from the operands that was not already
 * printed from the catch table.
 *
 * Returns the resulting String.
 */
2805static VALUE
2806rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent)
2807{
2808 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2809 VALUE *code;
2810 VALUE str = rb_str_new(0, 0);
 /* child collects iseqs referenced by operands while instructions are printed */
2811 VALUE child = rb_ary_hidden_new(3);
2812 unsigned int size;
2813 unsigned int i;
2814 long l;
2815 size_t n;
2816 enum {header_minlen = 72};
 /* done_iseq: catch-table iseqs already printed; created lazily on first use. */
 /* done_iseq_wrapper: object wrapping that table, kept referenced until the end via RB_GC_GUARD. */
2817 st_table *done_iseq = 0;
2818 VALUE done_iseq_wrapper = Qnil;
2819 const char *indent_str;
2820 long indent_len;
2821
2822 size = body->iseq_size;
2823
 /* indent_len stays fixed for this call; indent_str is re-read whenever indent may have changed */
2824 indent_len = RSTRING_LEN(indent);
2825 indent_str = RSTRING_PTR(indent);
2826
2827 rb_str_cat(str, indent_str, indent_len);
2828 rb_str_cat2(str, "== disasm: ");
2829
2830 rb_str_append(str, iseq_inspect(iseq));
 /* NOTE(review): the '=' bytes are written at RSTRING_END(str) and no length */
 /* update is visible here, so this padding may never show in the output -- confirm. */
2831 if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) {
2832 rb_str_modify_expand(str, header_minlen - l);
2833 memset(RSTRING_END(str), '=', header_minlen - l);
2834 }
 /* append the name of every builtin attribute that is set */
2835 if (iseq->body->builtin_attrs) {
2836#define disasm_builtin_attr(str, iseq, attr) \
2837 if (iseq->body->builtin_attrs & BUILTIN_ATTR_ ## attr) { \
2838 rb_str_cat2(str, " " #attr); \
2839 }
2840 disasm_builtin_attr(str, iseq, LEAF);
2841 disasm_builtin_attr(str, iseq, SINGLE_NOARG_LEAF);
2842 disasm_builtin_attr(str, iseq, INLINE_BLOCK);
2843 disasm_builtin_attr(str, iseq, C_TRACE);
2844 }
2845 rb_str_cat2(str, "\n");
2846
2847 /* show catch table information */
2848 if (body->catch_table) {
2849 rb_str_cat(str, indent_str, indent_len);
2850 rb_str_cat2(str, "== catch table\n");
2851 }
2852 if (body->catch_table) {
 /* Nested iseqs get a deeper indent; the entry lines below still use only the first indent_len bytes. */
2853 rb_str_cat_cstr(indent, "| ");
2854 indent_str = RSTRING_PTR(indent);
2855 for (i = 0; i < body->catch_table->size; i++) {
2856 const struct iseq_catch_table_entry *entry =
2857 UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]);
2858 rb_str_cat(str, indent_str, indent_len);
2859 rb_str_catf(str,
2860 "| catch type: %-6s st: %04d ed: %04d sp: %04d cont: %04d\n",
2861 catch_type((int)entry->type), (int)entry->start,
2862 (int)entry->end, (int)entry->sp, (int)entry->cont);
 /* Print the entry's iseq only the first time it is seen. */
2863 if (entry->iseq && !(done_iseq && st_is_member(done_iseq, (st_data_t)entry->iseq))) {
2864 rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check(entry->iseq), indent));
2865 if (!done_iseq) {
2866 done_iseq = st_init_numtable();
2867 done_iseq_wrapper = TypedData_Wrap_Struct(0, &tmp_set, done_iseq);
2868 }
2869 st_insert(done_iseq, (st_data_t)entry->iseq, (st_data_t)0);
 /* The recursive call modifies indent, so the cached pointer is re-read. */
2870 indent_str = RSTRING_PTR(indent);
2871 }
2872 }
 /* Drop the "| " suffix again. */
2873 rb_str_resize(indent, indent_len);
2874 indent_str = RSTRING_PTR(indent);
2875 }
2876 if (body->catch_table) {
2877 rb_str_cat(str, indent_str, indent_len);
2878 rb_str_cat2(str, "|-------------------------------------"
2879 "-----------------------------------\n");
2880 }
2881
2882 /* show local table information */
2883 if (body->local_table) {
2884 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
2885 rb_str_cat(str, indent_str, indent_len);
 /* Parameters that are absent (per the has_* flags) are shown as -1. */
2886 rb_str_catf(str,
2887 "local table (size: %d, argc: %d "
2888 "[opts: %d, rest: %d, post: %d, block: %d, kw: %d@%d, kwrest: %d])\n",
2889 body->local_table_size,
2890 body->param.lead_num,
2891 body->param.opt_num,
2892 body->param.flags.has_rest ? body->param.rest_start : -1,
2893 body->param.post_num,
2894 body->param.flags.has_block ? body->param.block_start : -1,
2895 body->param.flags.has_kw ? keyword->num : -1,
2896 body->param.flags.has_kw ? keyword->required_num : -1,
2897 body->param.flags.has_kwrest ? keyword->rest_start : -1);
2898
 /* i counts down from local_table_size - 1 to 0 while li counts up from 0: */
 /* li is the index into the local table, i the operand form passed to local_var_name(). */
2899 for (i = body->local_table_size; i > 0;) {
2900 int li = body->local_table_size - --i - 1;
2901 long width;
2902 VALUE name = local_var_name(iseq, 0, i);
2903 char argi[0x100];
2904 char opti[0x100];
2905
 /* opti: "Opt=<value from opt_table>" when li falls in the optional-parameter range */
2906 opti[0] = '\0';
2907 if (body->param.flags.has_opt) {
2908 int argc = body->param.lead_num;
2909 int opts = body->param.opt_num;
2910 if (li >= argc && li < argc + opts) {
2911 snprintf(opti, sizeof(opti), "Opt=%"PRIdVALUE,
2912 body->param.opt_table[li - argc]);
2913 }
2914 }
2915
 /* argi: concatenation of every parameter role this local slot plays */
2916 snprintf(argi, sizeof(argi), "%s%s%s%s%s%s", /* arg, opts, rest, post, kwrest, block */
2917 (body->param.lead_num > li) ? (body->param.flags.ambiguous_param0 ? "AmbiguousArg" : "Arg") : "",
2918 opti,
2919 (body->param.flags.has_rest && body->param.rest_start == li) ? (body->param.flags.anon_rest ? "AnonRest" : "Rest") : "",
2920 (body->param.flags.has_post && body->param.post_start <= li && li < body->param.post_start + body->param.post_num) ? "Post" : "",
2921 (body->param.flags.has_kwrest && keyword->rest_start == li) ? (body->param.flags.anon_kwrest ? "AnonKwrest" : "Kwrest") : "",
2922 (body->param.flags.has_block && body->param.block_start == li) ? "Block" : "");
2923
2924 rb_str_cat(str, indent_str, indent_len);
2925 rb_str_catf(str, "[%2d] ", i + 1);
 /* Pad the name (plus its <roles> suffix) with spaces to at least 11 columns. */
2926 width = RSTRING_LEN(str) + 11;
2927 rb_str_append(str, name);
2928 if (*argi) rb_str_catf(str, "<%s>", argi);
2929 if ((width -= RSTRING_LEN(str)) > 0) rb_str_catf(str, "%*s", (int)width, "");
2930 }
 /* Remove the padding after the last entry before ending the line. */
2931 rb_str_cat_cstr(right_strip(str), "\n");
2932 }
2933
2934 /* show each line */
2935 code = rb_iseq_original_iseq(iseq);
 /* rb_iseq_disasm_insn() returns the instruction length, which advances n */
2936 for (n = 0; n < size;) {
2937 rb_str_cat(str, indent_str, indent_len);
2938 n += rb_iseq_disasm_insn(str, code, n, iseq, child);
2939 }
2940
 /* Disassemble child iseqs found in operands, skipping those already printed from the catch table. */
2941 for (l = 0; l < RARRAY_LEN(child); l++) {
2942 VALUE isv = rb_ary_entry(child, l);
2943 if (done_iseq && st_is_member(done_iseq, (st_data_t)isv)) continue;
2944 rb_str_cat_cstr(str, "\n");
2945 rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check((rb_iseq_t *)isv), indent));
2946 indent_str = RSTRING_PTR(indent);
2947 }
2948 RB_GC_GUARD(done_iseq_wrapper);
2949
2950 return str;
2951}
2952
2953VALUE
2954rb_iseq_disasm(const rb_iseq_t *iseq)
2955{
2956 VALUE str = rb_iseq_disasm_recursive(iseq, rb_str_new(0, 0));
2957 rb_str_resize(str, RSTRING_LEN(str));
2958 return str;
2959}
2960
2961/*
2962 * Estimates the number of instance variables that will be set on
2963 * a given `class` with the initialize method defined in
2964 * `initialize_iseq`
2965 */
2966attr_index_t
2967rb_estimate_iv_count(VALUE klass, const rb_iseq_t * initialize_iseq)
2968{
2969 struct rb_id_table * iv_names = rb_id_table_create(0);
2970
2971 for (unsigned int i = 0; i < ISEQ_BODY(initialize_iseq)->ivc_size; i++) {
2972 IVC cache = (IVC)&ISEQ_BODY(initialize_iseq)->is_entries[i];
2973
2974 if (cache->iv_set_name) {
2975 rb_id_table_insert(iv_names, cache->iv_set_name, Qtrue);
2976 }
2977 }
2978
2979 attr_index_t count = (attr_index_t)rb_id_table_size(iv_names);
2980
2981 VALUE superclass = rb_class_superclass(klass);
2982 count += RCLASS_MAX_IV_COUNT(superclass);
2983
2984 rb_id_table_free(iv_names);
2985
2986 return count;
2987}
2988
2989/*
2990 * call-seq:
2991 * iseq.disasm -> str
2992 * iseq.disassemble -> str
2993 *
2994 * Returns the instruction sequence as a +String+ in human readable form.
2995 *
2996 * puts RubyVM::InstructionSequence.compile('1 + 2').disasm
2997 *
2998 * Produces:
2999 *
3000 * == disasm: <RubyVM::InstructionSequence:<compiled>@<compiled>>==========
3001 * 0000 trace 1 ( 1)
3002 * 0002 putobject 1
3003 * 0004 putobject 2
3004 * 0006 opt_plus <ic:1>
3005 * 0008 leave
3006 */
3007static VALUE
3008iseqw_disasm(VALUE self)
3009{
3010 return rb_iseq_disasm(iseqw_check(self));
3011}
3012
3013static int
3014iseq_iterate_children(const rb_iseq_t *iseq, void (*iter_func)(const rb_iseq_t *child_iseq, void *data), void *data)
3015{
3016 unsigned int i;
3017 VALUE *code = rb_iseq_original_iseq(iseq);
3018 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3019 const rb_iseq_t *child;
3020 VALUE all_children = rb_obj_hide(rb_ident_hash_new());
3021
3022 if (body->catch_table) {
3023 for (i = 0; i < body->catch_table->size; i++) {
3024 const struct iseq_catch_table_entry *entry =
3025 UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]);
3026 child = entry->iseq;
3027 if (child) {
3028 if (NIL_P(rb_hash_aref(all_children, (VALUE)child))) {
3029 rb_hash_aset(all_children, (VALUE)child, Qtrue);
3030 (*iter_func)(child, data);
3031 }
3032 }
3033 }
3034 }
3035
3036 for (i=0; i<body->iseq_size;) {
3037 VALUE insn = code[i];
3038 int len = insn_len(insn);
3039 const char *types = insn_op_types(insn);
3040 int j;
3041
3042 for (j=0; types[j]; j++) {
3043 switch (types[j]) {
3044 case TS_ISEQ:
3045 child = (const rb_iseq_t *)code[i+j+1];
3046 if (child) {
3047 if (NIL_P(rb_hash_aref(all_children, (VALUE)child))) {
3048 rb_hash_aset(all_children, (VALUE)child, Qtrue);
3049 (*iter_func)(child, data);
3050 }
3051 }
3052 break;
3053 default:
3054 break;
3055 }
3056 }
3057 i += len;
3058 }
3059
3060 return (int)RHASH_SIZE(all_children);
3061}
3062
3063static void
3064yield_each_children(const rb_iseq_t *child_iseq, void *data)
3065{
3066 rb_yield(iseqw_new(child_iseq));
3067}
3068
3069/*
3070 * call-seq:
3071 * iseq.each_child{|child_iseq| ...} -> iseq
3072 *
3073 * Iterates over all direct child instruction sequences.
3074 * The iteration order is implementation- and version-defined,
3075 * so callers should not rely on it.
3076 */
3077static VALUE
3078iseqw_each_child(VALUE self)
3079{
3080 const rb_iseq_t *iseq = iseqw_check(self);
3081 iseq_iterate_children(iseq, yield_each_children, NULL);
3082 return self;
3083}
3084
3085static void
3086push_event_info(const rb_iseq_t *iseq, rb_event_flag_t events, int line, VALUE ary)
3087{
3088#define C(ev, cstr, l) if (events & ev) rb_ary_push(ary, rb_ary_new_from_args(2, l, ID2SYM(rb_intern(cstr))));
3089 C(RUBY_EVENT_CLASS, "class", rb_iseq_first_lineno(iseq));
3090 C(RUBY_EVENT_CALL, "call", rb_iseq_first_lineno(iseq));
3091 C(RUBY_EVENT_B_CALL, "b_call", rb_iseq_first_lineno(iseq));
3092 C(RUBY_EVENT_LINE, "line", INT2FIX(line));
3093 C(RUBY_EVENT_END, "end", INT2FIX(line));
3094 C(RUBY_EVENT_RETURN, "return", INT2FIX(line));
3095 C(RUBY_EVENT_B_RETURN, "b_return", INT2FIX(line));
3096 C(RUBY_EVENT_RESCUE, "rescue", INT2FIX(line));
3097#undef C
3098}
3099
3100/*
3101 * call-seq:
3102 * iseq.trace_points -> ary
3103 *
3104 * Returns the trace points in the instruction sequence
3105 * as an array of [line, event_symbol] pairs.
3106 */
3107static VALUE
3108iseqw_trace_points(VALUE self)
3109{
3110 const rb_iseq_t *iseq = iseqw_check(self);
3111 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3112 unsigned int i;
3113 VALUE ary = rb_ary_new();
3114
3115 for (i=0; i<body->insns_info.size; i++) {
3116 const struct iseq_insn_info_entry *entry = &body->insns_info.body[i];
3117 if (entry->events) {
3118 push_event_info(iseq, entry->events, entry->line_no, ary);
3119 }
3120 }
3121 return ary;
3122}
3123
3124/*
3125 * Returns the instruction sequence containing the given proc or method.
3126 *
3127 * For example, using irb:
3128 *
3129 * # a proc
3130 * > p = proc { num = 1 + 2 }
3131 * > RubyVM::InstructionSequence.of(p)
3132 * > #=> <RubyVM::InstructionSequence:block in irb_binding@(irb)>
3133 *
3134 * # for a method
3135 * > def foo(bar); puts bar; end
3136 * > RubyVM::InstructionSequence.of(method(:foo))
3137 * > #=> <RubyVM::InstructionSequence:foo@(irb)>
3138 *
3139 * Using ::compile_file:
3140 *
3141 * # /tmp/iseq_of.rb
3142 * def hello
3143 * puts "hello, world"
3144 * end
3145 *
3146 * $a_global_proc = proc { str = 'a' + 'b' }
3147 *
3148 * # in irb
3149 * > require '/tmp/iseq_of.rb'
3150 *
3151 * # first the method hello
3152 * > RubyVM::InstructionSequence.of(method(:hello))
3153 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7cb1d0>
3154 *
3155 * # then the global proc
3156 * > RubyVM::InstructionSequence.of($a_global_proc)
3157 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7caf78>
3158 */
3159static VALUE
3160iseqw_s_of(VALUE klass, VALUE body)
3161{
3162 const rb_iseq_t *iseq = NULL;
3163
3164 if (rb_frame_info_p(body)) {
3165 iseq = rb_get_iseq_from_frame_info(body);
3166 }
3167 else if (rb_obj_is_proc(body)) {
3168 iseq = vm_proc_iseq(body);
3169
3170 if (!rb_obj_is_iseq((VALUE)iseq)) {
3171 iseq = NULL;
3172 }
3173 }
3174 else if (rb_obj_is_method(body)) {
3175 iseq = rb_method_iseq(body);
3176 }
3177 else if (rb_typeddata_is_instance_of(body, &iseqw_data_type)) {
3178 return body;
3179 }
3180
3181 return iseq ? iseqw_new(iseq) : Qnil;
3182}
3183
3184/*
3185 * call-seq:
3186 * InstructionSequence.disasm(body) -> str
3187 * InstructionSequence.disassemble(body) -> str
3188 *
3189 * Takes +body+, a +Method+ or +Proc+ object, and returns a +String+
3190 * with the human readable instructions for +body+.
3191 *
3192 * For a +Method+ object:
3193 *
3194 * # /tmp/method.rb
3195 * def hello
3196 * puts "hello, world"
3197 * end
3198 *
3199 * puts RubyVM::InstructionSequence.disasm(method(:hello))
3200 *
3201 * Produces:
3202 *
3203 * == disasm: <RubyVM::InstructionSequence:hello@/tmp/method.rb>============
3204 * 0000 trace 8 ( 1)
3205 * 0002 trace 1 ( 2)
3206 * 0004 putself
3207 * 0005 putstring "hello, world"
3208 * 0007 send :puts, 1, nil, 8, <ic:0>
3209 * 0013 trace 16 ( 3)
3210 * 0015 leave ( 2)
3211 *
3212 * For a +Proc+ object:
3213 *
3214 * # /tmp/proc.rb
3215 * p = proc { num = 1 + 2 }
3216 * puts RubyVM::InstructionSequence.disasm(p)
3217 *
3218 * Produces:
3219 *
3220 * == disasm: <RubyVM::InstructionSequence:block in <main>@/tmp/proc.rb>===
3221 * == catch table
3222 * | catch type: redo st: 0000 ed: 0012 sp: 0000 cont: 0000
3223 * | catch type: next st: 0000 ed: 0012 sp: 0000 cont: 0012
3224 * |------------------------------------------------------------------------
3225 * local table (size: 2, argc: 0 [opts: 0, rest: -1, post: 0, block: -1] s1)
3226 * [ 2] num
3227 * 0000 trace 1 ( 1)
3228 * 0002 putobject 1
3229 * 0004 putobject 2
3230 * 0006 opt_plus <ic:1>
3231 * 0008 dup
3232 * 0009 setlocal num, 0
3233 * 0012 leave
3234 *
3235 */
3236static VALUE
3237iseqw_s_disasm(VALUE klass, VALUE body)
3238{
3239 VALUE iseqw = iseqw_s_of(klass, body);
3240 return NIL_P(iseqw) ? Qnil : rb_iseq_disasm(iseqw_check(iseqw));
3241}
3242
3243static VALUE
3244register_label(struct st_table *table, unsigned long idx)
3245{
3246 VALUE sym = rb_str_intern(rb_sprintf("label_%lu", idx));
3247 st_insert(table, idx, sym);
3248 return sym;
3249}
3250
3251static VALUE
3252exception_type2symbol(VALUE type)
3253{
3254 ID id;
3255 switch (type) {
3256 case CATCH_TYPE_RESCUE: CONST_ID(id, "rescue"); break;
3257 case CATCH_TYPE_ENSURE: CONST_ID(id, "ensure"); break;
3258 case CATCH_TYPE_RETRY: CONST_ID(id, "retry"); break;
3259 case CATCH_TYPE_BREAK: CONST_ID(id, "break"); break;
3260 case CATCH_TYPE_REDO: CONST_ID(id, "redo"); break;
3261 case CATCH_TYPE_NEXT: CONST_ID(id, "next"); break;
3262 default:
3263 rb_bug("unknown exception type: %d", (int)type);
3264 }
3265 return ID2SYM(id);
3266}
3267
3268static int
3269cdhash_each(VALUE key, VALUE value, VALUE ary)
3270{
3271 rb_ary_push(ary, obj_resurrect(key));
3272 rb_ary_push(ary, value);
3273 return ST_CONTINUE;
3274}
3275
/*
 * Typed-data descriptor for the temporary label table that
 * iseq_data_to_ary() wraps in a Ruby object.  rb_mark_tbl and
 * st_free_table are installed in the first two callback slots
 * (presumably mark and free, per rb_data_type_t -- confirm against ruby.h).
 * NOTE(review): both callbacks are cast to void (*)(void *); they are
 * therefore invoked through a pointer type that differs from their own
 * prototypes.
 */
static const rb_data_type_t label_wrapper = {
    "label_wrapper",
    {(void (*)(void *))rb_mark_tbl, (void (*)(void *))st_free_table, 0, 0,},
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
3281
/* Declare a static ID variable named id_<name>. */
#define DECL_ID(name) static ID id_##name

/* Intern the string "<name>" and store the resulting ID in id_<name>. */
#define INIT_ID(name) id_##name = rb_intern(#name)
3287
3288static VALUE
3289iseq_type_id(enum rb_iseq_type type)
3290{
3291 DECL_ID(top);
3292 DECL_ID(method);
3293 DECL_ID(block);
3294 DECL_ID(class);
3295 DECL_ID(rescue);
3296 DECL_ID(ensure);
3297 DECL_ID(eval);
3298 DECL_ID(main);
3299 DECL_ID(plain);
3300
3301 if (id_top == 0) {
3302 INIT_ID(top);
3303 INIT_ID(method);
3304 INIT_ID(block);
3305 INIT_ID(class);
3306 INIT_ID(rescue);
3307 INIT_ID(ensure);
3308 INIT_ID(eval);
3309 INIT_ID(main);
3310 INIT_ID(plain);
3311 }
3312
3313 switch (type) {
3314 case ISEQ_TYPE_TOP: return id_top;
3315 case ISEQ_TYPE_METHOD: return id_method;
3316 case ISEQ_TYPE_BLOCK: return id_block;
3317 case ISEQ_TYPE_CLASS: return id_class;
3318 case ISEQ_TYPE_RESCUE: return id_rescue;
3319 case ISEQ_TYPE_ENSURE: return id_ensure;
3320 case ISEQ_TYPE_EVAL: return id_eval;
3321 case ISEQ_TYPE_MAIN: return id_main;
3322 case ISEQ_TYPE_PLAIN: return id_plain;
3323 };
3324
3325 rb_bug("unsupported iseq type: %d", (int)type);
3326}
3327
/*
 * Convert +iseq+ into its array representation
 * ("YARVInstructionSequence/SimpleDataFormat").
 *
 * The returned array holds, in order: the magic string, major version,
 * minor version, format type (1), the misc hash, the iseq label, path,
 * realpath, first line number, the type symbol, the locals array, the
 * params hash, the catch-table array and the body array.
 *
 * Child iseqs (TS_ISEQ operands and catch-table entries) are converted by
 * calling this function recursively.  Jump targets are rendered as
 * :label_<pos> symbols; the same symbols are inserted into the body in front
 * of the instruction at that position.
 */
static VALUE
iseq_data_to_ary(const rb_iseq_t *iseq)
{
    unsigned int i;
    long l;
    const struct rb_iseq_constant_body *const iseq_body = ISEQ_BODY(iseq);
    const struct iseq_insn_info_entry *prev_insn_info;
    unsigned int pos;
    int last_line = 0;
    VALUE *seq, *iseq_original;

    VALUE val = rb_ary_new();
    ID type; /* Symbol */
    VALUE locals = rb_ary_new();
    VALUE params = rb_hash_new();
    VALUE body = rb_ary_new(); /* [[:insn1, ...], ...] */
    VALUE nbody;
    VALUE exception = rb_ary_new(); /* [[....]] */
    VALUE misc = rb_hash_new();

    static ID insn_syms[VM_BARE_INSTRUCTION_SIZE]; /* w/o-trace only */
    /* position -> label symbol; wrapped in a Ruby object (see RB_GC_GUARD below) */
    struct st_table *labels_table = st_init_numtable();
    VALUE labels_wrapper = TypedData_Wrap_Struct(0, &label_wrapper, labels_table);

    /* intern every bare instruction name once, on the first call */
    if (insn_syms[0] == 0) {
        int i;
        for (i=0; i<numberof(insn_syms); i++) {
            insn_syms[i] = rb_intern(insn_name(i));
        }
    }

    /* type */
    type = iseq_type_id(iseq_body->type);

    /* locals */
    for (i=0; i<iseq_body->local_table_size; i++) {
        ID lid = iseq_body->local_table[i];
        if (lid) {
            if (rb_id2str(lid)) {
                rb_ary_push(locals, ID2SYM(lid));
            }
            else { /* hidden variable from id_internal() */
                rb_ary_push(locals, ULONG2NUM(iseq_body->local_table_size-i+1));
            }
        }
        else {
            /* a zero ID in the local table is rendered as :"#arg_rest" */
            rb_ary_push(locals, ID2SYM(rb_intern("#arg_rest")));
        }
    }

    /* params */
    {
        const struct rb_iseq_param_keyword *const keyword = iseq_body->param.keyword;
        int j;

        if (iseq_body->param.flags.has_opt) {
            /* opt_table is read for opt_num + 1 entries; each becomes a label */
            int len = iseq_body->param.opt_num + 1;
            VALUE arg_opt_labels = rb_ary_new2(len);

            for (j = 0; j < len; j++) {
                VALUE l = register_label(labels_table, iseq_body->param.opt_table[j]);
                rb_ary_push(arg_opt_labels, l);
            }
            rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels);
        }

        /* commit */
        if (iseq_body->param.flags.has_lead) rb_hash_aset(params, ID2SYM(rb_intern("lead_num")), INT2FIX(iseq_body->param.lead_num));
        if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_num")), INT2FIX(iseq_body->param.post_num));
        if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_start")), INT2FIX(iseq_body->param.post_start));
        if (iseq_body->param.flags.has_rest) rb_hash_aset(params, ID2SYM(rb_intern("rest_start")), INT2FIX(iseq_body->param.rest_start));
        if (iseq_body->param.flags.has_block) rb_hash_aset(params, ID2SYM(rb_intern("block_start")), INT2FIX(iseq_body->param.block_start));
        if (iseq_body->param.flags.has_kw) {
            VALUE keywords = rb_ary_new();
            int i, j;
            /* the first required_num table entries are pushed as bare symbols */
            for (i=0; i<keyword->required_num; i++) {
                rb_ary_push(keywords, ID2SYM(keyword->table[i]));
            }
            /* the remaining entries become [name] or [name, default];
             * j indexes default_values from 0 while i keeps counting */
            for (j=0; i<keyword->num; i++, j++) {
                VALUE key = rb_ary_new_from_args(1, ID2SYM(keyword->table[i]));
                if (!UNDEF_P(keyword->default_values[j])) {
                    rb_ary_push(key, keyword->default_values[j]);
                }
                rb_ary_push(keywords, key);
            }

            rb_hash_aset(params, ID2SYM(rb_intern("kwbits")),
                         INT2FIX(keyword->bits_start));
            rb_hash_aset(params, ID2SYM(rb_intern("keyword")), keywords);
        }
        if (iseq_body->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(keyword->rest_start));
        if (iseq_body->param.flags.ambiguous_param0) rb_hash_aset(params, ID2SYM(rb_intern("ambiguous_param0")), Qtrue);
        if (iseq_body->param.flags.use_block) rb_hash_aset(params, ID2SYM(rb_intern("use_block")), Qtrue);
    }

    /* body */
    iseq_original = rb_iseq_original_iseq((rb_iseq_t *)iseq);

    /* first pass: one array per instruction, [:name, operand, ...] */
    for (seq = iseq_original; seq < iseq_original + iseq_body->iseq_size; ) {
        VALUE insn = *seq++;
        int j, len = insn_len(insn);
        VALUE *nseq = seq + len - 1; /* start of the following instruction */
        VALUE ary = rb_ary_new2(len);

        rb_ary_push(ary, ID2SYM(insn_syms[insn%numberof(insn_syms)]));
        for (j=0; j<len-1; j++, seq++) {
            enum ruby_insn_type_chars op_type = insn_op_type(insn, j);

            switch (op_type) {
              case TS_OFFSET: {
                /* the operand is added to the position of the next
                 * instruction to obtain the absolute target position */
                unsigned long idx = nseq - iseq_original + *seq;
                rb_ary_push(ary, register_label(labels_table, idx));
                break;
              }
              case TS_LINDEX:
              case TS_NUM:
                rb_ary_push(ary, INT2FIX(*seq));
                break;
              case TS_VALUE:
                rb_ary_push(ary, obj_resurrect(*seq));
                break;
              case TS_ISEQ:
                {
                    /* nested iseq: convert recursively, or nil when absent */
                    const rb_iseq_t *iseq = (rb_iseq_t *)*seq;
                    if (iseq) {
                        VALUE val = iseq_data_to_ary(rb_iseq_check(iseq));
                        rb_ary_push(ary, val);
                    }
                    else {
                        rb_ary_push(ary, Qnil);
                    }
                }
                break;
              case TS_IC:
                {
                    /* segments is walked until a zero ID is found */
                    VALUE list = rb_ary_new();
                    const ID *ids = ((IC)*seq)->segments;
                    while (*ids) {
                        rb_ary_push(list, ID2SYM(*ids++));
                    }
                    rb_ary_push(ary, list);
                }
                break;
              case TS_IVC:
              case TS_ICVARC:
              case TS_ISE:
                {
                    /* emitted as the entry's index relative to the start of
                     * the inline-storage entries for this operand type */
                    union iseq_inline_storage_entry *is = (union iseq_inline_storage_entry *)*seq;
                    rb_ary_push(ary, INT2FIX(is - ISEQ_IS_ENTRY_START(ISEQ_BODY(iseq), op_type)));
                }
                break;
              case TS_CALLDATA:
                {
                    struct rb_call_data *cd = (struct rb_call_data *)*seq;
                    const struct rb_callinfo *ci = cd->ci;
                    VALUE e = rb_hash_new();
                    int argc = vm_ci_argc(ci);

                    ID mid = vm_ci_mid(ci);
                    rb_hash_aset(e, ID2SYM(rb_intern("mid")), mid ? ID2SYM(mid) : Qnil);
                    rb_hash_aset(e, ID2SYM(rb_intern("flag")), UINT2NUM(vm_ci_flag(ci)));

                    if (vm_ci_flag(ci) & VM_CALL_KWARG) {
                        const struct rb_callinfo_kwarg *kwarg = vm_ci_kwarg(ci);
                        int i;
                        VALUE kw = rb_ary_new2((long)kwarg->keyword_len);

                        /* keyword arguments are subtracted from the reported orig_argc */
                        argc -= kwarg->keyword_len;
                        for (i = 0; i < kwarg->keyword_len; i++) {
                            rb_ary_push(kw, kwarg->keywords[i]);
                        }
                        rb_hash_aset(e, ID2SYM(rb_intern("kw_arg")), kw);
                    }

                    rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")),
                                 INT2FIX(argc));
                    rb_ary_push(ary, e);
                }
                break;
              case TS_ID:
                rb_ary_push(ary, ID2SYM(*seq));
                break;
              case TS_CDHASH:
                {
                    VALUE hash = *seq;
                    VALUE val = rb_ary_new();
                    int i;

                    /* flatten to [key, value, key, value, ...] */
                    rb_hash_foreach(hash, cdhash_each, val);

                    /* replace every value (an offset) by the label of its target */
                    for (i=0; i<RARRAY_LEN(val); i+=2) {
                        VALUE pos = FIX2INT(rb_ary_entry(val, i+1));
                        unsigned long idx = nseq - iseq_original + pos;

                        rb_ary_store(val, i+1,
                                     register_label(labels_table, idx));
                    }
                    rb_ary_push(ary, val);
                }
                break;
              case TS_FUNCPTR:
                {
                    /* the raw operand is emitted as a signed integer */
#if SIZEOF_VALUE <= SIZEOF_LONG
                    VALUE val = LONG2NUM((SIGNED_VALUE)*seq);
#else
                    VALUE val = LL2NUM((SIGNED_VALUE)*seq);
#endif
                    rb_ary_push(ary, val);
                }
                break;
              case TS_BUILTIN:
                {
                    /* hash with :func_ptr, :argc, :index and :name */
                    VALUE val = rb_hash_new();
#if SIZEOF_VALUE <= SIZEOF_LONG
                    VALUE func_ptr = LONG2NUM((SIGNED_VALUE)((RB_BUILTIN)*seq)->func_ptr);
#else
                    VALUE func_ptr = LL2NUM((SIGNED_VALUE)((RB_BUILTIN)*seq)->func_ptr);
#endif
                    rb_hash_aset(val, ID2SYM(rb_intern("func_ptr")), func_ptr);
                    rb_hash_aset(val, ID2SYM(rb_intern("argc")), INT2NUM(((RB_BUILTIN)*seq)->argc));
                    rb_hash_aset(val, ID2SYM(rb_intern("index")), INT2NUM(((RB_BUILTIN)*seq)->index));
                    rb_hash_aset(val, ID2SYM(rb_intern("name")), rb_str_new_cstr(((RB_BUILTIN)*seq)->name));
                    rb_ary_push(ary, val);
                }
                break;
              default:
                rb_bug("unknown operand: %c", insn_op_type(insn, j));
            }
        }
        rb_ary_push(body, ary);
    }

    /* keep the plain instruction list; body is rebuilt below */
    nbody = body;

    /* exception */
    if (iseq_body->catch_table) for (i=0; i<iseq_body->catch_table->size; i++) {
        /* each entry: [type, iseq-or-nil, start label, end label, cont label, sp] */
        VALUE ary = rb_ary_new();
        const struct iseq_catch_table_entry *entry =
            UNALIGNED_MEMBER_PTR(iseq_body->catch_table, entries[i]);
        rb_ary_push(ary, exception_type2symbol(entry->type));
        if (entry->iseq) {
            rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq)));
        }
        else {
            rb_ary_push(ary, Qnil);
        }
        rb_ary_push(ary, register_label(labels_table, entry->start));
        rb_ary_push(ary, register_label(labels_table, entry->end));
        rb_ary_push(ary, register_label(labels_table, entry->cont));
        rb_ary_push(ary, UINT2NUM(entry->sp));
        rb_ary_push(exception, ary);
    }

    /* make body with labels and insert line number */
    body = rb_ary_new();
    prev_insn_info = NULL;
#ifdef USE_ISEQ_NODE_ID
    VALUE node_ids = rb_ary_new();
#endif

    /* second pass: pos tracks the position of instruction l in the iseq */
    for (l=0, pos=0; l<RARRAY_LEN(nbody); l++) {
        const struct iseq_insn_info_entry *info;
        VALUE ary = RARRAY_AREF(nbody, l);
        st_data_t label;

        /* a label registered for this position goes right before the instruction */
        if (st_lookup(labels_table, pos, &label)) {
            rb_ary_push(body, (VALUE)label);
        }

        info = get_insn_info(iseq, pos);
#ifdef USE_ISEQ_NODE_ID
        rb_ary_push(node_ids, INT2FIX(info->node_id));
#endif

        /* line number and event markers are emitted only when the
         * insn-info entry differs from the previous instruction's */
        if (prev_insn_info != info) {
            int line = info->line_no;
            rb_event_flag_t events = info->events;

            if (line > 0 && last_line != line) {
                rb_ary_push(body, INT2FIX(line));
                last_line = line;
            }
#define CHECK_EVENT(ev) if (events & ev) rb_ary_push(body, ID2SYM(rb_intern(#ev)));
            CHECK_EVENT(RUBY_EVENT_LINE);
            CHECK_EVENT(RUBY_EVENT_CLASS);
            CHECK_EVENT(RUBY_EVENT_END);
            CHECK_EVENT(RUBY_EVENT_CALL);
            CHECK_EVENT(RUBY_EVENT_RETURN);
            CHECK_EVENT(RUBY_EVENT_B_CALL);
            CHECK_EVENT(RUBY_EVENT_B_RETURN);
            CHECK_EVENT(RUBY_EVENT_RESCUE);
#undef CHECK_EVENT
            prev_insn_info = info;
        }

        rb_ary_push(body, ary);
        /* the per-instruction array has one element per instruction word,
         * so its length advances pos to the next instruction */
        pos += RARRAY_LENINT(ary); /* reject too huge data */
    }
    RB_GC_GUARD(nbody);
    RB_GC_GUARD(labels_wrapper);

    rb_hash_aset(misc, ID2SYM(rb_intern("arg_size")), INT2FIX(iseq_body->param.size));
    rb_hash_aset(misc, ID2SYM(rb_intern("local_size")), INT2FIX(iseq_body->local_table_size));
    rb_hash_aset(misc, ID2SYM(rb_intern("stack_max")), INT2FIX(iseq_body->stack_max));
    rb_hash_aset(misc, ID2SYM(rb_intern("node_id")), INT2FIX(iseq_body->location.node_id));
    rb_hash_aset(misc, ID2SYM(rb_intern("code_location")),
            rb_ary_new_from_args(4,
                INT2FIX(iseq_body->location.code_location.beg_pos.lineno),
                INT2FIX(iseq_body->location.code_location.beg_pos.column),
                INT2FIX(iseq_body->location.code_location.end_pos.lineno),
                INT2FIX(iseq_body->location.code_location.end_pos.column)));
#ifdef USE_ISEQ_NODE_ID
    rb_hash_aset(misc, ID2SYM(rb_intern("node_ids")), node_ids);
#endif
    rb_hash_aset(misc, ID2SYM(rb_intern("parser")), iseq_body->prism ? ID2SYM(rb_intern("prism")) : ID2SYM(rb_intern("parse.y")));

    /*
     * [:magic, :major_version, :minor_version, :format_type, :misc,
     *  :name, :path, :absolute_path, :start_lineno, :type, :locals, :args,
     *  :catch_table, :bytecode]
     */
    rb_ary_push(val, rb_str_new2("YARVInstructionSequence/SimpleDataFormat"));
    rb_ary_push(val, INT2FIX(ISEQ_MAJOR_VERSION)); /* major */
    rb_ary_push(val, INT2FIX(ISEQ_MINOR_VERSION)); /* minor */
    rb_ary_push(val, INT2FIX(1));
    rb_ary_push(val, misc);
    rb_ary_push(val, iseq_body->location.label);
    rb_ary_push(val, rb_iseq_path(iseq));
    rb_ary_push(val, rb_iseq_realpath(iseq));
    rb_ary_push(val, RB_INT2NUM(iseq_body->location.first_lineno));
    rb_ary_push(val, ID2SYM(type));
    rb_ary_push(val, locals);
    rb_ary_push(val, params);
    rb_ary_push(val, exception);
    rb_ary_push(val, body);
    return val;
}
3665
3666VALUE
3667rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc)
3668{
3669 int i, r;
3670 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3671 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
3672 VALUE a, args = rb_ary_new2(body->param.size);
3673 ID req, opt, rest, block, key, keyrest;
3674#define PARAM_TYPE(type) rb_ary_push(a = rb_ary_new2(2), ID2SYM(type))
3675#define PARAM_ID(i) body->local_table[(i)]
3676#define PARAM(i, type) ( \
3677 PARAM_TYPE(type), \
3678 rb_id2str(PARAM_ID(i)) ? \
3679 rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \
3680 a)
3681
3682 CONST_ID(req, "req");
3683 CONST_ID(opt, "opt");
3684
3685 if (body->param.flags.forwardable) {
3686 // [[:rest, :*], [:keyrest, :**], [:block, :&]]
3687 CONST_ID(rest, "rest");
3688 CONST_ID(keyrest, "keyrest");
3689 CONST_ID(block, "block");
3690 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(rest), ID2SYM(idMULT)));
3691 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(keyrest), ID2SYM(idPow)));
3692 rb_ary_push(args, rb_ary_new_from_args(2, ID2SYM(block), ID2SYM(idAnd)));
3693 }
3694
3695 if (is_proc) {
3696 for (i = 0; i < body->param.lead_num; i++) {
3697 PARAM_TYPE(opt);
3698 if (rb_id2str(PARAM_ID(i))) {
3699 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3700 }
3701 rb_ary_push(args, a);
3702 }
3703 }
3704 else {
3705 for (i = 0; i < body->param.lead_num; i++) {
3706 rb_ary_push(args, PARAM(i, req));
3707 }
3708 }
3709 r = body->param.lead_num + body->param.opt_num;
3710 for (; i < r; i++) {
3711 PARAM_TYPE(opt);
3712 if (rb_id2str(PARAM_ID(i))) {
3713 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3714 }
3715 rb_ary_push(args, a);
3716 }
3717 if (body->param.flags.has_rest) {
3718 CONST_ID(rest, "rest");
3719 rb_ary_push(args, PARAM(body->param.rest_start, rest));
3720 }
3721 r = body->param.post_start + body->param.post_num;
3722 if (is_proc) {
3723 for (i = body->param.post_start; i < r; i++) {
3724 PARAM_TYPE(opt);
3725 if (rb_id2str(PARAM_ID(i))) {
3726 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3727 }
3728 rb_ary_push(args, a);
3729 }
3730 }
3731 else {
3732 for (i = body->param.post_start; i < r; i++) {
3733 rb_ary_push(args, PARAM(i, req));
3734 }
3735 }
3736 if (body->param.flags.accepts_no_kwarg) {
3737 ID nokey;
3738 CONST_ID(nokey, "nokey");
3739 PARAM_TYPE(nokey);
3740 rb_ary_push(args, a);
3741 }
3742 if (body->param.flags.has_kw) {
3743 i = 0;
3744 if (keyword->required_num > 0) {
3745 ID keyreq;
3746 CONST_ID(keyreq, "keyreq");
3747 for (; i < keyword->required_num; i++) {
3748 PARAM_TYPE(keyreq);
3749 if (rb_id2str(keyword->table[i])) {
3750 rb_ary_push(a, ID2SYM(keyword->table[i]));
3751 }
3752 rb_ary_push(args, a);
3753 }
3754 }
3755 CONST_ID(key, "key");
3756 for (; i < keyword->num; i++) {
3757 PARAM_TYPE(key);
3758 if (rb_id2str(keyword->table[i])) {
3759 rb_ary_push(a, ID2SYM(keyword->table[i]));
3760 }
3761 rb_ary_push(args, a);
3762 }
3763 }
3764 if (body->param.flags.has_kwrest || body->param.flags.ruby2_keywords) {
3765 ID param;
3766 CONST_ID(keyrest, "keyrest");
3767 PARAM_TYPE(keyrest);
3768 if (body->param.flags.has_kwrest &&
3769 rb_id2str(param = PARAM_ID(keyword->rest_start))) {
3770 rb_ary_push(a, ID2SYM(param));
3771 }
3772 else if (body->param.flags.ruby2_keywords) {
3773 rb_ary_push(a, ID2SYM(idPow));
3774 }
3775 rb_ary_push(args, a);
3776 }
3777 if (body->param.flags.has_block) {
3778 CONST_ID(block, "block");
3779 rb_ary_push(args, PARAM(body->param.block_start, block));
3780 }
3781 return args;
3782}
3783
3784VALUE
3785rb_iseq_defined_string(enum defined_type type)
3786{
3787 static const char expr_names[][18] = {
3788 "nil",
3789 "instance-variable",
3790 "local-variable",
3791 "global-variable",
3792 "class variable",
3793 "constant",
3794 "method",
3795 "yield",
3796 "super",
3797 "self",
3798 "true",
3799 "false",
3800 "assignment",
3801 "expression",
3802 };
3803 const char *estr;
3804
3805 if ((unsigned)(type - 1) >= (unsigned)numberof(expr_names)) rb_bug("unknown defined type %d", type);
3806 estr = expr_names[type - 1];
3807 return rb_fstring_cstr(estr);
3808}
3809
// A map from encoded_insn to insn_data: decoded insn number, its len,
// decoded ZJIT insn number, non-trace version of encoded insn,
// trace version, and zjit version.
// The table is created by rb_vm_encoded_insn_data_table_init() and released
// by rb_free_encoded_insn_data(); its values point into insn_data[] below.
static st_table *encoded_insn_data;
typedef struct insn_data_struct {
    int insn; // bare instruction number (index into insn_data[])
    int insn_len; // insn_len(insn)
    void *notrace_encoded_insn; // encoded form to use when tracing is off
    void *trace_encoded_insn; // encoded form to use when tracing is on
#if USE_ZJIT
    int zjit_insn; // vm_bare_insn_to_zjit_insn(insn)
    void *zjit_encoded_insn; // encoded zjit_insn, or 0 when zjit_insn == insn
#endif
} insn_data_t;
// One entry per bare instruction, filled in by rb_vm_encoded_insn_data_table_init().
static insn_data_t insn_data[VM_BARE_INSTRUCTION_SIZE];
3825
3826void
3827rb_free_encoded_insn_data(void)
3828{
3829 st_free_table(encoded_insn_data);
3830}
3831
3832// Initialize a table to decode bare, trace, and zjit instructions.
3833// This function also determines which instructions are used when TracePoint is enabled.
3834void
3835rb_vm_encoded_insn_data_table_init(void)
3836{
3837#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
3838 const void * const *table = rb_vm_get_insns_address_table();
3839#define INSN_CODE(insn) ((VALUE)table[insn])
3840#else
3841#define INSN_CODE(insn) ((VALUE)(insn))
3842#endif
3843 encoded_insn_data = st_init_numtable_with_size(VM_BARE_INSTRUCTION_SIZE);
3844
3845 for (int insn = 0; insn < VM_BARE_INSTRUCTION_SIZE; insn++) {
3846 insn_data[insn].insn = insn;
3847 insn_data[insn].insn_len = insn_len(insn);
3848
3849 // When tracing :return events, we convert opt_invokebuiltin_delegate_leave + leave into
3850 // opt_invokebuiltin_delegate + trace_leave, presumably because we don't want to fire
3851 // :return events before invokebuiltin. https://github.com/ruby/ruby/pull/3256
3852 int notrace_insn = (insn != BIN(opt_invokebuiltin_delegate_leave)) ? insn : BIN(opt_invokebuiltin_delegate);
3853 insn_data[insn].notrace_encoded_insn = (void *)INSN_CODE(notrace_insn);
3854 insn_data[insn].trace_encoded_insn = (void *)INSN_CODE(notrace_insn + VM_BARE_INSTRUCTION_SIZE);
3855
3856 st_data_t key1 = (st_data_t)INSN_CODE(insn);
3857 st_data_t key2 = (st_data_t)INSN_CODE(insn + VM_BARE_INSTRUCTION_SIZE);
3858 st_add_direct(encoded_insn_data, key1, (st_data_t)&insn_data[insn]);
3859 st_add_direct(encoded_insn_data, key2, (st_data_t)&insn_data[insn]);
3860
3861#if USE_ZJIT
3862 int zjit_insn = vm_bare_insn_to_zjit_insn(insn);
3863 insn_data[insn].zjit_insn = zjit_insn;
3864 insn_data[insn].zjit_encoded_insn = (insn != zjit_insn) ? (void *)INSN_CODE(zjit_insn) : 0;
3865
3866 if (insn != zjit_insn) {
3867 st_data_t key3 = (st_data_t)INSN_CODE(zjit_insn);
3868 st_add_direct(encoded_insn_data, key3, (st_data_t)&insn_data[insn]);
3869 }
3870#endif
3871 }
3872}
3873
3874// Decode an insn address to an insn. This returns bare instructions
3875// even if they're trace/zjit instructions. Use rb_vm_insn_addr2opcode
3876// to decode trace/zjit instructions as is.
3877int
3878rb_vm_insn_addr2insn(const void *addr)
3879{
3880 st_data_t key = (st_data_t)addr;
3881 st_data_t val;
3882
3883 if (st_lookup(encoded_insn_data, key, &val)) {
3884 insn_data_t *e = (insn_data_t *)val;
3885 return (int)e->insn;
3886 }
3887
3888 rb_bug("rb_vm_insn_addr2insn: invalid insn address: %p", addr);
3889}
3890
3891// Decode an insn address to an insn. Unlike rb_vm_insn_addr2insn,
3892// this function can return trace/zjit opcode variants.
3893int
3894rb_vm_insn_addr2opcode(const void *addr)
3895{
3896 st_data_t key = (st_data_t)addr;
3897 st_data_t val;
3898
3899 if (st_lookup(encoded_insn_data, key, &val)) {
3900 insn_data_t *e = (insn_data_t *)val;
3901 int opcode = e->insn;
3902 if (addr == e->trace_encoded_insn) {
3903 opcode += VM_BARE_INSTRUCTION_SIZE;
3904 }
3905#if USE_ZJIT
3906 else if (addr == e->zjit_encoded_insn) {
3907 opcode = e->zjit_insn;
3908 }
3909#endif
3910 return opcode;
3911 }
3912
3913 rb_bug("rb_vm_insn_addr2opcode: invalid insn address: %p", addr);
3914}
3915
3916// Decode `ISEQ_BODY(iseq)->iseq_encoded[i]` to an insn. This returns
3917// bare instructions even if they're trace/zjit instructions. Use
3918// rb_vm_insn_addr2opcode to decode trace/zjit instructions as is.
3919int
3920rb_vm_insn_decode(const VALUE encoded)
3921{
3922#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
3923 int insn = rb_vm_insn_addr2insn((void *)encoded);
3924#else
3925 int insn = (int)encoded;
3926#endif
3927 return insn;
3928}
3929
3930// Turn on or off tracing for a given instruction address
3931static inline int
3932encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, bool remain_current_trace)
3933{
3934 st_data_t key = (st_data_t)*iseq_encoded_insn;
3935 st_data_t val;
3936
3937 if (st_lookup(encoded_insn_data, key, &val)) {
3938 insn_data_t *e = (insn_data_t *)val;
3939 if (remain_current_trace && key == (st_data_t)e->trace_encoded_insn) {
3940 turnon = 1;
3941 }
3942 *iseq_encoded_insn = (VALUE) (turnon ? e->trace_encoded_insn : e->notrace_encoded_insn);
3943 return e->insn_len;
3944 }
3945
3946 rb_bug("trace_instrument: invalid insn address: %p", (void *)*iseq_encoded_insn);
3947}
3948
3949// Turn off tracing for an instruction at pos after tracing event flags are cleared
3950void
3951rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos)
3952{
3953 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3954 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3955 encoded_iseq_trace_instrument(&iseq_encoded[pos], 0, false);
3956}
3957
3958// We need to fire call events on instructions with b_call events if the block
3959// is running as a method. So, if we are listening for call events, then
3960// instructions that have b_call events need to become trace variants.
3961// Use this function when making decisions about recompiling to trace variants.
3962static inline rb_event_flag_t
3963add_bmethod_events(rb_event_flag_t events)
3964{
3965 if (events & RUBY_EVENT_CALL) {
3966 events |= RUBY_EVENT_B_CALL;
3967 }
3968 if (events & RUBY_EVENT_RETURN) {
3969 events |= RUBY_EVENT_B_RETURN;
3970 }
3971 return events;
3972}
3973
3974// Note, to support call/return events for bmethods, turnon_event can have more events than tpval.
3975static int
3976iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line)
3977{
3978 unsigned int pc;
3979 int n = 0;
3980 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3981 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3982
3983 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
3984
3985 for (pc=0; pc<body->iseq_size;) {
3986 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pc);
3987 rb_event_flag_t pc_events = entry->events;
3988 rb_event_flag_t target_events = turnon_events;
3989 unsigned int line = (int)entry->line_no;
3990
3991 if (target_line == 0 || target_line == line) {
3992 /* ok */
3993 }
3994 else {
3995 target_events &= ~RUBY_EVENT_LINE;
3996 }
3997
3998 if (pc_events & target_events) {
3999 n++;
4000 }
4001 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (target_events | iseq->aux.exec.global_trace_events), true);
4002 }
4003
4004 if (n > 0) {
4005 if (iseq->aux.exec.local_hooks == NULL) {
4006 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = RB_ZALLOC(rb_hook_list_t);
4007 iseq->aux.exec.local_hooks->is_local = true;
4008 }
4009 rb_hook_list_connect_tracepoint((VALUE)iseq, iseq->aux.exec.local_hooks, tpval, target_line);
4010 }
4011
4012 return n;
4013}
4014
4016 rb_event_flag_t turnon_events;
4017 VALUE tpval;
4018 unsigned int target_line;
4019 int n;
4020};
4021
4022static void
4023iseq_add_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
4024{
4026 data->n += iseq_add_local_tracepoint(iseq, data->turnon_events, data->tpval, data->target_line);
4027 iseq_iterate_children(iseq, iseq_add_local_tracepoint_i, p);
4028}
4029
4030int
4031rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line, bool target_bmethod)
4032{
4034 if (target_bmethod) {
4035 turnon_events = add_bmethod_events(turnon_events);
4036 }
4037 data.turnon_events = turnon_events;
4038 data.tpval = tpval;
4039 data.target_line = target_line;
4040 data.n = 0;
4041
4042 iseq_add_local_tracepoint_i(iseq, (void *)&data);
4043 if (0) rb_funcall(Qnil, rb_intern("puts"), 1, rb_iseq_disasm(iseq)); /* for debug */
4044 return data.n;
4045}
4046
4047static int
4048iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval)
4049{
4050 int n = 0;
4051
4052 if (iseq->aux.exec.local_hooks) {
4053 unsigned int pc;
4054 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
4055 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
4056 rb_event_flag_t local_events = 0;
4057
4058 rb_hook_list_remove_tracepoint(iseq->aux.exec.local_hooks, tpval);
4059 local_events = iseq->aux.exec.local_hooks->events;
4060
4061 if (local_events == 0) {
4062 rb_hook_list_free(iseq->aux.exec.local_hooks);
4063 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = NULL;
4064 }
4065
4066 local_events = add_bmethod_events(local_events);
4067 for (pc = 0; pc<body->iseq_size;) {
4068 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
4069 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (local_events | iseq->aux.exec.global_trace_events), false);
4070 }
4071 }
4072 return n;
4073}
4074
4076 VALUE tpval;
4077 int n;
4078};
4079
4080static void
4081iseq_remove_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
4082{
4084 data->n += iseq_remove_local_tracepoint(iseq, data->tpval);
4085 iseq_iterate_children(iseq, iseq_remove_local_tracepoint_i, p);
4086}
4087
4088int
4089rb_iseq_remove_local_tracepoint_recursively(const rb_iseq_t *iseq, VALUE tpval)
4090{
4092 data.tpval = tpval;
4093 data.n = 0;
4094
4095 iseq_remove_local_tracepoint_i(iseq, (void *)&data);
4096 return data.n;
4097}
4098
4099void
4100rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events)
4101{
4102 if (iseq->aux.exec.global_trace_events == turnon_events) {
4103 return;
4104 }
4105
4106 if (!ISEQ_EXECUTABLE_P(iseq)) {
4107 /* this is building ISeq */
4108 return;
4109 }
4110 else {
4111 unsigned int pc;
4112 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
4113 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
4114 rb_event_flag_t enabled_events;
4115 rb_event_flag_t local_events = iseq->aux.exec.local_hooks ? iseq->aux.exec.local_hooks->events : 0;
4116 ((rb_iseq_t *)iseq)->aux.exec.global_trace_events = turnon_events;
4117 enabled_events = add_bmethod_events(turnon_events | local_events);
4118
4119 for (pc=0; pc<body->iseq_size;) {
4120 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
4121 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & enabled_events, true);
4122 }
4123 }
4124}
4125
/* Defined outside this chunk; applied by clear_attr_cc() and clear_bf_cc()
 * to the call caches they select. */
void rb_vm_cc_general(const struct rb_callcache *cc);
4127
4128static bool
4129clear_attr_cc(VALUE v)
4130{
4131 if (imemo_type_p(v, imemo_callcache) && vm_cc_ivar_p((const struct rb_callcache *)v)) {
4132 rb_vm_cc_general((struct rb_callcache *)v);
4133 return true;
4134 }
4135 else {
4136 return false;
4137 }
4138}
4139
4140static bool
4141clear_bf_cc(VALUE v)
4142{
4143 if (imemo_type_p(v, imemo_callcache) && vm_cc_bf_p((const struct rb_callcache *)v)) {
4144 rb_vm_cc_general((struct rb_callcache *)v);
4145 return true;
4146 }
4147 else {
4148 return false;
4149 }
4150}
4151
4152static int
4153clear_attr_ccs_i(void *vstart, void *vend, size_t stride, void *data)
4154{
4155 VALUE v = (VALUE)vstart;
4156 for (; v != (VALUE)vend; v += stride) {
4157 void *ptr = rb_asan_poisoned_object_p(v);
4158 rb_asan_unpoison_object(v, false);
4159 clear_attr_cc(v);
4160 asan_poison_object_if(ptr, v);
4161 }
4162 return 0;
4163}
4164
4165void
4166rb_clear_attr_ccs(void)
4167{
4168 rb_objspace_each_objects(clear_attr_ccs_i, NULL);
4169}
4170
4171static int
4172clear_bf_ccs_i(void *vstart, void *vend, size_t stride, void *data)
4173{
4174 VALUE v = (VALUE)vstart;
4175 for (; v != (VALUE)vend; v += stride) {
4176 void *ptr = rb_asan_poisoned_object_p(v);
4177 rb_asan_unpoison_object(v, false);
4178 clear_bf_cc(v);
4179 asan_poison_object_if(ptr, v);
4180 }
4181 return 0;
4182}
4183
4184void
4185rb_clear_bf_ccs(void)
4186{
4187 rb_objspace_each_objects(clear_bf_ccs_i, NULL);
4188}
4189
4190static int
4191trace_set_i(void *vstart, void *vend, size_t stride, void *data)
4192{
4193 rb_event_flag_t turnon_events = *(rb_event_flag_t *)data;
4194
4195 VALUE v = (VALUE)vstart;
4196 for (; v != (VALUE)vend; v += stride) {
4197 void *ptr = rb_asan_poisoned_object_p(v);
4198 rb_asan_unpoison_object(v, false);
4199
4200 if (rb_obj_is_iseq(v)) {
4201 rb_iseq_trace_set(rb_iseq_check((rb_iseq_t *)v), turnon_events);
4202 }
4203 else if (clear_attr_cc(v)) {
4204 }
4205 else if (clear_bf_cc(v)) {
4206 }
4207
4208 asan_poison_object_if(ptr, v);
4209 }
4210 return 0;
4211}
4212
4213void
4214rb_iseq_trace_set_all(rb_event_flag_t turnon_events)
4215{
4216 rb_objspace_each_objects(trace_set_i, &turnon_events);
4217}
4218
4219VALUE
4220rb_iseqw_local_variables(VALUE iseqval)
4221{
4222 return rb_iseq_local_variables(iseqw_check(iseqval));
4223}
4224
4225/*
4226 * call-seq:
4227 * iseq.to_binary(extra_data = nil) -> binary str
4228 *
4229 * Returns serialized iseq binary format data as a String object.
4230 * A corresponding iseq object is created by
4231 * RubyVM::InstructionSequence.load_from_binary() method.
4232 *
4233 * String extra_data will be saved with binary data.
4234 * You can access this data with
4235 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary).
4236 *
4237 * Note that the translated binary data is not portable.
4238 * You can not move this binary data to another machine.
4239 * You can not use the binary data which is created by another
4240 * version/another architecture of Ruby.
4241 */
4242static VALUE
4243iseqw_to_binary(int argc, VALUE *argv, VALUE self)
4244{
4245 VALUE opt = !rb_check_arity(argc, 0, 1) ? Qnil : argv[0];
4246 return rb_iseq_ibf_dump(iseqw_check(self), opt);
4247}
4248
4249/*
4250 * call-seq:
4251 * RubyVM::InstructionSequence.load_from_binary(binary) -> iseq
4252 *
4253 * Load an iseq object from binary format String object
4254 * created by RubyVM::InstructionSequence.to_binary.
4255 *
4256 * This loader does not have a verifier, so loading a broken or modified
4257 * binary can cause critical problems.
4258 *
4259 * You should not load binary data provided by others.
4260 * You should use binary data translated by yourself.
4261 */
4262static VALUE
4263iseqw_s_load_from_binary(VALUE self, VALUE str)
4264{
4265 return iseqw_new(rb_iseq_ibf_load(str));
4266}
4267
4268/*
4269 * call-seq:
4270 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary) -> str
4271 *
4272 * Load extra data embedded into a binary format String object.
4273 */
4274static VALUE
4275iseqw_s_load_from_binary_extra_data(VALUE self, VALUE str)
4276{
4277 return rb_iseq_ibf_load_extra_data(str);
4278}
4279
4280#if VM_INSN_INFO_TABLE_IMPL == 2
4281
4282/* An implementation of succinct bit-vector for insn_info table.
4283 *
4284 * A succinct bit-vector is a small and efficient data structure that provides
4285 * a bit-vector augmented with an index for O(1) rank operation:
4286 *
4287 * rank(bv, n): the number of 1's within a range from index 0 to index n
4288 *
4289 * This can be used to lookup insn_info table from PC.
4290 * For example, consider the following iseq and insn_info_table:
4291 *
4292 * iseq insn_info_table
4293 * PC insn+operand position lineno event
4294 * 0: insn1 0: 1 [Li]
4295 * 2: insn2 2: 2 [Li] <= (A)
4296 * 5: insn3 8: 3 [Li] <= (B)
4297 * 8: insn4
4298 *
4299 * In this case, a succinct bit-vector whose indexes 0, 2, 8 are "1" and
4300 * other indexes are "0", i.e., "101000001", is created.
4301 * To lookup the lineno of insn2, calculate rank("101000001", 2) = 2, so
4302 * the line (A) is the entry in question.
4303 * To lookup the lineno of insn4, calculate rank("101000001", 8) = 3, so
4304 * the line (B) is the entry in question.
4305 *
4306 * A naive implementation of succinct bit-vector works really well
4307 * not only for large size but also for small size. However, it has
4308 * tiny overhead for very small size. So, this implementation consists
4309 * of two parts: one part is the "immediate" table that keeps rank result
4310 * as a raw table, and the other part is a normal succinct bit-vector.
4311 */
4312
4313#define IMMEDIATE_TABLE_SIZE 54 /* a multiple of 9, and < 128 */
4314
4315struct succ_index_table {
4316 uint64_t imm_part[IMMEDIATE_TABLE_SIZE / 9];
4317 struct succ_dict_block {
4318 unsigned int rank;
4319 uint64_t small_block_ranks; /* 9 bits * 7 = 63 bits */
4320 uint64_t bits[512/64];
4321 } succ_part[FLEX_ARY_LEN];
4322};
4323
/*
 * Packed-rank accessors.
 *
 *   imm_block_rank_*:   entry i of a word is the 7-bit field starting at
 *                       bit 7*i.
 *   small_block_rank_*: entry i (i >= 1) is the 9-bit field starting at
 *                       bit 9*(i-1); entry 0 is not stored and reads as 0.
 *
 * The setters OR the value in, so the target field must still be zero.
 *
 * Each expansion is fully parenthesized so it behaves as one expression
 * wherever it is used (e.g. `set(...) != 0`).  The getters mask before
 * narrowing to int, so the conversion never sees an out-of-range value.
 */
#define imm_block_rank_set(v, i, r) ((v) |= (uint64_t)(r) << (7 * (i)))
#define imm_block_rank_get(v, i) ((int)(((v) >> ((i) * 7)) & 0x7f))
#define small_block_rank_set(v, i, r) ((v) |= (uint64_t)(r) << (9 * ((i) - 1)))
#define small_block_rank_get(v, i) ((i) == 0 ? 0 : (int)(((v) >> (((i) - 1) * 9)) & 0x1ff))
4328
4329static struct succ_index_table *
4330succ_index_table_create(int max_pos, int *data, int size)
4331{
4332 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
4333 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
4334 struct succ_index_table *sd =
4335 rb_xcalloc_mul_add_mul(
4336 imm_size, sizeof(uint64_t),
4337 succ_size, sizeof(struct succ_dict_block));
4338 int i, j, k, r;
4339
4340 r = 0;
4341 for (j = 0; j < imm_size; j++) {
4342 for (i = 0; i < 9; i++) {
4343 if (r < size && data[r] == j * 9 + i) r++;
4344 imm_block_rank_set(sd->imm_part[j], i, r);
4345 }
4346 }
4347 for (k = 0; k < succ_size; k++) {
4348 struct succ_dict_block *sd_block = &sd->succ_part[k];
4349 int small_rank = 0;
4350 sd_block->rank = r;
4351 for (j = 0; j < 8; j++) {
4352 uint64_t bits = 0;
4353 if (j) small_block_rank_set(sd_block->small_block_ranks, j, small_rank);
4354 for (i = 0; i < 64; i++) {
4355 if (r < size && data[r] == k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE) {
4356 bits |= ((uint64_t)1) << i;
4357 r++;
4358 }
4359 }
4360 sd_block->bits[j] = bits;
4361 small_rank += rb_popcount64(bits);
4362 }
4363 }
4364 return sd;
4365}
4366
4367static unsigned int *
4368succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size)
4369{
4370 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
4371 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
4372 unsigned int *positions = ALLOC_N(unsigned int, size), *p;
4373 int i, j, k, r = -1;
4374 p = positions;
4375 for (j = 0; j < imm_size; j++) {
4376 for (i = 0; i < 9; i++) {
4377 int nr = imm_block_rank_get(sd->imm_part[j], i);
4378 if (r != nr) *p++ = j * 9 + i;
4379 r = nr;
4380 }
4381 }
4382 for (k = 0; k < succ_size; k++) {
4383 for (j = 0; j < 8; j++) {
4384 for (i = 0; i < 64; i++) {
4385 if (sd->succ_part[k].bits[j] & (((uint64_t)1) << i)) {
4386 *p++ = k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE;
4387 }
4388 }
4389 }
4390 }
4391 return positions;
4392}
4393
4394static int
4395succ_index_lookup(const struct succ_index_table *sd, int x)
4396{
4397 if (x < IMMEDIATE_TABLE_SIZE) {
4398 const int i = x / 9;
4399 const int j = x % 9;
4400 return imm_block_rank_get(sd->imm_part[i], j);
4401 }
4402 else {
4403 const int block_index = (x - IMMEDIATE_TABLE_SIZE) / 512;
4404 const struct succ_dict_block *block = &sd->succ_part[block_index];
4405 const int block_bit_index = (x - IMMEDIATE_TABLE_SIZE) % 512;
4406 const int small_block_index = block_bit_index / 64;
4407 const int small_block_popcount = small_block_rank_get(block->small_block_ranks, small_block_index);
4408 const int popcnt = rb_popcount64(block->bits[small_block_index] << (63 - block_bit_index % 64));
4409
4410 return block->rank + small_block_popcount + popcnt;
4411 }
4412}
4413#endif
4414
4415
4416/*
4417 * call-seq:
4418 * iseq.script_lines -> array or nil
4419 *
4420 * It returns recorded script lines if it is available.
4421 * The script lines are not limited to the iseq range, but
4422 * are entire lines of the source file.
4423 *
4424 * Note that this is an API for ruby internal use, debugging,
4425 * and research. Do not use this for any other purpose.
4426 * The compatibility is not guaranteed.
4427 */
4428static VALUE
4429iseqw_script_lines(VALUE self)
4430{
4431 const rb_iseq_t *iseq = iseqw_check(self);
4432 return ISEQ_BODY(iseq)->variable.script_lines;
4433}
4434
4435/*
4436 * Document-class: RubyVM::InstructionSequence
4437 *
4438 * The InstructionSequence class represents a compiled sequence of
4439 * instructions for the Virtual Machine used in MRI. Not all implementations of Ruby
4440 * may implement this class, and for the implementations that implement it,
4441 * the methods defined and behavior of the methods can change in any version.
4442 *
4443 * With it, you can get a handle to the instructions that make up a method or
4444 * a proc, compile strings of Ruby code down to VM instructions, and
4445 * disassemble instruction sequences to strings for easy inspection. It is
4446 * mostly useful if you want to learn how YARV works, but it also lets
4447 * you control various settings for the Ruby iseq compiler.
4448 *
4449 * You can find the source for the VM instructions in +insns.def+ in the Ruby
4450 * source.
4451 *
4452 * The instruction sequence results will almost certainly change as Ruby
4453 * changes, so example output in this documentation may be different from what
4454 * you see.
4455 *
4456 * Of course, this class is MRI specific.
4457 */
4458
/*
 * Defines the RubyVM::InstructionSequence class under rb_cRubyVM, stores it
 * in rb_cISeq, removes its allocator, and registers its instance methods
 * and singleton methods.  Finally undefines "translate" and "load_iseq" on
 * the class of rb_cISeq.
 */
4459void
4460Init_ISeq(void)
4461{
4462 /* declare ::RubyVM::InstructionSequence */
4463 rb_cISeq = rb_define_class_under(rb_cRubyVM, "InstructionSequence", rb_cObject);
4464 rb_undef_alloc_func(rb_cISeq);
4465 rb_define_method(rb_cISeq, "inspect", iseqw_inspect, 0);
4466 rb_define_method(rb_cISeq, "disasm", iseqw_disasm, 0);
4467 rb_define_method(rb_cISeq, "disassemble", iseqw_disasm, 0);
4468 rb_define_method(rb_cISeq, "to_a", iseqw_to_a, 0);
4469 rb_define_method(rb_cISeq, "eval", iseqw_eval, 0);
4470
    /* binary serialization: to_binary takes a variable argument count (-1) */
4471 rb_define_method(rb_cISeq, "to_binary", iseqw_to_binary, -1);
4472 rb_define_singleton_method(rb_cISeq, "load_from_binary", iseqw_s_load_from_binary, 1);
4473 rb_define_singleton_method(rb_cISeq, "load_from_binary_extra_data", iseqw_s_load_from_binary_extra_data, 1);
4474
4475 /* location APIs */
4476 rb_define_method(rb_cISeq, "path", iseqw_path, 0);
4477 rb_define_method(rb_cISeq, "absolute_path", iseqw_absolute_path, 0);
4478 rb_define_method(rb_cISeq, "label", iseqw_label, 0);
4479 rb_define_method(rb_cISeq, "base_label", iseqw_base_label, 0);
4480 rb_define_method(rb_cISeq, "first_lineno", iseqw_first_lineno, 0);
4481 rb_define_method(rb_cISeq, "trace_points", iseqw_trace_points, 0);
4482 rb_define_method(rb_cISeq, "each_child", iseqw_each_child, 0);
4483
4484#if 0 /* TBD */
4485 rb_define_private_method(rb_cISeq, "marshal_dump", iseqw_marshal_dump, 0);
4486 rb_define_private_method(rb_cISeq, "marshal_load", iseqw_marshal_load, 1);
4487 /* disable this feature because there is no verifier. */
4488 rb_define_singleton_method(rb_cISeq, "load", iseq_s_load, -1);
4489#endif
    /* NOTE(review): presumably keeps iseq_s_load referenced (no unused-function
     * warning) while its registration above is compiled out -- confirm. */
4490 (void)iseq_s_load;
4491
    /* compilation and disassembly entry points; "new" shares iseqw_s_compile
     * with "compile", and "disassemble" shares iseqw_s_disasm with "disasm" */
4492 rb_define_singleton_method(rb_cISeq, "compile", iseqw_s_compile, -1);
4493 rb_define_singleton_method(rb_cISeq, "compile_parsey", iseqw_s_compile_parsey, -1);
4494 rb_define_singleton_method(rb_cISeq, "compile_prism", iseqw_s_compile_prism, -1);
4495 rb_define_singleton_method(rb_cISeq, "compile_file_prism", iseqw_s_compile_file_prism, -1);
4496 rb_define_singleton_method(rb_cISeq, "new", iseqw_s_compile, -1);
4497 rb_define_singleton_method(rb_cISeq, "compile_file", iseqw_s_compile_file, -1);
4498 rb_define_singleton_method(rb_cISeq, "compile_option", iseqw_s_compile_option_get, 0);
4499 rb_define_singleton_method(rb_cISeq, "compile_option=", iseqw_s_compile_option_set, 1);
4500 rb_define_singleton_method(rb_cISeq, "disasm", iseqw_s_disasm, 1);
4501 rb_define_singleton_method(rb_cISeq, "disassemble", iseqw_s_disasm, 1);
4502 rb_define_singleton_method(rb_cISeq, "of", iseqw_s_of, 1);
4503
4504 // script lines
4505 rb_define_method(rb_cISeq, "script_lines", iseqw_script_lines, 0);
4506
    /* undefine these two names on the class of rb_cISeq (class-level methods) */
4507 rb_undef_method(CLASS_OF(rb_cISeq), "translate");
4508 rb_undef_method(CLASS_OF(rb_cISeq), "load_iseq");
4509}
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:219
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
#define rb_define_private_method(klass, mid, func, arity)
Defines klass#mid and makes it private.
#define RUBY_EVENT_END
Encountered an end of a class clause.
Definition event.h:40
#define RUBY_EVENT_C_CALL
A method, written in C, is called.
Definition event.h:43
#define RUBY_EVENT_B_RETURN
Encountered a next statement.
Definition event.h:56
#define RUBY_EVENT_CLASS
Encountered a new class.
Definition event.h:39
#define RUBY_EVENT_LINE
Encountered a new line.
Definition event.h:38
#define RUBY_EVENT_RETURN
Encountered a return statement.
Definition event.h:42
#define RUBY_EVENT_C_RETURN
Return from a method, written in C.
Definition event.h:44
#define RUBY_EVENT_B_CALL
Encountered a yield statement.
Definition event.h:55
uint32_t rb_event_flag_t
Represents event(s).
Definition event.h:108
#define RUBY_EVENT_CALL
A method, written in Ruby, is called.
Definition event.h:41
#define RUBY_EVENT_RESCUE
Encountered a rescue statement.
Definition event.h:61
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition class.c:1639
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
Definition class.c:2792
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv to given VALUE references according to the format string.
Definition class.c:3262
#define rb_str_new2
Old name of rb_str_new_cstr.
Definition string.h:1674
#define T_FILE
Old name of RUBY_T_FILE.
Definition value_type.h:62
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
Definition long.h:48
#define rb_str_cat2
Old name of rb_str_cat_cstr.
Definition string.h:1682
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define SPECIAL_CONST_P
Old name of RB_SPECIAL_CONST_P.
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define ZALLOC
Old name of RB_ZALLOC.
Definition memory.h:402
#define LL2NUM
Old name of RB_LL2NUM.
Definition long_long.h:30
#define CLASS_OF
Old name of rb_class_of.
Definition globals.h:206
#define T_NONE
Old name of RUBY_T_NONE.
Definition value_type.h:74
#define FIX2INT
Old name of RB_FIX2INT.
Definition int.h:41
#define T_HASH
Old name of RUBY_T_HASH.
Definition value_type.h:65
#define ALLOC_N
Old name of RB_ALLOC_N.
Definition memory.h:399
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
Definition fl_type.h:131
#define LONG2NUM
Old name of RB_LONG2NUM.
Definition long.h:50
#define Qtrue
Old name of RUBY_Qtrue.
#define NUM2INT
Old name of RB_NUM2INT.
Definition int.h:44
#define INT2NUM
Old name of RB_INT2NUM.
Definition int.h:43
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
Definition value_type.h:56
#define NIL_P
Old name of RB_NIL_P.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
Definition value_type.h:85
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition long.h:51
#define UINT2NUM
Old name of RB_UINT2NUM.
Definition int.h:46
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
Definition symbol.h:47
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:657
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
Definition eval.c:683
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1430
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
Identical to rb_typeddata_is_kind_of(), except it raises exceptions instead of returning false.
Definition error.c:1397
VALUE rb_eSyntaxError
SyntaxError exception.
Definition error.c:1447
VALUE rb_class_superclass(VALUE klass)
Queries the parent of the given class.
Definition object.c:2220
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
Definition object.c:100
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:265
VALUE rb_inspect(VALUE obj)
Generates a human-readable textual representation of the given object.
Definition object.c:687
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition gc.h:603
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
Definition vm_eval.c:1117
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new_from_values(long n, const VALUE *elts)
Identical to rb_ary_new_from_args(), except how objects are passed.
VALUE rb_ary_resurrect(VALUE ary)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_hidden_new(long capa)
Allocates a hidden (no class) empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_freeze(VALUE obj)
Freeze an array, preventing further modifications.
VALUE rb_ary_entry(VALUE ary, long off)
Queries an element of an array.
VALUE rb_ary_join(VALUE ary, VALUE sep)
Recursively stringises the elements of the passed array, flattens that result, then joins the sequenc...
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
Definition error.h:284
VALUE rb_file_open_str(VALUE fname, const char *fmode)
Identical to rb_file_open(), except it takes the pathname as a Ruby's string instead of C's.
Definition io.c:7270
VALUE rb_io_close(VALUE io)
Closes the IO.
Definition io.c:5759
int rb_is_local_id(ID id)
Classifies the given ID, then sees if it is a local variable.
Definition symbol.c:1109
VALUE rb_obj_is_method(VALUE recv)
Queries if the given object is a method.
Definition proc.c:1676
VALUE rb_obj_is_proc(VALUE recv)
Queries if the given object is a proc.
Definition proc.c:120
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
Definition string.c:3787
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1497
#define rb_exc_new_cstr(exc, str)
Identical to rb_exc_new(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1669
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition string.c:1986
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
Definition string.c:3555
VALUE rb_str_resurrect(VALUE str)
Like rb_str_dup(), but always create an instance of rb_cString regardless of the given object's class...
Definition string.c:2004
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3377
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
Definition string.c:7230
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
Definition string.c:4204
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
Definition string.c:4024
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1655
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition string.c:2734
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1513
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
Definition symbol.c:937
VALUE rb_class_name(VALUE obj)
Queries the name of the given object's class.
Definition variable.c:500
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
Definition vm_method.c:3361
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
Definition vm_method.c:1624
VALUE rb_check_funcall(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcallv(), except it returns RUBY_Qundef instead of raising rb_eNoMethodError.
Definition vm_eval.c:686
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1133
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
Definition symbol.c:993
VALUE rb_io_path(VALUE io)
Returns the path for the given IO.
Definition io.c:2973
int len
Length of the buffer.
Definition io.h:8
#define RB_OBJ_SHAREABLE_P(obj)
Queries if the passed object has previously classified as shareable or not.
Definition ractor.h:235
VALUE rb_ractor_make_shareable(VALUE obj)
Destructively transforms the passed object so that multiple Ractors can share it.
Definition ractor.c:1485
#define RB_NUM2INT
Just another name of rb_num2int_inline.
Definition int.h:38
#define RB_INT2NUM
Just another name of rb_int2num_inline.
Definition int.h:37
VALUE rb_yield(VALUE val)
Yields the block.
Definition vm_eval.c:1372
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:167
#define RB_ZALLOC(type)
Shorthand of RB_ZALLOC_N with n=1.
Definition memory.h:249
VALUE type(ANYARGS)
ANYARGS-ed function type.
void rb_hash_foreach(VALUE q, int_type *w, VALUE e)
Iteration over the given hash.
PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line)
Set the line option on the given options struct.
Definition options.c:40
PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal)
Set the frozen string literal option on the given options struct.
Definition options.c:48
PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count)
Allocate and zero out the scopes array on the given options struct.
Definition options.c:162
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
Definition rarray.h:281
#define RARRAY_AREF(a, i)
Definition rarray.h:403
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
Definition rbasic.h:166
#define RHASH_SIZE(h)
Queries the size of the hash.
Definition rhash.h:69
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:66
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
Definition rstring.h:442
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
Definition rstring.h:89
#define RUBY_TYPED_DEFAULT_FREE
This is a value you can set to rb_data_type_struct::dfree.
Definition rtypeddata.h:80
#define TypedData_Get_Struct(obj, type, data_type, sval)
Obtains a C struct from inside of a wrapper Ruby object.
Definition rtypeddata.h:520
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
Definition rtypeddata.h:455
#define TypedData_Make_Struct(klass, type, data_type, sval)
Identical to TypedData_Wrap_Struct, except it allocates a new data region internally instead of takin...
Definition rtypeddata.h:502
#define FilePathValue(v)
Ensures that the parameter object is a path.
Definition ruby.h:90
#define RTEST
This is an old name of RB_TEST.
Definition iseq.h:286
const ID * segments
A null-terminated list of ids, used to represent a constant's path. idNULL is used to represent the ::...
Definition vm_core.h:285
Definition vm_core.h:293
Definition vm_core.h:288
Definition iseq.h:257
A line and column in a string.
uint32_t column
The column number.
int32_t line
The line number.
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
size_t size
The number of offsets in the list.
uint32_t node_id
The unique identifier for this node, which is deterministic based on the source.
Definition ast.h:1085
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1091
int32_t line
The line within the file that the parse starts on.
Definition options.h:124
pm_scope_node_t node
The resulting scope node that will hold the generated AST.
pm_options_t options
The options that will be passed to the parser.
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
VALUE * script_lines
This is a pointer to the list of script lines for the ISEQs that will be associated with this scope n...
Definition method.h:63
This is the struct that holds necessary info for a struct.
Definition rtypeddata.h:202
struct rb_iseq_constant_body::@156 param
parameter information
Definition st.h:79
Definition vm_core.h:297
intptr_t SIGNED_VALUE
A signed integer type that has the same width with VALUE.
Definition value.h:63
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
Definition value_type.h:433
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376
ruby_value_type
C-level type of an object.
Definition value_type.h:113