Ruby 3.5.0dev (2025-04-11 revision 0e792793277f3bb67ec58369436a57c0cb757a4d)
addr2line.c (0e792793277f3bb67ec58369436a57c0cb757a4d)
1/**********************************************************************
2
3 addr2line.c -
4
5 $Author$
6
7 Copyright (C) 2010 Shinichiro Hamaji
8
9**********************************************************************/
10
/* clang only: suppress two diagnostics for this file.  Each pragma is guarded
 * by __has_warning so it is emitted only when the compiler knows the flag. */
11#if defined(__clang__) && defined(__has_warning)
12#if __has_warning("-Wgnu-empty-initializer")
13#pragma clang diagnostic ignored "-Wgnu-empty-initializer"
14#endif
15#if __has_warning("-Wgcc-compat")
16#pragma clang diagnostic ignored "-Wgcc-compat"
17#endif
18#endif
19
20#include "ruby/internal/config.h"
21#include "ruby/defines.h"
22#include "ruby/missing.h"
23#include "addr2line.h"
24
25#include <stdio.h>
26#include <errno.h>
27
28#ifdef HAVE_LIBPROC_H
29#include <libproc.h>
30#endif
31
33
34#if defined(USE_ELF) || defined(HAVE_MACH_O_LOADER_H)
35
36#include <fcntl.h>
37#include <limits.h>
38#include <stdio.h>
39#include <stdint.h>
40#include <stdlib.h>
41#include <string.h>
42#include <sys/mman.h>
43#include <sys/types.h>
44#include <sys/stat.h>
45#include <unistd.h>
46
47/* Make alloca work the best possible way. */
/* GNU-compatible compilers: map alloca to the builtin unless already defined. */
48#ifdef __GNUC__
49# ifndef alloca
50# define alloca __builtin_alloca
51# endif
52#else
/* Other compilers: prefer <alloca.h>, then the AIX pragma, then a plain
 * declaration when alloca is not already a macro. */
53# ifdef HAVE_ALLOCA_H
54# include <alloca.h>
55# else
56# ifdef _AIX
57#pragma alloca
58# else
59# ifndef alloca /* predefined by HP cc +Olibcalls */
60void *alloca();
61# endif
62# endif /* AIX */
63# endif /* HAVE_ALLOCA_H */
/* NOTE(review): these two definitions use __builtin_unreachable() although
 * they sit in the non-__GNUC__ branch -- confirm this placement is intended. */
64# ifndef UNREACHABLE
65# define UNREACHABLE __builtin_unreachable()
66# endif
67# ifndef UNREACHABLE_RETURN
68# define UNREACHABLE_RETURN(_) __builtin_unreachable()
69# endif
70#endif /* __GNUC__ */
71
/* Last-resort fallbacks when nothing above (or an earlier header) defined the
 * macros: abort(); UNREACHABLE_RETURN additionally supplies a return value so
 * the enclosing function stays well-formed. */
72#ifndef UNREACHABLE
73# define UNREACHABLE abort()
74#endif
75#ifndef UNREACHABLE_RETURN
76# define UNREACHABLE_RETURN(_) return (abort(), (_))
77#endif
78
79#ifdef HAVE_DLADDR
80# include <dlfcn.h>
81#endif
82
83#ifdef HAVE_MACH_O_LOADER_H
84# include <crt_externs.h>
85# include <mach-o/fat.h>
86# include <mach-o/loader.h>
87# include <mach-o/nlist.h>
88# include <mach-o/stab.h>
89#endif
90
/* ELF support: pull in the platform ELF header and provide ElfW() and
 * ELF_ST_TYPE when the header does not define them, selecting the 64- or
 * 32-bit variant from SIZEOF_VOIDP. */
91#ifdef USE_ELF
92# ifdef __OpenBSD__
93# include <elf_abi.h>
94# else
95# include <elf.h>
96# endif
97
98#ifndef ElfW
99# if SIZEOF_VOIDP == 8
100# define ElfW(x) Elf64##_##x
101# else
102# define ElfW(x) Elf32##_##x
103# endif
104#endif
105#ifndef ELF_ST_TYPE
106# if SIZEOF_VOIDP == 8
107# define ELF_ST_TYPE ELF64_ST_TYPE
108# else
109# define ELF_ST_TYPE ELF32_ST_TYPE
110# endif
111#endif
112#endif
113
/* Compressed sections: SUPPORT_COMPRESSED_DEBUG_LINE is defined only when
 * both ELFCOMPRESS_ZLIB and zlib are available.  When SHF_COMPRESSED itself
 * is missing it is defined as 0, so flag tests against it are always false. */
114#ifdef SHF_COMPRESSED
115# if defined(ELFCOMPRESS_ZLIB) && defined(HAVE_LIBZ)
116 /* FreeBSD 11.0 lacks ELFCOMPRESS_ZLIB */
117# include <zlib.h>
118# define SUPPORT_COMPRESSED_DEBUG_LINE
119# endif
120#else /* compatibility with glibc < 2.22 */
121# define SHF_COMPRESSED 0
122#endif
123
/* Fallback for platforms whose headers do not provide PATH_MAX. */
124#ifndef PATH_MAX
125#define PATH_MAX 4096
126#endif
127
/* Line number standard opcodes, dispatched on in parse_debug_line_cu(). */
128#define DW_LNS_copy 0x01
129#define DW_LNS_advance_pc 0x02
130#define DW_LNS_advance_line 0x03
131#define DW_LNS_set_file 0x04
132#define DW_LNS_set_column 0x05
133#define DW_LNS_negate_stmt 0x06
134#define DW_LNS_set_basic_block 0x07
135#define DW_LNS_const_add_pc 0x08
136#define DW_LNS_fixed_advance_pc 0x09
137#define DW_LNS_set_prologue_end 0x0a /* DWARF3 */
138#define DW_LNS_set_epilogue_begin 0x0b /* DWARF3 */
139#define DW_LNS_set_isa 0x0c /* DWARF3 */
140
141/* Line number extended opcode name. */
142#define DW_LNE_end_sequence 0x01
143#define DW_LNE_set_address 0x02
144#define DW_LNE_define_file 0x03
145#define DW_LNE_set_discriminator 0x04 /* DWARF4 */
146
/* Diagnostic output.  Expands to fprintf() on a variable named `errout`,
 * which must therefore be in scope at every use.  The leading "" is
 * concatenated with the first argument, so that argument has to be a string
 * literal. */
147#define kprintf(...) fprintf(errout, "" __VA_ARGS__)
148
/* Resolved source information for one backtrace entry.  The `next` member
 * allows entries to be chained. */
149typedef struct line_info {
/* dirname/filename are filled in by fill_filename() */
150 const char *dirname;
151 const char *filename;
152 const char *path; /* object path */
/* source line number, stored by fill_line() */
153 int line;
154
/* NOTE(review): base_addr/saddr are not written in the part of the file
 * reviewed here; presumably load base and symbol address -- confirm. */
155 uintptr_t base_addr;
156 uintptr_t saddr;
157 const char *sname; /* function name */
158
159 struct line_info *next;
160} line_info_t;
161
/* Location of one DWARF section: start pointer, byte size and section flags. */
struct dwarf_section {
    char *ptr;
    size_t size;
    uint64_t flags;
};

/*
 * Per-object bookkeeping.  The nine dwarf_section members must stay in sync
 * with DWARF_SECTION_COUNT and with the table in obj_dwarf_section_at().
 */
typedef struct obj_info {
    const char *path; /* object path */
    char *mapped;
    size_t mapped_size;
    void *uncompressed;
    uintptr_t base_addr;
    uintptr_t vmaddr;
    struct dwarf_section debug_abbrev;
    struct dwarf_section debug_info;
    struct dwarf_section debug_line;
    struct dwarf_section debug_ranges;
    struct dwarf_section debug_str_offsets;
    struct dwarf_section debug_addr;
    struct dwarf_section debug_rnglists;
    struct dwarf_section debug_str;
    struct dwarf_section debug_line_str;
    struct obj_info *next;
} obj_info_t;

#define DWARF_SECTION_COUNT 9

/*
 * Return the n-th DWARF section slot of `obj`, in declaration order
 * (0 = debug_abbrev ... DWARF_SECTION_COUNT-1 = debug_line_str).
 *
 * An index outside [0, DWARF_SECTION_COUNT) yields NULL instead of indexing
 * past the end of the local table.
 */
static struct dwarf_section *
obj_dwarf_section_at(obj_info_t *obj, int n)
{
    struct dwarf_section *ary[] = {
        &obj->debug_abbrev,
        &obj->debug_info,
        &obj->debug_line,
        &obj->debug_ranges,
        &obj->debug_str_offsets,
        &obj->debug_addr,
        &obj->debug_rnglists,
        &obj->debug_str,
        &obj->debug_line_str
    };
    if (n < 0 || DWARF_SECTION_COUNT <= n) {
        return NULL;
    }
    return ary[n];
}
208
/* Pairs a section name with the dwarf_section slot that describes it. */
209struct debug_section_definition {
210 const char *name;
211 struct dwarf_section *dwarf;
212};
213
214/* Avoid consuming stack as this module may be used from signal handler */
/* Shared scratch path: quoted in diagnostics and rewritten in place by the
 * follow_debuglink*() helpers. */
215static char binary_filename[PATH_MAX + 1];
216
/*
 * Decode one unsigned LEB128 value starting at *p and advance *p past it.
 *
 * Every 7-bit group is widened to unsigned long before it is shifted, so
 * values above 32 bits decode correctly.  Groups that would land beyond the
 * width of unsigned long are consumed but ignored, which keeps the shift
 * count in range even for over-long encodings.
 */
static unsigned long
uleb128(const char **p)
{
    const int width = (int)(sizeof(unsigned long) * CHAR_BIT);
    unsigned long r = 0;
    int s = 0;
    for (;;) {
        unsigned char b = (unsigned char)*(*p)++;
        if (s < width) {
            r |= (unsigned long)(b & 0x7f) << s;
            s += 7;
        }
        if (b < 0x80) {
            break; /* high bit clear: last byte of the value */
        }
    }
    return r;
}
233
/*
 * Decode one signed LEB128 value starting at *p and advance *p past it.
 *
 * The payload is accumulated in an unsigned long (no signed-shift overflow,
 * no truncation to int) and sign-extended from bit 6 of the final byte.
 * Groups beyond the width of long are consumed but ignored.
 */
static long
sleb128(const char **p)
{
    const int width = (int)(sizeof(unsigned long) * CHAR_BIT);
    unsigned long r = 0;
    int s = 0;
    unsigned char b;
    do {
        b = (unsigned char)*(*p)++;
        if (s < width) {
            r |= (unsigned long)(b & 0x7f) << s;
            s += 7;
        }
    } while (b & 0x80);
    if ((b & 0x40) && s < width) {
        /* negative value: fill every bit above the decoded payload */
        r |= ~0UL << s;
    }
    return (long)r;
}
255
256static const char *
257get_nth_dirname(unsigned long dir, const char *p, FILE *errout)
258{
259 if (!dir--) {
260 return "";
261 }
262 while (dir--) {
263 while (*p) p++;
264 p++;
265 if (!*p) {
266 kprintf("Unexpected directory number %lu in %s\n",
267 dir, binary_filename);
268 return "";
269 }
270 }
271 return p;
272}
273
274static const char *parse_ver5_debug_line_header(
275 const char *p, int idx, uint8_t format,
276 obj_info_t *obj, const char **out_path,
277 uint64_t *out_directory_index, FILE *errout);
278
279static void
280fill_filename(int file, uint8_t format, uint16_t version, const char *include_directories,
281 const char *filenames, line_info_t *line, obj_info_t *obj, FILE *errout)
282{
283 int i;
284 const char *p = filenames;
285 const char *filename;
286 unsigned long dir;
287 if (version >= 5) {
288 const char *path;
289 uint64_t directory_index = -1;
290 parse_ver5_debug_line_header(filenames, file, format, obj, &path, &directory_index, errout);
291 line->filename = path;
292 parse_ver5_debug_line_header(include_directories, (int)directory_index, format, obj, &path, NULL, errout);
293 line->dirname = path;
294 }
295 else {
296 for (i = 1; i <= file; i++) {
297 filename = p;
298 if (!*p) {
299#ifndef __APPLE__
300 /* Need to output binary file name? */
301 kprintf("Unexpected file number %d in %s at %tx\n",
302 file, binary_filename, filenames - obj->mapped);
303#endif
304 return;
305 }
306 while (*p) p++;
307 p++;
308 dir = uleb128(&p);
309 /* last modified. */
310 uleb128(&p);
311 /* size of the file. */
312 uleb128(&p);
313
314 if (i == file) {
315 line->filename = filename;
316 line->dirname = get_nth_dirname(dir, include_directories, errout);
317 }
318 }
319 }
320}
321
322static void
323fill_line(int num_traces, void **traces, uintptr_t addr, int file, int line,
324 uint8_t format, uint16_t version, const char *include_directories, const char *filenames,
325 obj_info_t *obj, line_info_t *lines, int offset, FILE *errout)
326{
327 int i;
328 addr += obj->base_addr - obj->vmaddr;
329 for (i = offset; i < num_traces; i++) {
330 uintptr_t a = (uintptr_t)traces[i];
331 /* We assume one line code doesn't result >100 bytes of native code.
332 We may want more reliable way eventually... */
333 if (addr < a && a < addr + 100) {
334 fill_filename(file, format, version, include_directories, filenames, &lines[i], obj, errout);
335 lines[i].line = line;
336 }
337 }
338}
339
/* Decoded header of one .debug_line unit, filled in by
 * parse_debug_line_header(). */
340struct LineNumberProgramHeader {
341 uint64_t unit_length;
342 uint16_t version;
343 uint8_t format; /* 4 or 8 */
344 uint64_t header_length;
345 uint8_t minimum_instruction_length;
/* not stored by parse_debug_line_header(); it only checks the byte equals 1 */
346 uint8_t maximum_operations_per_instruction;
347 uint8_t default_is_stmt;
348 int8_t line_base;
349 uint8_t line_range;
350 uint8_t opcode_base;
351 /* uint8_t standard_opcode_lengths[opcode_base-1]; */
/* start of the directory table and of the file table inside the header */
352 const char *include_directories;
353 const char *filenames;
/* cu_start: first byte after the header (header_length past that field);
 * cu_end: first byte past the unit (unit_length past the length field) */
354 const char *cu_start;
355 const char *cu_end;
356};
357
358static int
359parse_debug_line_header(obj_info_t *obj, const char **pp, struct LineNumberProgramHeader *header, FILE *errout)
360{
361 const char *p = *pp;
362 header->unit_length = *(uint32_t *)p;
363 p += sizeof(uint32_t);
364
365 header->format = 4;
366 if (header->unit_length == 0xffffffff) {
367 header->unit_length = *(uint64_t *)p;
368 p += sizeof(uint64_t);
369 header->format = 8;
370 }
371
372 header->cu_end = p + header->unit_length;
373
374 header->version = *(uint16_t *)p;
375 p += sizeof(uint16_t);
376 if (header->version > 5) return -1;
377
378 if (header->version >= 5) {
379 /* address_size = *(uint8_t *)p++; */
380 /* segment_selector_size = *(uint8_t *)p++; */
381 p += 2;
382 }
383
384 header->header_length = header->format == 4 ? *(uint32_t *)p : *(uint64_t *)p;
385 p += header->format;
386 header->cu_start = p + header->header_length;
387
388 header->minimum_instruction_length = *(uint8_t *)p++;
389
390 if (header->version >= 4) {
391 /* maximum_operations_per_instruction = *(uint8_t *)p; */
392 if (*p != 1) return -1; /* For non-VLIW architectures, this field is 1 */
393 p++;
394 }
395
396 header->default_is_stmt = *(uint8_t *)p++;
397 header->line_base = *(int8_t *)p++;
398 header->line_range = *(uint8_t *)p++;
399 header->opcode_base = *(uint8_t *)p++;
400 /* header->standard_opcode_lengths = (uint8_t *)p - 1; */
401 p += header->opcode_base - 1;
402
403 if (header->version >= 5) {
404 header->include_directories = p;
405 p = parse_ver5_debug_line_header(p, -1, header->format, obj, NULL, NULL, errout);
406 header->filenames = p;
407 }
408 else {
409 header->include_directories = p;
410
411 /* temporary measure for compress-debug-sections */
412 if (p >= header->cu_end) return -1;
413
414 /* skip include directories */
415 while (*p) {
416 p = memchr(p, '\0', header->cu_end - p);
417 if (!p) return -1;
418 p++;
419 }
420 p++;
421
422 header->filenames = p;
423 }
424
425 *pp = header->cu_start;
426
427 return 0;
428}
429
430static int
431parse_debug_line_cu(int num_traces, void **traces, const char **debug_line,
432 obj_info_t *obj, line_info_t *lines, int offset, FILE *errout)
433{
434 const char *p = (const char *)*debug_line;
435 struct LineNumberProgramHeader header;
436
437 /* The registers. */
438 unsigned long addr = 0;
439 unsigned int file = 1;
440 unsigned int line = 1;
441 /* unsigned int column = 0; */
442 int is_stmt;
443 /* int basic_block = 0; */
444 /* int end_sequence = 0; */
445 /* int prologue_end = 0; */
446 /* int epilogue_begin = 0; */
447 /* unsigned int isa = 0; */
448
449 if (parse_debug_line_header(obj, &p, &header, errout))
450 return -1;
451 is_stmt = header.default_is_stmt;
452
453#define FILL_LINE() \
454 do { \
455 fill_line(num_traces, traces, addr, file, line, \
456 header.format, \
457 header.version, \
458 header.include_directories, \
459 header.filenames, \
460 obj, lines, offset, errout); \
461 /*basic_block = prologue_end = epilogue_begin = 0;*/ \
462 } while (0)
463
464 while (p < header.cu_end) {
465 unsigned long a;
466 unsigned char op = *p++;
467 switch (op) {
468 case DW_LNS_copy:
469 FILL_LINE();
470 break;
471 case DW_LNS_advance_pc:
472 a = uleb128(&p) * header.minimum_instruction_length;
473 addr += a;
474 break;
475 case DW_LNS_advance_line: {
476 long a = sleb128(&p);
477 line += a;
478 break;
479 }
480 case DW_LNS_set_file:
481 file = (unsigned int)uleb128(&p);
482 break;
483 case DW_LNS_set_column:
484 /*column = (unsigned int)*/(void)uleb128(&p);
485 break;
486 case DW_LNS_negate_stmt:
487 is_stmt = !is_stmt;
488 break;
489 case DW_LNS_set_basic_block:
490 /*basic_block = 1; */
491 break;
492 case DW_LNS_const_add_pc:
493 a = ((255UL - header.opcode_base) / header.line_range) *
494 header.minimum_instruction_length;
495 addr += a;
496 break;
497 case DW_LNS_fixed_advance_pc:
498 a = *(uint16_t *)p;
499 p += sizeof(uint16_t);
500 addr += a;
501 break;
502 case DW_LNS_set_prologue_end:
503 /* prologue_end = 1; */
504 break;
505 case DW_LNS_set_epilogue_begin:
506 /* epilogue_begin = 1; */
507 break;
508 case DW_LNS_set_isa:
509 /* isa = (unsigned int)*/(void)uleb128(&p);
510 break;
511 case 0:
512 a = uleb128(&p);
513 op = *p++;
514 switch (op) {
515 case DW_LNE_end_sequence:
516 /* end_sequence = 1; */
517 FILL_LINE();
518 addr = 0;
519 file = 1;
520 line = 1;
521 /* column = 0; */
522 is_stmt = header.default_is_stmt;
523 /* end_sequence = 0; */
524 /* isa = 0; */
525 break;
526 case DW_LNE_set_address:
527 addr = *(unsigned long *)p;
528 p += sizeof(unsigned long);
529 break;
530 case DW_LNE_define_file:
531 kprintf("Unsupported operation in %s\n",
532 binary_filename);
533 break;
534 case DW_LNE_set_discriminator:
535 /* TODO:currently ignore */
536 uleb128(&p);
537 break;
538 default:
539 kprintf("Unknown extended opcode: %d in %s\n",
540 op, binary_filename);
541 }
542 break;
543 default: {
544 uint8_t adjusted_opcode = op - header.opcode_base;
545 uint8_t operation_advance = adjusted_opcode / header.line_range;
546 /* NOTE: this code doesn't support VLIW */
547 addr += operation_advance * header.minimum_instruction_length;
548 line += header.line_base + (adjusted_opcode % header.line_range);
549 FILL_LINE();
550 }
551 }
552 }
553 *debug_line = (char *)p;
554 return 0;
555}
556
557static int
558parse_debug_line(int num_traces, void **traces,
559 const char *debug_line, unsigned long size,
560 obj_info_t *obj, line_info_t *lines, int offset, FILE *errout)
561{
562 const char *debug_line_end = debug_line + size;
563 while (debug_line < debug_line_end) {
564 if (parse_debug_line_cu(num_traces, traces, &debug_line, obj, lines, offset, errout))
565 return -1;
566 }
567 if (debug_line != debug_line_end) {
568 kprintf("Unexpected size of .debug_line in %s\n",
569 binary_filename);
570 }
571 return 0;
572}
573
/* Forward declaration: the follow_debuglink*() helpers below call
 * fill_lines() before its definition appears. */
574/* read file and fill lines */
575static uintptr_t
576fill_lines(int num_traces, void **traces, int check_debuglink,
577 obj_info_t **objp, line_info_t *lines, int offset, FILE *errout);
578
579static void
580append_obj(obj_info_t **objp)
581{
582 obj_info_t *newobj = calloc(1, sizeof(obj_info_t));
583 if (*objp) (*objp)->next = newobj;
584 *objp = newobj;
585}
586
587#ifdef USE_ELF
588/* Ideally we should check 4 paths to follow gnu_debuglink:
589 *
590 * - /usr/lib/debug/.build-id/ab/cdef1234.debug
591 * - /usr/bin/ruby.debug
592 * - /usr/bin/.debug/ruby.debug
593 * - /usr/lib/debug/usr/bin/ruby.debug.
594 *
595 * but we handle only two cases for now as the two formats are
596 * used by some linux distributions.
597 *
598 * See GDB's info for detail.
599 * https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
600 */
601
602// check the path pattern of "/usr/lib/debug/usr/bin/ruby.debug"
603static void
604follow_debuglink(const char *debuglink, int num_traces, void **traces,
605 obj_info_t **objp, line_info_t *lines, int offset, FILE *errout)
606{
607 static const char global_debug_dir[] = "/usr/lib/debug";
608 const size_t global_debug_dir_len = sizeof(global_debug_dir) - 1;
609 char *p;
610 obj_info_t *o1 = *objp, *o2;
611 size_t len;
612
613 p = strrchr(binary_filename, '/');
614 if (!p) {
615 return;
616 }
617 p[1] = '\0';
618
619 len = strlen(binary_filename);
620 if (len >= PATH_MAX - global_debug_dir_len)
621 len = PATH_MAX - global_debug_dir_len - 1;
622 memmove(binary_filename + global_debug_dir_len, binary_filename, len);
623 memcpy(binary_filename, global_debug_dir, global_debug_dir_len);
624 len += global_debug_dir_len;
625 strlcpy(binary_filename + len, debuglink, PATH_MAX - len);
626
627 append_obj(objp);
628 o2 = *objp;
629 o2->base_addr = o1->base_addr;
630 o2->path = o1->path;
631 fill_lines(num_traces, traces, 0, objp, lines, offset, errout);
632}
633
634// check the path pattern of "/usr/lib/debug/.build-id/ab/cdef1234.debug"
635static void
636follow_debuglink_build_id(const char *build_id, size_t build_id_size, int num_traces, void **traces,
637 obj_info_t **objp, line_info_t *lines, int offset, FILE *errout)
638{
639 static const char global_debug_dir[] = "/usr/lib/debug/.build-id/";
640 const size_t global_debug_dir_len = sizeof(global_debug_dir) - 1;
641 char *p;
642 obj_info_t *o1 = *objp, *o2;
643 size_t i;
644
645 if (PATH_MAX < global_debug_dir_len + 1 + build_id_size * 2 + 6) return;
646
647 memcpy(binary_filename, global_debug_dir, global_debug_dir_len);
648 p = binary_filename + global_debug_dir_len;
649 for (i = 0; i < build_id_size; i++) {
650 static const char tbl[] = "0123456789abcdef";
651 unsigned char n = build_id[i];
652 *p++ = tbl[n / 16];
653 *p++ = tbl[n % 16];
654 if (i == 0) *p++ = '/';
655 }
656 strcpy(p, ".debug");
657
658 append_obj(objp);
659 o2 = *objp;
660 o2->base_addr = o1->base_addr;
661 o2->path = o1->path;
662 fill_lines(num_traces, traces, 0, objp, lines, offset, errout);
663}
664#endif
665
/* DWARF tag codes named in this file; compared against the `tag` member
 * of DIE. */
666enum
667{
668 DW_TAG_compile_unit = 0x11,
669 DW_TAG_inlined_subroutine = 0x1d,
670 DW_TAG_subprogram = 0x2e,
671};
672
673/* Attributes encodings */
/* Numeric codes of the DW_AT_* attribute names.  "Reserved" comments mark
 * values in the sequence that have no name assigned here. */
674enum
675{
676 DW_AT_sibling = 0x01,
677 DW_AT_location = 0x02,
678 DW_AT_name = 0x03,
679 /* Reserved 0x04 */
680 /* Reserved 0x05 */
681 /* Reserved 0x06 */
682 /* Reserved 0x07 */
683 /* Reserved 0x08 */
684 DW_AT_ordering = 0x09,
685 /* Reserved 0x0a */
686 DW_AT_byte_size = 0x0b,
687 /* Reserved 0x0c */
688 DW_AT_bit_size = 0x0d,
689 /* Reserved 0x0e */
690 /* Reserved 0x0f */
691 DW_AT_stmt_list = 0x10,
692 DW_AT_low_pc = 0x11,
693 DW_AT_high_pc = 0x12,
694 DW_AT_language = 0x13,
695 /* Reserved 0x14 */
696 DW_AT_discr = 0x15,
697 DW_AT_discr_value = 0x16,
698 DW_AT_visibility = 0x17,
699 DW_AT_import = 0x18,
700 DW_AT_string_length = 0x19,
701 DW_AT_common_reference = 0x1a,
702 DW_AT_comp_dir = 0x1b,
703 DW_AT_const_value = 0x1c,
704 DW_AT_containing_type = 0x1d,
705 DW_AT_default_value = 0x1e,
706 /* Reserved 0x1f */
707 DW_AT_inline = 0x20,
708 DW_AT_is_optional = 0x21,
709 DW_AT_lower_bound = 0x22,
710 /* Reserved 0x23 */
711 /* Reserved 0x24 */
712 DW_AT_producer = 0x25,
713 /* Reserved 0x26 */
714 DW_AT_prototyped = 0x27,
715 /* Reserved 0x28 */
716 /* Reserved 0x29 */
717 DW_AT_return_addr = 0x2a,
718 /* Reserved 0x2b */
719 DW_AT_start_scope = 0x2c,
720 /* Reserved 0x2d */
721 DW_AT_bit_stride = 0x2e,
722 DW_AT_upper_bound = 0x2f,
723 /* Reserved 0x30 */
724 DW_AT_abstract_origin = 0x31,
725 DW_AT_accessibility = 0x32,
726 DW_AT_address_class = 0x33,
727 DW_AT_artificial = 0x34,
728 DW_AT_base_types = 0x35,
729 DW_AT_calling_convention = 0x36,
730 DW_AT_count = 0x37,
731 DW_AT_data_member_location = 0x38,
732 DW_AT_decl_column = 0x39,
733 DW_AT_decl_file = 0x3a,
734 DW_AT_decl_line = 0x3b,
735 DW_AT_declaration = 0x3c,
736 DW_AT_discr_list = 0x3d,
737 DW_AT_encoding = 0x3e,
738 DW_AT_external = 0x3f,
739 DW_AT_frame_base = 0x40,
740 DW_AT_friend = 0x41,
741 DW_AT_identifier_case = 0x42,
742 /* Reserved 0x43 */
743 DW_AT_namelist_item = 0x44,
744 DW_AT_priority = 0x45,
745 DW_AT_segment = 0x46,
746 DW_AT_specification = 0x47,
747 DW_AT_static_link = 0x48,
748 DW_AT_type = 0x49,
749 DW_AT_use_location = 0x4a,
750 DW_AT_variable_parameter = 0x4b,
751 DW_AT_virtuality = 0x4c,
752 DW_AT_vtable_elem_location = 0x4d,
753 DW_AT_allocated = 0x4e,
754 DW_AT_associated = 0x4f,
755 DW_AT_data_location = 0x50,
756 DW_AT_byte_stride = 0x51,
757 DW_AT_entry_pc = 0x52,
758 DW_AT_use_UTF8 = 0x53,
759 DW_AT_extension = 0x54,
760 DW_AT_ranges = 0x55,
761 DW_AT_trampoline = 0x56,
762 DW_AT_call_column = 0x57,
763 DW_AT_call_file = 0x58,
764 DW_AT_call_line = 0x59,
765 DW_AT_description = 0x5a,
766 DW_AT_binary_scale = 0x5b,
767 DW_AT_decimal_scale = 0x5c,
768 DW_AT_small = 0x5d,
769 DW_AT_decimal_sign = 0x5e,
770 DW_AT_digit_count = 0x5f,
771 DW_AT_picture_string = 0x60,
772 DW_AT_mutable = 0x61,
773 DW_AT_threads_scaled = 0x62,
774 DW_AT_explicit = 0x63,
775 DW_AT_object_pointer = 0x64,
776 DW_AT_endianity = 0x65,
777 DW_AT_elemental = 0x66,
778 DW_AT_pure = 0x67,
779 DW_AT_recursive = 0x68,
780 DW_AT_signature = 0x69,
781 DW_AT_main_subprogram = 0x6a,
782 DW_AT_data_bit_offset = 0x6b,
783 DW_AT_const_expr = 0x6c,
784 DW_AT_enum_class = 0x6d,
785 DW_AT_linkage_name = 0x6e,
786 DW_AT_string_length_bit_size = 0x6f,
787 DW_AT_string_length_byte_size = 0x70,
788 DW_AT_rank = 0x71,
789 DW_AT_str_offsets_base = 0x72,
790 DW_AT_addr_base = 0x73,
791 DW_AT_rnglists_base = 0x74,
792 /* Reserved 0x75 */
793 DW_AT_dwo_name = 0x76,
794 DW_AT_reference = 0x77,
795 DW_AT_rvalue_reference = 0x78,
796 DW_AT_macros = 0x79,
797 DW_AT_call_all_calls = 0x7a,
798 DW_AT_call_all_source_calls = 0x7b,
799 DW_AT_call_all_tail_calls = 0x7c,
800 DW_AT_call_return_pc = 0x7d,
801 DW_AT_call_value = 0x7e,
802 DW_AT_call_origin = 0x7f,
803 DW_AT_call_parameter = 0x80,
804 DW_AT_call_pc = 0x81,
805 DW_AT_call_tail_call = 0x82,
806 DW_AT_call_target = 0x83,
807 DW_AT_call_target_clobbered = 0x84,
808 DW_AT_call_data_location = 0x85,
809 DW_AT_call_data_value = 0x86,
810 DW_AT_noreturn = 0x87,
811 DW_AT_alignment = 0x88,
812 DW_AT_export_symbols = 0x89,
813 DW_AT_deleted = 0x8a,
814 DW_AT_defaulted = 0x8b,
815 DW_AT_loclists_base = 0x8c,
816 DW_AT_lo_user = 0x2000,
817 DW_AT_hi_user = 0x3fff
818};
819
820/* Attribute form encodings */
/* Numeric codes of the DW_FORM_* attribute forms;
 * debug_info_reader_read_value() switches on these. */
821enum
822{
823 DW_FORM_addr = 0x01,
824 /* Reserved 0x02 */
825 DW_FORM_block2 = 0x03,
826 DW_FORM_block4 = 0x04,
827 DW_FORM_data2 = 0x05,
828 DW_FORM_data4 = 0x06,
829 DW_FORM_data8 = 0x07,
830 DW_FORM_string = 0x08,
831 DW_FORM_block = 0x09,
832 DW_FORM_block1 = 0x0a,
833 DW_FORM_data1 = 0x0b,
834 DW_FORM_flag = 0x0c,
835 DW_FORM_sdata = 0x0d,
836 DW_FORM_strp = 0x0e,
837 DW_FORM_udata = 0x0f,
838 DW_FORM_ref_addr = 0x10,
839 DW_FORM_ref1 = 0x11,
840 DW_FORM_ref2 = 0x12,
841 DW_FORM_ref4 = 0x13,
842 DW_FORM_ref8 = 0x14,
843 DW_FORM_ref_udata = 0x15,
844 DW_FORM_indirect = 0x16,
845 DW_FORM_sec_offset = 0x17,
846 DW_FORM_exprloc = 0x18,
847 DW_FORM_flag_present = 0x19,
848 DW_FORM_strx = 0x1a,
849 DW_FORM_addrx = 0x1b,
850 DW_FORM_ref_sup4 = 0x1c,
851 DW_FORM_strp_sup = 0x1d,
852 DW_FORM_data16 = 0x1e,
853 DW_FORM_line_strp = 0x1f,
854 DW_FORM_ref_sig8 = 0x20,
855 DW_FORM_implicit_const = 0x21,
856 DW_FORM_loclistx = 0x22,
857 DW_FORM_rnglistx = 0x23,
858 DW_FORM_ref_sup8 = 0x24,
859 DW_FORM_strx1 = 0x25,
860 DW_FORM_strx2 = 0x26,
861 DW_FORM_strx3 = 0x27,
862 DW_FORM_strx4 = 0x28,
863 DW_FORM_addrx1 = 0x29,
864 DW_FORM_addrx2 = 0x2a,
865 DW_FORM_addrx3 = 0x2b,
866 DW_FORM_addrx4 = 0x2c,
867
868 /* GNU extensions for referring to .gnu_debugaltlink dwz-compressed info */
869 DW_FORM_GNU_ref_alt = 0x1f20,
870 DW_FORM_GNU_strp_alt = 0x1f21
871};
872
873/* Range list entry encodings */
/* Kind codes of the DW_RLE_* range list entries. */
874enum {
875 DW_RLE_end_of_list = 0x00,
876 DW_RLE_base_addressx = 0x01,
877 DW_RLE_startx_endx = 0x02,
878 DW_RLE_startx_length = 0x03,
879 DW_RLE_offset_pair = 0x04,
880 DW_RLE_base_address = 0x05,
881 DW_RLE_start_end = 0x06,
882 DW_RLE_start_length = 0x07
883};
884
/* Discriminator stored in DebugInfoValue.type by the set_*_value() helpers;
 * it says which member of the value's union is meaningful. */
885enum {
886 VAL_none = 0,
887 VAL_cstr = 1,
888 VAL_data = 2,
889 VAL_uint = 3,
890 VAL_int = 4,
891 VAL_addr = 5
892};
893
/* Number of slots in DebugInfoReader.abbrev_table; abbreviation numbers at or
 * above this are not recorded by di_read_debug_abbrev_cu(). */
894# define ABBREV_TABLE_SIZE 256
/* Cursor state used while decoding .debug_info for one object. */
895typedef struct {
/* obj and file (= obj->mapped) are set by debug_info_reader_init() */
896 obj_info_t *obj;
897 const char *file;
898 uint8_t current_version;
899 const char *current_cu;
/* per-unit bases, reset to 0 by debug_info_reader_init() */
900 uint64_t current_low_pc;
901 uint64_t current_str_offsets_base;
902 uint64_t current_addr_base;
903 uint64_t current_rnglists_base;
/* .debug_line position: starts at obj->debug_line.ptr and is moved to the end
 * of each unit by di_read_debug_line_cu(), which also fills the four
 * debug_line_* members below */
904 const char *debug_line_cu_end;
905 uint8_t debug_line_format;
906 uint16_t debug_line_version;
907 const char *debug_line_files;
908 const char *debug_line_directories;
/* p: read position in .debug_info; pend: end of that section */
909 const char *p;
910 const char *cu_end;
911 const char *pend;
/* q0: where di_read_debug_abbrev_cu() starts reading abbreviation data */
912 const char *q0;
913 const char *q;
/* selects 32-bit vs 64-bit reads in read_uint() */
914 int format; // 4 or 8
915 uint8_t address_size;
916 int level;
/* indexed by abbreviation number; each entry points just past that number */
917 const char *abbrev_table[ABBREV_TABLE_SIZE];
918} DebugInfoReader;
919
/* Summary of one debugging information entry: a position, its tag and
 * whether it has children.
 * NOTE(review): the code that fills this in is outside the reviewed range;
 * what `pos` is relative to should be confirmed there. */
920typedef struct {
921 ptrdiff_t pos;
922 int tag;
923 int has_children;
924} DIE;
925
/* One decoded attribute value.  `type` is a VAL_* code naming the live member
 * of `as`. */
926typedef struct {
927 union {
928 const char *ptr;
929 uint64_t uint64;
930 int64_t int64;
931 uint64_t addr_idx;
932 } as;
/* byte offset added to as.ptr by get_cstr_value() */
933 uint64_t off;
934 uint64_t at;
935 uint64_t form;
/* byte length, set for block/string forms by debug_info_reader_read_value() */
936 size_t size;
937 int type;
938} DebugInfoValue;
939
/* Combine two consecutive sz-bit pieces (a comes first in memory) into one
 * value in the build's byte order. */
#if defined(WORDS_BIGENDIAN)
#define MERGE_2INTS(a,b,sz) (((uint64_t)(a)<<sz)|(b))
#else
#define MERGE_2INTS(a,b,sz) (((uint64_t)(b)<<sz)|(a))
#endif

/* Assemble a 16-bit value from the two bytes at p; no alignment required. */
static uint16_t
get_uint16(const uint8_t *p)
{
    const uint8_t first = p[0];
    const uint8_t second = p[1];
    return (uint16_t)MERGE_2INTS(first, second, 8);
}

/* Assemble a 32-bit value from the four bytes at p. */
static uint32_t
get_uint32(const uint8_t *p)
{
    const uint16_t first = get_uint16(p);
    const uint16_t second = get_uint16(p + 2);
    return (uint32_t)MERGE_2INTS(first, second, 16);
}

/* Assemble a 64-bit value from the eight bytes at p. */
static uint64_t
get_uint64(const uint8_t *p)
{
    const uint32_t first = get_uint32(p);
    const uint32_t second = get_uint32(p + 4);
    return MERGE_2INTS(first, second, 32);
}
963
/* Return the byte at *ptr and advance *ptr by one. */
static uint8_t
read_uint8(const char **ptr)
{
    const uint8_t value = (uint8_t)**ptr;
    *ptr += 1;
    return value;
}
971
/* Decode a 16-bit value at *ptr via get_uint16() and advance *ptr by two. */
static uint16_t
read_uint16(const char **ptr)
{
    const uint8_t *bytes = (const uint8_t *)*ptr;
    *ptr += 2;
    return get_uint16(bytes);
}
979
/*
 * Decode a 24-bit value at *ptr and advance *ptr by three.
 *
 * The three bytes are combined in the build's byte order, matching
 * get_uint16()/get_uint32(): on little-endian builds the first byte is the
 * least significant one, on WORDS_BIGENDIAN builds it is the most
 * significant one.
 */
static uint32_t
read_uint24(const char **ptr)
{
    const uint8_t *b = (const uint8_t *)*ptr;
    *ptr += 3;
#if defined(WORDS_BIGENDIAN)
    return ((uint32_t)b[0] << 16) | ((uint32_t)b[1] << 8) | (uint32_t)b[2];
#else
    return (uint32_t)b[0] | ((uint32_t)b[1] << 8) | ((uint32_t)b[2] << 16);
#endif
}
987
/* Decode a 32-bit value at *ptr via get_uint32() and advance *ptr by four. */
static uint32_t
read_uint32(const char **ptr)
{
    const uint8_t *bytes = (const uint8_t *)*ptr;
    *ptr += 4;
    return get_uint32(bytes);
}
995
/* Decode a 64-bit value at *ptr via get_uint64() and advance *ptr by eight. */
static uint64_t
read_uint64(const char **ptr)
{
    const uint8_t *bytes = (const uint8_t *)*ptr;
    *ptr += 8;
    return get_uint64(bytes);
}
1003
1004static uintptr_t
1005read_uintptr(const char **ptr)
1006{
1007 const unsigned char *p = (const unsigned char *)*ptr;
1008 *ptr = (char *)(p + SIZEOF_VOIDP);
1009#if SIZEOF_VOIDP == 8
1010 return get_uint64(p);
1011#else
1012 return get_uint32(p);
1013#endif
1014}
1015
1016static uint64_t
1017read_uint(DebugInfoReader *reader)
1018{
1019 if (reader->format == 4) {
1020 return read_uint32(&reader->p);
1021 } else { /* 64 bit */
1022 return read_uint64(&reader->p);
1023 }
1024}
1025
1026static uint64_t
1027read_uleb128(DebugInfoReader *reader)
1028{
1029 return uleb128(&reader->p);
1030}
1031
1032static int64_t
1033read_sleb128(DebugInfoReader *reader)
1034{
1035 return sleb128(&reader->p);
1036}
1037
1038static void
1039debug_info_reader_init(DebugInfoReader *reader, obj_info_t *obj)
1040{
1041 reader->file = obj->mapped;
1042 reader->obj = obj;
1043 reader->p = obj->debug_info.ptr;
1044 reader->pend = obj->debug_info.ptr + obj->debug_info.size;
1045 reader->debug_line_cu_end = obj->debug_line.ptr;
1046 reader->current_low_pc = 0;
1047 reader->current_str_offsets_base = 0;
1048 reader->current_addr_base = 0;
1049 reader->current_rnglists_base = 0;
1050}
1051
1052static void
1053di_skip_die_attributes(const char **p)
1054{
1055 for (;;) {
1056 uint64_t at = uleb128(p);
1057 uint64_t form = uleb128(p);
1058 if (!at && !form) break;
1059 switch (form) {
1060 default:
1061 break;
1062 case DW_FORM_implicit_const:
1063 sleb128(p);
1064 break;
1065 }
1066 }
1067}
1068
1069static void
1070di_read_debug_abbrev_cu(DebugInfoReader *reader)
1071{
1072 uint64_t prev = 0;
1073 const char *p = reader->q0;
1074 for (;;) {
1075 uint64_t abbrev_number = uleb128(&p);
1076 if (abbrev_number <= prev) break;
1077 if (abbrev_number < ABBREV_TABLE_SIZE) {
1078 reader->abbrev_table[abbrev_number] = p;
1079 }
1080 prev = abbrev_number;
1081 uleb128(&p); /* tag */
1082 p++; /* has_children */
1083 di_skip_die_attributes(&p);
1084 }
1085}
1086
1087static int
1088di_read_debug_line_cu(DebugInfoReader *reader, FILE *errout)
1089{
1090 const char *p;
1091 struct LineNumberProgramHeader header;
1092
1093 p = (const char *)reader->debug_line_cu_end;
1094 if (parse_debug_line_header(reader->obj, &p, &header, errout))
1095 return -1;
1096
1097 reader->debug_line_cu_end = (char *)header.cu_end;
1098 reader->debug_line_format = header.format;
1099 reader->debug_line_version = header.version;
1100 reader->debug_line_directories = (char *)header.include_directories;
1101 reader->debug_line_files = (char *)header.filenames;
1102
1103 return 0;
1104}
1105
1106static void
1107set_addr_idx_value(DebugInfoValue *v, uint64_t n)
1108{
1109 v->as.addr_idx = n;
1110 v->type = VAL_addr;
1111}
1112
1113static void
1114set_uint_value(DebugInfoValue *v, uint64_t n)
1115{
1116 v->as.uint64 = n;
1117 v->type = VAL_uint;
1118}
1119
1120static void
1121set_int_value(DebugInfoValue *v, int64_t n)
1122{
1123 v->as.int64 = n;
1124 v->type = VAL_int;
1125}
1126
1127static void
1128set_cstr_value(DebugInfoValue *v, const char *s)
1129{
1130 v->as.ptr = s;
1131 v->off = 0;
1132 v->type = VAL_cstr;
1133}
1134
1135static void
1136set_cstrp_value(DebugInfoValue *v, const char *s, uint64_t off)
1137{
1138 v->as.ptr = s;
1139 v->off = off;
1140 v->type = VAL_cstr;
1141}
1142
1143static void
1144set_data_value(DebugInfoValue *v, const char *s)
1145{
1146 v->as.ptr = s;
1147 v->type = VAL_data;
1148}
1149
1150static const char *
1151get_cstr_value(DebugInfoValue *v)
1152{
1153 if (v->as.ptr) {
1154 return v->as.ptr + v->off;
1155 } else {
1156 return NULL;
1157 }
1158}
1159
1160static const char *
1161resolve_strx(DebugInfoReader *reader, uint64_t idx)
1162{
1163 const char *p = reader->obj->debug_str_offsets.ptr + reader->current_str_offsets_base;
1164 uint64_t off;
1165 if (reader->format == 4) {
1166 off = ((uint32_t *)p)[idx];
1167 }
1168 else {
1169 off = ((uint64_t *)p)[idx];
1170 }
1171 return reader->obj->debug_str.ptr + off;
1172}
1173
1174static bool
1175debug_info_reader_read_addr_value_member(DebugInfoReader *reader, DebugInfoValue *v, int size)
1176{
1177 if (size == 4) {
1178 set_uint_value(v, read_uint32(&reader->p));
1179 } else if (size == 8) {
1180 set_uint_value(v, read_uint64(&reader->p));
1181 } else {
1182 return false;
1183 }
1184 return true;
1185}
1186
/*
 * Read an integer whose byte size is given by reader->mem (e.g. address_size
 * or format) into v.  On an unsupported size a diagnostic is printed and the
 * ENCLOSING function returns false, so this may only be used inside functions
 * returning bool that have `errout` in scope.
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement and
 * is safe inside an unbraced if/else; callers terminate it with ';'.
 */
#define debug_info_reader_read_addr_value(reader, v, mem) \
    do { \
        if (!debug_info_reader_read_addr_value_member((reader), (v), (reader)->mem)) { \
            kprintf("unknown " #mem ":%d", (reader)->mem); \
            return false; \
        } \
    } while (0)
1192
1193
1194static bool
1195debug_info_reader_read_value(DebugInfoReader *reader, uint64_t form, DebugInfoValue *v, FILE *errout)
1196{
1197 switch (form) {
1198 case DW_FORM_addr:
1199 debug_info_reader_read_addr_value(reader, v, address_size);
1200 break;
1201 case DW_FORM_block2:
1202 v->size = read_uint16(&reader->p);
1203 set_data_value(v, reader->p);
1204 reader->p += v->size;
1205 break;
1206 case DW_FORM_block4:
1207 v->size = read_uint32(&reader->p);
1208 set_data_value(v, reader->p);
1209 reader->p += v->size;
1210 break;
1211 case DW_FORM_data2:
1212 set_uint_value(v, read_uint16(&reader->p));
1213 break;
1214 case DW_FORM_data4:
1215 set_uint_value(v, read_uint32(&reader->p));
1216 break;
1217 case DW_FORM_data8:
1218 set_uint_value(v, read_uint64(&reader->p));
1219 break;
1220 case DW_FORM_string:
1221 v->size = strlen(reader->p);
1222 set_cstr_value(v, reader->p);
1223 reader->p += v->size + 1;
1224 break;
1225 case DW_FORM_block:
1226 v->size = uleb128(&reader->p);
1227 set_data_value(v, reader->p);
1228 reader->p += v->size;
1229 break;
1230 case DW_FORM_block1:
1231 v->size = read_uint8(&reader->p);
1232 set_data_value(v, reader->p);
1233 reader->p += v->size;
1234 break;
1235 case DW_FORM_data1:
1236 set_uint_value(v, read_uint8(&reader->p));
1237 break;
1238 case DW_FORM_flag:
1239 set_uint_value(v, read_uint8(&reader->p));
1240 break;
1241 case DW_FORM_sdata:
1242 set_int_value(v, read_sleb128(reader));
1243 break;
1244 case DW_FORM_strp:
1245 set_cstrp_value(v, reader->obj->debug_str.ptr, read_uint(reader));
1246 break;
1247 case DW_FORM_udata:
1248 set_uint_value(v, read_uleb128(reader));
1249 break;
1250 case DW_FORM_ref_addr:
1251 if (reader->current_version <= 2) {
1252 // DWARF Version 2 specifies that references have
1253 // the same size as an address on the target system
1254 debug_info_reader_read_addr_value(reader, v, address_size);
1255 } else {
1256 debug_info_reader_read_addr_value(reader, v, format);
1257 }
1258 break;
1259 case DW_FORM_ref1:
1260 set_uint_value(v, read_uint8(&reader->p));
1261 break;
1262 case DW_FORM_ref2:
1263 set_uint_value(v, read_uint16(&reader->p));
1264 break;
1265 case DW_FORM_ref4:
1266 set_uint_value(v, read_uint32(&reader->p));
1267 break;
1268 case DW_FORM_ref8:
1269 set_uint_value(v, read_uint64(&reader->p));
1270 break;
1271 case DW_FORM_ref_udata:
1272 set_uint_value(v, uleb128(&reader->p));
1273 break;
1274 case DW_FORM_indirect:
1275 /* TODO: read the referred value */
1276 set_uint_value(v, uleb128(&reader->p));
1277 break;
1278 case DW_FORM_sec_offset:
1279 set_uint_value(v, read_uint(reader)); /* offset */
1280 /* addrptr: debug_addr */
1281 /* lineptr: debug_line */
1282 /* loclist: debug_loclists */
1283 /* loclistptr: debug_loclists */
1284 /* macptr: debug_macro */
1285 /* rnglist: debug_rnglists */
1286 /* rnglistptr: debug_rnglists */
1287 /* stroffsetsptr: debug_str_offsets */
1288 break;
1289 case DW_FORM_exprloc:
1290 v->size = (size_t)read_uleb128(reader);
1291 set_data_value(v, reader->p);
1292 reader->p += v->size;
1293 break;
1294 case DW_FORM_flag_present:
1295 set_uint_value(v, 1);
1296 break;
1297 case DW_FORM_strx:
1298 set_cstr_value(v, resolve_strx(reader, uleb128(&reader->p)));
1299 break;
1300 case DW_FORM_addrx:
1301 set_addr_idx_value(v, uleb128(&reader->p));
1302 break;
1303 case DW_FORM_ref_sup4:
1304 set_uint_value(v, read_uint32(&reader->p));
1305 break;
1306 case DW_FORM_strp_sup:
1307 set_uint_value(v, read_uint(reader));
1308 /* *p = reader->sup_file + reader->sup_str->sh_offset + ret; */
1309 break;
1310 case DW_FORM_data16:
1311 v->size = 16;
1312 set_data_value(v, reader->p);
1313 reader->p += v->size;
1314 break;
1315 case DW_FORM_line_strp:
1316 set_cstrp_value(v, reader->obj->debug_line_str.ptr, read_uint(reader));
1317 break;
1318 case DW_FORM_ref_sig8:
1319 set_uint_value(v, read_uint64(&reader->p));
1320 break;
1321 case DW_FORM_implicit_const:
1322 set_int_value(v, sleb128(&reader->q));
1323 break;
1324 case DW_FORM_loclistx:
1325 set_uint_value(v, read_uleb128(reader));
1326 break;
1327 case DW_FORM_rnglistx:
1328 set_uint_value(v, read_uleb128(reader));
1329 break;
1330 case DW_FORM_ref_sup8:
1331 set_uint_value(v, read_uint64(&reader->p));
1332 break;
1333 case DW_FORM_strx1:
1334 set_cstr_value(v, resolve_strx(reader, read_uint8(&reader->p)));
1335 break;
1336 case DW_FORM_strx2:
1337 set_cstr_value(v, resolve_strx(reader, read_uint16(&reader->p)));
1338 break;
1339 case DW_FORM_strx3:
1340 set_cstr_value(v, resolve_strx(reader, read_uint24(&reader->p)));
1341 break;
1342 case DW_FORM_strx4:
1343 set_cstr_value(v, resolve_strx(reader, read_uint32(&reader->p)));
1344 break;
1345 case DW_FORM_addrx1:
1346 set_addr_idx_value(v, read_uint8(&reader->p));
1347 break;
1348 case DW_FORM_addrx2:
1349 set_addr_idx_value(v, read_uint16(&reader->p));
1350 break;
1351 case DW_FORM_addrx3:
1352 set_addr_idx_value(v, read_uint24(&reader->p));
1353 break;
1354 case DW_FORM_addrx4:
1355 set_addr_idx_value(v, read_uint32(&reader->p));
1356 break;
1357 /* we have no support for actually reading the real values of these refs out
1358 * of the .gnu_debugaltlink dwz-compressed debuginfo at the moment, but "read"
1359 * them anyway so that we advance the reader by the right amount. */
1360 case DW_FORM_GNU_ref_alt:
1361 case DW_FORM_GNU_strp_alt:
1362 read_uint(reader);
1363 set_uint_value(v, 0);
1364 break;
1365 case 0:
1366 goto fail;
1367 break;
1368 }
1369 return true;
1370
1371 fail:
1372 kprintf("%d: unsupported form: %#"PRIx64"\n", __LINE__, form);
1373 return false;
1374}
1375
1376/* find abbrev in current compilation unit */
1377static const char *
1378di_find_abbrev(DebugInfoReader *reader, uint64_t abbrev_number, FILE *errout)
1379{
1380 const char *p;
1381 if (abbrev_number < ABBREV_TABLE_SIZE) {
1382 return reader->abbrev_table[abbrev_number];
1383 }
1384 p = reader->abbrev_table[ABBREV_TABLE_SIZE-1];
1385 /* skip 255th record */
1386 uleb128(&p); /* tag */
1387 p++; /* has_children */
1388 di_skip_die_attributes(&p);
1389 for (uint64_t n = uleb128(&p); abbrev_number != n; n = uleb128(&p)) {
1390 if (n == 0) {
1391 kprintf("%d: Abbrev Number %"PRId64" not found\n",__LINE__, abbrev_number);
1392 return NULL;
1393 }
1394 uleb128(&p); /* tag */
1395 p++; /* has_children */
1396 di_skip_die_attributes(&p);
1397 }
1398 return p;
1399}
1400
1401#if 0
1402static void
1403hexdump0(const unsigned char *p, size_t n, FILE *errout)
1404{
1405 size_t i;
1406 kprintf(" 0 1 2 3 4 5 6 7 8 9 A B C D E F\n");
1407 for (i=0; i < n; i++){
1408 switch (i & 15) {
1409 case 0:
1410 kprintf("%02" PRIdSIZE ": %02X ", i/16, p[i]);
1411 break;
1412 case 15:
1413 kprintf("%02X\n", p[i]);
1414 break;
1415 default:
1416 kprintf("%02X ", p[i]);
1417 break;
1418 }
1419 }
1420 if ((i & 15) != 15) {
1421 kprintf("\n");
1422 }
1423}
1424#define hexdump(p,n,e) hexdump0((const unsigned char *)p, n, e)
1425
1426static void
1427div_inspect(DebugInfoValue *v, FILE *errout)
1428{
1429 switch (v->type) {
1430 case VAL_uint:
1431 kprintf("%d: type:%d size:%" PRIxSIZE " v:%"PRIx64"\n",__LINE__,v->type,v->size,v->as.uint64);
1432 break;
1433 case VAL_int:
1434 kprintf("%d: type:%d size:%" PRIxSIZE " v:%"PRId64"\n",__LINE__,v->type,v->size,(int64_t)v->as.uint64);
1435 break;
1436 case VAL_cstr:
1437 kprintf("%d: type:%d size:%" PRIxSIZE " v:'%s'\n",__LINE__,v->type,v->size,v->as.ptr);
1438 break;
1439 case VAL_data:
1440 kprintf("%d: type:%d size:%" PRIxSIZE " v:\n",__LINE__,v->type,v->size);
1441 hexdump(v->as.ptr, 16, errout);
1442 break;
1443 }
1444}
1445#endif
1446
1447static DIE *
1448di_read_die(DebugInfoReader *reader, DIE *die, FILE *errout)
1449{
1450 uint64_t abbrev_number = uleb128(&reader->p);
1451 if (abbrev_number == 0) {
1452 reader->level--;
1453 return NULL;
1454 }
1455
1456 if (!(reader->q = di_find_abbrev(reader, abbrev_number, errout))) return NULL;
1457
1458 die->pos = reader->p - reader->obj->debug_info.ptr - 1;
1459 die->tag = (int)uleb128(&reader->q); /* tag */
1460 die->has_children = *reader->q++; /* has_children */
1461 if (die->has_children) {
1462 reader->level++;
1463 }
1464 return die;
1465}
1466
1467static DebugInfoValue *
1468di_read_record(DebugInfoReader *reader, DebugInfoValue *vp, FILE *errout)
1469{
1470 uint64_t at = uleb128(&reader->q);
1471 uint64_t form = uleb128(&reader->q);
1472 if (!at || !form) return NULL;
1473 vp->at = at;
1474 vp->form = form;
1475 if (!debug_info_reader_read_value(reader, form, vp, errout)) return NULL;
1476 return vp;
1477}
1478
1479static bool
1480di_skip_records(DebugInfoReader *reader, FILE *errout)
1481{
1482 for (;;) {
1483 DebugInfoValue v = {{0}};
1484 uint64_t at = uleb128(&reader->q);
1485 uint64_t form = uleb128(&reader->q);
1486 if (!at || !form) return true;
1487 if (!debug_info_reader_read_value(reader, form, &v, errout)) return false;
1488 }
1489}
1490
/* Fields read from the start of the object's .debug_addr section
 * (filled in by addr_header_init). */
typedef struct addr_header {
    const char *ptr;       /* start of the section; NULL when the object has none */
    uint64_t unit_length;  /* unit length; the 64-bit value when the 0xffffffff escape is present */
    uint8_t format;        /* 4, or 8 when the 0xffffffff escape is present */
    uint8_t address_size;  /* size of one address entry in bytes: 4 or 8 */
    /* uint8_t segment_selector_size; */
} addr_header_t;
1498
1499static bool
1500addr_header_init(obj_info_t *obj, addr_header_t *header, FILE *errout)
1501{
1502 const char *p = obj->debug_addr.ptr;
1503
1504 header->ptr = p;
1505
1506 if (!p) return true;
1507
1508 header->unit_length = *(uint32_t *)p;
1509 p += sizeof(uint32_t);
1510
1511 header->format = 4;
1512 if (header->unit_length == 0xffffffff) {
1513 header->unit_length = *(uint64_t *)p;
1514 p += sizeof(uint64_t);
1515 header->format = 8;
1516 }
1517
1518 p += 2; /* version */
1519 header->address_size = *p++;
1520 if (header->address_size != 4 && header->address_size != 8) {
1521 kprintf("unknown address_size:%d", header->address_size);
1522 return false;
1523 }
1524 p++; /* segment_selector_size */
1525 return true;
1526}
1527
1528static uint64_t
1529read_addr(addr_header_t *header, uint64_t addr_base, uint64_t idx) {
1530 if (header->address_size == 4) {
1531 return ((uint32_t*)(header->ptr + addr_base))[idx];
1532 }
1533 else {
1534 return ((uint64_t*)(header->ptr + addr_base))[idx];
1535 }
1536}
1537
/* Fields read from the start of the object's .debug_rnglists section
 * (filled in by rnglists_header_init). */
typedef struct rnglists_header {
    uint64_t unit_length;         /* unit length; the 64-bit value when the 0xffffffff escape is present */
    uint8_t format;               /* 4, or 8 when the 0xffffffff escape is present */
    uint8_t address_size;         /* size of an address in bytes: 4 or 8 */
    uint32_t offset_entry_count;  /* 32-bit count read after segment_selector_size */
} rnglists_header_t;
1544
1545static bool
1546rnglists_header_init(obj_info_t *obj, rnglists_header_t *header, FILE *errout)
1547{
1548 const char *p = obj->debug_rnglists.ptr;
1549
1550 if (!p) return true;
1551
1552 header->unit_length = *(uint32_t *)p;
1553 p += sizeof(uint32_t);
1554
1555 header->format = 4;
1556 if (header->unit_length == 0xffffffff) {
1557 header->unit_length = *(uint64_t *)p;
1558 p += sizeof(uint64_t);
1559 header->format = 8;
1560 }
1561
1562 p += 2; /* version */
1563 header->address_size = *p++;
1564 if (header->address_size != 4 && header->address_size != 8) {
1565 kprintf("unknown address_size:%d", header->address_size);
1566 return false;
1567 }
1568 p++; /* segment_selector_size */
1569 header->offset_entry_count = *(uint32_t *)p;
1570 return true;
1571}
1572
/* Address coverage of one DIE, collected from its DW_AT_low_pc,
 * DW_AT_high_pc and DW_AT_ranges attributes (see ranges_set). */
typedef struct {
    uint64_t low_pc;    /* value of DW_AT_low_pc */
    uint64_t high_pc;   /* DW_AT_high_pc, stored as an address (offset forms are added to low_pc) */
    uint64_t ranges;    /* value of DW_AT_ranges */
    bool low_pc_set;    /* DW_AT_low_pc was seen */
    bool high_pc_set;   /* DW_AT_high_pc was seen */
    bool ranges_set;    /* DW_AT_ranges was seen */
} ranges_t;
1581
1582static void
1583ranges_set(ranges_t *ptr, DebugInfoValue *v, addr_header_t *addr_header, uint64_t addr_base)
1584{
1585 uint64_t n = 0;
1586 if (v->type == VAL_uint) {
1587 n = v->as.uint64;
1588 }
1589 else if (v->type == VAL_addr) {
1590 n = read_addr(addr_header, addr_base, v->as.addr_idx);
1591 }
1592 switch (v->at) {
1593 case DW_AT_low_pc:
1594 ptr->low_pc = n;
1595 ptr->low_pc_set = true;
1596 break;
1597 case DW_AT_high_pc:
1598 if (v->form == DW_FORM_addr) {
1599 ptr->high_pc = n;
1600 }
1601 else {
1602 ptr->high_pc = ptr->low_pc + n;
1603 }
1604 ptr->high_pc_set = true;
1605 break;
1606 case DW_AT_ranges:
1607 ptr->ranges = n;
1608 ptr->ranges_set = true;
1609 break;
1610 }
1611}
1612
1613static uint64_t
1614read_dw_form_addr(DebugInfoReader *reader, const char **ptr, FILE *errout)
1615{
1616 const char *p = *ptr;
1617 *ptr = p + reader->address_size;
1618 if (reader->address_size == 4) {
1619 return read_uint32(&p);
1620 } else {
1621 return read_uint64(&p);
1622 }
1623}
1624
1625static uintptr_t
1626ranges_include(DebugInfoReader *reader, ranges_t *ptr, uint64_t addr, rnglists_header_t *rnglists_header, FILE *errout)
1627{
1628 if (ptr->high_pc_set) {
1629 if (ptr->ranges_set || !ptr->low_pc_set) {
1630 return UINTPTR_MAX;
1631 }
1632 if (ptr->low_pc <= addr && addr <= ptr->high_pc) {
1633 return (uintptr_t)ptr->low_pc;
1634 }
1635 }
1636 else if (ptr->ranges_set) {
1637 /* TODO: support base address selection entry */
1638 const char *p;
1639 uint64_t base = ptr->low_pc_set ? ptr->low_pc : reader->current_low_pc;
1640 bool base_valid = true;
1641 if (reader->current_version >= 5) {
1642 if (rnglists_header->offset_entry_count == 0) {
1643 // DW_FORM_sec_offset
1644 p = reader->obj->debug_rnglists.ptr + ptr->ranges + reader->current_rnglists_base;
1645 }
1646 else {
1647 // DW_FORM_rnglistx
1648 const char *offset_array = reader->obj->debug_rnglists.ptr + reader->current_rnglists_base;
1649 if (rnglists_header->format == 4) {
1650 p = offset_array + ((uint32_t *)offset_array)[ptr->ranges];
1651 }
1652 else {
1653 p = offset_array + ((uint64_t *)offset_array)[ptr->ranges];
1654 }
1655 }
1656 for (;;) {
1657 uint8_t rle = read_uint8(&p);
1658 uintptr_t from = 0, to = 0;
1659 if (rle == DW_RLE_end_of_list) break;
1660 switch (rle) {
1661 case DW_RLE_base_addressx:
1662 uleb128(&p);
1663 base_valid = false; /* not supported yet */
1664 break;
1665 case DW_RLE_startx_endx:
1666 uleb128(&p);
1667 uleb128(&p);
1668 break;
1669 case DW_RLE_startx_length:
1670 uleb128(&p);
1671 uleb128(&p);
1672 break;
1673 case DW_RLE_offset_pair:
1674 if (!base_valid) break;
1675 from = (uintptr_t)base + uleb128(&p);
1676 to = (uintptr_t)base + uleb128(&p);
1677 break;
1678 case DW_RLE_base_address:
1679 base = read_dw_form_addr(reader, &p, errout);
1680 base_valid = true;
1681 break;
1682 case DW_RLE_start_end:
1683 from = (uintptr_t)read_dw_form_addr(reader, &p, errout);
1684 to = (uintptr_t)read_dw_form_addr(reader, &p, errout);
1685 break;
1686 case DW_RLE_start_length:
1687 from = (uintptr_t)read_dw_form_addr(reader, &p, errout);
1688 to = from + uleb128(&p);
1689 break;
1690 }
1691 if (from <= addr && addr < to) {
1692 return from;
1693 }
1694 }
1695 return 0;
1696 }
1697 p = reader->obj->debug_ranges.ptr + ptr->ranges;
1698 for (;;) {
1699 uintptr_t from = read_uintptr(&p);
1700 uintptr_t to = read_uintptr(&p);
1701 if (!from && !to) break;
1702 if (from == UINTPTR_MAX) {
1703 /* base address selection entry */
1704 base = to;
1705 }
1706 else if (base + from <= addr && addr < base + to) {
1707 return (uintptr_t)base + from;
1708 }
1709 }
1710 }
1711 else if (ptr->low_pc_set) {
1712 if (ptr->low_pc == addr) {
1713 return (uintptr_t)ptr->low_pc;
1714 }
1715 }
1716 return 0;
1717}
1718
1719#if 0
1720static void
1721ranges_inspect(DebugInfoReader *reader, ranges_t *ptr, FILE *errout)
1722{
1723 if (ptr->high_pc_set) {
1724 if (ptr->ranges_set || !ptr->low_pc_set) {
1725 kprintf("low_pc_set:%d high_pc_set:%d ranges_set:%d\n",ptr->low_pc_set,ptr->high_pc_set,ptr->ranges_set);
1726 return;
1727 }
1728 kprintf("low_pc:%"PRIx64" high_pc:%"PRIx64"\n",ptr->low_pc,ptr->high_pc);
1729 }
1730 else if (ptr->ranges_set) {
1731 char *p = reader->obj->debug_ranges.ptr + ptr->ranges;
1732 kprintf("low_pc:%"PRIx64" ranges:%"PRIx64" %lx ",ptr->low_pc,ptr->ranges, p-reader->obj->mapped);
1733 for (;;) {
1734 uintptr_t from = read_uintptr(&p);
1735 uintptr_t to = read_uintptr(&p);
1736 if (!from && !to) break;
1737 kprintf("%"PRIx64"-%"PRIx64" ",ptr->low_pc+from,ptr->low_pc+to);
1738 }
1739 kprintf("\n");
1740 }
1741 else if (ptr->low_pc_set) {
1742 kprintf("low_pc:%"PRIx64"\n",ptr->low_pc);
1743 }
1744 else {
1745 kprintf("empty\n");
1746 }
1747}
1748#endif
1749
1750static int
1751di_read_cu(DebugInfoReader *reader, FILE *errout)
1752{
1753 uint64_t unit_length;
1754 uint16_t version;
1755 uint64_t debug_abbrev_offset;
1756 reader->format = 4;
1757 reader->current_cu = reader->p;
1758 unit_length = read_uint32(&reader->p);
1759 if (unit_length == 0xffffffff) {
1760 unit_length = read_uint64(&reader->p);
1761 reader->format = 8;
1762 }
1763 reader->cu_end = reader->p + unit_length;
1764 version = read_uint16(&reader->p);
1765 reader->current_version = version;
1766 if (version > 5) {
1767 return -1;
1768 }
1769 else if (version == 5) {
1770 /* unit_type = */ read_uint8(&reader->p);
1771 reader->address_size = read_uint8(&reader->p);
1772 debug_abbrev_offset = read_uint(reader);
1773 }
1774 else {
1775 debug_abbrev_offset = read_uint(reader);
1776 reader->address_size = read_uint8(&reader->p);
1777 }
1778 if (reader->address_size != 4 && reader->address_size != 8) {
1779 kprintf("unknown address_size:%d", reader->address_size);
1780 return -1;
1781 }
1782 reader->q0 = reader->obj->debug_abbrev.ptr + debug_abbrev_offset;
1783
1784 reader->level = 0;
1785 di_read_debug_abbrev_cu(reader);
1786 if (di_read_debug_line_cu(reader, errout)) return -1;
1787
1788 do {
1789 DIE die;
1790
1791 if (!di_read_die(reader, &die, errout)) continue;
1792
1793 if (die.tag != DW_TAG_compile_unit) {
1794 if (!di_skip_records(reader, errout)) return -1;
1795 break;
1796 }
1797
1798 reader->current_str_offsets_base = 0;
1799 reader->current_addr_base = 0;
1800 reader->current_rnglists_base = 0;
1801
1802 DebugInfoValue low_pc = {{0}};
1803 /* enumerate abbrev */
1804 for (;;) {
1805 DebugInfoValue v = {{0}};
1806 if (!di_read_record(reader, &v, errout)) break;
1807 switch (v.at) {
1808 case DW_AT_low_pc:
1809 // clang may output DW_AT_addr_base after DW_AT_low_pc.
1810 // We need to resolve the DW_FORM_addr* after DW_AT_addr_base is parsed.
1811 low_pc = v;
1812 break;
1813 case DW_AT_str_offsets_base:
1814 reader->current_str_offsets_base = v.as.uint64;
1815 break;
1816 case DW_AT_addr_base:
1817 reader->current_addr_base = v.as.uint64;
1818 break;
1819 case DW_AT_rnglists_base:
1820 reader->current_rnglists_base = v.as.uint64;
1821 break;
1822 }
1823 }
1824 // Resolve the DW_FORM_addr of DW_AT_low_pc
1825 switch (low_pc.type) {
1826 case VAL_uint:
1827 reader->current_low_pc = low_pc.as.uint64;
1828 break;
1829 case VAL_addr:
1830 {
1831 addr_header_t header = {0};
1832 if (!addr_header_init(reader->obj, &header, errout)) return -1;
1833 reader->current_low_pc = read_addr(&header, reader->current_addr_base, low_pc.as.addr_idx);
1834 }
1835 break;
1836 }
1837 } while (0);
1838
1839 return 0;
1840}
1841
1842static void
1843read_abstract_origin(DebugInfoReader *reader, uint64_t form, uint64_t abstract_origin, line_info_t *line, FILE *errout)
1844{
1845 const char *p = reader->p;
1846 const char *q = reader->q;
1847 int level = reader->level;
1848 DIE die;
1849
1850 switch (form) {
1851 case DW_FORM_ref1:
1852 case DW_FORM_ref2:
1853 case DW_FORM_ref4:
1854 case DW_FORM_ref8:
1855 case DW_FORM_ref_udata:
1856 reader->p = reader->current_cu + abstract_origin;
1857 break;
1858 case DW_FORM_ref_addr:
1859 goto finish; /* not supported yet */
1860 case DW_FORM_ref_sig8:
1861 goto finish; /* not supported yet */
1862 case DW_FORM_ref_sup4:
1863 case DW_FORM_ref_sup8:
1864 goto finish; /* not supported yet */
1865 default:
1866 goto finish;
1867 }
1868 if (!di_read_die(reader, &die, errout)) goto finish;
1869
1870 /* enumerate abbrev */
1871 for (;;) {
1872 DebugInfoValue v = {{0}};
1873 if (!di_read_record(reader, &v, errout)) break;
1874 switch (v.at) {
1875 case DW_AT_name:
1876 line->sname = get_cstr_value(&v);
1877 break;
1878 }
1879 }
1880
1881 finish:
1882 reader->p = p;
1883 reader->q = q;
1884 reader->level = level;
1885}
1886
1887static bool
1888debug_info_read(DebugInfoReader *reader, int num_traces, void **traces,
1889 line_info_t *lines, int offset, FILE *errout)
1890{
1891
1892 addr_header_t addr_header = {0};
1893 if (!addr_header_init(reader->obj, &addr_header, errout)) return false;
1894
1895 rnglists_header_t rnglists_header = {0};
1896 if (!rnglists_header_init(reader->obj, &rnglists_header, errout)) return false;
1897
1898 while (reader->p < reader->cu_end) {
1899 DIE die;
1900 ranges_t ranges = {0};
1901 line_info_t line = {0};
1902
1903 if (!di_read_die(reader, &die, errout)) continue;
1904 /* kprintf("%d:%tx: <%d>\n",__LINE__,die.pos,reader->level,die.tag); */
1905
1906 if (die.tag != DW_TAG_subprogram && die.tag != DW_TAG_inlined_subroutine) {
1907 skip_die:
1908 if (!di_skip_records(reader, errout)) return false;
1909 continue;
1910 }
1911
1912 /* enumerate abbrev */
1913 for (;;) {
1914 DebugInfoValue v = {{0}};
1915 /* ptrdiff_t pos = reader->p - reader->p0; */
1916 if (!di_read_record(reader, &v, errout)) break;
1917 /* kprintf("\n%d:%tx: AT:%lx FORM:%lx\n",__LINE__,pos,v.at,v.form); */
1918 /* div_inspect(&v, errout); */
1919 switch (v.at) {
1920 case DW_AT_name:
1921 line.sname = get_cstr_value(&v);
1922 break;
1923 case DW_AT_call_file:
1924 fill_filename((int)v.as.uint64, reader->debug_line_format, reader->debug_line_version, reader->debug_line_directories, reader->debug_line_files, &line, reader->obj, errout);
1925 break;
1926 case DW_AT_call_line:
1927 line.line = (int)v.as.uint64;
1928 break;
1929 case DW_AT_low_pc:
1930 case DW_AT_high_pc:
1931 case DW_AT_ranges:
1932 ranges_set(&ranges, &v, &addr_header, reader->current_addr_base);
1933 break;
1934 case DW_AT_declaration:
1935 goto skip_die;
1936 case DW_AT_inline:
1937 /* 1 or 3 */
1938 break; /* goto skip_die; */
1939 case DW_AT_abstract_origin:
1940 read_abstract_origin(reader, v.form, v.as.uint64, &line, errout);
1941 break; /* goto skip_die; */
1942 }
1943 }
1944 /* ranges_inspect(reader, &ranges, errout); */
1945 /* kprintf("%d:%tx: %x ",__LINE__,diepos,die.tag); */
1946 for (int i=offset; i < num_traces; i++) {
1947 uintptr_t addr = (uintptr_t)traces[i];
1948 uintptr_t offset = addr - reader->obj->base_addr + reader->obj->vmaddr;
1949 uintptr_t saddr = ranges_include(reader, &ranges, offset, &rnglists_header, errout);
1950 if (saddr == UINTPTR_MAX) return false;
1951 if (saddr) {
1952 /* kprintf("%d:%tx: %d %lx->%lx %x %s: %s/%s %d %s %s %s\n",__LINE__,die.pos, i,addr,offset, die.tag,line.sname,line.dirname,line.filename,line.line,reader->obj->path,line.sname,lines[i].sname); */
1953 if (lines[i].sname) {
1954 line_info_t *lp = malloc(sizeof(line_info_t));
1955 memcpy(lp, &lines[i], sizeof(line_info_t));
1956 lines[i].next = lp;
1957 lp->dirname = line.dirname;
1958 lp->filename = line.filename;
1959 lp->line = line.line;
1960 lp->saddr = 0;
1961 }
1962 lines[i].path = reader->obj->path;
1963 lines[i].base_addr = line.base_addr;
1964 lines[i].sname = line.sname;
1965 lines[i].saddr = saddr + reader->obj->base_addr - reader->obj->vmaddr;
1966 }
1967 }
1968 }
1969 return true;
1970}
1971
1972// This function parses the following attributes of Line Number Program Header in DWARF 5:
1973//
1974// * directory_entry_format_count
1975// * directory_entry_format
1976// * directories_count
1977// * directories
1978//
1979// or
1980//
1981// * file_name_entry_format_count
1982// * file_name_entry_format
1983// * file_names_count
1984// * file_names
1985//
1986// It records DW_LNCT_path and DW_LNCT_directory_index at the index "idx".
1987static const char *
1988parse_ver5_debug_line_header(const char *p, int idx, uint8_t format,
1989 obj_info_t *obj, const char **out_path,
1990 uint64_t *out_directory_index, FILE *errout)
1991{
1992 int i, j;
1993 int entry_format_count = *(uint8_t *)p++;
1994 const char *entry_format = p;
1995
1996 /* skip the part of entry_format */
1997 for (i = 0; i < entry_format_count * 2; i++) uleb128(&p);
1998
1999 int entry_count = (int)uleb128(&p);
2000
2001 DebugInfoReader reader = {0};
2002 debug_info_reader_init(&reader, obj);
2003 reader.format = format;
2004 reader.p = p;
2005 for (j = 0; j < entry_count; j++) {
2006 const char *format = entry_format;
2007 for (i = 0; i < entry_format_count; i++) {
2008 DebugInfoValue v = {{0}};
2009 unsigned long dw_lnct = uleb128(&format);
2010 unsigned long dw_form = uleb128(&format);
2011 if (!debug_info_reader_read_value(&reader, dw_form, &v, errout)) return 0;
2012 if (dw_lnct == 1 /* DW_LNCT_path */ && v.type == VAL_cstr && out_path)
2013 *out_path = v.as.ptr + v.off;
2014 if (dw_lnct == 2 /* DW_LNCT_directory_index */ && v.type == VAL_uint && out_directory_index)
2015 *out_directory_index = v.as.uint64;
2016 }
2017 if (j == idx) return 0;
2018 }
2019
2020 return reader.p;
2021}
2022
2023#ifdef USE_ELF
2024static unsigned long
2025uncompress_debug_section(ElfW(Shdr) *shdr, char *file, char **ptr)
2026{
2027 *ptr = NULL;
2028#ifdef SUPPORT_COMPRESSED_DEBUG_LINE
2029 ElfW(Chdr) *chdr = (ElfW(Chdr) *)(file + shdr->sh_offset);
2030 unsigned long destsize = chdr->ch_size;
2031 int ret = 0;
2032
2033 if (chdr->ch_type != ELFCOMPRESS_ZLIB) {
2034 /* unsupported compression type */
2035 return 0;
2036 }
2037
2038 *ptr = malloc(destsize);
2039 if (!*ptr) return 0;
2040 ret = uncompress((Bytef *)*ptr, &destsize,
2041 (const Bytef*)chdr + sizeof(ElfW(Chdr)),
2042 shdr->sh_size - sizeof(ElfW(Chdr)));
2043 if (ret != Z_OK) goto fail;
2044 return destsize;
2045
2046fail:
2047 free(*ptr);
2048 *ptr = NULL;
2049#endif
2050 return 0;
2051}
2052
2053/* read file and fill lines */
2054static uintptr_t
2055fill_lines(int num_traces, void **traces, int check_debuglink,
2056 obj_info_t **objp, line_info_t *lines, int offset, FILE *errout)
2057{
2058 int i, j;
2059 char *shstr;
2060 ElfW(Ehdr) *ehdr;
2061 ElfW(Shdr) *shdr, *shstr_shdr;
2062 ElfW(Shdr) *gnu_debuglink_shdr = NULL;
2063 ElfW(Shdr) *note_gnu_build_id = NULL;
2064 int fd;
2065 off_t filesize;
2066 char *file;
2067 ElfW(Shdr) *symtab_shdr = NULL, *strtab_shdr = NULL;
2068 ElfW(Shdr) *dynsym_shdr = NULL, *dynstr_shdr = NULL;
2069 obj_info_t *obj = *objp;
2070 uintptr_t dladdr_fbase = 0;
2071
2072 fd = open(binary_filename, O_RDONLY);
2073 if (fd < 0) {
2074 goto fail;
2075 }
2076 filesize = lseek(fd, 0, SEEK_END);
2077 if (filesize < 0) {
2078 int e = errno;
2079 close(fd);
2080 kprintf("lseek: %s\n", strerror(e));
2081 goto fail;
2082 }
2083#if SIZEOF_OFF_T > SIZEOF_SIZE_T
2084 if (filesize > (off_t)SIZE_MAX) {
2085 close(fd);
2086 kprintf("Too large file %s\n", binary_filename);
2087 goto fail;
2088 }
2089#endif
2090 lseek(fd, 0, SEEK_SET);
2091 /* async-signal unsafe */
2092 file = (char *)mmap(NULL, (size_t)filesize, PROT_READ, MAP_SHARED, fd, 0);
2093 if (file == MAP_FAILED) {
2094 int e = errno;
2095 close(fd);
2096 kprintf("mmap: %s\n", strerror(e));
2097 goto fail;
2098 }
2099 close(fd);
2100
2101 ehdr = (ElfW(Ehdr) *)file;
2102 if (memcmp(ehdr->e_ident, "\177ELF", 4) != 0) {
2103 /*
2104 * Huh? Maybe filename was overridden by setproctitle() and
2105 * it match non-elf file.
2106 */
2107 goto fail;
2108 }
2109 obj->mapped = file;
2110 obj->mapped_size = (size_t)filesize;
2111
2112 shdr = (ElfW(Shdr) *)(file + ehdr->e_shoff);
2113
2114 shstr_shdr = shdr + ehdr->e_shstrndx;
2115 shstr = file + shstr_shdr->sh_offset;
2116
2117 for (i = 0; i < ehdr->e_shnum; i++) {
2118 char *section_name = shstr + shdr[i].sh_name;
2119 switch (shdr[i].sh_type) {
2120 case SHT_STRTAB:
2121 if (!strcmp(section_name, ".strtab")) {
2122 strtab_shdr = shdr + i;
2123 }
2124 else if (!strcmp(section_name, ".dynstr")) {
2125 dynstr_shdr = shdr + i;
2126 }
2127 break;
2128 case SHT_SYMTAB:
2129 /* if (!strcmp(section_name, ".symtab")) */
2130 symtab_shdr = shdr + i;
2131 break;
2132 case SHT_DYNSYM:
2133 /* if (!strcmp(section_name, ".dynsym")) */
2134 dynsym_shdr = shdr + i;
2135 break;
2136 case SHT_NOTE:
2137 if (!strcmp(section_name, ".note.gnu.build-id")) {
2138 note_gnu_build_id = shdr + i;
2139 }
2140 break;
2141 case SHT_PROGBITS:
2142 if (!strcmp(section_name, ".gnu_debuglink")) {
2143 gnu_debuglink_shdr = shdr + i;
2144 }
2145 else {
2146 const char *debug_section_names[] = {
2147 ".debug_abbrev",
2148 ".debug_info",
2149 ".debug_line",
2150 ".debug_ranges",
2151 ".debug_str_offsets",
2152 ".debug_addr",
2153 ".debug_rnglists",
2154 ".debug_str",
2155 ".debug_line_str"
2156 };
2157
2158 for (j=0; j < DWARF_SECTION_COUNT; j++) {
2159 struct dwarf_section *s = obj_dwarf_section_at(obj, j);
2160
2161 if (strcmp(section_name, debug_section_names[j]) != 0)
2162 continue;
2163
2164 s->ptr = file + shdr[i].sh_offset;
2165 s->size = shdr[i].sh_size;
2166 s->flags = shdr[i].sh_flags;
2167 if (s->flags & SHF_COMPRESSED) {
2168 s->size = uncompress_debug_section(&shdr[i], file, &s->ptr);
2169 if (!s->size) goto fail;
2170 }
2171 break;
2172 }
2173 }
2174 break;
2175 }
2176 }
2177
2178 if (offset == -1) {
2179 /* main executable */
2180 offset = 0;
2181 if (dynsym_shdr && dynstr_shdr) {
2182 char *strtab = file + dynstr_shdr->sh_offset;
2183 ElfW(Sym) *symtab = (ElfW(Sym) *)(file + dynsym_shdr->sh_offset);
2184 int symtab_count = (int)(dynsym_shdr->sh_size / sizeof(ElfW(Sym)));
2185 void *handle = dlopen(NULL, RTLD_NOW|RTLD_LOCAL);
2186 if (handle) {
2187 for (j = 0; j < symtab_count; j++) {
2188 ElfW(Sym) *sym = &symtab[j];
2189 Dl_info info;
2190 void *s;
2191 if (ELF_ST_TYPE(sym->st_info) != STT_FUNC || sym->st_size == 0) continue;
2192 s = dlsym(handle, strtab + sym->st_name);
2193 if (s && dladdr(s, &info)) {
2194 obj->base_addr = dladdr_fbase;
2195 dladdr_fbase = (uintptr_t)info.dli_fbase;
2196 break;
2197 }
2198 }
2199 dlclose(handle);
2200 }
2201 if (ehdr->e_type == ET_EXEC) {
2202 obj->base_addr = 0;
2203 }
2204 else {
2205 /* PIE (position-independent executable) */
2206 obj->base_addr = dladdr_fbase;
2207 }
2208 }
2209 }
2210
2211 if (obj->debug_info.ptr && obj->debug_abbrev.ptr) {
2212 DebugInfoReader reader;
2213 debug_info_reader_init(&reader, obj);
2214 i = 0;
2215 while (reader.p < reader.pend) {
2216 /* kprintf("%d:%tx: CU[%d]\n", __LINE__, reader.p - reader.obj->debug_info.ptr, i++); */
2217 if (di_read_cu(&reader, errout)) goto use_symtab;
2218 if (!debug_info_read(&reader, num_traces, traces, lines, offset, errout))
2219 goto use_symtab;
2220 }
2221 }
2222 else {
2223 /* This file doesn't have dwarf, use symtab or dynsym */
2224use_symtab:
2225 if (!symtab_shdr) {
2226 /* This file doesn't have symtab, use dynsym instead */
2227 symtab_shdr = dynsym_shdr;
2228 strtab_shdr = dynstr_shdr;
2229 }
2230
2231 if (symtab_shdr && strtab_shdr) {
2232 char *strtab = file + strtab_shdr->sh_offset;
2233 ElfW(Sym) *symtab = (ElfW(Sym) *)(file + symtab_shdr->sh_offset);
2234 int symtab_count = (int)(symtab_shdr->sh_size / sizeof(ElfW(Sym)));
2235 for (j = 0; j < symtab_count; j++) {
2236 ElfW(Sym) *sym = &symtab[j];
2237 uintptr_t saddr = (uintptr_t)sym->st_value + obj->base_addr;
2238 if (ELF_ST_TYPE(sym->st_info) != STT_FUNC) continue;
2239 for (i = offset; i < num_traces; i++) {
2240 uintptr_t d = (uintptr_t)traces[i] - saddr;
2241 if (lines[i].line > 0 || d > (uintptr_t)sym->st_size)
2242 continue;
2243 /* fill symbol name and addr from .symtab */
2244 if (!lines[i].sname) lines[i].sname = strtab + sym->st_name;
2245 lines[i].saddr = saddr;
2246 lines[i].path = obj->path;
2247 lines[i].base_addr = obj->base_addr;
2248 }
2249 }
2250 }
2251 }
2252
2253 if (!obj->debug_line.ptr) {
2254 /* This file doesn't have .debug_line section,
2255 let's check .gnu_debuglink section instead. */
2256 if (gnu_debuglink_shdr && check_debuglink) {
2257 follow_debuglink(file + gnu_debuglink_shdr->sh_offset,
2258 num_traces, traces,
2259 objp, lines, offset, errout);
2260 }
2261 if (note_gnu_build_id && check_debuglink) {
2262 ElfW(Nhdr) *nhdr = (ElfW(Nhdr)*) (file + note_gnu_build_id->sh_offset);
2263 const char *build_id = (char *)(nhdr + 1) + nhdr->n_namesz;
2264 follow_debuglink_build_id(build_id, nhdr->n_descsz,
2265 num_traces, traces,
2266 objp, lines, offset, errout);
2267 }
2268 goto finish;
2269 }
2270
2271 if (parse_debug_line(num_traces, traces,
2272 obj->debug_line.ptr,
2273 obj->debug_line.size,
2274 obj, lines, offset, errout) == -1)
2275 goto fail;
2276
2277finish:
2278 return dladdr_fbase;
2279fail:
2280 return (uintptr_t)-1;
2281}
2282#else /* Mach-O */
2283/* read file and fill lines */
2284static uintptr_t
2285fill_lines(int num_traces, void **traces, int check_debuglink,
2286 obj_info_t **objp, line_info_t *lines, int offset, FILE *errout)
2287{
2288# ifdef __LP64__
2289# define LP(x) x##_64
2290# else
2291# define LP(x) x
2292# endif
2293 int fd;
2294 off_t filesize;
2295 char *file, *p = NULL;
2296 obj_info_t *obj = *objp;
2297 struct LP(mach_header) *header;
2298 uintptr_t dladdr_fbase = 0;
2299
2300 {
2301 char *s = binary_filename;
2302 char *base = strrchr(binary_filename, '/')+1;
2303 size_t max = PATH_MAX;
2304 size_t size = strlen(binary_filename);
2305 size_t basesize = size - (base - binary_filename);
2306 s += size;
2307 max -= size;
2308 p = s;
2309 size = strlcpy(s, ".dSYM/Contents/Resources/DWARF/", max);
2310 if (size == 0) goto fail;
2311 s += size;
2312 max -= size;
2313 if (max <= basesize) goto fail;
2314 memcpy(s, base, basesize);
2315 s[basesize] = 0;
2316
2317 fd = open(binary_filename, O_RDONLY);
2318 if (fd < 0) {
2319 *p = 0; /* binary_filename becomes original file name */
2320 fd = open(binary_filename, O_RDONLY);
2321 if (fd < 0) {
2322 goto fail;
2323 }
2324 }
2325 }
2326
2327 filesize = lseek(fd, 0, SEEK_END);
2328 if (filesize < 0) {
2329 int e = errno;
2330 close(fd);
2331 kprintf("lseek: %s\n", strerror(e));
2332 goto fail;
2333 }
2334#if SIZEOF_OFF_T > SIZEOF_SIZE_T
2335 if (filesize > (off_t)SIZE_MAX) {
2336 close(fd);
2337 kprintf("Too large file %s\n", binary_filename);
2338 goto fail;
2339 }
2340#endif
2341 lseek(fd, 0, SEEK_SET);
2342 /* async-signal unsafe */
2343 file = (char *)mmap(NULL, (size_t)filesize, PROT_READ, MAP_SHARED, fd, 0);
2344 if (file == MAP_FAILED) {
2345 int e = errno;
2346 close(fd);
2347 kprintf("mmap: %s\n", strerror(e));
2348 goto fail;
2349 }
2350 close(fd);
2351
2352 obj->mapped = file;
2353 obj->mapped_size = (size_t)filesize;
2354
2355 header = (struct LP(mach_header) *)file;
2356 if (header->magic == LP(MH_MAGIC)) {
2357 /* non universal binary */
2358 p = file;
2359 }
2360 else if (header->magic == FAT_CIGAM) {
2361 struct LP(mach_header) *mhp = _NSGetMachExecuteHeader();
2362 struct fat_header *fat = (struct fat_header *)file;
2363 char *q = file + sizeof(*fat);
2364 uint32_t nfat_arch = __builtin_bswap32(fat->nfat_arch);
2365 /* kprintf("%d: fat:%s %d\n",__LINE__, binary_filename,nfat_arch); */
2366 for (uint32_t i = 0; i < nfat_arch; i++) {
2367 struct fat_arch *arch = (struct fat_arch *)q;
2368 cpu_type_t cputype = __builtin_bswap32(arch->cputype);
2369 cpu_subtype_t cpusubtype = __builtin_bswap32(arch->cpusubtype);
2370 uint32_t offset = __builtin_bswap32(arch->offset);
2371 /* kprintf("%d: fat %d %x/%x %x/%x\n",__LINE__, i, mhp->cputype,mhp->cpusubtype, cputype,cpusubtype); */
2372 if (mhp->cputype == cputype &&
2373 (cpu_subtype_t)(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) == cpusubtype) {
2374 p = file + offset;
2375 file = p;
2376 header = (struct LP(mach_header) *)p;
2377 if (header->magic == LP(MH_MAGIC)) {
2378 goto found_mach_header;
2379 }
2380 break;
2381 }
2382 q += sizeof(*arch);
2383 }
2384 kprintf("'%s' is not a Mach-O universal binary file!\n",binary_filename);
2385 close(fd);
2386 goto fail;
2387 }
2388 else {
2389# ifdef __LP64__
2390# define bitsize "64"
2391# else
2392# define bitsize "32"
2393# endif
2394 kprintf("'%s' is not a " bitsize
2395 "-bit Mach-O file!\n",binary_filename);
2396# undef bitsize
2397 close(fd);
2398 goto fail;
2399 }
2400found_mach_header:
2401 p += sizeof(*header);
2402
2403 for (uint32_t i = 0; i < (uint32_t)header->ncmds; i++) {
2404 struct load_command *lcmd = (struct load_command *)p;
2405 switch (lcmd->cmd) {
2406 case LP(LC_SEGMENT):
2407 {
2408 static const char *debug_section_names[] = {
2409 "__debug_abbrev",
2410 "__debug_info",
2411 "__debug_line",
2412 "__debug_ranges",
2413 "__debug_str_offsets",
2414 "__debug_addr",
2415 "__debug_rnglists",
2416 "__debug_str",
2417 "__debug_line_str",
2418 };
2419 struct LP(segment_command) *scmd = (struct LP(segment_command) *)lcmd;
2420 if (strcmp(scmd->segname, "__TEXT") == 0) {
2421 obj->vmaddr = scmd->vmaddr;
2422 }
2423 else if (strcmp(scmd->segname, "__DWARF") == 0) {
2424 p += sizeof(struct LP(segment_command));
2425 for (uint64_t i = 0; i < scmd->nsects; i++) {
2426 struct LP(section) *sect = (struct LP(section) *)p;
2427 p += sizeof(struct LP(section));
2428 for (int j=0; j < DWARF_SECTION_COUNT; j++) {
2429 struct dwarf_section *s = obj_dwarf_section_at(obj, j);
2430
2431 if (strcmp(sect->sectname, debug_section_names[j]) != 0
2432#ifdef __APPLE__
2433 /* macOS clang 16 generates DWARF5, which have Mach-O
2434 * section names that are limited to 16 characters,
2435 * which causes sections with long names to be truncated
2436 * and not match above.
2437 * See: https://wiki.dwarfstd.org/Best_Practices.md#Mach-2d-O
2438 */
2439 && strncmp(sect->sectname, debug_section_names[j], 16) != 0
2440#endif
2441 )
2442 continue;
2443
2444 s->ptr = file + sect->offset;
2445 s->size = sect->size;
2446 s->flags = sect->flags;
2447 if (s->flags & SHF_COMPRESSED) {
2448 goto fail;
2449 }
2450 break;
2451 }
2452 }
2453 }
2454 }
2455 break;
2456
2457 case LC_SYMTAB:
2458 {
2459 struct symtab_command *cmd = (struct symtab_command *)lcmd;
2460 struct LP(nlist) *nl = (struct LP(nlist) *)(file + cmd->symoff);
2461 char *strtab = file + cmd->stroff, *sname = 0;
2462 uint32_t j;
2463 uintptr_t saddr = 0;
2464 /* kprintf("[%2d]: %x/symtab %p\n", i, cmd->cmd, (void *)p); */
2465 for (j = 0; j < cmd->nsyms; j++) {
2466 uintptr_t symsize, d;
2467 struct LP(nlist) *e = &nl[j];
2468 /* kprintf("[%2d][%4d]: %02x/%x/%x: %s %llx\n", i, j, e->n_type,e->n_sect,e->n_desc,strtab+e->n_un.n_strx,e->n_value); */
2469 if (e->n_type != N_FUN) continue;
2470 if (e->n_sect) {
2471 saddr = (uintptr_t)e->n_value + obj->base_addr - obj->vmaddr;
2472 sname = strtab + e->n_un.n_strx;
2473 /* kprintf("[%2d][%4d]: %02x/%x/%x: %s %llx\n", i, j, e->n_type,e->n_sect,e->n_desc,strtab+e->n_un.n_strx,e->n_value); */
2474 continue;
2475 }
2476 for (int k = offset; k < num_traces; k++) {
2477 d = (uintptr_t)traces[k] - saddr;
2478 symsize = e->n_value;
2479 /* kprintf("%lx %lx %lx\n",saddr,symsize,traces[k]); */
2480 if (lines[k].line > 0 || d > (uintptr_t)symsize)
2481 continue;
2482 /* fill symbol name and addr from .symtab */
2483 if (!lines[k].sname) lines[k].sname = sname;
2484 lines[k].saddr = saddr;
2485 lines[k].path = obj->path;
2486 lines[k].base_addr = obj->base_addr;
2487 }
2488 }
2489 }
2490 }
2491 p += lcmd->cmdsize;
2492 }
2493
2494 if (obj->debug_info.ptr && obj->debug_abbrev.ptr) {
2495 DebugInfoReader reader;
2496 debug_info_reader_init(&reader, obj);
2497 while (reader.p < reader.pend) {
2498 if (di_read_cu(&reader, errout)) goto fail;
2499 if (!debug_info_read(&reader, num_traces, traces, lines, offset, errout))
2500 goto fail;
2501 }
2502 }
2503
2504 if (parse_debug_line(num_traces, traces,
2505 obj->debug_line.ptr,
2506 obj->debug_line.size,
2507 obj, lines, offset, errout) == -1)
2508 goto fail;
2509
2510 return dladdr_fbase;
2511fail:
2512 return (uintptr_t)-1;
2513}
2514#endif
2515
2516#define HAVE_MAIN_EXE_PATH
2517#if defined(__FreeBSD__) || defined(__DragonFly__)
2518# include <sys/sysctl.h>
2519#endif
/* ssize_t main_exe_path(FILE *errout)
 *
 * Stores the path of the main executable in `binary_filename`
 * and returns strlen(binary_filename); the stored path is
 * NUL-terminated.  On failure it returns 0 or a negative value,
 * depending on the platform.
 */
2526#if defined(__linux__) || defined(__NetBSD__)
2527static ssize_t
2528main_exe_path(FILE *errout)
2529{
2530# if defined(__linux__)
2531# define PROC_SELF_EXE "/proc/self/exe"
2532# elif defined(__NetBSD__)
2533# define PROC_SELF_EXE "/proc/curproc/exe"
2534# endif
2535 ssize_t len = readlink(PROC_SELF_EXE, binary_filename, PATH_MAX);
2536 if (len < 0) return 0;
2537 binary_filename[len] = 0;
2538 return len;
2539}
2540#elif defined(__FreeBSD__) || defined(__DragonFly__)
2541static ssize_t
2542main_exe_path(FILE *errout)
2543{
2544 int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
2545 size_t len = PATH_MAX;
2546 int err = sysctl(mib, 4, binary_filename, &len, NULL, 0);
2547 if (err) {
2548 kprintf("Can't get the path of ruby");
2549 return -1;
2550 }
2551 len--; /* sysctl sets strlen+1 */
2552 return len;
2553}
2554#elif defined(HAVE_LIBPROC_H)
2555static ssize_t
2556main_exe_path(FILE *errout)
2557{
2558 int len = proc_pidpath(getpid(), binary_filename, PATH_MAX);
2559 if (len == 0) return 0;
2560 binary_filename[len] = 0;
2561 return len;
2562}
2563#else
2564#undef HAVE_MAIN_EXE_PATH
2565#endif
2566
2567static void
2568print_line0(line_info_t *line, void *address, FILE *errout)
2569{
2570 uintptr_t addr = (uintptr_t)address;
2571 uintptr_t d = addr - line->saddr;
2572 if (!address) {
2573 /* inlined */
2574 if (line->dirname && line->dirname[0]) {
2575 kprintf("%s(%s) %s/%s:%d\n", line->path, line->sname, line->dirname, line->filename, line->line);
2576 }
2577 else {
2578 kprintf("%s(%s) %s:%d\n", line->path, line->sname, line->filename, line->line);
2579 }
2580 }
2581 else if (!line->path) {
2582 kprintf("[0x%"PRIxPTR"]\n", addr);
2583 }
2584 else if (!line->sname) {
2585 kprintf("%s(0x%"PRIxPTR") [0x%"PRIxPTR"]\n", line->path, addr-line->base_addr, addr);
2586 }
2587 else if (!line->saddr) {
2588 kprintf("%s(%s) [0x%"PRIxPTR"]\n", line->path, line->sname, addr);
2589 }
2590 else if (line->line <= 0) {
2591 kprintf("%s(%s+0x%"PRIxPTR") [0x%"PRIxPTR"]\n", line->path, line->sname,
2592 d, addr);
2593 }
2594 else if (!line->filename) {
2595 kprintf("%s(%s+0x%"PRIxPTR") [0x%"PRIxPTR"] ???:%d\n", line->path, line->sname,
2596 d, addr, line->line);
2597 }
2598 else if (line->dirname && line->dirname[0]) {
2599 kprintf("%s(%s+0x%"PRIxPTR") [0x%"PRIxPTR"] %s/%s:%d\n", line->path, line->sname,
2600 d, addr, line->dirname, line->filename, line->line);
2601 }
2602 else {
2603 kprintf("%s(%s+0x%"PRIxPTR") [0x%"PRIxPTR"] %s:%d\n", line->path, line->sname,
2604 d, addr, line->filename, line->line);
2605 }
2606}
2607
2608static void
2609print_line(line_info_t *line, void *address, FILE *errout)
2610{
2611 print_line0(line, address, errout);
2612 if (line->next) {
2613 print_line(line->next, NULL, errout);
2614 }
2615}
2616
2617void
2618rb_dump_backtrace_with_lines(int num_traces, void **traces, FILE *errout)
2619{
2620 int i;
2621 /* async-signal unsafe */
2622 line_info_t *lines = (line_info_t *)calloc(num_traces, sizeof(line_info_t));
2623 obj_info_t *obj = NULL;
2624 /* 2 is NULL + main executable */
2625 void **dladdr_fbases = (void **)calloc(num_traces+2, sizeof(void *));
2626
2627#ifdef HAVE_MAIN_EXE_PATH
2628 char *main_path = NULL; /* used on printing backtrace */
2629 ssize_t len;
2630 if ((len = main_exe_path(errout)) > 0) {
2631 main_path = (char *)alloca(len + 1);
2632 if (main_path) {
2633 uintptr_t addr;
2634 memcpy(main_path, binary_filename, len+1);
2635 append_obj(&obj);
2636 obj->path = main_path;
2637 addr = fill_lines(num_traces, traces, 1, &obj, lines, 0, errout);
2638 if (addr != (uintptr_t)-1) {
2639 dladdr_fbases[0] = (void *)addr;
2640 }
2641 }
2642 }
2643#endif
2644
2645 /* fill source lines by reading dwarf */
2646 for (i = 0; i < num_traces; i++) {
2647 Dl_info info;
2648 if (lines[i].line) continue;
2649 if (dladdr(traces[i], &info)) {
2650 const char *path;
2651 void **p;
2652
2653 /* skip symbols which is in already checked objects */
2654 /* if the binary is strip-ed, this may effect */
2655 for (p=dladdr_fbases; *p; p++) {
2656 if (*p == info.dli_fbase) {
2657 if (info.dli_fname) lines[i].path = info.dli_fname;
2658 if (info.dli_sname) lines[i].sname = info.dli_sname;
2659 goto next_line;
2660 }
2661 }
2662 *p = info.dli_fbase;
2663
2664 append_obj(&obj);
2665 obj->base_addr = (uintptr_t)info.dli_fbase;
2666 path = info.dli_fname;
2667 obj->path = path;
2668 if (path) lines[i].path = path;
2669 if (info.dli_sname) {
2670 lines[i].sname = info.dli_sname;
2671 lines[i].saddr = (uintptr_t)info.dli_saddr;
2672 }
2673 strlcpy(binary_filename, path, PATH_MAX);
2674 if (fill_lines(num_traces, traces, 1, &obj, lines, i, errout) == (uintptr_t)-1)
2675 break;
2676 }
2677next_line:
2678 continue;
2679 }
2680
2681 /* output */
2682 for (i = 0; i < num_traces; i++) {
2683 print_line(&lines[i], traces[i], errout);
2684
2685 /* FreeBSD's backtrace may show _start and so on */
2686 if (lines[i].sname && strcmp("main", lines[i].sname) == 0)
2687 break;
2688 }
2689
2690 /* free */
2691 while (obj) {
2692 obj_info_t *o = obj;
2693 for (i=0; i < DWARF_SECTION_COUNT; i++) {
2694 struct dwarf_section *s = obj_dwarf_section_at(obj, i);
2695 if (s->flags & SHF_COMPRESSED) {
2696 free(s->ptr);
2697 }
2698 }
2699 if (obj->mapped_size) {
2700 munmap(obj->mapped, obj->mapped_size);
2701 }
2702 obj = o->next;
2703 free(o);
2704 }
2705 for (i = 0; i < num_traces; i++) {
2706 line_info_t *line = lines[i].next;
2707 while (line) {
2708 line_info_t *l = line;
2709 line = line->next;
2710 free(l);
2711 }
2712 }
2713 free(lines);
2714 free(dladdr_fbases);
2715}
2716
2717#undef kprintf
2718
2719#else /* defined(USE_ELF) */
2720#error not supported
2721#endif
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
Definition assume.h:29
int off
Offset inside of ptr.
Definition io.h:5
int len
Length of the buffer.
Definition io.h:8
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define errno
Ractor-aware version of errno.
Definition ruby.h:388
C99 shim for <stdbool.h>