From 51e499b3a0e4cd847172357b84d9a01479ddd558 Mon Sep 17 00:00:00 2001
From: aolo2
Date: Sun, 11 Jul 2021 21:00:42 +0300
Subject: [PATCH] Extract debug info (compilation dir and function list) ahead of time

---
 common.h |  18 +++-
 dwarf.c  | 306 +++++++++++++++++++++++++++-------------------------
 main.c   |  10 +-
 util.c   |  31 +++++-
 4 files changed, 204 insertions(+), 161 deletions(-)

diff --git a/common.h b/common.h
index 93206ff..1ebf049 100644
--- a/common.h
+++ b/common.h
@@ -43,6 +43,21 @@ struct mi_sourcepoint {
     int file;
 };
 
+struct mi_function {
+    char *name; // DW_AT_name of the subprogram
+    u64 offset; // DW_AT_low_pc of the subprogram
+};
+
+struct mi_debuginfo {
+    struct mi_sourcepoint *sp_table;
+    int sp_count;
+
+    struct mi_function functions[64]; // TODO
+    int func_count;
+
+    char *comp_dir; // DW_AT_comp_dir of the compile unit
+};
+
 struct mi_process {
     pid_t pid;
 
@@ -52,8 +67,7 @@ struct mi_process {
     u64 base_address;
     u64 main_address;
 
-    struct mi_sourcepoint *sp_table;
-    int sp_count;
+    struct mi_debuginfo debug;
 
     char **source_file_names;
     struct mi_buffer *source_files;
diff --git a/dwarf.c b/dwarf.c
index b3034d3..e5a0cff 100644
--- a/dwarf.c
+++ b/dwarf.c
@@ -139,19 +139,147 @@ abbrev_entry_offset(u8 *file, u64 abbrev_offset, u32 requested_code)
     return(0);
 }
 
-static u64
-find_subroutine_offset(u8 *file, u64 header_size, u8 address_size,
-                       u64 string_offset, u64 abbrev_offset, u64 data_offset,
-                       char *subroutine)
+// Decodes one attribute value of the given DWARF form located at
+// file + data_offset and stores it in *value. String forms store a pointer
+// into file, DW_FORM_sec_offset stores the raw 4-byte offset. *value is zeroed
+// first, so narrow forms and forms without a decoded value never leave garbage.
+// Returns the number of bytes the attribute occupies (0 for unknown forms).
+static u32
+read_actual_debug_data(u8 *file, u64 string_offset, u32 address_size, u64 base_data_offset, u32 form, u64 data_offset, u64 *value)
+{
+    u32 increment = 0;
+
+    *value = 0;
+
+    switch (form) {
+        case DW_FORM_sec_offset:
+        case DW_FORM_strp: {
+            u32 offset;
+            memcpy(&offset, file + data_offset, 4);
+            // NOTE: only DW_FORM_strp indexes .debug_str, a sec_offset is just a number
+            *value = (form == DW_FORM_strp) ? (u64) (file + string_offset + offset) : offset;
+            increment = 4; // 8 bytes for x64 DWARF!
+            break;
+        }
+
+        case DW_FORM_addr: {
+            memcpy(value, file + data_offset, address_size);
+            increment = address_size;
+            break;
+        }
+
+        case DW_FORM_string: {
+            char *ptr = (char *) (file + data_offset);
+            *value = (u64) ptr;
+            increment = strlen(ptr) + 1;
+            break;
+        }
+
+        case DW_FORM_flag_present: {
+            *value = 1;
+            break;
+        }
+
+        case DW_FORM_ref4: {
+            u32 offset;
+            memcpy(&offset, file + data_offset, 4);
+            increment = 4;
+            *value = file[base_data_offset + offset];
+            break;
+        }
+
+        case DW_FORM_exprloc: {
+            // TODO: return value to caller
+
+            u32 length;
+            increment = decode_leb128(file + data_offset, &length);
+            increment += length;
+
+            break;
+        }
+
+        case DW_FORM_data1: {
+            *value = file[data_offset];
+            increment = 1;
+            break;
+        }
+
+        case DW_FORM_data2: {
+            memcpy(value, file + data_offset, 2);
+            increment = 2;
+            break;
+        }
+
+        case DW_FORM_data4: {
+            memcpy(value, file + data_offset, 4);
+            increment = 4;
+            break;
+        }
+
+        case DW_FORM_data8: {
+            memcpy(value, file + data_offset, 8);
+            increment = 8;
+            break;
+        }
+
+        case DW_FORM_sdata: {
+            s32 decoded = 0;
+            increment = decode_leb128s(file + data_offset, &decoded);
+            *value = (u64) (long long) decoded; // sign-extend instead of punning the u64
+            break;
+        }
+
+        case DW_FORM_udata: {
+            u32 decoded = 0;
+            increment = decode_leb128(file + data_offset, &decoded);
+            *value = decoded;
+            break;
+        }
+
+        default: {
+            if (form) {
+                printf("unknown attribute form %d\n", form);
+            }
+        }
+    }
+
+    return(increment);
+}
+
+// Reads the compilation unit header at the start of .debug_info and walks the
+// entries that follow it, recording the compilation directory (DW_AT_comp_dir)
+// and the name / low_pc of each DW_TAG_subprogram in dest. Subprograms beyond
+// the capacity of dest->functions are skipped instead of overflowing the array.
+static void
+parse_debug_info(u8 *file, struct mi_debuginfo *dest)
 {
+    u64 debug_info_offset = get_section_offset(file, ".debug_info");
+    printf("Found .debug_info at offset %#lx\n", debug_info_offset);
+
+    u64 debug_abbrev_offset = get_section_offset(file, ".debug_abbrev");
+    printf("Found .debug_abbrev at offset %#lx\n", debug_abbrev_offset);
+
+    u64 debug_str_offset = get_section_offset(file, ".debug_str");
+    printf("Found .debug_str at offset %#lx\n", debug_str_offset);
+
+    struct dwarf_debug_info_header_x32 di_header = { 0 };
+    u32 header_size = sizeof(di_header);
+    memcpy(&di_header, file + debug_info_offset, header_size);
+
+    u64 abbrev_offset = debug_abbrev_offset + di_header.debug_abbrev_offset;
+    u64 data_offset = debug_info_offset + header_size;
+
     u32 code, tag;
     u64 schema_offset;
     u32 depth = 0;
-    u64 original_data_offset = data_offset;
+    u64 base_data_offset = data_offset - header_size;
     int found_sr = 0;
 
-    do {
+    struct mi_function *func = dest->functions;
+    int max_funcs = (int) (sizeof(dest->functions) / sizeof(dest->functions[0]));
+
+    for (;;) {
         data_offset += decode_leb128(file + data_offset, &code);
 
         if (code == 0) {
@@ -180,127 +308,32 @@ find_subroutine_offset(u8 *file, u64 header_size, u8 address_size,
             schema_offset += decode_leb128(file + schema_offset, &attribute);
             schema_offset += decode_leb128(file + schema_offset, &form);
 
-            if (attribute) {
-                //printf("\t%s ", attribute_to_str(attribute));
-            }
+            u64 value = 0;
 
-            switch (form) {
-                case DW_FORM_sec_offset:
-                case DW_FORM_strp: {
-                    u32 data;
-                    memcpy(&data, file + data_offset, 4);
-                    data_offset += 4; // 8 bytes for x64 DWARF!
-
-                    if (form == DW_FORM_strp) {
-                        char *str = (char *) file + string_offset + data;
-                        //printf("(indirect string, offset: %#x): %s\n", data, str);
-                        if (tag == DW_TAG_subprogram) {
-                            if (strcmp(str, subroutine) == 0) {
-                                found_sr = 1;
-                            }
-                        }
-                    } else {
-                        //printf("%#x\n", data);
-                    }
-
-                    break;
-                }
-
-                case DW_FORM_addr: {
-                    u64 data = 0;
-                    memcpy(&data, file + data_offset, address_size);
-                    data_offset += address_size;
-                    //printf("%#lx\n", data);
-
-                    if (tag == DW_TAG_subprogram && found_sr == 1 && attribute == DW_AT_low_pc) {
-                        return(data);
-                    }
-
-                    break;
-                };
-
-                case DW_FORM_string: {
-                    char *data = (char *) file + data_offset;
-                    data_offset += strlen(data) + 1;
-                    //printf("%s\n", data);
-                    break;
-                }
-
-                case DW_FORM_flag_present: {
-                    int data = 1;
-                    //printf("Flag = 1\n");
-                    break;
-                }
-
-                case DW_FORM_ref4: {
-                    u32 data;
-                    memcpy(&data, file + data_offset, 4);
-                    data_offset += 4;
-                    //printf("%#x\n", data);
-                    u32 referenced_data = file[original_data_offset - header_size + data];
-                    break;
+            data_offset += read_actual_debug_data(file, debug_str_offset, di_header.address_size, base_data_offset, form, data_offset, &value);
+
+            if (tag == DW_TAG_compile_unit) {
+                if (attribute == DW_AT_comp_dir) {
+                    dest->comp_dir = (char *) value;
                 }
-
-                case DW_FORM_exprloc: {
-                    u32 length;
-                    data_offset += decode_leb128(file + data_offset, &length);
-                    //printf("%d byte block:", length);
-
-                    for (u32 i = 0; i < length; ++i) {
-                        //printf(" %x", file[data_offset + i]);
-                    }
-                    //printf("\n");
-
-                    data_offset += length;
-
-                    break;
-                }
-
-                case DW_FORM_data1: {
-                    u8 data = file[data_offset];
-                    data_offset += 1;
-                    //printf("%#x\n", data);
-                    break;
-                };
-
-                case DW_FORM_data2: {
-                    u16 data;
-                    memcpy(&data, file + data_offset, 2);
-                    data_offset += 2;
-                    //printf("%#x\n", data);
-                    break;
-                };
-
-                case DW_FORM_data4: {
-                    u32 data;
-                    memcpy(&data, file + data_offset, 4);
-                    data_offset += 4;
-                    //printf("%#x\n", data);
-                    break;
-                };
-
-                case DW_FORM_data8: {
-                    u64 data;
-                    memcpy(&data, file + data_offset, 8);
-                    data_offset += 8;
-                    //printf("%#lx\n", data);
-                    break;
-                };
-
-                default: {
-                    if (form) {
-                        printf("unknown attribute form %d\n", form);
-                    }
+            } else if (tag == DW_TAG_subprogram && dest->func_count < max_funcs) {
+                if (attribute == DW_AT_name) {
+                    func->name = (char *) value;
+                } else if (attribute == DW_AT_low_pc) {
+                    func->offset = value;
                 }
             }
         } while (attribute != 0 || form != 0);
-    } while (1);
-
-    return(0);
+
+        if (tag == DW_TAG_subprogram && dest->func_count < max_funcs) {
+            ++func;
+            ++dest->func_count;
+        }
+    }
 }
 
 static void
-construct_line_number_table(u8 *file, struct mi_sourcepoint *dest, char **dest_files, int *dest_size, int *dest_files_size)
+parse_debug_line(u8 *file, struct mi_sourcepoint *dest, char **dest_files, int *dest_size, int *dest_files_size)
 {
     u64 dl_offset = get_section_offset(file, ".debug_line");
 
@@ -326,15 +359,15 @@ is followed by a single null byte." */
         while (*p != 0) {
             ++p;
         }
+
+        ++p;
     }
 
     header.ndirs = ndirs;
     header.include_directories = 0; // malloc(ndirs * sizeof(char *));
 
-    dl_offset += (p - (file + dl_offset)) + 1;
-
+    ++p;
 
-    p = file + dl_offset;
     while (*p != 0) {
         /* null-terminated string */
         if (dest_files) {
@@ -658,7 +691,7 @@ get_executable_base_address(u8 *elf_file, int pid)
             end = strtoll(at + 1, &at, 16);
             (void) end;
 
-            while (*at < '0' || *at > '9') ++at;
+            while (*at < '0' || '9' < *at) ++at;
 
             offset = strtoll(at, &at, 16);
 
@@ -671,30 +704,3 @@ get_executable_base_address(u8 *elf_file, int pid)
 
     return(0);
 }
-
-static u64
-get_address_of_subroutine(u8 *file, char *sr)
-{
-    u64 debug_info_offset = get_section_offset(file, ".debug_info");
-    printf("Found .debug_info at offset %#lx\n", debug_info_offset);
-
-    u64 debug_line_offset = get_section_offset(file, ".debug_line");
-    printf("Found .debug_line at offset %#lx\n", debug_line_offset);
-
-    u64 debug_abbrev_offset = get_section_offset(file, ".debug_abbrev");
-    printf("Found .debug_abbrev at offset %#lx\n", debug_abbrev_offset);
-
-    u64 debug_str_offset = get_section_offset(file, ".debug_str");
-    printf("Found .debug_str at offset %#lx\n", debug_str_offset);
-
-    struct dwarf_debug_info_header_x32 di_header = { 0 };
-    memcpy(&di_header, file + debug_info_offset, sizeof(di_header));
-
-    u64 abbrev_offset = debug_abbrev_offset + di_header.debug_abbrev_offset;
-    u64 data_offset = debug_info_offset + sizeof(di_header);
-
-    u64 result = find_subroutine_offset(file, sizeof(di_header), di_header.address_size,
-                                        debug_str_offset, abbrev_offset, data_offset, sr);
-
-    return(result);
-}
\ No newline at end of file
diff --git a/main.c b/main.c
index 9b84e7a..6d1985b 100644
--- a/main.c
+++ b/main.c
@@ -20,8 +20,10 @@ main(int argc, char *argv[])
     char *command = malloc(max_command_length + 1);
     char *last_command = malloc(max_command_length + 1);
 
+    parse_debug_info(process.elf, &process.debug);
+
     process.base_address = 0x555555554000UL; // get_executable_base_address(file, proc.pid);
-    process.main_address = get_address_of_subroutine(process.elf, "main");
+    process.main_address = get_address_of_subroutine(process, "main");
 
     printf("Base address: %#lx\n", process.base_address);
     printf("Main address: %#lx\n", process.main_address);
@@ -29,11 +31,11 @@ main(int argc, char *argv[])
     printf("> ");
     fflush(stdout);
 
-    construct_line_number_table(process.elf, 0, 0, &process.sp_count, &process.source_file_count);
-    process.sp_table = malloc(process.sp_count * sizeof(struct mi_sourcepoint));
+    parse_debug_line(process.elf, 0, 0, &process.debug.sp_count, &process.source_file_count);
+    process.debug.sp_table = malloc(process.debug.sp_count * sizeof(struct mi_sourcepoint));
     process.source_file_names = malloc(process.source_file_count * sizeof(char *));
     process.source_files = malloc(process.source_file_count * sizeof(char *));
-    construct_line_number_table(process.elf, process.sp_table, process.source_file_names, 0, 0);
+    parse_debug_line(process.elf, process.debug.sp_table, process.source_file_names, 0, 0);
 
     while ((command_length = getline(&command, &max_command_length, stdin))) {
         if (command_length == 1) {
diff --git a/util.c b/util.c
index 762ec65..a2e25bd 100644
--- a/util.c
+++ b/util.c
@@ -1,12 +1,28 @@
+// Returns the recorded offset (DW_AT_low_pc) of the function named sr, or 0
+// when no function with that name was extracted from the debug info.
+// Entries that never received a name are skipped.
+static u64
+get_address_of_subroutine(struct mi_process proc, char *sr)
+{
+    for (int i = 0; i < proc.debug.func_count; ++i) {
+        struct mi_function *func = proc.debug.functions + i;
+        if (func->name && 0 == strcmp(func->name, sr)) {
+            return(func->offset);
+        }
+    }
+
+    return(0);
+}
+
 static struct mi_sourcepoint *
 pc_to_sourcepoint(struct mi_process proc, u64 pc)
 {
     // NOTE: find first point BIGGER that pc, return the sourcepoint just before that
     // TODO: binary search
-    for (int i = 0; i < proc.sp_count; ++i) {
-        struct mi_sourcepoint *point = proc.sp_table + i;
+    for (int i = 0; i < proc.debug.sp_count; ++i) {
+        struct mi_sourcepoint *point = proc.debug.sp_table + i;
         if (point->pc > pc) {
-            return(proc.sp_table + i - 1);
+            return(proc.debug.sp_table + i - 1);
         }
     }
 
@@ -47,8 +63,13 @@ print_sourcepoint(struct mi_process proc, struct mi_sourcepoint *sp)
 {
     // NOTE: sourcepoint file indices are 1-based
     if (proc.source_files[sp->file - 1].data == 0) {
-        char *path = proc.source_file_names[sp->file - 1];
-        struct mi_buffer file = read_file_mmap(path);
+        char *filename = proc.source_file_names[sp->file - 1];
+        char *dir = proc.debug.comp_dir ? proc.debug.comp_dir : "."; // no DW_AT_comp_dir recorded: use cwd
+
+        char full_path[512] = { 0 };
+        snprintf(full_path, 511, "%s/%s", dir, filename);
+
+        struct mi_buffer file = read_file_mmap(full_path);
         proc.source_files[sp->file - 1] = file;
     }
 