From a83b2346a12753122e45456c7b1992cf7ebd4fb0 Mon Sep 17 00:00:00 2001
From: aolo2
Date: Sun, 1 Aug 2021 19:38:25 +0300
Subject: [PATCH] Experiments with new DIE parsing (broken)

---
Reviewer notes. Everything from here down to the first "diff --git" line is
free-form commentary: it is not part of the commit message and it changes no
file content.

  NOTE(review): this copy of the patch had its line breaks and indentation
  restored by hand. The restored hunks add up to the line counts in the "@@"
  headers and in the diffstat, but the exact indentation and the position of
  whitespace-only context lines are a best guess. Confirm against the real
  commit before trying to apply it.

  src/dwarf.c -- new static helpers

    read_die_compilation_unit, read_die_subprogram, read_die_variable,
    read_die_base_type, read_die_structure_type
      * Start at *schema_offset / *data_offset. Each iteration decodes one
        attribute and one form with decode_leb128(), then calls
        read_actual_debug_data() with that form to fill `value` and to get
        the amount by which `data` advances.
      * Attributes named in the switch are copied into *dest; all other
        attributes are read and dropped. Members of *dest whose attribute
        does not occur in the DIE are left untouched.
      * The loop stops once a pair with attribute == 0 and form == 0 has been
        read; both offsets are then written back through the pointers.
      * The data read sits before the loop condition, so
        read_actual_debug_data() is also called for the terminating (0, 0)
        pair. Presumably it advances by 0 for form 0 -- verify in
        read_actual_debug_data(), whose body is not shown in this patch.
      * `value` is declared without an initializer and is read by the switch
        right after the call.
      * read_die_compilation_unit and read_die_subprogram end with
        high_pc = low_pc + high_pc unconditionally, i.e. the stored
        DW_AT_high_pc value is always treated as an offset from low_pc.
        TODO confirm this holds for every form the producer emits.
      * read_die_variable assigns the u64 `value` to the `int location`
        member and stores the raw DW_AT_type value cast to a pointer (the
        code itself marks that line TODO).

    read_die_nop, read_die_member
      * Same walk over the (attribute, form) pairs, nothing is stored.
        read_die_member accepts `dest` but never uses it.

    read_one_die
      * Sets dest->offset = *data_offset - debug_info_offset and
        dest->tag = tag, dispatches on the tag to one of the readers above
        (any other tag goes to read_die_nop), then writes both offsets back.
      * The caller decodes the abbreviation code from data_offset before this
        call, so dest->offset is measured after that code. The removed
        `record_offset` was taken before the code was decoded.
        NOTE(review): check which of the two DW_AT_type values refer to.
      * It neither clears the union nor assigns dest->parent.

  src/dwarf.c -- read_debug_info_for_compilation_unit

      * A single `struct dwarf_die d_die`, zero-initialized once, is handed
        to read_one_die() for every DIE in the loop, so members that the
        current DIE does not overwrite still hold what an earlier DIE wrote.
      * Removed together with the "#if 0" block: the empty switch on `tag`,
        the "Resolve types" loop, the comp_unit->high_pc fix-up and
        ++dest->cu_count. No replacement for them is added in the hunks
        shown; the subject line marks the commit as broken.

  src/elf_dwarf.h

      * Adds struct dwarf_die_compilation_unit, dwarf_die_subprogram,
        dwarf_die_variable, dwarf_die_type and dwarf_die (offset, parent,
        tag, plus an anonymous union of the four payload structs).
      * The file ends without a trailing newline before and after the patch.

 src/dwarf.c     | 445 +++++++++++++++++++++++++++++++-----------------
 src/elf_dwarf.h |  46 +++++
 2 files changed, 332 insertions(+), 159 deletions(-)

diff --git a/src/dwarf.c b/src/dwarf.c
index 90aae52..685134b 100644
--- a/src/dwarf.c
+++ b/src/dwarf.c
@@ -753,6 +753,288 @@ read_actual_debug_data(u8 *file, u64 string_offset, u32 address_size, u32 form,
     return(increment);
 }
 
+static void
+read_die_compilation_unit(u8 *file, u64 debug_str_offset, u64 *schema_offset, u64 *data_offset,
+                          u8 address_size, struct dwarf_die_compilation_unit *dest)
+{
+    u64 schema = *schema_offset;
+    u64 data = *data_offset;
+
+    u32 attribute, form;
+
+    do {
+        u64 value;
+
+        schema += decode_leb128(file + schema, &attribute);
+        schema += decode_leb128(file + schema, &form);
+        data += read_actual_debug_data(file, debug_str_offset, address_size, form, data, &value);
+
+        switch (attribute) {
+            case DW_AT_name: {
+                dest->name = (char *) value;
+                break;
+            }
+
+            case DW_AT_comp_dir: {
+                dest->comp_dir = (char *) value;
+                break;
+            }
+
+            case DW_AT_low_pc: {
+                dest->low_pc = value;
+                break;
+            }
+
+            case DW_AT_high_pc: {
+                dest->high_pc = value;
+                break;
+            }
+        }
+    } while (attribute != 0 || form != 0);
+
+    dest->high_pc = dest->low_pc + dest->high_pc;
+
+    *schema_offset = schema;
+    *data_offset = data;
+}
+
+
+static void
+read_die_subprogram(u8 *file, u64 debug_str_offset, u64 *schema_offset, u64 *data_offset,
+                    u8 address_size, struct dwarf_die_subprogram *dest)
+{
+    u64 schema = *schema_offset;
+    u64 data = *data_offset;
+
+    u32 attribute, form;
+
+    do {
+        u64 value;
+
+        schema += decode_leb128(file + schema, &attribute);
+        schema += decode_leb128(file + schema, &form);
+        data += read_actual_debug_data(file, debug_str_offset, address_size, form, data, &value);
+
+        switch (attribute) {
+            case DW_AT_name: {
+                dest->name = (char *) value;
+                break;
+            }
+
+            case DW_AT_low_pc: {
+                dest->low_pc = value;
+                break;
+            }
+
+            case DW_AT_high_pc: {
+                dest->high_pc = value;
+                break;
+            }
+        }
+    } while (attribute != 0 || form != 0);
+
+    dest->high_pc = dest->low_pc + dest->high_pc;
+
+    *schema_offset = schema;
+    *data_offset = data;
+}
+
+
+static void
+read_die_variable(u8 *file, u64 debug_str_offset, u64 *schema_offset, u64 *data_offset,
+                  u8 address_size, struct dwarf_die_variable *dest)
+{
+    u64 schema = *schema_offset;
+    u64 data = *data_offset;
+
+    u32 attribute, form;
+
+    do {
+        u64 value;
+
+        schema += decode_leb128(file + schema, &attribute);
+        schema += decode_leb128(file + schema, &form);
+        data += read_actual_debug_data(file, debug_str_offset, address_size, form, data, &value);
+
+        switch (attribute) {
+            case DW_AT_name: {
+                dest->name = (char *) value;
+                break;
+            }
+
+            case DW_AT_location: {
+                dest->location = value;
+                break;
+            }
+
+            case DW_AT_type: {
+                dest->type = (void * ) value; // TODO TODO TODO
+                break;
+            }
+        }
+    } while (attribute != 0 || form != 0);
+
+    *schema_offset = schema;
+    *data_offset = data;
+}
+
+static void
+read_die_base_type(u8 *file, u64 debug_str_offset, u64 *schema_offset, u64 *data_offset,
+                   u8 address_size, struct dwarf_die_type *dest)
+{
+    u64 schema = *schema_offset;
+    u64 data = *data_offset;
+
+    u32 attribute, form;
+
+    do {
+        u64 value;
+
+        schema += decode_leb128(file + schema, &attribute);
+        schema += decode_leb128(file + schema, &form);
+        data += read_actual_debug_data(file, debug_str_offset, address_size, form, data, &value);
+
+        switch (attribute) {
+            case DW_AT_name: {
+                dest->name = (char *) value;
+                break;
+            }
+
+            case DW_AT_byte_size: {
+                dest->size = value;
+                break;
+            }
+
+            case DW_AT_encoding: {
+                dest->encoding = value;
+                break;
+            }
+        }
+    } while (attribute != 0 || form != 0);
+
+    *schema_offset = schema;
+    *data_offset = data;
+}
+
+static void
+read_die_nop(u8 *file, u64 debug_str_offset, u64 *schema_offset, u64 *data_offset,
+             u8 address_size)
+{
+    u64 data = *data_offset;
+    u64 schema = *schema_offset;
+
+    u32 attribute, form;
+    do {
+        u64 value;
+        schema += decode_leb128(file + schema, &attribute);
+        schema += decode_leb128(file + schema, &form);
+        data += read_actual_debug_data(file, debug_str_offset, address_size, form, data, &value);
+    } while (attribute != 0 || form != 0);
+
+    *data_offset = data;
+    *schema_offset = schema;
+}
+
+static void
+read_die_structure_type(u8 *file, u64 debug_str_offset, u64 *schema_offset, u64 *data_offset,
+                        u8 address_size, struct dwarf_die_type *dest)
+{
+    u64 data = *data_offset;
+    u64 schema = *schema_offset;
+
+    u32 attribute, form;
+    do {
+        u64 value;
+        schema += decode_leb128(file + schema, &attribute);
+        schema += decode_leb128(file + schema, &form);
+        data += read_actual_debug_data(file, debug_str_offset, address_size, form, data, &value);
+
+        switch (attribute) {
+            case DW_AT_name: {
+                dest->name = (char *) value;
+                break;
+            }
+
+            case DW_AT_byte_size: {
+                dest->size = value;
+                break;
+            }
+        }
+    } while (attribute != 0 || form != 0);
+
+    *data_offset = data;
+    *schema_offset = schema;
+}
+
+static void
+read_die_member(u8 *file, u64 debug_str_offset, u64 *schema_offset, u64 *data_offset,
+                u8 address_size, struct dwarf_die_type *dest)
+{
+    u64 data = *data_offset;
+    u64 schema = *schema_offset;
+
+    u32 attribute, form;
+    do {
+        u64 value;
+        schema += decode_leb128(file + schema, &attribute);
+        schema += decode_leb128(file + schema, &form);
+        data += read_actual_debug_data(file, debug_str_offset, address_size, form, data, &value);
+    } while (attribute != 0 || form != 0);
+
+    *data_offset = data;
+    *schema_offset = schema;
+}
+
+static void
+read_one_die(u8 *file, u64 debug_info_offset, u64 debug_str_offset, u64 *schema_offset, u64 *data_offset,
+             enum dwarf_die_tag tag, u8 address_size, struct dwarf_die *dest)
+{
+    u64 schema = *schema_offset;
+    u64 data = *data_offset;
+
+    dest->offset = data - debug_info_offset;
+    dest->tag = tag;
+
+    switch (tag) {
+        case DW_TAG_compile_unit: {
+            read_die_compilation_unit(file, debug_str_offset, &schema, &data, address_size, &dest->compilation_unit);
+            break;
+        }
+
+        case DW_TAG_subprogram: {
+            read_die_subprogram(file, debug_str_offset, &schema, &data, address_size, &dest->subprogram);
+            break;
+        }
+
+        case DW_TAG_variable: {
+            read_die_variable(file, debug_str_offset, &schema, &data, address_size, &dest->variable);
+            break;
+        }
+
+        case DW_TAG_base_type: {
+            read_die_base_type(file, debug_str_offset, &schema, &data, address_size, &dest->type);
+            break;
+        }
+
+        case DW_TAG_structure_type: {
+            read_die_structure_type(file, debug_str_offset, &schema, &data, address_size, &dest->type);
+            break;
+        }
+
+        case DW_TAG_member: {
+            read_die_member(file, debug_str_offset, &schema, &data, address_size, &dest->type);
+            break;
+        }
+
+        default: {
+            read_die_nop(file, debug_str_offset, &schema, &data, address_size);
+        }
+    }
+
+    *schema_offset = schema;
+    *data_offset = data;
+}
+
 static u64
 read_debug_info_for_compilation_unit(u8 *file, struct mi_debuginfo *dest,
                                      u64 debug_info_offset, u64 debug_abbrev_offset, u64 debug_str_offset)
@@ -763,6 +1045,7 @@ read_debug_info_for_compilation_unit(u8 *file, struct mi_debuginfo *dest,
 
     u64 abbrev_offset = debug_abbrev_offset + di_header.debug_abbrev_offset;
     u64 data_offset = debug_info_offset + header_size;
+    u8 address_size = di_header.address_size;
 
     u32 code, tag;
     u64 schema_offset;
@@ -777,13 +1060,12 @@ read_debug_info_for_compilation_unit(u8 *file, struct mi_debuginfo *dest,
 
     struct mi_variable *variable = dest->variables + dest->var_count;
     struct mi_type *type = dest->types + dest->type_count;
-    struct mi_type *parent_type = 0;
+    struct dwarf_die d_die = { 0 };
+    struct dwarf_die *die = &d_die;
 
     comp_unit->functions_from = dest->func_count;
 
     for (;;) {
-        s64 record_offset = data_offset - debug_info_offset;
-
         data_offset += decode_leb128(file + data_offset, &code);
 
         if (code == 0) {
@@ -799,169 +1081,14 @@ read_debug_info_for_compilation_unit(u8 *file, struct mi_debuginfo *dest,
 
         schema_offset += decode_leb128(file + schema_offset, NULL);
         schema_offset += decode_leb128(file + schema_offset, &tag);
-        //printf("%d %s\n", code, tag_to_str(tag));
-
         u32 has_children = file[schema_offset++];
         if (has_children) {
             ++depth;
         }
 
-        switch (tag) {
-            case DW_TAG_compile_unit: {
-                break;
-            }
-
-            case DW_TAG_subprogram: {
-                break;
-            }
-
-            case DW_TAG_variable: {
-                break;
-            }
-
-            case DW_TAG_structure_type: {
-                break;
-            }
-
-            case DW_TAG_member: {
-                break;
-            }
-
-            case DW_TAG_base_type: {
-                break;
-            }
-        }
-#if 0
-        u32 attribute, form;
-
-        do {
-            schema_offset += decode_leb128(file + schema_offset, &attribute);
-            schema_offset += decode_leb128(file + schema_offset, &form);
-
-            u64 value;
-
-            data_offset += read_actual_debug_data(file, debug_str_offset, di_header.address_size, form, data_offset, &value);
-
-            if (tag == DW_TAG_compile_unit) {
-                if (attribute == DW_AT_low_pc) {
-                    comp_unit->low_pc = value;
-                } else if (attribute == DW_AT_high_pc) {
-                    comp_unit->high_pc = value;
-                } else if (attribute == DW_AT_comp_dir) {
-                    comp_unit->source.comp_dir = (char *) value;
-                }
-            } else if (tag == DW_TAG_subprogram) {
-                if (attribute == DW_AT_name) {
-                    func->name = (char *) value;
-                } else if (attribute == DW_AT_low_pc) {
-                    func->low_pc = value;
-                } else if (attribute == DW_AT_high_pc) {
-                    func->high_pc = value;
-                }
-            } else if (tag == DW_TAG_variable) {
-                if (attribute == DW_AT_name) {
-                    variable->name = (char *) value;
-                } else if (attribute == DW_AT_location) {
-                    variable->location = value;
-                } else if (attribute == DW_AT_type) {
-                    variable->type = value;
-                }
-            } else if (tag == DW_TAG_structure_type) {
-                type->_offset = record_offset;
-                if (attribute == DW_AT_name) {
-                    type->name = (char *) value;
-                } else if (attribute == DW_AT_byte_size) {
-                    type->size = value;
-                }
-            } else if (tag == DW_TAG_member) {
-
-            } else if (tag == DW_TAG_base_type) {
-                type->_offset = record_offset;
-                if (attribute == DW_AT_name) {
-                    type->name = (char *) value;
-                } else if (attribute == DW_AT_byte_size) {
-                    type->size = value;
-                } else if (attribute == DW_AT_encoding) {
-                    enum dwarf_type_encoding encoding = value;
-                    switch (encoding) {
-                        case DW_ATE_address: {
-                            type->encoding = MI_ADDRESS;
-                            break;
-                        }
-
-                        case DW_ATE_boolean: {
-                            type->encoding = MI_BOOLEAN;
-                            break;
-                        }
-
-                        case DW_ATE_float: {
-                            type->encoding = MI_FLOAT;
-                            break;
-                        }
-
-                        case DW_ATE_signed_char:
-                        case DW_ATE_signed: {
-                            type->encoding = MI_SIGNED;
-                            break;
-                        }
-
-                        case DW_ATE_unsigned_char:
-                        case DW_ATE_unsigned: {
-                            type->encoding = MI_UNSIGNED;
-                            break;
-                        }
-
-                        default: {
-                            DIE("unexpected type encoding!\n");
-                        }
-                    }
-                }
-            }
-        } while (attribute != 0 || form != 0);
-
-        // NOTE(aolo2): DIE completely processed, finish it ??uploads??
-        if (tag == DW_TAG_subprogram) {
-            func->high_pc = func->low_pc + func->high_pc;
-            func->variables_from = dest->var_count;
-            ++comp_unit->functions_count;
-            ++dest->func_count;
-            ++func;
-        } else if (tag == DW_TAG_variable) {
-            struct mi_function *parent = func - 1;
-            ++parent->variables_count;
-            ++dest->var_count;
-            ++variable;
-        } else if (tag == DW_TAG_base_type) {
-            ++parent_type->children_count;
-            ++dest->type_count;
-            ++type;
-        } else if (tag == DW_TAG_structure_type) {
-            parent_type = type;
-            parent_type->children_from = dest->type_count;
-            ++type;
-        }
-#endif
-
-
-    }
-
-    // Resolve types
-    for (int v = vars_from; v < dest->var_count; ++v) {
-        struct mi_variable *variable = dest->variables + v;
-
-        for (int t = types_from; t < dest->type_count; ++t) {
-            struct mi_type *type = dest->types + t;
-
-            if (type->_offset == variable->type) {
-                variable->type = t;
-                break;
-            }
-        }
+        read_one_die(file, debug_info_offset, debug_str_offset, &schema_offset, &data_offset, tag, address_size, die);
     }
 
-    comp_unit->high_pc = comp_unit->low_pc + comp_unit->high_pc;
-    ++dest->cu_count;
-
     return(di_header.length + 4);
 }
 
diff --git a/src/elf_dwarf.h b/src/elf_dwarf.h
index 9562563..a700651 100644
--- a/src/elf_dwarf.h
+++ b/src/elf_dwarf.h
@@ -721,4 +721,50 @@ struct dwarf_regset {
 
     enum dwarf_regset_register cfa_register;
     u32 cfa_offset;
+};
+
+struct dwarf_die_compilation_unit {
+    u64 low_pc;
+    u64 high_pc;
+    char *name;
+    char *comp_dir;
+};
+
+struct dwarf_die_subprogram {
+    u64 low_pc;
+    u64 high_pc;
+    char *name;
+    // TODO: frame base (might be NOT cfa)
+    // TODO: type
+};
+
+struct dwarf_die_variable {
+    char *name;
+    int location;
+    struct dwarf_die_type *type;
+};
+
+struct dwarf_die_type {
+    char *name;
+    int size;
+    int encoding;
+
+    int is_const;
+    int is_pointer;
+    int is_restrict;
+
+    struct dwarf_die_type *next;
+    struct dwarf_die_type *first_child;
+};
+
+struct dwarf_die {
+    u64 offset;
+    struct dwarf_die *parent;
+    enum dwarf_die_tag tag;
+    union {
+        struct dwarf_die_compilation_unit compilation_unit;
+        struct dwarf_die_subprogram subprogram;
+        struct dwarf_die_variable variable;
+        struct dwarf_die_type type;
+    };
 };
\ No newline at end of file