commit 5231bb2f1163ffaea38a079b3651c83c4f4ff33e Author: A.Olokhtonov Date: Mon Mar 13 11:55:08 2023 +0300 HW1, HW2, HW2 bonus, HW3 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..06cbd70 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +test +test.asm +test2 +test2.asm +*.hex +8086-decode diff --git a/1-reg-reg-decode/listing_0037_single_register_mov b/1-reg-reg-decode/listing_0037_single_register_mov new file mode 100644 index 0000000..93b300d --- /dev/null +++ b/1-reg-reg-decode/listing_0037_single_register_mov @@ -0,0 +1 @@ +‰Ù \ No newline at end of file diff --git a/1-reg-reg-decode/listing_0037_single_register_mov.asm b/1-reg-reg-decode/listing_0037_single_register_mov.asm new file mode 100644 index 0000000..a20862f --- /dev/null +++ b/1-reg-reg-decode/listing_0037_single_register_mov.asm @@ -0,0 +1,19 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== */ + +; ======================================================================== +; LISTING 37 +; ======================================================================== + +bits 16 + +mov cx, bx diff --git a/1-reg-reg-decode/listing_0038_many_register_mov b/1-reg-reg-decode/listing_0038_many_register_mov new file mode 100644 index 0000000..5605230 --- /dev/null +++ b/1-reg-reg-decode/listing_0038_many_register_mov @@ -0,0 +1 @@ +‰Ùˆå‰Ú‰Þ‰ûˆÈˆí‰Ã‰ó‰ü‰Å \ No newline at end of file diff --git a/1-reg-reg-decode/listing_0038_many_register_mov.asm b/1-reg-reg-decode/listing_0038_many_register_mov.asm new file mode 100644 index 0000000..7431c2f --- /dev/null +++ b/1-reg-reg-decode/listing_0038_many_register_mov.asm @@ -0,0 +1,29 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== */ + +; ======================================================================== +; LISTING 38 +; ======================================================================== + +bits 16 + +mov cx, bx +mov ch, ah +mov dx, bx +mov si, bx +mov bx, di +mov al, cl +mov ch, ch +mov bx, ax +mov bx, si +mov sp, di +mov bp, ax diff --git a/1-reg-reg-decode/main.c b/1-reg-reg-decode/main.c new file mode 100644 index 0000000..b5669e1 --- /dev/null +++ b/1-reg-reg-decode/main.c @@ -0,0 +1,56 @@ +#include "../common.h" + +static void +decode_and_print_reg_reg_movs(u8 *data, u64 size) /* prints "bits 16", then one "mov dst, src" line per 2-byte instruction in data[0..size) */ +{ + printf("bits 16\n\n"); + + /* We do not handle corrupt binaries */ + + for (u64 i = 0; i < size; i += 2) { + u8 b1 = data[i + 0]; + u8 b2 = data[i + 1]; + + u8 D_flag = (b1 & 0x2) >> 1; + u8 W_flag = b1 & 0x1; + + u8 mod = (b2 & 0xC0) >> 6; /* decoded but never checked: every instruction is treated as register-to-register */ + u8 reg_field = (b2 & 0x38) >> 3; + u8 rm_field = b2 & 0x7; + + const char **regs = W_flag ? REGS_W1 : REGS_W0; + const char **rms = W_flag ? 
REGS_W1 : REGS_W0; /* in register mode R/M indexes the same register tables as REG; common.h defines no RM_W0/RM_W1 */ + + if (D_flag) { + /* Instruction destination is specified in REG field */ + printf("mov %s, %s\n", regs[reg_field], rms[rm_field]); + } else { + /* Instruction source is specified in REG field */ + printf("mov %s, %s\n", rms[rm_field], regs[reg_field]); + } + } +} + +int +main(int argc, char **argv) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s binary_file\n", argv[0]); + return(1); + } + + char *filename = argv[1]; + int fd = open(filename, O_RDONLY); + + if (fd == -1) { + perror("open"); + return(1); + } + + u64 size = file_size(fd); + u8 *data = read_file(fd, size); + + decode_and_print_reg_reg_movs(data, size); + + return(0); +} \ No newline at end of file diff --git a/2-more-decode/listing_0039_more_movs b/2-more-decode/listing_0039_more_movs new file mode 100644 index 0000000..7536cd8 Binary files /dev/null and b/2-more-decode/listing_0039_more_movs differ diff --git a/2-more-decode/listing_0039_more_movs.asm b/2-more-decode/listing_0039_more_movs.asm new file mode 100644 index 0000000..4a667e2 --- /dev/null +++ b/2-more-decode/listing_0039_more_movs.asm @@ -0,0 +1,45 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 39 +; ======================================================================== + +bits 16 + +; Register-to-register +mov si, bx +mov dh, al + +; 8-bit immediate-to-register +mov cx, 12 +mov cx, -12 + +; 16-bit immediate-to-register +mov dx, 3948 +mov dx, -3948 + +; Source address calculation +mov al, [bx + si] +mov bx, [bp + di] +mov dx, [bp] + +; Source address calculation plus 8-bit displacement +mov ah, [bx + si + 4] + +; Source address calculation plus 16-bit displacement +mov al, [bx + si + 4999] + +; Dest address calculation +mov [bx + di], cx +mov [bp + si], cl +mov [bp], ch diff --git a/2-more-decode/listing_0040_challenge_movs b/2-more-decode/listing_0040_challenge_movs new file mode 100644 index 0000000..63b3ec5 Binary files /dev/null and b/2-more-decode/listing_0040_challenge_movs differ diff --git a/2-more-decode/listing_0040_challenge_movs.asm b/2-more-decode/listing_0040_challenge_movs.asm new file mode 100644 index 0000000..966e47a --- /dev/null +++ b/2-more-decode/listing_0040_challenge_movs.asm @@ -0,0 +1,38 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. 
+; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 40 +; ======================================================================== + +bits 16 + +; Signed displacements +mov ax, [bx + di - 37] +mov [si - 300], cx +mov dx, [bx - 32] + +; Explicit sizes +mov [bp + di], byte 7 +mov [di + 901], word 347 + +; Direct address +mov bp, [5] +mov bx, [3458] + +; Memory-to-accumulator test +mov ax, [2555] +mov ax, [16] + +; Accumulator-to-memory test +mov [2554], ax +mov [15], ax diff --git a/2-more-decode/main.c b/2-more-decode/main.c new file mode 100644 index 0000000..432c46c --- /dev/null +++ b/2-more-decode/main.c @@ -0,0 +1,281 @@ +#include "../common.h" + +static void +print_mov(u8 wide, u8 swap, struct rm dest, struct rm src) +{ + const char **regs = wide ? REGS_W1 : REGS_W0; + + if (swap) { + struct rm buf = dest; + dest = src; + src = buf; + } + + printf("mov "); + + /* Dest can not be an immediate, so dest.imm indicates if there's a displacement */ + + if (dest.reg1 == NO_REGISTER && dest.reg2 == NO_REGISTER && dest.disp != 0) { + printf("[%d]", dest.disp); + } else if (dest.reg2 == NO_REGISTER && dest.imm == 0) { + printf("%s", regs[dest.reg1]); + } else if (dest.reg2 != NO_REGISTER && dest.imm == 0) { + printf("[%s + %s]", REGS_W1[dest.reg1], REGS_W1[dest.reg2]); + } else if (dest.reg2 == NO_REGISTER && dest.imm != 0) { + printf("[%s + %d]", REGS_W1[dest.reg1], dest.disp); + } else if (dest.reg2 != NO_REGISTER && dest.imm != 0) { + printf("[%s + %s + %d]", REGS_W1[dest.reg1], REGS_W1[dest.reg2], dest.disp); + } + + printf(", "); + + if (src.reg1 == NO_REGISTER) { + if (src.disp == 0) { + printf("%s %d", wide ? 
"word" : "byte", src.imm); /* only the src can be an immediate */ + } else { + printf("[%d]", src.disp); + } + } else { + if (src.reg2 == NO_REGISTER && src.imm == 0) { + printf("%s", regs[src.reg1]); + } else if (src.reg2 != NO_REGISTER && src.imm == 0) { + printf("[%s + %s]", REGS_W1[src.reg1], REGS_W1[src.reg2]); + } else if (src.reg2 == NO_REGISTER && src.imm != 0) { + printf("[%s + %d]", REGS_W1[src.reg1], src.disp); + } else if (src.reg2 != NO_REGISTER && src.imm != 0) { + printf("[%s + %s + %d]", REGS_W1[src.reg1], REGS_W1[src.reg2], src.disp); + } + } + + printf("\n"); +} + +static int +decode_rm(u8 *data, int offset, struct rm *location) +{ + int advance = 0; + + u8 b2 = data[offset + 1]; + u8 mod = (b2 & 0xC0) >> 6; + u8 rm_field = b2 & 0x7; + + if (mod == MOD_REGISTER) { + location->reg1 = rm_field; + location->reg2 = -1; + location->disp = 0; + location->imm = 0; + + advance = 2; + } else if (mod == MOD_MEMORY) { + s16 b3 = data[offset + 2]; + s16 b4 = data[offset + 3]; + + if (rm_field == 0x6) { + location->reg1 = NO_REGISTER; + location->reg2 = NO_REGISTER; + location->disp = (b4 << 8) | b3; + location->imm = 0; + advance = 4; + } else { + location->reg1 = MOD_PAIRS_FIRST[rm_field]; + location->reg2 = MOD_PAIRS_SECOND[rm_field]; + location->disp = 0; + location->imm = 1; + advance = 2; + } + } else if (mod == MOD_MEMORY_8) { + s8 b3 = data[offset + 2]; + + location->reg1 = MOD_PAIRS_FIRST[rm_field]; + location->reg2 = MOD_PAIRS_SECOND[rm_field]; + location->disp = b3; + location->imm = 1; + + advance = 3; + } else if (mod == MOD_MEMORY_16) { + s16 b3 = data[offset + 2]; + s16 b4 = data[offset + 3]; + + location->reg1 = MOD_PAIRS_FIRST[rm_field]; + location->reg2 = MOD_PAIRS_SECOND[rm_field]; + location->disp = (b4 << 8) | b3; + location->imm = 1; + + advance = 4; + } + + return(advance); +} + +static void +decode_and_print_more_movs(u8 *data, u64 size) +{ + printf("bits 16\n\n"); + + /* We still do not handle corrupt binaries */ + + u64 offset = 0; + 
+ + while (offset < size) { + u8 b1 = data[offset + 0]; + + if ((b1 & MASK_REG_RM) == OPCODE_REG_RM) { + u8 b2 = data[offset + 1]; + u8 W_flag = b1 & 0x1; + u8 D_flag = (b1 & 0x2) >> 1; + u8 reg_field = (b2 & 0x38) >> 3; + + struct rm dest = { 0 }; + struct rm src = { 0 }; + + dest.reg1 = reg_field; + dest.reg2 = -1; + dest.disp = 0; + dest.imm = 0; + + offset += decode_rm(data, offset, &src); + + print_mov(W_flag, !D_flag, dest, src); + } else if ((b1 & MASK_IMM_RM) == OPCODE_IMM_RM) { + u8 W_flag = b1 & 0x1; + u8 D_flag = 1; /* Immediate to register-memory always has mod/rm as destination */ + + struct rm dest = { 0 }; + struct rm src = { 0 }; + + offset += decode_rm(data, offset, &dest); + + s16 immediate = 0; + + if (W_flag) { + s16 b5 = data[offset]; + s16 b6 = data[offset + 1]; + immediate = (b6 << 8) | b5; + offset += 2; + } else { + s16 b5 = data[offset]; + immediate = b5; + offset += 1; + } + + src.reg1 = NO_REGISTER; + src.imm = immediate; + + print_mov(W_flag, !D_flag, dest, src); + } else if ((b1 & MASK_IMM_REG) == OPCODE_IMM_REG) { + u8 W_flag = (b1 & 0x8) >> 3; + u16 b2 = data[offset + 1]; + u8 reg = b1 & 0x7; + + struct rm dest = { 0 }; + struct rm src = { 0 }; + + dest.reg1 = reg; + dest.reg2 = NO_REGISTER; + dest.disp = 0; + dest.imm = 0; + + s16 immediate = 0; + + if (W_flag) { + /* A 16-bit immediate follows */ + u16 b3 = data[offset + 2]; + immediate = (b3 << 8) | b2; + offset += 3; + } else { + /* An 8-bit immediate follows */ + immediate = b2; + offset += 2; + } + + src.reg1 = NO_REGISTER; + src.reg2 = NO_REGISTER; + src.disp = 0; + src.imm = immediate; + + print_mov(W_flag, 0, dest, src); + } else if ((b1 & MASK_MEM_ACC) == OPCODE_MEM_ACC) { + u8 W_flag = b1 & 0x1; + u8 b2 = data[offset + 1]; + + struct rm dest = { 0 }; + struct rm src = { 0 }; + + dest.reg1 = AX; + dest.reg2 = NO_REGISTER; + dest.disp = 0; + dest.imm = 0; + + u16 address = 0; + 
+ /* Memory-to-accumulator MOV always carries a full 16-bit + * address (addr-lo, addr-hi), even when W == 0; W only picks + * al versus ax. Reading a single byte for W == 0 would + * truncate the address and leave the decoder one byte out of step. + */ + u16 b3 = data[offset + 2]; + address = (b3 << 8) | b2; + offset += 3; + + src.reg1 = NO_REGISTER; + src.reg2 = NO_REGISTER; + src.disp = address; + src.imm = 0; + + print_mov(W_flag, 0, dest, src); + } else if ((b1 & MASK_ACC_MEM) == OPCODE_ACC_MEM) { + u8 W_flag = b1 & 0x1; + u8 b2 = data[offset + 1]; + + struct rm dest = { 0 }; + struct rm src = { 0 }; + + dest.reg1 = AX; + dest.reg2 = NO_REGISTER; + dest.disp = 0; + dest.imm = 0; + + u16 address = 0; + + /* Accumulator-to-memory MOV likewise always encodes a 16-bit + * address (addr-lo, addr-hi) regardless of W, so the + * instruction is three bytes long in both the al and ax + * forms. The operands are swapped when printing (swap == 1) + * so that the memory operand becomes the destination. */ + u16 b3 = data[offset + 2]; + address = (b3 << 8) | b2; + offset += 3; + + src.reg1 = NO_REGISTER; + src.reg2 = NO_REGISTER; + src.disp = address; + src.imm = 0; + + print_mov(W_flag, 1, dest, src); + } + } +} + +int +main(int argc, char **argv) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s binary_file\n", argv[0]); + return(1); + } + + char *filename = argv[1]; + int fd = open(filename, O_RDONLY); + + if (fd == -1) { + perror("open"); + return(1); + } + + u64 size = file_size(fd); + u8 *data = read_file(fd, size); + + decode_and_print_more_movs(data, size); + + return(0); +} diff --git a/2-more-decode/test.sh b/2-more-decode/test.sh new file mode 100755 index 0000000..ad1e2e7 --- /dev/null +++ b/2-more-decode/test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +for bin_file in listing_0039_more_movs listing_0040_challenge_movs; do + ./8086-decode $bin_file > test.asm + nasm test.asm + if ! diff -s test $bin_file; then + echo "Test $bin_file failed!" 
+ exit 1 + fi +done + +exit 0 diff --git a/3-arithmetic/listing_0041_add_sub_cmp_jnz b/3-arithmetic/listing_0041_add_sub_cmp_jnz new file mode 100644 index 0000000..6c71f1b Binary files /dev/null and b/3-arithmetic/listing_0041_add_sub_cmp_jnz differ diff --git a/3-arithmetic/listing_0041_add_sub_cmp_jnz.asm b/3-arithmetic/listing_0041_add_sub_cmp_jnz.asm new file mode 100644 index 0000000..6b79cf0 --- /dev/null +++ b/3-arithmetic/listing_0041_add_sub_cmp_jnz.asm @@ -0,0 +1,121 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 41 +; ======================================================================== + +bits 16 + +add bx, [bx+si] +add bx, [bp] +add si, 2 +add bp, 2 +add cx, 8 +add bx, [bp + 0] +add cx, [bx + 2] +add bh, [bp + si + 4] +add di, [bp + di + 6] +add [bx+si], bx +add [bp], bx +add [bp + 0], bx +add [bx + 2], cx +add [bp + si + 4], bh +add [bp + di + 6], di +add byte [bx], 34 +add word [bp + si + 1000], 29 +add ax, [bp] +add al, [bx + si] +add ax, bx +add al, ah +add ax, 1000 +add al, -30 +add al, 9 + +sub bx, [bx+si] +sub bx, [bp] +sub si, 2 +sub bp, 2 +sub cx, 8 +sub bx, [bp + 0] +sub cx, [bx + 2] +sub bh, [bp + si + 4] +sub di, [bp + di + 6] +sub [bx+si], bx +sub [bp], bx +sub [bp + 0], bx +sub [bx + 2], cx +sub [bp + si + 4], bh +sub [bp + di + 6], di +sub byte [bx], 34 +sub word [bx + di], 29 +sub ax, [bp] +sub al, [bx + si] +sub ax, bx +sub al, ah +sub ax, 1000 +sub al, -30 +sub al, 9 + +cmp bx, [bx+si] +cmp bx, [bp] +cmp si, 2 +cmp 
bp, 2 +cmp cx, 8 +cmp bx, [bp + 0] +cmp cx, [bx + 2] +cmp bh, [bp + si + 4] +cmp di, [bp + di + 6] +cmp [bx+si], bx +cmp [bp], bx +cmp [bp + 0], bx +cmp [bx + 2], cx +cmp [bp + si + 4], bh +cmp [bp + di + 6], di +cmp byte [bx], 34 +cmp word [4834], 29 +cmp ax, [bp] +cmp al, [bx + si] +cmp ax, bx +cmp al, ah +cmp ax, 1000 +cmp al, -30 +cmp al, 9 + +test_label0: +jnz test_label1 +jnz test_label0 +test_label1: +jnz test_label0 +jnz test_label1 + +label: +je label +jl label +jle label +jb label +jbe label +jp label +jo label +js label +jne label +jnl label +jg label +jnb label +ja label +jnp label +jno label +jns label +loop label +loopz label +loopnz label +jcxz label diff --git a/3-arithmetic/main.c b/3-arithmetic/main.c new file mode 100644 index 0000000..249cc03 --- /dev/null +++ b/3-arithmetic/main.c @@ -0,0 +1,349 @@ +#include "../common.h" + +static void +print_inst(int instruction_offset, struct instruction *inst, int *labels, int label_count) +{ + if (inst->opcode > 0xF) { + for (int i = 0; i < label_count; ++i) { + if (instruction_offset + inst->size + inst->jump_offset == labels[i]) { + printf("%s label_%d\n", OP_NAMES[inst->opcode], i); + return; + } + } + + CRASH_SHOULDNOT(); + } + + const char **regs = inst->wide ? 
REGS_W1 : REGS_W0; + struct rm dest = inst->dst; + struct rm src = inst->src; + + if (inst->swap) { + struct rm buf = dest; + dest = src; + src = buf; + } + + printf("%s ", OP_NAMES[inst->opcode]); + + /* Dest can not be an immediate, so dest.imm indicates if there's a displacement */ + + if (dest.reg1 == NO_REGISTER && dest.reg2 == NO_REGISTER && dest.disp != 0) { + printf("[%d]", dest.disp); + } else if (dest.reg2 == NO_REGISTER && dest.imm == 0) { + printf("%s", regs[dest.reg1]); + } else if (dest.reg2 != NO_REGISTER && dest.imm == 0) { + printf("[%s + %s]", REGS_W1[dest.reg1], REGS_W1[dest.reg2]); + } else if (dest.reg2 == NO_REGISTER && dest.imm != 0) { + printf("[%s + %d]", REGS_W1[dest.reg1], dest.disp); + } else if (dest.reg2 != NO_REGISTER && dest.imm != 0) { + printf("[%s + %s + %d]", REGS_W1[dest.reg1], REGS_W1[dest.reg2], dest.disp); + } + + printf(", "); + + if (src.reg1 == NO_REGISTER) { + if (src.disp == 0) { + printf("%s %d", inst->wide ? "word" : "byte", src.imm); /* only the src can be an immediate */ + } else { + printf("[%d]", src.disp); + } + } else { + if (src.reg2 == NO_REGISTER && src.imm == 0) { + printf("%s", regs[src.reg1]); + } else if (src.reg2 != NO_REGISTER && src.imm == 0) { + printf("[%s + %s]", REGS_W1[src.reg1], REGS_W1[src.reg2]); + } else if (src.reg2 == NO_REGISTER && src.imm != 0) { + printf("[%s + %d]", REGS_W1[src.reg1], src.disp); + } else if (src.reg2 != NO_REGISTER && src.imm != 0) { + printf("[%s + %s + %d]", REGS_W1[src.reg1], REGS_W1[src.reg2], src.disp); + } + } + + printf("\n"); +} + +static int +decode_rm(u8 *data, int offset, struct rm *location) +{ + int advance = 0; + + u8 b2 = data[offset + 1]; + u8 mod = (b2 & 0xC0) >> 6; + u8 rm_field = b2 & 0x7; + + if (mod == MOD_REGISTER) { + location->reg1 = rm_field; + location->reg2 = -1; + location->disp = 0; + location->imm = 0; + + advance = 2; + } else if (mod == MOD_MEMORY) { + s16 b3 = data[offset + 2]; + s16 b4 = data[offset + 3]; + + if (rm_field == 0x6) { + 
location->reg1 = NO_REGISTER; + location->reg2 = NO_REGISTER; + location->disp = (b4 << 8) | b3; + location->imm = 0; + advance = 4; + } else { + location->reg1 = MOD_PAIRS_FIRST[rm_field]; + location->reg2 = MOD_PAIRS_SECOND[rm_field]; + location->disp = 0; + location->imm = 1; + advance = 2; + } + } else if (mod == MOD_MEMORY_8) { + s8 b3 = data[offset + 2]; + + location->reg1 = MOD_PAIRS_FIRST[rm_field]; + location->reg2 = MOD_PAIRS_SECOND[rm_field]; + location->disp = b3; + location->imm = 1; + + advance = 3; + } else if (mod == MOD_MEMORY_16) { + s16 b3 = data[offset + 2]; + s16 b4 = data[offset + 3]; + + location->reg1 = MOD_PAIRS_FIRST[rm_field]; + location->reg2 = MOD_PAIRS_SECOND[rm_field]; + location->disp = (b4 << 8) | b3; + location->imm = 1; + + advance = 4; + } + + return(advance); +} + +static int +decode_and_print_add_sub_cmp_jmp(u8 *data, u64 size, struct instruction *stream) +{ + printf("bits 16\n\n"); + + /* We still do not handle corrupt binaries */ + + u64 offset = 0; + int stream_at = 0; + + while (offset < size) { + int saved_offset = offset; + + u8 b1 = data[offset + 0]; + u8 b2 = data[offset + 1]; + + u8 antiop = b1 & ANTIOP_MASK; + u8 op = 0; + + if (antiop == 0x00 || antiop == 0x4) { + op = (b1 & 0x38) >> 3; + } else { + op = (b2 & 0x38) >> 3; + } + + if (antiop == 0x00) { + u8 W_flag = b1 & 0x1; + u8 D_flag = (b1 & 0x2) >> 1; + u8 reg_field = (b2 & 0x38) >> 3; + + struct rm dest = { 0 }; + struct rm src = { 0 }; + + dest.reg1 = reg_field; + dest.reg2 = -1; + dest.disp = 0; + dest.imm = 0; + + offset += decode_rm(data, offset, &src); + + struct instruction inst = { + .size = offset - saved_offset, + .opcode = op, + .dst = dest, + .src = src, + .wide = W_flag, + .swap = !D_flag + }; + + stream[stream_at++] = inst; + + continue; + } else if (antiop == 0x80) { + u8 W_flag = b1 & 0x1; + u8 S_flag = (b1 & 0x2) >> 1; + u8 D_flag = 1; /* Immediate to register-memory always has mod/rm as destination */ + + struct rm dest = { 0 }; + struct rm src 
= { 0 }; + + offset += decode_rm(data, offset, &dest); + + s16 immediate = 0; + + if (!S_flag && W_flag) { /* s:w == 01 is the only form that carries a 16-bit immediate */ + s16 b5 = data[offset]; + s16 b6 = data[offset + 1]; + immediate = (b6 << 8) | b5; + offset += 2; + } else { + s16 b5 = S_flag ? (s8) data[offset] : data[offset]; /* S set: the 8-bit immediate is sign-extended */ + immediate = b5; + offset += 1; + } + + src.reg1 = NO_REGISTER; + src.imm = immediate; + + struct instruction inst = { + .size = offset - saved_offset, + .opcode = op, + .dst = dest, + .src = src, + .wide = W_flag, + .swap = !D_flag + }; + + stream[stream_at++] = inst; + + continue; + } else if (antiop == 0x4) { + u8 W_flag = b1 & 0x1; + s16 immediate = 0; + + struct rm dest = { 0 }; + struct rm src = { 0 }; + + dest.reg1 = AX; + dest.reg2 = NO_REGISTER; + dest.disp = 0; + dest.imm = 0; + + if (W_flag) { + s16 b3 = data[offset + 2]; + immediate= (b3 << 8) | b2; + offset += 3; + } else { + immediate = b2; + offset += 2; + } + + src.reg1 = NO_REGISTER; + src.reg2 = NO_REGISTER; + src.disp = 0; + src.imm = immediate; + + struct instruction inst = { + .size = offset - saved_offset, + .opcode = op, + .dst = dest, + .src = src, + .wide = W_flag, + .swap = 0, + }; + + stream[stream_at++] = inst; + + continue; + } + + offset += 2; + + struct instruction inst = { + .size = offset - saved_offset, + .opcode = b1, + .jump_offset = (s8) b2 + }; + + stream[stream_at++] = inst; + } + + return(stream_at); +} + +static int +insert_labels(struct instruction *in_stream, int instruction_count, int *labels) +{ + int instruction_offset = 0; + int label_count = 0; + + for (int i = 0; i < instruction_count; ++i) { + struct instruction *inst = in_stream + i; + + if (inst->opcode > 0xF) { + /* jmp-like */ + int target = instruction_offset + inst->size + inst->jump_offset; + int label_exists = 0; + + for (int j = 0; j < label_count; ++j) { + if (labels[j] == target) { + label_exists = 1; + break; + } + } + + if (!label_exists) { + labels[label_count++] = target; + } + } + + instruction_offset += inst->size; + } + + return(label_count); +} + +static void 
+print_instruction_stream(struct instruction *stream, int instruction_count, + int *labels, int label_count) +{ + int instruction_offset = 0; + + for (int i = 0; i < instruction_count; ++i) { + struct instruction *inst = stream + i; + + for (int j = 0; j < label_count; ++j) { + if (labels[j] == instruction_offset) { + printf("label_%d:\n", j); + break; + } + } + + print_inst(instruction_offset, stream + i, labels, label_count); + + instruction_offset += inst->size; + } +} + +int +main(int argc, char **argv) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s binary_file\n", argv[0]); + return(1); + } + + char *filename = argv[1]; + int fd = open(filename, O_RDONLY); + + if (fd == -1) { + perror("open"); + return(1); + } + + u64 size = file_size(fd); + u8 *data = read_file(fd, size); + + struct instruction stream[MAX_INSTRUCTIONS] = { 0 }; + int labels[MAX_LABELS] = { 0 }; + + int instruction_count = decode_and_print_add_sub_cmp_jmp(data, size, stream); + int label_count = insert_labels(stream, instruction_count, labels); + + print_instruction_stream(stream, instruction_count, labels, label_count); + + return(0); +} \ No newline at end of file diff --git a/common.h b/common.h new file mode 100644 index 0000000..79f2c5a --- /dev/null +++ b/common.h @@ -0,0 +1,166 @@ +#include <stdint.h> /* uint32_t etc */ +#include <stdlib.h> /* exit */ +#include <stdio.h> /* perror, fprintf */ +#include <fcntl.h> /* open */ +#include <unistd.h> /* read */ +#include <sys/stat.h> /* fstat */ + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +#define MAX_INSTRUCTIONS 1024 +#define MAX_LABELS 128 + +enum regs_w { + AX = 0, + CX = 1, + DX = 2, + BX = 3, + SP = 4, + BP = 5, + SI = 6, + DI = 7, +}; + +enum regs_h { + AL = 0, + CL = 1, + DL = 2, + BL = 3, + AH = 4, + CH = 5, + DH = 6, + BH = 7, +}; + +enum mov_mod { + MOD_MEMORY = 0x00, + MOD_MEMORY_8 = 0x01, + MOD_MEMORY_16 = 0x02, + MOD_REGISTER = 0x03, +}; + +enum 
mov_opcode { + OPCODE_REG_RM = 0x88, + OPCODE_IMM_RM = 0xC6, + OPCODE_IMM_REG = 0xB0, + OPCODE_MEM_ACC = 0xA0, + OPCODE_ACC_MEM = 0xA2, +}; + +enum mov_opcode_mask { + MASK_REG_RM = 0xFC, + MASK_IMM_RM = 0xFE, + MASK_IMM_REG = 0xF0, + MASK_MEM_ACC = 0xFE, + MASK_ACC_MEM = 0xFE, + + MASK_ADD_SUB_CMP = 0xFC, +}; + +struct rm { + s8 reg1; // -1 here means it's an immediate + s8 reg2; // -1 means no second + s16 disp; // 0 means no displacement + s16 imm; // if it's not an immediate - imm indicates if there is a displacement +}; + +struct instruction { + u8 size; + u8 opcode; + u8 wide; + u8 swap; + + /* for mov, add, sub, cmp */ + struct rm dst; + struct rm src; + + /* for control-flow instructions */ + s8 jump_offset; +}; + +static const s8 NO_REGISTER = -1; +static const u8 ANTIOP_MASK = 0xC4; + +static const char *REGS_W0[] = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" }; +static const char *REGS_W1[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" }; + +static const s8 MOD_PAIRS_FIRST[] = { BX, BX, BP, BP, SI, DI, BP, BX }; +static const s8 MOD_PAIRS_SECOND[] = { SI, DI, SI, DI, NO_REGISTER, NO_REGISTER, NO_REGISTER, NO_REGISTER }; + +static const char *OP_NAMES[] = { + [0x0] = "add", + [0x5] = "sub", + [0x7] = "cmp", + [0xF] = "mov", /* Source? I made it up */ + + [0x74] = "je", + [0x7C] = "jl", + [0x7E] = "jle", + [0x72] = "jb", + [0x76] = "jbe", + [0x7A] = "jp", + [0x70] = "jo", + [0x78] = "js", + [0x75] = "jne", + [0x7D] = "jnl", + [0x7F] = "jg", + [0x73] = "jnb", + [0x77] = "ja", + [0x7B] = "jnp", + [0x71] = "jno", + [0x79] = "jns", + + [0xE2] = "loop", + [0xE1] = "loopz", + [0xE0] = "loopnz", + [0xE3] = "jcxz", +}; + +#define CRASH_NOTIMPL() do { fprintf(stderr, "line: %d. not implemented\n", __LINE__); exit(1); } while (0) +#define CRASH_SHOULDNOT() do { fprintf(stderr, "line: %d. 
should not happen\n", __LINE__); exit(1); } while (0) + +static u64 +file_size(int fd) /* returns st_size as reported by fstat(fd); exits the process if fstat fails */ +{ + struct stat st = { 0 }; + + if (fstat(fd, &st) == -1) { + perror("fstat"); + exit(1); + } + + return(st.st_size); +} + +static u8 * +read_file(int fd, u64 size) /* reads exactly size bytes from fd into a malloc'd buffer and returns it; exits on allocation failure, read error or short read */ +{ + u8 *buf = malloc(size); + + if (!buf) { + perror("malloc"); + exit(1); + } + + ssize_t rt = read(fd, buf, size); /* read() returns ssize_t; an int would truncate large counts */ + + if (rt == -1) { + perror("read"); + exit(1); + } + + if ((u64) rt != size) { + fprintf(stderr, "short read: %lld < %llu\n", (long long) rt, (unsigned long long) size); /* casts keep the format specifiers valid wherever u64 is not unsigned long */ + exit(1); + } + + return(buf); +} \ No newline at end of file