
commit
5231bb2f11
16 changed files with 1124 additions and 0 deletions
@ -0,0 +1,6 @@
@@ -0,0 +1,6 @@
|
||||
test |
||||
test.asm |
||||
test2 |
||||
test2.asm |
||||
*.hex |
||||
8086-decode |
@ -0,0 +1,19 @@
@@ -0,0 +1,19 @@
|
||||
; ======================================================================== |
||||
; |
||||
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. |
||||
; |
||||
; This software is provided 'as-is', without any express or implied |
||||
; warranty. In no event will the authors be held liable for any damages |
||||
; arising from the use of this software. |
||||
; |
||||
; Please see https://computerenhance.com for further information |
||||
; |
||||
; ======================================================================== */ |
||||
|
||||
; ======================================================================== |
||||
; LISTING 37 |
||||
; ======================================================================== |
||||
|
||||
; Assemble for the 16-bit 8086 instruction set.
bits 16

; A single register-to-register move between two 16-bit registers.
mov cx, bx
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
‰Ùˆå‰Ú‰Þ‰ûˆÈˆí‰Ã‰ó‰ü‰Å |
@ -0,0 +1,29 @@
@@ -0,0 +1,29 @@
|
||||
; ======================================================================== |
||||
; |
||||
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. |
||||
; |
||||
; This software is provided 'as-is', without any express or implied |
||||
; warranty. In no event will the authors be held liable for any damages |
||||
; arising from the use of this software. |
||||
; |
||||
; Please see https://computerenhance.com for further information |
||||
; |
||||
; ======================================================================== */ |
||||
|
||||
; ======================================================================== |
||||
; LISTING 38 |
||||
; ======================================================================== |
||||
|
||||
; Assemble for the 16-bit 8086 instruction set.
bits 16

; Register-to-register moves only, mixing 16-bit registers (cx, bx, ...)
; with 8-bit halves (ch, ah, al, cl).
mov cx, bx
mov ch, ah
mov dx, bx
mov si, bx
mov bx, di
mov al, cl
mov ch, ch        ; source and destination are the same register
mov bx, ax
mov bx, si
mov sp, di
mov bp, ax
@ -0,0 +1,56 @@
@@ -0,0 +1,56 @@
|
||||
#include "../common.h" |
||||
|
||||
static void |
||||
decode_and_print_reg_reg_movs(u8 *data, u64 size) |
||||
{ |
||||
printf("bits 16\n\n"); |
||||
|
||||
/* We do not handle corrupt binaries */ |
||||
|
||||
for (u64 i = 0; i < size; i += 2) { |
||||
u8 b1 = data[i + 0]; |
||||
u8 b2 = data[i + 1]; |
||||
|
||||
u8 D_flag = (b1 & 0x2) >> 1; |
||||
u8 W_flag = b1 & 0x1; |
||||
|
||||
u8 mod = (b2 & 0xC0) >> 6; |
||||
u8 reg_field = (b2 & 0x38) >> 3; |
||||
u8 rm_field = b2 & 0x7; |
||||
|
||||
const char **regs = W_flag ? REGS_W1 : REGS_W0; |
||||
const char **rms = W_flag ? RM_W1 : RM_W0; |
||||
|
||||
if (D_flag) { |
||||
/* Instruction destination is specified in REG field */ |
||||
printf("mov %s, %s\n", regs[reg_field], rms[rm_field]); |
||||
} else { |
||||
/* Instruction source is specified in REG field */ |
||||
printf("mov %s, %s\n", rms[rm_field], regs[reg_field]); |
||||
} |
||||
} |
||||
} |
||||
|
||||
int |
||||
main(int argc, char **argv) |
||||
{ |
||||
if (argc != 2) { |
||||
fprintf(stderr, "Usage: %s binary_file\n", argv[0]); |
||||
return(1); |
||||
} |
||||
|
||||
char *filename = argv[1]; |
||||
int fd = open(filename, O_RDONLY); |
||||
|
||||
if (fd == -1) { |
||||
perror("open"); |
||||
return(1); |
||||
} |
||||
|
||||
u64 size = file_size(fd); |
||||
u8 *data = read_file(fd, size); |
||||
|
||||
decode_and_print_reg_reg_movs(data, size); |
||||
|
||||
return(0); |
||||
} |
Binary file not shown.
@ -0,0 +1,45 @@
@@ -0,0 +1,45 @@
|
||||
; ======================================================================== |
||||
; |
||||
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. |
||||
; |
||||
; This software is provided 'as-is', without any express or implied |
||||
; warranty. In no event will the authors be held liable for any damages |
||||
; arising from the use of this software. |
||||
; |
||||
; Please see https://computerenhance.com for further information |
||||
; |
||||
; ======================================================================== |
||||
|
||||
; ======================================================================== |
||||
; LISTING 39 |
||||
; ======================================================================== |
||||
|
||||
; Assemble for the 16-bit 8086 instruction set.
bits 16

; Register-to-register
mov si, bx
mov dh, al

; Immediates small enough for 8 bits, moved into a 16-bit register
mov cx, 12
mov cx, -12

; Immediates that need 16 bits, moved into a 16-bit register
mov dx, 3948
mov dx, -3948

; Source address calculation
mov al, [bx + si]
mov bx, [bp + di]
mov dx, [bp]

; Source address calculation plus 8-bit displacement
mov ah, [bx + si + 4]

; Source address calculation plus 16-bit displacement
mov al, [bx + si + 4999]

; Dest address calculation
mov [bx + di], cx
mov [bp + si], cl
mov [bp], ch
Binary file not shown.
@ -0,0 +1,38 @@
@@ -0,0 +1,38 @@
|
||||
; ======================================================================== |
||||
; |
||||
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. |
||||
; |
||||
; This software is provided 'as-is', without any express or implied |
||||
; warranty. In no event will the authors be held liable for any damages |
||||
; arising from the use of this software. |
||||
; |
||||
; Please see https://computerenhance.com for further information |
||||
; |
||||
; ======================================================================== |
||||
|
||||
; ======================================================================== |
||||
; LISTING 40 |
||||
; ======================================================================== |
||||
|
||||
; Assemble for the 16-bit 8086 instruction set.
bits 16

; Signed displacements (negative offsets from the base/index registers)
mov ax, [bx + di - 37]
mov [si - 300], cx
mov dx, [bx - 32]

; Explicit sizes (immediate stored to memory, width given by byte/word)
mov [bp + di], byte 7
mov [di + 901], word 347

; Direct address (no base or index register)
mov bp, [5]
mov bx, [3458]

; Memory-to-accumulator test
mov ax, [2555]
mov ax, [16]

; Accumulator-to-memory test
mov [2554], ax
mov [15], ax
@ -0,0 +1,281 @@
@@ -0,0 +1,281 @@
|
||||
#include "../common.h" |
||||
|
||||
static void |
||||
print_mov(u8 wide, u8 swap, struct rm dest, struct rm src) |
||||
{ |
||||
const char **regs = wide ? REGS_W1 : REGS_W0; |
||||
|
||||
if (swap) { |
||||
struct rm buf = dest; |
||||
dest = src; |
||||
src = buf; |
||||
} |
||||
|
||||
printf("mov "); |
||||
|
||||
/* Dest can not be an immediate, so dest.imm indicates if there's a displacement */ |
||||
|
||||
if (dest.reg1 == NO_REGISTER && dest.reg2 == NO_REGISTER && dest.disp != 0) { |
||||
printf("[%d]", dest.disp); |
||||
} else if (dest.reg2 == NO_REGISTER && dest.imm == 0) { |
||||
printf("%s", regs[dest.reg1]); |
||||
} else if (dest.reg2 != NO_REGISTER && dest.imm == 0) { |
||||
printf("[%s + %s]", REGS_W1[dest.reg1], REGS_W1[dest.reg2]); |
||||
} else if (dest.reg2 == NO_REGISTER && dest.imm != 0) { |
||||
printf("[%s + %d]", REGS_W1[dest.reg1], dest.disp); |
||||
} else if (dest.reg2 != NO_REGISTER && dest.imm != 0) { |
||||
printf("[%s + %s + %d]", REGS_W1[dest.reg1], REGS_W1[dest.reg2], dest.disp); |
||||
} |
||||
|
||||
printf(", "); |
||||
|
||||
if (src.reg1 == NO_REGISTER) { |
||||
if (src.disp == 0) { |
||||
printf("%s %d", wide ? "word" : "byte", src.imm); /* only the src can be an immediate */ |
||||
} else { |
||||
printf("[%d]", src.disp); |
||||
} |
||||
} else { |
||||
if (src.reg2 == NO_REGISTER && src.imm == 0) { |
||||
printf("%s", regs[src.reg1]); |
||||
} else if (src.reg2 != NO_REGISTER && src.imm == 0) { |
||||
printf("[%s + %s]", REGS_W1[src.reg1], REGS_W1[src.reg2]); |
||||
} else if (src.reg2 == NO_REGISTER && src.imm != 0) { |
||||
printf("[%s + %d]", REGS_W1[src.reg1], src.disp); |
||||
} else if (src.reg2 != NO_REGISTER && src.imm != 0) { |
||||
printf("[%s + %s + %d]", REGS_W1[src.reg1], REGS_W1[src.reg2], src.disp); |
||||
} |
||||
} |
||||
|
||||
printf("\n"); |
||||
} |
||||
|
||||
static int |
||||
decode_rm(u8 *data, int offset, struct rm *location) |
||||
{ |
||||
int advance = 0; |
||||
|
||||
u8 b2 = data[offset + 1]; |
||||
u8 mod = (b2 & 0xC0) >> 6; |
||||
u8 rm_field = b2 & 0x7; |
||||
|
||||
if (mod == MOD_REGISTER) { |
||||
location->reg1 = rm_field; |
||||
location->reg2 = -1; |
||||
location->disp = 0; |
||||
location->imm = 0; |
||||
|
||||
advance = 2; |
||||
} else if (mod == MOD_MEMORY) { |
||||
s16 b3 = data[offset + 2]; |
||||
s16 b4 = data[offset + 3]; |
||||
|
||||
if (rm_field == 0x6) { |
||||
location->reg1 = NO_REGISTER; |
||||
location->reg2 = NO_REGISTER; |
||||
location->disp = (b4 << 8) | b3; |
||||
location->imm = 0; |
||||
advance = 4; |
||||
} else { |
||||
location->reg1 = MOD_PAIRS_FIRST[rm_field]; |
||||
location->reg2 = MOD_PAIRS_SECOND[rm_field]; |
||||
location->disp = 0; |
||||
location->imm = 1; |
||||
advance = 2; |
||||
} |
||||
} else if (mod == MOD_MEMORY_8) { |
||||
s8 b3 = data[offset + 2]; |
||||
|
||||
location->reg1 = MOD_PAIRS_FIRST[rm_field]; |
||||
location->reg2 = MOD_PAIRS_SECOND[rm_field]; |
||||
location->disp = b3; |
||||
location->imm = 1; |
||||
|
||||
advance = 3; |
||||
} else if (mod == MOD_MEMORY_16) { |
||||
s16 b3 = data[offset + 2]; |
||||
s16 b4 = data[offset + 3]; |
||||
|
||||
location->reg1 = MOD_PAIRS_FIRST[rm_field]; |
||||
location->reg2 = MOD_PAIRS_SECOND[rm_field]; |
||||
location->disp = (b4 << 8) | b3; |
||||
location->imm = 1; |
||||
|
||||
advance = 4; |
||||
} |
||||
|
||||
return(advance); |
||||
} |
||||
|
||||
static void |
||||
decode_and_print_more_movs(u8 *data, u64 size) |
||||
{ |
||||
printf("bits 16\n\n"); |
||||
|
||||
/* We still do not handle corrupt binaries */ |
||||
|
||||
u64 offset = 0; |
||||
|
||||
while (offset < size) { |
||||
u8 b1 = data[offset + 0]; |
||||
|
||||
if ((b1 & MASK_REG_RM) == OPCODE_REG_RM) { |
||||
u8 b2 = data[offset + 1]; |
||||
u8 W_flag = b1 & 0x1; |
||||
u8 D_flag = (b1 & 0x2) >> 1; |
||||
u8 reg_field = (b2 & 0x38) >> 3; |
||||
|
||||
struct rm dest = { 0 }; |
||||
struct rm src = { 0 }; |
||||
|
||||
dest.reg1 = reg_field; |
||||
dest.reg2 = -1; |
||||
dest.disp = 0; |
||||
dest.imm = 0; |
||||
|
||||
offset += decode_rm(data, offset, &src); |
||||
|
||||
print_mov(W_flag, !D_flag, dest, src); |
||||
} else if ((b1 & MASK_IMM_RM) == OPCODE_IMM_RM) { |
||||
u8 W_flag = b1 & 0x1; |
||||
u8 D_flag = 1; /* Immediate to register-memory always has mod/rm as destination */ |
||||
|
||||
struct rm dest = { 0 }; |
||||
struct rm src = { 0 }; |
||||
|
||||
offset += decode_rm(data, offset, &dest); |
||||
|
||||
s16 immediate = 0; |
||||
|
||||
if (W_flag) { |
||||
s16 b5 = data[offset]; |
||||
s16 b6 = data[offset + 1]; |
||||
immediate = (b6 << 8) | b5; |
||||
offset += 2; |
||||
} else { |
||||
s16 b5 = data[offset]; |
||||
immediate = b5; |
||||
offset += 1; |
||||
} |
||||
|
||||
src.reg1 = NO_REGISTER; |
||||
src.imm = immediate; |
||||
|
||||
print_mov(W_flag, !D_flag, dest, src); |
||||
} else if ((b1 & MASK_IMM_REG) == OPCODE_IMM_REG) { |
||||
u8 W_flag = (b1 & 0x8) >> 3; |
||||
u16 b2 = data[offset + 1]; |
||||
u8 reg = b1 & 0x7; |
||||
|
||||
struct rm dest = { 0 }; |
||||
struct rm src = { 0 }; |
||||
|
||||
dest.reg1 = reg; |
||||
dest.reg2 = NO_REGISTER; |
||||
dest.disp = 0; |
||||
dest.imm = 0; |
||||
|
||||
s16 immediate = 0; |
||||
|
||||
if (W_flag) { |
||||
/* A 16-bit immediate follows */ |
||||
u16 b3 = data[offset + 2]; |
||||
immediate = (b3 << 8) | b2; |
||||
offset += 3; |
||||
} else { |
||||
/* An 8-bit immediate follows */ |
||||
immediate = b2; |
||||
offset += 2; |
||||
} |
||||
|
||||
src.reg1 = NO_REGISTER; |
||||
src.reg2 = NO_REGISTER; |
||||
src.disp = 0; |
||||
src.imm = immediate; |
||||
|
||||
print_mov(W_flag, 0, dest, src); |
||||
} else if ((b1 & MASK_MEM_ACC) == OPCODE_MEM_ACC) { |
||||
u8 W_flag = b1 & 0x1; |
||||
u8 b2 = data[offset + 1]; |
||||
|
||||
struct rm dest = { 0 }; |
||||
struct rm src = { 0 }; |
||||
|
||||
dest.reg1 = AX; |
||||
dest.reg2 = NO_REGISTER; |
||||
dest.disp = 0; |
||||
dest.imm = 0; |
||||
|
||||
u16 address = 0; |
||||
|
||||
if (W_flag) { |
||||
u16 b3 = data[offset + 2]; |
||||
address = (b3 << 8) | b2; |
||||
offset += 3; |
||||
} else { |
||||
address = b2; |
||||
offset += 2; |
||||
} |
||||
|
||||
src.reg1 = NO_REGISTER; |
||||
src.reg2 = NO_REGISTER; |
||||
src.disp = address; |
||||
src.imm = 0; |
||||
|
||||
print_mov(W_flag, 0, dest, src); |
||||
} else if ((b1 & MASK_ACC_MEM) == OPCODE_ACC_MEM) { |
||||
u8 W_flag = b1 & 0x1; |
||||
u8 b2 = data[offset + 1]; |
||||
|
||||
struct rm dest = { 0 }; |
||||
struct rm src = { 0 }; |
||||
|
||||
dest.reg1 = AX; |
||||
dest.reg2 = NO_REGISTER; |
||||
dest.disp = 0; |
||||
dest.imm = 0; |
||||
|
||||
u16 address = 0; |
||||
|
||||
if (W_flag) { |
||||
u16 b3 = data[offset + 2]; |
||||
address = (b3 << 8) | b2; |
||||
offset += 3; |
||||
} else { |
||||
address = b2; |
||||
offset += 2; |
||||
} |
||||
|
||||
src.reg1 = NO_REGISTER; |
||||
src.reg2 = NO_REGISTER; |
||||
src.disp = address; |
||||
src.imm = 0; |
||||
|
||||
print_mov(W_flag, 1, dest, src); |
||||
} |
||||
} |
||||
} |
||||
|
||||
int |
||||
main(int argc, char **argv) |
||||
{ |
||||
if (argc != 2) { |
||||
fprintf(stderr, "Usage: %s binary_file\n", argv[0]); |
||||
return(1); |
||||
} |
||||
|
||||
char *filename = argv[1]; |
||||
int fd = open(filename, O_RDONLY); |
||||
|
||||
if (fd == -1) { |
||||
perror("open"); |
||||
return(1); |
||||
} |
||||
|
||||
u64 size = file_size(fd); |
||||
u8 *data = read_file(fd, size); |
||||
|
||||
decode_and_print_more_movs(data, size); |
||||
|
||||
return(0); |
||||
} |
@ -0,0 +1,12 @@
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash

# Round-trip test: disassemble each listing binary, reassemble the output
# with nasm and require the result to be identical to the original binary.
# Exits 1 on the first failing step, 0 when every listing round-trips.

for bin_file in listing_0039_more_movs listing_0040_challenge_movs; do
	# A decoder failure must fail the test instead of comparing stale output
	if ! ./8086-decode "$bin_file" > test.asm; then
		echo "Decoding $bin_file failed!"
		exit 1
	fi

	# nasm writes its output to "test" here, which is what diff compares below
	if ! nasm test.asm; then
		echo "Assembling decoded $bin_file failed!"
		exit 1
	fi

	if ! diff -s test "$bin_file"; then
		echo "Test $bin_file failed!"
		exit 1
	fi
done

exit 0
Binary file not shown.
@ -0,0 +1,121 @@
@@ -0,0 +1,121 @@
|
||||
; ======================================================================== |
||||
; |
||||
; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. |
||||
; |
||||
; This software is provided 'as-is', without any express or implied |
||||
; warranty. In no event will the authors be held liable for any damages |
||||
; arising from the use of this software. |
||||
; |
||||
; Please see https://computerenhance.com for further information |
||||
; |
||||
; ======================================================================== |
||||
|
||||
; ======================================================================== |
||||
; LISTING 41 |
||||
; ======================================================================== |
||||
|
||||
; Assemble for the 16-bit 8086 instruction set.
bits 16

; ADD: register/memory operands, immediates and the accumulator forms
add bx, [bx+si]
add bx, [bp]
add si, 2
add bp, 2
add cx, 8
add bx, [bp + 0]
add cx, [bx + 2]
add bh, [bp + si + 4]
add di, [bp + di + 6]
add [bx+si], bx
add [bp], bx
add [bp + 0], bx
add [bx + 2], cx
add [bp + si + 4], bh
add [bp + di + 6], di
add byte [bx], 34
add word [bp + si + 1000], 29
add ax, [bp]
add al, [bx + si]
add ax, bx
add al, ah
add ax, 1000
add al, -30
add al, 9

; SUB: the same operand combinations as the ADD group
sub bx, [bx+si]
sub bx, [bp]
sub si, 2
sub bp, 2
sub cx, 8
sub bx, [bp + 0]
sub cx, [bx + 2]
sub bh, [bp + si + 4]
sub di, [bp + di + 6]
sub [bx+si], bx
sub [bp], bx
sub [bp + 0], bx
sub [bx + 2], cx
sub [bp + si + 4], bh
sub [bp + di + 6], di
sub byte [bx], 34
sub word [bx + di], 29
sub ax, [bp]
sub al, [bx + si]
sub ax, bx
sub al, ah
sub ax, 1000
sub al, -30
sub al, 9

; CMP: the same operand combinations, plus a direct-address destination
cmp bx, [bx+si]
cmp bx, [bp]
cmp si, 2
cmp bp, 2
cmp cx, 8
cmp bx, [bp + 0]
cmp cx, [bx + 2]
cmp bh, [bp + si + 4]
cmp di, [bp + di + 6]
cmp [bx+si], bx
cmp [bp], bx
cmp [bp + 0], bx
cmp [bx + 2], cx
cmp [bp + si + 4], bh
cmp [bp + di + 6], di
cmp byte [bx], 34
cmp word [4834], 29
cmp ax, [bp]
cmp al, [bx + si]
cmp ax, bx
cmp al, ah
cmp ax, 1000
cmp al, -30
cmp al, 9

; Forward and backward jumps between two labels
test_label0:
jnz test_label1
jnz test_label0
test_label1:
jnz test_label0
jnz test_label1

; Every conditional jump and loop mnemonic, all targeting the same label
label:
je label
jl label
jle label
jb label
jbe label
jp label
jo label
js label
jne label
jnl label
jg label
jnb label
ja label
jnp label
jno label
jns label
loop label
loopz label
loopnz label
jcxz label
@ -0,0 +1,349 @@
@@ -0,0 +1,349 @@
|
||||
#include "../common.h" |
||||
|
||||
static void |
||||
print_inst(int instruction_offset, struct instruction *inst, int *labels, int label_count) |
||||
{ |
||||
if (inst->opcode > 0xF) { |
||||
for (int i = 0; i < label_count; ++i) { |
||||
if (instruction_offset + inst->size + inst->jump_offset == labels[i]) { |
||||
printf("%s label_%d\n", OP_NAMES[inst->opcode], i); |
||||
return; |
||||
} |
||||
} |
||||
|
||||
CRASH_SHOULDNOT(); |
||||
} |
||||
|
||||
const char **regs = inst->wide ? REGS_W1 : REGS_W0; |
||||
struct rm dest = inst->dst; |
||||
struct rm src = inst->src; |
||||
|
||||
if (inst->swap) { |
||||
struct rm buf = dest; |
||||
dest = src; |
||||
src = buf; |
||||
} |
||||
|
||||
printf("%s ", OP_NAMES[inst->opcode]); |
||||
|
||||
/* Dest can not be an immediate, so dest.imm indicates if there's a displacement */ |
||||
|
||||
if (dest.reg1 == NO_REGISTER && dest.reg2 == NO_REGISTER && dest.disp != 0) { |
||||
printf("[%d]", dest.disp); |
||||
} else if (dest.reg2 == NO_REGISTER && dest.imm == 0) { |
||||
printf("%s", regs[dest.reg1]); |
||||
} else if (dest.reg2 != NO_REGISTER && dest.imm == 0) { |
||||
printf("[%s + %s]", REGS_W1[dest.reg1], REGS_W1[dest.reg2]); |
||||
} else if (dest.reg2 == NO_REGISTER && dest.imm != 0) { |
||||
printf("[%s + %d]", REGS_W1[dest.reg1], dest.disp); |
||||
} else if (dest.reg2 != NO_REGISTER && dest.imm != 0) { |
||||
printf("[%s + %s + %d]", REGS_W1[dest.reg1], REGS_W1[dest.reg2], dest.disp); |
||||
} |
||||
|
||||
printf(", "); |
||||
|
||||
if (src.reg1 == NO_REGISTER) { |
||||
if (src.disp == 0) { |
||||
printf("%s %d", inst->wide ? "word" : "byte", src.imm); /* only the src can be an immediate */ |
||||
} else { |
||||
printf("[%d]", src.disp); |
||||
} |
||||
} else { |
||||
if (src.reg2 == NO_REGISTER && src.imm == 0) { |
||||
printf("%s", regs[src.reg1]); |
||||
} else if (src.reg2 != NO_REGISTER && src.imm == 0) { |
||||
printf("[%s + %s]", REGS_W1[src.reg1], REGS_W1[src.reg2]); |
||||
} else if (src.reg2 == NO_REGISTER && src.imm != 0) { |
||||
printf("[%s + %d]", REGS_W1[src.reg1], src.disp); |
||||
} else if (src.reg2 != NO_REGISTER && src.imm != 0) { |
||||
printf("[%s + %s + %d]", REGS_W1[src.reg1], REGS_W1[src.reg2], src.disp); |
||||
} |
||||
} |
||||
|
||||
printf("\n"); |
||||
} |
||||
|
||||
static int |
||||
decode_rm(u8 *data, int offset, struct rm *location) |
||||
{ |
||||
int advance = 0; |
||||
|
||||
u8 b2 = data[offset + 1]; |
||||
u8 mod = (b2 & 0xC0) >> 6; |
||||
u8 rm_field = b2 & 0x7; |
||||
|
||||
if (mod == MOD_REGISTER) { |
||||
location->reg1 = rm_field; |
||||
location->reg2 = -1; |
||||
location->disp = 0; |
||||
location->imm = 0; |
||||
|
||||
advance = 2; |
||||
} else if (mod == MOD_MEMORY) { |
||||
s16 b3 = data[offset + 2]; |
||||
s16 b4 = data[offset + 3]; |
||||
|
||||
if (rm_field == 0x6) { |
||||
location->reg1 = NO_REGISTER; |
||||
location->reg2 = NO_REGISTER; |
||||
location->disp = (b4 << 8) | b3; |
||||
location->imm = 0; |
||||
advance = 4; |
||||
} else { |
||||
location->reg1 = MOD_PAIRS_FIRST[rm_field]; |
||||
location->reg2 = MOD_PAIRS_SECOND[rm_field]; |
||||
location->disp = 0; |
||||
location->imm = 1; |
||||
advance = 2; |
||||
} |
||||
} else if (mod == MOD_MEMORY_8) { |
||||
s8 b3 = data[offset + 2]; |
||||
|
||||
location->reg1 = MOD_PAIRS_FIRST[rm_field]; |
||||
location->reg2 = MOD_PAIRS_SECOND[rm_field]; |
||||
location->disp = b3; |
||||
location->imm = 1; |
||||
|
||||
advance = 3; |
||||
} else if (mod == MOD_MEMORY_16) { |
||||
s16 b3 = data[offset + 2]; |
||||
s16 b4 = data[offset + 3]; |
||||
|
||||
location->reg1 = MOD_PAIRS_FIRST[rm_field]; |
||||
location->reg2 = MOD_PAIRS_SECOND[rm_field]; |
||||
location->disp = (b4 << 8) | b3; |
||||
location->imm = 1; |
||||
|
||||
advance = 4; |
||||
} |
||||
|
||||
return(advance); |
||||
} |
||||
|
||||
static int |
||||
decode_and_print_add_sub_cmp_jmp(u8 *data, u64 size, struct instruction *stream) |
||||
{ |
||||
printf("bits 16\n\n"); |
||||
|
||||
/* We still do not handle corrupt binaries */ |
||||
|
||||
u64 offset = 0; |
||||
int stream_at = 0; |
||||
|
||||
while (offset < size) { |
||||
int saved_offset = offset; |
||||
|
||||
u8 b1 = data[offset + 0]; |
||||
u8 b2 = data[offset + 1]; |
||||
|
||||
u8 antiop = b1 & ANTIOP_MASK; |
||||
u8 op = 0; |
||||
|
||||
if (antiop == 0x00 || antiop == 0x4) { |
||||
op = (b1 & 0x38) >> 3; |
||||
} else { |
||||
op = (b2 & 0x38) >> 3; |
||||
} |
||||
|
||||
if (antiop == 0x00) { |
||||
u8 W_flag = b1 & 0x1; |
||||
u8 D_flag = (b1 & 0x2) >> 1; |
||||
u8 reg_field = (b2 & 0x38) >> 3; |
||||
|
||||
struct rm dest = { 0 }; |
||||
struct rm src = { 0 }; |
||||
|
||||
dest.reg1 = reg_field; |
||||
dest.reg2 = -1; |
||||
dest.disp = 0; |
||||
dest.imm = 0; |
||||
|
||||
offset += decode_rm(data, offset, &src); |
||||
|
||||
struct instruction inst = { |
||||
.size = offset - saved_offset, |
||||
.opcode = op, |
||||
.dst = dest, |
||||
.src = src, |
||||
.wide = W_flag, |
||||
.swap = !D_flag |
||||
}; |
||||
|
||||
stream[stream_at++] = inst; |
||||
|
||||
continue; |
||||
} else if (antiop == 0x80) { |
||||
u8 W_flag = b1 & 0x1; |
||||
u8 S_flag = (b1 & 0x2) >> 1; |
||||
u8 D_flag = 1; /* Immediate to register-memory always has mod/rm as destination */ |
||||
|
||||
struct rm dest = { 0 }; |
||||
struct rm src = { 0 }; |
||||
|
||||
offset += decode_rm(data, offset, &dest); |
||||
|
||||
s16 immediate = 0; |
||||
|
||||
if (S_flag && !W_flag) { |
||||
s16 b5 = data[offset]; |
||||
s16 b6 = data[offset + 1]; |
||||
immediate = (b6 << 8) | b5; |
||||
offset += 2; |
||||
} else { |
||||
s16 b5 = data[offset]; |
||||
immediate = b5; |
||||
offset += 1; |
||||
} |
||||
|
||||
src.reg1 = NO_REGISTER; |
||||
src.imm = immediate; |
||||
|
||||
struct instruction inst = { |
||||
.size = offset - saved_offset, |
||||
.opcode = op, |
||||
.dst = dest, |
||||
.src = src, |
||||
.wide = W_flag, |
||||
.swap = !D_flag |
||||
}; |
||||
|
||||
stream[stream_at++] = inst; |
||||
|
||||
continue; |
||||
} else if (antiop == 0x4) { |
||||
u8 W_flag = b1 & 0x1; |
||||
s16 immediate = 0; |
||||
|
||||
struct rm dest = { 0 }; |
||||
struct rm src = { 0 }; |
||||
|
||||
dest.reg1 = AX; |
||||
dest.reg2 = NO_REGISTER; |
||||
dest.disp = 0; |
||||
dest.imm = 0; |
||||
|
||||
if (W_flag) { |
||||
s16 b3 = data[offset + 2]; |
||||
immediate= (b3 << 8) | b2; |
||||
offset += 3; |
||||
} else { |
||||
immediate = b2; |
||||
offset += 2; |
||||
} |
||||
|
||||
src.reg1 = NO_REGISTER; |
||||
src.reg2 = NO_REGISTER; |
||||
src.disp = 0; |
||||
src.imm = immediate; |
||||
|
||||
struct instruction inst = { |
||||
.size = offset - saved_offset, |
||||
.opcode = op, |
||||
.dst = dest, |
||||
.src = src, |
||||
.wide = W_flag, |
||||
.swap = 0, |
||||
}; |
||||
|
||||
stream[stream_at++] = inst; |
||||
|
||||
continue; |
||||
} |
||||
|
||||
offset += 2; |
||||
|
||||
struct instruction inst = { |
||||
.size = offset - saved_offset, |
||||
.opcode = b1, |
||||
.jump_offset = (s8) b2 |
||||
}; |
||||
|
||||
stream[stream_at++] = inst; |
||||
} |
||||
|
||||
return(stream_at); |
||||
} |
||||
|
||||
static int |
||||
insert_labels(struct instruction *in_stream, int instruction_count, int *labels) |
||||
{ |
||||
int instruction_offset = 0; |
||||
int label_count = 0; |
||||
|
||||
for (int i = 0; i < instruction_count; ++i) { |
||||
struct instruction *inst = in_stream + i; |
||||
|
||||
if (inst->opcode > 0xF) { |
||||
/* jmp-like */ |
||||
int target = instruction_offset + inst->size + inst->jump_offset; |
||||
int label_exists = 0; |
||||
|
||||
for (int j = 0; j < label_count; ++j) { |
||||
if (labels[j] == target) { |
||||
label_exists = 1; |
||||
break; |
||||
} |
||||
} |
||||
|
||||
if (!label_exists) { |
||||
labels[label_count++] = target; |
||||
} |
||||
} |
||||
|
||||
instruction_offset += inst->size; |
||||
} |
||||
|
||||
return(label_count); |
||||
} |
||||
|
||||
static void |
||||
print_instruction_stream(struct instruction *stream, int instruction_count, |
||||
int *labels, int label_count) |
||||
{ |
||||
int instruction_offset = 0; |
||||
|
||||
for (int i = 0; i < instruction_count; ++i) { |
||||
struct instruction *inst = stream + i; |
||||
|
||||
for (int j = 0; j < label_count; ++j) { |
||||
if (labels[j] == instruction_offset) { |
||||
printf("label_%d:\n", j); |
||||
break; |
||||
} |
||||
} |
||||
|
||||
print_inst(instruction_offset, stream + i, labels, label_count); |
||||
|
||||
instruction_offset += inst->size; |
||||
} |
||||
} |
||||
|
||||
int |
||||
main(int argc, char **argv) |
||||
{ |
||||
if (argc != 2) { |
||||
fprintf(stderr, "Usage: %s binary_file\n", argv[0]); |
||||
return(1); |
||||
} |
||||
|
||||
char *filename = argv[1]; |
||||
int fd = open(filename, O_RDONLY); |
||||
|
||||
if (fd == -1) { |
||||
perror("open"); |
||||
return(1); |
||||
} |
||||
|
||||
u64 size = file_size(fd); |
||||
u8 *data = read_file(fd, size); |
||||
|
||||
struct instruction stream[MAX_INSTRUCTIONS] = { 0 }; |
||||
int labels[MAX_LABELS] = { 0 }; |
||||
|
||||
int instruction_count = decode_and_print_add_sub_cmp_jmp(data, size, stream); |
||||
int label_count = insert_labels(stream, instruction_count, labels); |
||||
|
||||
print_instruction_stream(stream, instruction_count, labels, label_count); |
||||
|
||||
return(0); |
||||
} |
@ -0,0 +1,166 @@
@@ -0,0 +1,166 @@
|
||||
#include <stdint.h> /* uint32_t etc */ |
||||
#include <stdlib.h> /* exit */ |
||||
#include <stdio.h> /* perror, fprintf */ |
||||
#include <fcntl.h> /* open */ |
||||
#include <unistd.h> /* read */ |
||||
#include <sys/stat.h> /* fstat */ |
||||
|
||||
/* Short aliases for the fixed-width unsigned integer types */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

/* Short aliases for the fixed-width signed integer types */
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;

/* Capacity of the decoded instruction array and of the jump-label array */
#define MAX_INSTRUCTIONS 1024
#define MAX_LABELS 128
||||
|
||||
/* 16-bit register numbers; each value is the register's index in REGS_W1 */
enum regs_w {
    AX = 0,
    CX = 1,
    DX = 2,
    BX = 3,
    SP = 4,
    BP = 5,
    SI = 6,
    DI = 7,
};
||||
|
||||
/* 8-bit register numbers; each value is the register's index in REGS_W0 */
enum regs_h {
    AL = 0,
    CL = 1,
    DL = 2,
    BL = 3,
    AH = 4,
    CH = 5,
    DH = 6,
    BH = 7,
};
||||
|
||||
/* Values of the 2-bit MOD field (top two bits of the second instruction byte) */
enum mov_mod {
    MOD_MEMORY = 0x00,    /* memory operand, no displacement (R/M 6: direct address) */
    MOD_MEMORY_8 = 0x01,  /* memory operand followed by an 8-bit displacement */
    MOD_MEMORY_16 = 0x02, /* memory operand followed by a 16-bit displacement */
    MOD_REGISTER = 0x03,  /* R/M names a register */
};
||||
|
||||
/*
 * First-byte patterns of the supported MOV encodings. A byte matches when
 * (byte & MASK_x) == OPCODE_x, using the mask of the same name from
 * enum mov_opcode_mask.
 */
enum mov_opcode {
    OPCODE_REG_RM = 0x88,  /* register/memory to/from register */
    OPCODE_IMM_RM = 0xC6,  /* immediate to register/memory */
    OPCODE_IMM_REG = 0xB0, /* immediate to register */
    OPCODE_MEM_ACC = 0xA0, /* memory to accumulator */
    OPCODE_ACC_MEM = 0xA2, /* accumulator to memory */
};
||||
|
||||
/*
 * Masks selecting the fixed bits of the first instruction byte; the bits
 * that are masked out carry per-instruction flags or a register number.
 */
enum mov_opcode_mask {
    MASK_REG_RM = 0xFC,  /* low two bits are the D and W flags */
    MASK_IMM_RM = 0xFE,  /* low bit is the W flag */
    MASK_IMM_REG = 0xF0, /* low four bits are the W flag and the register */
    MASK_MEM_ACC = 0xFE, /* low bit is the W flag */
    MASK_ACC_MEM = 0xFE, /* low bit is the W flag */

    MASK_ADD_SUB_CMP = 0xFC,
};
||||
|
||||
/*
 * One instruction operand: a register, a memory reference, a direct
 * address or an immediate. Which one it is follows from the combination of
 * the fields, as interpreted by the print routines.
 */
struct rm {
    s8 reg1;  /* register index; NO_REGISTER (-1) for an immediate or a direct address */
    s8 reg2;  /* second register of a memory reference; NO_REGISTER (-1) means no second */
    s16 disp; /* displacement or direct address; 0 is treated as "no direct address" */
    s16 imm;  /* immediate value; for other operands non-zero marks a memory reference */
};
||||
|
||||
/* One decoded instruction */
struct instruction {
    u8 size;   /* encoded length in bytes */
    u8 opcode; /* index into OP_NAMES; values above 0xF are jump-like */
    u8 wide;   /* non-zero for 16-bit operands, zero for 8-bit */
    u8 swap;   /* non-zero when dst and src are exchanged for printing */

    /* for mov, add, sub, cmp */
    struct rm dst;
    struct rm src;

    /* for control-flow instructions: signed offset from the end of the instruction */
    s8 jump_offset;
};
||||
|
||||
/* Marks an unused register slot in struct rm */
static const s8 NO_REGISTER = -1;
/* Applied to the first instruction byte to tell the arithmetic encodings apart */
static const u8 ANTIOP_MASK = 0xC4;

/* Register names indexed by register number: 8-bit (W=0) and 16-bit (W=1) */
static const char *REGS_W0[] = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
static const char *REGS_W1[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };

/* Registers of a memory reference, indexed by the 3-bit R/M field */
static const s8 MOD_PAIRS_FIRST[] = { BX, BX, BP, BP, SI, DI, BP, BX };
static const s8 MOD_PAIRS_SECOND[] = { SI, DI, SI, DI, NO_REGISTER, NO_REGISTER, NO_REGISTER, NO_REGISTER };
||||
|
||||
/*
 * Mnemonics indexed by struct instruction's opcode field. Indices up to 0xF
 * are used for the arithmetic operations and mov; larger indices are the
 * first instruction byte of a jump or loop. Indices without an entry are
 * NULL.
 */
static const char *OP_NAMES[] = {
    [0x0] = "add",
    [0x5] = "sub",
    [0x7] = "cmp",
    [0xF] = "mov", /* Source? I made it up */

    /* conditional jumps */
    [0x74] = "je",
    [0x7C] = "jl",
    [0x7E] = "jle",
    [0x72] = "jb",
    [0x76] = "jbe",
    [0x7A] = "jp",
    [0x70] = "jo",
    [0x78] = "js",
    [0x75] = "jne",
    [0x7D] = "jnl",
    [0x7F] = "jg",
    [0x73] = "jnb",
    [0x77] = "ja",
    [0x7B] = "jnp",
    [0x71] = "jno",
    [0x79] = "jns",

    /* loops and jcxz */
    [0xE2] = "loop",
    [0xE1] = "loopz",
    [0xE0] = "loopnz",
    [0xE3] = "jcxz",
};
||||
|
||||
/* Report the current source line on stderr and exit with status 1 */
#define CRASH_NOTIMPL() do { fprintf(stderr, "line: %d. not implemented\n", __LINE__); exit(1); } while (0)
#define CRASH_SHOULDNOT() do { fprintf(stderr, "line: %d. should not happen\n", __LINE__); exit(1); } while (0)
||||
|
||||
static u64 |
||||
file_size(int fd) |
||||
{ |
||||
struct stat st = { 0 }; |
||||
|
||||
if (fstat(fd, &st) == -1) { |
||||
perror("fstat"); |
||||
exit(1); |
||||
} |
||||
|
||||
return(st.st_size); |
||||
} |
||||
|
||||
static u8 * |
||||
read_file(int fd, u64 size) |
||||
{ |
||||
u8 *buf = malloc(size); |
||||
|
||||
if (!buf) { |
||||
perror("malloc"); |
||||
exit(1); |
||||
} |
||||
|
||||
int rt = read(fd, buf, size); |
||||
|
||||
if (rt == -1) { |
||||
perror("read"); |
||||
exit(1); |
||||
} |
||||
|
||||
if (rt != (int) size) { |
||||
fprintf(stderr, "short read: %d < %lu\n", rt, size); |
||||
exit(1); |
||||
} |
||||
|
||||
return(buf); |
||||
} |
Loading…
Reference in new issue