#include <stdint.h>   /* uint32_t etc, SIZE_MAX */
#include <inttypes.h> /* PRIu64 */
#include <stdlib.h>   /* exit, malloc */
#include <stdio.h>    /* perror, fprintf */
#include <errno.h>    /* errno, EINTR */
#include <fcntl.h>    /* open */
#include <unistd.h>   /* read */
#include <sys/stat.h> /* fstat */

/* Short aliases for the fixed-width integer types. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t   s8;
typedef int16_t  s16;
typedef int32_t  s32;
typedef int64_t  s64;

#define MAX_INSTRUCTIONS 1024
#define MAX_LABELS       128

/* Register numbers; the values index REGS_W1 below. */
enum regs_w {
    AX = 0,
    CX = 1,
    DX = 2,
    BX = 3,
    SP = 4,
    BP = 5,
    SI = 6,
    DI = 7,
};

/* Register numbers; the values index REGS_W0 below. */
enum regs_h {
    AL = 0,
    CL = 1,
    DL = 2,
    BL = 3,
    AH = 4,
    CH = 5,
    DH = 6,
    BH = 7,
};

/* Values of the two-bit "mod" field (presumably of the ModR/M byte). */
enum mov_mod {
    MOD_MEMORY    = 0x00,
    MOD_MEMORY_8  = 0x01,
    MOD_MEMORY_16 = 0x02,
    MOD_REGISTER  = 0x03,
};

/* Base opcode byte for each mov form; pair with the mov_opcode_mask entries. */
enum mov_opcode {
    OPCODE_REG_RM  = 0x88,
    OPCODE_IMM_RM  = 0xC6,
    OPCODE_IMM_REG = 0xB0,
    OPCODE_MEM_ACC = 0xA0,
    OPCODE_ACC_MEM = 0xA2,
};

/* Bit masks named after the opcode forms above. */
enum mov_opcode_mask {
    MASK_REG_RM      = 0xFC,
    MASK_IMM_RM      = 0xFE,
    MASK_IMM_REG     = 0xF0,
    MASK_MEM_ACC     = 0xFE,
    MASK_ACC_MEM     = 0xFE,
    MASK_ADD_SUB_CMP = 0xFC,
};

/* One operand of an instruction. */
struct rm {
    s8 reg1;  // -1 here means it's an immediate
    s8 reg2;  // -1 means no second
    s16 disp; // 0 means no displacement
    s16 imm;  // if it's not an immediate - imm indicates if there is a displacement
};

/* One decoded instruction. */
struct instruction {
    u8 size;
    u8 opcode;
    u8 wide;
    u8 swap;

    /* for mov, add, sub, cmp */
    struct rm dst;
    struct rm src;

    /* for control-flow instructions */
    s8 jump_offset;
};

/*
 * Sentinel stored in rm.reg1 / rm.reg2.  This is an enum constant rather
 * than a `static const` object because it is used in the initializer of
 * MOD_PAIRS_SECOND below, and ISO C requires constant expressions there.
 */
enum { NO_REGISTER = -1 };

static const u8 ANTIOP_MASK = 0xC4;

/* Register names, indexed by enum regs_h and enum regs_w respectively. */
static const char *REGS_W0[] = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
static const char *REGS_W1[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };

/*
 * First and second register for each of eight table slots (presumably
 * indexed by the r/m field); NO_REGISTER marks slots with a single register.
 */
static const s8 MOD_PAIRS_FIRST[]  = { BX, BX, BP, BP, SI, DI, BP, BX };
static const s8 MOD_PAIRS_SECOND[] = {
    SI, DI, SI, DI,
    NO_REGISTER, NO_REGISTER, NO_REGISTER, NO_REGISTER
};

/* Mnemonic lookup table; entries not listed here are NULL. */
static const char *OP_NAMES[] = {
    [0x0]  = "add",
    [0x5]  = "sub",
    [0x7]  = "cmp",
    [0xF]  = "mov", /* Source? I made it up */
    [0x74] = "je",
    [0x7C] = "jl",
    [0x7E] = "jle",
    [0x72] = "jb",
    [0x76] = "jbe",
    [0x7A] = "jp",
    [0x70] = "jo",
    [0x78] = "js",
    [0x75] = "jne",
    [0x7D] = "jnl",
    [0x7F] = "jg",
    [0x73] = "jnb",
    [0x77] = "ja",
    [0x7B] = "jnp",
    [0x71] = "jno",
    [0x79] = "jns",
    [0xE2] = "loop",
    [0xE1] = "loopz",
    [0xE0] = "loopnz",
    [0xE3] = "jcxz",
};

/* Print the current source line to stderr and exit with status 1. */
#define CRASH_NOTIMPL() do { fprintf(stderr, "line: %d. not implemented\n", __LINE__); exit(1); } while (0)
#define CRASH_SHOULDNOT() do { fprintf(stderr, "line: %d. should not happen\n", __LINE__); exit(1); } while (0)

/*
 * Return the st_size that fstat() reports for `fd`.
 * Prints a diagnostic and exits with status 1 if fstat() fails.
 */
static u64
file_size(int fd)
{
    struct stat st = { 0 };

    if (fstat(fd, &st) == -1) {
        perror("fstat");
        exit(1);
    }

    return(st.st_size);
}

/*
 * Read exactly `size` bytes from `fd` into a freshly malloc'ed buffer.
 *
 * The caller owns the returned buffer and releases it with free().  The
 * result is never NULL, even when `size` is 0.  On allocation failure, read
 * error, or end-of-file before `size` bytes arrived, a diagnostic is printed
 * to stderr and the process exits with status 1.
 */
static u8 *
read_file(int fd, u64 size)
{
    /* Guard the u64 -> size_t narrowing on targets with a 32-bit size_t. */
    if (size > SIZE_MAX) {
        fprintf(stderr, "file too large: %" PRIu64 "\n", size);
        exit(1);
    }
    size_t want = (size_t) size;

    /* malloc(0) may legally return NULL; always request at least one byte. */
    u8 *buf = malloc(want ? want : 1);
    if (!buf) {
        perror("malloc");
        exit(1);
    }

    /* read() may return fewer bytes than asked for, so keep going. */
    size_t total = 0;
    while (total < want) {
        ssize_t got = read(fd, buf + total, want - total);
        if (got == -1) {
            if (errno == EINTR) {
                continue; /* interrupted before any data: retry */
            }
            perror("read");
            exit(1);
        }
        if (got == 0) {
            /* End of file before the requested amount was available. */
            fprintf(stderr, "short read: %" PRIu64 " < %" PRIu64 "\n",
                    (u64) total, size);
            exit(1);
        }
        total += (size_t) got;
    }

    return(buf);
}