Diffstat (limited to 'src')
-rw-r--r--  src/common.h         |   85
-rw-r--r--  src/data_buffer.cpp  |   29
-rw-r--r--  src/data_buffer.h    |   41
-rw-r--r--  src/disasm.cpp       | 2010
-rw-r--r--  src/disasm.h         |  401
-rw-r--r--  src/elf_format.h     |  328
-rw-r--r--  src/elf_image.cpp    |  172
-rw-r--r--  src/elf_image.h      |   55
-rw-r--r--  src/main.cpp         |  836
9 files changed, 3957 insertions, 0 deletions
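The new src/common.h introduces the big-endian fetch helpers (GetU16BE, GetI16BE, GetI32BE) that the disassembler uses to read opcode words and extension words from the ROM buffer. A minimal stand-alone sketch of their intended use follows; the main() wrapper is illustrative only and is not part of this commit.

```cpp
#include <cstdint>
#include <cstdio>

// Copied from src/common.h in this commit: read a big-endian 16-bit word.
static inline constexpr uint16_t GetU16BE(const uint8_t *buffer)
{
    return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]);
}

int main()
{
    // 0x4e71 is the NOP opcode, which disasm_chunk_4 in src/disasm.cpp
    // recognizes and turns into OpCode::kNOP.
    const uint8_t code[] = {0x4e, 0x71};
    const uint16_t instr = GetU16BE(code);
    printf("0x%04x\n", instr); // prints 0x4e71
    return 0;
}
```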
diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..76fc956 --- /dev/null +++ b/src/common.h @@ -0,0 +1,85 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include <cstddef> +#include <cstdint> + +enum class BFDTarget { + kAuto, + kBinary, + kELF, +}; + +struct Settings { + bool raw_data_comment{}; + bool labels{}; + bool rel_labels{}; + bool abs_labels{}; + bool imm_labels{}; + bool short_ref_local_labels{}; + bool export_labels{}; + bool export_all_labels{}; + bool export_functions{}; + bool xrefs_to{}; + bool xrefs_from{}; + bool imm_hex{}; + bool follow_jumps{}; + bool walk{}; + BFDTarget bfd{}; + const char *indent{"\t"}; +}; + +using RefKindMask = unsigned; + +constexpr RefKindMask kRef1RelMask = (1 << 0); // For first argument +constexpr RefKindMask kRef1AbsMask = (1 << 1); // For first argument +constexpr RefKindMask kRef2RelMask = (1 << 2); // For second argument +constexpr RefKindMask kRef2AbsMask = (1 << 3); // For second argument +constexpr RefKindMask kRef1ReadMask = (1 << 4); // For first argument +constexpr RefKindMask kRef1WriteMask = (1 << 5); // For first argument +constexpr RefKindMask kRef2ReadMask = (1 << 6); // For second argument +constexpr RefKindMask kRef2WriteMask = (1 << 7); // For second argument +/// Indicates whether instruction is a call or just a branch, for any argument. +/// Calls are BSR and JSR, branches are DBcc, Bcc and JMP. +constexpr RefKindMask kRefCallMask = (1 << 8); +/// Hack flag for MOVEM with PC relative value when -frel-labels is set +constexpr RefKindMask kRefPcRelFix2Bytes = (1 << 9); +/// Register 1 may have immediate moving to address register which may be a +/// labeled location +constexpr RefKindMask kRef1ImmMask = (1 << 10); +/// Everything for first argument +constexpr RefKindMask kRef1Mask = kRef1RelMask | kRef1AbsMask | kRef1ReadMask | kRef1WriteMask | kRef1ImmMask; +/// Everything for Second argument +constexpr RefKindMask kRef2Mask = kRef2RelMask | kRef2AbsMask | kRef2ReadMask | kRef2WriteMask; +constexpr RefKindMask kRefRelMask = kRef1RelMask | kRef2RelMask; +constexpr RefKindMask kRefAbsMask = kRef1AbsMask | kRef2AbsMask; +constexpr RefKindMask kRef1DataMask = kRef1ReadMask | kRef1WriteMask; // For first argument +constexpr RefKindMask kRef2DataMask = kRef2ReadMask | kRef2WriteMask; // For second argument +constexpr RefKindMask kRefReadMask = kRef1ReadMask | kRef2ReadMask; // For any argument +constexpr RefKindMask kRefWriteMask = kRef1WriteMask | kRef2WriteMask; // For any argument +constexpr RefKindMask kRefDataMask = kRefReadMask | kRefWriteMask; +constexpr size_t kInstructionSizeStepBytes = 2; +constexpr size_t kRomSizeBytes = 4 * 1024 * 1024; +constexpr size_t kDisasmMapSizeElements = kRomSizeBytes / kInstructionSizeStepBytes; + +static inline constexpr size_t Min(size_t a, size_t b) { return a < b ? 
a : b; } + +static inline constexpr uint16_t GetU16BE(const uint8_t *buffer) +{ + return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]); +} + +static inline constexpr int16_t GetI16BE(const uint8_t *buffer) +{ + return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]); +} + +static inline constexpr int32_t GetI32BE(const uint8_t *buffer) +{ + return (static_cast<uint32_t>(buffer[0]) << 24) | + (static_cast<uint32_t>(buffer[1]) << 16) | + (static_cast<uint32_t>(buffer[2]) << 8) | + static_cast<uint32_t>(buffer[3]); +} diff --git a/src/data_buffer.cpp b/src/data_buffer.cpp new file mode 100644 index 0000000..33cb0b3 --- /dev/null +++ b/src/data_buffer.cpp @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "data_buffer.h" + +#include <cassert> +#include <cstring> + +void DataBuffer::Expand(size_t new_size) +{ + assert(buffer); + if (new_size <= buffer_size) { + return; + } + uint8_t *new_buffer{new uint8_t[new_size]}; + assert(new_buffer); + memcpy(new_buffer, buffer, occupied_size); + delete [] buffer; + buffer = new_buffer; + buffer_size = new_size; +} + +DataBuffer::~DataBuffer() +{ + delete [] buffer; + buffer = nullptr; + buffer_size = 0; + occupied_size = 0; +} diff --git a/src/data_buffer.h b/src/data_buffer.h new file mode 100644 index 0000000..bc264d2 --- /dev/null +++ b/src/data_buffer.h @@ -0,0 +1,41 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "common.h" + +#include <cstddef> +#include <cstdint> + +struct DataView { + const uint8_t *const buffer{}; + const size_t size{}; +}; + +struct DataBuffer { + DataBuffer(){}; + DataBuffer(const DataBuffer&) = delete; + constexpr DataBuffer(DataBuffer&& other) + : buffer(other.buffer) + , buffer_size(other.buffer_size) + , occupied_size(other.occupied_size) + { + other.occupied_size = 0; + other.buffer_size = 0; + other.buffer = nullptr; + }; + static constexpr size_t kInitialSize = 4 * 1024; + uint8_t *buffer{new uint8_t[kInitialSize]}; + size_t buffer_size{kInitialSize}; + size_t occupied_size{}; + void Expand(size_t new_size); + constexpr auto View(size_t offset = 0, size_t size = SIZE_MAX) const + { + if (offset >= occupied_size) { + return DataView{}; + } + return DataView{buffer + offset, Min(occupied_size - offset, size)}; + }; + ~DataBuffer(); +}; diff --git a/src/disasm.cpp b/src/disasm.cpp new file mode 100644 index 0000000..2b2ea81 --- /dev/null +++ b/src/disasm.cpp @@ -0,0 +1,2010 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "disasm.h" +#include "data_buffer.h" +#include "common.h" + +#include <cassert> +#include <cstdio> +#include <cstdlib> +#include <cstring> + +enum class MoveDirection: bool { + kRegisterToMemory = 0, + kMemoryToRegister = 1, +}; + +enum class ShiftDirection: bool { + kRight = 0, + kLeft = 1, +}; + +enum class ShiftKind: int { + kArithmeticShift = 0, + kLogicalShift = 1, + kRotateX = 2, + kRotate = 3, +}; + +constexpr Arg FetchImmediate(const uint32_t address, const DataView &code, const OpSize s) +{ + if (s == OpSize::kInvalid) { + return Arg{}; + } else if (s == OpSize::kLong) { + if (address + kInstructionSizeStepBytes < code.size) { + const int32_t value = GetI32BE(code.buffer + address); + return Arg::Immediate(value); + } + } else if (address < code.size) { + const int16_t value = GetI16BE(code.buffer + address); + if (s == OpSize::kByte) { + // Technically it is impossible to have value lower that -128 in 8 + // bits signed integer, but the second byte being 0xff is actually + // a valid 
thing and it is how values from -255 to -129 are + // represented. + if (value > 255 || value < -255) { + // Invalid immediate value for instruction with .b suffix + return Arg{}; + } + } + return Arg::Immediate(value); + } + return Arg{}; +} + +constexpr Arg FetchArg( + const uint32_t address, const DataView &code, const int m, const int xn, const OpSize s) +{ + switch (m) { + case 0: // Dn + return Arg::Dn(xn); + case 1: // An + return Arg::An(xn); + case 2: // (An) + return Arg::AnAddr(xn); + case 3: // (An)+ + return Arg::AnAddrIncr(xn); + case 4: // -(An) + return Arg::AnAddrDecr(xn); + case 5: // (d16, An), Additional Word + if (address < code.size) { + const int16_t d16 = GetI16BE(code.buffer + address); + return Arg::D16AnAddr(xn, d16); + } + break; + case 6: // (d8, An, Xi), Brief Extension Word + if (address < code.size) { + const uint16_t briefext = GetU16BE(code.buffer + address); + if (briefext & 0x0700) { + // briefext must have zeros on 8, 9 an 10-th bits, + // i.e. xxxx_x000_xxxx_xxxx + break; + } + // Xi number (lower 3 bits, mask 0x7) with An/Dn bit (mask 0x8) + const uint8_t xi = (briefext >> 12) & 0xf; + const OpSize s = ((briefext >> 11) & 1) ? OpSize::kLong : OpSize::kWord; + const int8_t d8 = briefext & 0xff; + return Arg::D8AnXiAddr(xn, xi, s, d8); + } + break; + case 7: + switch (xn) { + case 0: // (xxx).W, Additional Word + if (address < code.size) { + const int32_t w = GetI16BE(code.buffer + address); + return Arg::Word(w); + } + break; + case 1: // (xxx).L, Additional Long + if (address + kInstructionSizeStepBytes < code.size) { + const int32_t l = GetI32BE(code.buffer + address); + return Arg::Long(l); + } + break; + case 2: // (d16, PC), Additional Word + if (address < code.size) { + const int16_t d16 = GetI16BE(code.buffer + address); + return Arg::D16PCAddr(d16); + } + break; + case 3: // (d8, PC, Xi), Brief Extension Word + if (address < code.size) { + const uint16_t briefext = GetU16BE(code.buffer + address); + if (briefext & 0x0700) { + // briefext must have zeros on 8, 9 an 10-th bits, + // i.e. xxxx_x000_xxxx_xxxx + break; + } + // Xi number (lower 3 bits, mask 0x7) with An/Dn bit (mask 0x8) + const uint8_t xi = (briefext >> 12) & 0xf; + const OpSize s = ((briefext >> 11) & 1) ? 
OpSize::kLong : OpSize::kWord; + const int8_t d8 = briefext & 0xff; + return Arg::D8PCXiAddr(xn, xi, s, d8); + } + break; + case 4: // #imm + return FetchImmediate(address, code, s); + case 5: // Does not exist + case 6: // Does not exist + case 7: // Does not exist + break; + } + break; + } + return Arg{}; +} + +static Arg FetchArg( + const uint32_t address, const DataView &code, const uint16_t instr, const OpSize s) +{ + const int addrmode = instr & 0x3f; + const int m = (addrmode >> 3) & 7; + const int xn = addrmode & 7; + return FetchArg(address, code, m, xn, s); +} + +static size_t disasm_verbatim(DisasmNode &node, const uint16_t instr) +{ + node.op = Op::Raw(instr); + return node.size; +} + +static size_t disasm_jsr_jmp( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = OpSize::kWord; + const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + case AddrMode::kDn: // 4e80..4e87 / 4ec0..4ec7 + case AddrMode::kAn: // 4e88..4e8f / 4ec8..4ecf + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: // 4e90..4e97 / 4ed0..4ed7 + // NOTE: dynamic jump, ref_addr may possibly be obtained during the + // trace + break; + case AddrMode::kAnAddrIncr: // 4e98..4e9f / 4ed8..4edf + case AddrMode::kAnAddrDecr: // 4ea0..4ea7 / 4ee0..4ee7 + return disasm_verbatim(node, instr); + case AddrMode::kD16AnAddr: // 4ea8..4eaf / 4ee8..4eef + // NOTE: dynamic jump, ref_addr may possibly be obtained during the + // trace + break; + case AddrMode::kD8AnXiAddr: // 4eb0..4eb7 / 4ef0..4ef7 + // NOTE: dynamic jump, ref_addr may possibly be obtained during the + // trace + break; + case AddrMode::kWord: // 4eb8 / 4ef8 + { + const uint32_t ref_addr = static_cast<uint32_t>(a.lword); + node.ref1_addr = ref_addr; + node.ref_kinds = kRef1AbsMask; + } + break; + case AddrMode::kLong: // 4eb9 / 4ef9 + { + const uint32_t ref_addr = static_cast<uint32_t>(a.lword); + node.ref1_addr = ref_addr; + node.ref_kinds = kRef1AbsMask; + } + break; + case AddrMode::kD16PCAddr: // 4eba / 4efa + { + const uint32_t ref_addr = node.address + kInstructionSizeStepBytes + + static_cast<uint32_t>(a.d16_pc.d16); + node.ref1_addr = ref_addr; + node.ref_kinds = kRef1RelMask; + } + break; + case AddrMode::kD8PCXiAddr: // 4ebb / 4efb + // NOTE: dynamic jump, ref_addr may possibly be obtained during the + // trace + break; + case AddrMode::kImmediate: // 4ebc / 4efc + return disasm_verbatim(node, instr); + } + const bool is_jmp = instr & 0x40; + node.ref_kinds |= is_jmp ? 0 : kRefCallMask; + node.op = Op::Typical(is_jmp ? OpCode::kJMP : OpCode::kJSR, OpSize::kNone, a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_ext(DisasmNode &node, const OpSize opsize, const Arg arg) +{ + assert(arg.mode == AddrMode::kDn); + node.op = Op::Typical(OpCode::kEXT, opsize, arg); + return node.size = kInstructionSizeStepBytes + arg.Size(opsize); +} + +static size_t disasm_ext_movem( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto dir = static_cast<MoveDirection>((instr >> 10) & 1); + const unsigned m = (instr >> 3) & 7; + const unsigned xn = instr & 7; + const auto opsize = static_cast<OpSize>(((instr >> 6) & 1) + 1); + if (m == 0 && dir == MoveDirection::kRegisterToMemory) { + return disasm_ext(node, opsize, Arg::Dn(xn)); + } + if (node.address + kInstructionSizeStepBytes >= code.size) { + // Not enough space for regmask, but maybe it is just EXT? 
+ return disasm_verbatim(node, instr); + } + const unsigned regmask = GetU16BE(code.buffer + node.address + kInstructionSizeStepBytes); + if (regmask == 0) { + // This is just not representable: at least one register must be specified + return disasm_verbatim(node, instr); + } + const auto a = FetchArg( + node.address + kInstructionSizeStepBytes * 2, code, m, xn, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + case AddrMode::kDn: // 4880..4887 / 4c80..4c87 / 48c0..48c7 / 4cc0..4cc7 + case AddrMode::kAn: // 4888..488f / 4c88..4c8f / 48c8..48cf / 4cc8..4ccf + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: // 4890..4897 / 4c90..4c97 / 48d0..48d7 / 4cd0..4cd7 + break; + case AddrMode::kAnAddrIncr: // 4898..489f / 4c89..4c9f / 48d8..48df / 4cd8..4cdf + if (dir == MoveDirection::kRegisterToMemory) { + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kAnAddrDecr: // 48a0..48a7 / 4ca0..4ca7 / 48e0..48e7 / 4ce0..4ce7 + if (dir == MoveDirection::kMemoryToRegister) { + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kD16AnAddr: // 48a8..48af / 4c8a..4caf / 48e8..48ef / 4ce8..4cef + case AddrMode::kD8AnXiAddr: // 48b0..48b7 / 4cb0..4cb7 / 48f0..48f7 / 4cf0..4cf7 + break; + case AddrMode::kWord: // 48b8 / 4cb8 / 48f8 / 4cf8 + case AddrMode::kLong: // 48b9 / 4cb9 / 48f9 / 4cf9 + if (dir == MoveDirection::kRegisterToMemory) { + node.ref2_addr = static_cast<uint32_t>(a.lword); + node.ref_kinds = kRef2AbsMask | kRef2WriteMask; + } else { + node.ref1_addr = static_cast<uint32_t>(a.lword); + node.ref_kinds = kRef1AbsMask | kRef1ReadMask; + } + break; + case AddrMode::kD16PCAddr: // 48ba / 4cba / 48fa / 4cfa + case AddrMode::kD8PCXiAddr: // 48bb / 4cbb / 48fb / 4cfb + if (dir == MoveDirection::kRegisterToMemory) { + return disasm_verbatim(node, instr); + } else if (a.mode == AddrMode::kD16PCAddr) { + // XXX: kRefPcRelFix2Bytes flag is a hack that needed to correctly + // print label for PC relative referenced value of MOVEM. Alongside + // with *NOT* adding kInstructionSizeStepBytes to ref1_addr. Still + // figuring that out. + node.ref1_addr = node.address + kInstructionSizeStepBytes * 2 + + static_cast<uint32_t>(a.d16_pc.d16); + node.ref_kinds = kRef1RelMask | kRef1ReadMask | kRefPcRelFix2Bytes; + } + break; + case AddrMode::kImmediate: // 4ebc / 4efc + return disasm_verbatim(node, instr); + } + if (dir == MoveDirection::kMemoryToRegister) { + const auto arg2 = (a.mode == AddrMode::kAnAddrDecr) + ? Arg::RegMaskPredecrement(regmask) : Arg::RegMask(regmask); + node.op = Op::Typical(OpCode::kMOVEM, opsize, a, arg2); + } else { + const auto arg1 = (a.mode == AddrMode::kAnAddrDecr) + ? 
Arg::RegMaskPredecrement(regmask) : Arg::RegMask(regmask); + node.op = Op::Typical(OpCode::kMOVEM, opsize, arg1, a); + } + return node.size = kInstructionSizeStepBytes * 2 + a.Size(opsize); +} + +static size_t disasm_lea( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = OpSize::kLong; + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (addr.mode) { + case AddrMode::kInvalid: + case AddrMode::kDn: + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + break; + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + return disasm_verbatim(node, instr); + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + node.ref1_addr = static_cast<uint32_t>(addr.lword); + node.ref_kinds = kRef1AbsMask | kRef1ReadMask; + break; + case AddrMode::kD16PCAddr: + node.ref1_addr = node.address + kInstructionSizeStepBytes + + static_cast<uint32_t>(addr.d16_pc.d16); + node.ref_kinds = kRef1RelMask | kRef1ReadMask; + break; + case AddrMode::kD8PCXiAddr: + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + const unsigned an = ((instr >> 9) & 7); + const auto reg = Arg::An(an); + node.op = Op::Typical(OpCode::kLEA, opsize, addr, reg); + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_chk( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = OpSize::kWord; + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + const unsigned dn = ((instr >> 9) & 7); + const auto dst = Arg::Dn(dn); + node.op = Op::Typical(OpCode::kCHK, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_bra_bsr_bcc( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const int16_t dispmt0 = static_cast<int8_t>(instr & 0xff); + if (dispmt0 == -1) { + // This will definitely lead to executing invalid instruction and is + // also invalid for GNU AS to assemble + return disasm_verbatim(node, instr); + } + const auto opsize = dispmt0 ? OpSize::kShort : OpSize::kWord; + if (dispmt0 == 0) { + // Check the boundaries + if (node.address + kInstructionSizeStepBytes >= code.size) { + return disasm_verbatim(node, instr); + } + node.size = kInstructionSizeStepBytes * 2; + } else { + node.size = kInstructionSizeStepBytes; + } + const int16_t dispmt = kInstructionSizeStepBytes + (dispmt0 + ? dispmt0 : GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes)); + const uint32_t ref_addr = static_cast<uint32_t>(node.address + dispmt); + Condition condition = static_cast<Condition>((instr >> 8) & 0xf); + // False condition Indicates BSR + node.ref1_addr = ref_addr; + node.ref_kinds = kRef1RelMask | ((condition == Condition::kF) ? 
kRefCallMask : 0); + node.op = Op{OpCode::kBcc, opsize, condition, Arg::Displacement(dispmt)}; + return node.size; +} + +static OpCode OpCodeForBitOps(const unsigned opcode) +{ + switch (opcode) { + case 0: return OpCode::kBTST; + case 1: return OpCode::kBCHG; + case 2: return OpCode::kBCLR; + case 3: return OpCode::kBSET; + } + assert(false); + return OpCode::kNone; +} + +static size_t disasm_movep( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const unsigned dn = ((instr >> 9) & 7); + const unsigned an = instr & 7; + const OpSize opsize = ((instr >> 6) & 1) ? OpSize::kLong : OpSize::kWord; + const auto dir = static_cast<MoveDirection>(!((instr >> 7) & 1)); + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, 5, an, opsize); + if (addr.mode == AddrMode::kInvalid) { + // Boundary check failed, most likely + return disasm_verbatim(node, instr); + } + assert(addr.mode == AddrMode::kD16AnAddr); + const auto reg = Arg::Dn(dn); + if (dir == MoveDirection::kRegisterToMemory) { + node.op = Op::Typical(OpCode::kMOVEP, opsize, reg, addr); + } else { + node.op = Op::Typical(OpCode::kMOVEP, opsize, addr, reg); + } + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_src_arg_bitops_movep( + DisasmNode &node, + const uint16_t instr, + const DataView &code, + const bool has_dn_src = true) +{ + const unsigned m = (instr >> 3) & 7; + if ((m == 1) && has_dn_src) { + return disasm_movep(node, instr, code); + } + const unsigned dn = ((instr >> 9) & 7); + const unsigned xn = instr & 7; + const OpSize opsize0 = OpSize::kByte; + // Fetch AddrMode::kDn if has_dn_src, otherwise fetch AddrMode::kImmediate + // byte + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, + code, + (has_dn_src) ? 0 : 7, + dn, + opsize0); + if (src.mode == AddrMode::kInvalid) { + return disasm_verbatim(node, instr); + } + if (has_dn_src) { + assert(src.mode == AddrMode::kDn); + } else { + assert(dn == 4); + assert(src.mode == AddrMode::kImmediate); + } + const auto dst = FetchArg( + node.address + kInstructionSizeStepBytes + src.Size(opsize0), code, m, xn, opsize0); + const unsigned opcode = (instr >> 6) & 3; + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (opcode != 0) { + // PC relative destination address argument available for BTST only + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + const auto opsize = dst.mode == AddrMode::kDn ? OpSize::kLong : OpSize::kByte; + node.op = Op::Typical(OpCodeForBitOps(opcode), opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize0) + dst.Size(opsize0); +} + +static size_t disasm_bitops(DisasmNode &n, const uint16_t i, const DataView &c) +{ + return disasm_src_arg_bitops_movep(n, i, c, false); +} + +static size_t disasm_logical_immediate_to( + DisasmNode &node, OpCode opcode, OpSize opsize, Arg imm) +{ + node.op = Op::Typical(opcode, opsize, imm, (opsize == OpSize::kByte) ? 
Arg::CCR() : Arg::SR()); + return node.size = kInstructionSizeStepBytes * 2; +} + +static OpCode OpCodeForLogicalImmediate(const unsigned opcode) +{ + switch (opcode) { + case 0: return OpCode::kORI; + case 1: return OpCode::kANDI; + case 2: return OpCode::kSUBI; + case 3: return OpCode::kADDI; + case 4: break; + case 5: return OpCode::kEORI; + case 6: return OpCode::kCMPI; + case 7: break; + } + assert(false); + return OpCode::kNone; +} + +static size_t disasm_bitops_movep( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const bool has_source_reg = (instr >> 8) & 1; + if (has_source_reg) { + return disasm_src_arg_bitops_movep(node, instr, code); + } + const unsigned opcode = (instr >> 9) & 7; + if (opcode == 7) { + // Does not exist + return disasm_verbatim(node, instr); + } + if (opcode == 4) { + return disasm_bitops(node, instr, code); + } + const int m = (instr >> 3) & 7; + const int xn = instr & 7; + const auto opsize = static_cast<OpSize>((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + // Does not exist + return disasm_verbatim(node, instr); + } + // Anticipating #imm which means "to CCR"/"to SR", depending on OpSize + if (m == 7 && xn == 4) { + if (opcode == 2 || opcode == 3 || opcode == 6) { + // CMPI, SUBI and ANDI neither have immediate destination arguments + // nor "to CCR"/"to SR" variations + return disasm_verbatim(node, instr); + } + if (opsize == OpSize::kLong) { + // Does not exist + return disasm_verbatim(node, instr); + } + } + const auto src = FetchImmediate(node.address + kInstructionSizeStepBytes, code, opsize); + if (src.mode == AddrMode::kInvalid) { + return disasm_verbatim(node, instr); + } + assert(src.mode == AddrMode::kImmediate); + const OpCode mnemonic = OpCodeForLogicalImmediate(opcode); + if (m == 7 && xn == 4) { + return disasm_logical_immediate_to(node, mnemonic, opsize, src); + } + const auto dst = FetchArg( + node.address + kInstructionSizeStepBytes + src.Size(opsize), code, m, xn, opsize); + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (opcode != 6) { + // PC relative destination address argument available for CMPI only + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(mnemonic, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_move_movea( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const int opsize_raw = (instr >> 12) & 3; + const OpSize opsize = (opsize_raw == 1) + ? OpSize::kByte : (opsize_raw == 3 ? 
OpSize::kWord : OpSize::kLong); + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + if (opsize == OpSize::kByte) { + // Does not exist + return disasm_verbatim(node, instr); + } + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + node.ref1_addr = static_cast<uint32_t>(src.lword); + node.ref_kinds |= kRef1AbsMask | kRef1ReadMask; + break; + case AddrMode::kD16PCAddr: + node.ref1_addr = node.address + kInstructionSizeStepBytes + + static_cast<uint32_t>(src.d16_pc.d16); + node.ref_kinds |= kRef1RelMask | kRef1ReadMask; + break; + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + break; + } + const int m = (instr >> 6) & 7; + const int xn = (instr >> 9) & 7; + const auto dst = FetchArg( + node.address + kInstructionSizeStepBytes + src.Size(opsize), code, m, xn, opsize); + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + if (opsize == OpSize::kByte) { + // Does not exist + return disasm_verbatim(node, instr); + } + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + node.ref2_addr = static_cast<uint32_t>(dst.lword); + node.ref_kinds |= kRef2AbsMask | kRef2WriteMask; + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + // XXX Assuming that moving long immediate value into address register is + // basically a sneaky LEA. It may not be true in some cases. + if (src.type == ArgType::kImmediate && dst.type == ArgType::kAn) { + if (opsize == OpSize::kLong) { + node.ref1_addr = static_cast<uint32_t>(src.lword); + node.ref_kinds |= kRef1ImmMask | kRef1ReadMask; + } else if (opsize == OpSize::kWord) { + node.ref1_addr = static_cast<int16_t>(static_cast<uint16_t>(src.lword)); + node.ref_kinds |= kRef1ImmMask | kRef1ReadMask; + } + } + const auto opcode = (dst.mode == AddrMode::kAn) ? 
OpCode::kMOVEA : OpCode::kMOVE; + node.op = Op::Typical(opcode, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_move_from_sr( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto opsize = OpSize::kWord; + const auto dst = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kMOVE, opsize, Arg::SR(), dst); + return node.size = kInstructionSizeStepBytes + dst.Size(opsize); +} + +static size_t disasm_move_to( + DisasmNode &node, const uint16_t instr, const DataView &code, const ArgType reg) +{ + const auto opsize = OpSize::kWord; + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + break; + } + node.op = Op::Typical(OpCode::kMOVE, opsize, src, Arg{{reg}, {0}}); + return node.size = kInstructionSizeStepBytes + src.Size(opsize); +} + +static OpCode opcode_for_negx_clr_neg_not(const unsigned opcode) +{ + switch (opcode) { + case 0: return OpCode::kNEGX; + case 1: return OpCode::kCLR; + case 2: return OpCode::kNEG; + case 3: return OpCode::kNOT; + } + assert(false); + return OpCode::kNone; +} + +static size_t disasm_move_negx_clr_neg_not( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto opsize = static_cast<OpSize>((instr >> 6) & 3); + const unsigned opcode = (instr >> 9) & 3; + if (opsize == OpSize::kInvalid) { + switch (opcode) { + case 0: + return disasm_move_from_sr(node, instr, code); + case 1: + return disasm_verbatim(node, instr); + case 2: + return disasm_move_to(node, instr, code, ArgType::kCCR); + case 3: + return disasm_move_to(node, instr, code, ArgType::kSR); + } + assert(false); + return disasm_verbatim(node, instr); + } + const auto a = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(opcode_for_negx_clr_neg_not(opcode), opsize, a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_trivial( + 
DisasmNode &node, const OpCode opcode) +{ + node.op = Op::Typical(opcode, OpSize::kNone); + return node.size; +} + +static size_t disasm_tas( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto opsize = OpSize::kByte; + const auto a = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kTAS, opsize, a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_tst_tas_illegal( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto opsize = static_cast<OpSize>((instr >> 6) & 3); + const int m = (instr >> 3) & 7; + const int xn = instr & 7; + if (opsize == OpSize::kInvalid) { + if (m == 7 && xn == 4){ + return disasm_trivial(node, OpCode::kILLEGAL); + } + return disasm_tas(node, instr, code); + } + const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, m, xn, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kTST, opsize, a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_trap(DisasmNode &node, const uint16_t instr) +{ + const unsigned vector = instr & 0xf; + node.op = Op::Typical(OpCode::kTRAP, OpSize::kNone, Arg::Immediate(vector)); + return node.size = kInstructionSizeStepBytes; +} + +static size_t disasm_link_unlink(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const bool unlk = (instr >> 3) & 1; + const unsigned xn = instr & 7; + if (unlk) { + node.op = Op::Typical(OpCode::kUNLK, OpSize::kNone, Arg::AddrModeXn(ArgType::kAn, xn)); + return node.size = kInstructionSizeStepBytes; + } + const auto opsize = OpSize::kWord; + const auto src = FetchImmediate(node.address + kInstructionSizeStepBytes, code, opsize); + if (src.mode != AddrMode::kImmediate) { + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kLINK, opsize, Arg::AddrModeXn(ArgType::kAn, xn), src); + return node.size = kInstructionSizeStepBytes + src.Size(opsize); +} + +static size_t disasm_move_usp(DisasmNode &node, const uint16_t instr) +{ + const unsigned xn = instr & 7; + const auto dir = static_cast<MoveDirection>((instr >> 3) & 1); + if (dir == MoveDirection::kRegisterToMemory) { + node.op = Op::Typical( + OpCode::kMOVE, OpSize::kLong, Arg::An(xn), Arg::USP()); + } else { + node.op = Op::Typical( + OpCode::kMOVE, OpSize::kLong, Arg::USP(), Arg::An(xn)); + } + return node.size = kInstructionSizeStepBytes; +} + +static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, 
const DataView &code) +{ + const bool is_nbcd = !((instr >> 6) & 1); + const OpSize opsize0 = OpSize::kWord; + const auto arg = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize0); + bool is_swap{}; + switch (arg.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + if (!is_nbcd) { + is_swap = true; + } + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + break; + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + if (!is_nbcd) { + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + node.ref1_addr = static_cast<uint32_t>(arg.lword); + node.ref_kinds = kRef1AbsMask | kRef1ReadMask; + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (is_nbcd) { + return disasm_verbatim(node, instr); + } + if (arg.mode == AddrMode::kD16PCAddr) { + node.ref1_addr = node.address + kInstructionSizeStepBytes + + static_cast<uint32_t>(arg.d16_pc.d16); + node.ref_kinds = kRef1RelMask | kRef1ReadMask; + } + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + const auto opcode = is_nbcd ? OpCode::kNBCD : is_swap ? OpCode::kSWAP : OpCode::kPEA; + const auto opsize = is_nbcd ? OpSize::kByte : is_swap ? OpSize::kWord : OpSize::kLong; + node.op = Op::Typical(opcode, opsize, arg); + return node.size = kInstructionSizeStepBytes + arg.Size(opsize0); +} + +static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto a = FetchImmediate(node.address + kInstructionSizeStepBytes, code, OpSize::kWord); + if (a.mode != AddrMode::kImmediate) { + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kSTOP, OpSize::kNone, a); + return node.size = kInstructionSizeStepBytes * 2; +} + +static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + if ((instr & 0xf900) == 0x4000) { + return disasm_move_negx_clr_neg_not(node, instr, code); + } else if ((instr & 0xff80) == 0x4800) { + // NOTE: EXT is handled with MOVEM + return disasm_nbcd_swap_pea(node, instr, code); + } else if ((instr & 0xff00) == 0x4a00) { + return disasm_tst_tas_illegal(node, instr, code); + } else if ((instr & 0xfff0) == 0x4e40) { + return disasm_trap(node, instr); + } else if ((instr & 0xfff0) == 0x4e50) { + return disasm_link_unlink(node, instr, code); + } else if ((instr & 0xfff0) == 0x4e60) { + return disasm_move_usp(node, instr); + } else if ((instr & 0xfff8) == 0x4e70) { + if (instr == 0x4e70) { + return disasm_trivial(node, OpCode::kRESET); + } else if (instr == 0x4e71) { + return disasm_trivial(node, OpCode::kNOP); + } else if (instr == 0x4e72) { + return disasm_stop(node, instr, code); + } else if (instr == 0x4e73) { + return disasm_trivial(node, OpCode::kRTE); + } else if (instr == 0x4e75) { + return disasm_trivial(node, OpCode::kRTS); + } else if (instr == 0x4e76) { + return disasm_trivial(node, OpCode::kTRAPV); + } else if (instr == 0x4e77) { + return disasm_trivial(node, OpCode::kRTR); + } + } else if ((instr & 0xff80) == 0x4e80) { + return disasm_jsr_jmp(node, instr, code); + } else if ((instr & 0xfb80) == 0x4880) { + return disasm_ext_movem(node, instr, code); + } else if ((instr & 0xf1c0) == 0x41c0) { + return disasm_lea(node, instr, code); + } else if ((instr & 0xf1c0) == 0x4180) { + return disasm_chk(node, instr, code); + } + return disasm_verbatim(node, instr); +} + 
+static size_t disasm_addq_subq( + DisasmNode &node, const uint16_t instr, const DataView &code, const OpSize opsize) +{ + const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: // 5x00..5x07 / 5x40..5x47 / 5x80..5x87 + break; + case AddrMode::kAn: // 5x08..5x0f / 5x48..5x4f / 5x88..5x8f + if (opsize == OpSize::kByte) { + // 5x08..5x0f + // addqb and subqb with An do not exist + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kAnAddr: // 5x10..5x17 / 5x50..5x57 / 5x90..5x97 + case AddrMode::kAnAddrIncr: // 5x18..5x1f / 5x58..5x5f / 5x98..5x9f + case AddrMode::kAnAddrDecr: // 5x20..5x27 / 5x60..5x67 / 5xa0..5xa7 + case AddrMode::kD16AnAddr: // 5x28..5x2f / 5x68..5x6f / 5xa8..5xaf + case AddrMode::kD8AnXiAddr: // 5x30..5x37 / 5x70..5x77 / 5xb0..5xb7 + case AddrMode::kWord: // 5x38 / 5x78 / 5xb8 + case AddrMode::kLong: // 5x39 / 5x79 / 5xb9 + break; + case AddrMode::kD16PCAddr: // 5x3a / 5x7a / 5xba + case AddrMode::kD8PCXiAddr: // 5x3b / 5x7b / 5xbb + case AddrMode::kImmediate: // 5x3c / 5x7c / 5xbc + // Does not exist + return disasm_verbatim(node, instr); + } + const unsigned imm = ((uint8_t((instr >> 9) & 7) - 1) & 7) + 1; + const auto opcode = ((instr >> 8) & 1) ? OpCode::kSUBQ : OpCode::kADDQ; + node.op = Op::Typical(opcode, opsize, Arg::Immediate(imm), a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + if (node.address + kInstructionSizeStepBytes >= code.size) { + return disasm_verbatim(node, instr); + } + const int16_t dispmt_raw = GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes); + const int32_t dispmt = dispmt_raw + kInstructionSizeStepBytes; + node.ref2_addr = static_cast<uint32_t>(node.address + dispmt); + node.ref_kinds = kRef2RelMask; + node.op = Op{ + OpCode::kDBcc, + OpSize::kWord, + static_cast<Condition>((instr >> 8) & 0xf), + Arg::AddrModeXn(ArgType::kDn, (instr & 7)), + Arg::Displacement(dispmt), + }; + return node.size = kInstructionSizeStepBytes * 2; +} + +static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = OpSize::kWord; + const auto a = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: // 5xc0..5xc7, Dn + break; + case AddrMode::kAn: // 5xc8..5xcf, An + return disasm_dbcc(node, instr, code); + case AddrMode::kAnAddr: // 5xd0..5xd7 + case AddrMode::kAnAddrIncr: // 5xd8..5xdf + case AddrMode::kAnAddrDecr: // 5xe0..5xe7 + case AddrMode::kD16AnAddr: // 5xe8..5xef + case AddrMode::kD8AnXiAddr: // 5xf0..5xf7 + case AddrMode::kWord: // 5xf8 (xxx).W + case AddrMode::kLong: // 5xf9 (xxx).L + break; + case AddrMode::kD16PCAddr: // 5xfa + case AddrMode::kD8PCXiAddr: // 5xfb + case AddrMode::kImmediate: // 5xfc + // Does not exist + return disasm_verbatim(node, instr); + } + node.op = Op{OpCode::kScc, OpSize::kByte, static_cast<Condition>((instr >> 8) & 0xf), a}; + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_addq_subq_scc_dbcc(DisasmNode &n, const uint16_t instr, const DataView &c) +{ + const auto opsize = static_cast<OpSize>((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + return disasm_scc_dbcc(n, instr, c); + } + return disasm_addq_subq(n, 
instr, c, opsize); +} + +static size_t disasm_moveq(DisasmNode &node, const uint16_t instr) +{ + if (instr & 0x100) { + // Does not exist + return disasm_verbatim(node, instr); + } + const int xn = (instr >> 9) & 7; + const auto dst = Arg::Dn(xn); + const int8_t data = instr & 0xff; + const OpSize opsize = OpSize::kLong; + node.op = Op::Typical(OpCode::kMOVEQ, opsize, Arg::Immediate(data), dst); + return node.size = kInstructionSizeStepBytes + dst.Size(opsize); +} + +static size_t disasm_divu_divs_mulu_muls( + DisasmNode &node, + const uint16_t instr, + const DataView &code, + const OpCode opcode) +{ + const auto opsize = OpSize::kWord; + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + break; + } + const unsigned dn = (instr >> 9) & 7; + const auto dst = Arg::Dn(dn); + node.op = Op::Typical(opcode, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + dst.Size(opsize) + src.Size(opsize); +} + +static size_t disasm_addx_subx_abcd_sbcd( + DisasmNode &node, const uint16_t instr, const OpCode opcode) +{ + const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); + // Must be already handled by parent call + assert(opsize != OpSize::kInvalid); + const int m = (instr >> 3) & 1; + const int xn = instr & 7; + const int xi = (instr >> 9) & 7; + const auto src = m ? Arg::AnAddrDecr(xn) : Arg::Dn(xn); + const auto dst = m ? Arg::AnAddrDecr(xi) : Arg::Dn(xi); + // XXX GNU AS does not know ABCD.B, it only knows ABCD, but happily consumes + // SBCD.B and others. That's why it is OpSize::kNone specifically for ABCD + // mnemonic. It is probably a bug in GNU AS. + node.op = Op::Typical(opcode, (opcode == OpCode::kABCD) ? 
OpSize::kNone : opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_or_and( + DisasmNode &node, + const uint16_t instr, + const DataView &code, + const OpSize opsize, + const OpCode opcode) +{ + const bool dir_to_addr = (instr >> 8) & 1; + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (addr.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + if (dir_to_addr) { + // Switching dir when bot operands are data registers is not allowed + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (dir_to_addr) { + // PC relative cannot be destination + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kImmediate: + if (dir_to_addr) { + // immediate cannot be destination + return disasm_verbatim(node, instr); + } + break; + } + const auto reg = Arg::Dn((instr >> 9) & 7); + if (dir_to_addr) { + node.op = Op::Typical(opcode, opsize, reg, addr); + } else { + node.op = Op::Typical(opcode, opsize, addr, reg); + } + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_divu_divs_sbcd_or( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + // Also ensures that opsize == OpSize::kByte, i.e. 0b00 + if ((instr & 0x1f0) == 0x100) { + return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kSBCD); + } + const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + const bool is_signed = (instr >> 8) & 1; + const auto opcode = is_signed ? 
OpCode::kDIVS : OpCode::kDIVU; + return disasm_divu_divs_mulu_muls(node, instr, code, opcode); + } + return disasm_or_and(node, instr, code, opsize, OpCode::kOR); +} + +static size_t disasm_adda_suba_cmpa( + DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode) +{ + const OpSize opsize = static_cast<OpSize>(((instr >> 8) & 1) + 1); + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + case AddrMode::kAn: + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + break; + } + const unsigned an = (instr >> 9) & 7; + const auto dst = Arg::An(an); + node.op = Op::Typical(opcode, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_add_sub_cmp( + DisasmNode &node, + const uint16_t instr, + const DataView &code, + const OpCode opcode, + const OpSize opsize, + const bool dir_to_addr) +{ + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (addr.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + if (dir_to_addr || opsize == OpSize::kByte) { + // An cannot be destination and An cannot be used as byte + return disasm_verbatim(node, instr); + } + /* Fall through */ + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + if (dir_to_addr) { + node.ref2_addr = static_cast<uint32_t>(addr.lword); + node.ref_kinds = kRef2AbsMask | kRef2ReadMask; + } else { + node.ref1_addr = static_cast<uint32_t>(addr.lword); + node.ref_kinds = kRef1AbsMask | kRef1ReadMask; + } + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (dir_to_addr) { + // PC relative cannot be destination + return disasm_verbatim(node, instr); + } + if (addr.mode == AddrMode::kD16PCAddr) { + node.ref1_addr = node.address + kInstructionSizeStepBytes + + static_cast<uint32_t>(addr.d16_pc.d16); + node.ref_kinds = kRef1RelMask | kRef1ReadMask; + } + break; + case AddrMode::kImmediate: + if (dir_to_addr) { + // immediate cannot be destination + return disasm_verbatim(node, instr); + } + break; + } + const unsigned dn = (instr >> 9) & 7; + const auto reg = Arg::Dn(dn); + if (dir_to_addr) { + node.op = Op::Typical(opcode, opsize, reg, addr); + } else { + node.op = Op::Typical(opcode, opsize, addr, reg); + } + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_cmpm(DisasmNode &node, const uint16_t instr) +{ + const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); + // Must be already handled by parent call + assert(opsize != OpSize::kInvalid); + // M has to be set to 0b001 + assert(((instr >> 3) & 7) == 1); + const int xn = instr & 7; + const int xi = (instr >> 9) & 7; + const auto src = Arg::AnAddrIncr(xn); + const auto dst = Arg::AnAddrIncr(xi); + node.op = Op::Typical(OpCode::kCMPM, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t 
disasm_eor(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (addr.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + // PC relative and immediate cannot be destination + return disasm_verbatim(node, instr); + } + const auto reg = Arg::Dn((instr >> 9) & 7); + node.op = Op::Typical(OpCode::kEOR, opsize, reg, addr); + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_eor_cmpm_cmp_cmpa( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + return disasm_adda_suba_cmpa(node, instr, code, OpCode::kCMPA); + } + const bool dir_to_addr = ((instr >> 8) & 1); + if (!dir_to_addr) { + return disasm_add_sub_cmp(node, instr, code, OpCode::kCMP, opsize, dir_to_addr); + } + const int m = (instr >> 3) & 7; + if (m == 1) { + return disasm_cmpm(node, instr); + } + return disasm_eor(node, instr, code); +} + +static size_t disasm_exg(DisasmNode &node, const uint16_t instr) +{ + assert((instr & 0x130) == 0x100); + const int m1 = (instr >> 3) & 1; + const int m2 = (instr >> 6) & 3; + assert(m2 != 0); // Therefore m == 0 and m == 1 are impossible + assert(m2 != 3); // Therefore m == 6 and m == 7 are impossible + const int m = (m2 << 1) | m1; + assert(m != 4); // Only m == 2, m == 3 and m == 5 values are allowed + const int xn = instr & 7; + const int xi = (instr >> 9) & 7; + const auto src = (m == 3) ? Arg::An(xi) : Arg::Dn(xi); + const auto dst = (m == 2) ? Arg::Dn(xn) : Arg::An(xn); + // GNU AS does not accept size suffix for EXG, although it's size is always + // long word. + const auto opsize = OpSize::kNone; + node.op = Op::Typical(OpCode::kEXG, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + if ((instr & 0x1f0) == 0x100) { + return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kABCD); + } + const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + const bool is_signed = (instr >> 8) & 1; + const auto opcode = is_signed ? OpCode::kMULS : OpCode::kMULU; + return disasm_divu_divs_mulu_muls(node, instr, code, opcode); + } + const unsigned m_split = instr & 0x1f8; + if (m_split == 0x188 || m_split == 0x148 || m_split == 0x140) { + return disasm_exg(node, instr); + } + return disasm_or_and(node, instr, code, opsize, OpCode::kAND); +} + +static size_t disasm_add_sub_x_a( + DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode) +{ + const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + return disasm_adda_suba_cmpa(node, instr, code, (opcode == OpCode::kSUB) ? 
OpCode::kSUBA : OpCode::kADDA); + } + const bool dir_to_addr = (instr >> 8) & 1; + const unsigned m = (instr >> 3) & 7; + if (dir_to_addr && (m == 0 || m == 1)) { + return disasm_addx_subx_abcd_sbcd(node, instr, (opcode == OpCode::kSUB) ? OpCode::kSUBX : OpCode::kADDX); + } + return disasm_add_sub_cmp(node, instr, code, opcode, opsize, dir_to_addr); +} + +static OpCode ShiftKindToOpcode(const ShiftKind k, const ShiftDirection d) +{ + switch (k) { + case ShiftKind::kArithmeticShift: + return d == ShiftDirection::kLeft ? OpCode::kASL : OpCode::kASR; + case ShiftKind::kLogicalShift: + return d == ShiftDirection::kLeft ? OpCode::kLSL : OpCode::kLSR; + case ShiftKind::kRotateX: + return d == ShiftDirection::kLeft ? OpCode::kROXL : OpCode::kROXR; + case ShiftKind::kRotate: + return d == ShiftDirection::kLeft ? OpCode::kROL : OpCode::kROR; + } + assert(false); + return OpCode::kNone; +} + +static bool IsValidShiftKind(const ShiftKind k) +{ + return static_cast<int>(k) < 4; +} + +static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); + const unsigned xn = instr & 7; + const uint8_t rotation = (instr >> 9) & 7; + const ShiftKind kind = (opsize == OpSize::kInvalid) + ? static_cast<ShiftKind>(rotation) + : static_cast<ShiftKind>((instr >> 3) & 3); + if (!IsValidShiftKind(kind)) { + return disasm_verbatim(node, instr); + } + const auto dst = (opsize == OpSize::kInvalid) + ? FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize) + : Arg::Dn(xn); + if (opsize == OpSize::kInvalid) { + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + // Intersects with situation when args are "#1,%dx". GNU AS would + // not understand shift instruction with single argument of "%dx". + return disasm_verbatim(node, instr); + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + } + const unsigned imm = ((rotation - 1) & 7) + 1; + const unsigned src = (opsize == OpSize::kInvalid) ? 1 : rotation; + const auto dir = static_cast<ShiftDirection>((instr >> 8) & 1); + if (opsize == OpSize::kInvalid) { + node.op = Op::Typical(ShiftKindToOpcode(kind, dir), opsize, dst); + } else { + const unsigned m = (instr >> 5) & 1; + const auto arg1 = m ? 
Arg::AddrModeXn(ArgType::kDn, src) : Arg::Immediate(imm); + node.op = Op::Typical(ShiftKindToOpcode(kind, dir), opsize, arg1, dst); + } + return node.size = kInstructionSizeStepBytes + dst.Size(opsize); +} + +static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataView &c) +{ + switch ((i & 0xf000) >> 12) { + case 0x0: + return disasm_bitops_movep(n, i, c); + case 0x1: + case 0x2: + case 0x3: + return disasm_move_movea(n, i, c); + case 0x4: + return disasm_chunk_4(n, i, c); + case 0x5: + return disasm_addq_subq_scc_dbcc(n, i, c); + case 0x6: + return disasm_bra_bsr_bcc(n, i, c); + case 0x7: + return disasm_moveq(n, i); + case 0x8: + return disasm_divu_divs_sbcd_or(n, i, c); + case 0x9: + return disasm_add_sub_x_a(n, i, c, OpCode::kSUB); + case 0xa: + // Does not exist + return disasm_verbatim(n, i); + case 0xb: + return disasm_eor_cmpm_cmp_cmpa(n, i, c); + case 0xc: + return disasm_chunk_c(n, i, c); + case 0xd: + return disasm_add_sub_x_a(n, i, c, OpCode::kADD); + case 0xe: + return disasm_shift_rotate(n, i, c); + case 0xf: + // Does not exist + return disasm_verbatim(n, i); + } + assert(false); + return disasm_verbatim(n, i); +} + +size_t DisasmNode::Disasm(const DataView &code) +{ + // We assume that machine have no MMU and ROM data always starts at 0 + assert(this->address < code.size); + size = kInstructionSizeStepBytes; + ref_kinds = 0; + ref1_addr = 0; + ref2_addr = 0; + const uint16_t instr = GetU16BE(code.buffer + this->address); + if (IsInstruction(this->type)) { + return m68k_disasm(*this, instr, code); + } else { + // Data should not be disassembled + return disasm_verbatim(*this, instr); + } +} + +size_t DisasmNode::DisasmAsRaw(const DataView &code) +{ + // We assume that machine have no MMU and ROM data always starts at 0 + assert(this->address < code.size); + size = kInstructionSizeStepBytes; + ref_kinds = 0; + ref1_addr = 0; + ref2_addr = 0; + const uint16_t instr = GetU16BE(code.buffer + this->address); + return disasm_verbatim(*this, instr); +} + +static const char *ToString(const OpCode opcode, const Condition condition) +{ + switch (opcode) { + case OpCode::kNone: + assert(false); + break; + case OpCode::kRaw: return ".short"; + case OpCode::kORI: return "ori"; + case OpCode::kANDI: return "andi"; + case OpCode::kSUBI: return "subi"; + case OpCode::kADDI: return "addi"; + case OpCode::kEORI: return "eori"; + case OpCode::kCMPI: return "cmpi"; + case OpCode::kBTST: return "btst"; + case OpCode::kBCHG: return "bchg"; + case OpCode::kBCLR: return "bclr"; + case OpCode::kBSET: return "bset"; + case OpCode::kMOVEP: return "movep"; + case OpCode::kMOVEA: return "movea"; + case OpCode::kMOVE: return "move"; + case OpCode::kNEGX: return "negx"; + case OpCode::kCLR: return "clr"; + case OpCode::kNEG: return "neg"; + case OpCode::kNOT: return "not"; + case OpCode::kEXT: return "ext"; + case OpCode::kNBCD: return "nbcd"; + case OpCode::kSWAP: return "swap"; + case OpCode::kPEA: return "pea"; + case OpCode::kILLEGAL: return "illegal"; + case OpCode::kTAS: return "tas"; + case OpCode::kTST: return "tst"; + case OpCode::kTRAP: return "trap"; + case OpCode::kLINK: return "link"; + case OpCode::kUNLK: return "unlk"; + case OpCode::kRESET: return "reset"; + case OpCode::kNOP: return "nop"; + case OpCode::kSTOP: return "stop"; + case OpCode::kRTE: return "rte"; + case OpCode::kRTS: return "rts"; + case OpCode::kTRAPV: return "trapv"; + case OpCode::kRTR: return "rtr"; + case OpCode::kJSR: return "jsr"; + case OpCode::kJMP: return "jmp"; + case OpCode::kMOVEM: return "movem"; + case 
OpCode::kLEA: return "lea"; + case OpCode::kCHK: return "chk"; + case OpCode::kADDQ: return "addq"; + case OpCode::kSUBQ: return "subq"; + case OpCode::kScc: + switch(condition) { + case Condition::kT : return "st"; + case Condition::kF: return "sf"; + case Condition::kHI: return "shi"; + case Condition::kLS: return "sls"; + case Condition::kCC: return "scc"; + case Condition::kCS: return "scs"; + case Condition::kNE: return "sne"; + case Condition::kEQ: return "seq"; + case Condition::kVC: return "svc"; + case Condition::kVS: return "svs"; + case Condition::kPL: return "spl"; + case Condition::kMI: return "smi"; + case Condition::kGE: return "sge"; + case Condition::kLT: return "slt"; + case Condition::kGT: return "sgt"; + case Condition::kLE: return "sle"; + } + assert(false); + break; + case OpCode::kDBcc: + switch (condition) { + case Condition::kT: return "dbt"; + case Condition::kF: return "dbf"; + case Condition::kHI: return "dbhi"; + case Condition::kLS: return "dbls"; + case Condition::kCC: return "dbcc"; + case Condition::kCS: return "dbcs"; + case Condition::kNE: return "dbne"; + case Condition::kEQ: return "dbeq"; + case Condition::kVC: return "dbvc"; + case Condition::kVS: return "dbvs"; + case Condition::kPL: return "dbpl"; + case Condition::kMI: return "dbmi"; + case Condition::kGE: return "dbge"; + case Condition::kLT: return "dblt"; + case Condition::kGT: return "dbgt"; + case Condition::kLE: return "dble"; + } + assert(false); + break; + case OpCode::kBcc: + switch (condition) { + case Condition::kT: return "bra"; + case Condition::kF: return "bsr"; + case Condition::kHI: return "bhi"; + case Condition::kLS: return "bls"; + case Condition::kCC: return "bcc"; + case Condition::kCS: return "bcs"; + case Condition::kNE: return "bne"; + case Condition::kEQ: return "beq"; + case Condition::kVC: return "bvc"; + case Condition::kVS: return "bvs"; + case Condition::kPL: return "bpl"; + case Condition::kMI: return "bmi"; + case Condition::kGE: return "bge"; + case Condition::kLT: return "blt"; + case Condition::kGT: return "bgt"; + case Condition::kLE: return "ble"; + } + assert(false); + break; + case OpCode::kMOVEQ: return "moveq"; + case OpCode::kDIVU: return "divu"; + case OpCode::kDIVS: return "divs"; + case OpCode::kSBCD: return "sbcd"; + case OpCode::kOR: return "or"; + case OpCode::kSUB: return "sub"; + case OpCode::kSUBX: return "subx"; + case OpCode::kSUBA: return "suba"; + case OpCode::kEOR: return "eor"; + case OpCode::kCMPM: return "cmpm"; + case OpCode::kCMP: return "cmp"; + case OpCode::kCMPA: return "cmpa"; + case OpCode::kMULU: return "mulu"; + case OpCode::kMULS: return "muls"; + case OpCode::kABCD: return "abcd"; + case OpCode::kEXG: return "exg"; + case OpCode::kAND: return "and"; + case OpCode::kADD: return "add"; + case OpCode::kADDX: return "addx"; + case OpCode::kADDA: return "adda"; + case OpCode::kASR: return "asr"; + case OpCode::kASL: return "asl"; + case OpCode::kLSR: return "lsr"; + case OpCode::kLSL: return "lsl"; + case OpCode::kROXR: return "roxr"; + case OpCode::kROXL: return "roxl"; + case OpCode::kROR: return "ror"; + case OpCode::kROL: return "rol"; + } + assert(false); + return "?"; +} + +static const char *ToString(const OpSize s) +{ + switch (s) { + case OpSize::kNone: return ""; + case OpSize::kByte: return "b"; + case OpSize::kShort: return "s"; + case OpSize::kWord: return "w"; + case OpSize::kLong: return "l"; + } + assert(false); + return ""; +} + +static int OpcodeSNPrintf( + char *const buf, + const size_t bufsz, + const OpCode 
opcode, + const Condition condition, + const OpSize size_spec) +{ + return snprintf(buf, bufsz, "%s%s", ToString(opcode, condition), ToString(size_spec)); +} + +static char RegChar(const uint8_t xi) +{ + return (xi & 0x08) ? 'a' : 'd'; +} + +static char SizeSpecChar(const uint8_t xi) +{ + return (xi & 0x10) ? 'l' : 'w'; +} + +static unsigned RegNum(const uint8_t xi) +{ + return xi & 0x7; +} + +static size_t snprint_reg_mask( + char *const buf, const size_t bufsz, const uint32_t regmask_arg, const ArgType arg_type) +{ + const uint32_t regmask = regmask_arg & 0xffff; + size_t written = 0; + bool first_printed = 0; + size_t span = 0; + // 17-th bit used to close the span with 0 value unconditionally + for (int i = 0; i < 17; i++) { + const uint32_t mask = 1 << (arg_type == ArgType::kRegMaskPredecrement ? (15 - i) : i); + const bool hit = regmask & mask; + const bool span_open = hit && span == 0; + const bool span_closed = !hit && span > 1; + const int printable_i = i - (span_closed ? 1 : 0); + const int id = printable_i % 8; + const char regtype = (printable_i >= 8) ? 'a' : 'd'; + if (span_open || span_closed) { + const char *const delimiter = span_open ? (first_printed ? "/" : "") : "-"; + const size_t remaining = bufsz - written; + const int ret = snprintf(buf + written, remaining, "%s%%%c%d", delimiter, regtype, id); + assert(ret > 0); + assert(static_cast<unsigned>(ret) >= sizeof("%d0")-1); + assert(static_cast<unsigned>(ret) <= sizeof("-%d0")-1); + written += Min(remaining, ret); + first_printed = true; + } + span = hit ? span + 1 : 0; + } + assert(written < bufsz); // Output must not be truncated + return written; +} + +int Arg::SNPrint( + char *const buf, + const size_t bufsz, + const bool imm_as_hex, + const RefKindMask ref_kinds, + const char *const label, + const uint32_t self_addr, + const uint32_t ref_addr) const +{ + switch (type) { + case ArgType::kNone: + assert(false); + break; + case ArgType::kRaw: + return snprintf(buf, bufsz, "0x%04x", uword); + case ArgType::kDn: + return snprintf(buf, bufsz, "%%d%d", xn); + case ArgType::kAn: + return snprintf(buf, bufsz, "%%a%u", xn); + case ArgType::kAnAddr: + return snprintf(buf, bufsz, "%%a%u@", xn); + case ArgType::kAnAddrIncr: + return snprintf(buf, bufsz, "%%a%u@+", xn); + case ArgType::kAnAddrDecr: + return snprintf(buf, bufsz, "%%a%u@-", xn); + case ArgType::kD16AnAddr: + return snprintf(buf, bufsz, "%%a%u@(%d:w)", d16_an.an, d16_an.d16); + case ArgType::kD8AnXiAddr: + return snprintf( + buf, bufsz, "%%a%u@(%d,%%%c%u:%c)", + d8_an_xi.an, + d8_an_xi.d8, + RegChar(d8_an_xi.xi), + RegNum(d8_an_xi.xi), + SizeSpecChar(d8_an_xi.xi)); + case ArgType::kWord: + case ArgType::kLong: + { + const char c = type == ArgType::kLong ? 'l' : 'w'; + if (ref_kinds & kRefAbsMask) { + if (static_cast<uint32_t>(lword) == ref_addr) { + return snprintf(buf, bufsz, "%s:%c", label, c); + } else { + // It has to be AFTER the label we are gonna reference here + assert(static_cast<uint32_t>(lword) > ref_addr); + return snprintf(buf, bufsz, "%s+%d:%c", label, lword - ref_addr, c); + } + } else { + return snprintf(buf, bufsz, "0x%x:%c", lword, c); + } + } + case ArgType::kD16PCAddr: + if (ref_kinds & kRefRelMask) { + // XXX: Most of instructions with PC relative values have 2 bytes + // added to the offset, some does not. Still figuring that out. + const bool has_fix = ref_kinds & kRefPcRelFix2Bytes; + const uint32_t arg_addr = self_addr + d16_pc.d16 + kInstructionSizeStepBytes + (has_fix ? 
kInstructionSizeStepBytes : 0); + if (arg_addr == ref_addr) { + return snprintf(buf, bufsz, "%%pc@(%s:w)", label); + } else { + assert(arg_addr > ref_addr); + return snprintf(buf, bufsz, "%%pc@(%s+%d:w)", label, arg_addr - ref_addr); + } + } else { + return snprintf(buf, bufsz, "%%pc@(%d:w)", d16_pc.d16); + } + case ArgType::kD8PCXiAddr: + return snprintf( + buf, bufsz, "%%pc@(%d,%%%c%u:%c)", + d8_pc_xi.d8, + RegChar(d8_pc_xi.xi), + RegNum(d8_pc_xi.xi), + SizeSpecChar(d8_pc_xi.xi)); + case ArgType::kImmediate: + if (ref_kinds & kRef1ImmMask) { + if (static_cast<uint32_t>(lword) == ref_addr) { + return snprintf(buf, bufsz, "#%s", label); + } else { + // It has to be AFTER the label we are gonna reference here + assert(static_cast<uint32_t>(lword) > ref_addr); + return snprintf(buf, bufsz, "#%s+%d", label, lword - ref_addr); + } + } else if (imm_as_hex) { + return snprintf(buf, bufsz, "#0x%x", lword); + } else { + return snprintf(buf, bufsz, "#%d", lword); + } + case ArgType::kRegMask: + case ArgType::kRegMaskPredecrement: + return snprint_reg_mask(buf, bufsz, uword, type); + case ArgType::kDisplacement: + if (ref_kinds & kRefRelMask) { + if (static_cast<uint32_t>(self_addr + lword) == ref_addr) { + return snprintf(buf, bufsz, "%s", label); + } else { + assert(static_cast<uint32_t>(self_addr + lword) > ref_addr); + return snprintf(buf, bufsz, "%s+%d", label, (self_addr + lword) - ref_addr); + } + } else { + return snprintf(buf, bufsz, ".%s%d", lword >= 0 ? "+" : "", lword); + } + case ArgType::kCCR: + return snprintf(buf, bufsz, "%%ccr"); + case ArgType::kSR: + return snprintf(buf, bufsz, "%%sr"); + case ArgType::kUSP: + return snprintf(buf, bufsz, "%%usp"); + } + assert(false); + return -1; +} + +int Op::FPrint( + FILE *const stream, + const char *const indent, + const bool imm_as_hex, + const RefKindMask ref_kinds, + const char *const ref1_label, + const char *const ref2_label, + const uint32_t self_addr, + const uint32_t ref1_addr, + const uint32_t ref2_addr) const +{ + assert(opcode != OpCode::kNone); + char mnemonic_str[kMnemonicBufferSize]{}; + OpcodeSNPrintf(mnemonic_str, kMnemonicBufferSize, opcode, condition, size_spec); + if (arg1.type != ArgType::kNone) { + char arg1_str[kArgsBufferSize]{}; + const RefKindMask ref1_kinds = ref_kinds & (kRef1Mask | kRefPcRelFix2Bytes); + // It is useful to have immediate value printed as hex if destination + // argument is plain address register, status register or condition code + // register. USP is not the case because it's value may be moved only to + // or from An register. 
+ const bool imm_as_hex_2 = imm_as_hex || + arg2.type == ArgType::kAn || + arg2.type == ArgType::kCCR || + arg2.type == ArgType::kSR; + arg1.SNPrint( + arg1_str, + kArgsBufferSize, + imm_as_hex_2, + ref1_kinds, + ref1_label, + self_addr, + ref1_addr); + if (arg2.type != ArgType::kNone) { + char arg2_str[kArgsBufferSize]{}; + const RefKindMask ref2_kinds = ref_kinds & (kRef2Mask | kRefPcRelFix2Bytes); + arg2.SNPrint( + arg2_str, + kArgsBufferSize, + false, + ref2_kinds, + ref2_label, + self_addr, + ref2_addr); + return fprintf(stream, "%s%s %s,%s", indent, mnemonic_str, arg1_str, arg2_str); + } else { + return fprintf(stream, "%s%s %s", indent, mnemonic_str, arg1_str); + } + } else { + return fprintf(stream, "%s%s", indent, mnemonic_str); + } +} + +void DisasmNode::AddReferencedBy(const uint32_t address, const ReferenceType type) +{ + ReferenceNode *node{}; + if (this->last_ref_by) { + node = this->last_ref_by; + } else { + node = new ReferenceNode{}; + assert(node); + this->ref_by = this->last_ref_by = node; + } + node->refs[node->refs_count] = ReferenceRecord{type, address}; + node->refs_count++; + if (node->refs_count >= kRefsCountPerBuffer) { + ReferenceNode *new_node = new ReferenceNode{}; + assert(new_node); + node->next = new_node; + this->last_ref_by = new_node; + } +} + +DisasmNode::~DisasmNode() +{ + ReferenceNode *ref{this->ref_by}; + while (ref) { + ReferenceNode *prev = ref; + ref = ref->next; + delete prev; + } +} diff --git a/src/disasm.h b/src/disasm.h new file mode 100644 index 0000000..65429dc --- /dev/null +++ b/src/disasm.h @@ -0,0 +1,401 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "data_buffer.h" +#include "common.h" + +#include <cstddef> +#include <cstdint> +#include <cstdio> + +enum class OpSize: int { + kByte = 0, + kWord = 1, + kLong = 2, + kInvalid = 3, + kNone = kInvalid, + kShort, ///< Semantically is the same as kByte, pseudosize, used for Bcc +}; + +enum class OpCode: uint8_t { + kNone, + kRaw, ///< Emits ".short" + kORI, + kANDI, + kSUBI, + kADDI, + kEORI, + kCMPI, + kBTST, + kBCHG, + kBCLR, + kBSET, + kMOVEP, + kMOVEA, + kMOVE, + kNEGX, + kCLR, + kNEG, + kNOT, + kEXT, + kNBCD, + kSWAP, + kPEA, + kILLEGAL, + kTAS, + kTST, + kTRAP, + kLINK, + kUNLK, + kRESET, + kNOP, + kSTOP, + kRTE, + kRTS, + kTRAPV, + kRTR, + kJSR, + kJMP, + kMOVEM, + kLEA, + kCHK, + kADDQ, + kSUBQ, + kScc, + kDBcc, + kBcc, + kMOVEQ, + kDIVU, + kDIVS, + kSBCD, + kOR, + kSUB, + kSUBX, + kSUBA, + kEOR, + kCMPM, + kCMP, + kCMPA, + kMULU, + kMULS, + kABCD, + kEXG, + kAND, + kADD, + kADDX, + kADDA, + kASR, + kASL, + kLSR, + kLSL, + kROXR, + kROXL, + kROR, + kROL, +}; + +enum class Condition: uint8_t { + kT = 0, + kF = 1, + kHI = 2, + kLS = 3, + kCC = 4, + kCS = 5, + kNE = 6, + kEQ = 7, + kVC = 8, + kVS = 9, + kPL = 10, + kMI = 11, + kGE = 12, + kLT = 13, + kGT = 14, + kLE = 15, +}; + +enum class AddrMode: uint8_t { + kInvalid = 0, + kDn = 1, + kAn = 2, + kAnAddr = 3, + kAnAddrIncr = 4, + kAnAddrDecr = 5, + kD16AnAddr = 6, + kD8AnXiAddr = 7, + kWord = 8, + kLong = 9, + kD16PCAddr = 10, + kD8PCXiAddr = 11, + kImmediate = 12, +}; + +enum class ArgType: uint8_t { + kNone = 0, + kDn = 1, ///< Dn + kAn = 2, ///< An + kAnAddr = 3, ///< (An) + kAnAddrIncr = 4, ///< (An)+ + kAnAddrDecr = 5, ///< -(An) + kD16AnAddr = 6, ///< (d16,An) + kD8AnXiAddr = 7, ///< (d8,An,Xi) + kWord = 8, ///< (xxx).W + kLong = 9, ///< (xxx).L + kD16PCAddr = 10, ///< (d16,PC) + kD8PCXiAddr = 11, ///< (d8,PC,Xn) + kImmediate = 12, ///< #imm + kRegMask, + kRegMaskPredecrement, + kDisplacement, ///< For 
BRA, BSR, Bcc and DBcc + kCCR, + kSR, + kUSP, + kRaw, ///< Emits "0xXXXX" for ".short" +}; + +struct D8AnPCXiAddr { + uint8_t an; ///< ID number of An reg, for kD8AnXiAddr only + /*! ID number of Xi reg (3 lower bits), for kD8AnXiAddr and kD8PCXiAddr. + * Bit 3 (mask 0x8) means 0 == Dn, 1 == An. + * Bit 4 (mask 0x10) means 0 == Word, 1 == Long. + */ + uint8_t xi; + int8_t d8; ///< Displacement, for kD8AnXiAddr and kD8PCXiAddr +}; + +struct D16AnPCAddr { + uint8_t an; ///< ID number of An reg, for kD16AnAddr only + int16_t d16; ///< Displacement, for D16AnAddr and kD16PCAddr +}; + +static_assert(sizeof(D8AnPCXiAddr) <= sizeof(uint32_t), ""); +static_assert(sizeof(D16AnPCAddr) <= sizeof(uint32_t), ""); + +struct Arg { + union { + ArgType type{ArgType::kNone}; + AddrMode mode; + }; + union { + int32_t lword{}; ///< kLong, kWord, kDisplacement, kImmediate + uint16_t uword; ///< kRegMask, kRaw + uint8_t xn; ///< kDn, kAn, kAnAddr, kAnAddrIncr, kAnAddrDecr + D16AnPCAddr d16_an; ///< kD16AnAddr + D16AnPCAddr d16_pc; ///< kD16PCAddr + D8AnPCXiAddr d8_an_xi; ///< kD8AnXiAddr + D8AnPCXiAddr d8_pc_xi; ///< kD8PCXiAddr + }; + /// Size of the instruction extension: 0, 2 or 4 bytes + constexpr size_t Size(const OpSize s) const + { + switch (mode) { + case AddrMode::kInvalid: + case AddrMode::kDn: + case AddrMode::kAn: + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + return 0; + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + return 2; + case AddrMode::kLong: + return 4; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + return 2; + case AddrMode::kImmediate: + // Byte and Word immediate are of 2 bytes length + return s == OpSize::kLong ? 4 : 2; + } + return 0; + } + static constexpr auto AddrModeXn(const ArgType type, const uint8_t xn) { + Arg a{{type}, {0}}; + a.xn = xn; + return a; + } + static constexpr auto Dn(const uint8_t xn) { return AddrModeXn(ArgType::kDn, xn); } + static constexpr auto An(const uint8_t xn) { return AddrModeXn(ArgType::kAn, xn); } + static constexpr auto AnAddr(const uint8_t xn) { return AddrModeXn(ArgType::kAnAddr, xn); } + static constexpr auto AnAddrIncr(const uint8_t xn) + { + return AddrModeXn(ArgType::kAnAddrIncr, xn); + } + static constexpr auto AnAddrDecr(const uint8_t xn) + { + return AddrModeXn(ArgType::kAnAddrDecr, xn); + } + static constexpr auto D16AnAddr(const uint8_t xn, const int16_t d16) + { + Arg a{{ArgType::kD16AnAddr}, {0}}; + a.d16_an = D16AnPCAddr{xn, d16}; + return a; + } + static constexpr auto D16PCAddr(const int16_t d16) + { + Arg a{{ArgType::kD16PCAddr}, {0}}; + a.d16_pc = D16AnPCAddr{0, d16}; + return a; + } + static constexpr auto Word(const int16_t w) + { + Arg a{{ArgType::kWord}, {0}}; + a.lword = w; + return a; + } + static constexpr auto Long(const int32_t l) + { + Arg a{{ArgType::kLong}, {0}}; + a.lword = l; + return a; + } + static constexpr auto D8AnXiAddr( + const uint8_t xn, const uint8_t xi, const OpSize s, const int8_t d8) + { + Arg a{{ArgType::kD8AnXiAddr}, {0}}; + a.d8_an_xi = D8AnPCXiAddr{xn, uint8_t(xi | (s == OpSize::kLong ? 0x10u : 0u)), d8}; + return a; + } + static constexpr auto D8PCXiAddr( + const uint8_t xn, const uint8_t xi, const OpSize s, const int8_t d8) + { + Arg a{{ArgType::kD8PCXiAddr}, {0}}; + a.d8_pc_xi = D8AnPCXiAddr{xn, uint8_t(xi | (s == OpSize::kLong ? 
0x10u : 0u)), d8}; + return a; + } + static constexpr auto Immediate(const int32_t value) { + Arg a{{ArgType::kImmediate}, {0}}; + a.lword = value; + return a; + } + static constexpr auto RegMask(const uint16_t regmask) { + Arg a{{ArgType::kRegMask}, {0}}; + a.uword = regmask; + return a; + } + static constexpr auto RegMaskPredecrement(const uint16_t regmask) { + Arg a{{ArgType::kRegMaskPredecrement}, {0}}; + a.uword = regmask; + return a; + } + static constexpr auto Displacement(const int32_t displacement) { + Arg a{{ArgType::kDisplacement}, {0}}; + a.lword = displacement; + return a; + } + static constexpr auto CCR() { return Arg{{ArgType::kCCR}, {0}}; } + static constexpr auto SR() { return Arg{{ArgType::kSR}, {0}}; } + static constexpr auto USP() { return Arg{{ArgType::kUSP}, {0}}; } + static constexpr auto Raw(const uint16_t instr) { + Arg a{{ArgType::kRaw}, {0}}; + a.uword = instr; + return a; + } + int SNPrint( + char *buf, + size_t bufsz, + bool imm_as_hex = false, + RefKindMask ref_kinds = 0, + const char *label = nullptr, + uint32_t self_addr = 0, + uint32_t ref_addr = 0) const; +}; + +enum class NodeType { + kTracedInstruction, + kRefInstruction, + kData, +}; + +constexpr size_t kRefsCountPerBuffer = 10; + +constexpr size_t kMnemonicBufferSize = 10; +constexpr size_t kArgsBufferSize = 80; + +enum class ReferenceType { + kUnknown = 0, + kCall, + kBranch, + kRead, + kWrite, +}; + +struct ReferenceRecord { + ReferenceType type{}; + uint32_t address{}; +}; + +struct ReferenceNode { + ReferenceNode *next{}; + ReferenceRecord refs[kRefsCountPerBuffer]; + uint32_t refs_count{}; +}; + +struct Op { + OpCode opcode{OpCode::kNone}; ///< Identifies instruction (mnemonic) + /// Size specifier, the suffix `b`, `w` or `l` + OpSize size_spec{OpSize::kNone}; + Condition condition{Condition::kT}; ///< For Scc, Bcc and Dbcc + Arg arg1{}; ///< First argument, optional + Arg arg2{}; ///< Second argument, optional, cannot be set if arg1 is not set + static constexpr auto Typical( + const OpCode opcode = OpCode::kNone, + const OpSize opsize = OpSize::kNone, + const Arg arg1 = Arg{}, + const Arg arg2 = Arg{}) + { + return Op{opcode, opsize, Condition::kT, arg1, arg2}; + } + static constexpr auto Raw(const uint16_t instr) + { + return Op::Typical(OpCode::kRaw, OpSize::kNone, Arg::Raw(instr)); + } + int FPrint( + FILE *, + const char *indent, + bool imm_as_hex, + RefKindMask ref_kinds = 0, + const char *ref1_label = nullptr, + const char *ref2_label = nullptr, + uint32_t self_addr = 0, + uint32_t ref1_addr = 0, + uint32_t ref2_addr = 0) const; +}; + +struct DisasmNode { + const NodeType type{}; + /// Address of the instruction (PC value basically) + const uint32_t address{}; + /// Instruction size in bytes + size_t size{kInstructionSizeStepBytes}; + /// Indicates whether `ref_addr` should be interpreted and how + RefKindMask ref_kinds{}; + /// Address of first argument reference + uint32_t ref1_addr{}; + /// Address of second argument reference + uint32_t ref2_addr{}; + ReferenceNode *ref_by{}; + ReferenceNode *last_ref_by{}; + Op op{}; + + /*! 
Disassembles instruction with arguments + * returns size of whole instruction with arguments in bytes + */ + size_t Disasm(const DataView &code); + size_t DisasmAsRaw(const DataView &code); + void AddReferencedBy(uint32_t address, ReferenceType); + ~DisasmNode(); +}; + +static constexpr inline bool IsInstruction(NodeType t) +{ + return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction; +} + +static constexpr inline bool IsBRA(Op op) +{ + return op.opcode == OpCode::kBcc && op.condition == Condition::kT; +} diff --git a/src/elf_format.h b/src/elf_format.h new file mode 100644 index 0000000..b5a268a --- /dev/null +++ b/src/elf_format.h @@ -0,0 +1,328 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include <cstddef> +#include <cstdint> + +namespace ELF { + +constexpr size_t kIdentSize = 16; +constexpr size_t kHeaderSize = kIdentSize + 36; +constexpr size_t kMagicSize = 4; +constexpr size_t kProgramHeaderSize = 32; + +using Address = uint32_t; +using Offset = uint32_t; + +enum class FileClass : uint8_t { + kNone = 0, + k32 = 1, + k64 = 2, + kUnknown, +}; + +enum class DataEncoding : uint8_t { + kNone = 0, + k2LSB = 1, + kLE = k2LSB, + k2MSB = 2, + kBE = k2MSB, + kUnknown, +}; + +enum class Version : uint8_t { + kNone = 0, + kCurrent = 1, + kUnknown, +}; + +static constexpr inline auto ParseFileClass(const uint8_t file_class) +{ + switch (file_class) { + case static_cast<uint8_t>(FileClass::kNone): return FileClass::kNone; + case static_cast<uint8_t>(FileClass::k32): return FileClass::k32; + case static_cast<uint8_t>(FileClass::k64): return FileClass::k64; + } + return FileClass::kUnknown; +} + +static constexpr inline auto ParseDataEncoding(const uint8_t data_encoding) +{ + switch (data_encoding) { + case static_cast<uint8_t>(DataEncoding::kNone): return DataEncoding::kNone; + case static_cast<uint8_t>(DataEncoding::k2LSB): return DataEncoding::k2LSB; + case static_cast<uint8_t>(DataEncoding::k2MSB): return DataEncoding::k2MSB; + } + return DataEncoding::kUnknown; +} + +static constexpr inline auto ParseVersion(const uint8_t version) +{ + switch (version) { + case static_cast<uint8_t>(Version::kNone): return Version::kNone; + case static_cast<uint8_t>(Version::kCurrent): return Version::kCurrent; + } + return Version::kUnknown; +} + +struct Ident32Raw { + uint8_t magic[4]; + uint8_t file_class; + uint8_t data_encoding; + uint8_t version; + uint8_t os_abi; + uint8_t abi_version; + uint8_t padding[7]; + static constexpr auto inline FromBytes(const uint8_t *data) + { + return Ident32Raw{ + { data[0], data[1], data[2], data[3] }, + data[4], + data[5], + data[6], + data[7], + data[8], + { data[9], data[10], data[11], data[12], data[13], data[14], data[15], }, + }; + } +}; + +struct Ident32 { + uint8_t magic[4]; + FileClass file_class; + DataEncoding data_encoding; + Version version; + uint8_t os_abi; + uint8_t abi_version; + static constexpr inline auto FromBytes(const uint8_t *data) + { + return Ident32{ + { data[0], data[1], data[2], data[3] }, + ParseFileClass(data[4]), + ParseDataEncoding(data[5]), + ParseVersion(data[6]), + data[7], + data[8], + }; + } + static constexpr inline auto FromIdent32Raw(const Ident32Raw raw) + { + return Ident32{ + { raw.magic[0], raw.magic[1], raw.magic[2], raw.magic[3] }, + ParseFileClass(raw.file_class), + ParseDataEncoding(raw.data_encoding), + ParseVersion(raw.version), + raw.os_abi, + raw.abi_version, + }; + } +}; + +enum class ObjectType : uint16_t { + kNone = 0, + kRel = 1, + kExec = 2, + kDyn = 3, + kCore = 4, + kUnknown 
= 0x7fff, + kLoProc = 0xff00, + kHiProc = 0xffff, +}; + +enum class Machine : uint16_t { + kNone = 0, + kM32 = 1, + kSPARC = 2, + k386 = 3, + k68k = 4, + k88k = 5, + k860 = 7, + kMIPS = 8, + kUnknown, +}; + +static constexpr inline uint16_t ParseU16(const uint8_t *d, DataEncoding e) +{ + if (e == DataEncoding::k2MSB) { + return uint16_t(d[0]) << 8 | d[1]; + } + return uint16_t(d[1]) << 8 | d[0]; +} + +static constexpr inline uint32_t ParseU32(const uint8_t *d, DataEncoding e) +{ + if (e == DataEncoding::k2MSB) { + return uint32_t(d[0]) << 24 | uint32_t(d[1]) << 16 | uint32_t(d[2]) << 8 | d[3]; + } + return uint32_t(d[3]) << 24 | uint32_t(d[2]) << 16 | uint32_t(d[1]) << 8 | d[0]; +} + +static constexpr inline auto ParseObjectType(const uint16_t type) +{ + switch (type) { + case static_cast<uint16_t>(ObjectType::kNone): return ObjectType::kNone; + case static_cast<uint16_t>(ObjectType::kRel): return ObjectType::kRel; + case static_cast<uint16_t>(ObjectType::kExec): return ObjectType::kExec; + case static_cast<uint16_t>(ObjectType::kDyn): return ObjectType::kDyn; + case static_cast<uint16_t>(ObjectType::kCore): return ObjectType::kCore; + case static_cast<uint16_t>(ObjectType::kLoProc): return ObjectType::kLoProc; + case static_cast<uint16_t>(ObjectType::kHiProc): return ObjectType::kHiProc; + } + return ObjectType::kUnknown; +} + +static constexpr inline auto ParseMachine(const uint16_t machine) +{ + switch (machine) { + case static_cast<uint16_t>(Machine::kNone): return Machine::kNone; + case static_cast<uint16_t>(Machine::kM32): return Machine::kM32; + case static_cast<uint16_t>(Machine::kSPARC): return Machine::kSPARC; + case static_cast<uint16_t>(Machine::k386): return Machine::k386; + case static_cast<uint16_t>(Machine::k68k): return Machine::k68k; + case static_cast<uint16_t>(Machine::k88k): return Machine::k88k; + case static_cast<uint16_t>(Machine::k860): return Machine::k860; + case static_cast<uint16_t>(Machine::kMIPS): return Machine::kMIPS; + } + return Machine::kUnknown; +} + +struct Header32Raw { + Ident32Raw ident; + uint16_t type; + uint16_t machine; + uint32_t version; + Address entry; + Offset phoff; + Offset shoff; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstrndx; + static constexpr inline auto FromBytes(const uint8_t *data) + { + const auto ident = Ident32Raw::FromBytes(data); + const DataEncoding e = ParseDataEncoding(ident.data_encoding); + return Header32Raw{ + /* .ident */ ident, + /* .type */ ParseU16(data + kIdentSize + 0, e), + /* .machine */ ParseU16(data + kIdentSize + 2, e), + /* .version */ ParseU32(data + kIdentSize + 4, e), + /* .entry */ ParseU32(data + kIdentSize + 8, e), + /* .phoff */ ParseU32(data + kIdentSize + 12, e), + /* .shoff */ ParseU32(data + kIdentSize + 16, e), + /* .flags */ ParseU32(data + kIdentSize + 20, e), + /* .ehsize */ ParseU16(data + kIdentSize + 24, e), + /* .phentsize */ ParseU16(data + kIdentSize + 26, e), + /* .phnum */ ParseU16(data + kIdentSize + 28, e), + /* .shentsize */ ParseU16(data + kIdentSize + 30, e), + /* .shnum */ ParseU16(data + kIdentSize + 32, e), + /* .shstrndx */ ParseU16(data + kIdentSize + 34, e), + }; + } +}; + +struct Header32 { + Ident32 ident; + ObjectType type; + Machine machine; + Version version; + Address entry; + Offset phoff; + Offset shoff; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstrndx; + static constexpr inline auto 
FromBytes(const uint8_t *data) + { + const auto raw = Header32Raw::FromBytes(data); + return Header32{ + Ident32::FromIdent32Raw(raw.ident), + ParseObjectType(raw.type), + ParseMachine(raw.machine), + ParseVersion(raw.version), + raw.entry, + raw.phoff, + raw.shoff, + raw.flags, + raw.ehsize, + raw.phentsize, + raw.phnum, + raw.shentsize, + raw.shnum, + raw.shstrndx, + }; + } +}; + +enum class PHType : uint32_t { + kNull = 0, + kLoad = 1, + kDynamic = 2, + kInterp = 3, + kNote = 4, + kSHLIB = 5, + kProgramHeaderTable = 6, + kLoProc = 0x70000000, + kHiProc = 0x7fffffff, + kUnknown, +}; + +static constexpr inline auto ParsePHType(const uint32_t type) +{ + switch (type) { + case static_cast<uint32_t>(PHType::kNull): return PHType::kNull; + case static_cast<uint32_t>(PHType::kLoad): return PHType::kLoad; + case static_cast<uint32_t>(PHType::kDynamic): return PHType::kDynamic; + case static_cast<uint32_t>(PHType::kInterp): return PHType::kInterp; + case static_cast<uint32_t>(PHType::kNote): return PHType::kNote; + case static_cast<uint32_t>(PHType::kSHLIB): return PHType::kSHLIB; + case static_cast<uint32_t>(PHType::kProgramHeaderTable): return PHType::kProgramHeaderTable; + case static_cast<uint32_t>(PHType::kLoProc): return PHType::kLoProc; + case static_cast<uint32_t>(PHType::kHiProc): return PHType::kHiProc; + } + return PHType::kUnknown; +} + +constexpr uint32_t kPHFlagX = 1 << 0; +constexpr uint32_t kPHFlagW = 1 << 1; +constexpr uint32_t kPHFlagR = 1 << 2; + +struct ProgramHeader32 { + uint32_t type; + Offset offset; + Address vaddr; + Address paddr; + uint32_t filesz; + uint32_t memsz; + uint32_t flags; + uint32_t align; + static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e) + { + return ProgramHeader32{ + /* type */ ParseU32(data + 0, e), + /* offset */ ParseU32(data + 4, e), + /* vaddr */ ParseU32(data + 8, e), + /* paddr */ ParseU32(data + 12, e), + /* filesz */ ParseU32(data + 16, e), + /* memsz */ ParseU32(data + 20, e), + /* flags */ ParseU32(data + 24, e), + /* align */ ParseU32(data + 28, e), + }; + } +}; + +static constexpr inline bool MagicIsValid(const uint8_t *m) +{ + return m[0] == 0x7f && m[1] == 'E' && m[2] == 'L' && m[3] == 'F'; +} + +}; diff --git a/src/elf_image.cpp b/src/elf_image.cpp new file mode 100644 index 0000000..6db72f3 --- /dev/null +++ b/src/elf_image.cpp @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_image.h" + +#include <cassert> +#include <cstdio> + +ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes( + const DataView &d, const DataEncoding e) +{ + if (d.buffer == nullptr || d.size == 0) { + return ELF::ProgramHeader32Table{}; + } + assert(d.size % kProgramHeaderSize == 0); + const size_t size = d.size / kProgramHeaderSize; + auto *headers = new ProgramHeader32[size]; + assert(headers != nullptr); + for (size_t i = 0; i < size; i++) { + headers[i] = ProgramHeader32::FromBytes(d.buffer + i * kProgramHeaderSize, e); + } + return ELF::ProgramHeader32Table{ headers, size, }; +} + +static char *ValidateELF(const DataView& d) +{ + char *error; + size_t size; + FILE *s = open_memstream(&error, &size); + assert(s); + using namespace ELF; + if (d.size < kHeaderSize) { + fprintf( + s, + "data size (%zu) is lower than minimum ELF header size (%zu): " + "ELF header could not fit", + d.size, + kHeaderSize); + fclose(s); + return error; + } + const auto header_raw = Header32Raw::FromBytes(d.buffer); + const auto header = Header32::FromBytes(d.buffer); + if (!MagicIsValid(header.ident.magic)) { + 
const uint8_t *m = header.ident.magic; + fprintf( + s, + "ELF Magic is invalid: expected [%02x %02x %02x %02x], got [%02x %02x %02x %02x]", + 0x7f, 'E', 'L', 'F', + m[0], m[1], m[2], m[3]); + fclose(s); + return error; + } + if (header.ident.version != Version::kCurrent) { + fprintf( + s, + "version (0x%02x) of ELF header.ident.version is not supported, " + "only \"Current\" version (0x%02x) is supported", + header_raw.ident.version, + static_cast<int>(Version::kCurrent)); + fclose(s); + return error; + } + if (header.version != Version::kCurrent) { + fprintf( + s, + "version (0x%02x) of ELF header.version is not supported, " + "only \"Current\" version (0x%02x) is supported", + header_raw.version, + static_cast<int>(Version::kCurrent)); + fclose(s); + return error; + } + if (header.type != ObjectType::kExec) { + fprintf( + s, + "object type (0x%02x) is not supported, " + "only Exec (0x%02x) object type is supported", + header_raw.type, + static_cast<int>(ObjectType::kExec)); + fclose(s); + return error; + } + if (header.machine != Machine::k68k) { + fprintf( + s, + "machine (0x%02x) is not supported, " + "only Motorola 68k (0x%02x) machine is supported", + header_raw.machine, + static_cast<int>(Machine::k68k)); + fclose(s); + return error; + } + if (header.phentsize != kProgramHeaderSize) { + fprintf( + s, + "phentsize is invalid: expected (%zu), got (%zu)", + kProgramHeaderSize, + size_t(header.phentsize)); + fclose(s); + return error; + } + if (d.size < header.phoff + header.phentsize * header.phnum) { + fprintf( + s, + "data size (%zu) is lower than program header table end offset (%zu): " + "program header table could not fit", + d.size, + size_t(header.phoff + header.phentsize * header.phnum)); + fclose(s); + return error; + } + bool has_segment_with_entry = false; + for (size_t i = 0; i < header.phnum; i++) { + const auto ph = ProgramHeader32::FromBytes( + d.buffer + header.phoff + header.phentsize * i, header.ident.data_encoding); + if (d.size < ph.offset + ph.filesz) { + fprintf( + s, + "data size (%zu) is lower than pht[%zu] segment end offset (%zu): " + "segment could not fit", + d.size, + i, + size_t(ph.offset + ph.filesz)); + fclose(s); + return error; + } + const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == (kPHFlagX | kPHFlagR); + if (ParsePHType(ph.type) == PHType::kLoad && is_code && ph.vaddr != 0) { + fprintf( + s, + "pht[%zu] segment is a code, but it's vaddr (0x%08x) is not zero: " + "non-zero base address is not supported", + i, + ph.vaddr); + fclose(s); + return error; + } + const bool contains_entry = header.entry >= ph.vaddr && header.entry < ph.vaddr + ph.memsz; + if (ParsePHType(ph.type) == PHType::kLoad && is_code && contains_entry) { + has_segment_with_entry = true; + } + } + if (!has_segment_with_entry) { + fprintf(s, "no code segments containing entry point (0x%08x) found", header.entry); + fclose(s); + return error; + } + fclose(s); + free(error); + return nullptr; +} + +ELF::Image::Image(DataBuffer&& data) + : _data(static_cast<DataBuffer&&>(data)) + , _error(ValidateELF(_data.View())) + , _h(_error ? ELF::Header32{} : ELF::Header32::FromBytes(_data.View().buffer)) + , _pht(_error + ? 
ELF::ProgramHeader32Table{} + : ELF::ProgramHeader32Table::FromBytes( + _data.View(_h.phoff, _h.phnum * kProgramHeaderSize), _h.ident.data_encoding)) +{} + +ELF::Image::~Image() +{ + if (_error) { + free(_error); + } + if (_pht.headers) { + delete [] _pht.headers; + } +} diff --git a/src/elf_image.h b/src/elf_image.h new file mode 100644 index 0000000..b7c7123 --- /dev/null +++ b/src/elf_image.h @@ -0,0 +1,55 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_format.h" +#include "data_buffer.h" + +#include <cstdlib> + +namespace ELF { + +struct ProgramHeader32Table { + const ProgramHeader32 *headers{}; + size_t size{}; + static ProgramHeader32Table FromBytes(const DataView &, DataEncoding); +}; + +struct Segment { + Segment *next{}; + const DataView view{}; +}; + +class Image { + const DataBuffer _data; + char *const _error; + const Header32 _h; + const ProgramHeader32Table _pht; +public: + explicit Image(DataBuffer&&); + ~Image(); + constexpr bool IsValid() const { return _error == nullptr; } + constexpr const DataBuffer &Data() const { return _data; }; + constexpr const DataView ProgramView() const + { + if (!IsValid()) { + return DataView{}; + } + for (size_t i = 0; i < _pht.size; i++) { + const auto ph = _pht.headers[i]; + const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == + (kPHFlagX | kPHFlagR); + const bool is_load = ParsePHType(ph.type) == PHType::kLoad; + const bool contains_entry = _h.entry >= ph.vaddr && _h.entry < ph.vaddr + ph.memsz; + if (is_load && is_code && ph.vaddr == 0 && contains_entry) + { + return _data.View(ph.offset, ph.filesz); + } + } + return DataView{}; + }; + constexpr const char *Error() const { return _error; } +}; + +} diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..a6f73b3 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,836 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_image.h" +#include "data_buffer.h" +#include "disasm.h" +#include "common.h" + +#define OPTPARSE_IMPLEMENTATION +#define OPTPARSE_API static +#include "optparse/optparse.h" + +#include <cassert> +#include <cinttypes> +#include <cstdio> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <cerrno> +#include <climits> + +enum class DisasmMapType { + kTraced, + kRaw, +}; + +class DisasmMap { + const DisasmMapType _type; + DisasmNode *_map[kDisasmMapSizeElements]{}; + constexpr DisasmNode *findNodeByAddress(uint32_t address) const; + DisasmNode &insertNode(uint32_t address, NodeType); + DisasmNode &insertReferencedBy( + const uint32_t by_addr, + const uint32_t ref_addr, + const NodeType type, + const ReferenceType ref_type); + constexpr bool canBeAllocated(const DisasmNode& node) const; +public: + constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const + { + return findNodeByAddress(address); + }; + void InsertNode(uint32_t address, NodeType type) + { + assert(_type == DisasmMapType::kTraced); + insertNode(address, type); + } + void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); + DisasmMap(DisasmMapType type): _type(type) {} + ~DisasmMap(); +}; + +constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const +{ + if (address < kRomSizeBytes) + return _map[address / kInstructionSizeStepBytes]; + return nullptr; +} + +static constexpr uint32_t AlignInstructionAddress(const uint32_t address) +{ + return address & ~1UL; +} + +DisasmNode &DisasmMap::insertNode(const uint32_t address, const NodeType type) +{ + auto *node = 
findNodeByAddress(address); + if (node) { + // Instruction nodes take precedence over data nodes. If a node that + // was previously accessed only as data now turns out to be an + // instruction, then it must become an instruction node. + if (IsInstruction(type) && !IsInstruction(node->type)) { + *const_cast<NodeType*>(&node->type) = type; + // Make sure it is OpCode::kNone so it will be properly disassembled + node->op = Op{}; + } + return *node; + } + node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)}); + assert(node); + _map[address / kInstructionSizeStepBytes] = node; + return *node; +} + +DisasmNode &DisasmMap::insertReferencedBy( + const uint32_t by_addr, + const uint32_t ref_addr, + const NodeType type, + const ReferenceType ref_type) +{ + auto &ref_node = insertNode(ref_addr, type); + ref_node.AddReferencedBy(by_addr, ref_type); + return ref_node; +} + +constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const +{ + const auto size = node.size / kInstructionSizeStepBytes; + const auto *const node_real = findNodeByAddress(node.address); + for (size_t i = 1; i < size; i++) { + const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i]; + if (ptr != nullptr && ptr != node_real) { + return false; + } + } + return true; +} + +static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds) +{ + return (ref_kinds & kRefCallMask) + ? ReferenceType::kCall + : (ref_kinds & kRef1ReadMask) + ? ReferenceType::kRead + : (ref_kinds & kRef1WriteMask) + ? ReferenceType::kWrite + : ReferenceType::kBranch; +} + +static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) +{ + return (ref_kinds & kRefCallMask) + ? ReferenceType::kCall + : (ref_kinds & kRef2ReadMask) + ? ReferenceType::kRead + : (ref_kinds & kRef2WriteMask) + ? ReferenceType::kWrite + : ReferenceType::kBranch; +} + +static constexpr bool IsNextLikelyAnInstruction(const Op &op) +{ + return (op.opcode != OpCode::kNone && + op.opcode != OpCode::kRaw && + !IsBRA(op) && + op.opcode != OpCode::kJMP && + op.opcode != OpCode::kRTS && + op.opcode != OpCode::kRTE && + op.opcode != OpCode::kSTOP); +} + +void DisasmMap::Disasm( + const DataView &code, const Settings &s, size_t at, bool nested) +{ + // Some of logic of this function is covered by integration tests in + // `test_walk_and_follow_jumps.bash`. + bool inside_code_span = nested; + while (at < Min(kRomSizeBytes, code.size)) { + DisasmNode *node; + if (_type == DisasmMapType::kTraced) { + node = _map[at / kInstructionSizeStepBytes]; + if (!node) { + if (inside_code_span) { + node = &insertNode(at, NodeType::kTracedInstruction); + } else { + at += kInstructionSizeStepBytes; + continue; + } + } + } else { + node = &insertNode(at, NodeType::kTracedInstruction); + } + if (node->op.opcode == OpCode::kNone || inside_code_span) { + const auto size = node->Disasm(code); + assert(size >= kInstructionSizeStepBytes); + if (canBeAllocated(*node)) { + // Spread across the size + for (size_t o = kInstructionSizeStepBytes; o < size; o++) { + _map[(node->address + o) / kInstructionSizeStepBytes] = node; + } + } else { + node->DisasmAsRaw(code); + } + } + inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op); + if (nested && !inside_code_span) { + return; + } + at += node->size; + // NOTE: There is not much information about a reference passed further, + // so just don't add a reference of immediate if s.imm_labels is false + // enabled. 
+ const bool has_ref1 = (node->ref_kinds & kRef1ImmMask) + ? s.imm_labels + : (node->ref_kinds & kRef1Mask); + const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1; + if (has_code_ref1) { + const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) + ? NodeType::kData : NodeType::kRefInstruction; + const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds); + auto &ref_node = insertReferencedBy( + node->address, node->ref1_addr, type, ref_type); + if (ref_node.op.opcode == OpCode::kNone) { + if (s.follow_jumps) { + Disasm(code, s, ref_node.address, true); + } else { + ref_node.DisasmAsRaw(code); + } + } + } + const bool has_ref2 = (node->ref_kinds & kRef2Mask); + const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size); + if (has_code_ref2) { + const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) + ? NodeType::kData : NodeType::kRefInstruction; + const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds); + auto &ref_node = insertReferencedBy( + node->address, node->ref2_addr, type, ref_type); + if (ref_node.op.opcode == OpCode::kNone) { + if (s.follow_jumps) { + Disasm(code, s, ref_node.address, true); + } else { + ref_node.DisasmAsRaw(code); + } + } + } + } +} + +DisasmMap::~DisasmMap() +{ + for (size_t i = 0; i < kDisasmMapSizeElements; i++) { + auto *const node = _map[i]; + if (!node) { + continue; + } + const auto size = node->size / kInstructionSizeStepBytes; + for (size_t o = 0; o < size; o++) { + assert(_map[i + o] == node); + _map[i + o] = nullptr; + } + delete node; + i += size - 1; + } +} + +static size_t RenderRawDataComment( + char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code) +{ + size_t overall_sz{}; + for (size_t i = 0; i < instr_sz; i += kInstructionSizeStepBytes) + { + overall_sz += Min( + out_sz - overall_sz, + snprintf( + out + overall_sz, + out_sz - overall_sz, + " %04x", + GetU16BE(code.buffer + address + i))); + } + overall_sz += Min( + out_sz - overall_sz, + snprintf(out + overall_sz, out_sz - overall_sz, " @%08x", address)); + return overall_sz; +} + +static constexpr const char *ReferenceTypeToString(ReferenceType type) +{ + switch (type) { + case ReferenceType::kUnknown: return "UNKNOWN"; + case ReferenceType::kCall: return "CALL"; + case ReferenceType::kBranch: return "BRANCH"; + case ReferenceType::kRead: return "READ"; + case ReferenceType::kWrite: return "WRITE"; + } + return "UNKN"; +} + +static constexpr bool ShouldPrintAsRaw(const Op& op) +{ + if (op.arg1.type == ArgType::kImmediate) { + if (op.opcode == OpCode::kADD || op.opcode == OpCode::kSUB || + op.opcode == OpCode::kAND || op.opcode == OpCode::kOR || + op.opcode == OpCode::kEOR || op.opcode == OpCode::kCMP) + { + return true; + } + } + return false; +} + +static constexpr bool HasCallReference(const DisasmNode &node) +{ + for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { + for (size_t i = 0; i < ref->refs_count; i++) { + if (ref->refs[i].type == ReferenceType::kCall) { + return true; + } + } + } + return false; +} + +static constexpr size_t GetNodeSizeByAddress(const DisasmMap &disasm_map, const uint32_t address) +{ + const auto *node = disasm_map.FindNodeByAddress(address); + if (node == nullptr) { + return kInstructionSizeStepBytes; + } + return node->size; +} + +static constexpr bool IsLocalLocation(const DisasmMap &disasm_map, const DisasmNode &node) +{ + for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { + for (size_t i = 0; i < ref->refs_count; 
i++) { + const ReferenceRecord &ref_rec = ref->refs[i]; + if (ref_rec.type == ReferenceType::kCall) { + // Locals are definitely not made for calls + return false; + } + const bool forward = ref_rec.address < node.address; + const size_t min_addr = forward ? ref_rec.address : node.address; + const size_t start = min_addr + GetNodeSizeByAddress(disasm_map, min_addr); + const size_t max_addr = forward ? node.address : ref_rec.address; + const size_t end = max_addr + (forward ? 0 : GetNodeSizeByAddress(disasm_map, min_addr)); + for (size_t addr = start; addr < end;) { + const auto *intermediate_node = disasm_map.FindNodeByAddress(addr); + if (intermediate_node) { + if (intermediate_node->ref_by) { + // Another labeled node detected on the jump path, hence + // current node's location cannot be considered local + return false; + } + addr += intermediate_node->size; + } else { + addr += kInstructionSizeStepBytes; + } + } + } + } + return true; +} + +static constexpr const char *StringWihoutFristNChars(const char *str, const size_t n) +{ + for (size_t i = 0, tab = 0; i < n && *str; i++, str++) { + if (*str == '\t') { + tab++; + if (tab == 7) { + tab = 0; + str++; + } + } else { + str++; + } + } + return str; +} + +static void RenderNodeDisassembly( + FILE *const output, + const DisasmMap &disasm_map, + const DataView &code, + const Settings &s, + const DisasmNode &node) +{ + if (node.ref_by) { + const bool is_local = IsLocalLocation(disasm_map, node); + if (s.labels && !(s.short_ref_local_labels && is_local)) { + const bool export_this_function = s.export_functions && HasCallReference(node); + const bool export_this_label = s.export_all_labels || + (s.export_labels && node.ref_by && (node.ref_by->refs_count > 1)) || + export_this_function; + if (export_this_label) { + fprintf(output, "\n%s.globl\tL%08x\n", s.indent, node.address); + if (export_this_function) { + fprintf(output, "%s.type\tL%08x, @function\n", s.indent, node.address); + } + } + } + if (s.xrefs_from && !(s.short_ref_local_labels && is_local)) { + fprintf(output, "| XREFS:\n"); + for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { + if (ref->refs_count == 0) { + continue; + } + fprintf(output, "|"); + for (size_t i = 0; i < ref->refs_count; i++) { + const ReferenceRecord r = ref->refs[i]; + fprintf(output, " %s @%08x", ReferenceTypeToString(r.type), r.address); + } + fprintf(output, "\n"); + } + } + if (s.labels) { + if (s.short_ref_local_labels && is_local) { + fprintf(output, "1:%s", StringWihoutFristNChars(s.indent, (sizeof "1:") - 1)); + } else { + fprintf(output, "L%08x:\n", node.address); + } + } + } + assert(node.op.opcode != OpCode::kNone); + if (ShouldPrintAsRaw(node.op)) { + auto raw = Op::Raw(GetU16BE(code.buffer + node.address)); + raw.FPrint(output, s.indent, s.imm_hex); + uint32_t i = kInstructionSizeStepBytes; + for (; i < node.size; i += kInstructionSizeStepBytes) { + char arg_str[kArgsBufferSize]{}; + const auto arg = Arg::Raw(GetU16BE(code.buffer + node.address + i)); + arg.SNPrint(arg_str, kArgsBufferSize); + fprintf(output, ", %s", arg_str); + } + } else { + const bool with_ref = node.ref_kinds && s.labels && (s.abs_labels || s.rel_labels); + const auto *ref1 = (node.ref_kinds & kRef1Mask) + ? disasm_map.FindNodeByAddress(node.ref1_addr) : nullptr; + const auto *ref2 = (node.ref_kinds & kRef2Mask) + ? disasm_map.FindNodeByAddress(node.ref2_addr) : nullptr; + const uint32_t ref1_addr = (with_ref && ref1) ? ref1->address : 0; + const uint32_t ref2_addr = (with_ref && ref2) ? 
ref2->address : 0; + if (with_ref && (ref1 || ref2)) { + const RefKindMask ref_kinds = + (s.abs_labels + ? ((ref1 ? (node.ref_kinds & kRef1AbsMask) : 0) | + (ref2 ? (node.ref_kinds & kRef2AbsMask) : 0)) + : 0) | + (s.rel_labels + ? ((ref1 ? (node.ref_kinds & kRef1RelMask) : 0) | + (ref2 ? (node.ref_kinds & kRef2RelMask) : 0)) + : 0) | + ((s.imm_labels && ref1) ? (node.ref_kinds & kRef1ImmMask) : 0) | + (node.ref_kinds & (kRefDataMask | kRefPcRelFix2Bytes)); + const bool ref1_is_local = !ref1 || IsLocalLocation(disasm_map, *ref1); + char ref1_label[32]{}; + if (ref1) { + if (s.short_ref_local_labels && ref1_is_local) { + const char dir = ref1_addr <= node.address ? 'b' : 'f'; + snprintf(ref1_label, (sizeof ref1_label), "1%c", dir); + } else { + snprintf(ref1_label, (sizeof ref1_label), "L%08x", ref1_addr); + } + } + const bool ref2_is_local = !ref2 || IsLocalLocation(disasm_map, *ref2); + char ref2_label[32]{}; + if (ref2) { + if (s.short_ref_local_labels && ref2_is_local) { + const char dir = ref2_addr <= node.address ? 'b' : 'f'; + snprintf(ref2_label, (sizeof ref2_label), "1%c", dir); + } else { + snprintf(ref2_label, (sizeof ref2_label), "L%08x", ref2_addr); + } + } + node.op.FPrint( + output, + s.indent, + s.imm_hex, + ref_kinds, + ref1_label, + ref2_label, + node.address, + ref1_addr, + ref2_addr); + const bool ref1_from_imm_ok = ((node.ref_kinds & kRef1ImmMask) ? s.imm_labels : true); + if (s.xrefs_to && !(s.short_ref_local_labels && ref1_is_local) && ref1_from_imm_ok) + { + fprintf(output, " | L%08x", ref1_addr); + } + if (s.xrefs_to && !(s.short_ref_local_labels && ref2_is_local)) { + fprintf(output, " | L%08x", ref2_addr); + } + } else { + node.op.FPrint(output, s.indent, s.imm_hex); + } + } + if (s.raw_data_comment) { + char raw_data_comment[100]{}; + RenderRawDataComment( + raw_data_comment, + (sizeof raw_data_comment) - 1, + node.address, + node.size, code); + fprintf(output, " |%s", raw_data_comment); + } + fprintf(output, "\n"); +} + +static void RenderDisassembly( + FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s) +{ + for (size_t i = 0; i < code.size;) { + const DisasmNode *node = disasm_map.FindNodeByAddress(i); + if (node) { + RenderNodeDisassembly(output, disasm_map, code, s, *node); + i += node->size; + } else { + auto raw = Op::Raw(GetU16BE(code.buffer + i)); + raw.FPrint(output, s.indent, s.imm_hex); + fprintf(output, "\n"); + i += kInstructionSizeStepBytes; + } + } +} + +static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data) +{ + // FIXME make a full blown parser with various radixes support and different + // trace types support + bool parse = true; + for (size_t i = 0; i < trace_data.size; i++) { + if (trace_data.buffer[i] == '\n' || trace_data.buffer[i] == '\r') { + parse = true; + } else if (parse) { + errno = 0; + const char *startptr = reinterpret_cast<const char *>(trace_data.buffer + i); + char *endptr = nullptr; + const long address = strtol(startptr, &endptr, 10); + if ((address == LONG_MAX || address == LONG_MIN) && errno == ERANGE) { + // Parsing error, just skip + } else if (startptr == endptr) { + // Parsing error, just skip + } else if (address % 2) { + fprintf(stderr, "Error: Uneven PC values are not supported (got PC=0x%08lx), exiting\n", address); + exit(1); + } else if (static_cast<unsigned long>(address) > kRomSizeBytes) { + fprintf(stderr, "Error: PC values > 4MiB are not supported (got PC=0x%08lx), exiting\n", address); + exit(1); + } else { + // Valid value + 
disasm_map.InsertNode(address, NodeType::kTracedInstruction); + } + if (startptr != endptr) { + i += endptr - startptr - 1; + } + parse = false; + } + } +} + +static size_t ReadFromStream(DataBuffer &db, FILE *stream) +{ + assert(db.buffer && db.buffer_size >= db.kInitialSize); + while (1) { + const size_t read_size = db.buffer_size - db.occupied_size; + const size_t fread_ret = fread( + db.buffer + db.occupied_size, sizeof(*db.buffer), read_size, stream); + db.occupied_size += fread_ret; + if (fread_ret >= db.buffer_size) { + assert(fread_ret == db.buffer_size); + db.Expand(db.buffer_size * 2); + } else { + const int err = errno; + if (feof(stream)) { + break; + } else if (ferror(stream)) { + fprintf(stderr, "ReadFromStream: fread(%zu): Error (%d): \"%s\"\n", read_size, err, strerror(err)); + return EXIT_FAILURE; + } else if (db.buffer_size == db.occupied_size) { + db.Expand(db.buffer_size * 2); + } else { + assert(false); + } + } + } + return db.occupied_size; +} + +static DisasmMap *NewDisasmMap(FILE *trace_stream) +{ + if (trace_stream == nullptr) { + DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kRaw}; + assert(disasm_map); + return disasm_map; + } + // Read trace file into buffer + DataBuffer trace_data{}; + const size_t trace_size = ReadFromStream(trace_data, trace_stream); + if (trace_size == 0) { + fprintf(stderr, "ReadFromStream(trace_data, trace_stream): Error: No data has been read\n"); + return nullptr; + } + // Parse trace file into map + DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kTraced}; + assert(disasm_map); + ParseTraceData(*disasm_map, trace_data.View()); + return disasm_map; +} + +static int M68kDisasm( + FILE *input_stream, FILE *output_stream, FILE *trace_stream, const Settings &s) +{ + // Read input file into buffer + DataBuffer input{}; + const size_t input_size = ReadFromStream(input, input_stream); + if (input_size == 0) { + fprintf(stderr, "ReadFromStream(input, input_stream): Error: No data has been read\n"); + return EXIT_FAILURE; + } + const ELF::Image elf(static_cast<DataBuffer&&>(input)); + if (s.bfd == BFDTarget::kELF && !elf.IsValid()) { + fprintf(stderr, "Error: ELF image is not valid: %s\n", elf.Error()); + return EXIT_FAILURE; + } + const bool from_elf = s.bfd == BFDTarget::kELF || (s.bfd == BFDTarget::kAuto && elf.IsValid()); + const DataView code(from_elf ? elf.ProgramView() : elf.Data().View()); + assert(code.buffer != nullptr); + assert(code.size != 0); + // It is not worth it to check this somewhere while disassembling or + // emitting. Odd size is just not supported. 
+ if (code.size % 2) { + fprintf(stderr, "M68kDisasm: Error: code blob must be of even size\n"); + return EXIT_FAILURE; + } + auto *disasm_map = NewDisasmMap(trace_stream); + if (disasm_map == nullptr) { + return EXIT_FAILURE; + } + // Disasm into output map + disasm_map->Disasm(code, s); + // Print output into output_stream + RenderDisassembly(output_stream, *disasm_map, code, s); + delete disasm_map; + return EXIT_SUCCESS; +} + +static bool FeatureStringHasPrefixNo(const char *feature) +{ + assert(feature); + // There is also implicit, embedded and free check for null terminator + if (feature[0] == 'n' && feature[1] == 'o' && feature[2] == '-') { + return true; + } + return false; +} + +static bool ApplyFeature(Settings& s, const char *feature_arg) +{ + struct { + bool Settings::* setting; + const char* feature_name; + } const features[]{ + { &Settings::raw_data_comment, "rdc" }, + { &Settings::labels, "labels" }, + { &Settings::rel_labels, "rel-labels" }, + { &Settings::abs_labels, "abs-labels" }, + { &Settings::imm_labels, "imm-labels" }, + { &Settings::short_ref_local_labels, "short-ref-local-labels" }, + { &Settings::export_labels, "export-labels" }, + { &Settings::export_all_labels, "export-all-labels" }, + { &Settings::export_functions, "export-functions" }, + { &Settings::xrefs_from, "xrefs-from" }, + { &Settings::xrefs_to, "xrefs-to" }, + { &Settings::imm_hex, "imm-hex" }, + { &Settings::follow_jumps, "follow-jumps" }, + { &Settings::walk, "walk" }, + }; + constexpr size_t sizeof_no_prefix = (sizeof "no-") - 1; + const bool disable = FeatureStringHasPrefixNo(feature_arg); + const char *const feature = feature_arg + (disable ? sizeof_no_prefix : 0); + for (size_t i = 0; i < (sizeof features) / (sizeof *features); i++) { + if (0 == strcmp(feature, features[i].feature_name)) { + s.*(features[i].setting) = !disable; + return true; + } + } + return false; +} + +static void PrintUsage(FILE *s, const char *argv0) +{ + // Please, keep all lines in 80 columns range when printed. + fprintf(s, + "Usage: %s [options] <input-file-name>\n" + "Options:\n" + " -h, --help, Show this message.\n" + " -o, --output, Where to write disassembly to (stdout if not set)\n" + " -t, --pc-trace, File containing PC trace\n" + " --indent, Specify instruction indentation, e.g. \"\t\",\n" + " Single tab is used by default.\n" + " -f, --feature=[no-]<feature>\n" + " Enable or disable (with \"no-\" prefix) a feature.\n" + " Available features described below under the\n" + " \"Feature flags\" section.\n" + " -b, --bfd-target=bfdname\n" + " Specify target object format as `bfdname`. Will attempt\n" + " to detect automatically if not set. 
Only `auto`,\n"
+			"                      `binary` and `elf` are currently supported.\n"
+			"  <input_file_name>  Binary or ELF file with the machine code to disassemble\n"
+			"Feature flags:\n"
+			"  rdc                Print raw data comment.\n"
+			"  labels             Print labels above all places that have jumps from\n"
+			"                     somewhere.\n"
+			"  rel-labels         Use label instead of number on relative branch or call.\n"
+			"  abs-labels         Use label instead of number on absolute branch or call.\n"
+			"  imm-labels         Use label instead of number when an immediate value is\n"
+			"                     moved to an address register.\n"
+			"  short-ref-local-labels\n"
+			"                     Use local labels (numbers) for short jumps or loops.\n"
+			"                     Jump is considered short when it does not cross other\n"
+			"                     labels and has no calls.\n"
+			"  export-labels      Add `.globl` preamble to labels referenced two or more\n"
+			"                     times.\n"
+			"  export-all-labels  Add `.globl` preamble to all labels.\n"
+			"  export-functions   Add `.globl` and `.type @function` preamble to a label\n"
+			"                     referenced as a call.\n"
+			"  xrefs-from         Print xrefs comments above all places that have xrefs.\n"
+			"  xrefs-to           Print xrefs comments after all branch instructions.\n"
+			"  imm-hex            Print all immediate values as hexadecimal numbers.\n"
+			"  follow-jumps       Follow jumps to statically known locations.\n"
+			"  walk               Try its best to detect further instructions following\n"
+			"                     known traced locations without overcommitting.\n"
+			, argv0);
+}
+
+int main(int, char* argv[])
+{
+	struct optparse_long longopts[] = {
+		{"help", 'h', OPTPARSE_NONE},
+		{"output", 'o', OPTPARSE_REQUIRED},
+		{"pc-trace", 't', OPTPARSE_REQUIRED},
+		{"feature", 'f', OPTPARSE_REQUIRED},
+		{"bfd-target", 'b', OPTPARSE_REQUIRED},
+		{"indent", 80, OPTPARSE_REQUIRED},
+		{},
+	};
+	const char *trace_file_name = nullptr;
+	const char *output_file_name = nullptr;
+	const char *input_file_name = nullptr;
+	Settings s{};
+	struct optparse options;
+	optparse_init(&options, argv);
+	// Parse opts
+	int option;
+	while ((option = optparse_long(&options, longopts, NULL)) != -1) {
+		switch (option) {
+		case 'h':
+			PrintUsage(stdout, argv[0]);
+			return EXIT_SUCCESS;
+			break;
+		case 'o':
+			output_file_name = options.optarg;
+			break;
+		case 't':
+			trace_file_name = options.optarg;
+			break;
+		case 'f':
+			if (!ApplyFeature(s, options.optarg)) {
+				fprintf(stderr, "main: Error: Unknown feature \"%s\", exiting\n", options.optarg);
+				return EXIT_FAILURE;
+			}
+			break;
+		case 'b':
+			{
+				const auto *bfd_str = options.optarg;
+				if (0 == strcmp(bfd_str, "auto")) {
+					s.bfd = BFDTarget::kAuto;
+				} else if (0 == strcmp(bfd_str, "binary")) {
+					s.bfd = BFDTarget::kBinary;
+				} else if (0 == strcmp(bfd_str, "elf")) {
+					s.bfd = BFDTarget::kELF;
+				} else {
+					fprintf(
+							stderr,
+							"Unknown BFD target specified: \"%s\". 
" + "Refer to usage below to find correct BFD values.\n", + bfd_str); + PrintUsage(stderr, argv[0]); + return EXIT_FAILURE; + } + } + break; + case 80: + s.indent = options.optarg; + break; + case '?': + fprintf(stderr, "main: optparse_long: Error: \"%s\"\n", options.errmsg); + return EXIT_FAILURE; + } + } + // Parse input file name + char *arg; + while ((arg = optparse_arg(&options))) { + if (input_file_name == nullptr) { + input_file_name = arg; + } else { + fprintf(stderr, "error: too many free arguments provided\n"); + return EXIT_FAILURE; + } + } + // Open the files + FILE *input_stream = nullptr; + FILE *output_stream = stdout; + FILE *trace_stream = nullptr; + if (input_file_name) { + if (0 == strcmp(input_file_name, "-")) { + input_stream = stdin; + } else { + input_stream = fopen(input_file_name, "r"); + } + if (input_stream == nullptr) { + const int err = errno; + fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", input_file_name, err, strerror(err)); + return EXIT_FAILURE; + } + } else { + fprintf(stderr, "main: Error: no input file name specified, see usage below.\n"); + PrintUsage(stderr, argv[0]); + return EXIT_FAILURE; + } + if (output_file_name) { + output_stream = fopen(output_file_name, "w"); + if (output_stream == nullptr) { + const int err = errno; + fprintf(stderr, "main: fopen(\"%s\", \"w\"): Error (%d): \"%s\"\n", output_file_name, err, strerror(err)); + fclose(input_stream); + return EXIT_FAILURE; + } + } + if (trace_file_name) { + if (0 == strcmp(trace_file_name, "-")) { + if (input_stream == stdin) { + fprintf(stderr, "error: trace stream and input stream cannot be both stdin\n"); + return EXIT_FAILURE; + } + trace_stream = stdin; + } else { + trace_stream = fopen(trace_file_name, "r"); + } + if (trace_stream == nullptr) { + const int err = errno; + fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", trace_file_name, err, strerror(err)); + fclose(input_stream); + fclose(output_stream); + return EXIT_FAILURE; + } + } + // Run the program + const int ret = M68kDisasm(input_stream, output_stream, trace_stream, s); + if (trace_stream != nullptr) { + fclose(trace_stream); + } + fclose(output_stream); + fclose(input_stream); + return ret; +} |