From 21a9aa92a7cf8767a0fcb33858546dea744c4071 Mon Sep 17 00:00:00 2001 From: Oxore Date: Mon, 5 Feb 2024 01:20:51 +0300 Subject: Organize source code and tests --- CMakeLists.txt | 8 +- Makefile | 5 +- common.h | 85 -- data_buffer.cpp | 29 - data_buffer.h | 41 - disasm.cpp | 2010 --------------------------------- disasm.h | 401 ------- elf_format.h | 328 ------ elf_image.cpp | 172 --- elf_image.h | 55 - main.cpp | 836 -------------- src/common.h | 85 ++ src/data_buffer.cpp | 29 + src/data_buffer.h | 41 + src/disasm.cpp | 2010 +++++++++++++++++++++++++++++++++ src/disasm.h | 401 +++++++ src/elf_format.h | 328 ++++++ src/elf_image.cpp | 172 +++ src/elf_image.h | 55 + src/main.cpp | 836 ++++++++++++++ test.bash | 709 ------------ test.ld | 19 - test_labels_referencing.bash | 110 -- test_random.bash | 63 -- test_walk_and_follow_jumps.bash | 200 ---- tests/test.bash | 709 ++++++++++++ tests/test.ld | 19 + tests/test_labels_referencing.bash | 110 ++ tests/test_random.bash | 63 ++ tests/test_walk_and_follow_jumps.bash | 200 ++++ 30 files changed, 5064 insertions(+), 5065 deletions(-) delete mode 100644 common.h delete mode 100644 data_buffer.cpp delete mode 100644 data_buffer.h delete mode 100644 disasm.cpp delete mode 100644 disasm.h delete mode 100644 elf_format.h delete mode 100644 elf_image.cpp delete mode 100644 elf_image.h delete mode 100644 main.cpp create mode 100644 src/common.h create mode 100644 src/data_buffer.cpp create mode 100644 src/data_buffer.h create mode 100644 src/disasm.cpp create mode 100644 src/disasm.h create mode 100644 src/elf_format.h create mode 100644 src/elf_image.cpp create mode 100644 src/elf_image.h create mode 100644 src/main.cpp delete mode 100644 test.bash delete mode 100644 test.ld delete mode 100644 test_labels_referencing.bash delete mode 100644 test_random.bash delete mode 100644 test_walk_and_follow_jumps.bash create mode 100644 tests/test.bash create mode 100644 tests/test.ld create mode 100644 tests/test_labels_referencing.bash 
create mode 100644 tests/test_random.bash create mode 100644 tests/test_walk_and_follow_jumps.bash diff --git a/CMakeLists.txt b/CMakeLists.txt index a7dd8b7..13c67a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,10 +23,10 @@ set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} ${common_flags}") set(CMAKE_LINKER_FLAGS_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG} ${common_debug_flags}") set(m68kdisasm_sources - main.cpp - data_buffer.cpp - disasm.cpp - elf_image.cpp + src/main.cpp + src/data_buffer.cpp + src/disasm.cpp + src/elf_image.cpp ) add_executable(m68k-disasm ${m68kdisasm_sources}) diff --git a/Makefile b/Makefile index 2b6ee2f..245bf54 100644 --- a/Makefile +++ b/Makefile @@ -22,12 +22,11 @@ m68k-disasm: $(OBJECTS) $(LDSCRIPTS) Makefile $(OBJECTS): Makefile -%.o: %.c Makefile +%.o: src/%.c Makefile sh -c "time $(CC) $(_CFLAGS) -c -o $@ $<" -%.o: %.cpp Makefile +%.o: src/%.cpp Makefile sh -c "time $(CXX) $(_CXXFLAGS) -c -o $@ $<" clean: rm -rfv m68k-disasm $(OBJECTS) - diff --git a/common.h b/common.h deleted file mode 100644 index 76fc956..0000000 --- a/common.h +++ /dev/null @@ -1,85 +0,0 @@ -#pragma once - -/* SPDX-License-Identifier: Unlicense - */ - -#include -#include - -enum class BFDTarget { - kAuto, - kBinary, - kELF, -}; - -struct Settings { - bool raw_data_comment{}; - bool labels{}; - bool rel_labels{}; - bool abs_labels{}; - bool imm_labels{}; - bool short_ref_local_labels{}; - bool export_labels{}; - bool export_all_labels{}; - bool export_functions{}; - bool xrefs_to{}; - bool xrefs_from{}; - bool imm_hex{}; - bool follow_jumps{}; - bool walk{}; - BFDTarget bfd{}; - const char *indent{"\t"}; -}; - -using RefKindMask = unsigned; - -constexpr RefKindMask kRef1RelMask = (1 << 0); // For first argument -constexpr RefKindMask kRef1AbsMask = (1 << 1); // For first argument -constexpr RefKindMask kRef2RelMask = (1 << 2); // For second argument -constexpr RefKindMask kRef2AbsMask = (1 << 3); // For second argument -constexpr RefKindMask kRef1ReadMask = (1 << 
4); // For first argument -constexpr RefKindMask kRef1WriteMask = (1 << 5); // For first argument -constexpr RefKindMask kRef2ReadMask = (1 << 6); // For second argument -constexpr RefKindMask kRef2WriteMask = (1 << 7); // For second argument -/// Indicates whether instruction is a call or just a branch, for any argument. -/// Calls are BSR and JSR, branches are DBcc, Bcc and JMP. -constexpr RefKindMask kRefCallMask = (1 << 8); -/// Hack flag for MOVEM with PC relative value when -frel-labels is set -constexpr RefKindMask kRefPcRelFix2Bytes = (1 << 9); -/// Register 1 may have immediate moving to address register which may be a -/// labeled location -constexpr RefKindMask kRef1ImmMask = (1 << 10); -/// Everything for first argument -constexpr RefKindMask kRef1Mask = kRef1RelMask | kRef1AbsMask | kRef1ReadMask | kRef1WriteMask | kRef1ImmMask; -/// Everything for Second argument -constexpr RefKindMask kRef2Mask = kRef2RelMask | kRef2AbsMask | kRef2ReadMask | kRef2WriteMask; -constexpr RefKindMask kRefRelMask = kRef1RelMask | kRef2RelMask; -constexpr RefKindMask kRefAbsMask = kRef1AbsMask | kRef2AbsMask; -constexpr RefKindMask kRef1DataMask = kRef1ReadMask | kRef1WriteMask; // For first argument -constexpr RefKindMask kRef2DataMask = kRef2ReadMask | kRef2WriteMask; // For second argument -constexpr RefKindMask kRefReadMask = kRef1ReadMask | kRef2ReadMask; // For any argument -constexpr RefKindMask kRefWriteMask = kRef1WriteMask | kRef2WriteMask; // For any argument -constexpr RefKindMask kRefDataMask = kRefReadMask | kRefWriteMask; -constexpr size_t kInstructionSizeStepBytes = 2; -constexpr size_t kRomSizeBytes = 4 * 1024 * 1024; -constexpr size_t kDisasmMapSizeElements = kRomSizeBytes / kInstructionSizeStepBytes; - -static inline constexpr size_t Min(size_t a, size_t b) { return a < b ? 
a : b; } - -static inline constexpr uint16_t GetU16BE(const uint8_t *buffer) -{ - return (static_cast(buffer[0]) << 8) | static_cast(buffer[1]); -} - -static inline constexpr int16_t GetI16BE(const uint8_t *buffer) -{ - return (static_cast(buffer[0]) << 8) | static_cast(buffer[1]); -} - -static inline constexpr int32_t GetI32BE(const uint8_t *buffer) -{ - return (static_cast(buffer[0]) << 24) | - (static_cast(buffer[1]) << 16) | - (static_cast(buffer[2]) << 8) | - static_cast(buffer[3]); -} diff --git a/data_buffer.cpp b/data_buffer.cpp deleted file mode 100644 index 33cb0b3..0000000 --- a/data_buffer.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: Unlicense - */ - -#include "data_buffer.h" - -#include -#include - -void DataBuffer::Expand(size_t new_size) -{ - assert(buffer); - if (new_size <= buffer_size) { - return; - } - uint8_t *new_buffer{new uint8_t[new_size]}; - assert(new_buffer); - memcpy(new_buffer, buffer, occupied_size); - delete [] buffer; - buffer = new_buffer; - buffer_size = new_size; -} - -DataBuffer::~DataBuffer() -{ - delete [] buffer; - buffer = nullptr; - buffer_size = 0; - occupied_size = 0; -} diff --git a/data_buffer.h b/data_buffer.h deleted file mode 100644 index bc264d2..0000000 --- a/data_buffer.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -/* SPDX-License-Identifier: Unlicense - */ - -#include "common.h" - -#include -#include - -struct DataView { - const uint8_t *const buffer{}; - const size_t size{}; -}; - -struct DataBuffer { - DataBuffer(){}; - DataBuffer(const DataBuffer&) = delete; - constexpr DataBuffer(DataBuffer&& other) - : buffer(other.buffer) - , buffer_size(other.buffer_size) - , occupied_size(other.occupied_size) - { - other.occupied_size = 0; - other.buffer_size = 0; - other.buffer = nullptr; - }; - static constexpr size_t kInitialSize = 4 * 1024; - uint8_t *buffer{new uint8_t[kInitialSize]}; - size_t buffer_size{kInitialSize}; - size_t occupied_size{}; - void Expand(size_t new_size); - constexpr auto 
View(size_t offset = 0, size_t size = SIZE_MAX) const - { - if (offset >= occupied_size) { - return DataView{}; - } - return DataView{buffer + offset, Min(occupied_size - offset, size)}; - }; - ~DataBuffer(); -}; diff --git a/disasm.cpp b/disasm.cpp deleted file mode 100644 index 2b2ea81..0000000 --- a/disasm.cpp +++ /dev/null @@ -1,2010 +0,0 @@ -/* SPDX-License-Identifier: Unlicense - */ - -#include "disasm.h" -#include "data_buffer.h" -#include "common.h" - -#include -#include -#include -#include - -enum class MoveDirection: bool { - kRegisterToMemory = 0, - kMemoryToRegister = 1, -}; - -enum class ShiftDirection: bool { - kRight = 0, - kLeft = 1, -}; - -enum class ShiftKind: int { - kArithmeticShift = 0, - kLogicalShift = 1, - kRotateX = 2, - kRotate = 3, -}; - -constexpr Arg FetchImmediate(const uint32_t address, const DataView &code, const OpSize s) -{ - if (s == OpSize::kInvalid) { - return Arg{}; - } else if (s == OpSize::kLong) { - if (address + kInstructionSizeStepBytes < code.size) { - const int32_t value = GetI32BE(code.buffer + address); - return Arg::Immediate(value); - } - } else if (address < code.size) { - const int16_t value = GetI16BE(code.buffer + address); - if (s == OpSize::kByte) { - // Technically it is impossible to have value lower that -128 in 8 - // bits signed integer, but the second byte being 0xff is actually - // a valid thing and it is how values from -255 to -129 are - // represented. 
- if (value > 255 || value < -255) { - // Invalid immediate value for instruction with .b suffix - return Arg{}; - } - } - return Arg::Immediate(value); - } - return Arg{}; -} - -constexpr Arg FetchArg( - const uint32_t address, const DataView &code, const int m, const int xn, const OpSize s) -{ - switch (m) { - case 0: // Dn - return Arg::Dn(xn); - case 1: // An - return Arg::An(xn); - case 2: // (An) - return Arg::AnAddr(xn); - case 3: // (An)+ - return Arg::AnAddrIncr(xn); - case 4: // -(An) - return Arg::AnAddrDecr(xn); - case 5: // (d16, An), Additional Word - if (address < code.size) { - const int16_t d16 = GetI16BE(code.buffer + address); - return Arg::D16AnAddr(xn, d16); - } - break; - case 6: // (d8, An, Xi), Brief Extension Word - if (address < code.size) { - const uint16_t briefext = GetU16BE(code.buffer + address); - if (briefext & 0x0700) { - // briefext must have zeros on 8, 9 an 10-th bits, - // i.e. xxxx_x000_xxxx_xxxx - break; - } - // Xi number (lower 3 bits, mask 0x7) with An/Dn bit (mask 0x8) - const uint8_t xi = (briefext >> 12) & 0xf; - const OpSize s = ((briefext >> 11) & 1) ? OpSize::kLong : OpSize::kWord; - const int8_t d8 = briefext & 0xff; - return Arg::D8AnXiAddr(xn, xi, s, d8); - } - break; - case 7: - switch (xn) { - case 0: // (xxx).W, Additional Word - if (address < code.size) { - const int32_t w = GetI16BE(code.buffer + address); - return Arg::Word(w); - } - break; - case 1: // (xxx).L, Additional Long - if (address + kInstructionSizeStepBytes < code.size) { - const int32_t l = GetI32BE(code.buffer + address); - return Arg::Long(l); - } - break; - case 2: // (d16, PC), Additional Word - if (address < code.size) { - const int16_t d16 = GetI16BE(code.buffer + address); - return Arg::D16PCAddr(d16); - } - break; - case 3: // (d8, PC, Xi), Brief Extension Word - if (address < code.size) { - const uint16_t briefext = GetU16BE(code.buffer + address); - if (briefext & 0x0700) { - // briefext must have zeros on 8, 9 an 10-th bits, - // i.e. 
xxxx_x000_xxxx_xxxx - break; - } - // Xi number (lower 3 bits, mask 0x7) with An/Dn bit (mask 0x8) - const uint8_t xi = (briefext >> 12) & 0xf; - const OpSize s = ((briefext >> 11) & 1) ? OpSize::kLong : OpSize::kWord; - const int8_t d8 = briefext & 0xff; - return Arg::D8PCXiAddr(xn, xi, s, d8); - } - break; - case 4: // #imm - return FetchImmediate(address, code, s); - case 5: // Does not exist - case 6: // Does not exist - case 7: // Does not exist - break; - } - break; - } - return Arg{}; -} - -static Arg FetchArg( - const uint32_t address, const DataView &code, const uint16_t instr, const OpSize s) -{ - const int addrmode = instr & 0x3f; - const int m = (addrmode >> 3) & 7; - const int xn = addrmode & 7; - return FetchArg(address, code, m, xn, s); -} - -static size_t disasm_verbatim(DisasmNode &node, const uint16_t instr) -{ - node.op = Op::Raw(instr); - return node.size; -} - -static size_t disasm_jsr_jmp( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const OpSize opsize = OpSize::kWord; - const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (a.mode) { - case AddrMode::kInvalid: - case AddrMode::kDn: // 4e80..4e87 / 4ec0..4ec7 - case AddrMode::kAn: // 4e88..4e8f / 4ec8..4ecf - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: // 4e90..4e97 / 4ed0..4ed7 - // NOTE: dynamic jump, ref_addr may possibly be obtained during the - // trace - break; - case AddrMode::kAnAddrIncr: // 4e98..4e9f / 4ed8..4edf - case AddrMode::kAnAddrDecr: // 4ea0..4ea7 / 4ee0..4ee7 - return disasm_verbatim(node, instr); - case AddrMode::kD16AnAddr: // 4ea8..4eaf / 4ee8..4eef - // NOTE: dynamic jump, ref_addr may possibly be obtained during the - // trace - break; - case AddrMode::kD8AnXiAddr: // 4eb0..4eb7 / 4ef0..4ef7 - // NOTE: dynamic jump, ref_addr may possibly be obtained during the - // trace - break; - case AddrMode::kWord: // 4eb8 / 4ef8 - { - const uint32_t ref_addr = static_cast(a.lword); - 
node.ref1_addr = ref_addr; - node.ref_kinds = kRef1AbsMask; - } - break; - case AddrMode::kLong: // 4eb9 / 4ef9 - { - const uint32_t ref_addr = static_cast(a.lword); - node.ref1_addr = ref_addr; - node.ref_kinds = kRef1AbsMask; - } - break; - case AddrMode::kD16PCAddr: // 4eba / 4efa - { - const uint32_t ref_addr = node.address + kInstructionSizeStepBytes + - static_cast(a.d16_pc.d16); - node.ref1_addr = ref_addr; - node.ref_kinds = kRef1RelMask; - } - break; - case AddrMode::kD8PCXiAddr: // 4ebb / 4efb - // NOTE: dynamic jump, ref_addr may possibly be obtained during the - // trace - break; - case AddrMode::kImmediate: // 4ebc / 4efc - return disasm_verbatim(node, instr); - } - const bool is_jmp = instr & 0x40; - node.ref_kinds |= is_jmp ? 0 : kRefCallMask; - node.op = Op::Typical(is_jmp ? OpCode::kJMP : OpCode::kJSR, OpSize::kNone, a); - return node.size = kInstructionSizeStepBytes + a.Size(opsize); -} - -static size_t disasm_ext(DisasmNode &node, const OpSize opsize, const Arg arg) -{ - assert(arg.mode == AddrMode::kDn); - node.op = Op::Typical(OpCode::kEXT, opsize, arg); - return node.size = kInstructionSizeStepBytes + arg.Size(opsize); -} - -static size_t disasm_ext_movem( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const auto dir = static_cast((instr >> 10) & 1); - const unsigned m = (instr >> 3) & 7; - const unsigned xn = instr & 7; - const auto opsize = static_cast(((instr >> 6) & 1) + 1); - if (m == 0 && dir == MoveDirection::kRegisterToMemory) { - return disasm_ext(node, opsize, Arg::Dn(xn)); - } - if (node.address + kInstructionSizeStepBytes >= code.size) { - // Not enough space for regmask, but maybe it is just EXT? 
- return disasm_verbatim(node, instr); - } - const unsigned regmask = GetU16BE(code.buffer + node.address + kInstructionSizeStepBytes); - if (regmask == 0) { - // This is just not representable: at least one register must be specified - return disasm_verbatim(node, instr); - } - const auto a = FetchArg( - node.address + kInstructionSizeStepBytes * 2, code, m, xn, opsize); - switch (a.mode) { - case AddrMode::kInvalid: - case AddrMode::kDn: // 4880..4887 / 4c80..4c87 / 48c0..48c7 / 4cc0..4cc7 - case AddrMode::kAn: // 4888..488f / 4c88..4c8f / 48c8..48cf / 4cc8..4ccf - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: // 4890..4897 / 4c90..4c97 / 48d0..48d7 / 4cd0..4cd7 - break; - case AddrMode::kAnAddrIncr: // 4898..489f / 4c89..4c9f / 48d8..48df / 4cd8..4cdf - if (dir == MoveDirection::kRegisterToMemory) { - return disasm_verbatim(node, instr); - } - break; - case AddrMode::kAnAddrDecr: // 48a0..48a7 / 4ca0..4ca7 / 48e0..48e7 / 4ce0..4ce7 - if (dir == MoveDirection::kMemoryToRegister) { - return disasm_verbatim(node, instr); - } - break; - case AddrMode::kD16AnAddr: // 48a8..48af / 4c8a..4caf / 48e8..48ef / 4ce8..4cef - case AddrMode::kD8AnXiAddr: // 48b0..48b7 / 4cb0..4cb7 / 48f0..48f7 / 4cf0..4cf7 - break; - case AddrMode::kWord: // 48b8 / 4cb8 / 48f8 / 4cf8 - case AddrMode::kLong: // 48b9 / 4cb9 / 48f9 / 4cf9 - if (dir == MoveDirection::kRegisterToMemory) { - node.ref2_addr = static_cast(a.lword); - node.ref_kinds = kRef2AbsMask | kRef2WriteMask; - } else { - node.ref1_addr = static_cast(a.lword); - node.ref_kinds = kRef1AbsMask | kRef1ReadMask; - } - break; - case AddrMode::kD16PCAddr: // 48ba / 4cba / 48fa / 4cfa - case AddrMode::kD8PCXiAddr: // 48bb / 4cbb / 48fb / 4cfb - if (dir == MoveDirection::kRegisterToMemory) { - return disasm_verbatim(node, instr); - } else if (a.mode == AddrMode::kD16PCAddr) { - // XXX: kRefPcRelFix2Bytes flag is a hack that needed to correctly - // print label for PC relative referenced value of MOVEM. 
Alongside - // with *NOT* adding kInstructionSizeStepBytes to ref1_addr. Still - // figuring that out. - node.ref1_addr = node.address + kInstructionSizeStepBytes * 2 + - static_cast(a.d16_pc.d16); - node.ref_kinds = kRef1RelMask | kRef1ReadMask | kRefPcRelFix2Bytes; - } - break; - case AddrMode::kImmediate: // 4ebc / 4efc - return disasm_verbatim(node, instr); - } - if (dir == MoveDirection::kMemoryToRegister) { - const auto arg2 = (a.mode == AddrMode::kAnAddrDecr) - ? Arg::RegMaskPredecrement(regmask) : Arg::RegMask(regmask); - node.op = Op::Typical(OpCode::kMOVEM, opsize, a, arg2); - } else { - const auto arg1 = (a.mode == AddrMode::kAnAddrDecr) - ? Arg::RegMaskPredecrement(regmask) : Arg::RegMask(regmask); - node.op = Op::Typical(OpCode::kMOVEM, opsize, arg1, a); - } - return node.size = kInstructionSizeStepBytes * 2 + a.Size(opsize); -} - -static size_t disasm_lea( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const OpSize opsize = OpSize::kLong; - const auto addr = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (addr.mode) { - case AddrMode::kInvalid: - case AddrMode::kDn: - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - break; - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - return disasm_verbatim(node, instr); - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - break; - case AddrMode::kWord: - case AddrMode::kLong: - node.ref1_addr = static_cast(addr.lword); - node.ref_kinds = kRef1AbsMask | kRef1ReadMask; - break; - case AddrMode::kD16PCAddr: - node.ref1_addr = node.address + kInstructionSizeStepBytes + - static_cast(addr.d16_pc.d16); - node.ref_kinds = kRef1RelMask | kRef1ReadMask; - break; - case AddrMode::kD8PCXiAddr: - break; - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - const unsigned an = ((instr >> 9) & 7); - const auto reg = Arg::An(an); - node.op = Op::Typical(OpCode::kLEA, opsize, addr, reg); - 
return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); -} - -static size_t disasm_chk( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const OpSize opsize = OpSize::kWord; - const auto src = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (src.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - const unsigned dn = ((instr >> 9) & 7); - const auto dst = Arg::Dn(dn); - node.op = Op::Typical(OpCode::kCHK, opsize, src, dst); - return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); -} - -static size_t disasm_bra_bsr_bcc( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const int16_t dispmt0 = static_cast(instr & 0xff); - if (dispmt0 == -1) { - // This will definitely lead to executing invalid instruction and is - // also invalid for GNU AS to assemble - return disasm_verbatim(node, instr); - } - const auto opsize = dispmt0 ? OpSize::kShort : OpSize::kWord; - if (dispmt0 == 0) { - // Check the boundaries - if (node.address + kInstructionSizeStepBytes >= code.size) { - return disasm_verbatim(node, instr); - } - node.size = kInstructionSizeStepBytes * 2; - } else { - node.size = kInstructionSizeStepBytes; - } - const int16_t dispmt = kInstructionSizeStepBytes + (dispmt0 - ? 
dispmt0 : GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes)); - const uint32_t ref_addr = static_cast(node.address + dispmt); - Condition condition = static_cast((instr >> 8) & 0xf); - // False condition Indicates BSR - node.ref1_addr = ref_addr; - node.ref_kinds = kRef1RelMask | ((condition == Condition::kF) ? kRefCallMask : 0); - node.op = Op{OpCode::kBcc, opsize, condition, Arg::Displacement(dispmt)}; - return node.size; -} - -static OpCode OpCodeForBitOps(const unsigned opcode) -{ - switch (opcode) { - case 0: return OpCode::kBTST; - case 1: return OpCode::kBCHG; - case 2: return OpCode::kBCLR; - case 3: return OpCode::kBSET; - } - assert(false); - return OpCode::kNone; -} - -static size_t disasm_movep( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const unsigned dn = ((instr >> 9) & 7); - const unsigned an = instr & 7; - const OpSize opsize = ((instr >> 6) & 1) ? OpSize::kLong : OpSize::kWord; - const auto dir = static_cast(!((instr >> 7) & 1)); - const auto addr = FetchArg( - node.address + kInstructionSizeStepBytes, code, 5, an, opsize); - if (addr.mode == AddrMode::kInvalid) { - // Boundary check failed, most likely - return disasm_verbatim(node, instr); - } - assert(addr.mode == AddrMode::kD16AnAddr); - const auto reg = Arg::Dn(dn); - if (dir == MoveDirection::kRegisterToMemory) { - node.op = Op::Typical(OpCode::kMOVEP, opsize, reg, addr); - } else { - node.op = Op::Typical(OpCode::kMOVEP, opsize, addr, reg); - } - return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); -} - -static size_t disasm_src_arg_bitops_movep( - DisasmNode &node, - const uint16_t instr, - const DataView &code, - const bool has_dn_src = true) -{ - const unsigned m = (instr >> 3) & 7; - if ((m == 1) && has_dn_src) { - return disasm_movep(node, instr, code); - } - const unsigned dn = ((instr >> 9) & 7); - const unsigned xn = instr & 7; - const OpSize opsize0 = OpSize::kByte; - // Fetch AddrMode::kDn if has_dn_src, 
otherwise fetch AddrMode::kImmediate - // byte - const auto src = FetchArg( - node.address + kInstructionSizeStepBytes, - code, - (has_dn_src) ? 0 : 7, - dn, - opsize0); - if (src.mode == AddrMode::kInvalid) { - return disasm_verbatim(node, instr); - } - if (has_dn_src) { - assert(src.mode == AddrMode::kDn); - } else { - assert(dn == 4); - assert(src.mode == AddrMode::kImmediate); - } - const auto dst = FetchArg( - node.address + kInstructionSizeStepBytes + src.Size(opsize0), code, m, xn, opsize0); - const unsigned opcode = (instr >> 6) & 3; - switch (dst.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - if (opcode != 0) { - // PC relative destination address argument available for BTST only - return disasm_verbatim(node, instr); - } - break; - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - const auto opsize = dst.mode == AddrMode::kDn ? OpSize::kLong : OpSize::kByte; - node.op = Op::Typical(OpCodeForBitOps(opcode), opsize, src, dst); - return node.size = kInstructionSizeStepBytes + src.Size(opsize0) + dst.Size(opsize0); -} - -static size_t disasm_bitops(DisasmNode &n, const uint16_t i, const DataView &c) -{ - return disasm_src_arg_bitops_movep(n, i, c, false); -} - -static size_t disasm_logical_immediate_to( - DisasmNode &node, OpCode opcode, OpSize opsize, Arg imm) -{ - node.op = Op::Typical(opcode, opsize, imm, (opsize == OpSize::kByte) ? 
Arg::CCR() : Arg::SR()); - return node.size = kInstructionSizeStepBytes * 2; -} - -static OpCode OpCodeForLogicalImmediate(const unsigned opcode) -{ - switch (opcode) { - case 0: return OpCode::kORI; - case 1: return OpCode::kANDI; - case 2: return OpCode::kSUBI; - case 3: return OpCode::kADDI; - case 4: break; - case 5: return OpCode::kEORI; - case 6: return OpCode::kCMPI; - case 7: break; - } - assert(false); - return OpCode::kNone; -} - -static size_t disasm_bitops_movep( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const bool has_source_reg = (instr >> 8) & 1; - if (has_source_reg) { - return disasm_src_arg_bitops_movep(node, instr, code); - } - const unsigned opcode = (instr >> 9) & 7; - if (opcode == 7) { - // Does not exist - return disasm_verbatim(node, instr); - } - if (opcode == 4) { - return disasm_bitops(node, instr, code); - } - const int m = (instr >> 3) & 7; - const int xn = instr & 7; - const auto opsize = static_cast((instr >> 6) & 3); - if (opsize == OpSize::kInvalid) { - // Does not exist - return disasm_verbatim(node, instr); - } - // Anticipating #imm which means "to CCR"/"to SR", depending on OpSize - if (m == 7 && xn == 4) { - if (opcode == 2 || opcode == 3 || opcode == 6) { - // CMPI, SUBI and ANDI neither have immediate destination arguments - // nor "to CCR"/"to SR" variations - return disasm_verbatim(node, instr); - } - if (opsize == OpSize::kLong) { - // Does not exist - return disasm_verbatim(node, instr); - } - } - const auto src = FetchImmediate(node.address + kInstructionSizeStepBytes, code, opsize); - if (src.mode == AddrMode::kInvalid) { - return disasm_verbatim(node, instr); - } - assert(src.mode == AddrMode::kImmediate); - const OpCode mnemonic = OpCodeForLogicalImmediate(opcode); - if (m == 7 && xn == 4) { - return disasm_logical_immediate_to(node, mnemonic, opsize, src); - } - const auto dst = FetchArg( - node.address + kInstructionSizeStepBytes + src.Size(opsize), code, m, xn, opsize); - switch 
(dst.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - if (opcode != 6) { - // PC relative destination address argument available for CMPI only - return disasm_verbatim(node, instr); - } - break; - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - node.op = Op::Typical(mnemonic, opsize, src, dst); - return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); -} - -static size_t disasm_move_movea( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const int opsize_raw = (instr >> 12) & 3; - const OpSize opsize = (opsize_raw == 1) - ? OpSize::kByte : (opsize_raw == 3 ? 
OpSize::kWord : OpSize::kLong); - const auto src = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (src.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - if (opsize == OpSize::kByte) { - // Does not exist - return disasm_verbatim(node, instr); - } - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - break; - case AddrMode::kWord: - case AddrMode::kLong: - node.ref1_addr = static_cast(src.lword); - node.ref_kinds |= kRef1AbsMask | kRef1ReadMask; - break; - case AddrMode::kD16PCAddr: - node.ref1_addr = node.address + kInstructionSizeStepBytes + - static_cast(src.d16_pc.d16); - node.ref_kinds |= kRef1RelMask | kRef1ReadMask; - break; - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - break; - } - const int m = (instr >> 6) & 7; - const int xn = (instr >> 9) & 7; - const auto dst = FetchArg( - node.address + kInstructionSizeStepBytes + src.Size(opsize), code, m, xn, opsize); - switch (dst.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - if (opsize == OpSize::kByte) { - // Does not exist - return disasm_verbatim(node, instr); - } - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - break; - case AddrMode::kWord: - case AddrMode::kLong: - node.ref2_addr = static_cast(dst.lword); - node.ref_kinds |= kRef2AbsMask | kRef2WriteMask; - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - // XXX Assuming that moving long immediate value into address register is - // basically a sneaky LEA. It may not be true in some cases. 
- if (src.type == ArgType::kImmediate && dst.type == ArgType::kAn) { - if (opsize == OpSize::kLong) { - node.ref1_addr = static_cast(src.lword); - node.ref_kinds |= kRef1ImmMask | kRef1ReadMask; - } else if (opsize == OpSize::kWord) { - node.ref1_addr = static_cast(static_cast(src.lword)); - node.ref_kinds |= kRef1ImmMask | kRef1ReadMask; - } - } - const auto opcode = (dst.mode == AddrMode::kAn) ? OpCode::kMOVEA : OpCode::kMOVE; - node.op = Op::Typical(opcode, opsize, src, dst); - return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); -} - -static size_t disasm_move_from_sr( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const auto opsize = OpSize::kWord; - const auto dst = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (dst.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - node.op = Op::Typical(OpCode::kMOVE, opsize, Arg::SR(), dst); - return node.size = kInstructionSizeStepBytes + dst.Size(opsize); -} - -static size_t disasm_move_to( - DisasmNode &node, const uint16_t instr, const DataView &code, const ArgType reg) -{ - const auto opsize = OpSize::kWord; - const auto src = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (src.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case 
AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - break; - } - node.op = Op::Typical(OpCode::kMOVE, opsize, src, Arg{{reg}, {0}}); - return node.size = kInstructionSizeStepBytes + src.Size(opsize); -} - -static OpCode opcode_for_negx_clr_neg_not(const unsigned opcode) -{ - switch (opcode) { - case 0: return OpCode::kNEGX; - case 1: return OpCode::kCLR; - case 2: return OpCode::kNEG; - case 3: return OpCode::kNOT; - } - assert(false); - return OpCode::kNone; -} - -static size_t disasm_move_negx_clr_neg_not( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const auto opsize = static_cast((instr >> 6) & 3); - const unsigned opcode = (instr >> 9) & 3; - if (opsize == OpSize::kInvalid) { - switch (opcode) { - case 0: - return disasm_move_from_sr(node, instr, code); - case 1: - return disasm_verbatim(node, instr); - case 2: - return disasm_move_to(node, instr, code, ArgType::kCCR); - case 3: - return disasm_move_to(node, instr, code, ArgType::kSR); - } - assert(false); - return disasm_verbatim(node, instr); - } - const auto a = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (a.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - node.op = Op::Typical(opcode_for_negx_clr_neg_not(opcode), opsize, a); - return node.size = kInstructionSizeStepBytes + a.Size(opsize); -} - -static size_t disasm_trivial( - DisasmNode &node, const OpCode opcode) 
-{ - node.op = Op::Typical(opcode, OpSize::kNone); - return node.size; -} - -static size_t disasm_tas( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const auto opsize = OpSize::kByte; - const auto a = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (a.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - node.op = Op::Typical(OpCode::kTAS, opsize, a); - return node.size = kInstructionSizeStepBytes + a.Size(opsize); -} - -static size_t disasm_tst_tas_illegal( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const auto opsize = static_cast((instr >> 6) & 3); - const int m = (instr >> 3) & 7; - const int xn = instr & 7; - if (opsize == OpSize::kInvalid) { - if (m == 7 && xn == 4){ - return disasm_trivial(node, OpCode::kILLEGAL); - } - return disasm_tas(node, instr, code); - } - const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, m, xn, opsize); - switch (a.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - break; - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - node.op = Op::Typical(OpCode::kTST, opsize, a); - return node.size = 
kInstructionSizeStepBytes + a.Size(opsize); -} - -static size_t disasm_trap(DisasmNode &node, const uint16_t instr) -{ - const unsigned vector = instr & 0xf; - node.op = Op::Typical(OpCode::kTRAP, OpSize::kNone, Arg::Immediate(vector)); - return node.size = kInstructionSizeStepBytes; -} - -static size_t disasm_link_unlink(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const bool unlk = (instr >> 3) & 1; - const unsigned xn = instr & 7; - if (unlk) { - node.op = Op::Typical(OpCode::kUNLK, OpSize::kNone, Arg::AddrModeXn(ArgType::kAn, xn)); - return node.size = kInstructionSizeStepBytes; - } - const auto opsize = OpSize::kWord; - const auto src = FetchImmediate(node.address + kInstructionSizeStepBytes, code, opsize); - if (src.mode != AddrMode::kImmediate) { - return disasm_verbatim(node, instr); - } - node.op = Op::Typical(OpCode::kLINK, opsize, Arg::AddrModeXn(ArgType::kAn, xn), src); - return node.size = kInstructionSizeStepBytes + src.Size(opsize); -} - -static size_t disasm_move_usp(DisasmNode &node, const uint16_t instr) -{ - const unsigned xn = instr & 7; - const auto dir = static_cast((instr >> 3) & 1); - if (dir == MoveDirection::kRegisterToMemory) { - node.op = Op::Typical( - OpCode::kMOVE, OpSize::kLong, Arg::An(xn), Arg::USP()); - } else { - node.op = Op::Typical( - OpCode::kMOVE, OpSize::kLong, Arg::USP(), Arg::An(xn)); - } - return node.size = kInstructionSizeStepBytes; -} - -static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const bool is_nbcd = !((instr >> 6) & 1); - const OpSize opsize0 = OpSize::kWord; - const auto arg = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize0); - bool is_swap{}; - switch (arg.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - if (!is_nbcd) { - is_swap = true; - } - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - break; - case 
AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - if (!is_nbcd) { - return disasm_verbatim(node, instr); - } - break; - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - break; - case AddrMode::kWord: - case AddrMode::kLong: - node.ref1_addr = static_cast(arg.lword); - node.ref_kinds = kRef1AbsMask | kRef1ReadMask; - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - if (is_nbcd) { - return disasm_verbatim(node, instr); - } - if (arg.mode == AddrMode::kD16PCAddr) { - node.ref1_addr = node.address + kInstructionSizeStepBytes + - static_cast(arg.d16_pc.d16); - node.ref_kinds = kRef1RelMask | kRef1ReadMask; - } - break; - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - const auto opcode = is_nbcd ? OpCode::kNBCD : is_swap ? OpCode::kSWAP : OpCode::kPEA; - const auto opsize = is_nbcd ? OpSize::kByte : is_swap ? OpSize::kWord : OpSize::kLong; - node.op = Op::Typical(opcode, opsize, arg); - return node.size = kInstructionSizeStepBytes + arg.Size(opsize0); -} - -static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const auto a = FetchImmediate(node.address + kInstructionSizeStepBytes, code, OpSize::kWord); - if (a.mode != AddrMode::kImmediate) { - return disasm_verbatim(node, instr); - } - node.op = Op::Typical(OpCode::kSTOP, OpSize::kNone, a); - return node.size = kInstructionSizeStepBytes * 2; -} - -static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - if ((instr & 0xf900) == 0x4000) { - return disasm_move_negx_clr_neg_not(node, instr, code); - } else if ((instr & 0xff80) == 0x4800) { - // NOTE: EXT is handled with MOVEM - return disasm_nbcd_swap_pea(node, instr, code); - } else if ((instr & 0xff00) == 0x4a00) { - return disasm_tst_tas_illegal(node, instr, code); - } else if ((instr & 0xfff0) == 0x4e40) { - return disasm_trap(node, instr); - } else if ((instr & 0xfff0) == 0x4e50) { - return disasm_link_unlink(node, instr, code); - } 
else if ((instr & 0xfff0) == 0x4e60) { - return disasm_move_usp(node, instr); - } else if ((instr & 0xfff8) == 0x4e70) { - if (instr == 0x4e70) { - return disasm_trivial(node, OpCode::kRESET); - } else if (instr == 0x4e71) { - return disasm_trivial(node, OpCode::kNOP); - } else if (instr == 0x4e72) { - return disasm_stop(node, instr, code); - } else if (instr == 0x4e73) { - return disasm_trivial(node, OpCode::kRTE); - } else if (instr == 0x4e75) { - return disasm_trivial(node, OpCode::kRTS); - } else if (instr == 0x4e76) { - return disasm_trivial(node, OpCode::kTRAPV); - } else if (instr == 0x4e77) { - return disasm_trivial(node, OpCode::kRTR); - } - } else if ((instr & 0xff80) == 0x4e80) { - return disasm_jsr_jmp(node, instr, code); - } else if ((instr & 0xfb80) == 0x4880) { - return disasm_ext_movem(node, instr, code); - } else if ((instr & 0xf1c0) == 0x41c0) { - return disasm_lea(node, instr, code); - } else if ((instr & 0xf1c0) == 0x4180) { - return disasm_chk(node, instr, code); - } - return disasm_verbatim(node, instr); -} - -static size_t disasm_addq_subq( - DisasmNode &node, const uint16_t instr, const DataView &code, const OpSize opsize) -{ - const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (a.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: // 5x00..5x07 / 5x40..5x47 / 5x80..5x87 - break; - case AddrMode::kAn: // 5x08..5x0f / 5x48..5x4f / 5x88..5x8f - if (opsize == OpSize::kByte) { - // 5x08..5x0f - // addqb and subqb with An do not exist - return disasm_verbatim(node, instr); - } - break; - case AddrMode::kAnAddr: // 5x10..5x17 / 5x50..5x57 / 5x90..5x97 - case AddrMode::kAnAddrIncr: // 5x18..5x1f / 5x58..5x5f / 5x98..5x9f - case AddrMode::kAnAddrDecr: // 5x20..5x27 / 5x60..5x67 / 5xa0..5xa7 - case AddrMode::kD16AnAddr: // 5x28..5x2f / 5x68..5x6f / 5xa8..5xaf - case AddrMode::kD8AnXiAddr: // 5x30..5x37 / 5x70..5x77 / 5xb0..5xb7 - case AddrMode::kWord: // 5x38 / 
5x78 / 5xb8 - case AddrMode::kLong: // 5x39 / 5x79 / 5xb9 - break; - case AddrMode::kD16PCAddr: // 5x3a / 5x7a / 5xba - case AddrMode::kD8PCXiAddr: // 5x3b / 5x7b / 5xbb - case AddrMode::kImmediate: // 5x3c / 5x7c / 5xbc - // Does not exist - return disasm_verbatim(node, instr); - } - const unsigned imm = ((uint8_t((instr >> 9) & 7) - 1) & 7) + 1; - const auto opcode = ((instr >> 8) & 1) ? OpCode::kSUBQ : OpCode::kADDQ; - node.op = Op::Typical(opcode, opsize, Arg::Immediate(imm), a); - return node.size = kInstructionSizeStepBytes + a.Size(opsize); -} - -static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - if (node.address + kInstructionSizeStepBytes >= code.size) { - return disasm_verbatim(node, instr); - } - const int16_t dispmt_raw = GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes); - const int32_t dispmt = dispmt_raw + kInstructionSizeStepBytes; - node.ref2_addr = static_cast(node.address + dispmt); - node.ref_kinds = kRef2RelMask; - node.op = Op{ - OpCode::kDBcc, - OpSize::kWord, - static_cast((instr >> 8) & 0xf), - Arg::AddrModeXn(ArgType::kDn, (instr & 7)), - Arg::Displacement(dispmt), - }; - return node.size = kInstructionSizeStepBytes * 2; -} - -static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const OpSize opsize = OpSize::kWord; - const auto a = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (a.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: // 5xc0..5xc7, Dn - break; - case AddrMode::kAn: // 5xc8..5xcf, An - return disasm_dbcc(node, instr, code); - case AddrMode::kAnAddr: // 5xd0..5xd7 - case AddrMode::kAnAddrIncr: // 5xd8..5xdf - case AddrMode::kAnAddrDecr: // 5xe0..5xe7 - case AddrMode::kD16AnAddr: // 5xe8..5xef - case AddrMode::kD8AnXiAddr: // 5xf0..5xf7 - case AddrMode::kWord: // 5xf8 (xxx).W - case AddrMode::kLong: // 5xf9 (xxx).L - break; - case 
AddrMode::kD16PCAddr: // 5xfa - case AddrMode::kD8PCXiAddr: // 5xfb - case AddrMode::kImmediate: // 5xfc - // Does not exist - return disasm_verbatim(node, instr); - } - node.op = Op{OpCode::kScc, OpSize::kByte, static_cast((instr >> 8) & 0xf), a}; - return node.size = kInstructionSizeStepBytes + a.Size(opsize); -} - -static size_t disasm_addq_subq_scc_dbcc(DisasmNode &n, const uint16_t instr, const DataView &c) -{ - const auto opsize = static_cast((instr >> 6) & 3); - if (opsize == OpSize::kInvalid) { - return disasm_scc_dbcc(n, instr, c); - } - return disasm_addq_subq(n, instr, c, opsize); -} - -static size_t disasm_moveq(DisasmNode &node, const uint16_t instr) -{ - if (instr & 0x100) { - // Does not exist - return disasm_verbatim(node, instr); - } - const int xn = (instr >> 9) & 7; - const auto dst = Arg::Dn(xn); - const int8_t data = instr & 0xff; - const OpSize opsize = OpSize::kLong; - node.op = Op::Typical(OpCode::kMOVEQ, opsize, Arg::Immediate(data), dst); - return node.size = kInstructionSizeStepBytes + dst.Size(opsize); -} - -static size_t disasm_divu_divs_mulu_muls( - DisasmNode &node, - const uint16_t instr, - const DataView &code, - const OpCode opcode) -{ - const auto opsize = OpSize::kWord; - const auto src = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (src.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - break; - } - const unsigned dn = (instr >> 9) & 7; - const auto dst = Arg::Dn(dn); - node.op = Op::Typical(opcode, opsize, src, dst); - return node.size = kInstructionSizeStepBytes + dst.Size(opsize) + 
src.Size(opsize); -} - -static size_t disasm_addx_subx_abcd_sbcd( - DisasmNode &node, const uint16_t instr, const OpCode opcode) -{ - const OpSize opsize = static_cast((instr >> 6) & 3); - // Must be already handled by parent call - assert(opsize != OpSize::kInvalid); - const int m = (instr >> 3) & 1; - const int xn = instr & 7; - const int xi = (instr >> 9) & 7; - const auto src = m ? Arg::AnAddrDecr(xn) : Arg::Dn(xn); - const auto dst = m ? Arg::AnAddrDecr(xi) : Arg::Dn(xi); - // XXX GNU AS does not know ABCD.B, it only knows ABCD, but happily consumes - // SBCD.B and others. That's why it is OpSize::kNone specifically for ABCD - // mnemonic. It is probably a bug in GNU AS. - node.op = Op::Typical(opcode, (opcode == OpCode::kABCD) ? OpSize::kNone : opsize, src, dst); - return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); -} - -static size_t disasm_or_and( - DisasmNode &node, - const uint16_t instr, - const DataView &code, - const OpSize opsize, - const OpCode opcode) -{ - const bool dir_to_addr = (instr >> 8) & 1; - const auto addr = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (addr.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - if (dir_to_addr) { - // Switching dir when bot operands are data registers is not allowed - return disasm_verbatim(node, instr); - } - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - if (dir_to_addr) { - // PC relative cannot be destination - return disasm_verbatim(node, instr); - } - break; - case AddrMode::kImmediate: - if (dir_to_addr) { - // immediate cannot be destination - return disasm_verbatim(node, instr); - } - break; - } - const 
auto reg = Arg::Dn((instr >> 9) & 7); - if (dir_to_addr) { - node.op = Op::Typical(opcode, opsize, reg, addr); - } else { - node.op = Op::Typical(opcode, opsize, addr, reg); - } - return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); -} - -static size_t disasm_divu_divs_sbcd_or( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - // Also ensures that opsize == OpSize::kByte, i.e. 0b00 - if ((instr & 0x1f0) == 0x100) { - return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kSBCD); - } - const OpSize opsize = static_cast((instr >> 6) & 3); - if (opsize == OpSize::kInvalid) { - const bool is_signed = (instr >> 8) & 1; - const auto opcode = is_signed ? OpCode::kDIVS : OpCode::kDIVU; - return disasm_divu_divs_mulu_muls(node, instr, code, opcode); - } - return disasm_or_and(node, instr, code, opsize, OpCode::kOR); -} - -static size_t disasm_adda_suba_cmpa( - DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode) -{ - const OpSize opsize = static_cast(((instr >> 8) & 1) + 1); - const auto src = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (src.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - case AddrMode::kAn: - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - break; - } - const unsigned an = (instr >> 9) & 7; - const auto dst = Arg::An(an); - node.op = Op::Typical(opcode, opsize, src, dst); - return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); -} - -static size_t disasm_add_sub_cmp( - DisasmNode &node, - const uint16_t instr, - const DataView &code, - const OpCode opcode, - const OpSize opsize, - const bool dir_to_addr) -{ - const auto addr = 
FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (addr.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - if (dir_to_addr || opsize == OpSize::kByte) { - // An cannot be destination and An cannot be used as byte - return disasm_verbatim(node, instr); - } - /* Fall through */ - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - break; - case AddrMode::kWord: - case AddrMode::kLong: - if (dir_to_addr) { - node.ref2_addr = static_cast(addr.lword); - node.ref_kinds = kRef2AbsMask | kRef2ReadMask; - } else { - node.ref1_addr = static_cast(addr.lword); - node.ref_kinds = kRef1AbsMask | kRef1ReadMask; - } - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - if (dir_to_addr) { - // PC relative cannot be destination - return disasm_verbatim(node, instr); - } - if (addr.mode == AddrMode::kD16PCAddr) { - node.ref1_addr = node.address + kInstructionSizeStepBytes + - static_cast(addr.d16_pc.d16); - node.ref_kinds = kRef1RelMask | kRef1ReadMask; - } - break; - case AddrMode::kImmediate: - if (dir_to_addr) { - // immediate cannot be destination - return disasm_verbatim(node, instr); - } - break; - } - const unsigned dn = (instr >> 9) & 7; - const auto reg = Arg::Dn(dn); - if (dir_to_addr) { - node.op = Op::Typical(opcode, opsize, reg, addr); - } else { - node.op = Op::Typical(opcode, opsize, addr, reg); - } - return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); -} - -static size_t disasm_cmpm(DisasmNode &node, const uint16_t instr) -{ - const OpSize opsize = static_cast((instr >> 6) & 3); - // Must be already handled by parent call - assert(opsize != OpSize::kInvalid); - // M has to be set to 0b001 - assert(((instr >> 3) & 7) == 1); - const int xn = instr & 7; - const int xi = (instr >> 9) & 7; - const auto src = 
Arg::AnAddrIncr(xn); - const auto dst = Arg::AnAddrIncr(xi); - node.op = Op::Typical(OpCode::kCMPM, opsize, src, dst); - return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); -} - -static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const OpSize opsize = static_cast((instr >> 6) & 3); - const auto addr = FetchArg( - node.address + kInstructionSizeStepBytes, code, instr, opsize); - switch (addr.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - // PC relative and immediate cannot be destination - return disasm_verbatim(node, instr); - } - const auto reg = Arg::Dn((instr >> 9) & 7); - node.op = Op::Typical(OpCode::kEOR, opsize, reg, addr); - return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); -} - -static size_t disasm_eor_cmpm_cmp_cmpa( - DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const OpSize opsize = static_cast((instr >> 6) & 3); - if (opsize == OpSize::kInvalid) { - return disasm_adda_suba_cmpa(node, instr, code, OpCode::kCMPA); - } - const bool dir_to_addr = ((instr >> 8) & 1); - if (!dir_to_addr) { - return disasm_add_sub_cmp(node, instr, code, OpCode::kCMP, opsize, dir_to_addr); - } - const int m = (instr >> 3) & 7; - if (m == 1) { - return disasm_cmpm(node, instr); - } - return disasm_eor(node, instr, code); -} - -static size_t disasm_exg(DisasmNode &node, const uint16_t instr) -{ - assert((instr & 0x130) == 0x100); - const int m1 = (instr >> 3) & 1; - const int m2 = (instr >> 6) & 3; - assert(m2 != 0); // Therefore m == 0 and m == 1 
are impossible - assert(m2 != 3); // Therefore m == 6 and m == 7 are impossible - const int m = (m2 << 1) | m1; - assert(m != 4); // Only m == 2, m == 3 and m == 5 values are allowed - const int xn = instr & 7; - const int xi = (instr >> 9) & 7; - const auto src = (m == 3) ? Arg::An(xi) : Arg::Dn(xi); - const auto dst = (m == 2) ? Arg::Dn(xn) : Arg::An(xn); - // GNU AS does not accept size suffix for EXG, although it's size is always - // long word. - const auto opsize = OpSize::kNone; - node.op = Op::Typical(OpCode::kEXG, opsize, src, dst); - return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); -} - -static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - if ((instr & 0x1f0) == 0x100) { - return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kABCD); - } - const OpSize opsize = static_cast((instr >> 6) & 3); - if (opsize == OpSize::kInvalid) { - const bool is_signed = (instr >> 8) & 1; - const auto opcode = is_signed ? OpCode::kMULS : OpCode::kMULU; - return disasm_divu_divs_mulu_muls(node, instr, code, opcode); - } - const unsigned m_split = instr & 0x1f8; - if (m_split == 0x188 || m_split == 0x148 || m_split == 0x140) { - return disasm_exg(node, instr); - } - return disasm_or_and(node, instr, code, opsize, OpCode::kAND); -} - -static size_t disasm_add_sub_x_a( - DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode) -{ - const OpSize opsize = static_cast((instr >> 6) & 3); - if (opsize == OpSize::kInvalid) { - return disasm_adda_suba_cmpa(node, instr, code, (opcode == OpCode::kSUB) ? OpCode::kSUBA : OpCode::kADDA); - } - const bool dir_to_addr = (instr >> 8) & 1; - const unsigned m = (instr >> 3) & 7; - if (dir_to_addr && (m == 0 || m == 1)) { - return disasm_addx_subx_abcd_sbcd(node, instr, (opcode == OpCode::kSUB) ? 
OpCode::kSUBX : OpCode::kADDX); - } - return disasm_add_sub_cmp(node, instr, code, opcode, opsize, dir_to_addr); -} - -static OpCode ShiftKindToOpcode(const ShiftKind k, const ShiftDirection d) -{ - switch (k) { - case ShiftKind::kArithmeticShift: - return d == ShiftDirection::kLeft ? OpCode::kASL : OpCode::kASR; - case ShiftKind::kLogicalShift: - return d == ShiftDirection::kLeft ? OpCode::kLSL : OpCode::kLSR; - case ShiftKind::kRotateX: - return d == ShiftDirection::kLeft ? OpCode::kROXL : OpCode::kROXR; - case ShiftKind::kRotate: - return d == ShiftDirection::kLeft ? OpCode::kROL : OpCode::kROR; - } - assert(false); - return OpCode::kNone; -} - -static bool IsValidShiftKind(const ShiftKind k) -{ - return static_cast(k) < 4; -} - -static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const DataView &code) -{ - const OpSize opsize = static_cast((instr >> 6) & 3); - const unsigned xn = instr & 7; - const uint8_t rotation = (instr >> 9) & 7; - const ShiftKind kind = (opsize == OpSize::kInvalid) - ? static_cast(rotation) - : static_cast((instr >> 3) & 3); - if (!IsValidShiftKind(kind)) { - return disasm_verbatim(node, instr); - } - const auto dst = (opsize == OpSize::kInvalid) - ? FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize) - : Arg::Dn(xn); - if (opsize == OpSize::kInvalid) { - switch (dst.mode) { - case AddrMode::kInvalid: - return disasm_verbatim(node, instr); - case AddrMode::kDn: - // Intersects with situation when args are "#1,%dx". GNU AS would - // not understand shift instruction with single argument of "%dx". 
- return disasm_verbatim(node, instr); - break; - case AddrMode::kAn: - return disasm_verbatim(node, instr); - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - case AddrMode::kLong: - break; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - case AddrMode::kImmediate: - return disasm_verbatim(node, instr); - } - } - const unsigned imm = ((rotation - 1) & 7) + 1; - const unsigned src = (opsize == OpSize::kInvalid) ? 1 : rotation; - const auto dir = static_cast((instr >> 8) & 1); - if (opsize == OpSize::kInvalid) { - node.op = Op::Typical(ShiftKindToOpcode(kind, dir), opsize, dst); - } else { - const unsigned m = (instr >> 5) & 1; - const auto arg1 = m ? Arg::AddrModeXn(ArgType::kDn, src) : Arg::Immediate(imm); - node.op = Op::Typical(ShiftKindToOpcode(kind, dir), opsize, arg1, dst); - } - return node.size = kInstructionSizeStepBytes + dst.Size(opsize); -} - -static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataView &c) -{ - switch ((i & 0xf000) >> 12) { - case 0x0: - return disasm_bitops_movep(n, i, c); - case 0x1: - case 0x2: - case 0x3: - return disasm_move_movea(n, i, c); - case 0x4: - return disasm_chunk_4(n, i, c); - case 0x5: - return disasm_addq_subq_scc_dbcc(n, i, c); - case 0x6: - return disasm_bra_bsr_bcc(n, i, c); - case 0x7: - return disasm_moveq(n, i); - case 0x8: - return disasm_divu_divs_sbcd_or(n, i, c); - case 0x9: - return disasm_add_sub_x_a(n, i, c, OpCode::kSUB); - case 0xa: - // Does not exist - return disasm_verbatim(n, i); - case 0xb: - return disasm_eor_cmpm_cmp_cmpa(n, i, c); - case 0xc: - return disasm_chunk_c(n, i, c); - case 0xd: - return disasm_add_sub_x_a(n, i, c, OpCode::kADD); - case 0xe: - return disasm_shift_rotate(n, i, c); - case 0xf: - // Does not exist - return disasm_verbatim(n, i); - } - assert(false); - return disasm_verbatim(n, i); -} - -size_t DisasmNode::Disasm(const DataView &code) -{ 
- // We assume that machine have no MMU and ROM data always starts at 0 - assert(this->address < code.size); - size = kInstructionSizeStepBytes; - ref_kinds = 0; - ref1_addr = 0; - ref2_addr = 0; - const uint16_t instr = GetU16BE(code.buffer + this->address); - if (IsInstruction(this->type)) { - return m68k_disasm(*this, instr, code); - } else { - // Data should not be disassembled - return disasm_verbatim(*this, instr); - } -} - -size_t DisasmNode::DisasmAsRaw(const DataView &code) -{ - // We assume that machine have no MMU and ROM data always starts at 0 - assert(this->address < code.size); - size = kInstructionSizeStepBytes; - ref_kinds = 0; - ref1_addr = 0; - ref2_addr = 0; - const uint16_t instr = GetU16BE(code.buffer + this->address); - return disasm_verbatim(*this, instr); -} - -static const char *ToString(const OpCode opcode, const Condition condition) -{ - switch (opcode) { - case OpCode::kNone: - assert(false); - break; - case OpCode::kRaw: return ".short"; - case OpCode::kORI: return "ori"; - case OpCode::kANDI: return "andi"; - case OpCode::kSUBI: return "subi"; - case OpCode::kADDI: return "addi"; - case OpCode::kEORI: return "eori"; - case OpCode::kCMPI: return "cmpi"; - case OpCode::kBTST: return "btst"; - case OpCode::kBCHG: return "bchg"; - case OpCode::kBCLR: return "bclr"; - case OpCode::kBSET: return "bset"; - case OpCode::kMOVEP: return "movep"; - case OpCode::kMOVEA: return "movea"; - case OpCode::kMOVE: return "move"; - case OpCode::kNEGX: return "negx"; - case OpCode::kCLR: return "clr"; - case OpCode::kNEG: return "neg"; - case OpCode::kNOT: return "not"; - case OpCode::kEXT: return "ext"; - case OpCode::kNBCD: return "nbcd"; - case OpCode::kSWAP: return "swap"; - case OpCode::kPEA: return "pea"; - case OpCode::kILLEGAL: return "illegal"; - case OpCode::kTAS: return "tas"; - case OpCode::kTST: return "tst"; - case OpCode::kTRAP: return "trap"; - case OpCode::kLINK: return "link"; - case OpCode::kUNLK: return "unlk"; - case OpCode::kRESET: 
return "reset"; - case OpCode::kNOP: return "nop"; - case OpCode::kSTOP: return "stop"; - case OpCode::kRTE: return "rte"; - case OpCode::kRTS: return "rts"; - case OpCode::kTRAPV: return "trapv"; - case OpCode::kRTR: return "rtr"; - case OpCode::kJSR: return "jsr"; - case OpCode::kJMP: return "jmp"; - case OpCode::kMOVEM: return "movem"; - case OpCode::kLEA: return "lea"; - case OpCode::kCHK: return "chk"; - case OpCode::kADDQ: return "addq"; - case OpCode::kSUBQ: return "subq"; - case OpCode::kScc: - switch(condition) { - case Condition::kT : return "st"; - case Condition::kF: return "sf"; - case Condition::kHI: return "shi"; - case Condition::kLS: return "sls"; - case Condition::kCC: return "scc"; - case Condition::kCS: return "scs"; - case Condition::kNE: return "sne"; - case Condition::kEQ: return "seq"; - case Condition::kVC: return "svc"; - case Condition::kVS: return "svs"; - case Condition::kPL: return "spl"; - case Condition::kMI: return "smi"; - case Condition::kGE: return "sge"; - case Condition::kLT: return "slt"; - case Condition::kGT: return "sgt"; - case Condition::kLE: return "sle"; - } - assert(false); - break; - case OpCode::kDBcc: - switch (condition) { - case Condition::kT: return "dbt"; - case Condition::kF: return "dbf"; - case Condition::kHI: return "dbhi"; - case Condition::kLS: return "dbls"; - case Condition::kCC: return "dbcc"; - case Condition::kCS: return "dbcs"; - case Condition::kNE: return "dbne"; - case Condition::kEQ: return "dbeq"; - case Condition::kVC: return "dbvc"; - case Condition::kVS: return "dbvs"; - case Condition::kPL: return "dbpl"; - case Condition::kMI: return "dbmi"; - case Condition::kGE: return "dbge"; - case Condition::kLT: return "dblt"; - case Condition::kGT: return "dbgt"; - case Condition::kLE: return "dble"; - } - assert(false); - break; - case OpCode::kBcc: - switch (condition) { - case Condition::kT: return "bra"; - case Condition::kF: return "bsr"; - case Condition::kHI: return "bhi"; - case 
Condition::kLS: return "bls"; - case Condition::kCC: return "bcc"; - case Condition::kCS: return "bcs"; - case Condition::kNE: return "bne"; - case Condition::kEQ: return "beq"; - case Condition::kVC: return "bvc"; - case Condition::kVS: return "bvs"; - case Condition::kPL: return "bpl"; - case Condition::kMI: return "bmi"; - case Condition::kGE: return "bge"; - case Condition::kLT: return "blt"; - case Condition::kGT: return "bgt"; - case Condition::kLE: return "ble"; - } - assert(false); - break; - case OpCode::kMOVEQ: return "moveq"; - case OpCode::kDIVU: return "divu"; - case OpCode::kDIVS: return "divs"; - case OpCode::kSBCD: return "sbcd"; - case OpCode::kOR: return "or"; - case OpCode::kSUB: return "sub"; - case OpCode::kSUBX: return "subx"; - case OpCode::kSUBA: return "suba"; - case OpCode::kEOR: return "eor"; - case OpCode::kCMPM: return "cmpm"; - case OpCode::kCMP: return "cmp"; - case OpCode::kCMPA: return "cmpa"; - case OpCode::kMULU: return "mulu"; - case OpCode::kMULS: return "muls"; - case OpCode::kABCD: return "abcd"; - case OpCode::kEXG: return "exg"; - case OpCode::kAND: return "and"; - case OpCode::kADD: return "add"; - case OpCode::kADDX: return "addx"; - case OpCode::kADDA: return "adda"; - case OpCode::kASR: return "asr"; - case OpCode::kASL: return "asl"; - case OpCode::kLSR: return "lsr"; - case OpCode::kLSL: return "lsl"; - case OpCode::kROXR: return "roxr"; - case OpCode::kROXL: return "roxl"; - case OpCode::kROR: return "ror"; - case OpCode::kROL: return "rol"; - } - assert(false); - return "?"; -} - -static const char *ToString(const OpSize s) -{ - switch (s) { - case OpSize::kNone: return ""; - case OpSize::kByte: return "b"; - case OpSize::kShort: return "s"; - case OpSize::kWord: return "w"; - case OpSize::kLong: return "l"; - } - assert(false); - return ""; -} - -static int OpcodeSNPrintf( - char *const buf, - const size_t bufsz, - const OpCode opcode, - const Condition condition, - const OpSize size_spec) -{ - return snprintf(buf, 
bufsz, "%s%s", ToString(opcode, condition), ToString(size_spec)); -} - -static char RegChar(const uint8_t xi) -{ - return (xi & 0x08) ? 'a' : 'd'; -} - -static char SizeSpecChar(const uint8_t xi) -{ - return (xi & 0x10) ? 'l' : 'w'; -} - -static unsigned RegNum(const uint8_t xi) -{ - return xi & 0x7; -} - -static size_t snprint_reg_mask( - char *const buf, const size_t bufsz, const uint32_t regmask_arg, const ArgType arg_type) -{ - const uint32_t regmask = regmask_arg & 0xffff; - size_t written = 0; - bool first_printed = 0; - size_t span = 0; - // 17-th bit used to close the span with 0 value unconditionally - for (int i = 0; i < 17; i++) { - const uint32_t mask = 1 << (arg_type == ArgType::kRegMaskPredecrement ? (15 - i) : i); - const bool hit = regmask & mask; - const bool span_open = hit && span == 0; - const bool span_closed = !hit && span > 1; - const int printable_i = i - (span_closed ? 1 : 0); - const int id = printable_i % 8; - const char regtype = (printable_i >= 8) ? 'a' : 'd'; - if (span_open || span_closed) { - const char *const delimiter = span_open ? (first_printed ? "/" : "") : "-"; - const size_t remaining = bufsz - written; - const int ret = snprintf(buf + written, remaining, "%s%%%c%d", delimiter, regtype, id); - assert(ret > 0); - assert(static_cast(ret) >= sizeof("%d0")-1); - assert(static_cast(ret) <= sizeof("-%d0")-1); - written += Min(remaining, ret); - first_printed = true; - } - span = hit ? 
span + 1 : 0; - } - assert(written < bufsz); // Output must not be truncated - return written; -} - -int Arg::SNPrint( - char *const buf, - const size_t bufsz, - const bool imm_as_hex, - const RefKindMask ref_kinds, - const char *const label, - const uint32_t self_addr, - const uint32_t ref_addr) const -{ - switch (type) { - case ArgType::kNone: - assert(false); - break; - case ArgType::kRaw: - return snprintf(buf, bufsz, "0x%04x", uword); - case ArgType::kDn: - return snprintf(buf, bufsz, "%%d%d", xn); - case ArgType::kAn: - return snprintf(buf, bufsz, "%%a%u", xn); - case ArgType::kAnAddr: - return snprintf(buf, bufsz, "%%a%u@", xn); - case ArgType::kAnAddrIncr: - return snprintf(buf, bufsz, "%%a%u@+", xn); - case ArgType::kAnAddrDecr: - return snprintf(buf, bufsz, "%%a%u@-", xn); - case ArgType::kD16AnAddr: - return snprintf(buf, bufsz, "%%a%u@(%d:w)", d16_an.an, d16_an.d16); - case ArgType::kD8AnXiAddr: - return snprintf( - buf, bufsz, "%%a%u@(%d,%%%c%u:%c)", - d8_an_xi.an, - d8_an_xi.d8, - RegChar(d8_an_xi.xi), - RegNum(d8_an_xi.xi), - SizeSpecChar(d8_an_xi.xi)); - case ArgType::kWord: - case ArgType::kLong: - { - const char c = type == ArgType::kLong ? 'l' : 'w'; - if (ref_kinds & kRefAbsMask) { - if (static_cast(lword) == ref_addr) { - return snprintf(buf, bufsz, "%s:%c", label, c); - } else { - // It has to be AFTER the label we are gonna reference here - assert(static_cast(lword) > ref_addr); - return snprintf(buf, bufsz, "%s+%d:%c", label, lword - ref_addr, c); - } - } else { - return snprintf(buf, bufsz, "0x%x:%c", lword, c); - } - } - case ArgType::kD16PCAddr: - if (ref_kinds & kRefRelMask) { - // XXX: Most of instructions with PC relative values have 2 bytes - // added to the offset, some does not. Still figuring that out. - const bool has_fix = ref_kinds & kRefPcRelFix2Bytes; - const uint32_t arg_addr = self_addr + d16_pc.d16 + kInstructionSizeStepBytes + (has_fix ? 
kInstructionSizeStepBytes : 0); - if (arg_addr == ref_addr) { - return snprintf(buf, bufsz, "%%pc@(%s:w)", label); - } else { - assert(arg_addr > ref_addr); - return snprintf(buf, bufsz, "%%pc@(%s+%d:w)", label, arg_addr - ref_addr); - } - } else { - return snprintf(buf, bufsz, "%%pc@(%d:w)", d16_pc.d16); - } - case ArgType::kD8PCXiAddr: - return snprintf( - buf, bufsz, "%%pc@(%d,%%%c%u:%c)", - d8_pc_xi.d8, - RegChar(d8_pc_xi.xi), - RegNum(d8_pc_xi.xi), - SizeSpecChar(d8_pc_xi.xi)); - case ArgType::kImmediate: - if (ref_kinds & kRef1ImmMask) { - if (static_cast(lword) == ref_addr) { - return snprintf(buf, bufsz, "#%s", label); - } else { - // It has to be AFTER the label we are gonna reference here - assert(static_cast(lword) > ref_addr); - return snprintf(buf, bufsz, "#%s+%d", label, lword - ref_addr); - } - } else if (imm_as_hex) { - return snprintf(buf, bufsz, "#0x%x", lword); - } else { - return snprintf(buf, bufsz, "#%d", lword); - } - case ArgType::kRegMask: - case ArgType::kRegMaskPredecrement: - return snprint_reg_mask(buf, bufsz, uword, type); - case ArgType::kDisplacement: - if (ref_kinds & kRefRelMask) { - if (static_cast(self_addr + lword) == ref_addr) { - return snprintf(buf, bufsz, "%s", label); - } else { - assert(static_cast(self_addr + lword) > ref_addr); - return snprintf(buf, bufsz, "%s+%d", label, (self_addr + lword) - ref_addr); - } - } else { - return snprintf(buf, bufsz, ".%s%d", lword >= 0 ? 
"+" : "", lword); - } - case ArgType::kCCR: - return snprintf(buf, bufsz, "%%ccr"); - case ArgType::kSR: - return snprintf(buf, bufsz, "%%sr"); - case ArgType::kUSP: - return snprintf(buf, bufsz, "%%usp"); - } - assert(false); - return -1; -} - -int Op::FPrint( - FILE *const stream, - const char *const indent, - const bool imm_as_hex, - const RefKindMask ref_kinds, - const char *const ref1_label, - const char *const ref2_label, - const uint32_t self_addr, - const uint32_t ref1_addr, - const uint32_t ref2_addr) const -{ - assert(opcode != OpCode::kNone); - char mnemonic_str[kMnemonicBufferSize]{}; - OpcodeSNPrintf(mnemonic_str, kMnemonicBufferSize, opcode, condition, size_spec); - if (arg1.type != ArgType::kNone) { - char arg1_str[kArgsBufferSize]{}; - const RefKindMask ref1_kinds = ref_kinds & (kRef1Mask | kRefPcRelFix2Bytes); - // It is useful to have immediate value printed as hex if destination - // argument is plain address register, status register or condition code - // register. USP is not the case because it's value may be moved only to - // or from An register. 
- const bool imm_as_hex_2 = imm_as_hex || - arg2.type == ArgType::kAn || - arg2.type == ArgType::kCCR || - arg2.type == ArgType::kSR; - arg1.SNPrint( - arg1_str, - kArgsBufferSize, - imm_as_hex_2, - ref1_kinds, - ref1_label, - self_addr, - ref1_addr); - if (arg2.type != ArgType::kNone) { - char arg2_str[kArgsBufferSize]{}; - const RefKindMask ref2_kinds = ref_kinds & (kRef2Mask | kRefPcRelFix2Bytes); - arg2.SNPrint( - arg2_str, - kArgsBufferSize, - false, - ref2_kinds, - ref2_label, - self_addr, - ref2_addr); - return fprintf(stream, "%s%s %s,%s", indent, mnemonic_str, arg1_str, arg2_str); - } else { - return fprintf(stream, "%s%s %s", indent, mnemonic_str, arg1_str); - } - } else { - return fprintf(stream, "%s%s", indent, mnemonic_str); - } -} - -void DisasmNode::AddReferencedBy(const uint32_t address, const ReferenceType type) -{ - ReferenceNode *node{}; - if (this->last_ref_by) { - node = this->last_ref_by; - } else { - node = new ReferenceNode{}; - assert(node); - this->ref_by = this->last_ref_by = node; - } - node->refs[node->refs_count] = ReferenceRecord{type, address}; - node->refs_count++; - if (node->refs_count >= kRefsCountPerBuffer) { - ReferenceNode *new_node = new ReferenceNode{}; - assert(new_node); - node->next = new_node; - this->last_ref_by = new_node; - } -} - -DisasmNode::~DisasmNode() -{ - ReferenceNode *ref{this->ref_by}; - while (ref) { - ReferenceNode *prev = ref; - ref = ref->next; - delete prev; - } -} diff --git a/disasm.h b/disasm.h deleted file mode 100644 index 65429dc..0000000 --- a/disasm.h +++ /dev/null @@ -1,401 +0,0 @@ -#pragma once - -/* SPDX-License-Identifier: Unlicense - */ - -#include "data_buffer.h" -#include "common.h" - -#include -#include -#include - -enum class OpSize: int { - kByte = 0, - kWord = 1, - kLong = 2, - kInvalid = 3, - kNone = kInvalid, - kShort, ///< Semantically is the same as kByte, pseudosize, used for Bcc -}; - -enum class OpCode: uint8_t { - kNone, - kRaw, ///< Emits ".short" - kORI, - kANDI, - kSUBI, - 
kADDI, - kEORI, - kCMPI, - kBTST, - kBCHG, - kBCLR, - kBSET, - kMOVEP, - kMOVEA, - kMOVE, - kNEGX, - kCLR, - kNEG, - kNOT, - kEXT, - kNBCD, - kSWAP, - kPEA, - kILLEGAL, - kTAS, - kTST, - kTRAP, - kLINK, - kUNLK, - kRESET, - kNOP, - kSTOP, - kRTE, - kRTS, - kTRAPV, - kRTR, - kJSR, - kJMP, - kMOVEM, - kLEA, - kCHK, - kADDQ, - kSUBQ, - kScc, - kDBcc, - kBcc, - kMOVEQ, - kDIVU, - kDIVS, - kSBCD, - kOR, - kSUB, - kSUBX, - kSUBA, - kEOR, - kCMPM, - kCMP, - kCMPA, - kMULU, - kMULS, - kABCD, - kEXG, - kAND, - kADD, - kADDX, - kADDA, - kASR, - kASL, - kLSR, - kLSL, - kROXR, - kROXL, - kROR, - kROL, -}; - -enum class Condition: uint8_t { - kT = 0, - kF = 1, - kHI = 2, - kLS = 3, - kCC = 4, - kCS = 5, - kNE = 6, - kEQ = 7, - kVC = 8, - kVS = 9, - kPL = 10, - kMI = 11, - kGE = 12, - kLT = 13, - kGT = 14, - kLE = 15, -}; - -enum class AddrMode: uint8_t { - kInvalid = 0, - kDn = 1, - kAn = 2, - kAnAddr = 3, - kAnAddrIncr = 4, - kAnAddrDecr = 5, - kD16AnAddr = 6, - kD8AnXiAddr = 7, - kWord = 8, - kLong = 9, - kD16PCAddr = 10, - kD8PCXiAddr = 11, - kImmediate = 12, -}; - -enum class ArgType: uint8_t { - kNone = 0, - kDn = 1, ///< Dn - kAn = 2, ///< An - kAnAddr = 3, ///< (An) - kAnAddrIncr = 4, ///< (An)+ - kAnAddrDecr = 5, ///< -(An) - kD16AnAddr = 6, ///< (d16,An) - kD8AnXiAddr = 7, ///< (d8,An,Xi) - kWord = 8, ///< (xxx).W - kLong = 9, ///< (xxx).L - kD16PCAddr = 10, ///< (d16,PC) - kD8PCXiAddr = 11, ///< (d8,PC,Xn) - kImmediate = 12, ///< #imm - kRegMask, - kRegMaskPredecrement, - kDisplacement, ///< For BRA, BSR, Bcc and DBcc - kCCR, - kSR, - kUSP, - kRaw, ///< Emits "0xXXXX" for ".short" -}; - -struct D8AnPCXiAddr { - uint8_t an; ///< ID number of An reg, for kD8AnXiAddr only - /*! ID number of Xi reg (3 lower bits), for kD8AnXiAddr and kD8PCXiAddr. - * Bit 3 (mask 0x8) means 0 == Dn, 1 == An. - * Bit 4 (mask 0x10) means 0 == Word, 1 == Long. 
- */ - uint8_t xi; - int8_t d8; ///< Displacement, for kD8AnXiAddr and kD8PCXiAddr -}; - -struct D16AnPCAddr { - uint8_t an; ///< ID number of An reg, for kD16AnAddr only - int16_t d16; ///< Displacement, for D16AnAddr and kD16PCAddr -}; - -static_assert(sizeof(D8AnPCXiAddr) <= sizeof(uint32_t), ""); -static_assert(sizeof(D16AnPCAddr) <= sizeof(uint32_t), ""); - -struct Arg { - union { - ArgType type{ArgType::kNone}; - AddrMode mode; - }; - union { - int32_t lword{}; ///< kLong, kWord, kDisplacement, kImmediate - uint16_t uword; ///< kRegMask, kRaw - uint8_t xn; ///< kDn, kAn, kAnAddr, kAnAddrIncr, kAnAddrDecr - D16AnPCAddr d16_an; ///< kD16AnAddr - D16AnPCAddr d16_pc; ///< kD16PCAddr - D8AnPCXiAddr d8_an_xi; ///< kD8AnXiAddr - D8AnPCXiAddr d8_pc_xi; ///< kD8PCXiAddr - }; - /// Size of the instruction extension: 0, 2 or 4 bytes - constexpr size_t Size(const OpSize s) const - { - switch (mode) { - case AddrMode::kInvalid: - case AddrMode::kDn: - case AddrMode::kAn: - case AddrMode::kAnAddr: - case AddrMode::kAnAddrIncr: - case AddrMode::kAnAddrDecr: - return 0; - case AddrMode::kD16AnAddr: - case AddrMode::kD8AnXiAddr: - case AddrMode::kWord: - return 2; - case AddrMode::kLong: - return 4; - case AddrMode::kD16PCAddr: - case AddrMode::kD8PCXiAddr: - return 2; - case AddrMode::kImmediate: - // Byte and Word immediate are of 2 bytes length - return s == OpSize::kLong ? 
4 : 2; - } - return 0; - } - static constexpr auto AddrModeXn(const ArgType type, const uint8_t xn) { - Arg a{{type}, {0}}; - a.xn = xn; - return a; - } - static constexpr auto Dn(const uint8_t xn) { return AddrModeXn(ArgType::kDn, xn); } - static constexpr auto An(const uint8_t xn) { return AddrModeXn(ArgType::kAn, xn); } - static constexpr auto AnAddr(const uint8_t xn) { return AddrModeXn(ArgType::kAnAddr, xn); } - static constexpr auto AnAddrIncr(const uint8_t xn) - { - return AddrModeXn(ArgType::kAnAddrIncr, xn); - } - static constexpr auto AnAddrDecr(const uint8_t xn) - { - return AddrModeXn(ArgType::kAnAddrDecr, xn); - } - static constexpr auto D16AnAddr(const uint8_t xn, const int16_t d16) - { - Arg a{{ArgType::kD16AnAddr}, {0}}; - a.d16_an = D16AnPCAddr{xn, d16}; - return a; - } - static constexpr auto D16PCAddr(const int16_t d16) - { - Arg a{{ArgType::kD16PCAddr}, {0}}; - a.d16_pc = D16AnPCAddr{0, d16}; - return a; - } - static constexpr auto Word(const int16_t w) - { - Arg a{{ArgType::kWord}, {0}}; - a.lword = w; - return a; - } - static constexpr auto Long(const int32_t l) - { - Arg a{{ArgType::kLong}, {0}}; - a.lword = l; - return a; - } - static constexpr auto D8AnXiAddr( - const uint8_t xn, const uint8_t xi, const OpSize s, const int8_t d8) - { - Arg a{{ArgType::kD8AnXiAddr}, {0}}; - a.d8_an_xi = D8AnPCXiAddr{xn, uint8_t(xi | (s == OpSize::kLong ? 0x10u : 0u)), d8}; - return a; - } - static constexpr auto D8PCXiAddr( - const uint8_t xn, const uint8_t xi, const OpSize s, const int8_t d8) - { - Arg a{{ArgType::kD8PCXiAddr}, {0}}; - a.d8_pc_xi = D8AnPCXiAddr{xn, uint8_t(xi | (s == OpSize::kLong ? 
0x10u : 0u)), d8}; - return a; - } - static constexpr auto Immediate(const int32_t value) { - Arg a{{ArgType::kImmediate}, {0}}; - a.lword = value; - return a; - } - static constexpr auto RegMask(const uint16_t regmask) { - Arg a{{ArgType::kRegMask}, {0}}; - a.uword = regmask; - return a; - } - static constexpr auto RegMaskPredecrement(const uint16_t regmask) { - Arg a{{ArgType::kRegMaskPredecrement}, {0}}; - a.uword = regmask; - return a; - } - static constexpr auto Displacement(const int32_t displacement) { - Arg a{{ArgType::kDisplacement}, {0}}; - a.lword = displacement; - return a; - } - static constexpr auto CCR() { return Arg{{ArgType::kCCR}, {0}}; } - static constexpr auto SR() { return Arg{{ArgType::kSR}, {0}}; } - static constexpr auto USP() { return Arg{{ArgType::kUSP}, {0}}; } - static constexpr auto Raw(const uint16_t instr) { - Arg a{{ArgType::kRaw}, {0}}; - a.uword = instr; - return a; - } - int SNPrint( - char *buf, - size_t bufsz, - bool imm_as_hex = false, - RefKindMask ref_kinds = 0, - const char *label = nullptr, - uint32_t self_addr = 0, - uint32_t ref_addr = 0) const; -}; - -enum class NodeType { - kTracedInstruction, - kRefInstruction, - kData, -}; - -constexpr size_t kRefsCountPerBuffer = 10; - -constexpr size_t kMnemonicBufferSize = 10; -constexpr size_t kArgsBufferSize = 80; - -enum class ReferenceType { - kUnknown = 0, - kCall, - kBranch, - kRead, - kWrite, -}; - -struct ReferenceRecord { - ReferenceType type{}; - uint32_t address{}; -}; - -struct ReferenceNode { - ReferenceNode *next{}; - ReferenceRecord refs[kRefsCountPerBuffer]; - uint32_t refs_count{}; -}; - -struct Op { - OpCode opcode{OpCode::kNone}; ///< Identifies instruction (mnemonic) - /// Size specifier, the suffix `b`, `w` or `l` - OpSize size_spec{OpSize::kNone}; - Condition condition{Condition::kT}; ///< For Scc, Bcc and Dbcc - Arg arg1{}; ///< First argument, optional - Arg arg2{}; ///< Second argument, optional, cannot be set if arg1 is not set - static constexpr auto 
Typical( - const OpCode opcode = OpCode::kNone, - const OpSize opsize = OpSize::kNone, - const Arg arg1 = Arg{}, - const Arg arg2 = Arg{}) - { - return Op{opcode, opsize, Condition::kT, arg1, arg2}; - } - static constexpr auto Raw(const uint16_t instr) - { - return Op::Typical(OpCode::kRaw, OpSize::kNone, Arg::Raw(instr)); - } - int FPrint( - FILE *, - const char *indent, - bool imm_as_hex, - RefKindMask ref_kinds = 0, - const char *ref1_label = nullptr, - const char *ref2_label = nullptr, - uint32_t self_addr = 0, - uint32_t ref1_addr = 0, - uint32_t ref2_addr = 0) const; -}; - -struct DisasmNode { - const NodeType type{}; - /// Address of the instruction (PC value basically) - const uint32_t address{}; - /// Instruction size in bytes - size_t size{kInstructionSizeStepBytes}; - /// Indicates whether `ref_addr` should be interpreted and how - RefKindMask ref_kinds{}; - /// Address of first argument reference - uint32_t ref1_addr{}; - /// Address of second argument reference - uint32_t ref2_addr{}; - ReferenceNode *ref_by{}; - ReferenceNode *last_ref_by{}; - Op op{}; - - /*! 
Disassembles instruction with arguments - * returns size of whole instruction with arguments in bytes - */ - size_t Disasm(const DataView &code); - size_t DisasmAsRaw(const DataView &code); - void AddReferencedBy(uint32_t address, ReferenceType); - ~DisasmNode(); -}; - -static constexpr inline bool IsInstruction(NodeType t) -{ - return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction; -} - -static constexpr inline bool IsBRA(Op op) -{ - return op.opcode == OpCode::kBcc && op.condition == Condition::kT; -} diff --git a/elf_format.h b/elf_format.h deleted file mode 100644 index b5a268a..0000000 --- a/elf_format.h +++ /dev/null @@ -1,328 +0,0 @@ -#pragma once - -/* SPDX-License-Identifier: Unlicense - */ - -#include -#include - -namespace ELF { - -constexpr size_t kIdentSize = 16; -constexpr size_t kHeaderSize = kIdentSize + 36; -constexpr size_t kMagicSize = 4; -constexpr size_t kProgramHeaderSize = 32; - -using Address = uint32_t; -using Offset = uint32_t; - -enum class FileClass : uint8_t { - kNone = 0, - k32 = 1, - k64 = 2, - kUnknown, -}; - -enum class DataEncoding : uint8_t { - kNone = 0, - k2LSB = 1, - kLE = k2LSB, - k2MSB = 2, - kBE = k2MSB, - kUnknown, -}; - -enum class Version : uint8_t { - kNone = 0, - kCurrent = 1, - kUnknown, -}; - -static constexpr inline auto ParseFileClass(const uint8_t file_class) -{ - switch (file_class) { - case static_cast(FileClass::kNone): return FileClass::kNone; - case static_cast(FileClass::k32): return FileClass::k32; - case static_cast(FileClass::k64): return FileClass::k64; - } - return FileClass::kUnknown; -} - -static constexpr inline auto ParseDataEncoding(const uint8_t data_encoding) -{ - switch (data_encoding) { - case static_cast(DataEncoding::kNone): return DataEncoding::kNone; - case static_cast(DataEncoding::k2LSB): return DataEncoding::k2LSB; - case static_cast(DataEncoding::k2MSB): return DataEncoding::k2MSB; - } - return DataEncoding::kUnknown; -} - -static constexpr inline auto 
ParseVersion(const uint8_t version) -{ - switch (version) { - case static_cast(Version::kNone): return Version::kNone; - case static_cast(Version::kCurrent): return Version::kCurrent; - } - return Version::kUnknown; -} - -struct Ident32Raw { - uint8_t magic[4]; - uint8_t file_class; - uint8_t data_encoding; - uint8_t version; - uint8_t os_abi; - uint8_t abi_version; - uint8_t padding[7]; - static constexpr auto inline FromBytes(const uint8_t *data) - { - return Ident32Raw{ - { data[0], data[1], data[2], data[3] }, - data[4], - data[5], - data[6], - data[7], - data[8], - { data[9], data[10], data[11], data[12], data[13], data[14], data[15], }, - }; - } -}; - -struct Ident32 { - uint8_t magic[4]; - FileClass file_class; - DataEncoding data_encoding; - Version version; - uint8_t os_abi; - uint8_t abi_version; - static constexpr inline auto FromBytes(const uint8_t *data) - { - return Ident32{ - { data[0], data[1], data[2], data[3] }, - ParseFileClass(data[4]), - ParseDataEncoding(data[5]), - ParseVersion(data[6]), - data[7], - data[8], - }; - } - static constexpr inline auto FromIdent32Raw(const Ident32Raw raw) - { - return Ident32{ - { raw.magic[0], raw.magic[1], raw.magic[2], raw.magic[3] }, - ParseFileClass(raw.file_class), - ParseDataEncoding(raw.data_encoding), - ParseVersion(raw.version), - raw.os_abi, - raw.abi_version, - }; - } -}; - -enum class ObjectType : uint16_t { - kNone = 0, - kRel = 1, - kExec = 2, - kDyn = 3, - kCore = 4, - kUnknown = 0x7fff, - kLoProc = 0xff00, - kHiProc = 0xffff, -}; - -enum class Machine : uint16_t { - kNone = 0, - kM32 = 1, - kSPARC = 2, - k386 = 3, - k68k = 4, - k88k = 5, - k860 = 7, - kMIPS = 8, - kUnknown, -}; - -static constexpr inline uint16_t ParseU16(const uint8_t *d, DataEncoding e) -{ - if (e == DataEncoding::k2MSB) { - return uint16_t(d[0]) << 8 | d[1]; - } - return uint16_t(d[1]) << 8 | d[0]; -} - -static constexpr inline uint32_t ParseU32(const uint8_t *d, DataEncoding e) -{ - if (e == DataEncoding::k2MSB) { - return 
uint32_t(d[0]) << 24 | uint32_t(d[1]) << 16 | uint32_t(d[2]) << 8 | d[3]; - } - return uint32_t(d[3]) << 24 | uint32_t(d[2]) << 16 | uint32_t(d[1]) << 8 | d[0]; -} - -static constexpr inline auto ParseObjectType(const uint16_t type) -{ - switch (type) { - case static_cast(ObjectType::kNone): return ObjectType::kNone; - case static_cast(ObjectType::kRel): return ObjectType::kRel; - case static_cast(ObjectType::kExec): return ObjectType::kExec; - case static_cast(ObjectType::kDyn): return ObjectType::kDyn; - case static_cast(ObjectType::kCore): return ObjectType::kCore; - case static_cast(ObjectType::kLoProc): return ObjectType::kLoProc; - case static_cast(ObjectType::kHiProc): return ObjectType::kHiProc; - } - return ObjectType::kUnknown; -} - -static constexpr inline auto ParseMachine(const uint16_t machine) -{ - switch (machine) { - case static_cast(Machine::kNone): return Machine::kNone; - case static_cast(Machine::kM32): return Machine::kM32; - case static_cast(Machine::kSPARC): return Machine::kSPARC; - case static_cast(Machine::k386): return Machine::k386; - case static_cast(Machine::k68k): return Machine::k68k; - case static_cast(Machine::k88k): return Machine::k88k; - case static_cast(Machine::k860): return Machine::k860; - case static_cast(Machine::kMIPS): return Machine::kMIPS; - } - return Machine::kUnknown; -} - -struct Header32Raw { - Ident32Raw ident; - uint16_t type; - uint16_t machine; - uint32_t version; - Address entry; - Offset phoff; - Offset shoff; - uint32_t flags; - uint16_t ehsize; - uint16_t phentsize; - uint16_t phnum; - uint16_t shentsize; - uint16_t shnum; - uint16_t shstrndx; - static constexpr inline auto FromBytes(const uint8_t *data) - { - const auto ident = Ident32Raw::FromBytes(data); - const DataEncoding e = ParseDataEncoding(ident.data_encoding); - return Header32Raw{ - /* .ident */ ident, - /* .type */ ParseU16(data + kIdentSize + 0, e), - /* .machine */ ParseU16(data + kIdentSize + 2, e), - /* .version */ ParseU32(data + 
kIdentSize + 4, e), - /* .entry */ ParseU32(data + kIdentSize + 8, e), - /* .phoff */ ParseU32(data + kIdentSize + 12, e), - /* .shoff */ ParseU32(data + kIdentSize + 16, e), - /* .flags */ ParseU32(data + kIdentSize + 20, e), - /* .ehsize */ ParseU16(data + kIdentSize + 24, e), - /* .phentsize */ ParseU16(data + kIdentSize + 26, e), - /* .phnum */ ParseU16(data + kIdentSize + 28, e), - /* .shentsize */ ParseU16(data + kIdentSize + 30, e), - /* .shnum */ ParseU16(data + kIdentSize + 32, e), - /* .shstrndx */ ParseU16(data + kIdentSize + 34, e), - }; - } -}; - -struct Header32 { - Ident32 ident; - ObjectType type; - Machine machine; - Version version; - Address entry; - Offset phoff; - Offset shoff; - uint32_t flags; - uint16_t ehsize; - uint16_t phentsize; - uint16_t phnum; - uint16_t shentsize; - uint16_t shnum; - uint16_t shstrndx; - static constexpr inline auto FromBytes(const uint8_t *data) - { - const auto raw = Header32Raw::FromBytes(data); - return Header32{ - Ident32::FromIdent32Raw(raw.ident), - ParseObjectType(raw.type), - ParseMachine(raw.machine), - ParseVersion(raw.version), - raw.entry, - raw.phoff, - raw.shoff, - raw.flags, - raw.ehsize, - raw.phentsize, - raw.phnum, - raw.shentsize, - raw.shnum, - raw.shstrndx, - }; - } -}; - -enum class PHType : uint32_t { - kNull = 0, - kLoad = 1, - kDynamic = 2, - kInterp = 3, - kNote = 4, - kSHLIB = 5, - kProgramHeaderTable = 6, - kLoProc = 0x70000000, - kHiProc = 0x7fffffff, - kUnknown, -}; - -static constexpr inline auto ParsePHType(const uint32_t type) -{ - switch (type) { - case static_cast(PHType::kNull): return PHType::kNull; - case static_cast(PHType::kLoad): return PHType::kLoad; - case static_cast(PHType::kDynamic): return PHType::kDynamic; - case static_cast(PHType::kInterp): return PHType::kInterp; - case static_cast(PHType::kNote): return PHType::kNote; - case static_cast(PHType::kSHLIB): return PHType::kSHLIB; - case static_cast(PHType::kProgramHeaderTable): return PHType::kProgramHeaderTable; - 
case static_cast(PHType::kLoProc): return PHType::kLoProc; - case static_cast(PHType::kHiProc): return PHType::kHiProc; - } - return PHType::kUnknown; -} - -constexpr uint32_t kPHFlagX = 1 << 0; -constexpr uint32_t kPHFlagW = 1 << 1; -constexpr uint32_t kPHFlagR = 1 << 2; - -struct ProgramHeader32 { - uint32_t type; - Offset offset; - Address vaddr; - Address paddr; - uint32_t filesz; - uint32_t memsz; - uint32_t flags; - uint32_t align; - static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e) - { - return ProgramHeader32{ - /* type */ ParseU32(data + 0, e), - /* offset */ ParseU32(data + 4, e), - /* vaddr */ ParseU32(data + 8, e), - /* paddr */ ParseU32(data + 12, e), - /* filesz */ ParseU32(data + 16, e), - /* memsz */ ParseU32(data + 20, e), - /* flags */ ParseU32(data + 24, e), - /* align */ ParseU32(data + 28, e), - }; - } -}; - -static constexpr inline bool MagicIsValid(const uint8_t *m) -{ - return m[0] == 0x7f && m[1] == 'E' && m[2] == 'L' && m[3] == 'F'; -} - -}; diff --git a/elf_image.cpp b/elf_image.cpp deleted file mode 100644 index 6db72f3..0000000 --- a/elf_image.cpp +++ /dev/null @@ -1,172 +0,0 @@ -/* SPDX-License-Identifier: Unlicense - */ - -#include "elf_image.h" - -#include -#include - -ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes( - const DataView &d, const DataEncoding e) -{ - if (d.buffer == nullptr || d.size == 0) { - return ELF::ProgramHeader32Table{}; - } - assert(d.size % kProgramHeaderSize == 0); - const size_t size = d.size / kProgramHeaderSize; - auto *headers = new ProgramHeader32[size]; - assert(headers != nullptr); - for (size_t i = 0; i < size; i++) { - headers[i] = ProgramHeader32::FromBytes(d.buffer + i * kProgramHeaderSize, e); - } - return ELF::ProgramHeader32Table{ headers, size, }; -} - -static char *ValidateELF(const DataView& d) -{ - char *error; - size_t size; - FILE *s = open_memstream(&error, &size); - assert(s); - using namespace ELF; - if (d.size < kHeaderSize) { - fprintf( - 
s, - "data size (%zu) is lower than minimum ELF header size (%zu): " - "ELF header could not fit", - d.size, - kHeaderSize); - fclose(s); - return error; - } - const auto header_raw = Header32Raw::FromBytes(d.buffer); - const auto header = Header32::FromBytes(d.buffer); - if (!MagicIsValid(header.ident.magic)) { - const uint8_t *m = header.ident.magic; - fprintf( - s, - "ELF Magic is invalid: expected [%02x %02x %02x %02x], got [%02x %02x %02x %02x]", - 0x7f, 'E', 'L', 'F', - m[0], m[1], m[2], m[3]); - fclose(s); - return error; - } - if (header.ident.version != Version::kCurrent) { - fprintf( - s, - "version (0x%02x) of ELF header.ident.version is not supported, " - "only \"Current\" version (0x%02x) is supported", - header_raw.ident.version, - static_cast(Version::kCurrent)); - fclose(s); - return error; - } - if (header.version != Version::kCurrent) { - fprintf( - s, - "version (0x%02x) of ELF header.version is not supported, " - "only \"Current\" version (0x%02x) is supported", - header_raw.version, - static_cast(Version::kCurrent)); - fclose(s); - return error; - } - if (header.type != ObjectType::kExec) { - fprintf( - s, - "object type (0x%02x) is not supported, " - "only Exec (0x%02x) object type is supported", - header_raw.type, - static_cast(ObjectType::kExec)); - fclose(s); - return error; - } - if (header.machine != Machine::k68k) { - fprintf( - s, - "machine (0x%02x) is not supported, " - "only Motorola 68k (0x%02x) machine is supported", - header_raw.machine, - static_cast(Machine::k68k)); - fclose(s); - return error; - } - if (header.phentsize != kProgramHeaderSize) { - fprintf( - s, - "phentsize is invalid: expected (%zu), got (%zu)", - kProgramHeaderSize, - size_t(header.phentsize)); - fclose(s); - return error; - } - if (d.size < header.phoff + header.phentsize * header.phnum) { - fprintf( - s, - "data size (%zu) is lower than program header table end offset (%zu): " - "program header table could not fit", - d.size, - size_t(header.phoff + 
header.phentsize * header.phnum)); - fclose(s); - return error; - } - bool has_segment_with_entry = false; - for (size_t i = 0; i < header.phnum; i++) { - const auto ph = ProgramHeader32::FromBytes( - d.buffer + header.phoff + header.phentsize * i, header.ident.data_encoding); - if (d.size < ph.offset + ph.filesz) { - fprintf( - s, - "data size (%zu) is lower than pht[%zu] segment end offset (%zu): " - "segment could not fit", - d.size, - i, - size_t(ph.offset + ph.filesz)); - fclose(s); - return error; - } - const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == (kPHFlagX | kPHFlagR); - if (ParsePHType(ph.type) == PHType::kLoad && is_code && ph.vaddr != 0) { - fprintf( - s, - "pht[%zu] segment is a code, but it's vaddr (0x%08x) is not zero: " - "non-zero base address is not supported", - i, - ph.vaddr); - fclose(s); - return error; - } - const bool contains_entry = header.entry >= ph.vaddr && header.entry < ph.vaddr + ph.memsz; - if (ParsePHType(ph.type) == PHType::kLoad && is_code && contains_entry) { - has_segment_with_entry = true; - } - } - if (!has_segment_with_entry) { - fprintf(s, "no code segments containing entry point (0x%08x) found", header.entry); - fclose(s); - return error; - } - fclose(s); - free(error); - return nullptr; -} - -ELF::Image::Image(DataBuffer&& data) - : _data(static_cast(data)) - , _error(ValidateELF(_data.View())) - , _h(_error ? ELF::Header32{} : ELF::Header32::FromBytes(_data.View().buffer)) - , _pht(_error - ? 
ELF::ProgramHeader32Table{} - : ELF::ProgramHeader32Table::FromBytes( - _data.View(_h.phoff, _h.phnum * kProgramHeaderSize), _h.ident.data_encoding)) -{} - -ELF::Image::~Image() -{ - if (_error) { - free(_error); - } - if (_pht.headers) { - delete [] _pht.headers; - } -} diff --git a/elf_image.h b/elf_image.h deleted file mode 100644 index b7c7123..0000000 --- a/elf_image.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -/* SPDX-License-Identifier: Unlicense - */ - -#include "elf_format.h" -#include "data_buffer.h" - -#include - -namespace ELF { - -struct ProgramHeader32Table { - const ProgramHeader32 *headers{}; - size_t size{}; - static ProgramHeader32Table FromBytes(const DataView &, DataEncoding); -}; - -struct Segment { - Segment *next{}; - const DataView view{}; -}; - -class Image { - const DataBuffer _data; - char *const _error; - const Header32 _h; - const ProgramHeader32Table _pht; -public: - explicit Image(DataBuffer&&); - ~Image(); - constexpr bool IsValid() const { return _error == nullptr; } - constexpr const DataBuffer &Data() const { return _data; }; - constexpr const DataView ProgramView() const - { - if (!IsValid()) { - return DataView{}; - } - for (size_t i = 0; i < _pht.size; i++) { - const auto ph = _pht.headers[i]; - const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == - (kPHFlagX | kPHFlagR); - const bool is_load = ParsePHType(ph.type) == PHType::kLoad; - const bool contains_entry = _h.entry >= ph.vaddr && _h.entry < ph.vaddr + ph.memsz; - if (is_load && is_code && ph.vaddr == 0 && contains_entry) - { - return _data.View(ph.offset, ph.filesz); - } - } - return DataView{}; - }; - constexpr const char *Error() const { return _error; } -}; - -} diff --git a/main.cpp b/main.cpp deleted file mode 100644 index a6f73b3..0000000 --- a/main.cpp +++ /dev/null @@ -1,836 +0,0 @@ -/* SPDX-License-Identifier: Unlicense - */ - -#include "elf_image.h" -#include "data_buffer.h" -#include "disasm.h" -#include "common.h" - -#define 
OPTPARSE_IMPLEMENTATION -#define OPTPARSE_API static -#include "optparse/optparse.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -enum class DisasmMapType { - kTraced, - kRaw, -}; - -class DisasmMap { - const DisasmMapType _type; - DisasmNode *_map[kDisasmMapSizeElements]{}; - constexpr DisasmNode *findNodeByAddress(uint32_t address) const; - DisasmNode &insertNode(uint32_t address, NodeType); - DisasmNode &insertReferencedBy( - const uint32_t by_addr, - const uint32_t ref_addr, - const NodeType type, - const ReferenceType ref_type); - constexpr bool canBeAllocated(const DisasmNode& node) const; -public: - constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const - { - return findNodeByAddress(address); - }; - void InsertNode(uint32_t address, NodeType type) - { - assert(_type == DisasmMapType::kTraced); - insertNode(address, type); - } - void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); - DisasmMap(DisasmMapType type): _type(type) {} - ~DisasmMap(); -}; - -constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const -{ - if (address < kRomSizeBytes) - return _map[address / kInstructionSizeStepBytes]; - return nullptr; -} - -static constexpr uint32_t AlignInstructionAddress(const uint32_t address) -{ - return address & ~1UL; -} - -DisasmNode &DisasmMap::insertNode(const uint32_t address, const NodeType type) -{ - auto *node = findNodeByAddress(address); - if (node) { - // Instruction nodes take precedence over data nodes. If a node that - // was previously accessed only as data now turns out to be an - // instruction, then it must become an instruction node. 
- if (IsInstruction(type) && !IsInstruction(node->type)) { - *const_cast(&node->type) = type; - // Make sure it is OpCode::kNone so it will be properly disassembled - node->op = Op{}; - } - return *node; - } - node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)}); - assert(node); - _map[address / kInstructionSizeStepBytes] = node; - return *node; -} - -DisasmNode &DisasmMap::insertReferencedBy( - const uint32_t by_addr, - const uint32_t ref_addr, - const NodeType type, - const ReferenceType ref_type) -{ - auto &ref_node = insertNode(ref_addr, type); - ref_node.AddReferencedBy(by_addr, ref_type); - return ref_node; -} - -constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const -{ - const auto size = node.size / kInstructionSizeStepBytes; - const auto *const node_real = findNodeByAddress(node.address); - for (size_t i = 1; i < size; i++) { - const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i]; - if (ptr != nullptr && ptr != node_real) { - return false; - } - } - return true; -} - -static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds) -{ - return (ref_kinds & kRefCallMask) - ? ReferenceType::kCall - : (ref_kinds & kRef1ReadMask) - ? ReferenceType::kRead - : (ref_kinds & kRef1WriteMask) - ? ReferenceType::kWrite - : ReferenceType::kBranch; -} - -static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) -{ - return (ref_kinds & kRefCallMask) - ? ReferenceType::kCall - : (ref_kinds & kRef2ReadMask) - ? ReferenceType::kRead - : (ref_kinds & kRef2WriteMask) - ? 
ReferenceType::kWrite - : ReferenceType::kBranch; -} - -static constexpr bool IsNextLikelyAnInstruction(const Op &op) -{ - return (op.opcode != OpCode::kNone && - op.opcode != OpCode::kRaw && - !IsBRA(op) && - op.opcode != OpCode::kJMP && - op.opcode != OpCode::kRTS && - op.opcode != OpCode::kRTE && - op.opcode != OpCode::kSTOP); -} - -void DisasmMap::Disasm( - const DataView &code, const Settings &s, size_t at, bool nested) -{ - // Some of logic of this function is covered by integration tests in - // `test_walk_and_follow_jumps.bash`. - bool inside_code_span = nested; - while (at < Min(kRomSizeBytes, code.size)) { - DisasmNode *node; - if (_type == DisasmMapType::kTraced) { - node = _map[at / kInstructionSizeStepBytes]; - if (!node) { - if (inside_code_span) { - node = &insertNode(at, NodeType::kTracedInstruction); - } else { - at += kInstructionSizeStepBytes; - continue; - } - } - } else { - node = &insertNode(at, NodeType::kTracedInstruction); - } - if (node->op.opcode == OpCode::kNone || inside_code_span) { - const auto size = node->Disasm(code); - assert(size >= kInstructionSizeStepBytes); - if (canBeAllocated(*node)) { - // Spread across the size - for (size_t o = kInstructionSizeStepBytes; o < size; o++) { - _map[(node->address + o) / kInstructionSizeStepBytes] = node; - } - } else { - node->DisasmAsRaw(code); - } - } - inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op); - if (nested && !inside_code_span) { - return; - } - at += node->size; - // NOTE: There is not much information about a reference passed further, - // so just don't add a reference of immediate if s.imm_labels is false - // enabled. - const bool has_ref1 = (node->ref_kinds & kRef1ImmMask) - ? s.imm_labels - : (node->ref_kinds & kRef1Mask); - const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1; - if (has_code_ref1) { - const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) - ? 
NodeType::kData : NodeType::kRefInstruction; - const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds); - auto &ref_node = insertReferencedBy( - node->address, node->ref1_addr, type, ref_type); - if (ref_node.op.opcode == OpCode::kNone) { - if (s.follow_jumps) { - Disasm(code, s, ref_node.address, true); - } else { - ref_node.DisasmAsRaw(code); - } - } - } - const bool has_ref2 = (node->ref_kinds & kRef2Mask); - const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size); - if (has_code_ref2) { - const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) - ? NodeType::kData : NodeType::kRefInstruction; - const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds); - auto &ref_node = insertReferencedBy( - node->address, node->ref2_addr, type, ref_type); - if (ref_node.op.opcode == OpCode::kNone) { - if (s.follow_jumps) { - Disasm(code, s, ref_node.address, true); - } else { - ref_node.DisasmAsRaw(code); - } - } - } - } -} - -DisasmMap::~DisasmMap() -{ - for (size_t i = 0; i < kDisasmMapSizeElements; i++) { - auto *const node = _map[i]; - if (!node) { - continue; - } - const auto size = node->size / kInstructionSizeStepBytes; - for (size_t o = 0; o < size; o++) { - assert(_map[i + o] == node); - _map[i + o] = nullptr; - } - delete node; - i += size - 1; - } -} - -static size_t RenderRawDataComment( - char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code) -{ - size_t overall_sz{}; - for (size_t i = 0; i < instr_sz; i += kInstructionSizeStepBytes) - { - overall_sz += Min( - out_sz - overall_sz, - snprintf( - out + overall_sz, - out_sz - overall_sz, - " %04x", - GetU16BE(code.buffer + address + i))); - } - overall_sz += Min( - out_sz - overall_sz, - snprintf(out + overall_sz, out_sz - overall_sz, " @%08x", address)); - return overall_sz; -} - -static constexpr const char *ReferenceTypeToString(ReferenceType type) -{ - switch (type) { - case ReferenceType::kUnknown: return "UNKNOWN"; - case 
ReferenceType::kCall: return "CALL"; - case ReferenceType::kBranch: return "BRANCH"; - case ReferenceType::kRead: return "READ"; - case ReferenceType::kWrite: return "WRITE"; - } - return "UNKN"; -} - -static constexpr bool ShouldPrintAsRaw(const Op& op) -{ - if (op.arg1.type == ArgType::kImmediate) { - if (op.opcode == OpCode::kADD || op.opcode == OpCode::kSUB || - op.opcode == OpCode::kAND || op.opcode == OpCode::kOR || - op.opcode == OpCode::kEOR || op.opcode == OpCode::kCMP) - { - return true; - } - } - return false; -} - -static constexpr bool HasCallReference(const DisasmNode &node) -{ - for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { - for (size_t i = 0; i < ref->refs_count; i++) { - if (ref->refs[i].type == ReferenceType::kCall) { - return true; - } - } - } - return false; -} - -static constexpr size_t GetNodeSizeByAddress(const DisasmMap &disasm_map, const uint32_t address) -{ - const auto *node = disasm_map.FindNodeByAddress(address); - if (node == nullptr) { - return kInstructionSizeStepBytes; - } - return node->size; -} - -static constexpr bool IsLocalLocation(const DisasmMap &disasm_map, const DisasmNode &node) -{ - for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { - for (size_t i = 0; i < ref->refs_count; i++) { - const ReferenceRecord &ref_rec = ref->refs[i]; - if (ref_rec.type == ReferenceType::kCall) { - // Locals are definitely not made for calls - return false; - } - const bool forward = ref_rec.address < node.address; - const size_t min_addr = forward ? ref_rec.address : node.address; - const size_t start = min_addr + GetNodeSizeByAddress(disasm_map, min_addr); - const size_t max_addr = forward ? node.address : ref_rec.address; - const size_t end = max_addr + (forward ? 
0 : GetNodeSizeByAddress(disasm_map, min_addr)); - for (size_t addr = start; addr < end;) { - const auto *intermediate_node = disasm_map.FindNodeByAddress(addr); - if (intermediate_node) { - if (intermediate_node->ref_by) { - // Another labeled node detected on the jump path, hence - // current node's location cannot be considered local - return false; - } - addr += intermediate_node->size; - } else { - addr += kInstructionSizeStepBytes; - } - } - } - } - return true; -} - -static constexpr const char *StringWihoutFristNChars(const char *str, const size_t n) -{ - for (size_t i = 0, tab = 0; i < n && *str; i++, str++) { - if (*str == '\t') { - tab++; - if (tab == 7) { - tab = 0; - str++; - } - } else { - str++; - } - } - return str; -} - -static void RenderNodeDisassembly( - FILE *const output, - const DisasmMap &disasm_map, - const DataView &code, - const Settings &s, - const DisasmNode &node) -{ - if (node.ref_by) { - const bool is_local = IsLocalLocation(disasm_map, node); - if (s.labels && !(s.short_ref_local_labels && is_local)) { - const bool export_this_function = s.export_functions && HasCallReference(node); - const bool export_this_label = s.export_all_labels || - (s.export_labels && node.ref_by && (node.ref_by->refs_count > 1)) || - export_this_function; - if (export_this_label) { - fprintf(output, "\n%s.globl\tL%08x\n", s.indent, node.address); - if (export_this_function) { - fprintf(output, "%s.type\tL%08x, @function\n", s.indent, node.address); - } - } - } - if (s.xrefs_from && !(s.short_ref_local_labels && is_local)) { - fprintf(output, "| XREFS:\n"); - for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { - if (ref->refs_count == 0) { - continue; - } - fprintf(output, "|"); - for (size_t i = 0; i < ref->refs_count; i++) { - const ReferenceRecord r = ref->refs[i]; - fprintf(output, " %s @%08x", ReferenceTypeToString(r.type), r.address); - } - fprintf(output, "\n"); - } - } - if (s.labels) { - if (s.short_ref_local_labels && is_local) { - 
fprintf(output, "1:%s", StringWihoutFristNChars(s.indent, (sizeof "1:") - 1)); - } else { - fprintf(output, "L%08x:\n", node.address); - } - } - } - assert(node.op.opcode != OpCode::kNone); - if (ShouldPrintAsRaw(node.op)) { - auto raw = Op::Raw(GetU16BE(code.buffer + node.address)); - raw.FPrint(output, s.indent, s.imm_hex); - uint32_t i = kInstructionSizeStepBytes; - for (; i < node.size; i += kInstructionSizeStepBytes) { - char arg_str[kArgsBufferSize]{}; - const auto arg = Arg::Raw(GetU16BE(code.buffer + node.address + i)); - arg.SNPrint(arg_str, kArgsBufferSize); - fprintf(output, ", %s", arg_str); - } - } else { - const bool with_ref = node.ref_kinds && s.labels && (s.abs_labels || s.rel_labels); - const auto *ref1 = (node.ref_kinds & kRef1Mask) - ? disasm_map.FindNodeByAddress(node.ref1_addr) : nullptr; - const auto *ref2 = (node.ref_kinds & kRef2Mask) - ? disasm_map.FindNodeByAddress(node.ref2_addr) : nullptr; - const uint32_t ref1_addr = (with_ref && ref1) ? ref1->address : 0; - const uint32_t ref2_addr = (with_ref && ref2) ? ref2->address : 0; - if (with_ref && (ref1 || ref2)) { - const RefKindMask ref_kinds = - (s.abs_labels - ? ((ref1 ? (node.ref_kinds & kRef1AbsMask) : 0) | - (ref2 ? (node.ref_kinds & kRef2AbsMask) : 0)) - : 0) | - (s.rel_labels - ? ((ref1 ? (node.ref_kinds & kRef1RelMask) : 0) | - (ref2 ? (node.ref_kinds & kRef2RelMask) : 0)) - : 0) | - ((s.imm_labels && ref1) ? (node.ref_kinds & kRef1ImmMask) : 0) | - (node.ref_kinds & (kRefDataMask | kRefPcRelFix2Bytes)); - const bool ref1_is_local = !ref1 || IsLocalLocation(disasm_map, *ref1); - char ref1_label[32]{}; - if (ref1) { - if (s.short_ref_local_labels && ref1_is_local) { - const char dir = ref1_addr <= node.address ? 
'b' : 'f'; - snprintf(ref1_label, (sizeof ref1_label), "1%c", dir); - } else { - snprintf(ref1_label, (sizeof ref1_label), "L%08x", ref1_addr); - } - } - const bool ref2_is_local = !ref2 || IsLocalLocation(disasm_map, *ref2); - char ref2_label[32]{}; - if (ref2) { - if (s.short_ref_local_labels && ref2_is_local) { - const char dir = ref2_addr <= node.address ? 'b' : 'f'; - snprintf(ref2_label, (sizeof ref2_label), "1%c", dir); - } else { - snprintf(ref2_label, (sizeof ref2_label), "L%08x", ref2_addr); - } - } - node.op.FPrint( - output, - s.indent, - s.imm_hex, - ref_kinds, - ref1_label, - ref2_label, - node.address, - ref1_addr, - ref2_addr); - const bool ref1_from_imm_ok = ((node.ref_kinds & kRef1ImmMask) ? s.imm_labels : true); - if (s.xrefs_to && !(s.short_ref_local_labels && ref1_is_local) && ref1_from_imm_ok) - { - fprintf(output, " | L%08x", ref1_addr); - } - if (s.xrefs_to && !(s.short_ref_local_labels && ref2_is_local)) { - fprintf(output, " | L%08x", ref2_addr); - } - } else { - node.op.FPrint(output, s.indent, s.imm_hex); - } - } - if (s.raw_data_comment) { - char raw_data_comment[100]{}; - RenderRawDataComment( - raw_data_comment, - (sizeof raw_data_comment) - 1, - node.address, - node.size, code); - fprintf(output, " |%s", raw_data_comment); - } - fprintf(output, "\n"); -} - -static void RenderDisassembly( - FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s) -{ - for (size_t i = 0; i < code.size;) { - const DisasmNode *node = disasm_map.FindNodeByAddress(i); - if (node) { - RenderNodeDisassembly(output, disasm_map, code, s, *node); - i += node->size; - } else { - auto raw = Op::Raw(GetU16BE(code.buffer + i)); - raw.FPrint(output, s.indent, s.imm_hex); - fprintf(output, "\n"); - i += kInstructionSizeStepBytes; - } - } -} - -static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data) -{ - // FIXME make a full blown parser with various radixes support and different - // trace types support - bool 
parse = true; - for (size_t i = 0; i < trace_data.size; i++) { - if (trace_data.buffer[i] == '\n' || trace_data.buffer[i] == '\r') { - parse = true; - } else if (parse) { - errno = 0; - const char *startptr = reinterpret_cast(trace_data.buffer + i); - char *endptr = nullptr; - const long address = strtol(startptr, &endptr, 10); - if ((address == LONG_MAX || address == LONG_MIN) && errno == ERANGE) { - // Parsing error, just skip - } else if (startptr == endptr) { - // Parsing error, just skip - } else if (address % 2) { - fprintf(stderr, "Error: Uneven PC values are not supported (got PC=0x%08lx), exiting\n", address); - exit(1); - } else if (static_cast(address) > kRomSizeBytes) { - fprintf(stderr, "Error: PC values > 4MiB are not supported (got PC=0x%08lx), exiting\n", address); - exit(1); - } else { - // Valid value - disasm_map.InsertNode(address, NodeType::kTracedInstruction); - } - if (startptr != endptr) { - i += endptr - startptr - 1; - } - parse = false; - } - } -} - -static size_t ReadFromStream(DataBuffer &db, FILE *stream) -{ - assert(db.buffer && db.buffer_size >= db.kInitialSize); - while (1) { - const size_t read_size = db.buffer_size - db.occupied_size; - const size_t fread_ret = fread( - db.buffer + db.occupied_size, sizeof(*db.buffer), read_size, stream); - db.occupied_size += fread_ret; - if (fread_ret >= db.buffer_size) { - assert(fread_ret == db.buffer_size); - db.Expand(db.buffer_size * 2); - } else { - const int err = errno; - if (feof(stream)) { - break; - } else if (ferror(stream)) { - fprintf(stderr, "ReadFromStream: fread(%zu): Error (%d): \"%s\"\n", read_size, err, strerror(err)); - return EXIT_FAILURE; - } else if (db.buffer_size == db.occupied_size) { - db.Expand(db.buffer_size * 2); - } else { - assert(false); - } - } - } - return db.occupied_size; -} - -static DisasmMap *NewDisasmMap(FILE *trace_stream) -{ - if (trace_stream == nullptr) { - DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kRaw}; - assert(disasm_map); - return 
disasm_map; - } - // Read trace file into buffer - DataBuffer trace_data{}; - const size_t trace_size = ReadFromStream(trace_data, trace_stream); - if (trace_size == 0) { - fprintf(stderr, "ReadFromStream(trace_data, trace_stream): Error: No data has been read\n"); - return nullptr; - } - // Parse trace file into map - DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kTraced}; - assert(disasm_map); - ParseTraceData(*disasm_map, trace_data.View()); - return disasm_map; -} - -static int M68kDisasm( - FILE *input_stream, FILE *output_stream, FILE *trace_stream, const Settings &s) -{ - // Read input file into buffer - DataBuffer input{}; - const size_t input_size = ReadFromStream(input, input_stream); - if (input_size == 0) { - fprintf(stderr, "ReadFromStream(input, input_stream): Error: No data has been read\n"); - return EXIT_FAILURE; - } - const ELF::Image elf(static_cast(input)); - if (s.bfd == BFDTarget::kELF && !elf.IsValid()) { - fprintf(stderr, "Error: ELF image is not valid: %s\n", elf.Error()); - return EXIT_FAILURE; - } - const bool from_elf = s.bfd == BFDTarget::kELF || (s.bfd == BFDTarget::kAuto && elf.IsValid()); - const DataView code(from_elf ? elf.ProgramView() : elf.Data().View()); - assert(code.buffer != nullptr); - assert(code.size != 0); - // It is not worth it to check this somewhere while disassembling or - // emitting. Odd size is just not supported. 
- if (code.size % 2) { - fprintf(stderr, "M68kDisasm: Error: code blob must be of even size\n"); - return EXIT_FAILURE; - } - auto *disasm_map = NewDisasmMap(trace_stream); - if (disasm_map == nullptr) { - return EXIT_FAILURE; - } - // Disasm into output map - disasm_map->Disasm(code, s); - // Print output into output_stream - RenderDisassembly(output_stream, *disasm_map, code, s); - delete disasm_map; - return EXIT_SUCCESS; -} - -static bool FeatureStringHasPrefixNo(const char *feature) -{ - assert(feature); - // There is also implicit, embedded and free check for null terminator - if (feature[0] == 'n' && feature[1] == 'o' && feature[2] == '-') { - return true; - } - return false; -} - -static bool ApplyFeature(Settings& s, const char *feature_arg) -{ - struct { - bool Settings::* setting; - const char* feature_name; - } const features[]{ - { &Settings::raw_data_comment, "rdc" }, - { &Settings::labels, "labels" }, - { &Settings::rel_labels, "rel-labels" }, - { &Settings::abs_labels, "abs-labels" }, - { &Settings::imm_labels, "imm-labels" }, - { &Settings::short_ref_local_labels, "short-ref-local-labels" }, - { &Settings::export_labels, "export-labels" }, - { &Settings::export_all_labels, "export-all-labels" }, - { &Settings::export_functions, "export-functions" }, - { &Settings::xrefs_from, "xrefs-from" }, - { &Settings::xrefs_to, "xrefs-to" }, - { &Settings::imm_hex, "imm-hex" }, - { &Settings::follow_jumps, "follow-jumps" }, - { &Settings::walk, "walk" }, - }; - constexpr size_t sizeof_no_prefix = (sizeof "no-") - 1; - const bool disable = FeatureStringHasPrefixNo(feature_arg); - const char *const feature = feature_arg + (disable ? 
sizeof_no_prefix : 0); - for (size_t i = 0; i < (sizeof features) / (sizeof *features); i++) { - if (0 == strcmp(feature, features[i].feature_name)) { - s.*(features[i].setting) = !disable; - return true; - } - } - return false; -} - -static void PrintUsage(FILE *s, const char *argv0) -{ - // Please, keep all lines in 80 columns range when printed. - fprintf(s, - "Usage: %s [options] \n" - "Options:\n" - " -h, --help, Show this message.\n" - " -o, --output, Where to write disassembly to (stdout if not set)\n" - " -t, --pc-trace, File containing PC trace\n" - " --indent, Specify instruction indentation, e.g. \"\t\",\n" - " Single tab is used by default.\n" - " -f, --feature=[no-]\n" - " Enable or disable (with \"no-\" prefix) a feature.\n" - " Available features described below under the\n" - " \"Feature flags\" section.\n" - " -b, --bfd-target=bfdname\n" - " Specify target object format as `bfdname`. Will attempt\n" - " to detect automatically if not set. Only `auto,\n" - " `binary` and `elf` are currently supported.\n" - " Binary or elf file with the machine code to disassemble\n" - "Feature flags:\n" - " rdc Print raw data comment.\n" - " labels Print labels above all places that have jumps from\n" - " somewhere.\n" - " rel-labels Use label instead of number on relative branch or call.\n" - " abs-labels Use label instead of number on absolute branch or call.\n" - " imm-labels Use label instead of number when immediate value moved\n" - " to address register.\n" - " short-ref-local-labels\n" - " Use local labels (numbers) for short jumps or loops.\n" - " Jump is considered short when it does not cross other\n" - " labels and has no calls.\n" - " export-labels Add `.globl` preamble to labels referenced two or more\n" - " times.\n" - " export-all-labels Add `.globl` preamble to all labels.\n" - " export-functions Add `.globl` and `.type @funciton` preamble to a label\n" - " referenced as a call.\n" - " xrefs-from Print xrefs comments above all places that have 
xrefs.\n" - " xrefs-to Print xrefs comments after all branch instructions.\n" - " imm-hex Print all immediate values as hexadecimal numbers.\n" - " follow-jumps Follow jumps to statically known locations.\n" - " walk Try best to detect further instructions following known\n" - " traced locations without overcommitting.\n" - , argv0); -} - -int main(int, char* argv[]) -{ - struct optparse_long longopts[] = { - {"help", 'h', OPTPARSE_NONE}, - {"output", 'o', OPTPARSE_REQUIRED}, - {"pc-trace", 't', OPTPARSE_REQUIRED}, - {"feature", 'f', OPTPARSE_REQUIRED}, - {"bfd-target", 'b', OPTPARSE_REQUIRED}, - {"indent", 80, OPTPARSE_REQUIRED}, - {}, - }; - const char *trace_file_name = nullptr; - const char *output_file_name = nullptr; - const char *input_file_name = nullptr; - Settings s{}; - struct optparse options; - optparse_init(&options, argv); - // Parse opts - int option; - while ((option = optparse_long(&options, longopts, NULL)) != -1) { - switch (option) { - case 'h': - PrintUsage(stdout, argv[0]); - return EXIT_SUCCESS; - break; - case 'o': - output_file_name = options.optarg; - break; - case 't': - trace_file_name = options.optarg; - break; - case 'f': - if (!ApplyFeature(s, options.optarg)) { - fprintf(stderr, "main: Error: Unknown feature \"%s\", exiting\n", options.optarg); - return EXIT_FAILURE; - } - break; - case 'b': - { - const auto *bfd_str = options.optarg; - if (0 == strcmp(bfd_str, "auto")) { - s.bfd = BFDTarget::kAuto; - } else if (0 == strcmp(bfd_str, "binary")) { - s.bfd = BFDTarget::kBinary; - } else if (0 == strcmp(bfd_str, "elf")) { - s.bfd = BFDTarget::kELF; - } else { - fprintf( - stderr, - "Unknown BFD target specified: \"%s\". 
" - "Refer to usage below to find correct BFD values.\n", - bfd_str); - PrintUsage(stderr, argv[0]); - return EXIT_FAILURE; - } - } - break; - case 80: - s.indent = options.optarg; - break; - case '?': - fprintf(stderr, "main: optparse_long: Error: \"%s\"\n", options.errmsg); - return EXIT_FAILURE; - } - } - // Parse input file name - char *arg; - while ((arg = optparse_arg(&options))) { - if (input_file_name == nullptr) { - input_file_name = arg; - } else { - fprintf(stderr, "error: too many free arguments provided\n"); - return EXIT_FAILURE; - } - } - // Open the files - FILE *input_stream = nullptr; - FILE *output_stream = stdout; - FILE *trace_stream = nullptr; - if (input_file_name) { - if (0 == strcmp(input_file_name, "-")) { - input_stream = stdin; - } else { - input_stream = fopen(input_file_name, "r"); - } - if (input_stream == nullptr) { - const int err = errno; - fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", input_file_name, err, strerror(err)); - return EXIT_FAILURE; - } - } else { - fprintf(stderr, "main: Error: no input file name specified, see usage below.\n"); - PrintUsage(stderr, argv[0]); - return EXIT_FAILURE; - } - if (output_file_name) { - output_stream = fopen(output_file_name, "w"); - if (output_stream == nullptr) { - const int err = errno; - fprintf(stderr, "main: fopen(\"%s\", \"w\"): Error (%d): \"%s\"\n", output_file_name, err, strerror(err)); - fclose(input_stream); - return EXIT_FAILURE; - } - } - if (trace_file_name) { - if (0 == strcmp(trace_file_name, "-")) { - if (input_stream == stdin) { - fprintf(stderr, "error: trace stream and input stream cannot be both stdin\n"); - return EXIT_FAILURE; - } - trace_stream = stdin; - } else { - trace_stream = fopen(trace_file_name, "r"); - } - if (trace_stream == nullptr) { - const int err = errno; - fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", trace_file_name, err, strerror(err)); - fclose(input_stream); - fclose(output_stream); - return 
EXIT_FAILURE; - } - } - // Run the program - const int ret = M68kDisasm(input_stream, output_stream, trace_stream, s); - if (trace_stream != nullptr) { - fclose(trace_stream); - } - fclose(output_stream); - fclose(input_stream); - return ret; -} diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..76fc956 --- /dev/null +++ b/src/common.h @@ -0,0 +1,85 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include +#include + +enum class BFDTarget { + kAuto, + kBinary, + kELF, +}; + +struct Settings { + bool raw_data_comment{}; + bool labels{}; + bool rel_labels{}; + bool abs_labels{}; + bool imm_labels{}; + bool short_ref_local_labels{}; + bool export_labels{}; + bool export_all_labels{}; + bool export_functions{}; + bool xrefs_to{}; + bool xrefs_from{}; + bool imm_hex{}; + bool follow_jumps{}; + bool walk{}; + BFDTarget bfd{}; + const char *indent{"\t"}; +}; + +using RefKindMask = unsigned; + +constexpr RefKindMask kRef1RelMask = (1 << 0); // For first argument +constexpr RefKindMask kRef1AbsMask = (1 << 1); // For first argument +constexpr RefKindMask kRef2RelMask = (1 << 2); // For second argument +constexpr RefKindMask kRef2AbsMask = (1 << 3); // For second argument +constexpr RefKindMask kRef1ReadMask = (1 << 4); // For first argument +constexpr RefKindMask kRef1WriteMask = (1 << 5); // For first argument +constexpr RefKindMask kRef2ReadMask = (1 << 6); // For second argument +constexpr RefKindMask kRef2WriteMask = (1 << 7); // For second argument +/// Indicates whether instruction is a call or just a branch, for any argument. +/// Calls are BSR and JSR, branches are DBcc, Bcc and JMP. 
+constexpr RefKindMask kRefCallMask = (1 << 8); +/// Hack flag for MOVEM with PC relative value when -frel-labels is set +constexpr RefKindMask kRefPcRelFix2Bytes = (1 << 9); +/// Register 1 may have immediate moving to address register which may be a +/// labeled location +constexpr RefKindMask kRef1ImmMask = (1 << 10); +/// Everything for first argument +constexpr RefKindMask kRef1Mask = kRef1RelMask | kRef1AbsMask | kRef1ReadMask | kRef1WriteMask | kRef1ImmMask; +/// Everything for Second argument +constexpr RefKindMask kRef2Mask = kRef2RelMask | kRef2AbsMask | kRef2ReadMask | kRef2WriteMask; +constexpr RefKindMask kRefRelMask = kRef1RelMask | kRef2RelMask; +constexpr RefKindMask kRefAbsMask = kRef1AbsMask | kRef2AbsMask; +constexpr RefKindMask kRef1DataMask = kRef1ReadMask | kRef1WriteMask; // For first argument +constexpr RefKindMask kRef2DataMask = kRef2ReadMask | kRef2WriteMask; // For second argument +constexpr RefKindMask kRefReadMask = kRef1ReadMask | kRef2ReadMask; // For any argument +constexpr RefKindMask kRefWriteMask = kRef1WriteMask | kRef2WriteMask; // For any argument +constexpr RefKindMask kRefDataMask = kRefReadMask | kRefWriteMask; +constexpr size_t kInstructionSizeStepBytes = 2; +constexpr size_t kRomSizeBytes = 4 * 1024 * 1024; +constexpr size_t kDisasmMapSizeElements = kRomSizeBytes / kInstructionSizeStepBytes; + +static inline constexpr size_t Min(size_t a, size_t b) { return a < b ? 
a : b; } + +static inline constexpr uint16_t GetU16BE(const uint8_t *buffer) +{ + return (static_cast(buffer[0]) << 8) | static_cast(buffer[1]); +} + +static inline constexpr int16_t GetI16BE(const uint8_t *buffer) +{ + return (static_cast(buffer[0]) << 8) | static_cast(buffer[1]); +} + +static inline constexpr int32_t GetI32BE(const uint8_t *buffer) +{ + return (static_cast(buffer[0]) << 24) | + (static_cast(buffer[1]) << 16) | + (static_cast(buffer[2]) << 8) | + static_cast(buffer[3]); +} diff --git a/src/data_buffer.cpp b/src/data_buffer.cpp new file mode 100644 index 0000000..33cb0b3 --- /dev/null +++ b/src/data_buffer.cpp @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "data_buffer.h" + +#include +#include + +void DataBuffer::Expand(size_t new_size) +{ + assert(buffer); + if (new_size <= buffer_size) { + return; + } + uint8_t *new_buffer{new uint8_t[new_size]}; + assert(new_buffer); + memcpy(new_buffer, buffer, occupied_size); + delete [] buffer; + buffer = new_buffer; + buffer_size = new_size; +} + +DataBuffer::~DataBuffer() +{ + delete [] buffer; + buffer = nullptr; + buffer_size = 0; + occupied_size = 0; +} diff --git a/src/data_buffer.h b/src/data_buffer.h new file mode 100644 index 0000000..bc264d2 --- /dev/null +++ b/src/data_buffer.h @@ -0,0 +1,41 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "common.h" + +#include +#include + +struct DataView { + const uint8_t *const buffer{}; + const size_t size{}; +}; + +struct DataBuffer { + DataBuffer(){}; + DataBuffer(const DataBuffer&) = delete; + constexpr DataBuffer(DataBuffer&& other) + : buffer(other.buffer) + , buffer_size(other.buffer_size) + , occupied_size(other.occupied_size) + { + other.occupied_size = 0; + other.buffer_size = 0; + other.buffer = nullptr; + }; + static constexpr size_t kInitialSize = 4 * 1024; + uint8_t *buffer{new uint8_t[kInitialSize]}; + size_t buffer_size{kInitialSize}; + size_t occupied_size{}; + void Expand(size_t new_size); + 
constexpr auto View(size_t offset = 0, size_t size = SIZE_MAX) const + { + if (offset >= occupied_size) { + return DataView{}; + } + return DataView{buffer + offset, Min(occupied_size - offset, size)}; + }; + ~DataBuffer(); +}; diff --git a/src/disasm.cpp b/src/disasm.cpp new file mode 100644 index 0000000..2b2ea81 --- /dev/null +++ b/src/disasm.cpp @@ -0,0 +1,2010 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "disasm.h" +#include "data_buffer.h" +#include "common.h" + +#include +#include +#include +#include + +enum class MoveDirection: bool { + kRegisterToMemory = 0, + kMemoryToRegister = 1, +}; + +enum class ShiftDirection: bool { + kRight = 0, + kLeft = 1, +}; + +enum class ShiftKind: int { + kArithmeticShift = 0, + kLogicalShift = 1, + kRotateX = 2, + kRotate = 3, +}; + +constexpr Arg FetchImmediate(const uint32_t address, const DataView &code, const OpSize s) +{ + if (s == OpSize::kInvalid) { + return Arg{}; + } else if (s == OpSize::kLong) { + if (address + kInstructionSizeStepBytes < code.size) { + const int32_t value = GetI32BE(code.buffer + address); + return Arg::Immediate(value); + } + } else if (address < code.size) { + const int16_t value = GetI16BE(code.buffer + address); + if (s == OpSize::kByte) { + // Technically it is impossible to have value lower that -128 in 8 + // bits signed integer, but the second byte being 0xff is actually + // a valid thing and it is how values from -255 to -129 are + // represented. 
+ if (value > 255 || value < -255) { + // Invalid immediate value for instruction with .b suffix + return Arg{}; + } + } + return Arg::Immediate(value); + } + return Arg{}; +} + +constexpr Arg FetchArg( + const uint32_t address, const DataView &code, const int m, const int xn, const OpSize s) +{ + switch (m) { + case 0: // Dn + return Arg::Dn(xn); + case 1: // An + return Arg::An(xn); + case 2: // (An) + return Arg::AnAddr(xn); + case 3: // (An)+ + return Arg::AnAddrIncr(xn); + case 4: // -(An) + return Arg::AnAddrDecr(xn); + case 5: // (d16, An), Additional Word + if (address < code.size) { + const int16_t d16 = GetI16BE(code.buffer + address); + return Arg::D16AnAddr(xn, d16); + } + break; + case 6: // (d8, An, Xi), Brief Extension Word + if (address < code.size) { + const uint16_t briefext = GetU16BE(code.buffer + address); + if (briefext & 0x0700) { + // briefext must have zeros on 8, 9 an 10-th bits, + // i.e. xxxx_x000_xxxx_xxxx + break; + } + // Xi number (lower 3 bits, mask 0x7) with An/Dn bit (mask 0x8) + const uint8_t xi = (briefext >> 12) & 0xf; + const OpSize s = ((briefext >> 11) & 1) ? OpSize::kLong : OpSize::kWord; + const int8_t d8 = briefext & 0xff; + return Arg::D8AnXiAddr(xn, xi, s, d8); + } + break; + case 7: + switch (xn) { + case 0: // (xxx).W, Additional Word + if (address < code.size) { + const int32_t w = GetI16BE(code.buffer + address); + return Arg::Word(w); + } + break; + case 1: // (xxx).L, Additional Long + if (address + kInstructionSizeStepBytes < code.size) { + const int32_t l = GetI32BE(code.buffer + address); + return Arg::Long(l); + } + break; + case 2: // (d16, PC), Additional Word + if (address < code.size) { + const int16_t d16 = GetI16BE(code.buffer + address); + return Arg::D16PCAddr(d16); + } + break; + case 3: // (d8, PC, Xi), Brief Extension Word + if (address < code.size) { + const uint16_t briefext = GetU16BE(code.buffer + address); + if (briefext & 0x0700) { + // briefext must have zeros on 8, 9 an 10-th bits, + // i.e. 
xxxx_x000_xxxx_xxxx + break; + } + // Xi number (lower 3 bits, mask 0x7) with An/Dn bit (mask 0x8) + const uint8_t xi = (briefext >> 12) & 0xf; + const OpSize s = ((briefext >> 11) & 1) ? OpSize::kLong : OpSize::kWord; + const int8_t d8 = briefext & 0xff; + return Arg::D8PCXiAddr(xn, xi, s, d8); + } + break; + case 4: // #imm + return FetchImmediate(address, code, s); + case 5: // Does not exist + case 6: // Does not exist + case 7: // Does not exist + break; + } + break; + } + return Arg{}; +} + +static Arg FetchArg( + const uint32_t address, const DataView &code, const uint16_t instr, const OpSize s) +{ + const int addrmode = instr & 0x3f; + const int m = (addrmode >> 3) & 7; + const int xn = addrmode & 7; + return FetchArg(address, code, m, xn, s); +} + +static size_t disasm_verbatim(DisasmNode &node, const uint16_t instr) +{ + node.op = Op::Raw(instr); + return node.size; +} + +static size_t disasm_jsr_jmp( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = OpSize::kWord; + const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + case AddrMode::kDn: // 4e80..4e87 / 4ec0..4ec7 + case AddrMode::kAn: // 4e88..4e8f / 4ec8..4ecf + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: // 4e90..4e97 / 4ed0..4ed7 + // NOTE: dynamic jump, ref_addr may possibly be obtained during the + // trace + break; + case AddrMode::kAnAddrIncr: // 4e98..4e9f / 4ed8..4edf + case AddrMode::kAnAddrDecr: // 4ea0..4ea7 / 4ee0..4ee7 + return disasm_verbatim(node, instr); + case AddrMode::kD16AnAddr: // 4ea8..4eaf / 4ee8..4eef + // NOTE: dynamic jump, ref_addr may possibly be obtained during the + // trace + break; + case AddrMode::kD8AnXiAddr: // 4eb0..4eb7 / 4ef0..4ef7 + // NOTE: dynamic jump, ref_addr may possibly be obtained during the + // trace + break; + case AddrMode::kWord: // 4eb8 / 4ef8 + { + const uint32_t ref_addr = static_cast(a.lword); + 
node.ref1_addr = ref_addr; + node.ref_kinds = kRef1AbsMask; + } + break; + case AddrMode::kLong: // 4eb9 / 4ef9 + { + const uint32_t ref_addr = static_cast(a.lword); + node.ref1_addr = ref_addr; + node.ref_kinds = kRef1AbsMask; + } + break; + case AddrMode::kD16PCAddr: // 4eba / 4efa + { + const uint32_t ref_addr = node.address + kInstructionSizeStepBytes + + static_cast(a.d16_pc.d16); + node.ref1_addr = ref_addr; + node.ref_kinds = kRef1RelMask; + } + break; + case AddrMode::kD8PCXiAddr: // 4ebb / 4efb + // NOTE: dynamic jump, ref_addr may possibly be obtained during the + // trace + break; + case AddrMode::kImmediate: // 4ebc / 4efc + return disasm_verbatim(node, instr); + } + const bool is_jmp = instr & 0x40; + node.ref_kinds |= is_jmp ? 0 : kRefCallMask; + node.op = Op::Typical(is_jmp ? OpCode::kJMP : OpCode::kJSR, OpSize::kNone, a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_ext(DisasmNode &node, const OpSize opsize, const Arg arg) +{ + assert(arg.mode == AddrMode::kDn); + node.op = Op::Typical(OpCode::kEXT, opsize, arg); + return node.size = kInstructionSizeStepBytes + arg.Size(opsize); +} + +static size_t disasm_ext_movem( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto dir = static_cast((instr >> 10) & 1); + const unsigned m = (instr >> 3) & 7; + const unsigned xn = instr & 7; + const auto opsize = static_cast(((instr >> 6) & 1) + 1); + if (m == 0 && dir == MoveDirection::kRegisterToMemory) { + return disasm_ext(node, opsize, Arg::Dn(xn)); + } + if (node.address + kInstructionSizeStepBytes >= code.size) { + // Not enough space for regmask, but maybe it is just EXT? 
+ return disasm_verbatim(node, instr); + } + const unsigned regmask = GetU16BE(code.buffer + node.address + kInstructionSizeStepBytes); + if (regmask == 0) { + // This is just not representable: at least one register must be specified + return disasm_verbatim(node, instr); + } + const auto a = FetchArg( + node.address + kInstructionSizeStepBytes * 2, code, m, xn, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + case AddrMode::kDn: // 4880..4887 / 4c80..4c87 / 48c0..48c7 / 4cc0..4cc7 + case AddrMode::kAn: // 4888..488f / 4c88..4c8f / 48c8..48cf / 4cc8..4ccf + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: // 4890..4897 / 4c90..4c97 / 48d0..48d7 / 4cd0..4cd7 + break; + case AddrMode::kAnAddrIncr: // 4898..489f / 4c89..4c9f / 48d8..48df / 4cd8..4cdf + if (dir == MoveDirection::kRegisterToMemory) { + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kAnAddrDecr: // 48a0..48a7 / 4ca0..4ca7 / 48e0..48e7 / 4ce0..4ce7 + if (dir == MoveDirection::kMemoryToRegister) { + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kD16AnAddr: // 48a8..48af / 4c8a..4caf / 48e8..48ef / 4ce8..4cef + case AddrMode::kD8AnXiAddr: // 48b0..48b7 / 4cb0..4cb7 / 48f0..48f7 / 4cf0..4cf7 + break; + case AddrMode::kWord: // 48b8 / 4cb8 / 48f8 / 4cf8 + case AddrMode::kLong: // 48b9 / 4cb9 / 48f9 / 4cf9 + if (dir == MoveDirection::kRegisterToMemory) { + node.ref2_addr = static_cast(a.lword); + node.ref_kinds = kRef2AbsMask | kRef2WriteMask; + } else { + node.ref1_addr = static_cast(a.lword); + node.ref_kinds = kRef1AbsMask | kRef1ReadMask; + } + break; + case AddrMode::kD16PCAddr: // 48ba / 4cba / 48fa / 4cfa + case AddrMode::kD8PCXiAddr: // 48bb / 4cbb / 48fb / 4cfb + if (dir == MoveDirection::kRegisterToMemory) { + return disasm_verbatim(node, instr); + } else if (a.mode == AddrMode::kD16PCAddr) { + // XXX: kRefPcRelFix2Bytes flag is a hack that needed to correctly + // print label for PC relative referenced value of MOVEM. 
Alongside + // with *NOT* adding kInstructionSizeStepBytes to ref1_addr. Still + // figuring that out. + node.ref1_addr = node.address + kInstructionSizeStepBytes * 2 + + static_cast(a.d16_pc.d16); + node.ref_kinds = kRef1RelMask | kRef1ReadMask | kRefPcRelFix2Bytes; + } + break; + case AddrMode::kImmediate: // 4ebc / 4efc + return disasm_verbatim(node, instr); + } + if (dir == MoveDirection::kMemoryToRegister) { + const auto arg2 = (a.mode == AddrMode::kAnAddrDecr) + ? Arg::RegMaskPredecrement(regmask) : Arg::RegMask(regmask); + node.op = Op::Typical(OpCode::kMOVEM, opsize, a, arg2); + } else { + const auto arg1 = (a.mode == AddrMode::kAnAddrDecr) + ? Arg::RegMaskPredecrement(regmask) : Arg::RegMask(regmask); + node.op = Op::Typical(OpCode::kMOVEM, opsize, arg1, a); + } + return node.size = kInstructionSizeStepBytes * 2 + a.Size(opsize); +} + +static size_t disasm_lea( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = OpSize::kLong; + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (addr.mode) { + case AddrMode::kInvalid: + case AddrMode::kDn: + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + break; + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + return disasm_verbatim(node, instr); + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + node.ref1_addr = static_cast(addr.lword); + node.ref_kinds = kRef1AbsMask | kRef1ReadMask; + break; + case AddrMode::kD16PCAddr: + node.ref1_addr = node.address + kInstructionSizeStepBytes + + static_cast(addr.d16_pc.d16); + node.ref_kinds = kRef1RelMask | kRef1ReadMask; + break; + case AddrMode::kD8PCXiAddr: + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + const unsigned an = ((instr >> 9) & 7); + const auto reg = Arg::An(an); + node.op = Op::Typical(OpCode::kLEA, opsize, addr, reg); + 
return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_chk( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = OpSize::kWord; + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + const unsigned dn = ((instr >> 9) & 7); + const auto dst = Arg::Dn(dn); + node.op = Op::Typical(OpCode::kCHK, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_bra_bsr_bcc( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const int16_t dispmt0 = static_cast(instr & 0xff); + if (dispmt0 == -1) { + // This will definitely lead to executing invalid instruction and is + // also invalid for GNU AS to assemble + return disasm_verbatim(node, instr); + } + const auto opsize = dispmt0 ? OpSize::kShort : OpSize::kWord; + if (dispmt0 == 0) { + // Check the boundaries + if (node.address + kInstructionSizeStepBytes >= code.size) { + return disasm_verbatim(node, instr); + } + node.size = kInstructionSizeStepBytes * 2; + } else { + node.size = kInstructionSizeStepBytes; + } + const int16_t dispmt = kInstructionSizeStepBytes + (dispmt0 + ? 
dispmt0 : GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes)); + const uint32_t ref_addr = static_cast(node.address + dispmt); + Condition condition = static_cast((instr >> 8) & 0xf); + // False condition Indicates BSR + node.ref1_addr = ref_addr; + node.ref_kinds = kRef1RelMask | ((condition == Condition::kF) ? kRefCallMask : 0); + node.op = Op{OpCode::kBcc, opsize, condition, Arg::Displacement(dispmt)}; + return node.size; +} + +static OpCode OpCodeForBitOps(const unsigned opcode) +{ + switch (opcode) { + case 0: return OpCode::kBTST; + case 1: return OpCode::kBCHG; + case 2: return OpCode::kBCLR; + case 3: return OpCode::kBSET; + } + assert(false); + return OpCode::kNone; +} + +static size_t disasm_movep( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const unsigned dn = ((instr >> 9) & 7); + const unsigned an = instr & 7; + const OpSize opsize = ((instr >> 6) & 1) ? OpSize::kLong : OpSize::kWord; + const auto dir = static_cast(!((instr >> 7) & 1)); + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, 5, an, opsize); + if (addr.mode == AddrMode::kInvalid) { + // Boundary check failed, most likely + return disasm_verbatim(node, instr); + } + assert(addr.mode == AddrMode::kD16AnAddr); + const auto reg = Arg::Dn(dn); + if (dir == MoveDirection::kRegisterToMemory) { + node.op = Op::Typical(OpCode::kMOVEP, opsize, reg, addr); + } else { + node.op = Op::Typical(OpCode::kMOVEP, opsize, addr, reg); + } + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_src_arg_bitops_movep( + DisasmNode &node, + const uint16_t instr, + const DataView &code, + const bool has_dn_src = true) +{ + const unsigned m = (instr >> 3) & 7; + if ((m == 1) && has_dn_src) { + return disasm_movep(node, instr, code); + } + const unsigned dn = ((instr >> 9) & 7); + const unsigned xn = instr & 7; + const OpSize opsize0 = OpSize::kByte; + // Fetch AddrMode::kDn if has_dn_src, 
otherwise fetch AddrMode::kImmediate + // byte + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, + code, + (has_dn_src) ? 0 : 7, + dn, + opsize0); + if (src.mode == AddrMode::kInvalid) { + return disasm_verbatim(node, instr); + } + if (has_dn_src) { + assert(src.mode == AddrMode::kDn); + } else { + assert(dn == 4); + assert(src.mode == AddrMode::kImmediate); + } + const auto dst = FetchArg( + node.address + kInstructionSizeStepBytes + src.Size(opsize0), code, m, xn, opsize0); + const unsigned opcode = (instr >> 6) & 3; + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (opcode != 0) { + // PC relative destination address argument available for BTST only + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + const auto opsize = dst.mode == AddrMode::kDn ? OpSize::kLong : OpSize::kByte; + node.op = Op::Typical(OpCodeForBitOps(opcode), opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize0) + dst.Size(opsize0); +} + +static size_t disasm_bitops(DisasmNode &n, const uint16_t i, const DataView &c) +{ + return disasm_src_arg_bitops_movep(n, i, c, false); +} + +static size_t disasm_logical_immediate_to( + DisasmNode &node, OpCode opcode, OpSize opsize, Arg imm) +{ + node.op = Op::Typical(opcode, opsize, imm, (opsize == OpSize::kByte) ? 
Arg::CCR() : Arg::SR()); + return node.size = kInstructionSizeStepBytes * 2; +} + +static OpCode OpCodeForLogicalImmediate(const unsigned opcode) +{ + switch (opcode) { + case 0: return OpCode::kORI; + case 1: return OpCode::kANDI; + case 2: return OpCode::kSUBI; + case 3: return OpCode::kADDI; + case 4: break; + case 5: return OpCode::kEORI; + case 6: return OpCode::kCMPI; + case 7: break; + } + assert(false); + return OpCode::kNone; +} + +static size_t disasm_bitops_movep( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const bool has_source_reg = (instr >> 8) & 1; + if (has_source_reg) { + return disasm_src_arg_bitops_movep(node, instr, code); + } + const unsigned opcode = (instr >> 9) & 7; + if (opcode == 7) { + // Does not exist + return disasm_verbatim(node, instr); + } + if (opcode == 4) { + return disasm_bitops(node, instr, code); + } + const int m = (instr >> 3) & 7; + const int xn = instr & 7; + const auto opsize = static_cast((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + // Does not exist + return disasm_verbatim(node, instr); + } + // Anticipating #imm which means "to CCR"/"to SR", depending on OpSize + if (m == 7 && xn == 4) { + if (opcode == 2 || opcode == 3 || opcode == 6) { + // CMPI, SUBI and ANDI neither have immediate destination arguments + // nor "to CCR"/"to SR" variations + return disasm_verbatim(node, instr); + } + if (opsize == OpSize::kLong) { + // Does not exist + return disasm_verbatim(node, instr); + } + } + const auto src = FetchImmediate(node.address + kInstructionSizeStepBytes, code, opsize); + if (src.mode == AddrMode::kInvalid) { + return disasm_verbatim(node, instr); + } + assert(src.mode == AddrMode::kImmediate); + const OpCode mnemonic = OpCodeForLogicalImmediate(opcode); + if (m == 7 && xn == 4) { + return disasm_logical_immediate_to(node, mnemonic, opsize, src); + } + const auto dst = FetchArg( + node.address + kInstructionSizeStepBytes + src.Size(opsize), code, m, xn, opsize); + switch 
(dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (opcode != 6) { + // PC relative destination address argument available for CMPI only + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(mnemonic, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_move_movea( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const int opsize_raw = (instr >> 12) & 3; + const OpSize opsize = (opsize_raw == 1) + ? OpSize::kByte : (opsize_raw == 3 ? 
OpSize::kWord : OpSize::kLong); + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + if (opsize == OpSize::kByte) { + // Does not exist + return disasm_verbatim(node, instr); + } + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + node.ref1_addr = static_cast(src.lword); + node.ref_kinds |= kRef1AbsMask | kRef1ReadMask; + break; + case AddrMode::kD16PCAddr: + node.ref1_addr = node.address + kInstructionSizeStepBytes + + static_cast(src.d16_pc.d16); + node.ref_kinds |= kRef1RelMask | kRef1ReadMask; + break; + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + break; + } + const int m = (instr >> 6) & 7; + const int xn = (instr >> 9) & 7; + const auto dst = FetchArg( + node.address + kInstructionSizeStepBytes + src.Size(opsize), code, m, xn, opsize); + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + if (opsize == OpSize::kByte) { + // Does not exist + return disasm_verbatim(node, instr); + } + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + node.ref2_addr = static_cast(dst.lword); + node.ref_kinds |= kRef2AbsMask | kRef2WriteMask; + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + // XXX Assuming that moving long immediate value into address register is + // basically a sneaky LEA. It may not be true in some cases. 
+ if (src.type == ArgType::kImmediate && dst.type == ArgType::kAn) { + if (opsize == OpSize::kLong) { + node.ref1_addr = static_cast(src.lword); + node.ref_kinds |= kRef1ImmMask | kRef1ReadMask; + } else if (opsize == OpSize::kWord) { + node.ref1_addr = static_cast(static_cast(src.lword)); + node.ref_kinds |= kRef1ImmMask | kRef1ReadMask; + } + } + const auto opcode = (dst.mode == AddrMode::kAn) ? OpCode::kMOVEA : OpCode::kMOVE; + node.op = Op::Typical(opcode, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_move_from_sr( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto opsize = OpSize::kWord; + const auto dst = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kMOVE, opsize, Arg::SR(), dst); + return node.size = kInstructionSizeStepBytes + dst.Size(opsize); +} + +static size_t disasm_move_to( + DisasmNode &node, const uint16_t instr, const DataView &code, const ArgType reg) +{ + const auto opsize = OpSize::kWord; + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case 
AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + break; + } + node.op = Op::Typical(OpCode::kMOVE, opsize, src, Arg{{reg}, {0}}); + return node.size = kInstructionSizeStepBytes + src.Size(opsize); +} + +static OpCode opcode_for_negx_clr_neg_not(const unsigned opcode) +{ + switch (opcode) { + case 0: return OpCode::kNEGX; + case 1: return OpCode::kCLR; + case 2: return OpCode::kNEG; + case 3: return OpCode::kNOT; + } + assert(false); + return OpCode::kNone; +} + +static size_t disasm_move_negx_clr_neg_not( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto opsize = static_cast((instr >> 6) & 3); + const unsigned opcode = (instr >> 9) & 3; + if (opsize == OpSize::kInvalid) { + switch (opcode) { + case 0: + return disasm_move_from_sr(node, instr, code); + case 1: + return disasm_verbatim(node, instr); + case 2: + return disasm_move_to(node, instr, code, ArgType::kCCR); + case 3: + return disasm_move_to(node, instr, code, ArgType::kSR); + } + assert(false); + return disasm_verbatim(node, instr); + } + const auto a = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(opcode_for_negx_clr_neg_not(opcode), opsize, a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_trivial( + DisasmNode &node, const OpCode opcode) 
+{ + node.op = Op::Typical(opcode, OpSize::kNone); + return node.size; +} + +static size_t disasm_tas( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto opsize = OpSize::kByte; + const auto a = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kTAS, opsize, a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_tst_tas_illegal( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto opsize = static_cast((instr >> 6) & 3); + const int m = (instr >> 3) & 7; + const int xn = instr & 7; + if (opsize == OpSize::kInvalid) { + if (m == 7 && xn == 4){ + return disasm_trivial(node, OpCode::kILLEGAL); + } + return disasm_tas(node, instr, code); + } + const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, m, xn, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kTST, opsize, a); + return node.size = 
kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_trap(DisasmNode &node, const uint16_t instr) +{ + const unsigned vector = instr & 0xf; + node.op = Op::Typical(OpCode::kTRAP, OpSize::kNone, Arg::Immediate(vector)); + return node.size = kInstructionSizeStepBytes; +} + +static size_t disasm_link_unlink(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const bool unlk = (instr >> 3) & 1; + const unsigned xn = instr & 7; + if (unlk) { + node.op = Op::Typical(OpCode::kUNLK, OpSize::kNone, Arg::AddrModeXn(ArgType::kAn, xn)); + return node.size = kInstructionSizeStepBytes; + } + const auto opsize = OpSize::kWord; + const auto src = FetchImmediate(node.address + kInstructionSizeStepBytes, code, opsize); + if (src.mode != AddrMode::kImmediate) { + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kLINK, opsize, Arg::AddrModeXn(ArgType::kAn, xn), src); + return node.size = kInstructionSizeStepBytes + src.Size(opsize); +} + +static size_t disasm_move_usp(DisasmNode &node, const uint16_t instr) +{ + const unsigned xn = instr & 7; + const auto dir = static_cast((instr >> 3) & 1); + if (dir == MoveDirection::kRegisterToMemory) { + node.op = Op::Typical( + OpCode::kMOVE, OpSize::kLong, Arg::An(xn), Arg::USP()); + } else { + node.op = Op::Typical( + OpCode::kMOVE, OpSize::kLong, Arg::USP(), Arg::An(xn)); + } + return node.size = kInstructionSizeStepBytes; +} + +static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const bool is_nbcd = !((instr >> 6) & 1); + const OpSize opsize0 = OpSize::kWord; + const auto arg = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize0); + bool is_swap{}; + switch (arg.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + if (!is_nbcd) { + is_swap = true; + } + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + break; + case 
AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + if (!is_nbcd) { + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + node.ref1_addr = static_cast(arg.lword); + node.ref_kinds = kRef1AbsMask | kRef1ReadMask; + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (is_nbcd) { + return disasm_verbatim(node, instr); + } + if (arg.mode == AddrMode::kD16PCAddr) { + node.ref1_addr = node.address + kInstructionSizeStepBytes + + static_cast(arg.d16_pc.d16); + node.ref_kinds = kRef1RelMask | kRef1ReadMask; + } + break; + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + const auto opcode = is_nbcd ? OpCode::kNBCD : is_swap ? OpCode::kSWAP : OpCode::kPEA; + const auto opsize = is_nbcd ? OpSize::kByte : is_swap ? OpSize::kWord : OpSize::kLong; + node.op = Op::Typical(opcode, opsize, arg); + return node.size = kInstructionSizeStepBytes + arg.Size(opsize0); +} + +static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const auto a = FetchImmediate(node.address + kInstructionSizeStepBytes, code, OpSize::kWord); + if (a.mode != AddrMode::kImmediate) { + return disasm_verbatim(node, instr); + } + node.op = Op::Typical(OpCode::kSTOP, OpSize::kNone, a); + return node.size = kInstructionSizeStepBytes * 2; +} + +static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + if ((instr & 0xf900) == 0x4000) { + return disasm_move_negx_clr_neg_not(node, instr, code); + } else if ((instr & 0xff80) == 0x4800) { + // NOTE: EXT is handled with MOVEM + return disasm_nbcd_swap_pea(node, instr, code); + } else if ((instr & 0xff00) == 0x4a00) { + return disasm_tst_tas_illegal(node, instr, code); + } else if ((instr & 0xfff0) == 0x4e40) { + return disasm_trap(node, instr); + } else if ((instr & 0xfff0) == 0x4e50) { + return disasm_link_unlink(node, instr, code); + } 
else if ((instr & 0xfff0) == 0x4e60) { + return disasm_move_usp(node, instr); + } else if ((instr & 0xfff8) == 0x4e70) { + if (instr == 0x4e70) { + return disasm_trivial(node, OpCode::kRESET); + } else if (instr == 0x4e71) { + return disasm_trivial(node, OpCode::kNOP); + } else if (instr == 0x4e72) { + return disasm_stop(node, instr, code); + } else if (instr == 0x4e73) { + return disasm_trivial(node, OpCode::kRTE); + } else if (instr == 0x4e75) { + return disasm_trivial(node, OpCode::kRTS); + } else if (instr == 0x4e76) { + return disasm_trivial(node, OpCode::kTRAPV); + } else if (instr == 0x4e77) { + return disasm_trivial(node, OpCode::kRTR); + } + } else if ((instr & 0xff80) == 0x4e80) { + return disasm_jsr_jmp(node, instr, code); + } else if ((instr & 0xfb80) == 0x4880) { + return disasm_ext_movem(node, instr, code); + } else if ((instr & 0xf1c0) == 0x41c0) { + return disasm_lea(node, instr, code); + } else if ((instr & 0xf1c0) == 0x4180) { + return disasm_chk(node, instr, code); + } + return disasm_verbatim(node, instr); +} + +static size_t disasm_addq_subq( + DisasmNode &node, const uint16_t instr, const DataView &code, const OpSize opsize) +{ + const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: // 5x00..5x07 / 5x40..5x47 / 5x80..5x87 + break; + case AddrMode::kAn: // 5x08..5x0f / 5x48..5x4f / 5x88..5x8f + if (opsize == OpSize::kByte) { + // 5x08..5x0f + // addqb and subqb with An do not exist + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kAnAddr: // 5x10..5x17 / 5x50..5x57 / 5x90..5x97 + case AddrMode::kAnAddrIncr: // 5x18..5x1f / 5x58..5x5f / 5x98..5x9f + case AddrMode::kAnAddrDecr: // 5x20..5x27 / 5x60..5x67 / 5xa0..5xa7 + case AddrMode::kD16AnAddr: // 5x28..5x2f / 5x68..5x6f / 5xa8..5xaf + case AddrMode::kD8AnXiAddr: // 5x30..5x37 / 5x70..5x77 / 5xb0..5xb7 + case AddrMode::kWord: // 5x38 / 
5x78 / 5xb8 + case AddrMode::kLong: // 5x39 / 5x79 / 5xb9 + break; + case AddrMode::kD16PCAddr: // 5x3a / 5x7a / 5xba + case AddrMode::kD8PCXiAddr: // 5x3b / 5x7b / 5xbb + case AddrMode::kImmediate: // 5x3c / 5x7c / 5xbc + // Does not exist + return disasm_verbatim(node, instr); + } + const unsigned imm = ((uint8_t((instr >> 9) & 7) - 1) & 7) + 1; + const auto opcode = ((instr >> 8) & 1) ? OpCode::kSUBQ : OpCode::kADDQ; + node.op = Op::Typical(opcode, opsize, Arg::Immediate(imm), a); + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + if (node.address + kInstructionSizeStepBytes >= code.size) { + return disasm_verbatim(node, instr); + } + const int16_t dispmt_raw = GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes); + const int32_t dispmt = dispmt_raw + kInstructionSizeStepBytes; + node.ref2_addr = static_cast(node.address + dispmt); + node.ref_kinds = kRef2RelMask; + node.op = Op{ + OpCode::kDBcc, + OpSize::kWord, + static_cast((instr >> 8) & 0xf), + Arg::AddrModeXn(ArgType::kDn, (instr & 7)), + Arg::Displacement(dispmt), + }; + return node.size = kInstructionSizeStepBytes * 2; +} + +static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = OpSize::kWord; + const auto a = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (a.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: // 5xc0..5xc7, Dn + break; + case AddrMode::kAn: // 5xc8..5xcf, An + return disasm_dbcc(node, instr, code); + case AddrMode::kAnAddr: // 5xd0..5xd7 + case AddrMode::kAnAddrIncr: // 5xd8..5xdf + case AddrMode::kAnAddrDecr: // 5xe0..5xe7 + case AddrMode::kD16AnAddr: // 5xe8..5xef + case AddrMode::kD8AnXiAddr: // 5xf0..5xf7 + case AddrMode::kWord: // 5xf8 (xxx).W + case AddrMode::kLong: // 5xf9 (xxx).L + break; + case 
AddrMode::kD16PCAddr: // 5xfa + case AddrMode::kD8PCXiAddr: // 5xfb + case AddrMode::kImmediate: // 5xfc + // Does not exist + return disasm_verbatim(node, instr); + } + node.op = Op{OpCode::kScc, OpSize::kByte, static_cast((instr >> 8) & 0xf), a}; + return node.size = kInstructionSizeStepBytes + a.Size(opsize); +} + +static size_t disasm_addq_subq_scc_dbcc(DisasmNode &n, const uint16_t instr, const DataView &c) +{ + const auto opsize = static_cast((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + return disasm_scc_dbcc(n, instr, c); + } + return disasm_addq_subq(n, instr, c, opsize); +} + +static size_t disasm_moveq(DisasmNode &node, const uint16_t instr) +{ + if (instr & 0x100) { + // Does not exist + return disasm_verbatim(node, instr); + } + const int xn = (instr >> 9) & 7; + const auto dst = Arg::Dn(xn); + const int8_t data = instr & 0xff; + const OpSize opsize = OpSize::kLong; + node.op = Op::Typical(OpCode::kMOVEQ, opsize, Arg::Immediate(data), dst); + return node.size = kInstructionSizeStepBytes + dst.Size(opsize); +} + +static size_t disasm_divu_divs_mulu_muls( + DisasmNode &node, + const uint16_t instr, + const DataView &code, + const OpCode opcode) +{ + const auto opsize = OpSize::kWord; + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + break; + } + const unsigned dn = (instr >> 9) & 7; + const auto dst = Arg::Dn(dn); + node.op = Op::Typical(opcode, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + dst.Size(opsize) + 
src.Size(opsize); +} + +static size_t disasm_addx_subx_abcd_sbcd( + DisasmNode &node, const uint16_t instr, const OpCode opcode) +{ + const OpSize opsize = static_cast((instr >> 6) & 3); + // Must be already handled by parent call + assert(opsize != OpSize::kInvalid); + const int m = (instr >> 3) & 1; + const int xn = instr & 7; + const int xi = (instr >> 9) & 7; + const auto src = m ? Arg::AnAddrDecr(xn) : Arg::Dn(xn); + const auto dst = m ? Arg::AnAddrDecr(xi) : Arg::Dn(xi); + // XXX GNU AS does not know ABCD.B, it only knows ABCD, but happily consumes + // SBCD.B and others. That's why it is OpSize::kNone specifically for ABCD + // mnemonic. It is probably a bug in GNU AS. + node.op = Op::Typical(opcode, (opcode == OpCode::kABCD) ? OpSize::kNone : opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_or_and( + DisasmNode &node, + const uint16_t instr, + const DataView &code, + const OpSize opsize, + const OpCode opcode) +{ + const bool dir_to_addr = (instr >> 8) & 1; + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (addr.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + if (dir_to_addr) { + // Switching dir when bot operands are data registers is not allowed + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (dir_to_addr) { + // PC relative cannot be destination + return disasm_verbatim(node, instr); + } + break; + case AddrMode::kImmediate: + if (dir_to_addr) { + // immediate cannot be destination + return disasm_verbatim(node, instr); + } + break; + } + const 
auto reg = Arg::Dn((instr >> 9) & 7); + if (dir_to_addr) { + node.op = Op::Typical(opcode, opsize, reg, addr); + } else { + node.op = Op::Typical(opcode, opsize, addr, reg); + } + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_divu_divs_sbcd_or( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + // Also ensures that opsize == OpSize::kByte, i.e. 0b00 + if ((instr & 0x1f0) == 0x100) { + return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kSBCD); + } + const OpSize opsize = static_cast((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + const bool is_signed = (instr >> 8) & 1; + const auto opcode = is_signed ? OpCode::kDIVS : OpCode::kDIVU; + return disasm_divu_divs_mulu_muls(node, instr, code, opcode); + } + return disasm_or_and(node, instr, code, opsize, OpCode::kOR); +} + +static size_t disasm_adda_suba_cmpa( + DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode) +{ + const OpSize opsize = static_cast(((instr >> 8) & 1) + 1); + const auto src = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (src.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + case AddrMode::kAn: + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + break; + } + const unsigned an = (instr >> 9) & 7; + const auto dst = Arg::An(an); + node.op = Op::Typical(opcode, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_add_sub_cmp( + DisasmNode &node, + const uint16_t instr, + const DataView &code, + const OpCode opcode, + const OpSize opsize, + const bool dir_to_addr) +{ + const auto addr = 
FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (addr.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + if (dir_to_addr || opsize == OpSize::kByte) { + // An cannot be destination and An cannot be used as byte + return disasm_verbatim(node, instr); + } + /* Fall through */ + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + break; + case AddrMode::kWord: + case AddrMode::kLong: + if (dir_to_addr) { + node.ref2_addr = static_cast(addr.lword); + node.ref_kinds = kRef2AbsMask | kRef2ReadMask; + } else { + node.ref1_addr = static_cast(addr.lword); + node.ref_kinds = kRef1AbsMask | kRef1ReadMask; + } + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + if (dir_to_addr) { + // PC relative cannot be destination + return disasm_verbatim(node, instr); + } + if (addr.mode == AddrMode::kD16PCAddr) { + node.ref1_addr = node.address + kInstructionSizeStepBytes + + static_cast(addr.d16_pc.d16); + node.ref_kinds = kRef1RelMask | kRef1ReadMask; + } + break; + case AddrMode::kImmediate: + if (dir_to_addr) { + // immediate cannot be destination + return disasm_verbatim(node, instr); + } + break; + } + const unsigned dn = (instr >> 9) & 7; + const auto reg = Arg::Dn(dn); + if (dir_to_addr) { + node.op = Op::Typical(opcode, opsize, reg, addr); + } else { + node.op = Op::Typical(opcode, opsize, addr, reg); + } + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_cmpm(DisasmNode &node, const uint16_t instr) +{ + const OpSize opsize = static_cast((instr >> 6) & 3); + // Must be already handled by parent call + assert(opsize != OpSize::kInvalid); + // M has to be set to 0b001 + assert(((instr >> 3) & 7) == 1); + const int xn = instr & 7; + const int xi = (instr >> 9) & 7; + const auto src = 
Arg::AnAddrIncr(xn); + const auto dst = Arg::AnAddrIncr(xi); + node.op = Op::Typical(OpCode::kCMPM, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = static_cast((instr >> 6) & 3); + const auto addr = FetchArg( + node.address + kInstructionSizeStepBytes, code, instr, opsize); + switch (addr.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + // PC relative and immediate cannot be destination + return disasm_verbatim(node, instr); + } + const auto reg = Arg::Dn((instr >> 9) & 7); + node.op = Op::Typical(OpCode::kEOR, opsize, reg, addr); + return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize); +} + +static size_t disasm_eor_cmpm_cmp_cmpa( + DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = static_cast((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + return disasm_adda_suba_cmpa(node, instr, code, OpCode::kCMPA); + } + const bool dir_to_addr = ((instr >> 8) & 1); + if (!dir_to_addr) { + return disasm_add_sub_cmp(node, instr, code, OpCode::kCMP, opsize, dir_to_addr); + } + const int m = (instr >> 3) & 7; + if (m == 1) { + return disasm_cmpm(node, instr); + } + return disasm_eor(node, instr, code); +} + +static size_t disasm_exg(DisasmNode &node, const uint16_t instr) +{ + assert((instr & 0x130) == 0x100); + const int m1 = (instr >> 3) & 1; + const int m2 = (instr >> 6) & 3; + assert(m2 != 0); // Therefore m == 0 and m == 1 
are impossible + assert(m2 != 3); // Therefore m == 6 and m == 7 are impossible + const int m = (m2 << 1) | m1; + assert(m != 4); // Only m == 2, m == 3 and m == 5 values are allowed + const int xn = instr & 7; + const int xi = (instr >> 9) & 7; + const auto src = (m == 3) ? Arg::An(xi) : Arg::Dn(xi); + const auto dst = (m == 2) ? Arg::Dn(xn) : Arg::An(xn); + // GNU AS does not accept size suffix for EXG, although it's size is always + // long word. + const auto opsize = OpSize::kNone; + node.op = Op::Typical(OpCode::kEXG, opsize, src, dst); + return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); +} + +static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + if ((instr & 0x1f0) == 0x100) { + return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kABCD); + } + const OpSize opsize = static_cast((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + const bool is_signed = (instr >> 8) & 1; + const auto opcode = is_signed ? OpCode::kMULS : OpCode::kMULU; + return disasm_divu_divs_mulu_muls(node, instr, code, opcode); + } + const unsigned m_split = instr & 0x1f8; + if (m_split == 0x188 || m_split == 0x148 || m_split == 0x140) { + return disasm_exg(node, instr); + } + return disasm_or_and(node, instr, code, opsize, OpCode::kAND); +} + +static size_t disasm_add_sub_x_a( + DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode) +{ + const OpSize opsize = static_cast((instr >> 6) & 3); + if (opsize == OpSize::kInvalid) { + return disasm_adda_suba_cmpa(node, instr, code, (opcode == OpCode::kSUB) ? OpCode::kSUBA : OpCode::kADDA); + } + const bool dir_to_addr = (instr >> 8) & 1; + const unsigned m = (instr >> 3) & 7; + if (dir_to_addr && (m == 0 || m == 1)) { + return disasm_addx_subx_abcd_sbcd(node, instr, (opcode == OpCode::kSUB) ? 
OpCode::kSUBX : OpCode::kADDX); + } + return disasm_add_sub_cmp(node, instr, code, opcode, opsize, dir_to_addr); +} + +static OpCode ShiftKindToOpcode(const ShiftKind k, const ShiftDirection d) +{ + switch (k) { + case ShiftKind::kArithmeticShift: + return d == ShiftDirection::kLeft ? OpCode::kASL : OpCode::kASR; + case ShiftKind::kLogicalShift: + return d == ShiftDirection::kLeft ? OpCode::kLSL : OpCode::kLSR; + case ShiftKind::kRotateX: + return d == ShiftDirection::kLeft ? OpCode::kROXL : OpCode::kROXR; + case ShiftKind::kRotate: + return d == ShiftDirection::kLeft ? OpCode::kROL : OpCode::kROR; + } + assert(false); + return OpCode::kNone; +} + +static bool IsValidShiftKind(const ShiftKind k) +{ + return static_cast(k) < 4; +} + +static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const DataView &code) +{ + const OpSize opsize = static_cast((instr >> 6) & 3); + const unsigned xn = instr & 7; + const uint8_t rotation = (instr >> 9) & 7; + const ShiftKind kind = (opsize == OpSize::kInvalid) + ? static_cast(rotation) + : static_cast((instr >> 3) & 3); + if (!IsValidShiftKind(kind)) { + return disasm_verbatim(node, instr); + } + const auto dst = (opsize == OpSize::kInvalid) + ? FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize) + : Arg::Dn(xn); + if (opsize == OpSize::kInvalid) { + switch (dst.mode) { + case AddrMode::kInvalid: + return disasm_verbatim(node, instr); + case AddrMode::kDn: + // Intersects with situation when args are "#1,%dx". GNU AS would + // not understand shift instruction with single argument of "%dx". 
+ return disasm_verbatim(node, instr); + break; + case AddrMode::kAn: + return disasm_verbatim(node, instr); + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + case AddrMode::kLong: + break; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + case AddrMode::kImmediate: + return disasm_verbatim(node, instr); + } + } + const unsigned imm = ((rotation - 1) & 7) + 1; + const unsigned src = (opsize == OpSize::kInvalid) ? 1 : rotation; + const auto dir = static_cast((instr >> 8) & 1); + if (opsize == OpSize::kInvalid) { + node.op = Op::Typical(ShiftKindToOpcode(kind, dir), opsize, dst); + } else { + const unsigned m = (instr >> 5) & 1; + const auto arg1 = m ? Arg::AddrModeXn(ArgType::kDn, src) : Arg::Immediate(imm); + node.op = Op::Typical(ShiftKindToOpcode(kind, dir), opsize, arg1, dst); + } + return node.size = kInstructionSizeStepBytes + dst.Size(opsize); +} + +static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataView &c) +{ + switch ((i & 0xf000) >> 12) { + case 0x0: + return disasm_bitops_movep(n, i, c); + case 0x1: + case 0x2: + case 0x3: + return disasm_move_movea(n, i, c); + case 0x4: + return disasm_chunk_4(n, i, c); + case 0x5: + return disasm_addq_subq_scc_dbcc(n, i, c); + case 0x6: + return disasm_bra_bsr_bcc(n, i, c); + case 0x7: + return disasm_moveq(n, i); + case 0x8: + return disasm_divu_divs_sbcd_or(n, i, c); + case 0x9: + return disasm_add_sub_x_a(n, i, c, OpCode::kSUB); + case 0xa: + // Does not exist + return disasm_verbatim(n, i); + case 0xb: + return disasm_eor_cmpm_cmp_cmpa(n, i, c); + case 0xc: + return disasm_chunk_c(n, i, c); + case 0xd: + return disasm_add_sub_x_a(n, i, c, OpCode::kADD); + case 0xe: + return disasm_shift_rotate(n, i, c); + case 0xf: + // Does not exist + return disasm_verbatim(n, i); + } + assert(false); + return disasm_verbatim(n, i); +} + +size_t DisasmNode::Disasm(const DataView &code) +{ 
+ // We assume that machine have no MMU and ROM data always starts at 0 + assert(this->address < code.size); + size = kInstructionSizeStepBytes; + ref_kinds = 0; + ref1_addr = 0; + ref2_addr = 0; + const uint16_t instr = GetU16BE(code.buffer + this->address); + if (IsInstruction(this->type)) { + return m68k_disasm(*this, instr, code); + } else { + // Data should not be disassembled + return disasm_verbatim(*this, instr); + } +} + +size_t DisasmNode::DisasmAsRaw(const DataView &code) +{ + // We assume that machine have no MMU and ROM data always starts at 0 + assert(this->address < code.size); + size = kInstructionSizeStepBytes; + ref_kinds = 0; + ref1_addr = 0; + ref2_addr = 0; + const uint16_t instr = GetU16BE(code.buffer + this->address); + return disasm_verbatim(*this, instr); +} + +static const char *ToString(const OpCode opcode, const Condition condition) +{ + switch (opcode) { + case OpCode::kNone: + assert(false); + break; + case OpCode::kRaw: return ".short"; + case OpCode::kORI: return "ori"; + case OpCode::kANDI: return "andi"; + case OpCode::kSUBI: return "subi"; + case OpCode::kADDI: return "addi"; + case OpCode::kEORI: return "eori"; + case OpCode::kCMPI: return "cmpi"; + case OpCode::kBTST: return "btst"; + case OpCode::kBCHG: return "bchg"; + case OpCode::kBCLR: return "bclr"; + case OpCode::kBSET: return "bset"; + case OpCode::kMOVEP: return "movep"; + case OpCode::kMOVEA: return "movea"; + case OpCode::kMOVE: return "move"; + case OpCode::kNEGX: return "negx"; + case OpCode::kCLR: return "clr"; + case OpCode::kNEG: return "neg"; + case OpCode::kNOT: return "not"; + case OpCode::kEXT: return "ext"; + case OpCode::kNBCD: return "nbcd"; + case OpCode::kSWAP: return "swap"; + case OpCode::kPEA: return "pea"; + case OpCode::kILLEGAL: return "illegal"; + case OpCode::kTAS: return "tas"; + case OpCode::kTST: return "tst"; + case OpCode::kTRAP: return "trap"; + case OpCode::kLINK: return "link"; + case OpCode::kUNLK: return "unlk"; + case OpCode::kRESET: 
return "reset"; + case OpCode::kNOP: return "nop"; + case OpCode::kSTOP: return "stop"; + case OpCode::kRTE: return "rte"; + case OpCode::kRTS: return "rts"; + case OpCode::kTRAPV: return "trapv"; + case OpCode::kRTR: return "rtr"; + case OpCode::kJSR: return "jsr"; + case OpCode::kJMP: return "jmp"; + case OpCode::kMOVEM: return "movem"; + case OpCode::kLEA: return "lea"; + case OpCode::kCHK: return "chk"; + case OpCode::kADDQ: return "addq"; + case OpCode::kSUBQ: return "subq"; + case OpCode::kScc: + switch(condition) { + case Condition::kT : return "st"; + case Condition::kF: return "sf"; + case Condition::kHI: return "shi"; + case Condition::kLS: return "sls"; + case Condition::kCC: return "scc"; + case Condition::kCS: return "scs"; + case Condition::kNE: return "sne"; + case Condition::kEQ: return "seq"; + case Condition::kVC: return "svc"; + case Condition::kVS: return "svs"; + case Condition::kPL: return "spl"; + case Condition::kMI: return "smi"; + case Condition::kGE: return "sge"; + case Condition::kLT: return "slt"; + case Condition::kGT: return "sgt"; + case Condition::kLE: return "sle"; + } + assert(false); + break; + case OpCode::kDBcc: + switch (condition) { + case Condition::kT: return "dbt"; + case Condition::kF: return "dbf"; + case Condition::kHI: return "dbhi"; + case Condition::kLS: return "dbls"; + case Condition::kCC: return "dbcc"; + case Condition::kCS: return "dbcs"; + case Condition::kNE: return "dbne"; + case Condition::kEQ: return "dbeq"; + case Condition::kVC: return "dbvc"; + case Condition::kVS: return "dbvs"; + case Condition::kPL: return "dbpl"; + case Condition::kMI: return "dbmi"; + case Condition::kGE: return "dbge"; + case Condition::kLT: return "dblt"; + case Condition::kGT: return "dbgt"; + case Condition::kLE: return "dble"; + } + assert(false); + break; + case OpCode::kBcc: + switch (condition) { + case Condition::kT: return "bra"; + case Condition::kF: return "bsr"; + case Condition::kHI: return "bhi"; + case 
Condition::kLS: return "bls"; + case Condition::kCC: return "bcc"; + case Condition::kCS: return "bcs"; + case Condition::kNE: return "bne"; + case Condition::kEQ: return "beq"; + case Condition::kVC: return "bvc"; + case Condition::kVS: return "bvs"; + case Condition::kPL: return "bpl"; + case Condition::kMI: return "bmi"; + case Condition::kGE: return "bge"; + case Condition::kLT: return "blt"; + case Condition::kGT: return "bgt"; + case Condition::kLE: return "ble"; + } + assert(false); + break; + case OpCode::kMOVEQ: return "moveq"; + case OpCode::kDIVU: return "divu"; + case OpCode::kDIVS: return "divs"; + case OpCode::kSBCD: return "sbcd"; + case OpCode::kOR: return "or"; + case OpCode::kSUB: return "sub"; + case OpCode::kSUBX: return "subx"; + case OpCode::kSUBA: return "suba"; + case OpCode::kEOR: return "eor"; + case OpCode::kCMPM: return "cmpm"; + case OpCode::kCMP: return "cmp"; + case OpCode::kCMPA: return "cmpa"; + case OpCode::kMULU: return "mulu"; + case OpCode::kMULS: return "muls"; + case OpCode::kABCD: return "abcd"; + case OpCode::kEXG: return "exg"; + case OpCode::kAND: return "and"; + case OpCode::kADD: return "add"; + case OpCode::kADDX: return "addx"; + case OpCode::kADDA: return "adda"; + case OpCode::kASR: return "asr"; + case OpCode::kASL: return "asl"; + case OpCode::kLSR: return "lsr"; + case OpCode::kLSL: return "lsl"; + case OpCode::kROXR: return "roxr"; + case OpCode::kROXL: return "roxl"; + case OpCode::kROR: return "ror"; + case OpCode::kROL: return "rol"; + } + assert(false); + return "?"; +} + +static const char *ToString(const OpSize s) +{ + switch (s) { + case OpSize::kNone: return ""; + case OpSize::kByte: return "b"; + case OpSize::kShort: return "s"; + case OpSize::kWord: return "w"; + case OpSize::kLong: return "l"; + } + assert(false); + return ""; +} + +static int OpcodeSNPrintf( + char *const buf, + const size_t bufsz, + const OpCode opcode, + const Condition condition, + const OpSize size_spec) +{ + return snprintf(buf, 
bufsz, "%s%s", ToString(opcode, condition), ToString(size_spec)); +} + +static char RegChar(const uint8_t xi) +{ + return (xi & 0x08) ? 'a' : 'd'; +} + +static char SizeSpecChar(const uint8_t xi) +{ + return (xi & 0x10) ? 'l' : 'w'; +} + +static unsigned RegNum(const uint8_t xi) +{ + return xi & 0x7; +} + +static size_t snprint_reg_mask( + char *const buf, const size_t bufsz, const uint32_t regmask_arg, const ArgType arg_type) +{ + const uint32_t regmask = regmask_arg & 0xffff; + size_t written = 0; + bool first_printed = 0; + size_t span = 0; + // 17-th bit used to close the span with 0 value unconditionally + for (int i = 0; i < 17; i++) { + const uint32_t mask = 1 << (arg_type == ArgType::kRegMaskPredecrement ? (15 - i) : i); + const bool hit = regmask & mask; + const bool span_open = hit && span == 0; + const bool span_closed = !hit && span > 1; + const int printable_i = i - (span_closed ? 1 : 0); + const int id = printable_i % 8; + const char regtype = (printable_i >= 8) ? 'a' : 'd'; + if (span_open || span_closed) { + const char *const delimiter = span_open ? (first_printed ? "/" : "") : "-"; + const size_t remaining = bufsz - written; + const int ret = snprintf(buf + written, remaining, "%s%%%c%d", delimiter, regtype, id); + assert(ret > 0); + assert(static_cast(ret) >= sizeof("%d0")-1); + assert(static_cast(ret) <= sizeof("-%d0")-1); + written += Min(remaining, ret); + first_printed = true; + } + span = hit ? 
span + 1 : 0; + } + assert(written < bufsz); // Output must not be truncated + return written; +} + +int Arg::SNPrint( + char *const buf, + const size_t bufsz, + const bool imm_as_hex, + const RefKindMask ref_kinds, + const char *const label, + const uint32_t self_addr, + const uint32_t ref_addr) const +{ + switch (type) { + case ArgType::kNone: + assert(false); + break; + case ArgType::kRaw: + return snprintf(buf, bufsz, "0x%04x", uword); + case ArgType::kDn: + return snprintf(buf, bufsz, "%%d%d", xn); + case ArgType::kAn: + return snprintf(buf, bufsz, "%%a%u", xn); + case ArgType::kAnAddr: + return snprintf(buf, bufsz, "%%a%u@", xn); + case ArgType::kAnAddrIncr: + return snprintf(buf, bufsz, "%%a%u@+", xn); + case ArgType::kAnAddrDecr: + return snprintf(buf, bufsz, "%%a%u@-", xn); + case ArgType::kD16AnAddr: + return snprintf(buf, bufsz, "%%a%u@(%d:w)", d16_an.an, d16_an.d16); + case ArgType::kD8AnXiAddr: + return snprintf( + buf, bufsz, "%%a%u@(%d,%%%c%u:%c)", + d8_an_xi.an, + d8_an_xi.d8, + RegChar(d8_an_xi.xi), + RegNum(d8_an_xi.xi), + SizeSpecChar(d8_an_xi.xi)); + case ArgType::kWord: + case ArgType::kLong: + { + const char c = type == ArgType::kLong ? 'l' : 'w'; + if (ref_kinds & kRefAbsMask) { + if (static_cast(lword) == ref_addr) { + return snprintf(buf, bufsz, "%s:%c", label, c); + } else { + // It has to be AFTER the label we are gonna reference here + assert(static_cast(lword) > ref_addr); + return snprintf(buf, bufsz, "%s+%d:%c", label, lword - ref_addr, c); + } + } else { + return snprintf(buf, bufsz, "0x%x:%c", lword, c); + } + } + case ArgType::kD16PCAddr: + if (ref_kinds & kRefRelMask) { + // XXX: Most of instructions with PC relative values have 2 bytes + // added to the offset, some does not. Still figuring that out. + const bool has_fix = ref_kinds & kRefPcRelFix2Bytes; + const uint32_t arg_addr = self_addr + d16_pc.d16 + kInstructionSizeStepBytes + (has_fix ? 
kInstructionSizeStepBytes : 0); + if (arg_addr == ref_addr) { + return snprintf(buf, bufsz, "%%pc@(%s:w)", label); + } else { + assert(arg_addr > ref_addr); + return snprintf(buf, bufsz, "%%pc@(%s+%d:w)", label, arg_addr - ref_addr); + } + } else { + return snprintf(buf, bufsz, "%%pc@(%d:w)", d16_pc.d16); + } + case ArgType::kD8PCXiAddr: + return snprintf( + buf, bufsz, "%%pc@(%d,%%%c%u:%c)", + d8_pc_xi.d8, + RegChar(d8_pc_xi.xi), + RegNum(d8_pc_xi.xi), + SizeSpecChar(d8_pc_xi.xi)); + case ArgType::kImmediate: + if (ref_kinds & kRef1ImmMask) { + if (static_cast(lword) == ref_addr) { + return snprintf(buf, bufsz, "#%s", label); + } else { + // It has to be AFTER the label we are gonna reference here + assert(static_cast(lword) > ref_addr); + return snprintf(buf, bufsz, "#%s+%d", label, lword - ref_addr); + } + } else if (imm_as_hex) { + return snprintf(buf, bufsz, "#0x%x", lword); + } else { + return snprintf(buf, bufsz, "#%d", lword); + } + case ArgType::kRegMask: + case ArgType::kRegMaskPredecrement: + return snprint_reg_mask(buf, bufsz, uword, type); + case ArgType::kDisplacement: + if (ref_kinds & kRefRelMask) { + if (static_cast(self_addr + lword) == ref_addr) { + return snprintf(buf, bufsz, "%s", label); + } else { + assert(static_cast(self_addr + lword) > ref_addr); + return snprintf(buf, bufsz, "%s+%d", label, (self_addr + lword) - ref_addr); + } + } else { + return snprintf(buf, bufsz, ".%s%d", lword >= 0 ? 
"+" : "", lword); + } + case ArgType::kCCR: + return snprintf(buf, bufsz, "%%ccr"); + case ArgType::kSR: + return snprintf(buf, bufsz, "%%sr"); + case ArgType::kUSP: + return snprintf(buf, bufsz, "%%usp"); + } + assert(false); + return -1; +} + +int Op::FPrint( + FILE *const stream, + const char *const indent, + const bool imm_as_hex, + const RefKindMask ref_kinds, + const char *const ref1_label, + const char *const ref2_label, + const uint32_t self_addr, + const uint32_t ref1_addr, + const uint32_t ref2_addr) const +{ + assert(opcode != OpCode::kNone); + char mnemonic_str[kMnemonicBufferSize]{}; + OpcodeSNPrintf(mnemonic_str, kMnemonicBufferSize, opcode, condition, size_spec); + if (arg1.type != ArgType::kNone) { + char arg1_str[kArgsBufferSize]{}; + const RefKindMask ref1_kinds = ref_kinds & (kRef1Mask | kRefPcRelFix2Bytes); + // It is useful to have immediate value printed as hex if destination + // argument is plain address register, status register or condition code + // register. USP is not the case because it's value may be moved only to + // or from An register. 
+ const bool imm_as_hex_2 = imm_as_hex || + arg2.type == ArgType::kAn || + arg2.type == ArgType::kCCR || + arg2.type == ArgType::kSR; + arg1.SNPrint( + arg1_str, + kArgsBufferSize, + imm_as_hex_2, + ref1_kinds, + ref1_label, + self_addr, + ref1_addr); + if (arg2.type != ArgType::kNone) { + char arg2_str[kArgsBufferSize]{}; + const RefKindMask ref2_kinds = ref_kinds & (kRef2Mask | kRefPcRelFix2Bytes); + arg2.SNPrint( + arg2_str, + kArgsBufferSize, + false, + ref2_kinds, + ref2_label, + self_addr, + ref2_addr); + return fprintf(stream, "%s%s %s,%s", indent, mnemonic_str, arg1_str, arg2_str); + } else { + return fprintf(stream, "%s%s %s", indent, mnemonic_str, arg1_str); + } + } else { + return fprintf(stream, "%s%s", indent, mnemonic_str); + } +} + +void DisasmNode::AddReferencedBy(const uint32_t address, const ReferenceType type) +{ + ReferenceNode *node{}; + if (this->last_ref_by) { + node = this->last_ref_by; + } else { + node = new ReferenceNode{}; + assert(node); + this->ref_by = this->last_ref_by = node; + } + node->refs[node->refs_count] = ReferenceRecord{type, address}; + node->refs_count++; + if (node->refs_count >= kRefsCountPerBuffer) { + ReferenceNode *new_node = new ReferenceNode{}; + assert(new_node); + node->next = new_node; + this->last_ref_by = new_node; + } +} + +DisasmNode::~DisasmNode() +{ + ReferenceNode *ref{this->ref_by}; + while (ref) { + ReferenceNode *prev = ref; + ref = ref->next; + delete prev; + } +} diff --git a/src/disasm.h b/src/disasm.h new file mode 100644 index 0000000..65429dc --- /dev/null +++ b/src/disasm.h @@ -0,0 +1,401 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "data_buffer.h" +#include "common.h" + +#include +#include +#include + +enum class OpSize: int { + kByte = 0, + kWord = 1, + kLong = 2, + kInvalid = 3, + kNone = kInvalid, + kShort, ///< Semantically is the same as kByte, pseudosize, used for Bcc +}; + +enum class OpCode: uint8_t { + kNone, + kRaw, ///< Emits ".short" + kORI, + kANDI, + 
kSUBI, + kADDI, + kEORI, + kCMPI, + kBTST, + kBCHG, + kBCLR, + kBSET, + kMOVEP, + kMOVEA, + kMOVE, + kNEGX, + kCLR, + kNEG, + kNOT, + kEXT, + kNBCD, + kSWAP, + kPEA, + kILLEGAL, + kTAS, + kTST, + kTRAP, + kLINK, + kUNLK, + kRESET, + kNOP, + kSTOP, + kRTE, + kRTS, + kTRAPV, + kRTR, + kJSR, + kJMP, + kMOVEM, + kLEA, + kCHK, + kADDQ, + kSUBQ, + kScc, + kDBcc, + kBcc, + kMOVEQ, + kDIVU, + kDIVS, + kSBCD, + kOR, + kSUB, + kSUBX, + kSUBA, + kEOR, + kCMPM, + kCMP, + kCMPA, + kMULU, + kMULS, + kABCD, + kEXG, + kAND, + kADD, + kADDX, + kADDA, + kASR, + kASL, + kLSR, + kLSL, + kROXR, + kROXL, + kROR, + kROL, +}; + +enum class Condition: uint8_t { + kT = 0, + kF = 1, + kHI = 2, + kLS = 3, + kCC = 4, + kCS = 5, + kNE = 6, + kEQ = 7, + kVC = 8, + kVS = 9, + kPL = 10, + kMI = 11, + kGE = 12, + kLT = 13, + kGT = 14, + kLE = 15, +}; + +enum class AddrMode: uint8_t { + kInvalid = 0, + kDn = 1, + kAn = 2, + kAnAddr = 3, + kAnAddrIncr = 4, + kAnAddrDecr = 5, + kD16AnAddr = 6, + kD8AnXiAddr = 7, + kWord = 8, + kLong = 9, + kD16PCAddr = 10, + kD8PCXiAddr = 11, + kImmediate = 12, +}; + +enum class ArgType: uint8_t { + kNone = 0, + kDn = 1, ///< Dn + kAn = 2, ///< An + kAnAddr = 3, ///< (An) + kAnAddrIncr = 4, ///< (An)+ + kAnAddrDecr = 5, ///< -(An) + kD16AnAddr = 6, ///< (d16,An) + kD8AnXiAddr = 7, ///< (d8,An,Xi) + kWord = 8, ///< (xxx).W + kLong = 9, ///< (xxx).L + kD16PCAddr = 10, ///< (d16,PC) + kD8PCXiAddr = 11, ///< (d8,PC,Xn) + kImmediate = 12, ///< #imm + kRegMask, + kRegMaskPredecrement, + kDisplacement, ///< For BRA, BSR, Bcc and DBcc + kCCR, + kSR, + kUSP, + kRaw, ///< Emits "0xXXXX" for ".short" +}; + +struct D8AnPCXiAddr { + uint8_t an; ///< ID number of An reg, for kD8AnXiAddr only + /*! ID number of Xi reg (3 lower bits), for kD8AnXiAddr and kD8PCXiAddr. + * Bit 3 (mask 0x8) means 0 == Dn, 1 == An. + * Bit 4 (mask 0x10) means 0 == Word, 1 == Long. 
+ */ + uint8_t xi; + int8_t d8; ///< Displacement, for kD8AnXiAddr and kD8PCXiAddr +}; + +struct D16AnPCAddr { + uint8_t an; ///< ID number of An reg, for kD16AnAddr only + int16_t d16; ///< Displacement, for D16AnAddr and kD16PCAddr +}; + +static_assert(sizeof(D8AnPCXiAddr) <= sizeof(uint32_t), ""); +static_assert(sizeof(D16AnPCAddr) <= sizeof(uint32_t), ""); + +struct Arg { + union { + ArgType type{ArgType::kNone}; + AddrMode mode; + }; + union { + int32_t lword{}; ///< kLong, kWord, kDisplacement, kImmediate + uint16_t uword; ///< kRegMask, kRaw + uint8_t xn; ///< kDn, kAn, kAnAddr, kAnAddrIncr, kAnAddrDecr + D16AnPCAddr d16_an; ///< kD16AnAddr + D16AnPCAddr d16_pc; ///< kD16PCAddr + D8AnPCXiAddr d8_an_xi; ///< kD8AnXiAddr + D8AnPCXiAddr d8_pc_xi; ///< kD8PCXiAddr + }; + /// Size of the instruction extension: 0, 2 or 4 bytes + constexpr size_t Size(const OpSize s) const + { + switch (mode) { + case AddrMode::kInvalid: + case AddrMode::kDn: + case AddrMode::kAn: + case AddrMode::kAnAddr: + case AddrMode::kAnAddrIncr: + case AddrMode::kAnAddrDecr: + return 0; + case AddrMode::kD16AnAddr: + case AddrMode::kD8AnXiAddr: + case AddrMode::kWord: + return 2; + case AddrMode::kLong: + return 4; + case AddrMode::kD16PCAddr: + case AddrMode::kD8PCXiAddr: + return 2; + case AddrMode::kImmediate: + // Byte and Word immediate are of 2 bytes length + return s == OpSize::kLong ? 
4 : 2; + } + return 0; + } + static constexpr auto AddrModeXn(const ArgType type, const uint8_t xn) { + Arg a{{type}, {0}}; + a.xn = xn; + return a; + } + static constexpr auto Dn(const uint8_t xn) { return AddrModeXn(ArgType::kDn, xn); } + static constexpr auto An(const uint8_t xn) { return AddrModeXn(ArgType::kAn, xn); } + static constexpr auto AnAddr(const uint8_t xn) { return AddrModeXn(ArgType::kAnAddr, xn); } + static constexpr auto AnAddrIncr(const uint8_t xn) + { + return AddrModeXn(ArgType::kAnAddrIncr, xn); + } + static constexpr auto AnAddrDecr(const uint8_t xn) + { + return AddrModeXn(ArgType::kAnAddrDecr, xn); + } + static constexpr auto D16AnAddr(const uint8_t xn, const int16_t d16) + { + Arg a{{ArgType::kD16AnAddr}, {0}}; + a.d16_an = D16AnPCAddr{xn, d16}; + return a; + } + static constexpr auto D16PCAddr(const int16_t d16) + { + Arg a{{ArgType::kD16PCAddr}, {0}}; + a.d16_pc = D16AnPCAddr{0, d16}; + return a; + } + static constexpr auto Word(const int16_t w) + { + Arg a{{ArgType::kWord}, {0}}; + a.lword = w; + return a; + } + static constexpr auto Long(const int32_t l) + { + Arg a{{ArgType::kLong}, {0}}; + a.lword = l; + return a; + } + static constexpr auto D8AnXiAddr( + const uint8_t xn, const uint8_t xi, const OpSize s, const int8_t d8) + { + Arg a{{ArgType::kD8AnXiAddr}, {0}}; + a.d8_an_xi = D8AnPCXiAddr{xn, uint8_t(xi | (s == OpSize::kLong ? 0x10u : 0u)), d8}; + return a; + } + static constexpr auto D8PCXiAddr( + const uint8_t xn, const uint8_t xi, const OpSize s, const int8_t d8) + { + Arg a{{ArgType::kD8PCXiAddr}, {0}}; + a.d8_pc_xi = D8AnPCXiAddr{xn, uint8_t(xi | (s == OpSize::kLong ? 
0x10u : 0u)), d8}; + return a; + } + static constexpr auto Immediate(const int32_t value) { + Arg a{{ArgType::kImmediate}, {0}}; + a.lword = value; + return a; + } + static constexpr auto RegMask(const uint16_t regmask) { + Arg a{{ArgType::kRegMask}, {0}}; + a.uword = regmask; + return a; + } + static constexpr auto RegMaskPredecrement(const uint16_t regmask) { + Arg a{{ArgType::kRegMaskPredecrement}, {0}}; + a.uword = regmask; + return a; + } + static constexpr auto Displacement(const int32_t displacement) { + Arg a{{ArgType::kDisplacement}, {0}}; + a.lword = displacement; + return a; + } + static constexpr auto CCR() { return Arg{{ArgType::kCCR}, {0}}; } + static constexpr auto SR() { return Arg{{ArgType::kSR}, {0}}; } + static constexpr auto USP() { return Arg{{ArgType::kUSP}, {0}}; } + static constexpr auto Raw(const uint16_t instr) { + Arg a{{ArgType::kRaw}, {0}}; + a.uword = instr; + return a; + } + int SNPrint( + char *buf, + size_t bufsz, + bool imm_as_hex = false, + RefKindMask ref_kinds = 0, + const char *label = nullptr, + uint32_t self_addr = 0, + uint32_t ref_addr = 0) const; +}; + +enum class NodeType { + kTracedInstruction, + kRefInstruction, + kData, +}; + +constexpr size_t kRefsCountPerBuffer = 10; + +constexpr size_t kMnemonicBufferSize = 10; +constexpr size_t kArgsBufferSize = 80; + +enum class ReferenceType { + kUnknown = 0, + kCall, + kBranch, + kRead, + kWrite, +}; + +struct ReferenceRecord { + ReferenceType type{}; + uint32_t address{}; +}; + +struct ReferenceNode { + ReferenceNode *next{}; + ReferenceRecord refs[kRefsCountPerBuffer]; + uint32_t refs_count{}; +}; + +struct Op { + OpCode opcode{OpCode::kNone}; ///< Identifies instruction (mnemonic) + /// Size specifier, the suffix `b`, `w` or `l` + OpSize size_spec{OpSize::kNone}; + Condition condition{Condition::kT}; ///< For Scc, Bcc and Dbcc + Arg arg1{}; ///< First argument, optional + Arg arg2{}; ///< Second argument, optional, cannot be set if arg1 is not set + static constexpr auto 
Typical( + const OpCode opcode = OpCode::kNone, + const OpSize opsize = OpSize::kNone, + const Arg arg1 = Arg{}, + const Arg arg2 = Arg{}) + { + return Op{opcode, opsize, Condition::kT, arg1, arg2}; + } + static constexpr auto Raw(const uint16_t instr) + { + return Op::Typical(OpCode::kRaw, OpSize::kNone, Arg::Raw(instr)); + } + int FPrint( + FILE *, + const char *indent, + bool imm_as_hex, + RefKindMask ref_kinds = 0, + const char *ref1_label = nullptr, + const char *ref2_label = nullptr, + uint32_t self_addr = 0, + uint32_t ref1_addr = 0, + uint32_t ref2_addr = 0) const; +}; + +struct DisasmNode { + const NodeType type{}; + /// Address of the instruction (PC value basically) + const uint32_t address{}; + /// Instruction size in bytes + size_t size{kInstructionSizeStepBytes}; + /// Indicates whether `ref_addr` should be interpreted and how + RefKindMask ref_kinds{}; + /// Address of first argument reference + uint32_t ref1_addr{}; + /// Address of second argument reference + uint32_t ref2_addr{}; + ReferenceNode *ref_by{}; + ReferenceNode *last_ref_by{}; + Op op{}; + + /*! 
Disassembles instruction with arguments + * returns size of whole instruction with arguments in bytes + */ + size_t Disasm(const DataView &code); + size_t DisasmAsRaw(const DataView &code); + void AddReferencedBy(uint32_t address, ReferenceType); + ~DisasmNode(); +}; + +static constexpr inline bool IsInstruction(NodeType t) +{ + return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction; +} + +static constexpr inline bool IsBRA(Op op) +{ + return op.opcode == OpCode::kBcc && op.condition == Condition::kT; +} diff --git a/src/elf_format.h b/src/elf_format.h new file mode 100644 index 0000000..b5a268a --- /dev/null +++ b/src/elf_format.h @@ -0,0 +1,328 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include +#include + +namespace ELF { + +constexpr size_t kIdentSize = 16; +constexpr size_t kHeaderSize = kIdentSize + 36; +constexpr size_t kMagicSize = 4; +constexpr size_t kProgramHeaderSize = 32; + +using Address = uint32_t; +using Offset = uint32_t; + +enum class FileClass : uint8_t { + kNone = 0, + k32 = 1, + k64 = 2, + kUnknown, +}; + +enum class DataEncoding : uint8_t { + kNone = 0, + k2LSB = 1, + kLE = k2LSB, + k2MSB = 2, + kBE = k2MSB, + kUnknown, +}; + +enum class Version : uint8_t { + kNone = 0, + kCurrent = 1, + kUnknown, +}; + +static constexpr inline auto ParseFileClass(const uint8_t file_class) +{ + switch (file_class) { + case static_cast(FileClass::kNone): return FileClass::kNone; + case static_cast(FileClass::k32): return FileClass::k32; + case static_cast(FileClass::k64): return FileClass::k64; + } + return FileClass::kUnknown; +} + +static constexpr inline auto ParseDataEncoding(const uint8_t data_encoding) +{ + switch (data_encoding) { + case static_cast(DataEncoding::kNone): return DataEncoding::kNone; + case static_cast(DataEncoding::k2LSB): return DataEncoding::k2LSB; + case static_cast(DataEncoding::k2MSB): return DataEncoding::k2MSB; + } + return DataEncoding::kUnknown; +} + +static constexpr inline auto 
ParseVersion(const uint8_t version) +{ + switch (version) { + case static_cast(Version::kNone): return Version::kNone; + case static_cast(Version::kCurrent): return Version::kCurrent; + } + return Version::kUnknown; +} + +struct Ident32Raw { + uint8_t magic[4]; + uint8_t file_class; + uint8_t data_encoding; + uint8_t version; + uint8_t os_abi; + uint8_t abi_version; + uint8_t padding[7]; + static constexpr auto inline FromBytes(const uint8_t *data) + { + return Ident32Raw{ + { data[0], data[1], data[2], data[3] }, + data[4], + data[5], + data[6], + data[7], + data[8], + { data[9], data[10], data[11], data[12], data[13], data[14], data[15], }, + }; + } +}; + +struct Ident32 { + uint8_t magic[4]; + FileClass file_class; + DataEncoding data_encoding; + Version version; + uint8_t os_abi; + uint8_t abi_version; + static constexpr inline auto FromBytes(const uint8_t *data) + { + return Ident32{ + { data[0], data[1], data[2], data[3] }, + ParseFileClass(data[4]), + ParseDataEncoding(data[5]), + ParseVersion(data[6]), + data[7], + data[8], + }; + } + static constexpr inline auto FromIdent32Raw(const Ident32Raw raw) + { + return Ident32{ + { raw.magic[0], raw.magic[1], raw.magic[2], raw.magic[3] }, + ParseFileClass(raw.file_class), + ParseDataEncoding(raw.data_encoding), + ParseVersion(raw.version), + raw.os_abi, + raw.abi_version, + }; + } +}; + +enum class ObjectType : uint16_t { + kNone = 0, + kRel = 1, + kExec = 2, + kDyn = 3, + kCore = 4, + kUnknown = 0x7fff, + kLoProc = 0xff00, + kHiProc = 0xffff, +}; + +enum class Machine : uint16_t { + kNone = 0, + kM32 = 1, + kSPARC = 2, + k386 = 3, + k68k = 4, + k88k = 5, + k860 = 7, + kMIPS = 8, + kUnknown, +}; + +static constexpr inline uint16_t ParseU16(const uint8_t *d, DataEncoding e) +{ + if (e == DataEncoding::k2MSB) { + return uint16_t(d[0]) << 8 | d[1]; + } + return uint16_t(d[1]) << 8 | d[0]; +} + +static constexpr inline uint32_t ParseU32(const uint8_t *d, DataEncoding e) +{ + if (e == DataEncoding::k2MSB) { + return 
uint32_t(d[0]) << 24 | uint32_t(d[1]) << 16 | uint32_t(d[2]) << 8 | d[3]; + } + return uint32_t(d[3]) << 24 | uint32_t(d[2]) << 16 | uint32_t(d[1]) << 8 | d[0]; +} + +static constexpr inline auto ParseObjectType(const uint16_t type) +{ + switch (type) { + case static_cast(ObjectType::kNone): return ObjectType::kNone; + case static_cast(ObjectType::kRel): return ObjectType::kRel; + case static_cast(ObjectType::kExec): return ObjectType::kExec; + case static_cast(ObjectType::kDyn): return ObjectType::kDyn; + case static_cast(ObjectType::kCore): return ObjectType::kCore; + case static_cast(ObjectType::kLoProc): return ObjectType::kLoProc; + case static_cast(ObjectType::kHiProc): return ObjectType::kHiProc; + } + return ObjectType::kUnknown; +} + +static constexpr inline auto ParseMachine(const uint16_t machine) +{ + switch (machine) { + case static_cast(Machine::kNone): return Machine::kNone; + case static_cast(Machine::kM32): return Machine::kM32; + case static_cast(Machine::kSPARC): return Machine::kSPARC; + case static_cast(Machine::k386): return Machine::k386; + case static_cast(Machine::k68k): return Machine::k68k; + case static_cast(Machine::k88k): return Machine::k88k; + case static_cast(Machine::k860): return Machine::k860; + case static_cast(Machine::kMIPS): return Machine::kMIPS; + } + return Machine::kUnknown; +} + +struct Header32Raw { + Ident32Raw ident; + uint16_t type; + uint16_t machine; + uint32_t version; + Address entry; + Offset phoff; + Offset shoff; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstrndx; + static constexpr inline auto FromBytes(const uint8_t *data) + { + const auto ident = Ident32Raw::FromBytes(data); + const DataEncoding e = ParseDataEncoding(ident.data_encoding); + return Header32Raw{ + /* .ident */ ident, + /* .type */ ParseU16(data + kIdentSize + 0, e), + /* .machine */ ParseU16(data + kIdentSize + 2, e), + /* .version */ ParseU32(data + 
kIdentSize + 4, e), + /* .entry */ ParseU32(data + kIdentSize + 8, e), + /* .phoff */ ParseU32(data + kIdentSize + 12, e), + /* .shoff */ ParseU32(data + kIdentSize + 16, e), + /* .flags */ ParseU32(data + kIdentSize + 20, e), + /* .ehsize */ ParseU16(data + kIdentSize + 24, e), + /* .phentsize */ ParseU16(data + kIdentSize + 26, e), + /* .phnum */ ParseU16(data + kIdentSize + 28, e), + /* .shentsize */ ParseU16(data + kIdentSize + 30, e), + /* .shnum */ ParseU16(data + kIdentSize + 32, e), + /* .shstrndx */ ParseU16(data + kIdentSize + 34, e), + }; + } +}; + +struct Header32 { + Ident32 ident; + ObjectType type; + Machine machine; + Version version; + Address entry; + Offset phoff; + Offset shoff; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstrndx; + static constexpr inline auto FromBytes(const uint8_t *data) + { + const auto raw = Header32Raw::FromBytes(data); + return Header32{ + Ident32::FromIdent32Raw(raw.ident), + ParseObjectType(raw.type), + ParseMachine(raw.machine), + ParseVersion(raw.version), + raw.entry, + raw.phoff, + raw.shoff, + raw.flags, + raw.ehsize, + raw.phentsize, + raw.phnum, + raw.shentsize, + raw.shnum, + raw.shstrndx, + }; + } +}; + +enum class PHType : uint32_t { + kNull = 0, + kLoad = 1, + kDynamic = 2, + kInterp = 3, + kNote = 4, + kSHLIB = 5, + kProgramHeaderTable = 6, + kLoProc = 0x70000000, + kHiProc = 0x7fffffff, + kUnknown, +}; + +static constexpr inline auto ParsePHType(const uint32_t type) +{ + switch (type) { + case static_cast(PHType::kNull): return PHType::kNull; + case static_cast(PHType::kLoad): return PHType::kLoad; + case static_cast(PHType::kDynamic): return PHType::kDynamic; + case static_cast(PHType::kInterp): return PHType::kInterp; + case static_cast(PHType::kNote): return PHType::kNote; + case static_cast(PHType::kSHLIB): return PHType::kSHLIB; + case static_cast(PHType::kProgramHeaderTable): return PHType::kProgramHeaderTable; + 
case static_cast(PHType::kLoProc): return PHType::kLoProc; + case static_cast(PHType::kHiProc): return PHType::kHiProc; + } + return PHType::kUnknown; +} + +constexpr uint32_t kPHFlagX = 1 << 0; +constexpr uint32_t kPHFlagW = 1 << 1; +constexpr uint32_t kPHFlagR = 1 << 2; + +struct ProgramHeader32 { + uint32_t type; + Offset offset; + Address vaddr; + Address paddr; + uint32_t filesz; + uint32_t memsz; + uint32_t flags; + uint32_t align; + static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e) + { + return ProgramHeader32{ + /* type */ ParseU32(data + 0, e), + /* offset */ ParseU32(data + 4, e), + /* vaddr */ ParseU32(data + 8, e), + /* paddr */ ParseU32(data + 12, e), + /* filesz */ ParseU32(data + 16, e), + /* memsz */ ParseU32(data + 20, e), + /* flags */ ParseU32(data + 24, e), + /* align */ ParseU32(data + 28, e), + }; + } +}; + +static constexpr inline bool MagicIsValid(const uint8_t *m) +{ + return m[0] == 0x7f && m[1] == 'E' && m[2] == 'L' && m[3] == 'F'; +} + +}; diff --git a/src/elf_image.cpp b/src/elf_image.cpp new file mode 100644 index 0000000..6db72f3 --- /dev/null +++ b/src/elf_image.cpp @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_image.h" + +#include +#include + +ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes( + const DataView &d, const DataEncoding e) +{ + if (d.buffer == nullptr || d.size == 0) { + return ELF::ProgramHeader32Table{}; + } + assert(d.size % kProgramHeaderSize == 0); + const size_t size = d.size / kProgramHeaderSize; + auto *headers = new ProgramHeader32[size]; + assert(headers != nullptr); + for (size_t i = 0; i < size; i++) { + headers[i] = ProgramHeader32::FromBytes(d.buffer + i * kProgramHeaderSize, e); + } + return ELF::ProgramHeader32Table{ headers, size, }; +} + +static char *ValidateELF(const DataView& d) +{ + char *error; + size_t size; + FILE *s = open_memstream(&error, &size); + assert(s); + using namespace ELF; + if (d.size < kHeaderSize) { + 
fprintf( + s, + "data size (%zu) is lower than minimum ELF header size (%zu): " + "ELF header could not fit", + d.size, + kHeaderSize); + fclose(s); + return error; + } + const auto header_raw = Header32Raw::FromBytes(d.buffer); + const auto header = Header32::FromBytes(d.buffer); + if (!MagicIsValid(header.ident.magic)) { + const uint8_t *m = header.ident.magic; + fprintf( + s, + "ELF Magic is invalid: expected [%02x %02x %02x %02x], got [%02x %02x %02x %02x]", + 0x7f, 'E', 'L', 'F', + m[0], m[1], m[2], m[3]); + fclose(s); + return error; + } + if (header.ident.version != Version::kCurrent) { + fprintf( + s, + "version (0x%02x) of ELF header.ident.version is not supported, " + "only \"Current\" version (0x%02x) is supported", + header_raw.ident.version, + static_cast(Version::kCurrent)); + fclose(s); + return error; + } + if (header.version != Version::kCurrent) { + fprintf( + s, + "version (0x%02x) of ELF header.version is not supported, " + "only \"Current\" version (0x%02x) is supported", + header_raw.version, + static_cast(Version::kCurrent)); + fclose(s); + return error; + } + if (header.type != ObjectType::kExec) { + fprintf( + s, + "object type (0x%02x) is not supported, " + "only Exec (0x%02x) object type is supported", + header_raw.type, + static_cast(ObjectType::kExec)); + fclose(s); + return error; + } + if (header.machine != Machine::k68k) { + fprintf( + s, + "machine (0x%02x) is not supported, " + "only Motorola 68k (0x%02x) machine is supported", + header_raw.machine, + static_cast(Machine::k68k)); + fclose(s); + return error; + } + if (header.phentsize != kProgramHeaderSize) { + fprintf( + s, + "phentsize is invalid: expected (%zu), got (%zu)", + kProgramHeaderSize, + size_t(header.phentsize)); + fclose(s); + return error; + } + if (d.size < header.phoff + header.phentsize * header.phnum) { + fprintf( + s, + "data size (%zu) is lower than program header table end offset (%zu): " + "program header table could not fit", + d.size, + size_t(header.phoff 
+ header.phentsize * header.phnum)); + fclose(s); + return error; + } + bool has_segment_with_entry = false; + for (size_t i = 0; i < header.phnum; i++) { + const auto ph = ProgramHeader32::FromBytes( + d.buffer + header.phoff + header.phentsize * i, header.ident.data_encoding); + if (d.size < ph.offset + ph.filesz) { + fprintf( + s, + "data size (%zu) is lower than pht[%zu] segment end offset (%zu): " + "segment could not fit", + d.size, + i, + size_t(ph.offset + ph.filesz)); + fclose(s); + return error; + } + const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == (kPHFlagX | kPHFlagR); + if (ParsePHType(ph.type) == PHType::kLoad && is_code && ph.vaddr != 0) { + fprintf( + s, + "pht[%zu] segment is a code, but it's vaddr (0x%08x) is not zero: " + "non-zero base address is not supported", + i, + ph.vaddr); + fclose(s); + return error; + } + const bool contains_entry = header.entry >= ph.vaddr && header.entry < ph.vaddr + ph.memsz; + if (ParsePHType(ph.type) == PHType::kLoad && is_code && contains_entry) { + has_segment_with_entry = true; + } + } + if (!has_segment_with_entry) { + fprintf(s, "no code segments containing entry point (0x%08x) found", header.entry); + fclose(s); + return error; + } + fclose(s); + free(error); + return nullptr; +} + +ELF::Image::Image(DataBuffer&& data) + : _data(static_cast(data)) + , _error(ValidateELF(_data.View())) + , _h(_error ? ELF::Header32{} : ELF::Header32::FromBytes(_data.View().buffer)) + , _pht(_error + ? 
ELF::ProgramHeader32Table{} + : ELF::ProgramHeader32Table::FromBytes( + _data.View(_h.phoff, _h.phnum * kProgramHeaderSize), _h.ident.data_encoding)) +{} + +ELF::Image::~Image() +{ + if (_error) { + free(_error); + } + if (_pht.headers) { + delete [] _pht.headers; + } +} diff --git a/src/elf_image.h b/src/elf_image.h new file mode 100644 index 0000000..b7c7123 --- /dev/null +++ b/src/elf_image.h @@ -0,0 +1,55 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_format.h" +#include "data_buffer.h" + +#include + +namespace ELF { + +struct ProgramHeader32Table { + const ProgramHeader32 *headers{}; + size_t size{}; + static ProgramHeader32Table FromBytes(const DataView &, DataEncoding); +}; + +struct Segment { + Segment *next{}; + const DataView view{}; +}; + +class Image { + const DataBuffer _data; + char *const _error; + const Header32 _h; + const ProgramHeader32Table _pht; +public: + explicit Image(DataBuffer&&); + ~Image(); + constexpr bool IsValid() const { return _error == nullptr; } + constexpr const DataBuffer &Data() const { return _data; }; + constexpr const DataView ProgramView() const + { + if (!IsValid()) { + return DataView{}; + } + for (size_t i = 0; i < _pht.size; i++) { + const auto ph = _pht.headers[i]; + const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == + (kPHFlagX | kPHFlagR); + const bool is_load = ParsePHType(ph.type) == PHType::kLoad; + const bool contains_entry = _h.entry >= ph.vaddr && _h.entry < ph.vaddr + ph.memsz; + if (is_load && is_code && ph.vaddr == 0 && contains_entry) + { + return _data.View(ph.offset, ph.filesz); + } + } + return DataView{}; + }; + constexpr const char *Error() const { return _error; } +}; + +} diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..a6f73b3 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,836 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_image.h" +#include "data_buffer.h" +#include "disasm.h" +#include "common.h" + 
+#define OPTPARSE_IMPLEMENTATION +#define OPTPARSE_API static +#include "optparse/optparse.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +enum class DisasmMapType { + kTraced, + kRaw, +}; + +class DisasmMap { + const DisasmMapType _type; + DisasmNode *_map[kDisasmMapSizeElements]{}; + constexpr DisasmNode *findNodeByAddress(uint32_t address) const; + DisasmNode &insertNode(uint32_t address, NodeType); + DisasmNode &insertReferencedBy( + const uint32_t by_addr, + const uint32_t ref_addr, + const NodeType type, + const ReferenceType ref_type); + constexpr bool canBeAllocated(const DisasmNode& node) const; +public: + constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const + { + return findNodeByAddress(address); + }; + void InsertNode(uint32_t address, NodeType type) + { + assert(_type == DisasmMapType::kTraced); + insertNode(address, type); + } + void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); + DisasmMap(DisasmMapType type): _type(type) {} + ~DisasmMap(); +}; + +constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const +{ + if (address < kRomSizeBytes) + return _map[address / kInstructionSizeStepBytes]; + return nullptr; +} + +static constexpr uint32_t AlignInstructionAddress(const uint32_t address) +{ + return address & ~1UL; +} + +DisasmNode &DisasmMap::insertNode(const uint32_t address, const NodeType type) +{ + auto *node = findNodeByAddress(address); + if (node) { + // Instruction nodes take precedence over data nodes. If a node that + // was previously accessed only as data now turns out to be an + // instruction, then it must become an instruction node. 
+ if (IsInstruction(type) && !IsInstruction(node->type)) { + *const_cast(&node->type) = type; + // Make sure it is OpCode::kNone so it will be properly disassembled + node->op = Op{}; + } + return *node; + } + node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)}); + assert(node); + _map[address / kInstructionSizeStepBytes] = node; + return *node; +} + +DisasmNode &DisasmMap::insertReferencedBy( + const uint32_t by_addr, + const uint32_t ref_addr, + const NodeType type, + const ReferenceType ref_type) +{ + auto &ref_node = insertNode(ref_addr, type); + ref_node.AddReferencedBy(by_addr, ref_type); + return ref_node; +} + +constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const +{ + const auto size = node.size / kInstructionSizeStepBytes; + const auto *const node_real = findNodeByAddress(node.address); + for (size_t i = 1; i < size; i++) { + const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i]; + if (ptr != nullptr && ptr != node_real) { + return false; + } + } + return true; +} + +static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds) +{ + return (ref_kinds & kRefCallMask) + ? ReferenceType::kCall + : (ref_kinds & kRef1ReadMask) + ? ReferenceType::kRead + : (ref_kinds & kRef1WriteMask) + ? ReferenceType::kWrite + : ReferenceType::kBranch; +} + +static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) +{ + return (ref_kinds & kRefCallMask) + ? ReferenceType::kCall + : (ref_kinds & kRef2ReadMask) + ? ReferenceType::kRead + : (ref_kinds & kRef2WriteMask) + ? 
ReferenceType::kWrite + : ReferenceType::kBranch; +} + +static constexpr bool IsNextLikelyAnInstruction(const Op &op) +{ + return (op.opcode != OpCode::kNone && + op.opcode != OpCode::kRaw && + !IsBRA(op) && + op.opcode != OpCode::kJMP && + op.opcode != OpCode::kRTS && + op.opcode != OpCode::kRTE && + op.opcode != OpCode::kSTOP); +} + +void DisasmMap::Disasm( + const DataView &code, const Settings &s, size_t at, bool nested) +{ + // Some of logic of this function is covered by integration tests in + // `test_walk_and_follow_jumps.bash`. + bool inside_code_span = nested; + while (at < Min(kRomSizeBytes, code.size)) { + DisasmNode *node; + if (_type == DisasmMapType::kTraced) { + node = _map[at / kInstructionSizeStepBytes]; + if (!node) { + if (inside_code_span) { + node = &insertNode(at, NodeType::kTracedInstruction); + } else { + at += kInstructionSizeStepBytes; + continue; + } + } + } else { + node = &insertNode(at, NodeType::kTracedInstruction); + } + if (node->op.opcode == OpCode::kNone || inside_code_span) { + const auto size = node->Disasm(code); + assert(size >= kInstructionSizeStepBytes); + if (canBeAllocated(*node)) { + // Spread across the size + for (size_t o = kInstructionSizeStepBytes; o < size; o++) { + _map[(node->address + o) / kInstructionSizeStepBytes] = node; + } + } else { + node->DisasmAsRaw(code); + } + } + inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op); + if (nested && !inside_code_span) { + return; + } + at += node->size; + // NOTE: There is not much information about a reference passed further, + // so just don't add a reference of immediate if s.imm_labels is false + // enabled. + const bool has_ref1 = (node->ref_kinds & kRef1ImmMask) + ? s.imm_labels + : (node->ref_kinds & kRef1Mask); + const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1; + if (has_code_ref1) { + const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) + ? 
NodeType::kData : NodeType::kRefInstruction; + const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds); + auto &ref_node = insertReferencedBy( + node->address, node->ref1_addr, type, ref_type); + if (ref_node.op.opcode == OpCode::kNone) { + if (s.follow_jumps) { + Disasm(code, s, ref_node.address, true); + } else { + ref_node.DisasmAsRaw(code); + } + } + } + const bool has_ref2 = (node->ref_kinds & kRef2Mask); + const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size); + if (has_code_ref2) { + const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) + ? NodeType::kData : NodeType::kRefInstruction; + const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds); + auto &ref_node = insertReferencedBy( + node->address, node->ref2_addr, type, ref_type); + if (ref_node.op.opcode == OpCode::kNone) { + if (s.follow_jumps) { + Disasm(code, s, ref_node.address, true); + } else { + ref_node.DisasmAsRaw(code); + } + } + } + } +} + +DisasmMap::~DisasmMap() +{ + for (size_t i = 0; i < kDisasmMapSizeElements; i++) { + auto *const node = _map[i]; + if (!node) { + continue; + } + const auto size = node->size / kInstructionSizeStepBytes; + for (size_t o = 0; o < size; o++) { + assert(_map[i + o] == node); + _map[i + o] = nullptr; + } + delete node; + i += size - 1; + } +} + +static size_t RenderRawDataComment( + char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code) +{ + size_t overall_sz{}; + for (size_t i = 0; i < instr_sz; i += kInstructionSizeStepBytes) + { + overall_sz += Min( + out_sz - overall_sz, + snprintf( + out + overall_sz, + out_sz - overall_sz, + " %04x", + GetU16BE(code.buffer + address + i))); + } + overall_sz += Min( + out_sz - overall_sz, + snprintf(out + overall_sz, out_sz - overall_sz, " @%08x", address)); + return overall_sz; +} + +static constexpr const char *ReferenceTypeToString(ReferenceType type) +{ + switch (type) { + case ReferenceType::kUnknown: return "UNKNOWN"; + case 
ReferenceType::kCall: return "CALL"; + case ReferenceType::kBranch: return "BRANCH"; + case ReferenceType::kRead: return "READ"; + case ReferenceType::kWrite: return "WRITE"; + } + return "UNKN"; +} + +static constexpr bool ShouldPrintAsRaw(const Op& op) +{ + if (op.arg1.type == ArgType::kImmediate) { + if (op.opcode == OpCode::kADD || op.opcode == OpCode::kSUB || + op.opcode == OpCode::kAND || op.opcode == OpCode::kOR || + op.opcode == OpCode::kEOR || op.opcode == OpCode::kCMP) + { + return true; + } + } + return false; +} + +static constexpr bool HasCallReference(const DisasmNode &node) +{ + for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { + for (size_t i = 0; i < ref->refs_count; i++) { + if (ref->refs[i].type == ReferenceType::kCall) { + return true; + } + } + } + return false; +} + +static constexpr size_t GetNodeSizeByAddress(const DisasmMap &disasm_map, const uint32_t address) +{ + const auto *node = disasm_map.FindNodeByAddress(address); + if (node == nullptr) { + return kInstructionSizeStepBytes; + } + return node->size; +} + +static constexpr bool IsLocalLocation(const DisasmMap &disasm_map, const DisasmNode &node) +{ + for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { + for (size_t i = 0; i < ref->refs_count; i++) { + const ReferenceRecord &ref_rec = ref->refs[i]; + if (ref_rec.type == ReferenceType::kCall) { + // Locals are definitely not made for calls + return false; + } + const bool forward = ref_rec.address < node.address; + const size_t min_addr = forward ? ref_rec.address : node.address; + const size_t start = min_addr + GetNodeSizeByAddress(disasm_map, min_addr); + const size_t max_addr = forward ? node.address : ref_rec.address; + const size_t end = max_addr + (forward ? 
0 : GetNodeSizeByAddress(disasm_map, min_addr)); + for (size_t addr = start; addr < end;) { + const auto *intermediate_node = disasm_map.FindNodeByAddress(addr); + if (intermediate_node) { + if (intermediate_node->ref_by) { + // Another labeled node detected on the jump path, hence + // current node's location cannot be considered local + return false; + } + addr += intermediate_node->size; + } else { + addr += kInstructionSizeStepBytes; + } + } + } + } + return true; +} + +static constexpr const char *StringWihoutFristNChars(const char *str, const size_t n) +{ + for (size_t i = 0, tab = 0; i < n && *str; i++, str++) { + if (*str == '\t') { + tab++; + if (tab == 7) { + tab = 0; + str++; + } + } else { + str++; + } + } + return str; +} + +static void RenderNodeDisassembly( + FILE *const output, + const DisasmMap &disasm_map, + const DataView &code, + const Settings &s, + const DisasmNode &node) +{ + if (node.ref_by) { + const bool is_local = IsLocalLocation(disasm_map, node); + if (s.labels && !(s.short_ref_local_labels && is_local)) { + const bool export_this_function = s.export_functions && HasCallReference(node); + const bool export_this_label = s.export_all_labels || + (s.export_labels && node.ref_by && (node.ref_by->refs_count > 1)) || + export_this_function; + if (export_this_label) { + fprintf(output, "\n%s.globl\tL%08x\n", s.indent, node.address); + if (export_this_function) { + fprintf(output, "%s.type\tL%08x, @function\n", s.indent, node.address); + } + } + } + if (s.xrefs_from && !(s.short_ref_local_labels && is_local)) { + fprintf(output, "| XREFS:\n"); + for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { + if (ref->refs_count == 0) { + continue; + } + fprintf(output, "|"); + for (size_t i = 0; i < ref->refs_count; i++) { + const ReferenceRecord r = ref->refs[i]; + fprintf(output, " %s @%08x", ReferenceTypeToString(r.type), r.address); + } + fprintf(output, "\n"); + } + } + if (s.labels) { + if (s.short_ref_local_labels && is_local) { + 
fprintf(output, "1:%s", StringWihoutFristNChars(s.indent, (sizeof "1:") - 1)); + } else { + fprintf(output, "L%08x:\n", node.address); + } + } + } + assert(node.op.opcode != OpCode::kNone); + if (ShouldPrintAsRaw(node.op)) { + auto raw = Op::Raw(GetU16BE(code.buffer + node.address)); + raw.FPrint(output, s.indent, s.imm_hex); + uint32_t i = kInstructionSizeStepBytes; + for (; i < node.size; i += kInstructionSizeStepBytes) { + char arg_str[kArgsBufferSize]{}; + const auto arg = Arg::Raw(GetU16BE(code.buffer + node.address + i)); + arg.SNPrint(arg_str, kArgsBufferSize); + fprintf(output, ", %s", arg_str); + } + } else { + const bool with_ref = node.ref_kinds && s.labels && (s.abs_labels || s.rel_labels); + const auto *ref1 = (node.ref_kinds & kRef1Mask) + ? disasm_map.FindNodeByAddress(node.ref1_addr) : nullptr; + const auto *ref2 = (node.ref_kinds & kRef2Mask) + ? disasm_map.FindNodeByAddress(node.ref2_addr) : nullptr; + const uint32_t ref1_addr = (with_ref && ref1) ? ref1->address : 0; + const uint32_t ref2_addr = (with_ref && ref2) ? ref2->address : 0; + if (with_ref && (ref1 || ref2)) { + const RefKindMask ref_kinds = + (s.abs_labels + ? ((ref1 ? (node.ref_kinds & kRef1AbsMask) : 0) | + (ref2 ? (node.ref_kinds & kRef2AbsMask) : 0)) + : 0) | + (s.rel_labels + ? ((ref1 ? (node.ref_kinds & kRef1RelMask) : 0) | + (ref2 ? (node.ref_kinds & kRef2RelMask) : 0)) + : 0) | + ((s.imm_labels && ref1) ? (node.ref_kinds & kRef1ImmMask) : 0) | + (node.ref_kinds & (kRefDataMask | kRefPcRelFix2Bytes)); + const bool ref1_is_local = !ref1 || IsLocalLocation(disasm_map, *ref1); + char ref1_label[32]{}; + if (ref1) { + if (s.short_ref_local_labels && ref1_is_local) { + const char dir = ref1_addr <= node.address ? 
'b' : 'f'; + snprintf(ref1_label, (sizeof ref1_label), "1%c", dir); + } else { + snprintf(ref1_label, (sizeof ref1_label), "L%08x", ref1_addr); + } + } + const bool ref2_is_local = !ref2 || IsLocalLocation(disasm_map, *ref2); + char ref2_label[32]{}; + if (ref2) { + if (s.short_ref_local_labels && ref2_is_local) { + const char dir = ref2_addr <= node.address ? 'b' : 'f'; + snprintf(ref2_label, (sizeof ref2_label), "1%c", dir); + } else { + snprintf(ref2_label, (sizeof ref2_label), "L%08x", ref2_addr); + } + } + node.op.FPrint( + output, + s.indent, + s.imm_hex, + ref_kinds, + ref1_label, + ref2_label, + node.address, + ref1_addr, + ref2_addr); + const bool ref1_from_imm_ok = ((node.ref_kinds & kRef1ImmMask) ? s.imm_labels : true); + if (s.xrefs_to && !(s.short_ref_local_labels && ref1_is_local) && ref1_from_imm_ok) + { + fprintf(output, " | L%08x", ref1_addr); + } + if (s.xrefs_to && !(s.short_ref_local_labels && ref2_is_local)) { + fprintf(output, " | L%08x", ref2_addr); + } + } else { + node.op.FPrint(output, s.indent, s.imm_hex); + } + } + if (s.raw_data_comment) { + char raw_data_comment[100]{}; + RenderRawDataComment( + raw_data_comment, + (sizeof raw_data_comment) - 1, + node.address, + node.size, code); + fprintf(output, " |%s", raw_data_comment); + } + fprintf(output, "\n"); +} + +static void RenderDisassembly( + FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s) +{ + for (size_t i = 0; i < code.size;) { + const DisasmNode *node = disasm_map.FindNodeByAddress(i); + if (node) { + RenderNodeDisassembly(output, disasm_map, code, s, *node); + i += node->size; + } else { + auto raw = Op::Raw(GetU16BE(code.buffer + i)); + raw.FPrint(output, s.indent, s.imm_hex); + fprintf(output, "\n"); + i += kInstructionSizeStepBytes; + } + } +} + +static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data) +{ + // FIXME make a full blown parser with various radixes support and different + // trace types support + bool 
parse = true; + for (size_t i = 0; i < trace_data.size; i++) { + if (trace_data.buffer[i] == '\n' || trace_data.buffer[i] == '\r') { + parse = true; + } else if (parse) { + errno = 0; + const char *startptr = reinterpret_cast(trace_data.buffer + i); + char *endptr = nullptr; + const long address = strtol(startptr, &endptr, 10); + if ((address == LONG_MAX || address == LONG_MIN) && errno == ERANGE) { + // Parsing error, just skip + } else if (startptr == endptr) { + // Parsing error, just skip + } else if (address % 2) { + fprintf(stderr, "Error: Uneven PC values are not supported (got PC=0x%08lx), exiting\n", address); + exit(1); + } else if (static_cast(address) > kRomSizeBytes) { + fprintf(stderr, "Error: PC values > 4MiB are not supported (got PC=0x%08lx), exiting\n", address); + exit(1); + } else { + // Valid value + disasm_map.InsertNode(address, NodeType::kTracedInstruction); + } + if (startptr != endptr) { + i += endptr - startptr - 1; + } + parse = false; + } + } +} + +static size_t ReadFromStream(DataBuffer &db, FILE *stream) +{ + assert(db.buffer && db.buffer_size >= db.kInitialSize); + while (1) { + const size_t read_size = db.buffer_size - db.occupied_size; + const size_t fread_ret = fread( + db.buffer + db.occupied_size, sizeof(*db.buffer), read_size, stream); + db.occupied_size += fread_ret; + if (fread_ret >= db.buffer_size) { + assert(fread_ret == db.buffer_size); + db.Expand(db.buffer_size * 2); + } else { + const int err = errno; + if (feof(stream)) { + break; + } else if (ferror(stream)) { + fprintf(stderr, "ReadFromStream: fread(%zu): Error (%d): \"%s\"\n", read_size, err, strerror(err)); + return EXIT_FAILURE; + } else if (db.buffer_size == db.occupied_size) { + db.Expand(db.buffer_size * 2); + } else { + assert(false); + } + } + } + return db.occupied_size; +} + +static DisasmMap *NewDisasmMap(FILE *trace_stream) +{ + if (trace_stream == nullptr) { + DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kRaw}; + assert(disasm_map); + return 
disasm_map; + } + // Read trace file into buffer + DataBuffer trace_data{}; + const size_t trace_size = ReadFromStream(trace_data, trace_stream); + if (trace_size == 0) { + fprintf(stderr, "ReadFromStream(trace_data, trace_stream): Error: No data has been read\n"); + return nullptr; + } + // Parse trace file into map + DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kTraced}; + assert(disasm_map); + ParseTraceData(*disasm_map, trace_data.View()); + return disasm_map; +} + +static int M68kDisasm( + FILE *input_stream, FILE *output_stream, FILE *trace_stream, const Settings &s) +{ + // Read input file into buffer + DataBuffer input{}; + const size_t input_size = ReadFromStream(input, input_stream); + if (input_size == 0) { + fprintf(stderr, "ReadFromStream(input, input_stream): Error: No data has been read\n"); + return EXIT_FAILURE; + } + const ELF::Image elf(static_cast(input)); + if (s.bfd == BFDTarget::kELF && !elf.IsValid()) { + fprintf(stderr, "Error: ELF image is not valid: %s\n", elf.Error()); + return EXIT_FAILURE; + } + const bool from_elf = s.bfd == BFDTarget::kELF || (s.bfd == BFDTarget::kAuto && elf.IsValid()); + const DataView code(from_elf ? elf.ProgramView() : elf.Data().View()); + assert(code.buffer != nullptr); + assert(code.size != 0); + // It is not worth it to check this somewhere while disassembling or + // emitting. Odd size is just not supported. 
+ if (code.size % 2) { + fprintf(stderr, "M68kDisasm: Error: code blob must be of even size\n"); + return EXIT_FAILURE; + } + auto *disasm_map = NewDisasmMap(trace_stream); + if (disasm_map == nullptr) { + return EXIT_FAILURE; + } + // Disasm into output map + disasm_map->Disasm(code, s); + // Print output into output_stream + RenderDisassembly(output_stream, *disasm_map, code, s); + delete disasm_map; + return EXIT_SUCCESS; +} + +static bool FeatureStringHasPrefixNo(const char *feature) +{ + assert(feature); + // There is also implicit, embedded and free check for null terminator + if (feature[0] == 'n' && feature[1] == 'o' && feature[2] == '-') { + return true; + } + return false; +} + +static bool ApplyFeature(Settings& s, const char *feature_arg) +{ + struct { + bool Settings::* setting; + const char* feature_name; + } const features[]{ + { &Settings::raw_data_comment, "rdc" }, + { &Settings::labels, "labels" }, + { &Settings::rel_labels, "rel-labels" }, + { &Settings::abs_labels, "abs-labels" }, + { &Settings::imm_labels, "imm-labels" }, + { &Settings::short_ref_local_labels, "short-ref-local-labels" }, + { &Settings::export_labels, "export-labels" }, + { &Settings::export_all_labels, "export-all-labels" }, + { &Settings::export_functions, "export-functions" }, + { &Settings::xrefs_from, "xrefs-from" }, + { &Settings::xrefs_to, "xrefs-to" }, + { &Settings::imm_hex, "imm-hex" }, + { &Settings::follow_jumps, "follow-jumps" }, + { &Settings::walk, "walk" }, + }; + constexpr size_t sizeof_no_prefix = (sizeof "no-") - 1; + const bool disable = FeatureStringHasPrefixNo(feature_arg); + const char *const feature = feature_arg + (disable ? 
sizeof_no_prefix : 0); + for (size_t i = 0; i < (sizeof features) / (sizeof *features); i++) { + if (0 == strcmp(feature, features[i].feature_name)) { + s.*(features[i].setting) = !disable; + return true; + } + } + return false; +} + +static void PrintUsage(FILE *s, const char *argv0) +{ + // Please, keep all lines in 80 columns range when printed. + fprintf(s, + "Usage: %s [options] \n" + "Options:\n" + " -h, --help, Show this message.\n" + " -o, --output, Where to write disassembly to (stdout if not set)\n" + " -t, --pc-trace, File containing PC trace\n" + " --indent, Specify instruction indentation, e.g. \"\t\",\n" + " Single tab is used by default.\n" + " -f, --feature=[no-]\n" + " Enable or disable (with \"no-\" prefix) a feature.\n" + " Available features described below under the\n" + " \"Feature flags\" section.\n" + " -b, --bfd-target=bfdname\n" + " Specify target object format as `bfdname`. Will attempt\n" + " to detect automatically if not set. Only `auto,\n" + " `binary` and `elf` are currently supported.\n" + " Binary or elf file with the machine code to disassemble\n" + "Feature flags:\n" + " rdc Print raw data comment.\n" + " labels Print labels above all places that have jumps from\n" + " somewhere.\n" + " rel-labels Use label instead of number on relative branch or call.\n" + " abs-labels Use label instead of number on absolute branch or call.\n" + " imm-labels Use label instead of number when immediate value moved\n" + " to address register.\n" + " short-ref-local-labels\n" + " Use local labels (numbers) for short jumps or loops.\n" + " Jump is considered short when it does not cross other\n" + " labels and has no calls.\n" + " export-labels Add `.globl` preamble to labels referenced two or more\n" + " times.\n" + " export-all-labels Add `.globl` preamble to all labels.\n" + " export-functions Add `.globl` and `.type @funciton` preamble to a label\n" + " referenced as a call.\n" + " xrefs-from Print xrefs comments above all places that have 
xrefs.\n" + " xrefs-to Print xrefs comments after all branch instructions.\n" + " imm-hex Print all immediate values as hexadecimal numbers.\n" + " follow-jumps Follow jumps to statically known locations.\n" + " walk Try best to detect further instructions following known\n" + " traced locations without overcommitting.\n" + , argv0); +} + +int main(int, char* argv[]) +{ + struct optparse_long longopts[] = { + {"help", 'h', OPTPARSE_NONE}, + {"output", 'o', OPTPARSE_REQUIRED}, + {"pc-trace", 't', OPTPARSE_REQUIRED}, + {"feature", 'f', OPTPARSE_REQUIRED}, + {"bfd-target", 'b', OPTPARSE_REQUIRED}, + {"indent", 80, OPTPARSE_REQUIRED}, + {}, + }; + const char *trace_file_name = nullptr; + const char *output_file_name = nullptr; + const char *input_file_name = nullptr; + Settings s{}; + struct optparse options; + optparse_init(&options, argv); + // Parse opts + int option; + while ((option = optparse_long(&options, longopts, NULL)) != -1) { + switch (option) { + case 'h': + PrintUsage(stdout, argv[0]); + return EXIT_SUCCESS; + break; + case 'o': + output_file_name = options.optarg; + break; + case 't': + trace_file_name = options.optarg; + break; + case 'f': + if (!ApplyFeature(s, options.optarg)) { + fprintf(stderr, "main: Error: Unknown feature \"%s\", exiting\n", options.optarg); + return EXIT_FAILURE; + } + break; + case 'b': + { + const auto *bfd_str = options.optarg; + if (0 == strcmp(bfd_str, "auto")) { + s.bfd = BFDTarget::kAuto; + } else if (0 == strcmp(bfd_str, "binary")) { + s.bfd = BFDTarget::kBinary; + } else if (0 == strcmp(bfd_str, "elf")) { + s.bfd = BFDTarget::kELF; + } else { + fprintf( + stderr, + "Unknown BFD target specified: \"%s\". 
" + "Refer to usage below to find correct BFD values.\n", + bfd_str); + PrintUsage(stderr, argv[0]); + return EXIT_FAILURE; + } + } + break; + case 80: + s.indent = options.optarg; + break; + case '?': + fprintf(stderr, "main: optparse_long: Error: \"%s\"\n", options.errmsg); + return EXIT_FAILURE; + } + } + // Parse input file name + char *arg; + while ((arg = optparse_arg(&options))) { + if (input_file_name == nullptr) { + input_file_name = arg; + } else { + fprintf(stderr, "error: too many free arguments provided\n"); + return EXIT_FAILURE; + } + } + // Open the files + FILE *input_stream = nullptr; + FILE *output_stream = stdout; + FILE *trace_stream = nullptr; + if (input_file_name) { + if (0 == strcmp(input_file_name, "-")) { + input_stream = stdin; + } else { + input_stream = fopen(input_file_name, "r"); + } + if (input_stream == nullptr) { + const int err = errno; + fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", input_file_name, err, strerror(err)); + return EXIT_FAILURE; + } + } else { + fprintf(stderr, "main: Error: no input file name specified, see usage below.\n"); + PrintUsage(stderr, argv[0]); + return EXIT_FAILURE; + } + if (output_file_name) { + output_stream = fopen(output_file_name, "w"); + if (output_stream == nullptr) { + const int err = errno; + fprintf(stderr, "main: fopen(\"%s\", \"w\"): Error (%d): \"%s\"\n", output_file_name, err, strerror(err)); + fclose(input_stream); + return EXIT_FAILURE; + } + } + if (trace_file_name) { + if (0 == strcmp(trace_file_name, "-")) { + if (input_stream == stdin) { + fprintf(stderr, "error: trace stream and input stream cannot be both stdin\n"); + return EXIT_FAILURE; + } + trace_stream = stdin; + } else { + trace_stream = fopen(trace_file_name, "r"); + } + if (trace_stream == nullptr) { + const int err = errno; + fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", trace_file_name, err, strerror(err)); + fclose(input_stream); + fclose(output_stream); + return 
EXIT_FAILURE; + } + } + // Run the program + const int ret = M68kDisasm(input_stream, output_stream, trace_stream, s); + if (trace_stream != nullptr) { + fclose(trace_stream); + } + fclose(output_stream); + fclose(input_stream); + return ret; +} diff --git a/test.bash b/test.bash deleted file mode 100644 index 3289473..0000000 --- a/test.bash +++ /dev/null @@ -1,709 +0,0 @@ -#!/usr/bin/env bash -# -# SPDX-License-Identifier: Unlicense -# -# Tests against m68k-none-elf-as. - -AS=m68k-none-elf-as -OBJCOPY=m68k-none-elf-objcopy -LD="m68k-none-elf-ld -Ttest.ld" -DISASM="./cmake-build/m68k-disasm -fabs-labels -frel-labels -flabels -fimm-hex -ffollow-jumps" -TEST_DIR=/tmp/m68k-disasm-tests - -set -e -CRED="\033[31m" -CGREEN="\033[32m" -CRST="\033[39m" - -rm -rf ${TEST_DIR} -mkdir -p ${TEST_DIR} - -run_test_expect_short() { - local test_name=$1 - local test_name_sanitized=${test_name//[^a-zA-Z0-9_\-]/-} - local data=$2 - local file_orig_bin=${TEST_DIR}/${test_name_sanitized}.orig.bin - local file_asm=${TEST_DIR}/${test_name_sanitized}.S - local file_as_o=${TEST_DIR}/${test_name_sanitized}.as.o - local file_as_elf=${TEST_DIR}/${test_name_sanitized}.as.elf - local file_as_bin=${TEST_DIR}/${test_name_sanitized}.as.bin - echo -ne "Test expect .short \"${test_name}\"... " - echo -ne "${data}" >${file_orig_bin} - ${DISASM} -o ${file_asm} ${file_orig_bin} - ${AS} -m68000 -o ${file_as_o} ${file_asm} - ${LD} -o ${file_as_elf} ${file_as_o} - ${OBJCOPY} ${file_as_elf} -O binary ${file_as_bin} - if ! grep ".short" ${file_asm} >/dev/null 2>&1; then - echo -e "${CRED}FAIL${CRST}: NOT .short emitted, but .short EXPECTED" - cat ${file_asm} - elif ! 
cmp ${file_orig_bin} ${file_as_bin} >/dev/null 2>&1; then - echo -e "${CRED}FAIL${CRST}: output and input binaries do not match" - cat ${file_asm} - echo ${file_orig_bin} - hexdump -Cv ${file_orig_bin} | head -n1 - echo ${file_as_bin} - hexdump -Cv ${file_as_bin} | head -n1 - else - echo -e "${CGREEN}OK${CRST}" - #cat ${file_asm} - fi -} - -run_test_simple() { - local test_name=$1 - local test_name_sanitized=${test_name//[^a-zA-Z0-9_\-]/-} - local data=$2 - local file_orig_bin=${TEST_DIR}/${test_name_sanitized}.orig.bin - local file_asm=${TEST_DIR}/${test_name_sanitized}.S - local file_as_o=${TEST_DIR}/${test_name_sanitized}.as.o - local file_as_elf=${TEST_DIR}/${test_name_sanitized}.as.elf - local file_as_bin=${TEST_DIR}/${test_name_sanitized}.as.bin - echo -ne "Test \"${test_name}\"... " - echo -ne "${data}" >${file_orig_bin} - ${DISASM} -o ${file_asm} ${file_orig_bin} - ${AS} -m68000 -o ${file_as_o} ${file_asm} - ${LD} -o ${file_as_elf} ${file_as_o} - ${OBJCOPY} ${file_as_elf} -O binary ${file_as_bin} - if ! 
cmp ${file_orig_bin} ${file_as_bin} >/dev/null 2>&1; then - echo -e "${CRED}FAIL${CRST}: output and input binaries do not match" - cat ${file_asm} - echo ${file_orig_bin} - hexdump -Cv ${file_orig_bin} | head -n1 - echo ${file_as_bin} - hexdump -Cv ${file_as_bin} | head -n1 - elif grep ".short" ${file_asm} >/dev/null 2>&1; then - echo -e "${CRED}FAIL${CRST}: .short emitted" - cat ${file_asm} - else - echo -e "${CGREEN}OK${CRST}" - #cat ${file_asm} - fi -} - -run_test_iterative() { - local test_name=$1 - local prefix=$2 - local offset=$3 - local count=$4 - local step=$5 - local suffix=$6 - for i in $(seq 0 $(( step )) $(( count*step-1 )) ); do - local value=$(printf "%02x" $(( offset+i ))) - run_test_simple "${test_name}:${value}" "${prefix}\x${value}${suffix}" - done -} - -# bxxx cmpm -# -run_test_simple "cmpmb (An)+, (An)+" "\xb1\x08" -run_test_simple "cmpmw (An)+, (An)+" "\xb1\x48" -run_test_simple "cmpml (An)+, (An)+" "\xb1\x88" - -# bxxx eor -# -run_test_simple "eorb Dn, Dn" "\xb5\x01" -run_test_simple "eorb Dn, (An)" "\xb5\x11" -run_test_simple "eorb Dn, (An)+" "\xb5\x19" -run_test_simple "eorw Dn, -(An)" "\xb5\x61" -run_test_simple "eorl Dn, (xxx).L" "\xb5\xb9\xff\xff\x00\x00" - -# bxxx cmp -# -run_test_simple "cmpb Dn, Dn" "\xb4\x01" -run_test_expect_short "cmpb An, Dn" "\xb4\x09" -run_test_simple "cmpw An, Dn" "\xb4\x49" -run_test_simple "cmpb (An), Dn" "\xb4\x11" -run_test_simple "cmpb (An)+, Dn" "\xb4\x19" -run_test_simple "cmpb -(An), Dn" "\xb4\x21" -run_test_simple "cmpl (d8,PC,An), Dn" "\xb0\xbb\x88\xff" -run_test_simple "cmpw (xxx).W, Dn" "\xb0\x78\x88\xff" -# GNU AS would emit CMPI for "cmp #imm,Xn", so we disassemble it as short -run_test_expect_short "cmpl #imm, D6" "\xb6\xbc\x44\xd1\xe6\xe9" - -# bxxx cmpa -# -run_test_simple "cmpaw Dn, An" "\xb4\xc1" -run_test_simple "cmpal An, An" "\xbb\xca" -run_test_simple "cmpaw (An)+, An" "\xba\xda" -run_test_simple "cmpal (xxx).L, An" "\xbb\xf9\x80\x00\x00\x00" -run_test_simple "cmpaw #imm, An" 
"\xba\xfc\x01\x00" -run_test_simple "cmpal #imm, An" "\xbb\xfc\x80\x00\x00\x00" - -# cxxx divu divs -# -run_test_simple "divuw Dn, Dn" "\x82\xc6" -run_test_simple "divsw (An), Dn" "\x83\xd6" -run_test_simple "divuw (An)+, Dn" "\x82\xde" -run_test_simple "divsw -(An), Dn" "\x83\xe6" -run_test_simple "divuw (d16,An), Dn" "\x82\xee\xa0\x00" -run_test_simple "divsw (d8,An,Dn:l), Dn" "\x83\xf6\x68\xf0" -run_test_simple "divuw (xxx).W, Dn" "\x82\xf8\x30\x00" -run_test_simple "divsw (xxx).L, Dn" "\x83\xf9\x80\x00\x00\x00" -run_test_simple "divuw (d16,PC), Dn" "\x82\xfa\xff\xff" -run_test_simple "divsw (d8,PC,An:w), Dn" "\x83\xfb\x90\xff" -run_test_simple "divuw #imm, Dn" "\x82\xfc\x30\x00" - -# cxxx mulu muls -# -run_test_simple "muluw Dn, Dn" "\xc2\xc6" -run_test_simple "mulsw (An), Dn" "\xc3\xd6" -run_test_simple "muluw (An)+, Dn" "\xc2\xde" -run_test_simple "mulsw -(An), Dn" "\xc3\xe6" -run_test_simple "muluw (d16,An), Dn" "\xc2\xee\xa0\x00" -run_test_simple "mulsw (d8,An,Dn:l), Dn" "\xc3\xf6\x68\xf0" -run_test_simple "muluw (xxx).W, Dn" "\xc2\xf8\x30\x00" -run_test_simple "mulsw (xxx).L, Dn" "\xc3\xf9\x80\x00\x00\x00" -run_test_simple "muluw (d16,PC), Dn" "\xc2\xfa\xff\xff" -run_test_simple "mulsw (d8,PC,An:w), Dn" "\xc3\xfb\x90\xff" -run_test_simple "muluw #imm, Dn" "\xc2\xfc\x30\x00" - -# cxxx exg -# -run_test_simple "exg Dn, Dn" "\xcd\x41" -run_test_simple "exg Dn, An" "\xcd\x89" -run_test_simple "exg An, An" "\xcd\x49" - -# cxxx and -# -run_test_simple "andb Dn, Dn" "\xc4\x01" -run_test_expect_short "andb An, Dn" "\xc4\x09" -run_test_expect_short "andw An, Dn" "\xc4\x49" -run_test_simple "andb (An), Dn" "\xc4\x11" -run_test_simple "andb (An)+, Dn" "\xc4\x19" -run_test_simple "andw -(An), Dn" "\xc4\x61" -run_test_simple "andl (d8,PC,An), Dn" "\xc0\xbb\xc8\x07" -# GNU AS would emit ANDI for "and #imm,Xn", so we disassemble it as short -run_test_expect_short "andl #imm, D6" "\xc6\xbc\x44\xd1\xe6\xe9" - -# cxxx abcd -# -run_test_simple "abcd Dn, Dn" "\xc1\x01" 
-run_test_simple "abcd -(An), -(An)" "\xc1\x09" - -# 8xxx sbcd -# -run_test_simple "sbcdb Dn, Dn" "\x81\x01" -run_test_simple "sbcdb -(An), -(An)" "\x81\x09" - -# 8xxx or -# -run_test_simple "orb Dn, Dn" "\x84\x01" -run_test_expect_short "orb An, Dn" "\x84\x09" -run_test_expect_short "orw An, Dn" "\x84\x49" -run_test_simple "orb (An), Dn" "\x84\x11" -run_test_simple "orb (An)+, Dn" "\x84\x19" -run_test_simple "orw -(An), Dn" "\x84\x61" -run_test_simple "orl (d8,PC,An), Dn" "\x80\xbb\x88\x07" -# GNU AS would emit ORI for "or #imm,Xn", so we disassemble it as short -run_test_expect_short "orl #imm, D6" "\x86\xbc\x44\xd1\xe6\xe9" -run_test_expect_short "orl D2, D0 swapped direction" "\x81\x42" - -# 48xx nbcd swap pea -# -run_test_simple "swapw Dn" "\x48\x47" -run_test_simple "swapw Dn" "\x48\x42" -run_test_simple "peal (An)" "\x48\x50" -run_test_simple "peal (d16,An)" "\x48\x68\x80\x00" -run_test_simple "peal (d8,An,An)" "\x48\x77\x90\xfe" -run_test_simple "peal (d16,PC)" "\x48\x7a\x7f\xff" -run_test_simple "peal (d8,PC,Dn)" "\x48\x7b\x68\xfe" -run_test_simple "nbcdb Dn" "\x48\x03" -run_test_simple "nbcdb (An)" "\x48\x14" -run_test_simple "nbcdb (An)+" "\x48\x1c" -run_test_simple "nbcdb -(An)" "\x48\x25" -run_test_simple "nbcdb (d16,An)" "\x48\x28\x80\x00" -run_test_simple "nbcdb (d8,An,An)" "\x48\x37\x90\xfe" - -# 48xx ext -# -run_test_simple "extw %d7" "\x48\x87" -run_test_simple "extl %d4" "\x48\xc4" - -# exxx asl, asr, lsl, lsr, roxl, roxr, rol, ror -# -run_test_simple "asrb Dn, Dn" "\xe2\x22" -run_test_simple "asrb #1, Dn" "\xe2\x02" -run_test_simple "asrb #8, Dn" "\xe0\x02" -run_test_simple "aslb #7, Dn" "\xef\x02" -run_test_simple "asrw Dn, Dn" "\xe2\x62" -run_test_simple "asrl Dn, Dn" "\xe2\xa2" -run_test_simple "aslw #6, Dn" "\xed\x43" -run_test_simple "asll #5, Dn" "\xeb\x83" -run_test_simple "asrw (An)" "\xe0\xd0" -run_test_simple "lsrw (An)+" "\xe2\xd8" -run_test_simple "roxrw -(An)" "\xe4\xe0" -run_test_simple "rorw (d16,An)" "\xe6\xef\x01\x00" -# Found 
on random tests -run_test_simple "lsrb D1,D4" "\xe2\x2c" - -# 9xxx subx -# -run_test_simple "subxb Dn, Dn" "\x91\x00" -run_test_simple "subxw Dn, Dn" "\x93\x47" -run_test_simple "subxl Dn, Dn" "\x95\x86" -run_test_simple "subxb -(An), -(An)" "\x91\x08" -run_test_simple "subxw -(An), -(An)" "\x93\x4f" -run_test_simple "subxl -(An), -(An)" "\x95\x8e" - -# 9xxx suba -# -run_test_simple "subaw Dn, An" "\x94\xc1" -run_test_simple "subal An, An" "\x9b\xca" -run_test_simple "subaw (An)+, An" "\x9a\xda" -run_test_simple "subaw #imm, An" "\x9a\xfc\x01\x00" -run_test_simple "subal #imm, An" "\x9b\xfc\x80\x00\x00\x00" - -# 9xxx sub -# -run_test_simple "subb Dn, Dn" "\x94\x01" -run_test_expect_short "subb An, Dn" "\x94\x09" -run_test_simple "subw An, Dn" "\x94\x49" -run_test_simple "subb (An), Dn" "\x94\x11" -run_test_simple "subb (An)+, Dn" "\x94\x19" -run_test_simple "subb -(An), Dn" "\x94\x21" -# GNU AS would emit SUBQ for "sub #imm,Xn", so we disassemble it as short -run_test_expect_short "subl #imm, D6" "\x96\xbc\x44\xd1\xe6\xe9" - -# dxxx addx -# -run_test_simple "addxb Dn, Dn" "\xd1\x00" -run_test_simple "addxw Dn, Dn" "\xd3\x47" -run_test_simple "addxl Dn, Dn" "\xd5\x86" -run_test_simple "addxb -(An), -(An)" "\xd1\x08" -run_test_simple "addxw -(An), -(An)" "\xd3\x4f" -run_test_simple "addxl -(An), -(An)" "\xd5\x8e" - -# dxxx adda -# -run_test_simple "addaw Dn, An" "\xd4\xc1" -run_test_simple "addal An, An" "\xdb\xca" -run_test_simple "addaw (An)+, An" "\xda\xda" -run_test_simple "addaw #imm, An" "\xda\xfc\x01\x00" -run_test_simple "addal #imm, An" "\xdb\xfc\x80\x00\x00\x00" - -# dxxx add -# -run_test_simple "addb Dn, Dn" "\xd4\x01" -run_test_expect_short "addb An, Dn" "\xd4\x09" -run_test_simple "addw An, Dn" "\xd4\x49" -run_test_simple "addb (An), Dn" "\xd4\x11" -run_test_simple "addb (An)+, Dn" "\xd4\x19" -run_test_simple "addb -(An), Dn" "\xd4\x21" -run_test_simple "addl (d8,PC,An), Dn" "\xd0\xbb\x88\xff" -# GNU AS would emit ADDI for "add #imm,Xn", so we 
disassemble it as short -run_test_expect_short "addl #imm, D6" "\xd6\xbc\x44\xd1\xe6\xe9" - -# 4xxx chkw -# -run_test_simple "chkw Dn" "\x47\x82" -run_test_simple "chkw (An)" "\x41\x90" -run_test_simple "chkw (An)+" "\x47\x9b" -run_test_simple "chkw (d16,An)" "\x47\xa9\x80\x00" -run_test_simple "chkw (d8,An,Xi)" "\x47\xb2\xa8\x7f" - -# 4xxx leal -# -run_test_simple "leal (An)" "\x41\xd0" -run_test_simple "leal (d16,An)" "\x47\xe9\x80\x00" -run_test_simple "leal (d8,An,Xi)" "\x47\xf2\xa8\x7f" -run_test_simple "leal (d16,PC)" "\x47\xfa\x7f\xff" -run_test_simple "leal (d8,PC,Xi)" "\x47\xfb\xa8\x80" - -# 0xxx movep -# -run_test_simple "movepw Dn to (An)" "\x01\x0b\x00\xa0" -run_test_simple "movepl Dn to (An)" "\x03\x4a\x00\xa0" -run_test_simple "movepw (An) to Dn" "\x05\x89\x00\xa0" -run_test_simple "movepl (An) to Dn" "\x07\xc8\x00\xa0" - -# 0xxx bitwise ops -# -run_test_simple "btstl immediate in Dn" "\x08\x07\x00\x06" -run_test_simple "btstb immediate in (An)" "\x08\x17\x00\x06" -run_test_simple "btstb immediate in (xxx).L" "\x08\x39\x00\x06\xff\x00\x00\x00" -run_test_simple "btstb Dn in (xxx).L" "\x03\x39\xff\x00\x00\x00" -run_test_simple "bchgb Dn in (xxx).L" "\x05\x79\xff\x00\x00\x00" -run_test_simple "bclrb Dn in (xxx).L" "\x07\xb9\xff\x00\x00\x00" -run_test_simple "bsetb Dn in (xxx).L" "\x09\xf9\xff\x00\x00\x00" -run_test_expect_short "btstb large immediate in (xxx).L" "\x08\x39\x10\x21\xff\x00\x00\x00" - -# 0xxx immediate ops -# -run_test_simple "orib #0, D0" "\x00\x00\x00\x00" -run_test_simple "orib zero to CCR" "\x00\x3c\x00\x00" -run_test_simple "orib positive to CCR" "\x00\x3c\x00\x01" -run_test_simple "orib positive to CCR" "\x00\x3c\x00\x7f" -run_test_expect_short "orib #imm (too much for orib) to CCR" "\x00\x3c\x01\x00" -run_test_simple "orib negative to CCR" "\x00\x3c\xff\x80" -run_test_simple "orib negative to CCR" "\x00\x3c\xff\xff" -run_test_simple "oriw zero to SR" "\x00\x7c\x00\x00" -run_test_simple "oriw positive to SR" "\x00\x7c\x00\x0a" 
-run_test_simple "andiw positive to SR" "\x02\x7c\x00\x0a" -run_test_simple "eoriw positive to SR" "\x0a\x7c\x00\x0a" -run_test_simple "andib positive to CCR" "\x02\x3c\x00\x0a" -run_test_simple "eorib positive to CCR" "\x0a\x3c\x00\x0a" -run_test_simple "orib positive to Dn" "\x00\x07\x00\x0a" -run_test_simple "oriw positive to Dn" "\x00\x45\x00\x0a" -run_test_simple "oril positive to Dn" "\x00\x83\x00\x00\x00\x0a" -run_test_simple "andib negative to Dn" "\x00\x07\xff\x80" -run_test_simple "andiw negative to Dn" "\x00\x45\xff\x80" -run_test_simple "andil negative to Dn" "\x00\x83\xff\x80\x00\x00" -run_test_simple "addiw zero to (An)+" "\x06\x5a\x00\x00" -run_test_simple "subiw zero from -(An)" "\x06\x62\x00\x00" -run_test_simple "cmpib zero to (An)" "\x0c\x12\x00\x20" -run_test_simple "cmpiw zero to (An)" "\x0c\x52\x00\x30" -run_test_simple "cmpil zero to (An)" "\x0c\x92\x00\x00\x00\x40" -# From random tests -run_test_expect_short "cmpil with invalid opsize" "\x0c\xe4\x26\xa3" - -# 4axx -# -run_test_simple "tas Dn" "\x4a\xc2" -run_test_simple "tstb Dn" "\x4a\x02" -run_test_simple "tstw Dn" "\x4a\x42" -run_test_simple "tstl Dn" "\x4a\x82" -run_test_expect_short "tas (d16,PC)" "\x4a\xfa\xff\xff" -run_test_expect_short "tas (d8,PC,Xi)" "\x4a\xfb\x00\x00" - -# 4xxx -# -run_test_simple "negxb Dn" "\x40\x04" -run_test_simple "clrb Dn" "\x42\x05" -run_test_simple "negb Dn" "\x44\x06" -run_test_simple "notb Dn" "\x46\x07" -run_test_simple "negxw Dn" "\x40\x44" -run_test_simple "clrw Dn" "\x42\x45" -run_test_simple "negw Dn" "\x44\x46" -run_test_simple "notw Dn" "\x46\x47" -run_test_simple "negxl Dn" "\x40\x84" -run_test_simple "clrl Dn" "\x42\x85" -run_test_simple "negl Dn" "\x44\x86" -run_test_simple "notl Dn" "\x46\x87" - -# 4e4x -# -run_test_simple "trap 0" "\x4e\x40" -run_test_simple "trap 8" "\x4e\x48" -run_test_simple "trap 15" "\x4e\x4f" - -# 4e5x -# -run_test_simple "linkw positive" "\x4e\x52\x01\x00" -run_test_simple "linkw negative" "\x4e\x52\xff\xff" 
-run_test_simple "linkw negative" "\x4e\x52\x80\x00" -run_test_simple "unlk" "\x4e\x5a" - -# 4e6x -# -run_test_simple "move to USP" "\x4e\x62" -run_test_simple "move from USP" "\x4e\x6f" - -# 4xxx -# -run_test_simple "move from SR" "\x40\xc1" -run_test_simple "move to CCR" "\x44\xc2" -run_test_simple "move to SR" "\x46\xc3" - -# 70xx / 72xx/ 74xx / 76xx / 78xx / 7axx / 7cxx / 7exx -# -run_test_simple "moveq #0 to D0" "\x70\x00" -run_test_simple "moveq #1 to D2" "\x74\x01" -run_test_simple "moveq #127 to D7" "\x7e\x7f" -run_test_simple "moveq #-1 to D5" "\x7a\xff" -run_test_simple "moveq #-128 to D1" "\x72\x80" - -# 1xxx [xxxx [xxxx]] -# -run_test_simple "moveb Dn to Dn" "\x10\x01" -run_test_expect_short "moveb An to Dn" "\x10\x09" -run_test_simple "moveb (An) to Dn" "\x10\x11" -run_test_simple "moveb (An)+ to Dn" "\x10\x19" -run_test_simple "moveb -(An) to Dn" "\x10\x21" -run_test_simple "moveb (d16,An) to Dn" "\x10\x29\xfc\xeb" -run_test_simple "moveb (d8,An,Xi) to Dn" "\x10\x31\x98\x70" -run_test_simple "moveb (xxx).W to Dn" "\x10\x38\x98\x70" -run_test_simple "moveb (xxx).L to Dn" "\x10\x39\x30\x30\x30\x70" -run_test_simple "moveb (d16,PC) to Dn" "\x10\x3a\xfc\xeb" -run_test_simple "moveb (d8,PC,Xi) to Dn" "\x10\x3b\xa8\x70" -run_test_simple "moveb #imm to Dn" "\x10\x3c\xff\xff" - -# 3xxx [xxxx [xxxx]] -# -run_test_simple "movew Dn to Dn" "\x3e\x02" -run_test_simple "movew An to Dn" "\x3e\x0a" -run_test_simple "movew (An) to Dn" "\x3e\x12" -run_test_simple "movew (An)+ to Dn" "\x30\x1a" -run_test_simple "movew -(An) to Dn" "\x30\x22" -run_test_simple "movew (d16,An) to Dn" "\x30\x2a\x3f\xff" -run_test_simple "movew (d8,An,Xi) to Dn" "\x30\x32\x90\x80" -run_test_simple "movew (xxx).W to Dn" "\x30\x38\x90\x80" -run_test_simple "movew (xxx).L to Dn" "\x30\x39\xaa\xaa\xaa\xaa" -run_test_simple "movew (d16,PC) to Dn" "\x30\x3a\x3f\xff" -run_test_simple "movew (d8,PC,Xi) to Dn" "\x30\x3b\xa0\x80" -run_test_simple "movew #imm to Dn" "\x30\x3c\xa5\xa5" -run_test_simple 
"moveaw Dn" "\x30\x41" -run_test_simple "moveaw #imm" "\x30\x7c\xa8\x90" - -# 2xxx [xxxx [xxxx]] -# -run_test_simple "movel Dn to Dn" "\x24\x05" -run_test_simple "movel An to Dn" "\x24\x0d" -run_test_simple "movel (An) to Dn" "\x24\x15" -run_test_simple "movel (An)+ to Dn" "\x24\x1d" -run_test_simple "movel -(An) to Dn" "\x24\x25" -run_test_simple "movel (d16,An) to Dn" "\x24\x2d\x78\x20" -run_test_simple "movel (d8,An,Xi) to Dn" "\x24\x35\x98\x90" -run_test_simple "movel (xxx).W to Dn" "\x24\x38\x78\x90" -run_test_simple "movel (xxx).L to Dn" "\x24\x39\x00\x00\x78\x90" -run_test_simple "movel (d16,PC) to Dn" "\x24\x3a\x78\x20" -run_test_simple "movel (d8,PC,Xi) to Dn" "\x24\x3b\xa8\x90" -run_test_simple "movel #imm to Dn" "\x24\x3c\xa8\x90\x00\x00" -run_test_simple "moveal Dn" "\x20\x41" -run_test_simple "moveal #imm" "\x20\x7c\xa8\x90\x00\x00" - -# From random tests -# -run_test_simple "movel %pc@(-16,%a0:l),%a3@+ with nop" "\x26\xfb\x88\xf0\x4e\x71" - -# 4890 xxx -# -run_test_simple "movemw single register to (An)" "\x48\x90\x00\x01" -run_test_simple "movemw d0-d1,a0-a1 to (An)" "\x48\x90\x03\x03" -run_test_simple "moveml 6 spans to (An)" "\x48\xd0\xb6\xdb" -run_test_simple "movemw 8 non-neighboring regs to (An)" "\x48\x90\x55\x55" -run_test_simple "moveml other 8 non-neighboring regs to (An)" "\x48\xd0\xaa\xaa" -run_test_simple "moveml all registers to (An)" "\x48\xd0\xff\xff" -run_test_simple "movemw all registers to -(An)" "\x48\xa0\xff\xff" -run_test_simple "moveml all registers to (d16,An)" "\x48\xe8\xff\xff\x30\x1d" -run_test_simple "movemw all registers to (d8,An,Xi)" "\x48\xb7\xff\xff\x48\x0a" -run_test_simple "moveml all registers to (xxx).W" "\x48\xf8\xff\xff\x80\x10" -run_test_simple "movemw all registers to (xxx).L" "\x48\xb9\xff\xff\x00\x00\x7f\xf0" -run_test_simple "movemw (An) to all registers " "\x4c\x90\xff\xff" -run_test_simple "moveml (An)+ to all registers" "\x4c\xd8\xff\xff" -run_test_simple "movemw (d16,An) to all registers" 
"\x4c\xa8\xff\xff\x30\x1d" -run_test_simple "moveml (d8,An,Xi) to all registers" "\x4c\xf7\xff\xff\x48\x0a" -run_test_simple "movemw (xxx).W to all registers" "\x4c\xb8\xff\xff\x80\x10" -run_test_simple "moveml (xxx).L to all registers" "\x4c\xf9\xff\xff\x00\x00\x7f\xf0" - -# From random tests -# -run_test_expect_short "movem truncated" "\x48\x92" - -# 5x38 / 5x78 / 5xb8 (xxx).W -# -run_test_simple "addqb #8,offset:w" "\x50\x38\x00\x73" -run_test_simple "addql #4,offset:w" "\x58\xb8\x80\x14" - -# 5x39 / 5x79 / 5xb9 (xxx).L -# -run_test_simple "addqw #5,offset:l" "\x5a\x79\x18\xfc\x00\x00" -run_test_simple "addql #1,offset:l" "\x52\xb9\xf1\x00\x00\x01" - -# 5x30..5x37 / 5x70..5x77 / 5xb0..5xb7, (d16, An, Xi), Brief Extension Word -# -run_test_simple "addqb #8,a7(positive,d0:w)" "\x50\x37\x00\x73" -run_test_simple "addqw #5,a2(negative,d1:l)" "\x5a\x72\x18\xfc" -run_test_simple "addql #1,a3(negative,a3:w)" "\x52\xb3\xb0\x81" - -# 5x28..5x2f / 5x68..5x6f / 5xa8..5xaf, (d16, An), Displacement Word -# -run_test_simple "addqb #8,a7(positive)" "\x50\x2f\x00\x80" -run_test_simple "addqw #5,a2(negative)" "\x5a\x6a\xfc\xfc" -run_test_simple "addql #1,a3(negative)" "\x52\xab\xff\xff" - -# 5x20..5x27 / 5x60..5x67 / 5xa0..5xa7, -(An) -# -run_test_simple "addqb #8,-(a7)" "\x50\x27" -run_test_simple "addqw #5,-(a2)" "\x5a\x62" -run_test_simple "addql #1,-(a3)" "\x52\xa3" - -# 5x18..5x1f / 5x58..5x5f / 5x98..5x9f, (An)+ -# -run_test_simple "addqb #8,(a7)+" "\x50\x1f" -run_test_simple "addqw #5,(a2)+" "\x5a\x5a" -run_test_simple "addql #1,(a3)+" "\x52\x9d" - -# 5x10..5x17 / 5x50..5x57 / 5x90..5x97, (An) -# -run_test_simple "addqb #8,(a7)" "\x50\x17" -run_test_simple "addqw #5,(a2)" "\x5a\x52" -run_test_simple "addql #1,(a3)" "\x52\x93" - -# 5x08..5x0f / 5x48..5x4f / 5x88..5x8f, An -# -# NOTE: addqb with An does not exits -run_test_simple "addqw #6,a7" "\x5c\x4f" -run_test_simple "addql #1,a5" "\x52\x8d" - -# 5x00..5x07 / 5x40..5x47 / 5x80..5x87, Dn -# -run_test_simple "addqb #8,d7" 
"\x50\x07" -run_test_simple "addqw #5,d2" "\x5a\x42" -run_test_simple "addql #1,d3" "\x52\x83" - -# 50f9 xxxx -# -run_test_simple "st d16:l positive" "\x51\xf9\x00\x00\x00\x74" -run_test_simple "st d16:l negative" "\x51\xf9\xc0\xfe\xba\xbe" - -# 50f8 xxxx -# -run_test_simple "st d16:w positive" "\x51\xf8\x00\x66" -run_test_simple "st d16:w negative" "\x51\xf8\x80\xc4" - -# 51f0 xxxx -# -run_test_simple "sf (d16:w,A4,D3) positive" "\x51\xf4\xb0\x04" -run_test_simple "sf (d16:w,A3,A6) negative" "\x51\xf3\x60\xf2" - -# 5fe8 xxxx -# -run_test_simple "sle (d16,A0) positive" "\x5f\xe8\x00\xa0" -run_test_simple "sle (d16,A0) negative" "\x5f\xe8\xe4\x02" - -# 5ee1 -# -run_test_simple "sgt -(%a1)" "\x5e\xe1" - -# 56df -# -run_test_simple "sne (%a7)+" "\x56\xdf" - -# 5dd3 -# -run_test_simple "slt (%a3)" "\x5d\xd3" - -# 57cx -# -run_test_iterative "seq Xn" "\x57" 0xc0 8 1 - -# 50cf xxxx -# -run_test_simple "dbt negative displacement" "\x50\xcf\xff\xfc" -run_test_simple "dbt positive displacement" "\x50\xcf\x01\x08" - -# 50c9 7ffe -# -# From random tests -run_test_simple "dbt %d1,.+32768" "\x50\xc9\x7f\xfe" - -# 60xx -# -run_test_simple "bras negative displacement" "\x60\xfc" -run_test_simple "bras positive displacement" "\x60\x08" - -# 60xx (xxxx) -# -run_test_simple "braw negative displacement" "\x60\x00\xf8\x2e" -run_test_simple "braw positive displacement" "\x60\x00\x03\xe6" -run_test_simple "braw zero displacement" "\x60\x00\x00\x00" - -# 61xx (xxxx) -# -run_test_simple "bsrs negative displacement" "\x61\x88" -run_test_simple "bsrw positive displacement" "\x61\x00\x03\xe6" - -# 6xxx -# -run_test_simple "bhis" "\x62\x0a" -run_test_simple "blss" "\x63\x0a" -run_test_simple "bccs" "\x64\x0a" -run_test_simple "bcss" "\x65\x0a" -run_test_simple "bnes" "\x66\x0a" -run_test_simple "beqs" "\x67\x0a" -run_test_simple "bvcs" "\x68\x0a" -run_test_simple "bvss" "\x69\x0a" -run_test_simple "bpls" "\x6a\x0a" -run_test_simple "bmis" "\x6b\x0a" -run_test_simple "bges" "\x6c\x0a" 
-run_test_simple "blts" "\x6d\x0a" -run_test_simple "bgts" "\x6e\x0a" -run_test_simple "bles" "\x6f\x0a" - -# 4afc -# -# reset -# -run_test_simple "illegal" "\x4a\xfc" - -# 4e70 -# -# reset -run_test_simple "reset" "\x4e\x70" - -# 4e71 -# -# nop -run_test_simple "nop" "\x4e\x71" - -# 4e72 xxxx -# -run_test_simple "stop #8:w" "\x4e\x72\x00\x08" -run_test_simple "stop #ffff:w" "\x4e\x72\xff\xff" - -# 4e73 -# -# rte -run_test_simple "rte" "\x4e\x73" - -# 4e75 -# -# rts -run_test_simple "rts" "\x4e\x75" - -# 4e76 -# -# trapv -run_test_simple "trapv" "\x4e\x76" - -# 4e77 -# -# rtr -run_test_simple "rtr" "\x4e\x77" - -# 4e90..4e97 -# -run_test_iterative "jsr M2 all An" "\x4e" 0x90 8 1 "" - -# (4ea8..4eaf) xxxx -# -run_test_simple "jsr M5 zero value" "\x4e\xa8\x00\x00" -run_test_iterative "jsr M5 all An, positive" "\x4e" 0xa8 8 1 "\x00\x0a" -run_test_simple "jsr M5 A0 negative" "\x4e\xa8\x80\x0f" - -# (4eb0..4eb7) xxxx -# -run_test_iterative "jsr M6 arbitrary An, positive" "\x4e" 0xb0 8 1 "\x00\x0f" -run_test_simple "jsr M6 A0 negative" "\x4e\xb0\x00\xf0" -run_test_simple "jsr M6 A0 zero" "\x4e\xb0\x00\x00" -run_test_simple "jsr M6 address register" "\x4e\xb0\x80\x0a" -run_test_simple "jsr M6 long displacement positive" "\x4e\xb0\x08\x0c" -run_test_simple "jsr M6 long displacement negative" "\x4e\xb0\x08\xb0" -run_test_iterative "jsr M6 arbitrary Xn2" "\x4e\xb0" 0x00 8 0x10 "\x0f" - -# 4eb8 xxxx Word displacement -# -run_test_simple "jsr M7 Xn0 zero" "\x4e\xb8\x00\x00" -run_test_simple "jsr M7 Xn0 positive" "\x4e\xb8\x00\x1f" -run_test_simple "jsr M7 Xn0 negative" "\x4e\xb8\x8a\x0c" - -# 4eb9 xxxx Long displacement -# -run_test_simple "jsr M7 X1 zero" "\x4e\xb9\x00\x00\x00\x00" -run_test_simple "jsr M7 X1 positive" "\x4e\xb9\x10\xbb\x43\x1f" -run_test_simple "jsr M7 X1 negative" "\x4e\xb9\x80\xcc\xd9\x8a" - -# 4eba xxxx -# -run_test_simple "jsr M7 X2 zero value" "\x4e\xba\x00\x00" -run_test_simple "jsr M7 X2 positive value" "\x4e\xba\x00\x1f" -run_test_simple "jsr M7 X2 
negative value" "\x4e\xba\x8a\x0c" - -# 4ebb xxxx -# -run_test_simple "jsr M7 X3 negative" "\x4e\xbb\x00\xf0" -run_test_simple "jsr M7 X3 zero displacement" "\x4e\xbb\x00\x00" -run_test_simple "jsr M7 X3 An2=A0" "\x4e\xbb\x80\x0a" -run_test_simple "jsr M7 X3 long positive displacement" "\x4e\xbb\x08\x0c" -run_test_simple "jsr M7 X3 long negative displacement" "\x4e\xbb\x08\xb0" -run_test_iterative "jsr M7 X3 arbitrary Dn2" "\x4e\xbb" 0x00 8 0x10 "\x0f" diff --git a/test.ld b/test.ld deleted file mode 100644 index f939414..0000000 --- a/test.ld +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: Unlicense - */ - -MEMORY { - ROM(rx) : ORIGIN = 0x00000000, LENGTH = 4M -} - -SECTIONS { - . = ORIGIN(ROM); - .text : { - KEEP(*(.text)) - . = ALIGN(2); - *(.text*) - . = ALIGN(2); - KEEP(*(.rodata)) - *(.rodata*) - . = ALIGN(2); - } >ROM -} diff --git a/test_labels_referencing.bash b/test_labels_referencing.bash deleted file mode 100644 index 978dbc7..0000000 --- a/test_labels_referencing.bash +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env bash -# -# SPDX-License-Identifier: Unlicense -# -# Tests against m68k-none-elf-as. - -AS=m68k-none-elf-as -OBJCOPY=m68k-none-elf-objcopy -LD="m68k-none-elf-ld -Ttest.ld" -DISASM="./cmake-build/m68k-disasm -ffollow-jumps" -TEST_DIR=/tmp/m68k-disasm-tests-labels-referencing - -set -e -CRED="\033[31m" -CGREEN="\033[32m" -CRST="\033[39m" - -rm -rf ${TEST_DIR} -mkdir -p ${TEST_DIR} - -run_test_r() { - local test_name=$1 - local test_name_sanitized=${test_name//[^a-zA-Z0-9_\-]/-} - local data=$2 - local args=$3 - local file_orig_bin=${TEST_DIR}/${test_name_sanitized}.orig.bin - local file_asm=${TEST_DIR}/${test_name_sanitized}.S - local file_as_o=${TEST_DIR}/${test_name_sanitized}.as.o - local file_as_elf=${TEST_DIR}/${test_name_sanitized}.as.elf - local file_as_bin=${TEST_DIR}/${test_name_sanitized}.as.bin - echo -ne "Test \"${test_name}\"... 
" - echo -ne "${data}" >${file_orig_bin} - ${DISASM} $args -o ${file_asm} ${file_orig_bin} - ${AS} -m68000 -o ${file_as_o} ${file_asm} - ${LD} -o ${file_as_elf} ${file_as_o} - ${OBJCOPY} ${file_as_elf} -O binary ${file_as_bin} - if ! cmp ${file_orig_bin} ${file_as_bin}; then - cat ${file_asm} - echo -e "${CRED}FAIL${CRST}: output and input binaries do not match" - hexdump -Cv ${file_orig_bin} >${file_orig_bin}.txt - hexdump -Cv ${file_as_bin} >${file_as_bin}.txt - echo ${file_orig_bin} - echo ${file_as_bin} - exit - elif grep ".short" ${file_asm} >/dev/null 2>&1; then - echo -e "${CRED}FAIL${CRST}: .short emitted" - cat ${file_asm} - exit - fi - local run_check=$4 - $run_check - #echo && cat ${file_asm} - echo -e "${CGREEN}OK${CRST}" -} - -run_check_rdisp() { - if grep -e "\s\.\([+-]\+\|\s\+\|$\)" ${file_asm} >/dev/null 2>&1; then - echo -e "${CRED}FAIL${CRST}: raw displacement emitted" - cat ${file_asm} - exit - fi -} - -run_check_r() { - if grep -e "[^0-9a-zA-Z_+][0-9]\+" ${file_asm} >/dev/null 2>&1; then - echo -e "${CRED}FAIL${CRST}: raw number or displacement emitted" - cat ${file_asm} - exit - fi -} - -run_check_dummy() { :; } - -run_test_rdisp() { - run_test_r "$1" "$2" "-flabels -frel-labels" run_check_rdisp -} - -run_test_rword() { - run_test_r "$1" "$2" "-flabels -fabs-labels" run_check_r -} - -run_test_rpcrel() { - run_test_r "$1" "$2" "-flabels -frel-labels" run_check_r -} - -run_test_rlocal() { - run_test_r "$1" "$2" "-flabels -frel-labels -fabs-labels -fshort-ref-local-labels" run_check_dummy -} - -run_test_rdisp "bras ." 
"\x60\xfe" -run_test_rdisp "bras .-2" "\x4e\x71\x60\xfc" -run_test_rdisp "bras .-1" "\x4e\x71\x60\xfd" -run_test_rdisp "braw .+2" "\x4e\x71\x60\x00\x00\x00" -run_test_rword "moveml 0x0:w,%d0" "\x4c\xf8\x00\x01\x00\x00" -run_test_rword "moveml 0x6:w,%a0" "\x4c\xf8\x01\x00\x00\x06\x4e\x71\x4e\x71" -run_test_rword "movemw 0x0:l,%a0" "\x4e\x71\x4e\x71\x4c\xb9\x01\x00\x00\x00\x00\x02" -run_test_rpcrel "movemw (0,PC),%a0" "\x4e\x71\x4e\x71\x4c\xba\x01\x00\x00\x00" -run_test_rpcrel "lea (0,PC)" "\x47\xfa\x00\x00" -run_test_rpcrel "jmp (0,PC)" "\x4e\xfa\x00\x00" -run_test_rword "peal 0x0:w" "\x48\x78\x00\x00" -run_test_rword "peal 0x0:l" "\x48\x79\x00\x00\x00\x00" -run_test_rpcrel "peal (0,PC)" "\x48\x7a\x00\x00" -run_test_rword "nbcd 0x0:w" "\x48\x38\x00\x00" -run_test_rword "nbcd 0x6:l with nop" "\x48\x39\x00\x00\x00\x06\x4e\x71" -run_test_rword "cmpl 0x4:w, D2 with nop" "\xb4\xb8\x00\x04\x4e\x71" -run_test_rword "cmpw 0x0:l, D2" "\xb4\x79\x00\x00\x00\x00" -run_test_rpcrel "cmpl (0,PC), D2" "\xb4\xba\x00\x00" -run_test_rpcrel "cmpl (-2,PC), D2" "\xb4\xba\xff\xfe" -run_test_rlocal "bras 1f; nop; 1: bras 1b" "\x60\x02\x4e\x71\x60\xfe" -run_test_rlocal "2: bras 1f; nop; 1: bras 2b" "\x60\x02\x4e\x71\x60\xfa" diff --git a/test_random.bash b/test_random.bash deleted file mode 100644 index 6d2b17c..0000000 --- a/test_random.bash +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash -# -# SPDX-License-Identifier: Unlicense -# -# Tests against m68k-none-elf-as. 
- -AS=m68k-none-elf-as -OBJCOPY=m68k-none-elf-objcopy -LD="m68k-none-elf-ld -Ttest.ld" -DISASM="./cmake-build/m68k-disasm -frdc -fxrefs-to -fxrefs-from -flabels -frel-labels -fabs-labels -fshort-ref-local-labels -fimm-hex -ffollow-jumps" -TEST_DIR=/tmp/m68k-disasm-random-tests - -set -e -CRED="\033[31m" -CGREEN="\033[32m" -CRST="\033[39m" - -rm -rf ${TEST_DIR} -mkdir -p ${TEST_DIR} - -run_test_random() { - local pass_number=$1 - local blocks_count=$2 - local test_name_sanitized=${pass_number//[^a-zA-Z0-9_\-]/-} - local file_orig_bin=${TEST_DIR}/${test_name_sanitized}.orig.bin - local file_asm=${TEST_DIR}/${test_name_sanitized}.S - local file_as_o=${TEST_DIR}/${test_name_sanitized}.as.o - local file_as_elf=${TEST_DIR}/${test_name_sanitized}.as.elf - local file_as_bin=${TEST_DIR}/${test_name_sanitized}.as.bin - echo -ne "Test random, pass ${pass_number}... " - dd if=/dev/urandom of=${file_orig_bin} bs=1024 count=${blocks_count} >/dev/null 2>&1 - ${DISASM} -o ${file_asm} ${file_orig_bin} - ${AS} -o ${file_as_o} ${file_asm} - ${LD} -o ${file_as_elf} ${file_as_o} - ${OBJCOPY} ${file_as_elf} -O binary ${file_as_bin} - if ! cmp ${file_orig_bin} ${file_as_bin}; then - echo -e "${CRED}FAIL${CRST}: output and input binaries do not match" - hexdump -Cv ${file_orig_bin} >${file_orig_bin}.txt - hexdump -Cv ${file_as_bin} >${file_as_bin}.txt - echo ${file_orig_bin} - echo ${file_as_bin} - exit - else - echo -e "${CGREEN}OK${CRST}" - rm ${file_orig_bin} ${file_asm} ${file_as_o} ${file_as_elf} ${file_as_bin} - fi -} - -# Tiny tests are mostly for ensuring that bounds checking is working properly, -# because it is more likely to encounter something that looks like a truncated -# instruction. -# -# If there is an obvious bug, then it will most likely be detected -# here and it is easier to dissect tiny test blob than huge test trying to debug -# single test case. -for i in `seq 1 1000`; do - run_test_random tiny$i 1 -done - -# Huge tests are for the broad coverage. They catch a lot! 
-for i in `seq 1 10`; do - run_test_random huge$i 1024 -done diff --git a/test_walk_and_follow_jumps.bash b/test_walk_and_follow_jumps.bash deleted file mode 100644 index 2f317b4..0000000 --- a/test_walk_and_follow_jumps.bash +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env bash -# -# SPDX-License-Identifier: Unlicense -# -# Tests against reference text for -ffollow-jumps and -fwalk features - -TEST_DIR=/tmp/m68k-disasm-follow-jumps-walk-tests -DISASM="./cmake-build/m68k-disasm -flabels -frel-labels -fabs-labels" - -set -e -CRED="\033[31m" -CGREEN="\033[32m" -CRST="\033[39m" - -rm -rf ${TEST_DIR} -mkdir -p ${TEST_DIR} - -OUTPUT_ASM="$TEST_DIR"/output.S -TRACE="$TEST_DIR"/trace.txt -REFERENCE="$TEST_DIR"/reference.S -REFERENCE_W="$TEST_DIR"/reference_w.S -REFERENCE_F="$TEST_DIR"/reference_f.S -REFERENCE_WF="$TEST_DIR"/reference_wf.S - -run_test_inner() { - local test_name=$1 - local disasm_args="$2" - local input="$3" - local reference="$4" - echo -ne "Test \"${test_name}\" ($disasm_args)... " - echo -ne "$input" | ${DISASM} --indent=' ' $disasm_args -t "$TRACE" -o "$OUTPUT_ASM" - - if ! 
diff --ignore-trailing-space "$reference" "$OUTPUT_ASM" >/dev/null 2>&1; then - echo -e "${CRED}FAIL${CRST}: output and reference text files do not match" - diff --color=always --unified --ignore-trailing-space "$reference" "$OUTPUT_ASM" || true - else - echo -e "${CGREEN}OK${CRST}" - fi -} - -run_test() { - local test_name=$1 - local input="$2" - local reference="$3" - local reference_w="$4" - local reference_f="$5" - local reference_wf="$6" - run_test_inner "$test_name" "" "$input" "$reference" - run_test_inner "$test_name" "-fwalk" "$input" "$reference_w" - run_test_inner "$test_name" "-ffollow-jumps" "$input" "$reference_f" - run_test_inner "$test_name" "-fwalk -ffollow-jumps" "$input" "$reference_wf" -} - - -echo -e "0" >"$TRACE" -cat >"$REFERENCE" << EOF - nop - .short 0x4e71 -EOF -cat >"$REFERENCE_W" << EOF - nop - nop -EOF -# $REFERENCE_F is same as $REFERENCE -# $REFERENCE_WF is same as $REFERENCE_W -run_test "linear nops, trace @0" "\x4e\x71\x4e\x71" \ - "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W" - - -cat >"$REFERENCE" << EOF - nop - .short 0x6002 - .short 0x4e71 - .short 0x4e71 -EOF -cat >"$REFERENCE_W" << EOF - nop - bras L00000006 - .short 0x4e71 -L00000006: - .short 0x4e71 -EOF -# $REFERENCE_F is same as $REFERENCE -cat >"$REFERENCE_WF" << EOF - nop - bras L00000006 - .short 0x4e71 -L00000006: - nop -EOF -run_test "nop and unconditional branch, trace @0" "\x4e\x71\x60\x02\x4e\x71\x4e\x71" \ - "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_WF" - - -cat >"$REFERENCE" << EOF - nop - .short 0x6602 - .short 0x4e71 - .short 0x4e71 -EOF -cat >"$REFERENCE_W" << EOF - nop - bnes L00000006 - nop -L00000006: - nop -EOF -# $REFERENCE_F is same as $REFERENCE -# $REFERENCE_WF is same as $REFERENCE_W -run_test "nop and conditional branch, trace @0" "\x4e\x71\x66\x02\x4e\x71\x4e\x71" \ - "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W" - - -cat >"$REFERENCE" << EOF - bnes L00000004 - .short 0x4e71 -L00000004: - .short 0x4e71 -EOF -cat 
>"$REFERENCE_W" << EOF - bnes L00000004 - nop -L00000004: - nop -EOF -cat >"$REFERENCE_F" << EOF - bnes L00000004 - .short 0x4e71 -L00000004: - nop -EOF -# $REFERENCE_WF is same as $REFERENCE_W -run_test "conditional branch, trace @0" "\x66\x02\x4e\x71\x4e\x71" \ - "$REFERENCE" "$REFERENCE_W" "$REFERENCE_F" "$REFERENCE_W" - - -cat >"$REFERENCE" << EOF - bras L00000004 - .short 0x4e71 -L00000004: - .short 0x4e71 -EOF -# $REFERENCE_W is same as $REFERENCE -cat >"$REFERENCE_F" << EOF - bras L00000004 - .short 0x4e71 -L00000004: - nop -EOF -# $REFERENCE_WF is same as $REFERENCE_F -run_test "unconditional branch, trace @0" "\x60\x02\x4e\x71\x4e\x71" \ - "$REFERENCE" "$REFERENCE" "$REFERENCE_F" "$REFERENCE_F" - - -echo -e "0\n2" >"$TRACE" -cat >"$REFERENCE" << EOF -L00000000: - nop - bnes L00000000 - .short 0x4e71 -EOF -cat >"$REFERENCE_W" << EOF -L00000000: - nop - bnes L00000000 - nop -EOF -# $REFERENCE_F is same as $REFERENCE -# $REFERENCE_WF is same as $REFERENCE_W -run_test "nop and conditional branch backwards, trace @0, @2" "\x4e\x71\x66\xfc\x4e\x71" \ - "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W" - - -echo -e "2" >"$TRACE" -cat >"$REFERENCE" << EOF -L00000000: - .short 0x4e71 - bnes L00000000 - .short 0x4e71 -EOF -cat >"$REFERENCE_W" << EOF -L00000000: - .short 0x4e71 - bnes L00000000 - nop -EOF -cat >"$REFERENCE_F" << EOF -L00000000: - nop - bnes L00000000 - .short 0x4e71 -EOF -cat >"$REFERENCE_WF" << EOF -L00000000: - nop - bnes L00000000 - nop -EOF -run_test "nop and conditional branch backwards, trace @2" "\x4e\x71\x66\xfc\x4e\x71" \ - "$REFERENCE" "$REFERENCE_W" "$REFERENCE_F" "$REFERENCE_WF" - diff --git a/tests/test.bash b/tests/test.bash new file mode 100644 index 0000000..6025908 --- /dev/null +++ b/tests/test.bash @@ -0,0 +1,709 @@ +#!/usr/bin/env bash +# +# SPDX-License-Identifier: Unlicense +# +# Tests against m68k-none-elf-as. 
+ +AS=m68k-none-elf-as +OBJCOPY=m68k-none-elf-objcopy +LD="m68k-none-elf-ld -Ttest.ld" +DISASM="../cmake-build/m68k-disasm -fabs-labels -frel-labels -flabels -fimm-hex -ffollow-jumps" +TEST_DIR=/tmp/m68k-disasm-tests + +set -e +CRED="\033[31m" +CGREEN="\033[32m" +CRST="\033[39m" + +rm -rf ${TEST_DIR} +mkdir -p ${TEST_DIR} + +run_test_expect_short() { + local test_name=$1 + local test_name_sanitized=${test_name//[^a-zA-Z0-9_\-]/-} + local data=$2 + local file_orig_bin=${TEST_DIR}/${test_name_sanitized}.orig.bin + local file_asm=${TEST_DIR}/${test_name_sanitized}.S + local file_as_o=${TEST_DIR}/${test_name_sanitized}.as.o + local file_as_elf=${TEST_DIR}/${test_name_sanitized}.as.elf + local file_as_bin=${TEST_DIR}/${test_name_sanitized}.as.bin + echo -ne "Test expect .short \"${test_name}\"... " + echo -ne "${data}" >${file_orig_bin} + ${DISASM} -o ${file_asm} ${file_orig_bin} + ${AS} -m68000 -o ${file_as_o} ${file_asm} + ${LD} -o ${file_as_elf} ${file_as_o} + ${OBJCOPY} ${file_as_elf} -O binary ${file_as_bin} + if ! grep ".short" ${file_asm} >/dev/null 2>&1; then + echo -e "${CRED}FAIL${CRST}: NOT .short emitted, but .short EXPECTED" + cat ${file_asm} + elif ! cmp ${file_orig_bin} ${file_as_bin} >/dev/null 2>&1; then + echo -e "${CRED}FAIL${CRST}: output and input binaries do not match" + cat ${file_asm} + echo ${file_orig_bin} + hexdump -Cv ${file_orig_bin} | head -n1 + echo ${file_as_bin} + hexdump -Cv ${file_as_bin} | head -n1 + else + echo -e "${CGREEN}OK${CRST}" + #cat ${file_asm} + fi +} + +run_test_simple() { + local test_name=$1 + local test_name_sanitized=${test_name//[^a-zA-Z0-9_\-]/-} + local data=$2 + local file_orig_bin=${TEST_DIR}/${test_name_sanitized}.orig.bin + local file_asm=${TEST_DIR}/${test_name_sanitized}.S + local file_as_o=${TEST_DIR}/${test_name_sanitized}.as.o + local file_as_elf=${TEST_DIR}/${test_name_sanitized}.as.elf + local file_as_bin=${TEST_DIR}/${test_name_sanitized}.as.bin + echo -ne "Test \"${test_name}\"... 
" + echo -ne "${data}" >${file_orig_bin} + ${DISASM} -o ${file_asm} ${file_orig_bin} + ${AS} -m68000 -o ${file_as_o} ${file_asm} + ${LD} -o ${file_as_elf} ${file_as_o} + ${OBJCOPY} ${file_as_elf} -O binary ${file_as_bin} + if ! cmp ${file_orig_bin} ${file_as_bin} >/dev/null 2>&1; then + echo -e "${CRED}FAIL${CRST}: output and input binaries do not match" + cat ${file_asm} + echo ${file_orig_bin} + hexdump -Cv ${file_orig_bin} | head -n1 + echo ${file_as_bin} + hexdump -Cv ${file_as_bin} | head -n1 + elif grep ".short" ${file_asm} >/dev/null 2>&1; then + echo -e "${CRED}FAIL${CRST}: .short emitted" + cat ${file_asm} + else + echo -e "${CGREEN}OK${CRST}" + #cat ${file_asm} + fi +} + +run_test_iterative() { + local test_name=$1 + local prefix=$2 + local offset=$3 + local count=$4 + local step=$5 + local suffix=$6 + for i in $(seq 0 $(( step )) $(( count*step-1 )) ); do + local value=$(printf "%02x" $(( offset+i ))) + run_test_simple "${test_name}:${value}" "${prefix}\x${value}${suffix}" + done +} + +# bxxx cmpm +# +run_test_simple "cmpmb (An)+, (An)+" "\xb1\x08" +run_test_simple "cmpmw (An)+, (An)+" "\xb1\x48" +run_test_simple "cmpml (An)+, (An)+" "\xb1\x88" + +# bxxx eor +# +run_test_simple "eorb Dn, Dn" "\xb5\x01" +run_test_simple "eorb Dn, (An)" "\xb5\x11" +run_test_simple "eorb Dn, (An)+" "\xb5\x19" +run_test_simple "eorw Dn, -(An)" "\xb5\x61" +run_test_simple "eorl Dn, (xxx).L" "\xb5\xb9\xff\xff\x00\x00" + +# bxxx cmp +# +run_test_simple "cmpb Dn, Dn" "\xb4\x01" +run_test_expect_short "cmpb An, Dn" "\xb4\x09" +run_test_simple "cmpw An, Dn" "\xb4\x49" +run_test_simple "cmpb (An), Dn" "\xb4\x11" +run_test_simple "cmpb (An)+, Dn" "\xb4\x19" +run_test_simple "cmpb -(An), Dn" "\xb4\x21" +run_test_simple "cmpl (d8,PC,An), Dn" "\xb0\xbb\x88\xff" +run_test_simple "cmpw (xxx).W, Dn" "\xb0\x78\x88\xff" +# GNU AS would emit CMPI for "cmp #imm,Xn", so we disassemble it as short +run_test_expect_short "cmpl #imm, D6" "\xb6\xbc\x44\xd1\xe6\xe9" + +# bxxx cmpa +# 
+run_test_simple "cmpaw Dn, An" "\xb4\xc1" +run_test_simple "cmpal An, An" "\xbb\xca" +run_test_simple "cmpaw (An)+, An" "\xba\xda" +run_test_simple "cmpal (xxx).L, An" "\xbb\xf9\x80\x00\x00\x00" +run_test_simple "cmpaw #imm, An" "\xba\xfc\x01\x00" +run_test_simple "cmpal #imm, An" "\xbb\xfc\x80\x00\x00\x00" + +# cxxx divu divs +# +run_test_simple "divuw Dn, Dn" "\x82\xc6" +run_test_simple "divsw (An), Dn" "\x83\xd6" +run_test_simple "divuw (An)+, Dn" "\x82\xde" +run_test_simple "divsw -(An), Dn" "\x83\xe6" +run_test_simple "divuw (d16,An), Dn" "\x82\xee\xa0\x00" +run_test_simple "divsw (d8,An,Dn:l), Dn" "\x83\xf6\x68\xf0" +run_test_simple "divuw (xxx).W, Dn" "\x82\xf8\x30\x00" +run_test_simple "divsw (xxx).L, Dn" "\x83\xf9\x80\x00\x00\x00" +run_test_simple "divuw (d16,PC), Dn" "\x82\xfa\xff\xff" +run_test_simple "divsw (d8,PC,An:w), Dn" "\x83\xfb\x90\xff" +run_test_simple "divuw #imm, Dn" "\x82\xfc\x30\x00" + +# cxxx mulu muls +# +run_test_simple "muluw Dn, Dn" "\xc2\xc6" +run_test_simple "mulsw (An), Dn" "\xc3\xd6" +run_test_simple "muluw (An)+, Dn" "\xc2\xde" +run_test_simple "mulsw -(An), Dn" "\xc3\xe6" +run_test_simple "muluw (d16,An), Dn" "\xc2\xee\xa0\x00" +run_test_simple "mulsw (d8,An,Dn:l), Dn" "\xc3\xf6\x68\xf0" +run_test_simple "muluw (xxx).W, Dn" "\xc2\xf8\x30\x00" +run_test_simple "mulsw (xxx).L, Dn" "\xc3\xf9\x80\x00\x00\x00" +run_test_simple "muluw (d16,PC), Dn" "\xc2\xfa\xff\xff" +run_test_simple "mulsw (d8,PC,An:w), Dn" "\xc3\xfb\x90\xff" +run_test_simple "muluw #imm, Dn" "\xc2\xfc\x30\x00" + +# cxxx exg +# +run_test_simple "exg Dn, Dn" "\xcd\x41" +run_test_simple "exg Dn, An" "\xcd\x89" +run_test_simple "exg An, An" "\xcd\x49" + +# cxxx and +# +run_test_simple "andb Dn, Dn" "\xc4\x01" +run_test_expect_short "andb An, Dn" "\xc4\x09" +run_test_expect_short "andw An, Dn" "\xc4\x49" +run_test_simple "andb (An), Dn" "\xc4\x11" +run_test_simple "andb (An)+, Dn" "\xc4\x19" +run_test_simple "andw -(An), Dn" "\xc4\x61" +run_test_simple "andl (d8,PC,An), 
Dn" "\xc0\xbb\xc8\x07" +# GNU AS would emit ANDI for "and #imm,Xn", so we disassemble it as short +run_test_expect_short "andl #imm, D6" "\xc6\xbc\x44\xd1\xe6\xe9" + +# cxxx abcd +# +run_test_simple "abcd Dn, Dn" "\xc1\x01" +run_test_simple "abcd -(An), -(An)" "\xc1\x09" + +# 8xxx sbcd +# +run_test_simple "sbcdb Dn, Dn" "\x81\x01" +run_test_simple "sbcdb -(An), -(An)" "\x81\x09" + +# 8xxx or +# +run_test_simple "orb Dn, Dn" "\x84\x01" +run_test_expect_short "orb An, Dn" "\x84\x09" +run_test_expect_short "orw An, Dn" "\x84\x49" +run_test_simple "orb (An), Dn" "\x84\x11" +run_test_simple "orb (An)+, Dn" "\x84\x19" +run_test_simple "orw -(An), Dn" "\x84\x61" +run_test_simple "orl (d8,PC,An), Dn" "\x80\xbb\x88\x07" +# GNU AS would emit ORI for "or #imm,Xn", so we disassemble it as short +run_test_expect_short "orl #imm, D6" "\x86\xbc\x44\xd1\xe6\xe9" +run_test_expect_short "orl D2, D0 swapped direction" "\x81\x42" + +# 48xx nbcd swap pea +# +run_test_simple "swapw Dn" "\x48\x47" +run_test_simple "swapw Dn" "\x48\x42" +run_test_simple "peal (An)" "\x48\x50" +run_test_simple "peal (d16,An)" "\x48\x68\x80\x00" +run_test_simple "peal (d8,An,An)" "\x48\x77\x90\xfe" +run_test_simple "peal (d16,PC)" "\x48\x7a\x7f\xff" +run_test_simple "peal (d8,PC,Dn)" "\x48\x7b\x68\xfe" +run_test_simple "nbcdb Dn" "\x48\x03" +run_test_simple "nbcdb (An)" "\x48\x14" +run_test_simple "nbcdb (An)+" "\x48\x1c" +run_test_simple "nbcdb -(An)" "\x48\x25" +run_test_simple "nbcdb (d16,An)" "\x48\x28\x80\x00" +run_test_simple "nbcdb (d8,An,An)" "\x48\x37\x90\xfe" + +# 48xx ext +# +run_test_simple "extw %d7" "\x48\x87" +run_test_simple "extl %d4" "\x48\xc4" + +# exxx asl, asr, lsl, lsr, roxl, roxr, rol, ror +# +run_test_simple "asrb Dn, Dn" "\xe2\x22" +run_test_simple "asrb #1, Dn" "\xe2\x02" +run_test_simple "asrb #8, Dn" "\xe0\x02" +run_test_simple "aslb #7, Dn" "\xef\x02" +run_test_simple "asrw Dn, Dn" "\xe2\x62" +run_test_simple "asrl Dn, Dn" "\xe2\xa2" +run_test_simple "aslw #6, Dn" "\xed\x43" 
+run_test_simple "asll #5, Dn" "\xeb\x83" +run_test_simple "asrw (An)" "\xe0\xd0" +run_test_simple "lsrw (An)+" "\xe2\xd8" +run_test_simple "roxrw -(An)" "\xe4\xe0" +run_test_simple "rorw (d16,An)" "\xe6\xef\x01\x00" +# Found on random tests +run_test_simple "lsrb D1,D4" "\xe2\x2c" + +# 9xxx subx +# +run_test_simple "subxb Dn, Dn" "\x91\x00" +run_test_simple "subxw Dn, Dn" "\x93\x47" +run_test_simple "subxl Dn, Dn" "\x95\x86" +run_test_simple "subxb -(An), -(An)" "\x91\x08" +run_test_simple "subxw -(An), -(An)" "\x93\x4f" +run_test_simple "subxl -(An), -(An)" "\x95\x8e" + +# 9xxx suba +# +run_test_simple "subaw Dn, An" "\x94\xc1" +run_test_simple "subal An, An" "\x9b\xca" +run_test_simple "subaw (An)+, An" "\x9a\xda" +run_test_simple "subaw #imm, An" "\x9a\xfc\x01\x00" +run_test_simple "subal #imm, An" "\x9b\xfc\x80\x00\x00\x00" + +# 9xxx sub +# +run_test_simple "subb Dn, Dn" "\x94\x01" +run_test_expect_short "subb An, Dn" "\x94\x09" +run_test_simple "subw An, Dn" "\x94\x49" +run_test_simple "subb (An), Dn" "\x94\x11" +run_test_simple "subb (An)+, Dn" "\x94\x19" +run_test_simple "subb -(An), Dn" "\x94\x21" +# GNU AS would emit SUBQ for "sub #imm,Xn", so we disassemble it as short +run_test_expect_short "subl #imm, D6" "\x96\xbc\x44\xd1\xe6\xe9" + +# dxxx addx +# +run_test_simple "addxb Dn, Dn" "\xd1\x00" +run_test_simple "addxw Dn, Dn" "\xd3\x47" +run_test_simple "addxl Dn, Dn" "\xd5\x86" +run_test_simple "addxb -(An), -(An)" "\xd1\x08" +run_test_simple "addxw -(An), -(An)" "\xd3\x4f" +run_test_simple "addxl -(An), -(An)" "\xd5\x8e" + +# dxxx adda +# +run_test_simple "addaw Dn, An" "\xd4\xc1" +run_test_simple "addal An, An" "\xdb\xca" +run_test_simple "addaw (An)+, An" "\xda\xda" +run_test_simple "addaw #imm, An" "\xda\xfc\x01\x00" +run_test_simple "addal #imm, An" "\xdb\xfc\x80\x00\x00\x00" + +# dxxx add +# +run_test_simple "addb Dn, Dn" "\xd4\x01" +run_test_expect_short "addb An, Dn" "\xd4\x09" +run_test_simple "addw An, Dn" "\xd4\x49" +run_test_simple "addb 
(An), Dn" "\xd4\x11" +run_test_simple "addb (An)+, Dn" "\xd4\x19" +run_test_simple "addb -(An), Dn" "\xd4\x21" +run_test_simple "addl (d8,PC,An), Dn" "\xd0\xbb\x88\xff" +# GNU AS would emit ADDI for "add #imm,Xn", so we disassemble it as short +run_test_expect_short "addl #imm, D6" "\xd6\xbc\x44\xd1\xe6\xe9" + +# 4xxx chkw +# +run_test_simple "chkw Dn" "\x47\x82" +run_test_simple "chkw (An)" "\x41\x90" +run_test_simple "chkw (An)+" "\x47\x9b" +run_test_simple "chkw (d16,An)" "\x47\xa9\x80\x00" +run_test_simple "chkw (d8,An,Xi)" "\x47\xb2\xa8\x7f" + +# 4xxx leal +# +run_test_simple "leal (An)" "\x41\xd0" +run_test_simple "leal (d16,An)" "\x47\xe9\x80\x00" +run_test_simple "leal (d8,An,Xi)" "\x47\xf2\xa8\x7f" +run_test_simple "leal (d16,PC)" "\x47\xfa\x7f\xff" +run_test_simple "leal (d8,PC,Xi)" "\x47\xfb\xa8\x80" + +# 0xxx movep +# +run_test_simple "movepw Dn to (An)" "\x01\x0b\x00\xa0" +run_test_simple "movepl Dn to (An)" "\x03\x4a\x00\xa0" +run_test_simple "movepw (An) to Dn" "\x05\x89\x00\xa0" +run_test_simple "movepl (An) to Dn" "\x07\xc8\x00\xa0" + +# 0xxx bitwise ops +# +run_test_simple "btstl immediate in Dn" "\x08\x07\x00\x06" +run_test_simple "btstb immediate in (An)" "\x08\x17\x00\x06" +run_test_simple "btstb immediate in (xxx).L" "\x08\x39\x00\x06\xff\x00\x00\x00" +run_test_simple "btstb Dn in (xxx).L" "\x03\x39\xff\x00\x00\x00" +run_test_simple "bchgb Dn in (xxx).L" "\x05\x79\xff\x00\x00\x00" +run_test_simple "bclrb Dn in (xxx).L" "\x07\xb9\xff\x00\x00\x00" +run_test_simple "bsetb Dn in (xxx).L" "\x09\xf9\xff\x00\x00\x00" +run_test_expect_short "btstb large immediate in (xxx).L" "\x08\x39\x10\x21\xff\x00\x00\x00" + +# 0xxx immediate ops +# +run_test_simple "orib #0, D0" "\x00\x00\x00\x00" +run_test_simple "orib zero to CCR" "\x00\x3c\x00\x00" +run_test_simple "orib positive to CCR" "\x00\x3c\x00\x01" +run_test_simple "orib positive to CCR" "\x00\x3c\x00\x7f" +run_test_expect_short "orib #imm (too much for orib) to CCR" "\x00\x3c\x01\x00" +run_test_simple 
"orib negative to CCR" "\x00\x3c\xff\x80" +run_test_simple "orib negative to CCR" "\x00\x3c\xff\xff" +run_test_simple "oriw zero to SR" "\x00\x7c\x00\x00" +run_test_simple "oriw positive to SR" "\x00\x7c\x00\x0a" +run_test_simple "andiw positive to SR" "\x02\x7c\x00\x0a" +run_test_simple "eoriw positive to SR" "\x0a\x7c\x00\x0a" +run_test_simple "andib positive to CCR" "\x02\x3c\x00\x0a" +run_test_simple "eorib positive to CCR" "\x0a\x3c\x00\x0a" +run_test_simple "orib positive to Dn" "\x00\x07\x00\x0a" +run_test_simple "oriw positive to Dn" "\x00\x45\x00\x0a" +run_test_simple "oril positive to Dn" "\x00\x83\x00\x00\x00\x0a" +run_test_simple "andib negative to Dn" "\x00\x07\xff\x80" +run_test_simple "andiw negative to Dn" "\x00\x45\xff\x80" +run_test_simple "andil negative to Dn" "\x00\x83\xff\x80\x00\x00" +run_test_simple "addiw zero to (An)+" "\x06\x5a\x00\x00" +run_test_simple "subiw zero from -(An)" "\x06\x62\x00\x00" +run_test_simple "cmpib zero to (An)" "\x0c\x12\x00\x20" +run_test_simple "cmpiw zero to (An)" "\x0c\x52\x00\x30" +run_test_simple "cmpil zero to (An)" "\x0c\x92\x00\x00\x00\x40" +# From random tests +run_test_expect_short "cmpil with invalid opsize" "\x0c\xe4\x26\xa3" + +# 4axx +# +run_test_simple "tas Dn" "\x4a\xc2" +run_test_simple "tstb Dn" "\x4a\x02" +run_test_simple "tstw Dn" "\x4a\x42" +run_test_simple "tstl Dn" "\x4a\x82" +run_test_expect_short "tas (d16,PC)" "\x4a\xfa\xff\xff" +run_test_expect_short "tas (d8,PC,Xi)" "\x4a\xfb\x00\x00" + +# 4xxx +# +run_test_simple "negxb Dn" "\x40\x04" +run_test_simple "clrb Dn" "\x42\x05" +run_test_simple "negb Dn" "\x44\x06" +run_test_simple "notb Dn" "\x46\x07" +run_test_simple "negxw Dn" "\x40\x44" +run_test_simple "clrw Dn" "\x42\x45" +run_test_simple "negw Dn" "\x44\x46" +run_test_simple "notw Dn" "\x46\x47" +run_test_simple "negxl Dn" "\x40\x84" +run_test_simple "clrl Dn" "\x42\x85" +run_test_simple "negl Dn" "\x44\x86" +run_test_simple "notl Dn" "\x46\x87" + +# 4e4x +# +run_test_simple "trap 0" 
"\x4e\x40" +run_test_simple "trap 8" "\x4e\x48" +run_test_simple "trap 15" "\x4e\x4f" + +# 4e5x +# +run_test_simple "linkw positive" "\x4e\x52\x01\x00" +run_test_simple "linkw negative" "\x4e\x52\xff\xff" +run_test_simple "linkw negative" "\x4e\x52\x80\x00" +run_test_simple "unlk" "\x4e\x5a" + +# 4e6x +# +run_test_simple "move to USP" "\x4e\x62" +run_test_simple "move from USP" "\x4e\x6f" + +# 4xxx +# +run_test_simple "move from SR" "\x40\xc1" +run_test_simple "move to CCR" "\x44\xc2" +run_test_simple "move to SR" "\x46\xc3" + +# 70xx / 72xx/ 74xx / 76xx / 78xx / 7axx / 7cxx / 7exx +# +run_test_simple "moveq #0 to D0" "\x70\x00" +run_test_simple "moveq #1 to D2" "\x74\x01" +run_test_simple "moveq #127 to D7" "\x7e\x7f" +run_test_simple "moveq #-1 to D5" "\x7a\xff" +run_test_simple "moveq #-128 to D1" "\x72\x80" + +# 1xxx [xxxx [xxxx]] +# +run_test_simple "moveb Dn to Dn" "\x10\x01" +run_test_expect_short "moveb An to Dn" "\x10\x09" +run_test_simple "moveb (An) to Dn" "\x10\x11" +run_test_simple "moveb (An)+ to Dn" "\x10\x19" +run_test_simple "moveb -(An) to Dn" "\x10\x21" +run_test_simple "moveb (d16,An) to Dn" "\x10\x29\xfc\xeb" +run_test_simple "moveb (d8,An,Xi) to Dn" "\x10\x31\x98\x70" +run_test_simple "moveb (xxx).W to Dn" "\x10\x38\x98\x70" +run_test_simple "moveb (xxx).L to Dn" "\x10\x39\x30\x30\x30\x70" +run_test_simple "moveb (d16,PC) to Dn" "\x10\x3a\xfc\xeb" +run_test_simple "moveb (d8,PC,Xi) to Dn" "\x10\x3b\xa8\x70" +run_test_simple "moveb #imm to Dn" "\x10\x3c\xff\xff" + +# 3xxx [xxxx [xxxx]] +# +run_test_simple "movew Dn to Dn" "\x3e\x02" +run_test_simple "movew An to Dn" "\x3e\x0a" +run_test_simple "movew (An) to Dn" "\x3e\x12" +run_test_simple "movew (An)+ to Dn" "\x30\x1a" +run_test_simple "movew -(An) to Dn" "\x30\x22" +run_test_simple "movew (d16,An) to Dn" "\x30\x2a\x3f\xff" +run_test_simple "movew (d8,An,Xi) to Dn" "\x30\x32\x90\x80" +run_test_simple "movew (xxx).W to Dn" "\x30\x38\x90\x80" +run_test_simple "movew (xxx).L to Dn" 
"\x30\x39\xaa\xaa\xaa\xaa" +run_test_simple "movew (d16,PC) to Dn" "\x30\x3a\x3f\xff" +run_test_simple "movew (d8,PC,Xi) to Dn" "\x30\x3b\xa0\x80" +run_test_simple "movew #imm to Dn" "\x30\x3c\xa5\xa5" +run_test_simple "moveaw Dn" "\x30\x41" +run_test_simple "moveaw #imm" "\x30\x7c\xa8\x90" + +# 2xxx [xxxx [xxxx]] +# +run_test_simple "movel Dn to Dn" "\x24\x05" +run_test_simple "movel An to Dn" "\x24\x0d" +run_test_simple "movel (An) to Dn" "\x24\x15" +run_test_simple "movel (An)+ to Dn" "\x24\x1d" +run_test_simple "movel -(An) to Dn" "\x24\x25" +run_test_simple "movel (d16,An) to Dn" "\x24\x2d\x78\x20" +run_test_simple "movel (d8,An,Xi) to Dn" "\x24\x35\x98\x90" +run_test_simple "movel (xxx).W to Dn" "\x24\x38\x78\x90" +run_test_simple "movel (xxx).L to Dn" "\x24\x39\x00\x00\x78\x90" +run_test_simple "movel (d16,PC) to Dn" "\x24\x3a\x78\x20" +run_test_simple "movel (d8,PC,Xi) to Dn" "\x24\x3b\xa8\x90" +run_test_simple "movel #imm to Dn" "\x24\x3c\xa8\x90\x00\x00" +run_test_simple "moveal Dn" "\x20\x41" +run_test_simple "moveal #imm" "\x20\x7c\xa8\x90\x00\x00" + +# From random tests +# +run_test_simple "movel %pc@(-16,%a0:l),%a3@+ with nop" "\x26\xfb\x88\xf0\x4e\x71" + +# 4890 xxx +# +run_test_simple "movemw single register to (An)" "\x48\x90\x00\x01" +run_test_simple "movemw d0-d1,a0-a1 to (An)" "\x48\x90\x03\x03" +run_test_simple "moveml 6 spans to (An)" "\x48\xd0\xb6\xdb" +run_test_simple "movemw 8 non-neighboring regs to (An)" "\x48\x90\x55\x55" +run_test_simple "moveml other 8 non-neighboring regs to (An)" "\x48\xd0\xaa\xaa" +run_test_simple "moveml all registers to (An)" "\x48\xd0\xff\xff" +run_test_simple "movemw all registers to -(An)" "\x48\xa0\xff\xff" +run_test_simple "moveml all registers to (d16,An)" "\x48\xe8\xff\xff\x30\x1d" +run_test_simple "movemw all registers to (d8,An,Xi)" "\x48\xb7\xff\xff\x48\x0a" +run_test_simple "moveml all registers to (xxx).W" "\x48\xf8\xff\xff\x80\x10" +run_test_simple "movemw all registers to (xxx).L" 
"\x48\xb9\xff\xff\x00\x00\x7f\xf0" +run_test_simple "movemw (An) to all registers " "\x4c\x90\xff\xff" +run_test_simple "moveml (An)+ to all registers" "\x4c\xd8\xff\xff" +run_test_simple "movemw (d16,An) to all registers" "\x4c\xa8\xff\xff\x30\x1d" +run_test_simple "moveml (d8,An,Xi) to all registers" "\x4c\xf7\xff\xff\x48\x0a" +run_test_simple "movemw (xxx).W to all registers" "\x4c\xb8\xff\xff\x80\x10" +run_test_simple "moveml (xxx).L to all registers" "\x4c\xf9\xff\xff\x00\x00\x7f\xf0" + +# From random tests +# +run_test_expect_short "movem truncated" "\x48\x92" + +# 5x38 / 5x78 / 5xb8 (xxx).W +# +run_test_simple "addqb #8,offset:w" "\x50\x38\x00\x73" +run_test_simple "addql #4,offset:w" "\x58\xb8\x80\x14" + +# 5x39 / 5x79 / 5xb9 (xxx).L +# +run_test_simple "addqw #5,offset:l" "\x5a\x79\x18\xfc\x00\x00" +run_test_simple "addql #1,offset:l" "\x52\xb9\xf1\x00\x00\x01" + +# 5x30..5x37 / 5x70..5x77 / 5xb0..5xb7, (d16, An, Xi), Brief Extension Word +# +run_test_simple "addqb #8,a7(positive,d0:w)" "\x50\x37\x00\x73" +run_test_simple "addqw #5,a2(negative,d1:l)" "\x5a\x72\x18\xfc" +run_test_simple "addql #1,a3(negative,a3:w)" "\x52\xb3\xb0\x81" + +# 5x28..5x2f / 5x68..5x6f / 5xa8..5xaf, (d16, An), Displacement Word +# +run_test_simple "addqb #8,a7(positive)" "\x50\x2f\x00\x80" +run_test_simple "addqw #5,a2(negative)" "\x5a\x6a\xfc\xfc" +run_test_simple "addql #1,a3(negative)" "\x52\xab\xff\xff" + +# 5x20..5x27 / 5x60..5x67 / 5xa0..5xa7, -(An) +# +run_test_simple "addqb #8,-(a7)" "\x50\x27" +run_test_simple "addqw #5,-(a2)" "\x5a\x62" +run_test_simple "addql #1,-(a3)" "\x52\xa3" + +# 5x18..5x1f / 5x58..5x5f / 5x98..5x9f, (An)+ +# +run_test_simple "addqb #8,(a7)+" "\x50\x1f" +run_test_simple "addqw #5,(a2)+" "\x5a\x5a" +run_test_simple "addql #1,(a3)+" "\x52\x9d" + +# 5x10..5x17 / 5x50..5x57 / 5x90..5x97, (An) +# +run_test_simple "addqb #8,(a7)" "\x50\x17" +run_test_simple "addqw #5,(a2)" "\x5a\x52" +run_test_simple "addql #1,(a3)" "\x52\x93" + +# 5x08..5x0f / 
5x48..5x4f / 5x88..5x8f, An +# +# NOTE: addqb with An does not exist +run_test_simple "addqw #6,a7" "\x5c\x4f" +run_test_simple "addql #1,a5" "\x52\x8d" + +# 5x00..5x07 / 5x40..5x47 / 5x80..5x87, Dn +# +run_test_simple "addqb #8,d7" "\x50\x07" +run_test_simple "addqw #5,d2" "\x5a\x42" +run_test_simple "addql #1,d3" "\x52\x83" + +# 50f9 xxxx +# +run_test_simple "st d16:l positive" "\x51\xf9\x00\x00\x00\x74" +run_test_simple "st d16:l negative" "\x51\xf9\xc0\xfe\xba\xbe" + +# 50f8 xxxx +# +run_test_simple "st d16:w positive" "\x51\xf8\x00\x66" +run_test_simple "st d16:w negative" "\x51\xf8\x80\xc4" + +# 51f0 xxxx +# +run_test_simple "sf (d16:w,A4,D3) positive" "\x51\xf4\xb0\x04" +run_test_simple "sf (d16:w,A3,A6) negative" "\x51\xf3\x60\xf2" + +# 5fe8 xxxx +# +run_test_simple "sle (d16,A0) positive" "\x5f\xe8\x00\xa0" +run_test_simple "sle (d16,A0) negative" "\x5f\xe8\xe4\x02" + +# 5ee1 +# +run_test_simple "sgt -(%a1)" "\x5e\xe1" + +# 56df +# +run_test_simple "sne (%a7)+" "\x56\xdf" + +# 5dd3 +# +run_test_simple "slt (%a3)" "\x5d\xd3" + +# 57cx +# +run_test_iterative "seq Xn" "\x57" 0xc0 8 1 + +# 50cf xxxx +# +run_test_simple "dbt negative displacement" "\x50\xcf\xff\xfc" +run_test_simple "dbt positive displacement" "\x50\xcf\x01\x08" + +# 50c9 7ffe +# +# From random tests +run_test_simple "dbt %d1,.+32768" "\x50\xc9\x7f\xfe" + +# 60xx +# +run_test_simple "bras negative displacement" "\x60\xfc" +run_test_simple "bras positive displacement" "\x60\x08" + +# 60xx (xxxx) +# +run_test_simple "braw negative displacement" "\x60\x00\xf8\x2e" +run_test_simple "braw positive displacement" "\x60\x00\x03\xe6" +run_test_simple "braw zero displacement" "\x60\x00\x00\x00" + +# 61xx (xxxx) +# +run_test_simple "bsrs negative displacement" "\x61\x88" +run_test_simple "bsrw positive displacement" "\x61\x00\x03\xe6" + +# 6xxx +# +run_test_simple "bhis" "\x62\x0a" +run_test_simple "blss" "\x63\x0a" +run_test_simple "bccs" "\x64\x0a" +run_test_simple "bcss" "\x65\x0a" +run_test_simple
"bnes" "\x66\x0a" +run_test_simple "beqs" "\x67\x0a" +run_test_simple "bvcs" "\x68\x0a" +run_test_simple "bvss" "\x69\x0a" +run_test_simple "bpls" "\x6a\x0a" +run_test_simple "bmis" "\x6b\x0a" +run_test_simple "bges" "\x6c\x0a" +run_test_simple "blts" "\x6d\x0a" +run_test_simple "bgts" "\x6e\x0a" +run_test_simple "bles" "\x6f\x0a" + +# 4afc +# +# illegal +# +run_test_simple "illegal" "\x4a\xfc" + +# 4e70 +# +# reset +run_test_simple "reset" "\x4e\x70" + +# 4e71 +# +# nop +run_test_simple "nop" "\x4e\x71" + +# 4e72 xxxx +# +run_test_simple "stop #8:w" "\x4e\x72\x00\x08" +run_test_simple "stop #ffff:w" "\x4e\x72\xff\xff" + +# 4e73 +# +# rte +run_test_simple "rte" "\x4e\x73" + +# 4e75 +# +# rts +run_test_simple "rts" "\x4e\x75" + +# 4e76 +# +# trapv +run_test_simple "trapv" "\x4e\x76" + +# 4e77 +# +# rtr +run_test_simple "rtr" "\x4e\x77" + +# 4e90..4e97 +# +run_test_iterative "jsr M2 all An" "\x4e" 0x90 8 1 "" + +# (4ea8..4eaf) xxxx +# +run_test_simple "jsr M5 zero value" "\x4e\xa8\x00\x00" +run_test_iterative "jsr M5 all An, positive" "\x4e" 0xa8 8 1 "\x00\x0a" +run_test_simple "jsr M5 A0 negative" "\x4e\xa8\x80\x0f" + +# (4eb0..4eb7) xxxx +# +run_test_iterative "jsr M6 arbitrary An, positive" "\x4e" 0xb0 8 1 "\x00\x0f" +run_test_simple "jsr M6 A0 negative" "\x4e\xb0\x00\xf0" +run_test_simple "jsr M6 A0 zero" "\x4e\xb0\x00\x00" +run_test_simple "jsr M6 address register" "\x4e\xb0\x80\x0a" +run_test_simple "jsr M6 long displacement positive" "\x4e\xb0\x08\x0c" +run_test_simple "jsr M6 long displacement negative" "\x4e\xb0\x08\xb0" +run_test_iterative "jsr M6 arbitrary Xn2" "\x4e\xb0" 0x00 8 0x10 "\x0f" + +# 4eb8 xxxx Word displacement +# +run_test_simple "jsr M7 Xn0 zero" "\x4e\xb8\x00\x00" +run_test_simple "jsr M7 Xn0 positive" "\x4e\xb8\x00\x1f" +run_test_simple "jsr M7 Xn0 negative" "\x4e\xb8\x8a\x0c" + +# 4eb9 xxxx Long displacement +# +run_test_simple "jsr M7 X1 zero" "\x4e\xb9\x00\x00\x00\x00" +run_test_simple "jsr M7 X1 positive" "\x4e\xb9\x10\xbb\x43\x1f"
+run_test_simple "jsr M7 X1 negative" "\x4e\xb9\x80\xcc\xd9\x8a" + +# 4eba xxxx +# +run_test_simple "jsr M7 X2 zero value" "\x4e\xba\x00\x00" +run_test_simple "jsr M7 X2 positive value" "\x4e\xba\x00\x1f" +run_test_simple "jsr M7 X2 negative value" "\x4e\xba\x8a\x0c" + +# 4ebb xxxx +# +run_test_simple "jsr M7 X3 negative" "\x4e\xbb\x00\xf0" +run_test_simple "jsr M7 X3 zero displacement" "\x4e\xbb\x00\x00" +run_test_simple "jsr M7 X3 An2=A0" "\x4e\xbb\x80\x0a" +run_test_simple "jsr M7 X3 long positive displacement" "\x4e\xbb\x08\x0c" +run_test_simple "jsr M7 X3 long negative displacement" "\x4e\xbb\x08\xb0" +run_test_iterative "jsr M7 X3 arbitrary Dn2" "\x4e\xbb" 0x00 8 0x10 "\x0f" diff --git a/tests/test.ld b/tests/test.ld new file mode 100644 index 0000000..f939414 --- /dev/null +++ b/tests/test.ld @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: Unlicense + */ + +MEMORY { + ROM(rx) : ORIGIN = 0x00000000, LENGTH = 4M +} + +SECTIONS { + . = ORIGIN(ROM); + .text : { + KEEP(*(.text)) + . = ALIGN(2); + *(.text*) + . = ALIGN(2); + KEEP(*(.rodata)) + *(.rodata*) + . = ALIGN(2); + } >ROM +} diff --git a/tests/test_labels_referencing.bash b/tests/test_labels_referencing.bash new file mode 100644 index 0000000..8b5df40 --- /dev/null +++ b/tests/test_labels_referencing.bash @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +# +# SPDX-License-Identifier: Unlicense +# +# Tests against m68k-none-elf-as. 
+
+AS=m68k-none-elf-as
+OBJCOPY=m68k-none-elf-objcopy
+LD="m68k-none-elf-ld -Ttest.ld"
+DISASM="../cmake-build/m68k-disasm -ffollow-jumps"
+TEST_DIR=/tmp/m68k-disasm-tests-labels-referencing
+
+set -e
+CRED="\033[31m"
+CGREEN="\033[32m"
+CRST="\033[39m"
+
+rm -rf ${TEST_DIR}
+mkdir -p ${TEST_DIR}
+# Disassemble $2 with options $3, reassemble the listing and require an identical binary.
+run_test_r() {
+  local test_name=$1 # test title; its sanitized form names the work files
+  local test_name_sanitized=${test_name//[^a-zA-Z0-9_\-]/-}
+  local data=$2 # machine code written as an "echo -e" escape string
+  local args=$3 # extra disassembler options
+  local file_orig_bin=${TEST_DIR}/${test_name_sanitized}.orig.bin
+  local file_asm=${TEST_DIR}/${test_name_sanitized}.S
+  local file_as_o=${TEST_DIR}/${test_name_sanitized}.as.o
+  local file_as_elf=${TEST_DIR}/${test_name_sanitized}.as.elf
+  local file_as_bin=${TEST_DIR}/${test_name_sanitized}.as.bin
+  echo -ne "Test \"${test_name}\"... "
+  echo -ne "${data}" >${file_orig_bin}
+  ${DISASM} $args -o ${file_asm} ${file_orig_bin}
+  ${AS} -m68000 -o ${file_as_o} ${file_asm}
+  ${LD} -o ${file_as_elf} ${file_as_o}
+  ${OBJCOPY} ${file_as_elf} -O binary ${file_as_bin}
+  if ! cmp ${file_orig_bin} ${file_as_bin}; then
+    cat ${file_asm}
+    echo -e "${CRED}FAIL${CRST}: output and input binaries do not match"
+    hexdump -Cv ${file_orig_bin} >${file_orig_bin}.txt
+    hexdump -Cv ${file_as_bin} >${file_as_bin}.txt
+    echo ${file_orig_bin}
+    echo ${file_as_bin}
+    exit 1 # a bare "exit" would return the status of the echo above, i.e. success
+  elif grep ".short" ${file_asm} >/dev/null 2>&1; then
+    echo -e "${CRED}FAIL${CRST}: .short emitted"
+    cat ${file_asm}
+    exit 1 # same reason: do not inherit the status of "cat"
+  fi
+  local run_check=$4 # name of the extra check function to run on the listing
+  $run_check
+  #echo && cat ${file_asm}
+  echo -e "${CGREEN}OK${CRST}"
+}
+# Extra check: reject a raw "." displacement in the listing. Reads the caller's $file_asm.
+run_check_rdisp() {
+  if grep -e "\s\.\([+-]\+\|\s\+\|$\)" ${file_asm} >/dev/null 2>&1; then
+    echo -e "${CRED}FAIL${CRST}: raw displacement emitted"
+    cat ${file_asm}
+    exit 1 # report the failure instead of the status of "cat"
+  fi
+}
+# Extra check: reject digits that do not belong to an identifier. Reads the caller's $file_asm.
+run_check_r() {
+  if grep -e "[^0-9a-zA-Z_+][0-9]\+" ${file_asm} >/dev/null 2>&1; then
+    echo -e "${CRED}FAIL${CRST}: raw number or displacement emitted"
+    cat ${file_asm}
+    exit 1 # report the failure instead of the status of "cat"
+  fi
+}
+# Extra check that accepts any listing.
+run_check_dummy() { :; }
+# Round trip with relative labels; raw "." displacements are rejected.
+run_test_rdisp() {
+  run_test_r "$1" "$2" "-flabels -frel-labels" run_check_rdisp
+}
+# Round trip with absolute labels; raw numbers are rejected.
+run_test_rword() {
+  run_test_r "$1" "$2" "-flabels -fabs-labels" run_check_r
+}
+# Round trip with relative labels; raw numbers are rejected.
+run_test_rpcrel() {
+  run_test_r "$1" "$2" "-flabels -frel-labels" run_check_r
+}
+# Round trip with -fshort-ref-local-labels on top of both label kinds; no extra check.
+run_test_rlocal() {
+  run_test_r "$1" "$2" "-flabels -frel-labels -fabs-labels -fshort-ref-local-labels" run_check_dummy
+}
+
+run_test_rdisp "bras ." "\x60\xfe"
+run_test_rdisp "bras .-2" "\x4e\x71\x60\xfc"
+run_test_rdisp "bras .-1" "\x4e\x71\x60\xfd"
+run_test_rdisp "braw .+2" "\x4e\x71\x60\x00\x00\x00"
+run_test_rword "moveml 0x0:w,%d0" "\x4c\xf8\x00\x01\x00\x00"
+run_test_rword "moveml 0x6:w,%a0" "\x4c\xf8\x01\x00\x00\x06\x4e\x71\x4e\x71"
+run_test_rword "movemw 0x0:l,%a0" "\x4e\x71\x4e\x71\x4c\xb9\x01\x00\x00\x00\x00\x02"
+run_test_rpcrel "movemw (0,PC),%a0" "\x4e\x71\x4e\x71\x4c\xba\x01\x00\x00\x00"
+run_test_rpcrel "lea (0,PC)" "\x47\xfa\x00\x00"
+run_test_rpcrel "jmp (0,PC)" "\x4e\xfa\x00\x00"
+run_test_rword "peal 0x0:w" "\x48\x78\x00\x00"
+run_test_rword "peal 0x0:l" "\x48\x79\x00\x00\x00\x00"
+run_test_rpcrel "peal (0,PC)" "\x48\x7a\x00\x00"
+run_test_rword "nbcd 0x0:w" "\x48\x38\x00\x00"
+run_test_rword "nbcd 0x6:l with nop" "\x48\x39\x00\x00\x00\x06\x4e\x71"
+run_test_rword "cmpl 0x4:w, D2 with nop" "\xb4\xb8\x00\x04\x4e\x71"
+run_test_rword "cmpw 0x0:l, D2" "\xb4\x79\x00\x00\x00\x00"
+run_test_rpcrel "cmpl (0,PC), D2" "\xb4\xba\x00\x00"
+run_test_rpcrel "cmpl (-2,PC), D2" "\xb4\xba\xff\xfe"
+run_test_rlocal "bras 1f; nop; 1: bras 1b" "\x60\x02\x4e\x71\x60\xfe"
+run_test_rlocal "2: bras 1f; nop; 1: bras 2b" "\x60\x02\x4e\x71\x60\xfa"
diff --git a/tests/test_random.bash b/tests/test_random.bash
new file mode 100644
index 0000000..3c7a0d2
--- /dev/null
+++ b/tests/test_random.bash
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+#
+# SPDX-License-Identifier: Unlicense
+#
+# Tests against m68k-none-elf-as.
+
+AS=m68k-none-elf-as
+OBJCOPY=m68k-none-elf-objcopy
+LD="m68k-none-elf-ld -Ttest.ld"
+DISASM="../cmake-build/m68k-disasm -frdc -fxrefs-to -fxrefs-from -flabels -frel-labels -fabs-labels -fshort-ref-local-labels -fimm-hex -ffollow-jumps"
+TEST_DIR=/tmp/m68k-disasm-random-tests
+
+set -e
+CRED="\033[31m"
+CGREEN="\033[32m"
+CRST="\033[39m"
+
+rm -rf ${TEST_DIR}
+mkdir -p ${TEST_DIR}
+# Round-trip $2 KiB of random bytes through the disassembler and the assembler; $1 names the pass.
+run_test_random() {
+  local pass_number=$1
+  local blocks_count=$2
+  local test_name_sanitized=${pass_number//[^a-zA-Z0-9_\-]/-}
+  local file_orig_bin=${TEST_DIR}/${test_name_sanitized}.orig.bin
+  local file_asm=${TEST_DIR}/${test_name_sanitized}.S
+  local file_as_o=${TEST_DIR}/${test_name_sanitized}.as.o
+  local file_as_elf=${TEST_DIR}/${test_name_sanitized}.as.elf
+  local file_as_bin=${TEST_DIR}/${test_name_sanitized}.as.bin
+  echo -ne "Test random, pass ${pass_number}... "
+  dd if=/dev/urandom of=${file_orig_bin} bs=1024 count=${blocks_count} >/dev/null 2>&1
+  ${DISASM} -o ${file_asm} ${file_orig_bin}
+  ${AS} -o ${file_as_o} ${file_asm}
+  ${LD} -o ${file_as_elf} ${file_as_o}
+  ${OBJCOPY} ${file_as_elf} -O binary ${file_as_bin}
+  if ! cmp ${file_orig_bin} ${file_as_bin}; then
+    echo -e "${CRED}FAIL${CRST}: output and input binaries do not match"
+    hexdump -Cv ${file_orig_bin} >${file_orig_bin}.txt
+    hexdump -Cv ${file_as_bin} >${file_as_bin}.txt
+    echo ${file_orig_bin}
+    echo ${file_as_bin}
+    exit 1 # a bare "exit" would return the status of the echo above, i.e. success
+  else
+    echo -e "${CGREEN}OK${CRST}"
+    rm ${file_orig_bin} ${file_asm} ${file_as_o} ${file_as_elf} ${file_as_bin}
+  fi
+}
+
+# Tiny tests are mostly for ensuring that bounds checking is working properly,
+# because it is more likely to encounter something that looks like a truncated
+# instruction.
+#
+# If there is an obvious bug, then it will most likely be detected
+# here and it is easier to dissect tiny test blob than huge test trying to debug
+# single test case.
+for i in $(seq 1 1000); do
+  run_test_random tiny$i 1
+done
+
+# Huge tests are for the broad coverage. They catch a lot!
+for i in $(seq 1 10); do
+  run_test_random huge$i 1024
+done
diff --git a/tests/test_walk_and_follow_jumps.bash b/tests/test_walk_and_follow_jumps.bash
new file mode 100644
index 0000000..0f11e09
--- /dev/null
+++ b/tests/test_walk_and_follow_jumps.bash
@@ -0,0 +1,213 @@
+#!/usr/bin/env bash
+#
+# SPDX-License-Identifier: Unlicense
+#
+# Tests against reference text for -ffollow-jumps and -fwalk features
+
+TEST_DIR=/tmp/m68k-disasm-follow-jumps-walk-tests
+DISASM="../cmake-build/m68k-disasm -flabels -frel-labels -fabs-labels"
+
+set -e
+CRED="\033[31m"
+CGREEN="\033[32m"
+CRST="\033[39m"
+
+rm -rf ${TEST_DIR}
+mkdir -p ${TEST_DIR}
+
+OUTPUT_ASM="$TEST_DIR"/output.S
+TRACE="$TEST_DIR"/trace.txt
+REFERENCE="$TEST_DIR"/reference.S
+REFERENCE_W="$TEST_DIR"/reference_w.S
+REFERENCE_F="$TEST_DIR"/reference_f.S
+REFERENCE_WF="$TEST_DIR"/reference_wf.S
+
+# Becomes 1 as soon as any case mismatches its reference; used as the exit status.
+FAILED=0
+
+# run_test_inner NAME OPTIONS INPUT REFERENCE
+# Pipes INPUT (an "echo -e" escape string) into the disassembler together with
+# OPTIONS and the current $TRACE file, then compares the listing with REFERENCE.
+# A mismatch prints a unified diff and sets FAILED; later cases still run.
+run_test_inner() {
+  local test_name=$1
+  local disasm_args="$2"
+  local input="$3"
+  local reference="$4"
+  echo -ne "Test \"${test_name}\" ($disasm_args)... "
+  echo -ne "$input" | ${DISASM} --indent=' ' $disasm_args -t "$TRACE" -o "$OUTPUT_ASM" -
+  if ! diff --ignore-trailing-space "$reference" "$OUTPUT_ASM" >/dev/null 2>&1; then
+    echo -e "${CRED}FAIL${CRST}: output and reference text files do not match"
+    diff --color=always --unified --ignore-trailing-space "$reference" "$OUTPUT_ASM" || true
+    FAILED=1
+  else
+    echo -e "${CGREEN}OK${CRST}"
+  fi
+}
+
+# run_test NAME INPUT REF REF_W REF_F REF_WF
+# Checks INPUT in four modes: no extra option, -fwalk, -ffollow-jumps and both,
+# each against its own reference file.
+run_test() {
+  local test_name=$1
+  local input="$2"
+  local reference="$3"
+  local reference_w="$4"
+  local reference_f="$5"
+  local reference_wf="$6"
+  run_test_inner "$test_name" "" "$input" "$reference"
+  run_test_inner "$test_name" "-fwalk" "$input" "$reference_w"
+  run_test_inner "$test_name" "-ffollow-jumps" "$input" "$reference_f"
+  run_test_inner "$test_name" "-fwalk -ffollow-jumps" "$input" "$reference_wf"
+}
+
+
+echo -e "0" >"$TRACE"
+cat >"$REFERENCE" << EOF
+ nop
+ .short 0x4e71
+EOF
+cat >"$REFERENCE_W" << EOF
+ nop
+ nop
+EOF
+# $REFERENCE_F is same as $REFERENCE
+# $REFERENCE_WF is same as $REFERENCE_W
+run_test "linear nops, trace @0" "\x4e\x71\x4e\x71" \
+  "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W"
+
+
+cat >"$REFERENCE" << EOF
+ nop
+ .short 0x6002
+ .short 0x4e71
+ .short 0x4e71
+EOF
+cat >"$REFERENCE_W" << EOF
+ nop
+ bras L00000006
+ .short 0x4e71
+L00000006:
+ .short 0x4e71
+EOF
+# $REFERENCE_F is same as $REFERENCE
+cat >"$REFERENCE_WF" << EOF
+ nop
+ bras L00000006
+ .short 0x4e71
+L00000006:
+ nop
+EOF
+run_test "nop and unconditional branch, trace @0" "\x4e\x71\x60\x02\x4e\x71\x4e\x71" \
+  "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_WF"
+
+
+cat >"$REFERENCE" << EOF
+ nop
+ .short 0x6602
+ .short 0x4e71
+ .short 0x4e71
+EOF
+cat >"$REFERENCE_W" << EOF
+ nop
+ bnes L00000006
+ nop
+L00000006:
+ nop
+EOF
+# $REFERENCE_F is same as $REFERENCE
+# $REFERENCE_WF is same as $REFERENCE_W
+run_test "nop and conditional branch, trace @0" "\x4e\x71\x66\x02\x4e\x71\x4e\x71" \
+  "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W"
+
+
+cat >"$REFERENCE" << EOF
+ bnes L00000004
+ .short 0x4e71
+L00000004:
+ .short 0x4e71
+EOF
+cat >"$REFERENCE_W" << EOF
+ bnes L00000004
+ nop
+L00000004:
+ nop
+EOF
+cat >"$REFERENCE_F" << EOF
+ bnes L00000004
+ .short 0x4e71
+L00000004:
+ nop
+EOF
+# $REFERENCE_WF is same as $REFERENCE_W
+run_test "conditional branch, trace @0" "\x66\x02\x4e\x71\x4e\x71" \
+  "$REFERENCE" "$REFERENCE_W" "$REFERENCE_F" "$REFERENCE_W"
+
+
+cat >"$REFERENCE" << EOF
+ bras L00000004
+ .short 0x4e71
+L00000004:
+ .short 0x4e71
+EOF
+# $REFERENCE_W is same as $REFERENCE
+cat >"$REFERENCE_F" << EOF
+ bras L00000004
+ .short 0x4e71
+L00000004:
+ nop
+EOF
+# $REFERENCE_WF is same as $REFERENCE_F
+run_test "unconditional branch, trace @0" "\x60\x02\x4e\x71\x4e\x71" \
+  "$REFERENCE" "$REFERENCE" "$REFERENCE_F" "$REFERENCE_F"
+
+
+echo -e "0\n2" >"$TRACE"
+cat >"$REFERENCE" << EOF
+L00000000:
+ nop
+ bnes L00000000
+ .short 0x4e71
+EOF
+cat >"$REFERENCE_W" << EOF
+L00000000:
+ nop
+ bnes L00000000
+ nop
+EOF
+# $REFERENCE_F is same as $REFERENCE
+# $REFERENCE_WF is same as $REFERENCE_W
+run_test "nop and conditional branch backwards, trace @0, @2" "\x4e\x71\x66\xfc\x4e\x71" \
+  "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W"
+
+
+echo -e "2" >"$TRACE"
+cat >"$REFERENCE" << EOF
+L00000000:
+ .short 0x4e71
+ bnes L00000000
+ .short 0x4e71
+EOF
+cat >"$REFERENCE_W" << EOF
+L00000000:
+ .short 0x4e71
+ bnes L00000000
+ nop
+EOF
+cat >"$REFERENCE_F" << EOF
+L00000000:
+ nop
+ bnes L00000000
+ .short 0x4e71
+EOF
+cat >"$REFERENCE_WF" << EOF
+L00000000:
+ nop
+ bnes L00000000
+ nop
+EOF
+run_test "nop and conditional branch backwards, trace @2" "\x4e\x71\x66\xfc\x4e\x71" \
+  "$REFERENCE" "$REFERENCE_W" "$REFERENCE_F" "$REFERENCE_WF"
+
+# Report failure to the caller if any case above did not match its reference.
+exit ${FAILED}
-- 
cgit v1.2.3