Diffstat (limited to 'src')
-rw-r--r--  src/common.h          85
-rw-r--r--  src/data_buffer.cpp   29
-rw-r--r--  src/data_buffer.h     41
-rw-r--r--  src/disasm.cpp      2010
-rw-r--r--  src/disasm.h         401
-rw-r--r--  src/elf_format.h     328
-rw-r--r--  src/elf_image.cpp    172
-rw-r--r--  src/elf_image.h       55
-rw-r--r--  src/main.cpp         836
9 files changed, 3957 insertions, 0 deletions
diff --git a/src/common.h b/src/common.h
new file mode 100644
index 0000000..76fc956
--- /dev/null
+++ b/src/common.h
@@ -0,0 +1,85 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include <cstddef>
+#include <cstdint>
+
+enum class BFDTarget {
+ kAuto,
+ kBinary,
+ kELF,
+};
+
+struct Settings {
+ bool raw_data_comment{};
+ bool labels{};
+ bool rel_labels{};
+ bool abs_labels{};
+ bool imm_labels{};
+ bool short_ref_local_labels{};
+ bool export_labels{};
+ bool export_all_labels{};
+ bool export_functions{};
+ bool xrefs_to{};
+ bool xrefs_from{};
+ bool imm_hex{};
+ bool follow_jumps{};
+ bool walk{};
+ BFDTarget bfd{};
+ const char *indent{"\t"};
+};
+
+using RefKindMask = unsigned;
+
+constexpr RefKindMask kRef1RelMask = (1 << 0); // For first argument
+constexpr RefKindMask kRef1AbsMask = (1 << 1); // For first argument
+constexpr RefKindMask kRef2RelMask = (1 << 2); // For second argument
+constexpr RefKindMask kRef2AbsMask = (1 << 3); // For second argument
+constexpr RefKindMask kRef1ReadMask = (1 << 4); // For first argument
+constexpr RefKindMask kRef1WriteMask = (1 << 5); // For first argument
+constexpr RefKindMask kRef2ReadMask = (1 << 6); // For second argument
+constexpr RefKindMask kRef2WriteMask = (1 << 7); // For second argument
+/// Indicates whether the instruction is a call or just a branch, for any argument.
+/// Calls are BSR and JSR; branches are DBcc, Bcc and JMP.
+constexpr RefKindMask kRefCallMask = (1 << 8);
+/// Hack flag for MOVEM with PC relative value when -frel-labels is set
+constexpr RefKindMask kRefPcRelFix2Bytes = (1 << 9);
+/// Argument 1 may be an immediate value moved into an address register, which
+/// may refer to a labeled location
+constexpr RefKindMask kRef1ImmMask = (1 << 10);
+/// Everything for first argument
+constexpr RefKindMask kRef1Mask = kRef1RelMask | kRef1AbsMask | kRef1ReadMask | kRef1WriteMask | kRef1ImmMask;
+/// Everything for second argument
+constexpr RefKindMask kRef2Mask = kRef2RelMask | kRef2AbsMask | kRef2ReadMask | kRef2WriteMask;
+constexpr RefKindMask kRefRelMask = kRef1RelMask | kRef2RelMask;
+constexpr RefKindMask kRefAbsMask = kRef1AbsMask | kRef2AbsMask;
+constexpr RefKindMask kRef1DataMask = kRef1ReadMask | kRef1WriteMask; // For first argument
+constexpr RefKindMask kRef2DataMask = kRef2ReadMask | kRef2WriteMask; // For second argument
+constexpr RefKindMask kRefReadMask = kRef1ReadMask | kRef2ReadMask; // For any argument
+constexpr RefKindMask kRefWriteMask = kRef1WriteMask | kRef2WriteMask; // For any argument
+constexpr RefKindMask kRefDataMask = kRefReadMask | kRefWriteMask;
+constexpr size_t kInstructionSizeStepBytes = 2;
+constexpr size_t kRomSizeBytes = 4 * 1024 * 1024;
+constexpr size_t kDisasmMapSizeElements = kRomSizeBytes / kInstructionSizeStepBytes;
+
+static inline constexpr size_t Min(size_t a, size_t b) { return a < b ? a : b; }
+
+static inline constexpr uint16_t GetU16BE(const uint8_t *buffer)
+{
+ return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]);
+}
+
+static inline constexpr int16_t GetI16BE(const uint8_t *buffer)
+{
+ return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]);
+}
+
+static inline constexpr int32_t GetI32BE(const uint8_t *buffer)
+{
+ return (static_cast<uint32_t>(buffer[0]) << 24) |
+ (static_cast<uint32_t>(buffer[1]) << 16) |
+ (static_cast<uint32_t>(buffer[2]) << 8) |
+ static_cast<uint32_t>(buffer[3]);
+}
diff --git a/src/data_buffer.cpp b/src/data_buffer.cpp
new file mode 100644
index 0000000..33cb0b3
--- /dev/null
+++ b/src/data_buffer.cpp
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "data_buffer.h"
+
+#include <cassert>
+#include <cstring>
+
+void DataBuffer::Expand(size_t new_size)
+{
+ assert(buffer);
+ if (new_size <= buffer_size) {
+ return;
+ }
+ uint8_t *new_buffer{new uint8_t[new_size]};
+ assert(new_buffer);
+ memcpy(new_buffer, buffer, occupied_size);
+ delete [] buffer;
+ buffer = new_buffer;
+ buffer_size = new_size;
+}
+
+DataBuffer::~DataBuffer()
+{
+ delete [] buffer;
+ buffer = nullptr;
+ buffer_size = 0;
+ occupied_size = 0;
+}
diff --git a/src/data_buffer.h b/src/data_buffer.h
new file mode 100644
index 0000000..bc264d2
--- /dev/null
+++ b/src/data_buffer.h
@@ -0,0 +1,41 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "common.h"
+
+#include <cstddef>
+#include <cstdint>
+
+struct DataView {
+ const uint8_t *const buffer{};
+ const size_t size{};
+};
+
+struct DataBuffer {
+ DataBuffer(){};
+ DataBuffer(const DataBuffer&) = delete;
+ constexpr DataBuffer(DataBuffer&& other)
+ : buffer(other.buffer)
+ , buffer_size(other.buffer_size)
+ , occupied_size(other.occupied_size)
+ {
+ other.occupied_size = 0;
+ other.buffer_size = 0;
+ other.buffer = nullptr;
+ };
+ static constexpr size_t kInitialSize = 4 * 1024;
+ uint8_t *buffer{new uint8_t[kInitialSize]};
+ size_t buffer_size{kInitialSize};
+ size_t occupied_size{};
+ void Expand(size_t new_size);
+ constexpr auto View(size_t offset = 0, size_t size = SIZE_MAX) const
+ {
+ if (offset >= occupied_size) {
+ return DataView{};
+ }
+ return DataView{buffer + offset, Min(occupied_size - offset, size)};
+ };
+ ~DataBuffer();
+};
diff --git a/src/disasm.cpp b/src/disasm.cpp
new file mode 100644
index 0000000..2b2ea81
--- /dev/null
+++ b/src/disasm.cpp
@@ -0,0 +1,2010 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "disasm.h"
+#include "data_buffer.h"
+#include "common.h"
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+enum class MoveDirection: bool {
+ kRegisterToMemory = 0,
+ kMemoryToRegister = 1,
+};
+
+enum class ShiftDirection: bool {
+ kRight = 0,
+ kLeft = 1,
+};
+
+enum class ShiftKind: int {
+ kArithmeticShift = 0,
+ kLogicalShift = 1,
+ kRotateX = 2,
+ kRotate = 3,
+};
+
+constexpr Arg FetchImmediate(const uint32_t address, const DataView &code, const OpSize s)
+{
+ if (s == OpSize::kInvalid) {
+ return Arg{};
+ } else if (s == OpSize::kLong) {
+ if (address + kInstructionSizeStepBytes < code.size) {
+ const int32_t value = GetI32BE(code.buffer + address);
+ return Arg::Immediate(value);
+ }
+ } else if (address < code.size) {
+ const int16_t value = GetI16BE(code.buffer + address);
+ if (s == OpSize::kByte) {
+ // Technically it is impossible to have a value lower than -128 in an
+ // 8-bit signed integer, but the high byte being 0xff is actually
+ // valid: it is how values from -255 to -129 are
+ // represented.
+ if (value > 255 || value < -255) {
+ // Invalid immediate value for instruction with .b suffix
+ return Arg{};
+ }
+ }
+ return Arg::Immediate(value);
+ }
+ return Arg{};
+}
+
+constexpr Arg FetchArg(
+ const uint32_t address, const DataView &code, const int m, const int xn, const OpSize s)
+{
+ switch (m) {
+ case 0: // Dn
+ return Arg::Dn(xn);
+ case 1: // An
+ return Arg::An(xn);
+ case 2: // (An)
+ return Arg::AnAddr(xn);
+ case 3: // (An)+
+ return Arg::AnAddrIncr(xn);
+ case 4: // -(An)
+ return Arg::AnAddrDecr(xn);
+ case 5: // (d16, An), Additional Word
+ if (address < code.size) {
+ const int16_t d16 = GetI16BE(code.buffer + address);
+ return Arg::D16AnAddr(xn, d16);
+ }
+ break;
+ case 6: // (d8, An, Xi), Brief Extension Word
+ if (address < code.size) {
+ const uint16_t briefext = GetU16BE(code.buffer + address);
+ if (briefext & 0x0700) {
+ // briefext must have zeros in bits 8, 9 and 10,
+ // i.e. xxxx_x000_xxxx_xxxx
+ break;
+ }
+ // Xi number (lower 3 bits, mask 0x7) with An/Dn bit (mask 0x8)
+ const uint8_t xi = (briefext >> 12) & 0xf;
+ const OpSize s = ((briefext >> 11) & 1) ? OpSize::kLong : OpSize::kWord;
+ const int8_t d8 = briefext & 0xff;
+ return Arg::D8AnXiAddr(xn, xi, s, d8);
+ }
+ break;
+ case 7:
+ switch (xn) {
+ case 0: // (xxx).W, Additional Word
+ if (address < code.size) {
+ const int32_t w = GetI16BE(code.buffer + address);
+ return Arg::Word(w);
+ }
+ break;
+ case 1: // (xxx).L, Additional Long
+ if (address + kInstructionSizeStepBytes < code.size) {
+ const int32_t l = GetI32BE(code.buffer + address);
+ return Arg::Long(l);
+ }
+ break;
+ case 2: // (d16, PC), Additional Word
+ if (address < code.size) {
+ const int16_t d16 = GetI16BE(code.buffer + address);
+ return Arg::D16PCAddr(d16);
+ }
+ break;
+ case 3: // (d8, PC, Xi), Brief Extension Word
+ if (address < code.size) {
+ const uint16_t briefext = GetU16BE(code.buffer + address);
+ if (briefext & 0x0700) {
+ // briefext must have zeros in bits 8, 9 and 10,
+ // i.e. xxxx_x000_xxxx_xxxx
+ break;
+ }
+ // Xi number (lower 3 bits, mask 0x7) with An/Dn bit (mask 0x8)
+ const uint8_t xi = (briefext >> 12) & 0xf;
+ const OpSize s = ((briefext >> 11) & 1) ? OpSize::kLong : OpSize::kWord;
+ const int8_t d8 = briefext & 0xff;
+ return Arg::D8PCXiAddr(xn, xi, s, d8);
+ }
+ break;
+ case 4: // #imm
+ return FetchImmediate(address, code, s);
+ case 5: // Does not exist
+ case 6: // Does not exist
+ case 7: // Does not exist
+ break;
+ }
+ break;
+ }
+ return Arg{};
+}
+
+static Arg FetchArg(
+ const uint32_t address, const DataView &code, const uint16_t instr, const OpSize s)
+{
+ const int addrmode = instr & 0x3f;
+ const int m = (addrmode >> 3) & 7;
+ const int xn = addrmode & 7;
+ return FetchArg(address, code, m, xn, s);
+}
+
+static size_t disasm_verbatim(DisasmNode &node, const uint16_t instr)
+{
+ node.op = Op::Raw(instr);
+ return node.size;
+}
+
+static size_t disasm_jsr_jmp(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const OpSize opsize = OpSize::kWord;
+ const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (a.mode) {
+ case AddrMode::kInvalid:
+ case AddrMode::kDn: // 4e80..4e87 / 4ec0..4ec7
+ case AddrMode::kAn: // 4e88..4e8f / 4ec8..4ecf
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr: // 4e90..4e97 / 4ed0..4ed7
+ // NOTE: dynamic jump, ref_addr may be obtained during the trace
+ break;
+ case AddrMode::kAnAddrIncr: // 4e98..4e9f / 4ed8..4edf
+ case AddrMode::kAnAddrDecr: // 4ea0..4ea7 / 4ee0..4ee7
+ return disasm_verbatim(node, instr);
+ case AddrMode::kD16AnAddr: // 4ea8..4eaf / 4ee8..4eef
+ // NOTE: dynamic jump, ref_addr may be obtained during the trace
+ break;
+ case AddrMode::kD8AnXiAddr: // 4eb0..4eb7 / 4ef0..4ef7
+ // NOTE: dynamic jump, ref_addr may be obtained during the trace
+ break;
+ case AddrMode::kWord: // 4eb8 / 4ef8
+ {
+ const uint32_t ref_addr = static_cast<uint32_t>(a.lword);
+ node.ref1_addr = ref_addr;
+ node.ref_kinds = kRef1AbsMask;
+ }
+ break;
+ case AddrMode::kLong: // 4eb9 / 4ef9
+ {
+ const uint32_t ref_addr = static_cast<uint32_t>(a.lword);
+ node.ref1_addr = ref_addr;
+ node.ref_kinds = kRef1AbsMask;
+ }
+ break;
+ case AddrMode::kD16PCAddr: // 4eba / 4efa
+ {
+ const uint32_t ref_addr = node.address + kInstructionSizeStepBytes +
+ static_cast<uint32_t>(a.d16_pc.d16);
+ node.ref1_addr = ref_addr;
+ node.ref_kinds = kRef1RelMask;
+ }
+ break;
+ case AddrMode::kD8PCXiAddr: // 4ebb / 4efb
+ // NOTE: dynamic jump, ref_addr may be obtained during the trace
+ break;
+ case AddrMode::kImmediate: // 4ebc / 4efc
+ return disasm_verbatim(node, instr);
+ }
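+ // Bit 6 distinguishes JMP (0x4ec0..0x4eff) from JSR (0x4e80..0x4ebf)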
+ const bool is_jmp = instr & 0x40;
+ node.ref_kinds |= is_jmp ? 0 : kRefCallMask;
+ node.op = Op::Typical(is_jmp ? OpCode::kJMP : OpCode::kJSR, OpSize::kNone, a);
+ return node.size = kInstructionSizeStepBytes + a.Size(opsize);
+}
+
+static size_t disasm_ext(DisasmNode &node, const OpSize opsize, const Arg arg)
+{
+ assert(arg.mode == AddrMode::kDn);
+ node.op = Op::Typical(OpCode::kEXT, opsize, arg);
+ return node.size = kInstructionSizeStepBytes + arg.Size(opsize);
+}
+
+static size_t disasm_ext_movem(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const auto dir = static_cast<MoveDirection>((instr >> 10) & 1);
+ const unsigned m = (instr >> 3) & 7;
+ const unsigned xn = instr & 7;
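+ // Bit 6 of the opcode selects MOVEM size: 0 means word, 1 means long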
+ const auto opsize = static_cast<OpSize>(((instr >> 6) & 1) + 1);
+ if (m == 0 && dir == MoveDirection::kRegisterToMemory) {
+ return disasm_ext(node, opsize, Arg::Dn(xn));
+ }
+ if (node.address + kInstructionSizeStepBytes >= code.size) {
+ // Not enough space for regmask, but maybe it is just EXT?
+ return disasm_verbatim(node, instr);
+ }
+ const unsigned regmask = GetU16BE(code.buffer + node.address + kInstructionSizeStepBytes);
+ if (regmask == 0) {
+ // This is just not representable: at least one register must be specified
+ return disasm_verbatim(node, instr);
+ }
+ const auto a = FetchArg(
+ node.address + kInstructionSizeStepBytes * 2, code, m, xn, opsize);
+ switch (a.mode) {
+ case AddrMode::kInvalid:
+ case AddrMode::kDn: // 4880..4887 / 4c80..4c87 / 48c0..48c7 / 4cc0..4cc7
+ case AddrMode::kAn: // 4888..488f / 4c88..4c8f / 48c8..48cf / 4cc8..4ccf
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr: // 4890..4897 / 4c90..4c97 / 48d0..48d7 / 4cd0..4cd7
+ break;
+ case AddrMode::kAnAddrIncr: // 4898..489f / 4c98..4c9f / 48d8..48df / 4cd8..4cdf
+ if (dir == MoveDirection::kRegisterToMemory) {
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ case AddrMode::kAnAddrDecr: // 48a0..48a7 / 4ca0..4ca7 / 48e0..48e7 / 4ce0..4ce7
+ if (dir == MoveDirection::kMemoryToRegister) {
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ case AddrMode::kD16AnAddr: // 48a8..48af / 4ca8..4caf / 48e8..48ef / 4ce8..4cef
+ case AddrMode::kD8AnXiAddr: // 48b0..48b7 / 4cb0..4cb7 / 48f0..48f7 / 4cf0..4cf7
+ break;
+ case AddrMode::kWord: // 48b8 / 4cb8 / 48f8 / 4cf8
+ case AddrMode::kLong: // 48b9 / 4cb9 / 48f9 / 4cf9
+ if (dir == MoveDirection::kRegisterToMemory) {
+ node.ref2_addr = static_cast<uint32_t>(a.lword);
+ node.ref_kinds = kRef2AbsMask | kRef2WriteMask;
+ } else {
+ node.ref1_addr = static_cast<uint32_t>(a.lword);
+ node.ref_kinds = kRef1AbsMask | kRef1ReadMask;
+ }
+ break;
+ case AddrMode::kD16PCAddr: // 48ba / 4cba / 48fa / 4cfa
+ case AddrMode::kD8PCXiAddr: // 48bb / 4cbb / 48fb / 4cfb
+ if (dir == MoveDirection::kRegisterToMemory) {
+ return disasm_verbatim(node, instr);
+ } else if (a.mode == AddrMode::kD16PCAddr) {
+ // XXX: the kRefPcRelFix2Bytes flag is a hack needed to correctly
+ // print the label for a PC-relative value referenced by MOVEM,
+ // alongside *NOT* adding kInstructionSizeStepBytes to ref1_addr.
+ // Still figuring that out.
+ node.ref1_addr = node.address + kInstructionSizeStepBytes * 2 +
+ static_cast<uint32_t>(a.d16_pc.d16);
+ node.ref_kinds = kRef1RelMask | kRef1ReadMask | kRefPcRelFix2Bytes;
+ }
+ break;
+ case AddrMode::kImmediate: // 48bc / 4cbc / 48fc / 4cfc
+ return disasm_verbatim(node, instr);
+ }
+ if (dir == MoveDirection::kMemoryToRegister) {
+ const auto arg2 = (a.mode == AddrMode::kAnAddrDecr)
+ ? Arg::RegMaskPredecrement(regmask) : Arg::RegMask(regmask);
+ node.op = Op::Typical(OpCode::kMOVEM, opsize, a, arg2);
+ } else {
+ const auto arg1 = (a.mode == AddrMode::kAnAddrDecr)
+ ? Arg::RegMaskPredecrement(regmask) : Arg::RegMask(regmask);
+ node.op = Op::Typical(OpCode::kMOVEM, opsize, arg1, a);
+ }
+ return node.size = kInstructionSizeStepBytes * 2 + a.Size(opsize);
+}
+
+static size_t disasm_lea(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const OpSize opsize = OpSize::kLong;
+ const auto addr = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (addr.mode) {
+ case AddrMode::kInvalid:
+ case AddrMode::kDn:
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ break;
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ break;
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ node.ref1_addr = static_cast<uint32_t>(addr.lword);
+ node.ref_kinds = kRef1AbsMask | kRef1ReadMask;
+ break;
+ case AddrMode::kD16PCAddr:
+ node.ref1_addr = node.address + kInstructionSizeStepBytes +
+ static_cast<uint32_t>(addr.d16_pc.d16);
+ node.ref_kinds = kRef1RelMask | kRef1ReadMask;
+ break;
+ case AddrMode::kD8PCXiAddr:
+ break;
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ const unsigned an = ((instr >> 9) & 7);
+ const auto reg = Arg::An(an);
+ node.op = Op::Typical(OpCode::kLEA, opsize, addr, reg);
+ return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize);
+}
+
+static size_t disasm_chk(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const OpSize opsize = OpSize::kWord;
+ const auto src = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (src.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ const unsigned dn = ((instr >> 9) & 7);
+ const auto dst = Arg::Dn(dn);
+ node.op = Op::Typical(OpCode::kCHK, opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
+}
+
+static size_t disasm_bra_bsr_bcc(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const int16_t dispmt0 = static_cast<int8_t>(instr & 0xff);
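+ // An 8-bit displacement of zero means a 16-bit displacement word follows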
+ if (dispmt0 == -1) {
+ // This would lead to executing an invalid instruction and
+ // GNU AS also refuses to assemble it
+ return disasm_verbatim(node, instr);
+ }
+ const auto opsize = dispmt0 ? OpSize::kShort : OpSize::kWord;
+ if (dispmt0 == 0) {
+ // Check the boundaries
+ if (node.address + kInstructionSizeStepBytes >= code.size) {
+ return disasm_verbatim(node, instr);
+ }
+ node.size = kInstructionSizeStepBytes * 2;
+ } else {
+ node.size = kInstructionSizeStepBytes;
+ }
+ const int16_t dispmt = kInstructionSizeStepBytes + (dispmt0
+ ? dispmt0 : GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes));
+ const uint32_t ref_addr = static_cast<uint32_t>(node.address + dispmt);
+ Condition condition = static_cast<Condition>((instr >> 8) & 0xf);
+ // A false condition indicates BSR
+ node.ref1_addr = ref_addr;
+ node.ref_kinds = kRef1RelMask | ((condition == Condition::kF) ? kRefCallMask : 0);
+ node.op = Op{OpCode::kBcc, opsize, condition, Arg::Displacement(dispmt)};
+ return node.size;
+}
+
+static OpCode OpCodeForBitOps(const unsigned opcode)
+{
+ switch (opcode) {
+ case 0: return OpCode::kBTST;
+ case 1: return OpCode::kBCHG;
+ case 2: return OpCode::kBCLR;
+ case 3: return OpCode::kBSET;
+ }
+ assert(false);
+ return OpCode::kNone;
+}
+
+static size_t disasm_movep(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const unsigned dn = ((instr >> 9) & 7);
+ const unsigned an = instr & 7;
+ const OpSize opsize = ((instr >> 6) & 1) ? OpSize::kLong : OpSize::kWord;
+ const auto dir = static_cast<MoveDirection>(!((instr >> 7) & 1));
+ const auto addr = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, 5, an, opsize);
+ if (addr.mode == AddrMode::kInvalid) {
+ // Boundary check failed, most likely
+ return disasm_verbatim(node, instr);
+ }
+ assert(addr.mode == AddrMode::kD16AnAddr);
+ const auto reg = Arg::Dn(dn);
+ if (dir == MoveDirection::kRegisterToMemory) {
+ node.op = Op::Typical(OpCode::kMOVEP, opsize, reg, addr);
+ } else {
+ node.op = Op::Typical(OpCode::kMOVEP, opsize, addr, reg);
+ }
+ return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize);
+}
+
+static size_t disasm_src_arg_bitops_movep(
+ DisasmNode &node,
+ const uint16_t instr,
+ const DataView &code,
+ const bool has_dn_src = true)
+{
+ const unsigned m = (instr >> 3) & 7;
+ if ((m == 1) && has_dn_src) {
+ return disasm_movep(node, instr, code);
+ }
+ const unsigned dn = ((instr >> 9) & 7);
+ const unsigned xn = instr & 7;
+ const OpSize opsize0 = OpSize::kByte;
+ // Fetch AddrMode::kDn if has_dn_src, otherwise fetch AddrMode::kImmediate
+ // byte
+ const auto src = FetchArg(
+ node.address + kInstructionSizeStepBytes,
+ code,
+ (has_dn_src) ? 0 : 7,
+ dn,
+ opsize0);
+ if (src.mode == AddrMode::kInvalid) {
+ return disasm_verbatim(node, instr);
+ }
+ if (has_dn_src) {
+ assert(src.mode == AddrMode::kDn);
+ } else {
+ assert(dn == 4);
+ assert(src.mode == AddrMode::kImmediate);
+ }
+ const auto dst = FetchArg(
+ node.address + kInstructionSizeStepBytes + src.Size(opsize0), code, m, xn, opsize0);
+ const unsigned opcode = (instr >> 6) & 3;
+ switch (dst.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ if (opcode != 0) {
+ // PC relative destination address argument available for BTST only
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ const auto opsize = dst.mode == AddrMode::kDn ? OpSize::kLong : OpSize::kByte;
+ node.op = Op::Typical(OpCodeForBitOps(opcode), opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize0) + dst.Size(opsize0);
+}
+
+static size_t disasm_bitops(DisasmNode &n, const uint16_t i, const DataView &c)
+{
+ return disasm_src_arg_bitops_movep(n, i, c, false);
+}
+
+static size_t disasm_logical_immediate_to(
+ DisasmNode &node, OpCode opcode, OpSize opsize, Arg imm)
+{
+ node.op = Op::Typical(opcode, opsize, imm, (opsize == OpSize::kByte) ? Arg::CCR() : Arg::SR());
+ return node.size = kInstructionSizeStepBytes * 2;
+}
+
+static OpCode OpCodeForLogicalImmediate(const unsigned opcode)
+{
+ switch (opcode) {
+ case 0: return OpCode::kORI;
+ case 1: return OpCode::kANDI;
+ case 2: return OpCode::kSUBI;
+ case 3: return OpCode::kADDI;
+ case 4: break;
+ case 5: return OpCode::kEORI;
+ case 6: return OpCode::kCMPI;
+ case 7: break;
+ }
+ assert(false);
+ return OpCode::kNone;
+}
+
+static size_t disasm_bitops_movep(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const bool has_source_reg = (instr >> 8) & 1;
+ if (has_source_reg) {
+ return disasm_src_arg_bitops_movep(node, instr, code);
+ }
+ const unsigned opcode = (instr >> 9) & 7;
+ if (opcode == 7) {
+ // Does not exist
+ return disasm_verbatim(node, instr);
+ }
+ if (opcode == 4) {
+ return disasm_bitops(node, instr, code);
+ }
+ const int m = (instr >> 3) & 7;
+ const int xn = instr & 7;
+ const auto opsize = static_cast<OpSize>((instr >> 6) & 3);
+ if (opsize == OpSize::kInvalid) {
+ // Does not exist
+ return disasm_verbatim(node, instr);
+ }
+ // Anticipating #imm which means "to CCR"/"to SR", depending on OpSize
+ if (m == 7 && xn == 4) {
+ if (opcode == 2 || opcode == 3 || opcode == 6) {
+ // SUBI, ADDI and CMPI have neither immediate destination arguments
+ // nor "to CCR"/"to SR" variants
+ return disasm_verbatim(node, instr);
+ }
+ if (opsize == OpSize::kLong) {
+ // Does not exist
+ return disasm_verbatim(node, instr);
+ }
+ }
+ const auto src = FetchImmediate(node.address + kInstructionSizeStepBytes, code, opsize);
+ if (src.mode == AddrMode::kInvalid) {
+ return disasm_verbatim(node, instr);
+ }
+ assert(src.mode == AddrMode::kImmediate);
+ const OpCode mnemonic = OpCodeForLogicalImmediate(opcode);
+ if (m == 7 && xn == 4) {
+ return disasm_logical_immediate_to(node, mnemonic, opsize, src);
+ }
+ const auto dst = FetchArg(
+ node.address + kInstructionSizeStepBytes + src.Size(opsize), code, m, xn, opsize);
+ switch (dst.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ if (opcode != 6) {
+ // PC relative destination address argument available for CMPI only
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ node.op = Op::Typical(mnemonic, opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
+}
+
+static size_t disasm_move_movea(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const int opsize_raw = (instr >> 12) & 3;
+ const OpSize opsize = (opsize_raw == 1)
+ ? OpSize::kByte : (opsize_raw == 3 ? OpSize::kWord : OpSize::kLong);
+ const auto src = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (src.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ if (opsize == OpSize::kByte) {
+ // Does not exist
+ return disasm_verbatim(node, instr);
+ }
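+ /* Fall through */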
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ break;
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ node.ref1_addr = static_cast<uint32_t>(src.lword);
+ node.ref_kinds |= kRef1AbsMask | kRef1ReadMask;
+ break;
+ case AddrMode::kD16PCAddr:
+ node.ref1_addr = node.address + kInstructionSizeStepBytes +
+ static_cast<uint32_t>(src.d16_pc.d16);
+ node.ref_kinds |= kRef1RelMask | kRef1ReadMask;
+ break;
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ break;
+ }
+ const int m = (instr >> 6) & 7;
+ const int xn = (instr >> 9) & 7;
+ const auto dst = FetchArg(
+ node.address + kInstructionSizeStepBytes + src.Size(opsize), code, m, xn, opsize);
+ switch (dst.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ if (opsize == OpSize::kByte) {
+ // Does not exist
+ return disasm_verbatim(node, instr);
+ }
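+ /* Fall through */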
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ break;
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ node.ref2_addr = static_cast<uint32_t>(dst.lword);
+ node.ref_kinds |= kRef2AbsMask | kRef2WriteMask;
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ // XXX Assuming that moving a long immediate value into an address register
+ // is basically a sneaky LEA. This may not hold in all cases.
+ if (src.type == ArgType::kImmediate && dst.type == ArgType::kAn) {
+ if (opsize == OpSize::kLong) {
+ node.ref1_addr = static_cast<uint32_t>(src.lword);
+ node.ref_kinds |= kRef1ImmMask | kRef1ReadMask;
+ } else if (opsize == OpSize::kWord) {
+ node.ref1_addr = static_cast<int16_t>(static_cast<uint16_t>(src.lword));
+ node.ref_kinds |= kRef1ImmMask | kRef1ReadMask;
+ }
+ }
+ const auto opcode = (dst.mode == AddrMode::kAn) ? OpCode::kMOVEA : OpCode::kMOVE;
+ node.op = Op::Typical(opcode, opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
+}
+
+static size_t disasm_move_from_sr(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const auto opsize = OpSize::kWord;
+ const auto dst = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (dst.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ node.op = Op::Typical(OpCode::kMOVE, opsize, Arg::SR(), dst);
+ return node.size = kInstructionSizeStepBytes + dst.Size(opsize);
+}
+
+static size_t disasm_move_to(
+ DisasmNode &node, const uint16_t instr, const DataView &code, const ArgType reg)
+{
+ const auto opsize = OpSize::kWord;
+ const auto src = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (src.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ break;
+ }
+ node.op = Op::Typical(OpCode::kMOVE, opsize, src, Arg{{reg}, {0}});
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize);
+}
+
+static OpCode opcode_for_negx_clr_neg_not(const unsigned opcode)
+{
+ switch (opcode) {
+ case 0: return OpCode::kNEGX;
+ case 1: return OpCode::kCLR;
+ case 2: return OpCode::kNEG;
+ case 3: return OpCode::kNOT;
+ }
+ assert(false);
+ return OpCode::kNone;
+}
+
+static size_t disasm_move_negx_clr_neg_not(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const auto opsize = static_cast<OpSize>((instr >> 6) & 3);
+ const unsigned opcode = (instr >> 9) & 3;
+ if (opsize == OpSize::kInvalid) {
+ switch (opcode) {
+ case 0:
+ return disasm_move_from_sr(node, instr, code);
+ case 1:
+ return disasm_verbatim(node, instr);
+ case 2:
+ return disasm_move_to(node, instr, code, ArgType::kCCR);
+ case 3:
+ return disasm_move_to(node, instr, code, ArgType::kSR);
+ }
+ assert(false);
+ return disasm_verbatim(node, instr);
+ }
+ const auto a = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (a.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ node.op = Op::Typical(opcode_for_negx_clr_neg_not(opcode), opsize, a);
+ return node.size = kInstructionSizeStepBytes + a.Size(opsize);
+}
+
+static size_t disasm_trivial(
+ DisasmNode &node, const OpCode opcode)
+{
+ node.op = Op::Typical(opcode, OpSize::kNone);
+ return node.size;
+}
+
+static size_t disasm_tas(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const auto opsize = OpSize::kByte;
+ const auto a = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (a.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ node.op = Op::Typical(OpCode::kTAS, opsize, a);
+ return node.size = kInstructionSizeStepBytes + a.Size(opsize);
+}
+
+static size_t disasm_tst_tas_illegal(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const auto opsize = static_cast<OpSize>((instr >> 6) & 3);
+ const int m = (instr >> 3) & 7;
+ const int xn = instr & 7;
+ if (opsize == OpSize::kInvalid) {
+ if (m == 7 && xn == 4){
+ return disasm_trivial(node, OpCode::kILLEGAL);
+ }
+ return disasm_tas(node, instr, code);
+ }
+ const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, m, xn, opsize);
+ switch (a.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ break;
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ node.op = Op::Typical(OpCode::kTST, opsize, a);
+ return node.size = kInstructionSizeStepBytes + a.Size(opsize);
+}
+
+static size_t disasm_trap(DisasmNode &node, const uint16_t instr)
+{
+ const unsigned vector = instr & 0xf;
+ node.op = Op::Typical(OpCode::kTRAP, OpSize::kNone, Arg::Immediate(vector));
+ return node.size = kInstructionSizeStepBytes;
+}
+
+static size_t disasm_link_unlink(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const bool unlk = (instr >> 3) & 1;
+ const unsigned xn = instr & 7;
+ if (unlk) {
+ node.op = Op::Typical(OpCode::kUNLK, OpSize::kNone, Arg::AddrModeXn(ArgType::kAn, xn));
+ return node.size = kInstructionSizeStepBytes;
+ }
+ const auto opsize = OpSize::kWord;
+ const auto src = FetchImmediate(node.address + kInstructionSizeStepBytes, code, opsize);
+ if (src.mode != AddrMode::kImmediate) {
+ return disasm_verbatim(node, instr);
+ }
+ node.op = Op::Typical(OpCode::kLINK, opsize, Arg::AddrModeXn(ArgType::kAn, xn), src);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize);
+}
+
+static size_t disasm_move_usp(DisasmNode &node, const uint16_t instr)
+{
+ const unsigned xn = instr & 7;
+ const auto dir = static_cast<MoveDirection>((instr >> 3) & 1);
+ if (dir == MoveDirection::kRegisterToMemory) {
+ node.op = Op::Typical(
+ OpCode::kMOVE, OpSize::kLong, Arg::An(xn), Arg::USP());
+ } else {
+ node.op = Op::Typical(
+ OpCode::kMOVE, OpSize::kLong, Arg::USP(), Arg::An(xn));
+ }
+ return node.size = kInstructionSizeStepBytes;
+}
+
+static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const bool is_nbcd = !((instr >> 6) & 1);
+ const OpSize opsize0 = OpSize::kWord;
+ const auto arg = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize0);
+ bool is_swap{};
+ switch (arg.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ if (!is_nbcd) {
+ is_swap = true;
+ }
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ break;
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ if (!is_nbcd) {
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ break;
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ node.ref1_addr = static_cast<uint32_t>(arg.lword);
+ node.ref_kinds = kRef1AbsMask | kRef1ReadMask;
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ if (is_nbcd) {
+ return disasm_verbatim(node, instr);
+ }
+ if (arg.mode == AddrMode::kD16PCAddr) {
+ node.ref1_addr = node.address + kInstructionSizeStepBytes +
+ static_cast<uint32_t>(arg.d16_pc.d16);
+ node.ref_kinds = kRef1RelMask | kRef1ReadMask;
+ }
+ break;
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ const auto opcode = is_nbcd ? OpCode::kNBCD : is_swap ? OpCode::kSWAP : OpCode::kPEA;
+ const auto opsize = is_nbcd ? OpSize::kByte : is_swap ? OpSize::kWord : OpSize::kLong;
+ node.op = Op::Typical(opcode, opsize, arg);
+ return node.size = kInstructionSizeStepBytes + arg.Size(opsize0);
+}
+
+static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const auto a = FetchImmediate(node.address + kInstructionSizeStepBytes, code, OpSize::kWord);
+ if (a.mode != AddrMode::kImmediate) {
+ return disasm_verbatim(node, instr);
+ }
+ node.op = Op::Typical(OpCode::kSTOP, OpSize::kNone, a);
+ return node.size = kInstructionSizeStepBytes * 2;
+}
+
+static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ if ((instr & 0xf900) == 0x4000) {
+ return disasm_move_negx_clr_neg_not(node, instr, code);
+ } else if ((instr & 0xff80) == 0x4800) {
+ // NOTE: EXT is handled with MOVEM
+ return disasm_nbcd_swap_pea(node, instr, code);
+ } else if ((instr & 0xff00) == 0x4a00) {
+ return disasm_tst_tas_illegal(node, instr, code);
+ } else if ((instr & 0xfff0) == 0x4e40) {
+ return disasm_trap(node, instr);
+ } else if ((instr & 0xfff0) == 0x4e50) {
+ return disasm_link_unlink(node, instr, code);
+ } else if ((instr & 0xfff0) == 0x4e60) {
+ return disasm_move_usp(node, instr);
+ } else if ((instr & 0xfff8) == 0x4e70) {
+ if (instr == 0x4e70) {
+ return disasm_trivial(node, OpCode::kRESET);
+ } else if (instr == 0x4e71) {
+ return disasm_trivial(node, OpCode::kNOP);
+ } else if (instr == 0x4e72) {
+ return disasm_stop(node, instr, code);
+ } else if (instr == 0x4e73) {
+ return disasm_trivial(node, OpCode::kRTE);
+ } else if (instr == 0x4e75) {
+ return disasm_trivial(node, OpCode::kRTS);
+ } else if (instr == 0x4e76) {
+ return disasm_trivial(node, OpCode::kTRAPV);
+ } else if (instr == 0x4e77) {
+ return disasm_trivial(node, OpCode::kRTR);
+ }
+ } else if ((instr & 0xff80) == 0x4e80) {
+ return disasm_jsr_jmp(node, instr, code);
+ } else if ((instr & 0xfb80) == 0x4880) {
+ return disasm_ext_movem(node, instr, code);
+ } else if ((instr & 0xf1c0) == 0x41c0) {
+ return disasm_lea(node, instr, code);
+ } else if ((instr & 0xf1c0) == 0x4180) {
+ return disasm_chk(node, instr, code);
+ }
+ return disasm_verbatim(node, instr);
+}
+
+static size_t disasm_addq_subq(
+ DisasmNode &node, const uint16_t instr, const DataView &code, const OpSize opsize)
+{
+ const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (a.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn: // 5x00..5x07 / 5x40..5x47 / 5x80..5x87
+ break;
+ case AddrMode::kAn: // 5x08..5x0f / 5x48..5x4f / 5x88..5x8f
+ if (opsize == OpSize::kByte) {
+ // 5x08..5x0f
+ // addqb and subqb with An do not exist
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ case AddrMode::kAnAddr: // 5x10..5x17 / 5x50..5x57 / 5x90..5x97
+ case AddrMode::kAnAddrIncr: // 5x18..5x1f / 5x58..5x5f / 5x98..5x9f
+ case AddrMode::kAnAddrDecr: // 5x20..5x27 / 5x60..5x67 / 5xa0..5xa7
+ case AddrMode::kD16AnAddr: // 5x28..5x2f / 5x68..5x6f / 5xa8..5xaf
+ case AddrMode::kD8AnXiAddr: // 5x30..5x37 / 5x70..5x77 / 5xb0..5xb7
+ case AddrMode::kWord: // 5x38 / 5x78 / 5xb8
+ case AddrMode::kLong: // 5x39 / 5x79 / 5xb9
+ break;
+ case AddrMode::kD16PCAddr: // 5x3a / 5x7a / 5xba
+ case AddrMode::kD8PCXiAddr: // 5x3b / 5x7b / 5xbb
+ case AddrMode::kImmediate: // 5x3c / 5x7c / 5xbc
+ // Does not exist
+ return disasm_verbatim(node, instr);
+ }
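+ // The 3-bit immediate field encodes values 1..8, with 0 meaning 8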
+ const unsigned imm = ((uint8_t((instr >> 9) & 7) - 1) & 7) + 1;
+ const auto opcode = ((instr >> 8) & 1) ? OpCode::kSUBQ : OpCode::kADDQ;
+ node.op = Op::Typical(opcode, opsize, Arg::Immediate(imm), a);
+ return node.size = kInstructionSizeStepBytes + a.Size(opsize);
+}
+
+static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ if (node.address + kInstructionSizeStepBytes >= code.size) {
+ return disasm_verbatim(node, instr);
+ }
+ const int16_t dispmt_raw = GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes);
+ const int32_t dispmt = dispmt_raw + kInstructionSizeStepBytes;
+ node.ref2_addr = static_cast<uint32_t>(node.address + dispmt);
+ node.ref_kinds = kRef2RelMask;
+ node.op = Op{
+ OpCode::kDBcc,
+ OpSize::kWord,
+ static_cast<Condition>((instr >> 8) & 0xf),
+ Arg::AddrModeXn(ArgType::kDn, (instr & 7)),
+ Arg::Displacement(dispmt),
+ };
+ return node.size = kInstructionSizeStepBytes * 2;
+}
+
+static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const OpSize opsize = OpSize::kWord;
+ const auto a = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (a.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn: // 5xc0..5xc7, Dn
+ break;
+ case AddrMode::kAn: // 5xc8..5xcf, An
+ return disasm_dbcc(node, instr, code);
+ case AddrMode::kAnAddr: // 5xd0..5xd7
+ case AddrMode::kAnAddrIncr: // 5xd8..5xdf
+ case AddrMode::kAnAddrDecr: // 5xe0..5xe7
+ case AddrMode::kD16AnAddr: // 5xe8..5xef
+ case AddrMode::kD8AnXiAddr: // 5xf0..5xf7
+ case AddrMode::kWord: // 5xf8 (xxx).W
+ case AddrMode::kLong: // 5xf9 (xxx).L
+ break;
+ case AddrMode::kD16PCAddr: // 5xfa
+ case AddrMode::kD8PCXiAddr: // 5xfb
+ case AddrMode::kImmediate: // 5xfc
+ // Does not exist
+ return disasm_verbatim(node, instr);
+ }
+ node.op = Op{OpCode::kScc, OpSize::kByte, static_cast<Condition>((instr >> 8) & 0xf), a};
+ return node.size = kInstructionSizeStepBytes + a.Size(opsize);
+}
+
+static size_t disasm_addq_subq_scc_dbcc(DisasmNode &n, const uint16_t instr, const DataView &c)
+{
+ const auto opsize = static_cast<OpSize>((instr >> 6) & 3);
+ if (opsize == OpSize::kInvalid) {
+ return disasm_scc_dbcc(n, instr, c);
+ }
+ return disasm_addq_subq(n, instr, c, opsize);
+}
+
+static size_t disasm_moveq(DisasmNode &node, const uint16_t instr)
+{
+ if (instr & 0x100) {
+ // Does not exist
+ return disasm_verbatim(node, instr);
+ }
+ const int xn = (instr >> 9) & 7;
+ const auto dst = Arg::Dn(xn);
+ const int8_t data = instr & 0xff;
+ const OpSize opsize = OpSize::kLong;
+ node.op = Op::Typical(OpCode::kMOVEQ, opsize, Arg::Immediate(data), dst);
+ return node.size = kInstructionSizeStepBytes + dst.Size(opsize);
+}
+
+static size_t disasm_divu_divs_mulu_muls(
+ DisasmNode &node,
+ const uint16_t instr,
+ const DataView &code,
+ const OpCode opcode)
+{
+ const auto opsize = OpSize::kWord;
+ const auto src = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (src.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ break;
+ }
+ const unsigned dn = (instr >> 9) & 7;
+ const auto dst = Arg::Dn(dn);
+ node.op = Op::Typical(opcode, opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + dst.Size(opsize) + src.Size(opsize);
+}
+
+static size_t disasm_addx_subx_abcd_sbcd(
+ DisasmNode &node, const uint16_t instr, const OpCode opcode)
+{
+ const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
+ // Must be already handled by parent call
+ assert(opsize != OpSize::kInvalid);
+ const int m = (instr >> 3) & 1;
+ const int xn = instr & 7;
+ const int xi = (instr >> 9) & 7;
+ const auto src = m ? Arg::AnAddrDecr(xn) : Arg::Dn(xn);
+ const auto dst = m ? Arg::AnAddrDecr(xi) : Arg::Dn(xi);
+ // XXX GNU AS does not know ABCD.B, it only knows ABCD, yet it happily
+ // consumes SBCD.B and others. That is why OpSize::kNone is used specifically
+ // for the ABCD mnemonic. It is probably a bug in GNU AS.
+ node.op = Op::Typical(opcode, (opcode == OpCode::kABCD) ? OpSize::kNone : opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
+}
+
+static size_t disasm_or_and(
+ DisasmNode &node,
+ const uint16_t instr,
+ const DataView &code,
+ const OpSize opsize,
+ const OpCode opcode)
+{
+ const bool dir_to_addr = (instr >> 8) & 1;
+ const auto addr = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (addr.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ if (dir_to_addr) {
+ // Switching direction when both operands are data registers is not allowed
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ if (dir_to_addr) {
+ // PC relative cannot be destination
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ case AddrMode::kImmediate:
+ if (dir_to_addr) {
+ // immediate cannot be destination
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ }
+ const auto reg = Arg::Dn((instr >> 9) & 7);
+ if (dir_to_addr) {
+ node.op = Op::Typical(opcode, opsize, reg, addr);
+ } else {
+ node.op = Op::Typical(opcode, opsize, addr, reg);
+ }
+ return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize);
+}
+
+static size_t disasm_divu_divs_sbcd_or(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ // Also ensures that opsize == OpSize::kByte, i.e. 0b00
+ if ((instr & 0x1f0) == 0x100) {
+ return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kSBCD);
+ }
+ const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
+ if (opsize == OpSize::kInvalid) {
+ const bool is_signed = (instr >> 8) & 1;
+ const auto opcode = is_signed ? OpCode::kDIVS : OpCode::kDIVU;
+ return disasm_divu_divs_mulu_muls(node, instr, code, opcode);
+ }
+ return disasm_or_and(node, instr, code, opsize, OpCode::kOR);
+}
+
+static size_t disasm_adda_suba_cmpa(
+ DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode)
+{
+ const OpSize opsize = static_cast<OpSize>(((instr >> 8) & 1) + 1);
+ const auto src = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (src.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ case AddrMode::kAn:
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ break;
+ }
+ const unsigned an = (instr >> 9) & 7;
+ const auto dst = Arg::An(an);
+ node.op = Op::Typical(opcode, opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
+}
+
+static size_t disasm_add_sub_cmp(
+ DisasmNode &node,
+ const uint16_t instr,
+ const DataView &code,
+ const OpCode opcode,
+ const OpSize opsize,
+ const bool dir_to_addr)
+{
+ const auto addr = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (addr.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ if (dir_to_addr || opsize == OpSize::kByte) {
+ // An cannot be destination and An cannot be used as byte
+ return disasm_verbatim(node, instr);
+ }
+ /* Fall through */
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ break;
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ if (dir_to_addr) {
+ node.ref2_addr = static_cast<uint32_t>(addr.lword);
+ node.ref_kinds = kRef2AbsMask | kRef2ReadMask;
+ } else {
+ node.ref1_addr = static_cast<uint32_t>(addr.lword);
+ node.ref_kinds = kRef1AbsMask | kRef1ReadMask;
+ }
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ if (dir_to_addr) {
+ // PC relative cannot be destination
+ return disasm_verbatim(node, instr);
+ }
+ if (addr.mode == AddrMode::kD16PCAddr) {
+ node.ref1_addr = node.address + kInstructionSizeStepBytes +
+ static_cast<uint32_t>(addr.d16_pc.d16);
+ node.ref_kinds = kRef1RelMask | kRef1ReadMask;
+ }
+ break;
+ case AddrMode::kImmediate:
+ if (dir_to_addr) {
+ // immediate cannot be destination
+ return disasm_verbatim(node, instr);
+ }
+ break;
+ }
+ const unsigned dn = (instr >> 9) & 7;
+ const auto reg = Arg::Dn(dn);
+ if (dir_to_addr) {
+ node.op = Op::Typical(opcode, opsize, reg, addr);
+ } else {
+ node.op = Op::Typical(opcode, opsize, addr, reg);
+ }
+ return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize);
+}
+
+static size_t disasm_cmpm(DisasmNode &node, const uint16_t instr)
+{
+ const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
+ // Must be already handled by parent call
+ assert(opsize != OpSize::kInvalid);
+ // M has to be set to 0b001
+ assert(((instr >> 3) & 7) == 1);
+ const int xn = instr & 7;
+ const int xi = (instr >> 9) & 7;
+ const auto src = Arg::AnAddrIncr(xn);
+ const auto dst = Arg::AnAddrIncr(xi);
+ node.op = Op::Typical(OpCode::kCMPM, opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
+}
+
+static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
+ const auto addr = FetchArg(
+ node.address + kInstructionSizeStepBytes, code, instr, opsize);
+ switch (addr.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ // PC relative and immediate cannot be destination
+ return disasm_verbatim(node, instr);
+ }
+ const auto reg = Arg::Dn((instr >> 9) & 7);
+ node.op = Op::Typical(OpCode::kEOR, opsize, reg, addr);
+ return node.size = kInstructionSizeStepBytes + addr.Size(opsize) + reg.Size(opsize);
+}
+
+static size_t disasm_eor_cmpm_cmp_cmpa(
+ DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
+ if (opsize == OpSize::kInvalid) {
+ return disasm_adda_suba_cmpa(node, instr, code, OpCode::kCMPA);
+ }
+ const bool dir_to_addr = ((instr >> 8) & 1);
+ if (!dir_to_addr) {
+ return disasm_add_sub_cmp(node, instr, code, OpCode::kCMP, opsize, dir_to_addr);
+ }
+ const int m = (instr >> 3) & 7;
+ if (m == 1) {
+ return disasm_cmpm(node, instr);
+ }
+ return disasm_eor(node, instr, code);
+}
+
+static size_t disasm_exg(DisasmNode &node, const uint16_t instr)
+{
+ assert((instr & 0x130) == 0x100);
+ const int m1 = (instr >> 3) & 1;
+ const int m2 = (instr >> 6) & 3;
+ assert(m2 != 0); // Therefore m == 0 and m == 1 are impossible
+ assert(m2 != 3); // Therefore m == 6 and m == 7 are impossible
+ const int m = (m2 << 1) | m1;
+ assert(m != 4); // Only m == 2, m == 3 and m == 5 values are allowed
+ const int xn = instr & 7;
+ const int xi = (instr >> 9) & 7;
+ const auto src = (m == 3) ? Arg::An(xi) : Arg::Dn(xi);
+ const auto dst = (m == 2) ? Arg::Dn(xn) : Arg::An(xn);
+ // GNU AS does not accept a size suffix for EXG, although its size is always
+ // long word.
+ const auto opsize = OpSize::kNone;
+ node.op = Op::Typical(OpCode::kEXG, opsize, src, dst);
+ return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
+}
+
+static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ if ((instr & 0x1f0) == 0x100) {
+ return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kABCD);
+ }
+ const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
+ if (opsize == OpSize::kInvalid) {
+ const bool is_signed = (instr >> 8) & 1;
+ const auto opcode = is_signed ? OpCode::kMULS : OpCode::kMULU;
+ return disasm_divu_divs_mulu_muls(node, instr, code, opcode);
+ }
+ const unsigned m_split = instr & 0x1f8;
+ if (m_split == 0x188 || m_split == 0x148 || m_split == 0x140) {
+ return disasm_exg(node, instr);
+ }
+ return disasm_or_and(node, instr, code, opsize, OpCode::kAND);
+}
+
+static size_t disasm_add_sub_x_a(
+ DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode)
+{
+ const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
+ if (opsize == OpSize::kInvalid) {
+ return disasm_adda_suba_cmpa(node, instr, code, (opcode == OpCode::kSUB) ? OpCode::kSUBA : OpCode::kADDA);
+ }
+ const bool dir_to_addr = (instr >> 8) & 1;
+ const unsigned m = (instr >> 3) & 7;
+ if (dir_to_addr && (m == 0 || m == 1)) {
+ return disasm_addx_subx_abcd_sbcd(node, instr, (opcode == OpCode::kSUB) ? OpCode::kSUBX : OpCode::kADDX);
+ }
+ return disasm_add_sub_cmp(node, instr, code, opcode, opsize, dir_to_addr);
+}
+
+static OpCode ShiftKindToOpcode(const ShiftKind k, const ShiftDirection d)
+{
+ switch (k) {
+ case ShiftKind::kArithmeticShift:
+ return d == ShiftDirection::kLeft ? OpCode::kASL : OpCode::kASR;
+ case ShiftKind::kLogicalShift:
+ return d == ShiftDirection::kLeft ? OpCode::kLSL : OpCode::kLSR;
+ case ShiftKind::kRotateX:
+ return d == ShiftDirection::kLeft ? OpCode::kROXL : OpCode::kROXR;
+ case ShiftKind::kRotate:
+ return d == ShiftDirection::kLeft ? OpCode::kROL : OpCode::kROR;
+ }
+ assert(false);
+ return OpCode::kNone;
+}
+
+static bool IsValidShiftKind(const ShiftKind k)
+{
+ return static_cast<int>(k) < 4;
+}
+
+static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const DataView &code)
+{
+ const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
+ const unsigned xn = instr & 7;
+ const uint8_t rotation = (instr >> 9) & 7;
+ const ShiftKind kind = (opsize == OpSize::kInvalid)
+ ? static_cast<ShiftKind>(rotation)
+ : static_cast<ShiftKind>((instr >> 3) & 3);
+ if (!IsValidShiftKind(kind)) {
+ return disasm_verbatim(node, instr);
+ }
+ const auto dst = (opsize == OpSize::kInvalid)
+ ? FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize)
+ : Arg::Dn(xn);
+ if (opsize == OpSize::kInvalid) {
+ switch (dst.mode) {
+ case AddrMode::kInvalid:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kDn:
+ // Intersects with the case when the args are "#1,%dx". GNU AS would
+ // not understand a shift instruction with a single "%dx" argument.
+ return disasm_verbatim(node, instr);
+ break;
+ case AddrMode::kAn:
+ return disasm_verbatim(node, instr);
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ case AddrMode::kLong:
+ break;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ case AddrMode::kImmediate:
+ return disasm_verbatim(node, instr);
+ }
+ }
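+ // The immediate shift/rotate count field encodes 1..8, with 0 meaning 8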
+ const unsigned imm = ((rotation - 1) & 7) + 1;
+ const unsigned src = (opsize == OpSize::kInvalid) ? 1 : rotation;
+ const auto dir = static_cast<ShiftDirection>((instr >> 8) & 1);
+ if (opsize == OpSize::kInvalid) {
+ node.op = Op::Typical(ShiftKindToOpcode(kind, dir), opsize, dst);
+ } else {
+ const unsigned m = (instr >> 5) & 1;
+ const auto arg1 = m ? Arg::AddrModeXn(ArgType::kDn, src) : Arg::Immediate(imm);
+ node.op = Op::Typical(ShiftKindToOpcode(kind, dir), opsize, arg1, dst);
+ }
+ return node.size = kInstructionSizeStepBytes + dst.Size(opsize);
+}
+
+static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataView &c)
+{
+ switch ((i & 0xf000) >> 12) {
+ case 0x0:
+ return disasm_bitops_movep(n, i, c);
+ case 0x1:
+ case 0x2:
+ case 0x3:
+ return disasm_move_movea(n, i, c);
+ case 0x4:
+ return disasm_chunk_4(n, i, c);
+ case 0x5:
+ return disasm_addq_subq_scc_dbcc(n, i, c);
+ case 0x6:
+ return disasm_bra_bsr_bcc(n, i, c);
+ case 0x7:
+ return disasm_moveq(n, i);
+ case 0x8:
+ return disasm_divu_divs_sbcd_or(n, i, c);
+ case 0x9:
+ return disasm_add_sub_x_a(n, i, c, OpCode::kSUB);
+ case 0xa:
+ // Does not exist
+ return disasm_verbatim(n, i);
+ case 0xb:
+ return disasm_eor_cmpm_cmp_cmpa(n, i, c);
+ case 0xc:
+ return disasm_chunk_c(n, i, c);
+ case 0xd:
+ return disasm_add_sub_x_a(n, i, c, OpCode::kADD);
+ case 0xe:
+ return disasm_shift_rotate(n, i, c);
+ case 0xf:
+ // Does not exist
+ return disasm_verbatim(n, i);
+ }
+ assert(false);
+ return disasm_verbatim(n, i);
+}
+
+size_t DisasmNode::Disasm(const DataView &code)
+{
+ // We assume that the machine has no MMU and that ROM data always starts at 0
+ assert(this->address < code.size);
+ size = kInstructionSizeStepBytes;
+ ref_kinds = 0;
+ ref1_addr = 0;
+ ref2_addr = 0;
+ const uint16_t instr = GetU16BE(code.buffer + this->address);
+ if (IsInstruction(this->type)) {
+ return m68k_disasm(*this, instr, code);
+ } else {
+ // Data should not be disassembled
+ return disasm_verbatim(*this, instr);
+ }
+}
+
+size_t DisasmNode::DisasmAsRaw(const DataView &code)
+{
+ // We assume that the machine has no MMU and that ROM data always starts at 0
+ assert(this->address < code.size);
+ size = kInstructionSizeStepBytes;
+ ref_kinds = 0;
+ ref1_addr = 0;
+ ref2_addr = 0;
+ const uint16_t instr = GetU16BE(code.buffer + this->address);
+ return disasm_verbatim(*this, instr);
+}
+
+static const char *ToString(const OpCode opcode, const Condition condition)
+{
+ switch (opcode) {
+ case OpCode::kNone:
+ assert(false);
+ break;
+ case OpCode::kRaw: return ".short";
+ case OpCode::kORI: return "ori";
+ case OpCode::kANDI: return "andi";
+ case OpCode::kSUBI: return "subi";
+ case OpCode::kADDI: return "addi";
+ case OpCode::kEORI: return "eori";
+ case OpCode::kCMPI: return "cmpi";
+ case OpCode::kBTST: return "btst";
+ case OpCode::kBCHG: return "bchg";
+ case OpCode::kBCLR: return "bclr";
+ case OpCode::kBSET: return "bset";
+ case OpCode::kMOVEP: return "movep";
+ case OpCode::kMOVEA: return "movea";
+ case OpCode::kMOVE: return "move";
+ case OpCode::kNEGX: return "negx";
+ case OpCode::kCLR: return "clr";
+ case OpCode::kNEG: return "neg";
+ case OpCode::kNOT: return "not";
+ case OpCode::kEXT: return "ext";
+ case OpCode::kNBCD: return "nbcd";
+ case OpCode::kSWAP: return "swap";
+ case OpCode::kPEA: return "pea";
+ case OpCode::kILLEGAL: return "illegal";
+ case OpCode::kTAS: return "tas";
+ case OpCode::kTST: return "tst";
+ case OpCode::kTRAP: return "trap";
+ case OpCode::kLINK: return "link";
+ case OpCode::kUNLK: return "unlk";
+ case OpCode::kRESET: return "reset";
+ case OpCode::kNOP: return "nop";
+ case OpCode::kSTOP: return "stop";
+ case OpCode::kRTE: return "rte";
+ case OpCode::kRTS: return "rts";
+ case OpCode::kTRAPV: return "trapv";
+ case OpCode::kRTR: return "rtr";
+ case OpCode::kJSR: return "jsr";
+ case OpCode::kJMP: return "jmp";
+ case OpCode::kMOVEM: return "movem";
+ case OpCode::kLEA: return "lea";
+ case OpCode::kCHK: return "chk";
+ case OpCode::kADDQ: return "addq";
+ case OpCode::kSUBQ: return "subq";
+ case OpCode::kScc:
+ switch(condition) {
+ case Condition::kT : return "st";
+ case Condition::kF: return "sf";
+ case Condition::kHI: return "shi";
+ case Condition::kLS: return "sls";
+ case Condition::kCC: return "scc";
+ case Condition::kCS: return "scs";
+ case Condition::kNE: return "sne";
+ case Condition::kEQ: return "seq";
+ case Condition::kVC: return "svc";
+ case Condition::kVS: return "svs";
+ case Condition::kPL: return "spl";
+ case Condition::kMI: return "smi";
+ case Condition::kGE: return "sge";
+ case Condition::kLT: return "slt";
+ case Condition::kGT: return "sgt";
+ case Condition::kLE: return "sle";
+ }
+ assert(false);
+ break;
+ case OpCode::kDBcc:
+ switch (condition) {
+ case Condition::kT: return "dbt";
+ case Condition::kF: return "dbf";
+ case Condition::kHI: return "dbhi";
+ case Condition::kLS: return "dbls";
+ case Condition::kCC: return "dbcc";
+ case Condition::kCS: return "dbcs";
+ case Condition::kNE: return "dbne";
+ case Condition::kEQ: return "dbeq";
+ case Condition::kVC: return "dbvc";
+ case Condition::kVS: return "dbvs";
+ case Condition::kPL: return "dbpl";
+ case Condition::kMI: return "dbmi";
+ case Condition::kGE: return "dbge";
+ case Condition::kLT: return "dblt";
+ case Condition::kGT: return "dbgt";
+ case Condition::kLE: return "dble";
+ }
+ assert(false);
+ break;
+ case OpCode::kBcc:
+ switch (condition) {
+ case Condition::kT: return "bra";
+ case Condition::kF: return "bsr";
+ case Condition::kHI: return "bhi";
+ case Condition::kLS: return "bls";
+ case Condition::kCC: return "bcc";
+ case Condition::kCS: return "bcs";
+ case Condition::kNE: return "bne";
+ case Condition::kEQ: return "beq";
+ case Condition::kVC: return "bvc";
+ case Condition::kVS: return "bvs";
+ case Condition::kPL: return "bpl";
+ case Condition::kMI: return "bmi";
+ case Condition::kGE: return "bge";
+ case Condition::kLT: return "blt";
+ case Condition::kGT: return "bgt";
+ case Condition::kLE: return "ble";
+ }
+ assert(false);
+ break;
+ case OpCode::kMOVEQ: return "moveq";
+ case OpCode::kDIVU: return "divu";
+ case OpCode::kDIVS: return "divs";
+ case OpCode::kSBCD: return "sbcd";
+ case OpCode::kOR: return "or";
+ case OpCode::kSUB: return "sub";
+ case OpCode::kSUBX: return "subx";
+ case OpCode::kSUBA: return "suba";
+ case OpCode::kEOR: return "eor";
+ case OpCode::kCMPM: return "cmpm";
+ case OpCode::kCMP: return "cmp";
+ case OpCode::kCMPA: return "cmpa";
+ case OpCode::kMULU: return "mulu";
+ case OpCode::kMULS: return "muls";
+ case OpCode::kABCD: return "abcd";
+ case OpCode::kEXG: return "exg";
+ case OpCode::kAND: return "and";
+ case OpCode::kADD: return "add";
+ case OpCode::kADDX: return "addx";
+ case OpCode::kADDA: return "adda";
+ case OpCode::kASR: return "asr";
+ case OpCode::kASL: return "asl";
+ case OpCode::kLSR: return "lsr";
+ case OpCode::kLSL: return "lsl";
+ case OpCode::kROXR: return "roxr";
+ case OpCode::kROXL: return "roxl";
+ case OpCode::kROR: return "ror";
+ case OpCode::kROL: return "rol";
+ }
+ assert(false);
+ return "?";
+}
+
+static const char *ToString(const OpSize s)
+{
+ switch (s) {
+ case OpSize::kNone: return "";
+ case OpSize::kByte: return "b";
+ case OpSize::kShort: return "s";
+ case OpSize::kWord: return "w";
+ case OpSize::kLong: return "l";
+ }
+ assert(false);
+ return "";
+}
+
+static int OpcodeSNPrintf(
+ char *const buf,
+ const size_t bufsz,
+ const OpCode opcode,
+ const Condition condition,
+ const OpSize size_spec)
+{
+ return snprintf(buf, bufsz, "%s%s", ToString(opcode, condition), ToString(size_spec));
+}
+
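+// The xi byte packs the index register as described in D8AnPCXiAddr:
+// bit 3 selects An vs Dn and bit 4 selects Long vs Word.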
+static char RegChar(const uint8_t xi)
+{
+ return (xi & 0x08) ? 'a' : 'd';
+}
+
+static char SizeSpecChar(const uint8_t xi)
+{
+ return (xi & 0x10) ? 'l' : 'w';
+}
+
+static unsigned RegNum(const uint8_t xi)
+{
+ return xi & 0x7;
+}
+
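+// Renders a MOVEM register mask as a GNU AS register list, collapsing runs of
+// two or more adjacent registers into ranges. For example, the mask 0x0307 is
+// rendered as "%d0-%d2/%a0-%a1" (the bit order is reversed for the
+// predecrement form).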
+static size_t snprint_reg_mask(
+ char *const buf, const size_t bufsz, const uint32_t regmask_arg, const ArgType arg_type)
+{
+ const uint32_t regmask = regmask_arg & 0xffff;
+ size_t written = 0;
+ bool first_printed = false;
+ size_t span = 0;
+ // The 17th bit is always clear, so it unconditionally closes any span that
+ // is still open at the end
+ for (int i = 0; i < 17; i++) {
+ const uint32_t mask = 1 << (arg_type == ArgType::kRegMaskPredecrement ? (15 - i) : i);
+ const bool hit = regmask & mask;
+ const bool span_open = hit && span == 0;
+ const bool span_closed = !hit && span > 1;
+ const int printable_i = i - (span_closed ? 1 : 0);
+ const int id = printable_i % 8;
+ const char regtype = (printable_i >= 8) ? 'a' : 'd';
+ if (span_open || span_closed) {
+ const char *const delimiter = span_open ? (first_printed ? "/" : "") : "-";
+ const size_t remaining = bufsz - written;
+ const int ret = snprintf(buf + written, remaining, "%s%%%c%d", delimiter, regtype, id);
+ assert(ret > 0);
+ assert(static_cast<unsigned>(ret) >= sizeof("%d0")-1);
+ assert(static_cast<unsigned>(ret) <= sizeof("-%d0")-1);
+ written += Min(remaining, ret);
+ first_printed = true;
+ }
+ span = hit ? span + 1 : 0;
+ }
+ assert(written < bufsz); // Output must not be truncated
+ return written;
+}
+
+int Arg::SNPrint(
+ char *const buf,
+ const size_t bufsz,
+ const bool imm_as_hex,
+ const RefKindMask ref_kinds,
+ const char *const label,
+ const uint32_t self_addr,
+ const uint32_t ref_addr) const
+{
+ switch (type) {
+ case ArgType::kNone:
+ assert(false);
+ break;
+ case ArgType::kRaw:
+ return snprintf(buf, bufsz, "0x%04x", uword);
+ case ArgType::kDn:
+ return snprintf(buf, bufsz, "%%d%d", xn);
+ case ArgType::kAn:
+ return snprintf(buf, bufsz, "%%a%u", xn);
+ case ArgType::kAnAddr:
+ return snprintf(buf, bufsz, "%%a%u@", xn);
+ case ArgType::kAnAddrIncr:
+ return snprintf(buf, bufsz, "%%a%u@+", xn);
+ case ArgType::kAnAddrDecr:
+ return snprintf(buf, bufsz, "%%a%u@-", xn);
+ case ArgType::kD16AnAddr:
+ return snprintf(buf, bufsz, "%%a%u@(%d:w)", d16_an.an, d16_an.d16);
+ case ArgType::kD8AnXiAddr:
+ return snprintf(
+ buf, bufsz, "%%a%u@(%d,%%%c%u:%c)",
+ d8_an_xi.an,
+ d8_an_xi.d8,
+ RegChar(d8_an_xi.xi),
+ RegNum(d8_an_xi.xi),
+ SizeSpecChar(d8_an_xi.xi));
+ case ArgType::kWord:
+ case ArgType::kLong:
+ {
+ const char c = type == ArgType::kLong ? 'l' : 'w';
+ if (ref_kinds & kRefAbsMask) {
+ if (static_cast<uint32_t>(lword) == ref_addr) {
+ return snprintf(buf, bufsz, "%s:%c", label, c);
+ } else {
+ // It has to be AFTER the label we are going to reference here
+ assert(static_cast<uint32_t>(lword) > ref_addr);
+ return snprintf(buf, bufsz, "%s+%d:%c", label, lword - ref_addr, c);
+ }
+ } else {
+ return snprintf(buf, bufsz, "0x%x:%c", lword, c);
+ }
+ }
+ case ArgType::kD16PCAddr:
+ if (ref_kinds & kRefRelMask) {
+ // XXX: Most instructions with PC-relative values have 2 bytes added to
+ // the offset, but some do not. Still figuring that out.
+ const bool has_fix = ref_kinds & kRefPcRelFix2Bytes;
+ const uint32_t arg_addr = self_addr + d16_pc.d16 + kInstructionSizeStepBytes + (has_fix ? kInstructionSizeStepBytes : 0);
+ if (arg_addr == ref_addr) {
+ return snprintf(buf, bufsz, "%%pc@(%s:w)", label);
+ } else {
+ assert(arg_addr > ref_addr);
+ return snprintf(buf, bufsz, "%%pc@(%s+%d:w)", label, arg_addr - ref_addr);
+ }
+ } else {
+ return snprintf(buf, bufsz, "%%pc@(%d:w)", d16_pc.d16);
+ }
+ case ArgType::kD8PCXiAddr:
+ return snprintf(
+ buf, bufsz, "%%pc@(%d,%%%c%u:%c)",
+ d8_pc_xi.d8,
+ RegChar(d8_pc_xi.xi),
+ RegNum(d8_pc_xi.xi),
+ SizeSpecChar(d8_pc_xi.xi));
+ case ArgType::kImmediate:
+ if (ref_kinds & kRef1ImmMask) {
+ if (static_cast<uint32_t>(lword) == ref_addr) {
+ return snprintf(buf, bufsz, "#%s", label);
+ } else {
+ // It has to be AFTER the label we are going to reference here
+ assert(static_cast<uint32_t>(lword) > ref_addr);
+ return snprintf(buf, bufsz, "#%s+%d", label, lword - ref_addr);
+ }
+ } else if (imm_as_hex) {
+ return snprintf(buf, bufsz, "#0x%x", lword);
+ } else {
+ return snprintf(buf, bufsz, "#%d", lword);
+ }
+ case ArgType::kRegMask:
+ case ArgType::kRegMaskPredecrement:
+ return snprint_reg_mask(buf, bufsz, uword, type);
+ case ArgType::kDisplacement:
+ if (ref_kinds & kRefRelMask) {
+ if (static_cast<uint32_t>(self_addr + lword) == ref_addr) {
+ return snprintf(buf, bufsz, "%s", label);
+ } else {
+ assert(static_cast<uint32_t>(self_addr + lword) > ref_addr);
+ return snprintf(buf, bufsz, "%s+%d", label, (self_addr + lword) - ref_addr);
+ }
+ } else {
+ return snprintf(buf, bufsz, ".%s%d", lword >= 0 ? "+" : "", lword);
+ }
+ case ArgType::kCCR:
+ return snprintf(buf, bufsz, "%%ccr");
+ case ArgType::kSR:
+ return snprintf(buf, bufsz, "%%sr");
+ case ArgType::kUSP:
+ return snprintf(buf, bufsz, "%%usp");
+ }
+ assert(false);
+ return -1;
+}
+
+int Op::FPrint(
+ FILE *const stream,
+ const char *const indent,
+ const bool imm_as_hex,
+ const RefKindMask ref_kinds,
+ const char *const ref1_label,
+ const char *const ref2_label,
+ const uint32_t self_addr,
+ const uint32_t ref1_addr,
+ const uint32_t ref2_addr) const
+{
+ assert(opcode != OpCode::kNone);
+ char mnemonic_str[kMnemonicBufferSize]{};
+ OpcodeSNPrintf(mnemonic_str, kMnemonicBufferSize, opcode, condition, size_spec);
+ if (arg1.type != ArgType::kNone) {
+ char arg1_str[kArgsBufferSize]{};
+ const RefKindMask ref1_kinds = ref_kinds & (kRef1Mask | kRefPcRelFix2Bytes);
+ // It is useful to print an immediate value as hex when the destination
+ // argument is a plain address register, the status register or the
+ // condition code register. USP is not included because its value may
+ // only be moved to or from an An register.
+ const bool imm_as_hex_2 = imm_as_hex ||
+ arg2.type == ArgType::kAn ||
+ arg2.type == ArgType::kCCR ||
+ arg2.type == ArgType::kSR;
+ arg1.SNPrint(
+ arg1_str,
+ kArgsBufferSize,
+ imm_as_hex_2,
+ ref1_kinds,
+ ref1_label,
+ self_addr,
+ ref1_addr);
+ if (arg2.type != ArgType::kNone) {
+ char arg2_str[kArgsBufferSize]{};
+ const RefKindMask ref2_kinds = ref_kinds & (kRef2Mask | kRefPcRelFix2Bytes);
+ arg2.SNPrint(
+ arg2_str,
+ kArgsBufferSize,
+ false,
+ ref2_kinds,
+ ref2_label,
+ self_addr,
+ ref2_addr);
+ return fprintf(stream, "%s%s %s,%s", indent, mnemonic_str, arg1_str, arg2_str);
+ } else {
+ return fprintf(stream, "%s%s %s", indent, mnemonic_str, arg1_str);
+ }
+ } else {
+ return fprintf(stream, "%s%s", indent, mnemonic_str);
+ }
+}
+
+void DisasmNode::AddReferencedBy(const uint32_t address, const ReferenceType type)
+{
+ ReferenceNode *node{};
+ if (this->last_ref_by) {
+ node = this->last_ref_by;
+ } else {
+ node = new ReferenceNode{};
+ assert(node);
+ this->ref_by = this->last_ref_by = node;
+ }
+ node->refs[node->refs_count] = ReferenceRecord{type, address};
+ node->refs_count++;
+ if (node->refs_count >= kRefsCountPerBuffer) {
+ ReferenceNode *new_node = new ReferenceNode{};
+ assert(new_node);
+ node->next = new_node;
+ this->last_ref_by = new_node;
+ }
+}
+
+DisasmNode::~DisasmNode()
+{
+ ReferenceNode *ref{this->ref_by};
+ while (ref) {
+ ReferenceNode *prev = ref;
+ ref = ref->next;
+ delete prev;
+ }
+}
diff --git a/src/disasm.h b/src/disasm.h
new file mode 100644
index 0000000..65429dc
--- /dev/null
+++ b/src/disasm.h
@@ -0,0 +1,401 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "data_buffer.h"
+#include "common.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+
+enum class OpSize: int {
+ kByte = 0,
+ kWord = 1,
+ kLong = 2,
+ kInvalid = 3,
+ kNone = kInvalid,
+ kShort, ///< Semantically the same as kByte; a pseudo-size used for Bcc
+};
+
+enum class OpCode: uint8_t {
+ kNone,
+ kRaw, ///< Emits ".short"
+ kORI,
+ kANDI,
+ kSUBI,
+ kADDI,
+ kEORI,
+ kCMPI,
+ kBTST,
+ kBCHG,
+ kBCLR,
+ kBSET,
+ kMOVEP,
+ kMOVEA,
+ kMOVE,
+ kNEGX,
+ kCLR,
+ kNEG,
+ kNOT,
+ kEXT,
+ kNBCD,
+ kSWAP,
+ kPEA,
+ kILLEGAL,
+ kTAS,
+ kTST,
+ kTRAP,
+ kLINK,
+ kUNLK,
+ kRESET,
+ kNOP,
+ kSTOP,
+ kRTE,
+ kRTS,
+ kTRAPV,
+ kRTR,
+ kJSR,
+ kJMP,
+ kMOVEM,
+ kLEA,
+ kCHK,
+ kADDQ,
+ kSUBQ,
+ kScc,
+ kDBcc,
+ kBcc,
+ kMOVEQ,
+ kDIVU,
+ kDIVS,
+ kSBCD,
+ kOR,
+ kSUB,
+ kSUBX,
+ kSUBA,
+ kEOR,
+ kCMPM,
+ kCMP,
+ kCMPA,
+ kMULU,
+ kMULS,
+ kABCD,
+ kEXG,
+ kAND,
+ kADD,
+ kADDX,
+ kADDA,
+ kASR,
+ kASL,
+ kLSR,
+ kLSL,
+ kROXR,
+ kROXL,
+ kROR,
+ kROL,
+};
+
+enum class Condition: uint8_t {
+ kT = 0,
+ kF = 1,
+ kHI = 2,
+ kLS = 3,
+ kCC = 4,
+ kCS = 5,
+ kNE = 6,
+ kEQ = 7,
+ kVC = 8,
+ kVS = 9,
+ kPL = 10,
+ kMI = 11,
+ kGE = 12,
+ kLT = 13,
+ kGT = 14,
+ kLE = 15,
+};
+
+enum class AddrMode: uint8_t {
+ kInvalid = 0,
+ kDn = 1,
+ kAn = 2,
+ kAnAddr = 3,
+ kAnAddrIncr = 4,
+ kAnAddrDecr = 5,
+ kD16AnAddr = 6,
+ kD8AnXiAddr = 7,
+ kWord = 8,
+ kLong = 9,
+ kD16PCAddr = 10,
+ kD8PCXiAddr = 11,
+ kImmediate = 12,
+};
+
+enum class ArgType: uint8_t {
+ kNone = 0,
+ kDn = 1, ///< Dn
+ kAn = 2, ///< An
+ kAnAddr = 3, ///< (An)
+ kAnAddrIncr = 4, ///< (An)+
+ kAnAddrDecr = 5, ///< -(An)
+ kD16AnAddr = 6, ///< (d16,An)
+ kD8AnXiAddr = 7, ///< (d8,An,Xi)
+ kWord = 8, ///< (xxx).W
+ kLong = 9, ///< (xxx).L
+ kD16PCAddr = 10, ///< (d16,PC)
+ kD8PCXiAddr = 11, ///< (d8,PC,Xn)
+ kImmediate = 12, ///< #imm
+ kRegMask,
+ kRegMaskPredecrement,
+ kDisplacement, ///< For BRA, BSR, Bcc and DBcc
+ kCCR,
+ kSR,
+ kUSP,
+ kRaw, ///< Emits "0xXXXX" for ".short"
+};
+
+struct D8AnPCXiAddr {
+ uint8_t an; ///< ID number of An reg, for kD8AnXiAddr only
+ /*! ID number of Xi reg (3 lower bits), for kD8AnXiAddr and kD8PCXiAddr.
+ * Bit 3 (mask 0x8) means 0 == Dn, 1 == An.
+ * Bit 4 (mask 0x10) means 0 == Word, 1 == Long.
+ */
+ uint8_t xi;
+ int8_t d8; ///< Displacement, for kD8AnXiAddr and kD8PCXiAddr
+};
+
+struct D16AnPCAddr {
+ uint8_t an; ///< ID number of An reg, for kD16AnAddr only
+ int16_t d16; ///< Displacement, for D16AnAddr and kD16PCAddr
+};
+
+static_assert(sizeof(D8AnPCXiAddr) <= sizeof(uint32_t), "");
+static_assert(sizeof(D16AnPCAddr) <= sizeof(uint32_t), "");
+
+struct Arg {
+ union {
+ ArgType type{ArgType::kNone};
+ AddrMode mode;
+ };
+ union {
+ int32_t lword{}; ///< kLong, kWord, kDisplacement, kImmediate
+ uint16_t uword; ///< kRegMask, kRaw
+ uint8_t xn; ///< kDn, kAn, kAnAddr, kAnAddrIncr, kAnAddrDecr
+ D16AnPCAddr d16_an; ///< kD16AnAddr
+ D16AnPCAddr d16_pc; ///< kD16PCAddr
+ D8AnPCXiAddr d8_an_xi; ///< kD8AnXiAddr
+ D8AnPCXiAddr d8_pc_xi; ///< kD8PCXiAddr
+ };
+ /// Size of the instruction extension: 0, 2 or 4 bytes
+ constexpr size_t Size(const OpSize s) const
+ {
+ switch (mode) {
+ case AddrMode::kInvalid:
+ case AddrMode::kDn:
+ case AddrMode::kAn:
+ case AddrMode::kAnAddr:
+ case AddrMode::kAnAddrIncr:
+ case AddrMode::kAnAddrDecr:
+ return 0;
+ case AddrMode::kD16AnAddr:
+ case AddrMode::kD8AnXiAddr:
+ case AddrMode::kWord:
+ return 2;
+ case AddrMode::kLong:
+ return 4;
+ case AddrMode::kD16PCAddr:
+ case AddrMode::kD8PCXiAddr:
+ return 2;
+ case AddrMode::kImmediate:
+ // Byte and Word immediates occupy 2 bytes
+ return s == OpSize::kLong ? 4 : 2;
+ }
+ return 0;
+ }
+ static constexpr auto AddrModeXn(const ArgType type, const uint8_t xn) {
+ Arg a{{type}, {0}};
+ a.xn = xn;
+ return a;
+ }
+ static constexpr auto Dn(const uint8_t xn) { return AddrModeXn(ArgType::kDn, xn); }
+ static constexpr auto An(const uint8_t xn) { return AddrModeXn(ArgType::kAn, xn); }
+ static constexpr auto AnAddr(const uint8_t xn) { return AddrModeXn(ArgType::kAnAddr, xn); }
+ static constexpr auto AnAddrIncr(const uint8_t xn)
+ {
+ return AddrModeXn(ArgType::kAnAddrIncr, xn);
+ }
+ static constexpr auto AnAddrDecr(const uint8_t xn)
+ {
+ return AddrModeXn(ArgType::kAnAddrDecr, xn);
+ }
+ static constexpr auto D16AnAddr(const uint8_t xn, const int16_t d16)
+ {
+ Arg a{{ArgType::kD16AnAddr}, {0}};
+ a.d16_an = D16AnPCAddr{xn, d16};
+ return a;
+ }
+ static constexpr auto D16PCAddr(const int16_t d16)
+ {
+ Arg a{{ArgType::kD16PCAddr}, {0}};
+ a.d16_pc = D16AnPCAddr{0, d16};
+ return a;
+ }
+ static constexpr auto Word(const int16_t w)
+ {
+ Arg a{{ArgType::kWord}, {0}};
+ a.lword = w;
+ return a;
+ }
+ static constexpr auto Long(const int32_t l)
+ {
+ Arg a{{ArgType::kLong}, {0}};
+ a.lword = l;
+ return a;
+ }
+ static constexpr auto D8AnXiAddr(
+ const uint8_t xn, const uint8_t xi, const OpSize s, const int8_t d8)
+ {
+ Arg a{{ArgType::kD8AnXiAddr}, {0}};
+ a.d8_an_xi = D8AnPCXiAddr{xn, uint8_t(xi | (s == OpSize::kLong ? 0x10u : 0u)), d8};
+ return a;
+ }
+ static constexpr auto D8PCXiAddr(
+ const uint8_t xn, const uint8_t xi, const OpSize s, const int8_t d8)
+ {
+ Arg a{{ArgType::kD8PCXiAddr}, {0}};
+ a.d8_pc_xi = D8AnPCXiAddr{xn, uint8_t(xi | (s == OpSize::kLong ? 0x10u : 0u)), d8};
+ return a;
+ }
+ static constexpr auto Immediate(const int32_t value) {
+ Arg a{{ArgType::kImmediate}, {0}};
+ a.lword = value;
+ return a;
+ }
+ static constexpr auto RegMask(const uint16_t regmask) {
+ Arg a{{ArgType::kRegMask}, {0}};
+ a.uword = regmask;
+ return a;
+ }
+ static constexpr auto RegMaskPredecrement(const uint16_t regmask) {
+ Arg a{{ArgType::kRegMaskPredecrement}, {0}};
+ a.uword = regmask;
+ return a;
+ }
+ static constexpr auto Displacement(const int32_t displacement) {
+ Arg a{{ArgType::kDisplacement}, {0}};
+ a.lword = displacement;
+ return a;
+ }
+ static constexpr auto CCR() { return Arg{{ArgType::kCCR}, {0}}; }
+ static constexpr auto SR() { return Arg{{ArgType::kSR}, {0}}; }
+ static constexpr auto USP() { return Arg{{ArgType::kUSP}, {0}}; }
+ static constexpr auto Raw(const uint16_t instr) {
+ Arg a{{ArgType::kRaw}, {0}};
+ a.uword = instr;
+ return a;
+ }
+ int SNPrint(
+ char *buf,
+ size_t bufsz,
+ bool imm_as_hex = false,
+ RefKindMask ref_kinds = 0,
+ const char *label = nullptr,
+ uint32_t self_addr = 0,
+ uint32_t ref_addr = 0) const;
+};
+
+enum class NodeType {
+ kTracedInstruction,
+ kRefInstruction,
+ kData,
+};
+
+constexpr size_t kRefsCountPerBuffer = 10;
+
+constexpr size_t kMnemonicBufferSize = 10;
+constexpr size_t kArgsBufferSize = 80;
+
+enum class ReferenceType {
+ kUnknown = 0,
+ kCall,
+ kBranch,
+ kRead,
+ kWrite,
+};
+
+struct ReferenceRecord {
+ ReferenceType type{};
+ uint32_t address{};
+};
+
+struct ReferenceNode {
+ ReferenceNode *next{};
+ ReferenceRecord refs[kRefsCountPerBuffer];
+ uint32_t refs_count{};
+};
+
+struct Op {
+ OpCode opcode{OpCode::kNone}; ///< Identifies instruction (mnemonic)
+ /// Size specifier, the suffix `b`, `w` or `l`
+ OpSize size_spec{OpSize::kNone};
+ Condition condition{Condition::kT}; ///< For Scc, Bcc and Dbcc
+ Arg arg1{}; ///< First argument, optional
+ Arg arg2{}; ///< Second argument, optional, cannot be set if arg1 is not set
+ static constexpr auto Typical(
+ const OpCode opcode = OpCode::kNone,
+ const OpSize opsize = OpSize::kNone,
+ const Arg arg1 = Arg{},
+ const Arg arg2 = Arg{})
+ {
+ return Op{opcode, opsize, Condition::kT, arg1, arg2};
+ }
+ static constexpr auto Raw(const uint16_t instr)
+ {
+ return Op::Typical(OpCode::kRaw, OpSize::kNone, Arg::Raw(instr));
+ }
+ int FPrint(
+ FILE *,
+ const char *indent,
+ bool imm_as_hex,
+ RefKindMask ref_kinds = 0,
+ const char *ref1_label = nullptr,
+ const char *ref2_label = nullptr,
+ uint32_t self_addr = 0,
+ uint32_t ref1_addr = 0,
+ uint32_t ref2_addr = 0) const;
+};
+
+struct DisasmNode {
+ const NodeType type{};
+ /// Address of the instruction (PC value basically)
+ const uint32_t address{};
+ /// Instruction size in bytes
+ size_t size{kInstructionSizeStepBytes};
+ /// Indicates whether `ref_addr` should be interpreted and how
+ RefKindMask ref_kinds{};
+ /// Address of first argument reference
+ uint32_t ref1_addr{};
+ /// Address of second argument reference
+ uint32_t ref2_addr{};
+ ReferenceNode *ref_by{};
+ ReferenceNode *last_ref_by{};
+ Op op{};
+
+ /*! Disassembles the instruction together with its arguments and
+ * returns the size of the whole instruction with arguments, in bytes
+ */
+ size_t Disasm(const DataView &code);
+ size_t DisasmAsRaw(const DataView &code);
+ void AddReferencedBy(uint32_t address, ReferenceType);
+ ~DisasmNode();
+};
+
+static constexpr inline bool IsInstruction(NodeType t)
+{
+ return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction;
+}
+
+static constexpr inline bool IsBRA(Op op)
+{
+ return op.opcode == OpCode::kBcc && op.condition == Condition::kT;
+}
diff --git a/src/elf_format.h b/src/elf_format.h
new file mode 100644
index 0000000..b5a268a
--- /dev/null
+++ b/src/elf_format.h
@@ -0,0 +1,328 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include <cstddef>
+#include <cstdint>
+
+namespace ELF {
+
+constexpr size_t kIdentSize = 16;
+constexpr size_t kHeaderSize = kIdentSize + 36;
+constexpr size_t kMagicSize = 4;
+constexpr size_t kProgramHeaderSize = 32;
+
+using Address = uint32_t;
+using Offset = uint32_t;
+
+enum class FileClass : uint8_t {
+ kNone = 0,
+ k32 = 1,
+ k64 = 2,
+ kUnknown,
+};
+
+enum class DataEncoding : uint8_t {
+ kNone = 0,
+ k2LSB = 1,
+ kLE = k2LSB,
+ k2MSB = 2,
+ kBE = k2MSB,
+ kUnknown,
+};
+
+enum class Version : uint8_t {
+ kNone = 0,
+ kCurrent = 1,
+ kUnknown,
+};
+
+static constexpr inline auto ParseFileClass(const uint8_t file_class)
+{
+ switch (file_class) {
+ case static_cast<uint8_t>(FileClass::kNone): return FileClass::kNone;
+ case static_cast<uint8_t>(FileClass::k32): return FileClass::k32;
+ case static_cast<uint8_t>(FileClass::k64): return FileClass::k64;
+ }
+ return FileClass::kUnknown;
+}
+
+static constexpr inline auto ParseDataEncoding(const uint8_t data_encoding)
+{
+ switch (data_encoding) {
+ case static_cast<uint8_t>(DataEncoding::kNone): return DataEncoding::kNone;
+ case static_cast<uint8_t>(DataEncoding::k2LSB): return DataEncoding::k2LSB;
+ case static_cast<uint8_t>(DataEncoding::k2MSB): return DataEncoding::k2MSB;
+ }
+ return DataEncoding::kUnknown;
+}
+
+static constexpr inline auto ParseVersion(const uint8_t version)
+{
+ switch (version) {
+ case static_cast<uint8_t>(Version::kNone): return Version::kNone;
+ case static_cast<uint8_t>(Version::kCurrent): return Version::kCurrent;
+ }
+ return Version::kUnknown;
+}
+
+struct Ident32Raw {
+ uint8_t magic[4];
+ uint8_t file_class;
+ uint8_t data_encoding;
+ uint8_t version;
+ uint8_t os_abi;
+ uint8_t abi_version;
+ uint8_t padding[7];
+ static constexpr auto inline FromBytes(const uint8_t *data)
+ {
+ return Ident32Raw{
+ { data[0], data[1], data[2], data[3] },
+ data[4],
+ data[5],
+ data[6],
+ data[7],
+ data[8],
+ { data[9], data[10], data[11], data[12], data[13], data[14], data[15], },
+ };
+ }
+};
+
+struct Ident32 {
+ uint8_t magic[4];
+ FileClass file_class;
+ DataEncoding data_encoding;
+ Version version;
+ uint8_t os_abi;
+ uint8_t abi_version;
+ static constexpr inline auto FromBytes(const uint8_t *data)
+ {
+ return Ident32{
+ { data[0], data[1], data[2], data[3] },
+ ParseFileClass(data[4]),
+ ParseDataEncoding(data[5]),
+ ParseVersion(data[6]),
+ data[7],
+ data[8],
+ };
+ }
+ static constexpr inline auto FromIdent32Raw(const Ident32Raw raw)
+ {
+ return Ident32{
+ { raw.magic[0], raw.magic[1], raw.magic[2], raw.magic[3] },
+ ParseFileClass(raw.file_class),
+ ParseDataEncoding(raw.data_encoding),
+ ParseVersion(raw.version),
+ raw.os_abi,
+ raw.abi_version,
+ };
+ }
+};
+
+enum class ObjectType : uint16_t {
+ kNone = 0,
+ kRel = 1,
+ kExec = 2,
+ kDyn = 3,
+ kCore = 4,
+ kUnknown = 0x7fff,
+ kLoProc = 0xff00,
+ kHiProc = 0xffff,
+};
+
+enum class Machine : uint16_t {
+ kNone = 0,
+ kM32 = 1,
+ kSPARC = 2,
+ k386 = 3,
+ k68k = 4,
+ k88k = 5,
+ k860 = 7,
+ kMIPS = 8,
+ kUnknown,
+};
+
+static constexpr inline uint16_t ParseU16(const uint8_t *d, DataEncoding e)
+{
+ if (e == DataEncoding::k2MSB) {
+ return uint16_t(d[0]) << 8 | d[1];
+ }
+ return uint16_t(d[1]) << 8 | d[0];
+}
+
+static constexpr inline uint32_t ParseU32(const uint8_t *d, DataEncoding e)
+{
+ if (e == DataEncoding::k2MSB) {
+ return uint32_t(d[0]) << 24 | uint32_t(d[1]) << 16 | uint32_t(d[2]) << 8 | d[3];
+ }
+ return uint32_t(d[3]) << 24 | uint32_t(d[2]) << 16 | uint32_t(d[1]) << 8 | d[0];
+}
+
+static constexpr inline auto ParseObjectType(const uint16_t type)
+{
+ switch (type) {
+ case static_cast<uint16_t>(ObjectType::kNone): return ObjectType::kNone;
+ case static_cast<uint16_t>(ObjectType::kRel): return ObjectType::kRel;
+ case static_cast<uint16_t>(ObjectType::kExec): return ObjectType::kExec;
+ case static_cast<uint16_t>(ObjectType::kDyn): return ObjectType::kDyn;
+ case static_cast<uint16_t>(ObjectType::kCore): return ObjectType::kCore;
+ case static_cast<uint16_t>(ObjectType::kLoProc): return ObjectType::kLoProc;
+ case static_cast<uint16_t>(ObjectType::kHiProc): return ObjectType::kHiProc;
+ }
+ return ObjectType::kUnknown;
+}
+
+static constexpr inline auto ParseMachine(const uint16_t machine)
+{
+ switch (machine) {
+ case static_cast<uint16_t>(Machine::kNone): return Machine::kNone;
+ case static_cast<uint16_t>(Machine::kM32): return Machine::kM32;
+ case static_cast<uint16_t>(Machine::kSPARC): return Machine::kSPARC;
+ case static_cast<uint16_t>(Machine::k386): return Machine::k386;
+ case static_cast<uint16_t>(Machine::k68k): return Machine::k68k;
+ case static_cast<uint16_t>(Machine::k88k): return Machine::k88k;
+ case static_cast<uint16_t>(Machine::k860): return Machine::k860;
+ case static_cast<uint16_t>(Machine::kMIPS): return Machine::kMIPS;
+ }
+ return Machine::kUnknown;
+}
+
+struct Header32Raw {
+ Ident32Raw ident;
+ uint16_t type;
+ uint16_t machine;
+ uint32_t version;
+ Address entry;
+ Offset phoff;
+ Offset shoff;
+ uint32_t flags;
+ uint16_t ehsize;
+ uint16_t phentsize;
+ uint16_t phnum;
+ uint16_t shentsize;
+ uint16_t shnum;
+ uint16_t shstrndx;
+ static constexpr inline auto FromBytes(const uint8_t *data)
+ {
+ const auto ident = Ident32Raw::FromBytes(data);
+ const DataEncoding e = ParseDataEncoding(ident.data_encoding);
+ return Header32Raw{
+ /* .ident */ ident,
+ /* .type */ ParseU16(data + kIdentSize + 0, e),
+ /* .machine */ ParseU16(data + kIdentSize + 2, e),
+ /* .version */ ParseU32(data + kIdentSize + 4, e),
+ /* .entry */ ParseU32(data + kIdentSize + 8, e),
+ /* .phoff */ ParseU32(data + kIdentSize + 12, e),
+ /* .shoff */ ParseU32(data + kIdentSize + 16, e),
+ /* .flags */ ParseU32(data + kIdentSize + 20, e),
+ /* .ehsize */ ParseU16(data + kIdentSize + 24, e),
+ /* .phentsize */ ParseU16(data + kIdentSize + 26, e),
+ /* .phnum */ ParseU16(data + kIdentSize + 28, e),
+ /* .shentsize */ ParseU16(data + kIdentSize + 30, e),
+ /* .shnum */ ParseU16(data + kIdentSize + 32, e),
+ /* .shstrndx */ ParseU16(data + kIdentSize + 34, e),
+ };
+ }
+};
+
+struct Header32 {
+ Ident32 ident;
+ ObjectType type;
+ Machine machine;
+ Version version;
+ Address entry;
+ Offset phoff;
+ Offset shoff;
+ uint32_t flags;
+ uint16_t ehsize;
+ uint16_t phentsize;
+ uint16_t phnum;
+ uint16_t shentsize;
+ uint16_t shnum;
+ uint16_t shstrndx;
+ static constexpr inline auto FromBytes(const uint8_t *data)
+ {
+ const auto raw = Header32Raw::FromBytes(data);
+ return Header32{
+ Ident32::FromIdent32Raw(raw.ident),
+ ParseObjectType(raw.type),
+ ParseMachine(raw.machine),
+ ParseVersion(raw.version),
+ raw.entry,
+ raw.phoff,
+ raw.shoff,
+ raw.flags,
+ raw.ehsize,
+ raw.phentsize,
+ raw.phnum,
+ raw.shentsize,
+ raw.shnum,
+ raw.shstrndx,
+ };
+ }
+};
+
+enum class PHType : uint32_t {
+ kNull = 0,
+ kLoad = 1,
+ kDynamic = 2,
+ kInterp = 3,
+ kNote = 4,
+ kSHLIB = 5,
+ kProgramHeaderTable = 6,
+ kLoProc = 0x70000000,
+ kHiProc = 0x7fffffff,
+ kUnknown,
+};
+
+static constexpr inline auto ParsePHType(const uint32_t type)
+{
+ switch (type) {
+ case static_cast<uint32_t>(PHType::kNull): return PHType::kNull;
+ case static_cast<uint32_t>(PHType::kLoad): return PHType::kLoad;
+ case static_cast<uint32_t>(PHType::kDynamic): return PHType::kDynamic;
+ case static_cast<uint32_t>(PHType::kInterp): return PHType::kInterp;
+ case static_cast<uint32_t>(PHType::kNote): return PHType::kNote;
+ case static_cast<uint32_t>(PHType::kSHLIB): return PHType::kSHLIB;
+ case static_cast<uint32_t>(PHType::kProgramHeaderTable): return PHType::kProgramHeaderTable;
+ case static_cast<uint32_t>(PHType::kLoProc): return PHType::kLoProc;
+ case static_cast<uint32_t>(PHType::kHiProc): return PHType::kHiProc;
+ }
+ return PHType::kUnknown;
+}
+
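+// Program header segment flag bits, matching PF_X, PF_W and PF_R from the
+// ELF specification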
+constexpr uint32_t kPHFlagX = 1 << 0;
+constexpr uint32_t kPHFlagW = 1 << 1;
+constexpr uint32_t kPHFlagR = 1 << 2;
+
+struct ProgramHeader32 {
+ uint32_t type;
+ Offset offset;
+ Address vaddr;
+ Address paddr;
+ uint32_t filesz;
+ uint32_t memsz;
+ uint32_t flags;
+ uint32_t align;
+ static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e)
+ {
+ return ProgramHeader32{
+ /* type */ ParseU32(data + 0, e),
+ /* offset */ ParseU32(data + 4, e),
+ /* vaddr */ ParseU32(data + 8, e),
+ /* paddr */ ParseU32(data + 12, e),
+ /* filesz */ ParseU32(data + 16, e),
+ /* memsz */ ParseU32(data + 20, e),
+ /* flags */ ParseU32(data + 24, e),
+ /* align */ ParseU32(data + 28, e),
+ };
+ }
+};
+
+static constexpr inline bool MagicIsValid(const uint8_t *m)
+{
+ return m[0] == 0x7f && m[1] == 'E' && m[2] == 'L' && m[3] == 'F';
+}
+
+};
diff --git a/src/elf_image.cpp b/src/elf_image.cpp
new file mode 100644
index 0000000..6db72f3
--- /dev/null
+++ b/src/elf_image.cpp
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "elf_image.h"
+
+#include <cassert>
+#include <cstdio>
+
+ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes(
+ const DataView &d, const DataEncoding e)
+{
+ if (d.buffer == nullptr || d.size == 0) {
+ return ELF::ProgramHeader32Table{};
+ }
+ assert(d.size % kProgramHeaderSize == 0);
+ const size_t size = d.size / kProgramHeaderSize;
+ auto *headers = new ProgramHeader32[size];
+ assert(headers != nullptr);
+ for (size_t i = 0; i < size; i++) {
+ headers[i] = ProgramHeader32::FromBytes(d.buffer + i * kProgramHeaderSize, e);
+ }
+ return ELF::ProgramHeader32Table{ headers, size, };
+}
+
+static char *ValidateELF(const DataView& d)
+{
+ char *error;
+ size_t size;
+ FILE *s = open_memstream(&error, &size);
+ assert(s);
+ using namespace ELF;
+ if (d.size < kHeaderSize) {
+ fprintf(
+ s,
+ "data size (%zu) is lower than minimum ELF header size (%zu): "
+ "ELF header could not fit",
+ d.size,
+ kHeaderSize);
+ fclose(s);
+ return error;
+ }
+ const auto header_raw = Header32Raw::FromBytes(d.buffer);
+ const auto header = Header32::FromBytes(d.buffer);
+ if (!MagicIsValid(header.ident.magic)) {
+ const uint8_t *m = header.ident.magic;
+ fprintf(
+ s,
+ "ELF Magic is invalid: expected [%02x %02x %02x %02x], got [%02x %02x %02x %02x]",
+ 0x7f, 'E', 'L', 'F',
+ m[0], m[1], m[2], m[3]);
+ fclose(s);
+ return error;
+ }
+ if (header.ident.version != Version::kCurrent) {
+ fprintf(
+ s,
+ "version (0x%02x) of ELF header.ident.version is not supported, "
+ "only \"Current\" version (0x%02x) is supported",
+ header_raw.ident.version,
+ static_cast<int>(Version::kCurrent));
+ fclose(s);
+ return error;
+ }
+ if (header.version != Version::kCurrent) {
+ fprintf(
+ s,
+ "version (0x%02x) of ELF header.version is not supported, "
+ "only \"Current\" version (0x%02x) is supported",
+ header_raw.version,
+ static_cast<int>(Version::kCurrent));
+ fclose(s);
+ return error;
+ }
+ if (header.type != ObjectType::kExec) {
+ fprintf(
+ s,
+ "object type (0x%02x) is not supported, "
+ "only Exec (0x%02x) object type is supported",
+ header_raw.type,
+ static_cast<int>(ObjectType::kExec));
+ fclose(s);
+ return error;
+ }
+ if (header.machine != Machine::k68k) {
+ fprintf(
+ s,
+ "machine (0x%02x) is not supported, "
+ "only Motorola 68k (0x%02x) machine is supported",
+ header_raw.machine,
+ static_cast<int>(Machine::k68k));
+ fclose(s);
+ return error;
+ }
+ if (header.phentsize != kProgramHeaderSize) {
+ fprintf(
+ s,
+ "phentsize is invalid: expected (%zu), got (%zu)",
+ kProgramHeaderSize,
+ size_t(header.phentsize));
+ fclose(s);
+ return error;
+ }
+ if (d.size < header.phoff + header.phentsize * header.phnum) {
+ fprintf(
+ s,
+ "data size (%zu) is lower than program header table end offset (%zu): "
+ "program header table could not fit",
+ d.size,
+ size_t(header.phoff + header.phentsize * header.phnum));
+ fclose(s);
+ return error;
+ }
+ bool has_segment_with_entry = false;
+ for (size_t i = 0; i < header.phnum; i++) {
+ const auto ph = ProgramHeader32::FromBytes(
+ d.buffer + header.phoff + header.phentsize * i, header.ident.data_encoding);
+ if (d.size < ph.offset + ph.filesz) {
+ fprintf(
+ s,
+ "data size (%zu) is lower than pht[%zu] segment end offset (%zu): "
+ "segment could not fit",
+ d.size,
+ i,
+ size_t(ph.offset + ph.filesz));
+ fclose(s);
+ return error;
+ }
+ const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == (kPHFlagX | kPHFlagR);
+ if (ParsePHType(ph.type) == PHType::kLoad && is_code && ph.vaddr != 0) {
+ fprintf(
+ s,
+ "pht[%zu] segment is a code, but it's vaddr (0x%08x) is not zero: "
+ "non-zero base address is not supported",
+ i,
+ ph.vaddr);
+ fclose(s);
+ return error;
+ }
+ const bool contains_entry = header.entry >= ph.vaddr && header.entry < ph.vaddr + ph.memsz;
+ if (ParsePHType(ph.type) == PHType::kLoad && is_code && contains_entry) {
+ has_segment_with_entry = true;
+ }
+ }
+ if (!has_segment_with_entry) {
+ fprintf(s, "no code segments containing entry point (0x%08x) found", header.entry);
+ fclose(s);
+ return error;
+ }
+ fclose(s);
+ free(error);
+ return nullptr;
+}
+
+ELF::Image::Image(DataBuffer&& data)
+ : _data(static_cast<DataBuffer&&>(data))
+ , _error(ValidateELF(_data.View()))
+ , _h(_error ? ELF::Header32{} : ELF::Header32::FromBytes(_data.View().buffer))
+ , _pht(_error
+ ? ELF::ProgramHeader32Table{}
+ : ELF::ProgramHeader32Table::FromBytes(
+ _data.View(_h.phoff, _h.phnum * kProgramHeaderSize), _h.ident.data_encoding))
+{}
+
+ELF::Image::~Image()
+{
+ if (_error) {
+ free(_error);
+ }
+ if (_pht.headers) {
+ delete [] _pht.headers;
+ }
+}
diff --git a/src/elf_image.h b/src/elf_image.h
new file mode 100644
index 0000000..b7c7123
--- /dev/null
+++ b/src/elf_image.h
@@ -0,0 +1,55 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "elf_format.h"
+#include "data_buffer.h"
+
+#include <cstdlib>
+
+namespace ELF {
+
+struct ProgramHeader32Table {
+ const ProgramHeader32 *headers{};
+ size_t size{};
+ static ProgramHeader32Table FromBytes(const DataView &, DataEncoding);
+};
+
+struct Segment {
+ Segment *next{};
+ const DataView view{};
+};
+
+class Image {
+ const DataBuffer _data;
+ char *const _error;
+ const Header32 _h;
+ const ProgramHeader32Table _pht;
+public:
+ explicit Image(DataBuffer&&);
+ ~Image();
+ constexpr bool IsValid() const { return _error == nullptr; }
+ constexpr const DataBuffer &Data() const { return _data; };
+ constexpr const DataView ProgramView() const
+ {
+ if (!IsValid()) {
+ return DataView{};
+ }
+ for (size_t i = 0; i < _pht.size; i++) {
+ const auto ph = _pht.headers[i];
+ const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) ==
+ (kPHFlagX | kPHFlagR);
+ const bool is_load = ParsePHType(ph.type) == PHType::kLoad;
+ const bool contains_entry = _h.entry >= ph.vaddr && _h.entry < ph.vaddr + ph.memsz;
+ if (is_load && is_code && ph.vaddr == 0 && contains_entry)
+ {
+ return _data.View(ph.offset, ph.filesz);
+ }
+ }
+ return DataView{};
+ };
+ constexpr const char *Error() const { return _error; }
+};
+
+}
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..a6f73b3
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,836 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "elf_image.h"
+#include "data_buffer.h"
+#include "disasm.h"
+#include "common.h"
+
+#define OPTPARSE_IMPLEMENTATION
+#define OPTPARSE_API static
+#include "optparse/optparse.h"
+
+#include <cassert>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <cerrno>
+#include <climits>
+
+enum class DisasmMapType {
+ kTraced,
+ kRaw,
+};
+
+class DisasmMap {
+ const DisasmMapType _type;
+ DisasmNode *_map[kDisasmMapSizeElements]{};
+ constexpr DisasmNode *findNodeByAddress(uint32_t address) const;
+ DisasmNode &insertNode(uint32_t address, NodeType);
+ DisasmNode &insertReferencedBy(
+ const uint32_t by_addr,
+ const uint32_t ref_addr,
+ const NodeType type,
+ const ReferenceType ref_type);
+ constexpr bool canBeAllocated(const DisasmNode& node) const;
+public:
+ constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const
+ {
+ return findNodeByAddress(address);
+ };
+ void InsertNode(uint32_t address, NodeType type)
+ {
+ assert(_type == DisasmMapType::kTraced);
+ insertNode(address, type);
+ }
+ void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false);
+ DisasmMap(DisasmMapType type): _type(type) {}
+ ~DisasmMap();
+};
+
+constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const
+{
+ if (address < kRomSizeBytes)
+ return _map[address / kInstructionSizeStepBytes];
+ return nullptr;
+}
+
+static constexpr uint32_t AlignInstructionAddress(const uint32_t address)
+{
+ return address & ~1UL;
+}
+
+DisasmNode &DisasmMap::insertNode(const uint32_t address, const NodeType type)
+{
+ auto *node = findNodeByAddress(address);
+ if (node) {
+ // Instruction nodes take precedence over data nodes. If a node that
+ // was previously accessed only as data now turns out to be an
+ // instruction, then it must become an instruction node.
+ if (IsInstruction(type) && !IsInstruction(node->type)) {
+ *const_cast<NodeType*>(&node->type) = type;
+ // Make sure it is OpCode::kNone so it will be properly disassembled
+ node->op = Op{};
+ }
+ return *node;
+ }
+ node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)});
+ assert(node);
+ _map[address / kInstructionSizeStepBytes] = node;
+ return *node;
+}
+
+DisasmNode &DisasmMap::insertReferencedBy(
+ const uint32_t by_addr,
+ const uint32_t ref_addr,
+ const NodeType type,
+ const ReferenceType ref_type)
+{
+ auto &ref_node = insertNode(ref_addr, type);
+ ref_node.AddReferencedBy(by_addr, ref_type);
+ return ref_node;
+}
+
+constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const
+{
+ const auto size = node.size / kInstructionSizeStepBytes;
+ const auto *const node_real = findNodeByAddress(node.address);
+ for (size_t i = 1; i < size; i++) {
+ const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i];
+ if (ptr != nullptr && ptr != node_real) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds)
+{
+ return (ref_kinds & kRefCallMask)
+ ? ReferenceType::kCall
+ : (ref_kinds & kRef1ReadMask)
+ ? ReferenceType::kRead
+ : (ref_kinds & kRef1WriteMask)
+ ? ReferenceType::kWrite
+ : ReferenceType::kBranch;
+}
+
+static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds)
+{
+ return (ref_kinds & kRefCallMask)
+ ? ReferenceType::kCall
+ : (ref_kinds & kRef2ReadMask)
+ ? ReferenceType::kRead
+ : (ref_kinds & kRef2WriteMask)
+ ? ReferenceType::kWrite
+ : ReferenceType::kBranch;
+}
+
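+// The next instruction is assumed reachable unless the current one is raw
+// data or an unconditional control transfer (BRA, JMP, RTS, RTE) or STOP,
+// after which execution does not fall through.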
+static constexpr bool IsNextLikelyAnInstruction(const Op &op)
+{
+ return (op.opcode != OpCode::kNone &&
+ op.opcode != OpCode::kRaw &&
+ !IsBRA(op) &&
+ op.opcode != OpCode::kJMP &&
+ op.opcode != OpCode::kRTS &&
+ op.opcode != OpCode::kRTE &&
+ op.opcode != OpCode::kSTOP);
+}
+
+void DisasmMap::Disasm(
+ const DataView &code, const Settings &s, size_t at, bool nested)
+{
+ // Some of the logic of this function is covered by integration tests in
+ // `test_walk_and_follow_jumps.bash`.
+ bool inside_code_span = nested;
+ while (at < Min(kRomSizeBytes, code.size)) {
+ DisasmNode *node;
+ if (_type == DisasmMapType::kTraced) {
+ node = _map[at / kInstructionSizeStepBytes];
+ if (!node) {
+ if (inside_code_span) {
+ node = &insertNode(at, NodeType::kTracedInstruction);
+ } else {
+ at += kInstructionSizeStepBytes;
+ continue;
+ }
+ }
+ } else {
+ node = &insertNode(at, NodeType::kTracedInstruction);
+ }
+ if (node->op.opcode == OpCode::kNone || inside_code_span) {
+ const auto size = node->Disasm(code);
+ assert(size >= kInstructionSizeStepBytes);
+ if (canBeAllocated(*node)) {
+ // Spread across the size
+ for (size_t o = kInstructionSizeStepBytes; o < size; o++) {
+ _map[(node->address + o) / kInstructionSizeStepBytes] = node;
+ }
+ } else {
+ node->DisasmAsRaw(code);
+ }
+ }
+ inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op);
+ if (nested && !inside_code_span) {
+ return;
+ }
+ at += node->size;
+ // NOTE: Not much information about a reference is passed further, so just
+ // don't add a reference for an immediate value when s.imm_labels is
+ // disabled.
+ const bool has_ref1 = (node->ref_kinds & kRef1ImmMask)
+ ? s.imm_labels
+ : (node->ref_kinds & kRef1Mask);
+ const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1;
+ if (has_code_ref1) {
+ const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask))
+ ? NodeType::kData : NodeType::kRefInstruction;
+ const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds);
+ auto &ref_node = insertReferencedBy(
+ node->address, node->ref1_addr, type, ref_type);
+ if (ref_node.op.opcode == OpCode::kNone) {
+ if (s.follow_jumps) {
+ Disasm(code, s, ref_node.address, true);
+ } else {
+ ref_node.DisasmAsRaw(code);
+ }
+ }
+ }
+ const bool has_ref2 = (node->ref_kinds & kRef2Mask);
+ const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size);
+ if (has_code_ref2) {
+ const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask))
+ ? NodeType::kData : NodeType::kRefInstruction;
+ const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds);
+ auto &ref_node = insertReferencedBy(
+ node->address, node->ref2_addr, type, ref_type);
+ if (ref_node.op.opcode == OpCode::kNone) {
+ if (s.follow_jumps) {
+ Disasm(code, s, ref_node.address, true);
+ } else {
+ ref_node.DisasmAsRaw(code);
+ }
+ }
+ }
+ }
+}
+
+DisasmMap::~DisasmMap()
+{
+ for (size_t i = 0; i < kDisasmMapSizeElements; i++) {
+ auto *const node = _map[i];
+ if (!node) {
+ continue;
+ }
+ const auto size = node->size / kInstructionSizeStepBytes;
+ for (size_t o = 0; o < size; o++) {
+ assert(_map[i + o] == node);
+ _map[i + o] = nullptr;
+ }
+ delete node;
+ i += size - 1;
+ }
+}
+
+static size_t RenderRawDataComment(
+ char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code)
+{
+ size_t overall_sz{};
+ for (size_t i = 0; i < instr_sz; i += kInstructionSizeStepBytes)
+ {
+ overall_sz += Min(
+ out_sz - overall_sz,
+ snprintf(
+ out + overall_sz,
+ out_sz - overall_sz,
+ " %04x",
+ GetU16BE(code.buffer + address + i)));
+ }
+ overall_sz += Min(
+ out_sz - overall_sz,
+ snprintf(out + overall_sz, out_sz - overall_sz, " @%08x", address));
+ return overall_sz;
+}
+
+static constexpr const char *ReferenceTypeToString(ReferenceType type)
+{
+ switch (type) {
+ case ReferenceType::kUnknown: return "UNKNOWN";
+ case ReferenceType::kCall: return "CALL";
+ case ReferenceType::kBranch: return "BRANCH";
+ case ReferenceType::kRead: return "READ";
+ case ReferenceType::kWrite: return "WRITE";
+ }
+ return "UNKN";
+}
+
+static constexpr bool ShouldPrintAsRaw(const Op& op)
+{
+ if (op.arg1.type == ArgType::kImmediate) {
+ if (op.opcode == OpCode::kADD || op.opcode == OpCode::kSUB ||
+ op.opcode == OpCode::kAND || op.opcode == OpCode::kOR ||
+ op.opcode == OpCode::kEOR || op.opcode == OpCode::kCMP)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+static constexpr bool HasCallReference(const DisasmNode &node)
+{
+ for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) {
+ for (size_t i = 0; i < ref->refs_count; i++) {
+ if (ref->refs[i].type == ReferenceType::kCall) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static constexpr size_t GetNodeSizeByAddress(const DisasmMap &disasm_map, const uint32_t address)
+{
+ const auto *node = disasm_map.FindNodeByAddress(address);
+ if (node == nullptr) {
+ return kInstructionSizeStepBytes;
+ }
+ return node->size;
+}
+
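+// A node is considered local when it is never referenced by a call and no
+// other labeled node lies between it and any of its referrers, so a short
+// numeric label ("1:") remains unambiguous.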
+static constexpr bool IsLocalLocation(const DisasmMap &disasm_map, const DisasmNode &node)
+{
+ for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) {
+ for (size_t i = 0; i < ref->refs_count; i++) {
+ const ReferenceRecord &ref_rec = ref->refs[i];
+ if (ref_rec.type == ReferenceType::kCall) {
+ // Locals are definitely not made for calls
+ return false;
+ }
+ const bool forward = ref_rec.address < node.address;
+ const size_t min_addr = forward ? ref_rec.address : node.address;
+ const size_t start = min_addr + GetNodeSizeByAddress(disasm_map, min_addr);
+ const size_t max_addr = forward ? node.address : ref_rec.address;
+ const size_t end = max_addr + (forward ? 0 : GetNodeSizeByAddress(disasm_map, min_addr));
+ for (size_t addr = start; addr < end;) {
+ const auto *intermediate_node = disasm_map.FindNodeByAddress(addr);
+ if (intermediate_node) {
+ if (intermediate_node->ref_by) {
+ // Another labeled node detected on the jump path, hence
+ // current node's location cannot be considered local
+ return false;
+ }
+ addr += intermediate_node->size;
+ } else {
+ addr += kInstructionSizeStepBytes;
+ }
+ }
+ }
+ }
+ return true;
+}
+
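+// Skips roughly the first n printed columns of the indent string so that the
+// "1:" local-label prefix does not shift the instruction column (tab handling
+// is approximate).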
+static constexpr const char *StringWithoutFirstNChars(const char *str, const size_t n)
+{
+ for (size_t i = 0, tab = 0; i < n && *str; i++, str++) {
+ if (*str == '\t') {
+ tab++;
+ if (tab == 7) {
+ tab = 0;
+ str++;
+ }
+ } else {
+ str++;
+ }
+ }
+ return str;
+}
+
+static void RenderNodeDisassembly(
+ FILE *const output,
+ const DisasmMap &disasm_map,
+ const DataView &code,
+ const Settings &s,
+ const DisasmNode &node)
+{
+ if (node.ref_by) {
+ const bool is_local = IsLocalLocation(disasm_map, node);
+ if (s.labels && !(s.short_ref_local_labels && is_local)) {
+ const bool export_this_function = s.export_functions && HasCallReference(node);
+ const bool export_this_label = s.export_all_labels ||
+ (s.export_labels && node.ref_by && (node.ref_by->refs_count > 1)) ||
+ export_this_function;
+ if (export_this_label) {
+ fprintf(output, "\n%s.globl\tL%08x\n", s.indent, node.address);
+ if (export_this_function) {
+ fprintf(output, "%s.type\tL%08x, @function\n", s.indent, node.address);
+ }
+ }
+ }
+ if (s.xrefs_from && !(s.short_ref_local_labels && is_local)) {
+ fprintf(output, "| XREFS:\n");
+ for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) {
+ if (ref->refs_count == 0) {
+ continue;
+ }
+ fprintf(output, "|");
+ for (size_t i = 0; i < ref->refs_count; i++) {
+ const ReferenceRecord r = ref->refs[i];
+ fprintf(output, " %s @%08x", ReferenceTypeToString(r.type), r.address);
+ }
+ fprintf(output, "\n");
+ }
+ }
+ if (s.labels) {
+ if (s.short_ref_local_labels && is_local) {
+ fprintf(output, "1:%s", StringWihoutFristNChars(s.indent, (sizeof "1:") - 1));
+ } else {
+ fprintf(output, "L%08x:\n", node.address);
+ }
+ }
+ }
+ assert(node.op.opcode != OpCode::kNone);
+ if (ShouldPrintAsRaw(node.op)) {
+ auto raw = Op::Raw(GetU16BE(code.buffer + node.address));
+ raw.FPrint(output, s.indent, s.imm_hex);
+ uint32_t i = kInstructionSizeStepBytes;
+ for (; i < node.size; i += kInstructionSizeStepBytes) {
+ char arg_str[kArgsBufferSize]{};
+ const auto arg = Arg::Raw(GetU16BE(code.buffer + node.address + i));
+ arg.SNPrint(arg_str, kArgsBufferSize);
+ fprintf(output, ", %s", arg_str);
+ }
+ } else {
+ const bool with_ref = node.ref_kinds && s.labels && (s.abs_labels || s.rel_labels);
+ const auto *ref1 = (node.ref_kinds & kRef1Mask)
+ ? disasm_map.FindNodeByAddress(node.ref1_addr) : nullptr;
+ const auto *ref2 = (node.ref_kinds & kRef2Mask)
+ ? disasm_map.FindNodeByAddress(node.ref2_addr) : nullptr;
+ const uint32_t ref1_addr = (with_ref && ref1) ? ref1->address : 0;
+ const uint32_t ref2_addr = (with_ref && ref2) ? ref2->address : 0;
+ if (with_ref && (ref1 || ref2)) {
+ const RefKindMask ref_kinds =
+ (s.abs_labels
+ ? ((ref1 ? (node.ref_kinds & kRef1AbsMask) : 0) |
+ (ref2 ? (node.ref_kinds & kRef2AbsMask) : 0))
+ : 0) |
+ (s.rel_labels
+ ? ((ref1 ? (node.ref_kinds & kRef1RelMask) : 0) |
+ (ref2 ? (node.ref_kinds & kRef2RelMask) : 0))
+ : 0) |
+ ((s.imm_labels && ref1) ? (node.ref_kinds & kRef1ImmMask) : 0) |
+ (node.ref_kinds & (kRefDataMask | kRefPcRelFix2Bytes));
+ const bool ref1_is_local = !ref1 || IsLocalLocation(disasm_map, *ref1);
+ char ref1_label[32]{};
+ if (ref1) {
+ if (s.short_ref_local_labels && ref1_is_local) {
+ const char dir = ref1_addr <= node.address ? 'b' : 'f';
+ snprintf(ref1_label, (sizeof ref1_label), "1%c", dir);
+ } else {
+ snprintf(ref1_label, (sizeof ref1_label), "L%08x", ref1_addr);
+ }
+ }
+ const bool ref2_is_local = !ref2 || IsLocalLocation(disasm_map, *ref2);
+ char ref2_label[32]{};
+ if (ref2) {
+ if (s.short_ref_local_labels && ref2_is_local) {
+ const char dir = ref2_addr <= node.address ? 'b' : 'f';
+ snprintf(ref2_label, (sizeof ref2_label), "1%c", dir);
+ } else {
+ snprintf(ref2_label, (sizeof ref2_label), "L%08x", ref2_addr);
+ }
+ }
+ node.op.FPrint(
+ output,
+ s.indent,
+ s.imm_hex,
+ ref_kinds,
+ ref1_label,
+ ref2_label,
+ node.address,
+ ref1_addr,
+ ref2_addr);
+ const bool ref1_from_imm_ok = ((node.ref_kinds & kRef1ImmMask) ? s.imm_labels : true);
+ if (s.xrefs_to && !(s.short_ref_local_labels && ref1_is_local) && ref1_from_imm_ok)
+ {
+ fprintf(output, " | L%08x", ref1_addr);
+ }
+ if (s.xrefs_to && !(s.short_ref_local_labels && ref2_is_local)) {
+ fprintf(output, " | L%08x", ref2_addr);
+ }
+ } else {
+ node.op.FPrint(output, s.indent, s.imm_hex);
+ }
+ }
+ if (s.raw_data_comment) {
+ char raw_data_comment[100]{};
+ RenderRawDataComment(
+ raw_data_comment,
+ (sizeof raw_data_comment) - 1,
+ node.address,
+ node.size, code);
+ fprintf(output, " |%s", raw_data_comment);
+ }
+ fprintf(output, "\n");
+}
+
+static void RenderDisassembly(
+ FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s)
+{
+ for (size_t i = 0; i < code.size;) {
+ const DisasmNode *node = disasm_map.FindNodeByAddress(i);
+ if (node) {
+ RenderNodeDisassembly(output, disasm_map, code, s, *node);
+ i += node->size;
+ } else {
+ auto raw = Op::Raw(GetU16BE(code.buffer + i));
+ raw.FPrint(output, s.indent, s.imm_hex);
+ fprintf(output, "\n");
+ i += kInstructionSizeStepBytes;
+ }
+ }
+}
+
+static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data)
+{
+ // FIXME: make a full-blown parser with support for various radixes and
+ // different trace types
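+ // Currently each line is expected to begin with a decimal PC value; anything
+ // that does not parse as such is silently skipped.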
+ bool parse = true;
+ for (size_t i = 0; i < trace_data.size; i++) {
+ if (trace_data.buffer[i] == '\n' || trace_data.buffer[i] == '\r') {
+ parse = true;
+ } else if (parse) {
+ errno = 0;
+ const char *startptr = reinterpret_cast<const char *>(trace_data.buffer + i);
+ char *endptr = nullptr;
+ const long address = strtol(startptr, &endptr, 10);
+ if ((address == LONG_MAX || address == LONG_MIN) && errno == ERANGE) {
+ // Parsing error, just skip
+ } else if (startptr == endptr) {
+ // Parsing error, just skip
+ } else if (address % 2) {
+ fprintf(stderr, "Error: Uneven PC values are not supported (got PC=0x%08lx), exiting\n", address);
+ exit(1);
+ } else if (static_cast<unsigned long>(address) > kRomSizeBytes) {
+ fprintf(stderr, "Error: PC values > 4MiB are not supported (got PC=0x%08lx), exiting\n", address);
+ exit(1);
+ } else {
+ // Valid value
+ disasm_map.InsertNode(address, NodeType::kTracedInstruction);
+ }
+ if (startptr != endptr) {
+ i += endptr - startptr - 1;
+ }
+ parse = false;
+ }
+ }
+}
+
+static size_t ReadFromStream(DataBuffer &db, FILE *stream)
+{
+ assert(db.buffer && db.buffer_size >= db.kInitialSize);
+ while (1) {
+ const size_t read_size = db.buffer_size - db.occupied_size;
+ const size_t fread_ret = fread(
+ db.buffer + db.occupied_size, sizeof(*db.buffer), read_size, stream);
+ db.occupied_size += fread_ret;
+ if (fread_ret >= db.buffer_size) {
+ assert(fread_ret == db.buffer_size);
+ db.Expand(db.buffer_size * 2);
+ } else {
+ const int err = errno;
+ if (feof(stream)) {
+ break;
+ } else if (ferror(stream)) {
+ fprintf(stderr, "ReadFromStream: fread(%zu): Error (%d): \"%s\"\n", read_size, err, strerror(err));
+ return EXIT_FAILURE;
+ } else if (db.buffer_size == db.occupied_size) {
+ db.Expand(db.buffer_size * 2);
+ } else {
+ assert(false);
+ }
+ }
+ }
+ return db.occupied_size;
+}
+
+static DisasmMap *NewDisasmMap(FILE *trace_stream)
+{
+ if (trace_stream == nullptr) {
+ DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kRaw};
+ assert(disasm_map);
+ return disasm_map;
+ }
+ // Read trace file into buffer
+ DataBuffer trace_data{};
+ const size_t trace_size = ReadFromStream(trace_data, trace_stream);
+ if (trace_size == 0) {
+ fprintf(stderr, "ReadFromStream(trace_data, trace_stream): Error: No data has been read\n");
+ return nullptr;
+ }
+ // Parse trace file into map
+ DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kTraced};
+ assert(disasm_map);
+ ParseTraceData(*disasm_map, trace_data.View());
+ return disasm_map;
+}
+
+static int M68kDisasm(
+ FILE *input_stream, FILE *output_stream, FILE *trace_stream, const Settings &s)
+{
+ // Read input file into buffer
+ DataBuffer input{};
+ const size_t input_size = ReadFromStream(input, input_stream);
+ if (input_size == 0) {
+ fprintf(stderr, "ReadFromStream(input, input_stream): Error: No data has been read\n");
+ return EXIT_FAILURE;
+ }
+ const ELF::Image elf(static_cast<DataBuffer&&>(input));
+ if (s.bfd == BFDTarget::kELF && !elf.IsValid()) {
+ fprintf(stderr, "Error: ELF image is not valid: %s\n", elf.Error());
+ return EXIT_FAILURE;
+ }
+ const bool from_elf = s.bfd == BFDTarget::kELF || (s.bfd == BFDTarget::kAuto && elf.IsValid());
+ const DataView code(from_elf ? elf.ProgramView() : elf.Data().View());
+ assert(code.buffer != nullptr);
+ assert(code.size != 0);
+ // It is not worth it to check this somewhere while disassembling or
+ // emitting. Odd size is just not supported.
+ if (code.size % 2) {
+ fprintf(stderr, "M68kDisasm: Error: code blob must be of even size\n");
+ return EXIT_FAILURE;
+ }
+ auto *disasm_map = NewDisasmMap(trace_stream);
+ if (disasm_map == nullptr) {
+ return EXIT_FAILURE;
+ }
+ // Disasm into output map
+ disasm_map->Disasm(code, s);
+ // Print output into output_stream
+ RenderDisassembly(output_stream, *disasm_map, code, s);
+ delete disasm_map;
+ return EXIT_SUCCESS;
+}
+
+static bool FeatureStringHasPrefixNo(const char *feature)
+{
+ assert(feature);
+ // This also implicitly checks for the null terminator at no extra cost
+ if (feature[0] == 'n' && feature[1] == 'o' && feature[2] == '-') {
+ return true;
+ }
+ return false;
+}
+
+static bool ApplyFeature(Settings& s, const char *feature_arg)
+{
+ struct {
+ bool Settings::* setting;
+ const char* feature_name;
+ } const features[]{
+ { &Settings::raw_data_comment, "rdc" },
+ { &Settings::labels, "labels" },
+ { &Settings::rel_labels, "rel-labels" },
+ { &Settings::abs_labels, "abs-labels" },
+ { &Settings::imm_labels, "imm-labels" },
+ { &Settings::short_ref_local_labels, "short-ref-local-labels" },
+ { &Settings::export_labels, "export-labels" },
+ { &Settings::export_all_labels, "export-all-labels" },
+ { &Settings::export_functions, "export-functions" },
+ { &Settings::xrefs_from, "xrefs-from" },
+ { &Settings::xrefs_to, "xrefs-to" },
+ { &Settings::imm_hex, "imm-hex" },
+ { &Settings::follow_jumps, "follow-jumps" },
+ { &Settings::walk, "walk" },
+ };
+ constexpr size_t sizeof_no_prefix = (sizeof "no-") - 1;
+ const bool disable = FeatureStringHasPrefixNo(feature_arg);
+ const char *const feature = feature_arg + (disable ? sizeof_no_prefix : 0);
+ for (size_t i = 0; i < (sizeof features) / (sizeof *features); i++) {
+ if (0 == strcmp(feature, features[i].feature_name)) {
+ s.*(features[i].setting) = !disable;
+ return true;
+ }
+ }
+ return false;
+}
+
+static void PrintUsage(FILE *s, const char *argv0)
+{
+ // Please keep all lines within 80 columns when printed.
+ fprintf(s,
+ "Usage: %s [options] <input-file-name>\n"
+ "Options:\n"
+ " -h, --help, Show this message.\n"
+ " -o, --output, Where to write disassembly to (stdout if not set)\n"
+ " -t, --pc-trace, File containing PC trace\n"
+ " --indent, Specify instruction indentation, e.g. \"\t\",\n"
+ " Single tab is used by default.\n"
+ " -f, --feature=[no-]<feature>\n"
+ " Enable or disable (with \"no-\" prefix) a feature.\n"
+ " Available features described below under the\n"
+ " \"Feature flags\" section.\n"
+ " -b, --bfd-target=bfdname\n"
+ " Specify target object format as `bfdname`. Will attempt\n"
+ " to detect automatically if not set. Only `auto,\n"
+ " `binary` and `elf` are currently supported.\n"
+ " <input_file_name> Binary or elf file with the machine code to disassemble\n"
+ "Feature flags:\n"
+ " rdc Print raw data comment.\n"
+ " labels Print labels above all places that have jumps from\n"
+ " somewhere.\n"
+ " rel-labels Use label instead of number on relative branch or call.\n"
+ " abs-labels Use label instead of number on absolute branch or call.\n"
+ " imm-labels Use label instead of number when immediate value moved\n"
+ " to address register.\n"
+ " short-ref-local-labels\n"
+ " Use local labels (numbers) for short jumps or loops.\n"
+ " Jump is considered short when it does not cross other\n"
+ " labels and has no calls.\n"
+ " export-labels Add `.globl` preamble to labels referenced two or more\n"
+ " times.\n"
+ " export-all-labels Add `.globl` preamble to all labels.\n"
+ " export-functions Add `.globl` and `.type @funciton` preamble to a label\n"
+ " referenced as a call.\n"
+ " xrefs-from Print xrefs comments above all places that have xrefs.\n"
+ " xrefs-to Print xrefs comments after all branch instructions.\n"
+ " imm-hex Print all immediate values as hexadecimal numbers.\n"
+ " follow-jumps Follow jumps to statically known locations.\n"
+ " walk Try best to detect further instructions following known\n"
+ " traced locations without overcommitting.\n"
+ , argv0);
+}
+
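+// Example invocation (binary and file names here are illustrative only):
+//   m68k-disasm -t trace.txt -f labels -f rel-labels -o out.S input.bin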
+int main(int, char* argv[])
+{
+ struct optparse_long longopts[] = {
+ {"help", 'h', OPTPARSE_NONE},
+ {"output", 'o', OPTPARSE_REQUIRED},
+ {"pc-trace", 't', OPTPARSE_REQUIRED},
+ {"feature", 'f', OPTPARSE_REQUIRED},
+ {"bfd-target", 'b', OPTPARSE_REQUIRED},
+ {"indent", 80, OPTPARSE_REQUIRED},
+ {},
+ };
+ const char *trace_file_name = nullptr;
+ const char *output_file_name = nullptr;
+ const char *input_file_name = nullptr;
+ Settings s{};
+ struct optparse options;
+ optparse_init(&options, argv);
+ // Parse opts
+ int option;
+ while ((option = optparse_long(&options, longopts, NULL)) != -1) {
+ switch (option) {
+ case 'h':
+ PrintUsage(stdout, argv[0]);
+ return EXIT_SUCCESS;
+ break;
+ case 'o':
+ output_file_name = options.optarg;
+ break;
+ case 't':
+ trace_file_name = options.optarg;
+ break;
+ case 'f':
+ if (!ApplyFeature(s, options.optarg)) {
+ fprintf(stderr, "main: Error: Unknown feature \"%s\", exiting\n", options.optarg);
+ return EXIT_FAILURE;
+ }
+ break;
+ case 'b':
+ {
+ const auto *bfd_str = options.optarg;
+ if (0 == strcmp(bfd_str, "auto")) {
+ s.bfd = BFDTarget::kAuto;
+ } else if (0 == strcmp(bfd_str, "binary")) {
+ s.bfd = BFDTarget::kBinary;
+ } else if (0 == strcmp(bfd_str, "elf")) {
+ s.bfd = BFDTarget::kELF;
+ } else {
+ fprintf(
+ stderr,
+ "Unknown BFD target specified: \"%s\". "
+ "Refer to usage below to find correct BFD values.\n",
+ bfd_str);
+ PrintUsage(stderr, argv[0]);
+ return EXIT_FAILURE;
+ }
+ }
+ break;
+ case 80:
+ s.indent = options.optarg;
+ break;
+ case '?':
+ fprintf(stderr, "main: optparse_long: Error: \"%s\"\n", options.errmsg);
+ return EXIT_FAILURE;
+ }
+ }
+ // Parse input file name
+ char *arg;
+ while ((arg = optparse_arg(&options))) {
+ if (input_file_name == nullptr) {
+ input_file_name = arg;
+ } else {
+ fprintf(stderr, "error: too many free arguments provided\n");
+ return EXIT_FAILURE;
+ }
+ }
+ // Open the files
+ FILE *input_stream = nullptr;
+ FILE *output_stream = stdout;
+ FILE *trace_stream = nullptr;
+ if (input_file_name) {
+ if (0 == strcmp(input_file_name, "-")) {
+ input_stream = stdin;
+ } else {
+ input_stream = fopen(input_file_name, "r");
+ }
+ if (input_stream == nullptr) {
+ const int err = errno;
+ fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", input_file_name, err, strerror(err));
+ return EXIT_FAILURE;
+ }
+ } else {
+ fprintf(stderr, "main: Error: no input file name specified, see usage below.\n");
+ PrintUsage(stderr, argv[0]);
+ return EXIT_FAILURE;
+ }
+ if (output_file_name) {
+ output_stream = fopen(output_file_name, "w");
+ if (output_stream == nullptr) {
+ const int err = errno;
+ fprintf(stderr, "main: fopen(\"%s\", \"w\"): Error (%d): \"%s\"\n", output_file_name, err, strerror(err));
+ fclose(input_stream);
+ return EXIT_FAILURE;
+ }
+ }
+ if (trace_file_name) {
+ if (0 == strcmp(trace_file_name, "-")) {
+ if (input_stream == stdin) {
+ fprintf(stderr, "error: trace stream and input stream cannot be both stdin\n");
+ return EXIT_FAILURE;
+ }
+ trace_stream = stdin;
+ } else {
+ trace_stream = fopen(trace_file_name, "r");
+ }
+ if (trace_stream == nullptr) {
+ const int err = errno;
+ fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", trace_file_name, err, strerror(err));
+ fclose(input_stream);
+ fclose(output_stream);
+ return EXIT_FAILURE;
+ }
+ }
+ // Run the program
+ const int ret = M68kDisasm(input_stream, output_stream, trace_stream, s);
+ if (trace_stream != nullptr) {
+ fclose(trace_stream);
+ }
+ fclose(output_stream);
+ fclose(input_stream);
+ return ret;
+}