diff options
author | Oxore <oxore@protonmail.com> | 2023-06-04 21:58:39 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-06-04 23:26:13 +0300 |
commit | a3f3fb052678b9cf1f80bbdc72c42afc3705ac0b (patch) | |
tree | f45953c256f4e463f073afcdc920c916afc4c0d1 | |
parent | b5c24afbc10a36f65e73d5ef2100da4ff173a109 (diff) |
Add initial support of ELF files
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | Makefile | 33 | ||||
-rw-r--r-- | common.h | 18 | ||||
-rw-r--r-- | data_buffer.cpp | 4 | ||||
-rw-r--r-- | data_buffer.h | 29 | ||||
-rw-r--r-- | disasm.cpp | 104 | ||||
-rw-r--r-- | disasm.h | 8 | ||||
-rw-r--r-- | elf_format.h | 328 | ||||
-rw-r--r-- | elf_image.cpp | 172 | ||||
-rw-r--r-- | elf_image.h | 55 | ||||
-rw-r--r-- | main.cpp | 199 | ||||
-rw-r--r-- | test.ld | 3 | ||||
-rw-r--r-- | todo.md | 5 |
14 files changed, 814 insertions, 146 deletions
@@ -9,3 +9,4 @@ prof_output cmake[-_]build*/ compile_commands.json +m68k-disasm diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d47415..a7dd8b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ set(m68kdisasm_sources main.cpp data_buffer.cpp disasm.cpp + elf_image.cpp ) add_executable(m68k-disasm ${m68kdisasm_sources}) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2b6ee2f --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: Unlicense + +WARNFLAGS = -Wall -Wextra -pedantic -Wlogical-op +OPTFLAGS = -O2 +ARCHFLAGS = +INCLUDES = lib +_CFLAGS = $(CFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(ARCHFLAGS) $(OPTFLAGS) -pipe -g +_CXXFLAGS = $(CXXFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(ARCHFLAGS) $(OPTFLAGS) -pipe -g +LDSCRIPTS = +_LDFLAGS = $(LDFLAGS) $(OPTFLAGS) $(addprefix -T,$(LDSCRIPTS)) + +OBJECTS=main.o \ + data_buffer.o \ + elf_image.o \ + disasm.o + +.PHONY: all +all: m68k-disasm + +m68k-disasm: $(OBJECTS) $(LDSCRIPTS) Makefile + sh -c "time $(CXX) -o $@ $(_LDFLAGS) $(OBJECTS)" + +$(OBJECTS): Makefile + +%.o: %.c Makefile + sh -c "time $(CC) $(_CFLAGS) -c -o $@ $<" + +%.o: %.cpp Makefile + sh -c "time $(CXX) $(_CXXFLAGS) -c -o $@ $<" + +clean: + rm -rfv m68k-disasm $(OBJECTS) + @@ -1,7 +1,16 @@ +#pragma once + /* SPDX-License-Identifier: Unlicense */ -#pragma once +#include <cstddef> +#include <cstdint> + +enum class BFDTarget { + kAuto, + kBinary, + kELF, +}; struct Settings { bool raw_data_comment{}; @@ -16,6 +25,7 @@ struct Settings { bool xrefs_to{}; bool xrefs_from{}; bool imm_hex{}; + BFDTarget bfd{}; const char *indent{"\t"}; }; @@ -54,17 +64,17 @@ constexpr size_t kDisasmMapSizeElements = kRomSizeBytes / kInstructionSizeStepBy static inline constexpr size_t Min(size_t a, size_t b) { return a < b ? a : b; } -static inline constexpr uint16_t GetU16BE(uint8_t *buffer) +static inline constexpr uint16_t GetU16BE(const uint8_t *buffer) { return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]); } -static inline constexpr int16_t GetI16BE(uint8_t *buffer) +static inline constexpr int16_t GetI16BE(const uint8_t *buffer) { return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]); } -static inline constexpr int32_t GetI32BE(uint8_t *buffer) +static inline constexpr int32_t GetI32BE(const uint8_t *buffer) { return (static_cast<uint32_t>(buffer[0]) << 24) | (static_cast<uint32_t>(buffer[1]) << 16) | diff --git a/data_buffer.cpp b/data_buffer.cpp index e691b97..33cb0b3 100644 --- a/data_buffer.cpp +++ b/data_buffer.cpp @@ -1,3 +1,6 @@ +/* SPDX-License-Identifier: Unlicense + */ + #include "data_buffer.h" #include <cassert> @@ -5,6 +8,7 @@ void DataBuffer::Expand(size_t new_size) { + assert(buffer); if (new_size <= buffer_size) { return; } diff --git a/data_buffer.h b/data_buffer.h index c3d86e8..bc264d2 100644 --- a/data_buffer.h +++ b/data_buffer.h @@ -1,14 +1,41 @@ #pragma once +/* SPDX-License-Identifier: Unlicense + */ + +#include "common.h" + #include <cstddef> #include <cstdint> +struct DataView { + const uint8_t *const buffer{}; + const size_t size{}; +}; + struct DataBuffer { + DataBuffer(){}; + DataBuffer(const DataBuffer&) = delete; + constexpr DataBuffer(DataBuffer&& other) + : buffer(other.buffer) + , buffer_size(other.buffer_size) + , occupied_size(other.occupied_size) + { + other.occupied_size = 0; + other.buffer_size = 0; + other.buffer = nullptr; + }; static constexpr size_t kInitialSize = 4 * 1024; uint8_t *buffer{new uint8_t[kInitialSize]}; size_t buffer_size{kInitialSize}; size_t occupied_size{}; void Expand(size_t new_size); + constexpr auto View(size_t offset = 0, size_t size = SIZE_MAX) const + { + if (offset >= occupied_size) { + return DataView{}; + } + return DataView{buffer + offset, Min(occupied_size - offset, size)}; + }; ~DataBuffer(); }; - @@ -27,16 +27,16 @@ enum class ShiftKind: int { kRotate = 3, }; -constexpr Arg FetchImmediate(const uint32_t address, const DataBuffer &code, const OpSize s) +constexpr Arg FetchImmediate(const uint32_t address, const DataView &code, const OpSize s) { if (s == OpSize::kInvalid) { return Arg{}; } else if (s == OpSize::kLong) { - if (address + kInstructionSizeStepBytes < code.occupied_size) { + if (address + kInstructionSizeStepBytes < code.size) { const int32_t value = GetI32BE(code.buffer + address); return Arg::Immediate(value); } - } else if (address < code.occupied_size) { + } else if (address < code.size) { const int16_t value = GetI16BE(code.buffer + address); if (s == OpSize::kByte) { // Technically it is impossible to have value lower that -128 in 8 @@ -54,7 +54,7 @@ constexpr Arg FetchImmediate(const uint32_t address, const DataBuffer &code, con } constexpr Arg FetchArg( - const uint32_t address, const DataBuffer &code, const int m, const int xn, const OpSize s) + const uint32_t address, const DataView &code, const int m, const int xn, const OpSize s) { switch (m) { case 0: // Dn @@ -68,13 +68,13 @@ constexpr Arg FetchArg( case 4: // -(An) return Arg::AnAddrDecr(xn); case 5: // (d16, An), Additional Word - if (address < code.occupied_size) { + if (address < code.size) { const int16_t d16 = GetI16BE(code.buffer + address); return Arg::D16AnAddr(xn, d16); } break; case 6: // (d8, An, Xi), Brief Extension Word - if (address < code.occupied_size) { + if (address < code.size) { const uint16_t briefext = GetU16BE(code.buffer + address); if (briefext & 0x0700) { // briefext must have zeros on 8, 9 an 10-th bits, @@ -91,25 +91,25 @@ constexpr Arg FetchArg( case 7: switch (xn) { case 0: // (xxx).W, Additional Word - if (address < code.occupied_size) { + if (address < code.size) { const int32_t w = GetI16BE(code.buffer + address); return Arg::Word(w); } break; case 1: // (xxx).L, Additional Long - if (address + kInstructionSizeStepBytes < code.occupied_size) { + if (address + kInstructionSizeStepBytes < code.size) { const int32_t l = GetI32BE(code.buffer + address); return Arg::Long(l); } break; case 2: // (d16, PC), Additional Word - if (address < code.occupied_size) { + if (address < code.size) { const int16_t d16 = GetI16BE(code.buffer + address); return Arg::D16PCAddr(d16); } break; case 3: // (d8, PC, Xi), Brief Extension Word - if (address < code.occupied_size) { + if (address < code.size) { const uint16_t briefext = GetU16BE(code.buffer + address); if (briefext & 0x0700) { // briefext must have zeros on 8, 9 an 10-th bits, @@ -136,7 +136,7 @@ constexpr Arg FetchArg( } static Arg FetchArg( - const uint32_t address, const DataBuffer &code, const uint16_t instr, const OpSize s) + const uint32_t address, const DataView &code, const uint16_t instr, const OpSize s) { const int addrmode = instr & 0x3f; const int m = (addrmode >> 3) & 7; @@ -151,7 +151,7 @@ static size_t disasm_verbatim(DisasmNode &node, const uint16_t instr) } static size_t disasm_jsr_jmp( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const OpSize opsize = OpSize::kWord; const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize); @@ -218,7 +218,7 @@ static size_t disasm_ext(DisasmNode &node, const OpSize opsize, const Arg arg) } static size_t disasm_ext_movem( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const auto dir = static_cast<MoveDirection>((instr >> 10) & 1); const unsigned m = (instr >> 3) & 7; @@ -227,7 +227,7 @@ static size_t disasm_ext_movem( if (m == 0 && dir == MoveDirection::kRegisterToMemory) { return disasm_ext(node, opsize, Arg::Dn(xn)); } - if (node.address + kInstructionSizeStepBytes >= code.occupied_size) { + if (node.address + kInstructionSizeStepBytes >= code.size) { // Not enough space for regmask, but maybe it is just EXT? return disasm_verbatim(node, instr); } @@ -298,7 +298,7 @@ static size_t disasm_ext_movem( } static size_t disasm_lea( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const OpSize opsize = OpSize::kLong; const auto addr = FetchArg( @@ -338,7 +338,7 @@ static size_t disasm_lea( } static size_t disasm_chk( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const OpSize opsize = OpSize::kWord; const auto src = FetchArg( @@ -370,7 +370,7 @@ static size_t disasm_chk( } static size_t disasm_bra_bsr_bcc( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const int16_t dispmt0 = static_cast<int8_t>(instr & 0xff); if (dispmt0 == -1) { @@ -381,7 +381,7 @@ static size_t disasm_bra_bsr_bcc( const auto opsize = dispmt0 ? OpSize::kShort : OpSize::kWord; if (dispmt0 == 0) { // Check the boundaries - if (node.address + kInstructionSizeStepBytes >= code.occupied_size) { + if (node.address + kInstructionSizeStepBytes >= code.size) { return disasm_verbatim(node, instr); } node.size = kInstructionSizeStepBytes * 2; @@ -412,7 +412,7 @@ static OpCode OpCodeForBitOps(const unsigned opcode) } static size_t disasm_movep( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const unsigned dn = ((instr >> 9) & 7); const unsigned an = instr & 7; @@ -437,7 +437,7 @@ static size_t disasm_movep( static size_t disasm_src_arg_bitops_movep( DisasmNode &node, const uint16_t instr, - const DataBuffer &code, + const DataView &code, const bool has_dn_src = true) { const unsigned m = (instr >> 3) & 7; @@ -497,7 +497,7 @@ static size_t disasm_src_arg_bitops_movep( return node.size = kInstructionSizeStepBytes + src.Size(opsize0) + dst.Size(opsize0); } -static size_t disasm_bitops(DisasmNode &n, const uint16_t i, const DataBuffer &c) +static size_t disasm_bitops(DisasmNode &n, const uint16_t i, const DataView &c) { return disasm_src_arg_bitops_movep(n, i, c, false); } @@ -526,7 +526,7 @@ static OpCode OpCodeForLogicalImmediate(const unsigned opcode) } static size_t disasm_bitops_movep( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const bool has_source_reg = (instr >> 8) & 1; if (has_source_reg) { @@ -600,7 +600,7 @@ static size_t disasm_bitops_movep( } static size_t disasm_move_movea( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const int opsize_raw = (instr >> 12) & 3; const OpSize opsize = (opsize_raw == 1) @@ -684,7 +684,7 @@ static size_t disasm_move_movea( } static size_t disasm_move_from_sr( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const auto opsize = OpSize::kWord; const auto dst = FetchArg( @@ -714,7 +714,7 @@ static size_t disasm_move_from_sr( } static size_t disasm_move_to( - DisasmNode &node, const uint16_t instr, const DataBuffer &code, const ArgType reg) + DisasmNode &node, const uint16_t instr, const DataView &code, const ArgType reg) { const auto opsize = OpSize::kWord; const auto src = FetchArg( @@ -755,7 +755,7 @@ static OpCode opcode_for_negx_clr_neg_not(const unsigned opcode) } static size_t disasm_move_negx_clr_neg_not( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const auto opsize = static_cast<OpSize>((instr >> 6) & 3); const unsigned opcode = (instr >> 9) & 3; @@ -807,7 +807,7 @@ static size_t disasm_trivial( } static size_t disasm_tas( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const auto opsize = OpSize::kByte; const auto a = FetchArg( @@ -837,7 +837,7 @@ static size_t disasm_tas( } static size_t disasm_tst_tas_illegal( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const auto opsize = static_cast<OpSize>((instr >> 6) & 3); const int m = (instr >> 3) & 7; @@ -880,7 +880,7 @@ static size_t disasm_trap(DisasmNode &node, const uint16_t instr) return node.size = kInstructionSizeStepBytes; } -static size_t disasm_link_unlink(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_link_unlink(DisasmNode &node, const uint16_t instr, const DataView &code) { const bool unlk = (instr >> 3) & 1; const unsigned xn = instr & 7; @@ -911,7 +911,7 @@ static size_t disasm_move_usp(DisasmNode &node, const uint16_t instr) return node.size = kInstructionSizeStepBytes; } -static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const DataView &code) { const bool is_nbcd = !((instr >> 6) & 1); const OpSize opsize0 = OpSize::kWord; @@ -964,7 +964,7 @@ static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const return node.size = kInstructionSizeStepBytes + arg.Size(opsize0); } -static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataView &code) { const auto a = FetchImmediate(node.address + kInstructionSizeStepBytes, code, OpSize::kWord); if (a.mode != AddrMode::kImmediate) { @@ -974,7 +974,7 @@ static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataBuff return node.size = kInstructionSizeStepBytes * 2; } -static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataView &code) { if ((instr & 0xf900) == 0x4000) { return disasm_move_negx_clr_neg_not(node, instr, code); @@ -1018,7 +1018,7 @@ static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataB } static size_t disasm_addq_subq( - DisasmNode &node, const uint16_t instr, const DataBuffer &code, const OpSize opsize) + DisasmNode &node, const uint16_t instr, const DataView &code, const OpSize opsize) { const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize); switch (a.mode) { @@ -1053,9 +1053,9 @@ static size_t disasm_addq_subq( return node.size = kInstructionSizeStepBytes + a.Size(opsize); } -static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code) { - if (node.address + kInstructionSizeStepBytes >= code.occupied_size) { + if (node.address + kInstructionSizeStepBytes >= code.size) { return disasm_verbatim(node, instr); } const int16_t dispmt_raw = GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes); @@ -1072,7 +1072,7 @@ static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataBuff return node.size = kInstructionSizeStepBytes * 2; } -static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code) { const OpSize opsize = OpSize::kWord; const auto a = FetchArg( @@ -1102,7 +1102,7 @@ static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const Data return node.size = kInstructionSizeStepBytes + a.Size(opsize); } -static size_t disasm_addq_subq_scc_dbcc(DisasmNode &n, const uint16_t instr, const DataBuffer &c) +static size_t disasm_addq_subq_scc_dbcc(DisasmNode &n, const uint16_t instr, const DataView &c) { const auto opsize = static_cast<OpSize>((instr >> 6) & 3); if (opsize == OpSize::kInvalid) { @@ -1128,7 +1128,7 @@ static size_t disasm_moveq(DisasmNode &node, const uint16_t instr) static size_t disasm_divu_divs_mulu_muls( DisasmNode &node, const uint16_t instr, - const DataBuffer &code, + const DataView &code, const OpCode opcode) { const auto opsize = OpSize::kWord; @@ -1181,7 +1181,7 @@ static size_t disasm_addx_subx_abcd_sbcd( static size_t disasm_or_and( DisasmNode &node, const uint16_t instr, - const DataBuffer &code, + const DataView &code, const OpSize opsize, const OpCode opcode) { @@ -1231,7 +1231,7 @@ static size_t disasm_or_and( } static size_t disasm_divu_divs_sbcd_or( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { // Also ensures that opsize == OpSize::kByte, i.e. 0b00 if ((instr & 0x1f0) == 0x100) { @@ -1247,7 +1247,7 @@ static size_t disasm_divu_divs_sbcd_or( } static size_t disasm_adda_suba_cmpa( - DisasmNode &node, const uint16_t instr, const DataBuffer &code, const OpCode opcode) + DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode) { const OpSize opsize = static_cast<OpSize>(((instr >> 8) & 1) + 1); const auto src = FetchArg( @@ -1278,7 +1278,7 @@ static size_t disasm_adda_suba_cmpa( static size_t disasm_add_sub_cmp( DisasmNode &node, const uint16_t instr, - const DataBuffer &code, + const DataView &code, const OpCode opcode, const OpSize opsize, const bool dir_to_addr) @@ -1356,7 +1356,7 @@ static size_t disasm_cmpm(DisasmNode &node, const uint16_t instr) return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); } -static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataView &code) { const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); const auto addr = FetchArg( @@ -1388,7 +1388,7 @@ static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataBuffe } static size_t disasm_eor_cmpm_cmp_cmpa( - DisasmNode &node, const uint16_t instr, const DataBuffer &code) + DisasmNode &node, const uint16_t instr, const DataView &code) { const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); if (opsize == OpSize::kInvalid) { @@ -1423,7 +1423,7 @@ static size_t disasm_exg(DisasmNode &node, const uint16_t instr) return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize); } -static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataView &code) { if ((instr & 0x1f0) == 0x100) { return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kABCD); @@ -1442,7 +1442,7 @@ static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataB } static size_t disasm_add_sub_x_a( - DisasmNode &node, const uint16_t instr, const DataBuffer &code, const OpCode opcode) + DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode) { const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); if (opsize == OpSize::kInvalid) { @@ -1477,7 +1477,7 @@ static bool IsValidShiftKind(const ShiftKind k) return static_cast<int>(k) < 4; } -static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const DataBuffer &code) +static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const DataView &code) { const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3); const unsigned xn = instr & 7; @@ -1529,7 +1529,7 @@ static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const return node.size = kInstructionSizeStepBytes + dst.Size(opsize); } -static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataBuffer &c) +static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataView &c) { switch ((i & 0xf000) >> 12) { case 0x0: @@ -1569,10 +1569,10 @@ static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataBuffer &c) return disasm_verbatim(n, i); } -size_t DisasmNode::Disasm(const DataBuffer &code) +size_t DisasmNode::Disasm(const DataView &code) { // We assume that machine have no MMU and ROM data always starts with 0 - assert(this->address < code.occupied_size); + assert(this->address < code.size); // It is possible to have multiple DisasmNode::Disasm() calls, and there is // no point to disassemble it again if it already has opcode determined if (this->op.opcode != OpCode::kNone) { @@ -1591,10 +1591,10 @@ size_t DisasmNode::Disasm(const DataBuffer &code) } } -size_t DisasmNode::DisasmAsRaw(const DataBuffer &code) +size_t DisasmNode::DisasmAsRaw(const DataView &code) { // We assume that machine have no MMU and ROM data always starts with 0 - assert(this->address < code.occupied_size); + assert(this->address < code.size); size = kInstructionSizeStepBytes; ref_kinds = 0; ref1_addr = 0; @@ -1,8 +1,8 @@ +#pragma once + /* SPDX-License-Identifier: Unlicense */ -#pragma once - #include "data_buffer.h" #include "common.h" @@ -383,8 +383,8 @@ struct DisasmNode { /*! Disassembles instruction with arguments * returns size of whole instruction with arguments in bytes */ - size_t Disasm(const DataBuffer &code); - size_t DisasmAsRaw(const DataBuffer &code); + size_t Disasm(const DataView &code); + size_t DisasmAsRaw(const DataView &code); void AddReferencedBy(uint32_t address, ReferenceType); ~DisasmNode(); }; diff --git a/elf_format.h b/elf_format.h new file mode 100644 index 0000000..b5a268a --- /dev/null +++ b/elf_format.h @@ -0,0 +1,328 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include <cstddef> +#include <cstdint> + +namespace ELF { + +constexpr size_t kIdentSize = 16; +constexpr size_t kHeaderSize = kIdentSize + 36; +constexpr size_t kMagicSize = 4; +constexpr size_t kProgramHeaderSize = 32; + +using Address = uint32_t; +using Offset = uint32_t; + +enum class FileClass : uint8_t { + kNone = 0, + k32 = 1, + k64 = 2, + kUnknown, +}; + +enum class DataEncoding : uint8_t { + kNone = 0, + k2LSB = 1, + kLE = k2LSB, + k2MSB = 2, + kBE = k2MSB, + kUnknown, +}; + +enum class Version : uint8_t { + kNone = 0, + kCurrent = 1, + kUnknown, +}; + +static constexpr inline auto ParseFileClass(const uint8_t file_class) +{ + switch (file_class) { + case static_cast<uint8_t>(FileClass::kNone): return FileClass::kNone; + case static_cast<uint8_t>(FileClass::k32): return FileClass::k32; + case static_cast<uint8_t>(FileClass::k64): return FileClass::k64; + } + return FileClass::kUnknown; +} + +static constexpr inline auto ParseDataEncoding(const uint8_t data_encoding) +{ + switch (data_encoding) { + case static_cast<uint8_t>(DataEncoding::kNone): return DataEncoding::kNone; + case static_cast<uint8_t>(DataEncoding::k2LSB): return DataEncoding::k2LSB; + case static_cast<uint8_t>(DataEncoding::k2MSB): return DataEncoding::k2MSB; + } + return DataEncoding::kUnknown; +} + +static constexpr inline auto ParseVersion(const uint8_t version) +{ + switch (version) { + case static_cast<uint8_t>(Version::kNone): return Version::kNone; + case static_cast<uint8_t>(Version::kCurrent): return Version::kCurrent; + } + return Version::kUnknown; +} + +struct Ident32Raw { + uint8_t magic[4]; + uint8_t file_class; + uint8_t data_encoding; + uint8_t version; + uint8_t os_abi; + uint8_t abi_version; + uint8_t padding[7]; + static constexpr auto inline FromBytes(const uint8_t *data) + { + return Ident32Raw{ + { data[0], data[1], data[2], data[3] }, + data[4], + data[5], + data[6], + data[7], + data[8], + { data[9], data[10], data[11], data[12], data[13], data[14], data[15], }, + }; + } +}; + +struct Ident32 { + uint8_t magic[4]; + FileClass file_class; + DataEncoding data_encoding; + Version version; + uint8_t os_abi; + uint8_t abi_version; + static constexpr inline auto FromBytes(const uint8_t *data) + { + return Ident32{ + { data[0], data[1], data[2], data[3] }, + ParseFileClass(data[4]), + ParseDataEncoding(data[5]), + ParseVersion(data[6]), + data[7], + data[8], + }; + } + static constexpr inline auto FromIdent32Raw(const Ident32Raw raw) + { + return Ident32{ + { raw.magic[0], raw.magic[1], raw.magic[2], raw.magic[3] }, + ParseFileClass(raw.file_class), + ParseDataEncoding(raw.data_encoding), + ParseVersion(raw.version), + raw.os_abi, + raw.abi_version, + }; + } +}; + +enum class ObjectType : uint16_t { + kNone = 0, + kRel = 1, + kExec = 2, + kDyn = 3, + kCore = 4, + kUnknown = 0x7fff, + kLoProc = 0xff00, + kHiProc = 0xffff, +}; + +enum class Machine : uint16_t { + kNone = 0, + kM32 = 1, + kSPARC = 2, + k386 = 3, + k68k = 4, + k88k = 5, + k860 = 7, + kMIPS = 8, + kUnknown, +}; + +static constexpr inline uint16_t ParseU16(const uint8_t *d, DataEncoding e) +{ + if (e == DataEncoding::k2MSB) { + return uint16_t(d[0]) << 8 | d[1]; + } + return uint16_t(d[1]) << 8 | d[0]; +} + +static constexpr inline uint32_t ParseU32(const uint8_t *d, DataEncoding e) +{ + if (e == DataEncoding::k2MSB) { + return uint32_t(d[0]) << 24 | uint32_t(d[1]) << 16 | uint32_t(d[2]) << 8 | d[3]; + } + return uint32_t(d[3]) << 24 | uint32_t(d[2]) << 16 | uint32_t(d[1]) << 8 | d[0]; +} + +static constexpr inline auto ParseObjectType(const uint16_t type) +{ + switch (type) { + case static_cast<uint16_t>(ObjectType::kNone): return ObjectType::kNone; + case static_cast<uint16_t>(ObjectType::kRel): return ObjectType::kRel; + case static_cast<uint16_t>(ObjectType::kExec): return ObjectType::kExec; + case static_cast<uint16_t>(ObjectType::kDyn): return ObjectType::kDyn; + case static_cast<uint16_t>(ObjectType::kCore): return ObjectType::kCore; + case static_cast<uint16_t>(ObjectType::kLoProc): return ObjectType::kLoProc; + case static_cast<uint16_t>(ObjectType::kHiProc): return ObjectType::kHiProc; + } + return ObjectType::kUnknown; +} + +static constexpr inline auto ParseMachine(const uint16_t machine) +{ + switch (machine) { + case static_cast<uint16_t>(Machine::kNone): return Machine::kNone; + case static_cast<uint16_t>(Machine::kM32): return Machine::kM32; + case static_cast<uint16_t>(Machine::kSPARC): return Machine::kSPARC; + case static_cast<uint16_t>(Machine::k386): return Machine::k386; + case static_cast<uint16_t>(Machine::k68k): return Machine::k68k; + case static_cast<uint16_t>(Machine::k88k): return Machine::k88k; + case static_cast<uint16_t>(Machine::k860): return Machine::k860; + case static_cast<uint16_t>(Machine::kMIPS): return Machine::kMIPS; + } + return Machine::kUnknown; +} + +struct Header32Raw { + Ident32Raw ident; + uint16_t type; + uint16_t machine; + uint32_t version; + Address entry; + Offset phoff; + Offset shoff; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstrndx; + static constexpr inline auto FromBytes(const uint8_t *data) + { + const auto ident = Ident32Raw::FromBytes(data); + const DataEncoding e = ParseDataEncoding(ident.data_encoding); + return Header32Raw{ + /* .ident */ ident, + /* .type */ ParseU16(data + kIdentSize + 0, e), + /* .machine */ ParseU16(data + kIdentSize + 2, e), + /* .version */ ParseU32(data + kIdentSize + 4, e), + /* .entry */ ParseU32(data + kIdentSize + 8, e), + /* .phoff */ ParseU32(data + kIdentSize + 12, e), + /* .shoff */ ParseU32(data + kIdentSize + 16, e), + /* .flags */ ParseU32(data + kIdentSize + 20, e), + /* .ehsize */ ParseU16(data + kIdentSize + 24, e), + /* .phentsize */ ParseU16(data + kIdentSize + 26, e), + /* .phnum */ ParseU16(data + kIdentSize + 28, e), + /* .shentsize */ ParseU16(data + kIdentSize + 30, e), + /* .shnum */ ParseU16(data + kIdentSize + 32, e), + /* .shstrndx */ ParseU16(data + kIdentSize + 34, e), + }; + } +}; + +struct Header32 { + Ident32 ident; + ObjectType type; + Machine machine; + Version version; + Address entry; + Offset phoff; + Offset shoff; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstrndx; + static constexpr inline auto FromBytes(const uint8_t *data) + { + const auto raw = Header32Raw::FromBytes(data); + return Header32{ + Ident32::FromIdent32Raw(raw.ident), + ParseObjectType(raw.type), + ParseMachine(raw.machine), + ParseVersion(raw.version), + raw.entry, + raw.phoff, + raw.shoff, + raw.flags, + raw.ehsize, + raw.phentsize, + raw.phnum, + raw.shentsize, + raw.shnum, + raw.shstrndx, + }; + } +}; + +enum class PHType : uint32_t { + kNull = 0, + kLoad = 1, + kDynamic = 2, + kInterp = 3, + kNote = 4, + kSHLIB = 5, + kProgramHeaderTable = 6, + kLoProc = 0x70000000, + kHiProc = 0x7fffffff, + kUnknown, +}; + +static constexpr inline auto ParsePHType(const uint32_t type) +{ + switch (type) { + case static_cast<uint32_t>(PHType::kNull): return PHType::kNull; + case static_cast<uint32_t>(PHType::kLoad): return PHType::kLoad; + case static_cast<uint32_t>(PHType::kDynamic): return PHType::kDynamic; + case static_cast<uint32_t>(PHType::kInterp): return PHType::kInterp; + case static_cast<uint32_t>(PHType::kNote): return PHType::kNote; + case static_cast<uint32_t>(PHType::kSHLIB): return PHType::kSHLIB; + case static_cast<uint32_t>(PHType::kProgramHeaderTable): return PHType::kProgramHeaderTable; + case static_cast<uint32_t>(PHType::kLoProc): return PHType::kLoProc; + case static_cast<uint32_t>(PHType::kHiProc): return PHType::kHiProc; + } + return PHType::kUnknown; +} + +constexpr uint32_t kPHFlagX = 1 << 0; +constexpr uint32_t kPHFlagW = 1 << 1; +constexpr uint32_t kPHFlagR = 1 << 2; + +struct ProgramHeader32 { + uint32_t type; + Offset offset; + Address vaddr; + Address paddr; + uint32_t filesz; + uint32_t memsz; + uint32_t flags; + uint32_t align; + static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e) + { + return ProgramHeader32{ + /* type */ ParseU32(data + 0, e), + /* offset */ ParseU32(data + 4, e), + /* vaddr */ ParseU32(data + 8, e), + /* paddr */ ParseU32(data + 12, e), + /* filesz */ ParseU32(data + 16, e), + /* memsz */ ParseU32(data + 20, e), + /* flags */ ParseU32(data + 24, e), + /* align */ ParseU32(data + 28, e), + }; + } +}; + +static constexpr inline bool MagicIsValid(const uint8_t *m) +{ + return m[0] == 0x7f && m[1] == 'E' && m[2] == 'L' && m[3] == 'F'; +} + +}; diff --git a/elf_image.cpp b/elf_image.cpp new file mode 100644 index 0000000..6db72f3 --- /dev/null +++ b/elf_image.cpp @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_image.h" + +#include <cassert> +#include <cstdio> + +ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes( + const DataView &d, const DataEncoding e) +{ + if (d.buffer == nullptr || d.size == 0) { + return ELF::ProgramHeader32Table{}; + } + assert(d.size % kProgramHeaderSize == 0); + const size_t size = d.size / kProgramHeaderSize; + auto *headers = new ProgramHeader32[size]; + assert(headers != nullptr); + for (size_t i = 0; i < size; i++) { + headers[i] = ProgramHeader32::FromBytes(d.buffer + i * kProgramHeaderSize, e); + } + return ELF::ProgramHeader32Table{ headers, size, }; +} + +static char *ValidateELF(const DataView& d) +{ + char *error; + size_t size; + FILE *s = open_memstream(&error, &size); + assert(s); + using namespace ELF; + if (d.size < kHeaderSize) { + fprintf( + s, + "data size (%zu) is lower than minimum ELF header size (%zu): " + "ELF header could not fit", + d.size, + kHeaderSize); + fclose(s); + return error; + } + const auto header_raw = Header32Raw::FromBytes(d.buffer); + const auto header = Header32::FromBytes(d.buffer); + if (!MagicIsValid(header.ident.magic)) { + const uint8_t *m = header.ident.magic; + fprintf( + s, + "ELF Magic is invalid: expected [%02x %02x %02x %02x], got [%02x %02x %02x %02x]", + 0x7f, 'E', 'L', 'F', + m[0], m[1], m[2], m[3]); + fclose(s); + return error; + } + if (header.ident.version != Version::kCurrent) { + fprintf( + s, + "version (0x%02x) of ELF header.ident.version is not supported, " + "only \"Current\" version (0x%02x) is supported", + header_raw.ident.version, + static_cast<int>(Version::kCurrent)); + fclose(s); + return error; + } + if (header.version != Version::kCurrent) { + fprintf( + s, + "version (0x%02x) of ELF header.version is not supported, " + "only \"Current\" version (0x%02x) is supported", + header_raw.version, + static_cast<int>(Version::kCurrent)); + fclose(s); + return error; + } + if (header.type != ObjectType::kExec) { + fprintf( + s, + "object type (0x%02x) is not supported, " + "only Exec (0x%02x) object type is supported", + header_raw.type, + static_cast<int>(ObjectType::kExec)); + fclose(s); + return error; + } + if (header.machine != Machine::k68k) { + fprintf( + s, + "machine (0x%02x) is not supported, " + "only Motorola 68k (0x%02x) machine is supported", + header_raw.machine, + static_cast<int>(Machine::k68k)); + fclose(s); + return error; + } + if (header.phentsize != kProgramHeaderSize) { + fprintf( + s, + "phentsize is invalid: expected (%zu), got (%zu)", + kProgramHeaderSize, + size_t(header.phentsize)); + fclose(s); + return error; + } + if (d.size < header.phoff + header.phentsize * header.phnum) { + fprintf( + s, + "data size (%zu) is lower than program header table end offset (%zu): " + "program header table could not fit", + d.size, + size_t(header.phoff + header.phentsize * header.phnum)); + fclose(s); + return error; + } + bool has_segment_with_entry = false; + for (size_t i = 0; i < header.phnum; i++) { + const auto ph = ProgramHeader32::FromBytes( + d.buffer + header.phoff + header.phentsize * i, header.ident.data_encoding); + if (d.size < ph.offset + ph.filesz) { + fprintf( + s, + "data size (%zu) is lower than pht[%zu] segment end offset (%zu): " + "segment could not fit", + d.size, + i, + size_t(ph.offset + ph.filesz)); + fclose(s); + return error; + } + const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == (kPHFlagX | kPHFlagR); + if (ParsePHType(ph.type) == PHType::kLoad && is_code && ph.vaddr != 0) { + fprintf( + s, + "pht[%zu] segment is a code, but it's vaddr (0x%08x) is not zero: " + "non-zero base address is not supported", + i, + ph.vaddr); + fclose(s); + return error; + } + const bool contains_entry = header.entry >= ph.vaddr && header.entry < ph.vaddr + ph.memsz; + if (ParsePHType(ph.type) == PHType::kLoad && is_code && contains_entry) { + has_segment_with_entry = true; + } + } + if (!has_segment_with_entry) { + fprintf(s, "no code segments containing entry point (0x%08x) found", header.entry); + fclose(s); + return error; + } + fclose(s); + free(error); + return nullptr; +} + +ELF::Image::Image(DataBuffer&& data) + : _data(static_cast<DataBuffer&&>(data)) + , _error(ValidateELF(_data.View())) + , _h(_error ? ELF::Header32{} : ELF::Header32::FromBytes(_data.View().buffer)) + , _pht(_error + ? ELF::ProgramHeader32Table{} + : ELF::ProgramHeader32Table::FromBytes( + _data.View(_h.phoff, _h.phnum * kProgramHeaderSize), _h.ident.data_encoding)) +{} + +ELF::Image::~Image() +{ + if (_error) { + free(_error); + } + if (_pht.headers) { + delete [] _pht.headers; + } +} diff --git a/elf_image.h b/elf_image.h new file mode 100644 index 0000000..b7c7123 --- /dev/null +++ b/elf_image.h @@ -0,0 +1,55 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_format.h" +#include "data_buffer.h" + +#include <cstdlib> + +namespace ELF { + +struct ProgramHeader32Table { + const ProgramHeader32 *headers{}; + size_t size{}; + static ProgramHeader32Table FromBytes(const DataView &, DataEncoding); +}; + +struct Segment { + Segment *next{}; + const DataView view{}; +}; + +class Image { + const DataBuffer _data; + char *const _error; + const Header32 _h; + const ProgramHeader32Table _pht; +public: + explicit Image(DataBuffer&&); + ~Image(); + constexpr bool IsValid() const { return _error == nullptr; } + constexpr const DataBuffer &Data() const { return _data; }; + constexpr const DataView ProgramView() const + { + if (!IsValid()) { + return DataView{}; + } + for (size_t i = 0; i < _pht.size; i++) { + const auto ph = _pht.headers[i]; + const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == + (kPHFlagX | kPHFlagR); + const bool is_load = ParsePHType(ph.type) == PHType::kLoad; + const bool contains_entry = _h.entry >= ph.vaddr && _h.entry < ph.vaddr + ph.memsz; + if (is_load && is_code && ph.vaddr == 0 && contains_entry) + { + return _data.View(ph.offset, ph.filesz); + } + } + return DataView{}; + }; + constexpr const char *Error() const { return _error; } +}; + +} @@ -1,8 +1,9 @@ /* SPDX-License-Identifier: Unlicense */ -#include "disasm.h" +#include "elf_image.h" #include "data_buffer.h" +#include "disasm.h" #include "common.h" #define OPTPARSE_IMPLEMENTATION @@ -32,7 +33,7 @@ class DisasmMap { const uint32_t by_addr, const uint32_t ref_addr, const TracedNodeType type, - const DataBuffer &code, + const DataView &code, const ReferenceType ref_type); constexpr bool canBeAllocated(const DisasmNode& node) const; public: @@ -47,7 +48,7 @@ public: assert(_type == DisasmMapType::kTraced); return nullptr != insertTracedNode(address, type); } - void Disasm(const DataBuffer &code, const Settings &); + void Disasm(const DataView &code, const Settings &); DisasmMap(DisasmMapType type): _type(type) {} ~DisasmMap(); }; @@ -88,7 +89,7 @@ void DisasmMap::insertReferencedBy( const uint32_t by_addr, const uint32_t ref_addr, const TracedNodeType type, - const DataBuffer &code, + const DataView &code, const ReferenceType ref_type) { auto *const ref_node = insertTracedNode(ref_addr, type); @@ -140,10 +141,10 @@ static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask r : ReferenceType::kBranch; } -void DisasmMap::Disasm(const DataBuffer &code, const Settings &s) +void DisasmMap::Disasm(const DataView &code, const Settings &s) { DisasmNode *node; - for (size_t i = 0; i < Min(kRomSizeBytes, code.occupied_size);) { + for (size_t i = 0; i < Min(kRomSizeBytes, code.size);) { if (_type == DisasmMapType::kTraced) { node = _map[i / kInstructionSizeStepBytes]; if (!node) { @@ -171,7 +172,7 @@ void DisasmMap::Disasm(const DataBuffer &code, const Settings &s) const bool has_ref1 = (node->ref_kinds & kRef1ImmMask) ? s.imm_labels : (node->ref_kinds & kRef1Mask); - const bool has_code_ref1 = node->ref1_addr < code.occupied_size && has_ref1; + const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1; if (has_code_ref1) { const TracedNodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) ? TracedNodeType::kData : TracedNodeType::kInstruction; @@ -179,7 +180,7 @@ void DisasmMap::Disasm(const DataBuffer &code, const Settings &s) insertReferencedBy(node->address, node->ref1_addr, type, code, ref_type); } const bool has_ref2 = (node->ref_kinds & kRef2Mask); - const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.occupied_size); + const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size); if (has_code_ref2) { const TracedNodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) ? TracedNodeType::kData : TracedNodeType::kInstruction; @@ -208,7 +209,7 @@ DisasmMap::~DisasmMap() } static size_t RenderRawDataComment( - char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataBuffer &code) + char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code) { size_t overall_sz{}; for (size_t i = 0; i < instr_sz; i += kInstructionSizeStepBytes) @@ -324,7 +325,7 @@ static constexpr const char *StringWihoutFristNChars(const char *str, const size static void RenderNodeDisassembly( FILE *const output, const DisasmMap &disasm_map, - const DataBuffer &code, + const DataView &code, const Settings &s, const DisasmNode &node) { @@ -451,9 +452,9 @@ static void RenderNodeDisassembly( } static void RenderDisassembly( - FILE *const output, const DisasmMap &disasm_map, const DataBuffer &code, const Settings &s) + FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s) { - for (size_t i = 0; i < code.occupied_size;) { + for (size_t i = 0; i < code.size;) { const DisasmNode *node = disasm_map.FindNodeByAddress(i); if (node) { RenderNodeDisassembly(output, disasm_map, code, s, *node); @@ -467,18 +468,18 @@ static void RenderDisassembly( } } -static void ParseTraceData(DisasmMap &disasm_map, const DataBuffer &trace_data) +static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data) { // FIXME make a full blown parser with various radixes support and different // trace types support bool parse = true; - for (size_t i = 0; i < trace_data.occupied_size; i++) { + for (size_t i = 0; i < trace_data.size; i++) { if (trace_data.buffer[i] == '\n' || trace_data.buffer[i] == '\r') { parse = true; } else if (parse) { errno = 0; - char *startptr = reinterpret_cast<char *>(trace_data.buffer + i); - char *endptr = startptr; + const char *startptr = reinterpret_cast<const char *>(trace_data.buffer + i); + char *endptr = nullptr; const long address = strtol(startptr, &endptr, 10); if ((address == LONG_MAX || address == LONG_MIN) && errno == ERANGE) { // Parsing error, just skip @@ -530,58 +531,56 @@ static size_t ReadFromStream(DataBuffer &db, FILE *stream) return db.occupied_size; } -static int M68kDisasmByTrace(FILE *input_stream, FILE *output_stream, FILE *trace_stream, const Settings &s) +static DisasmMap *NewDisasmMap(FILE *trace_stream) { - // Read machine code into buffer - DataBuffer code{}; - const size_t input_size = ReadFromStream(code, input_stream); - if (input_size == 0) { - fprintf(stderr, "ReadFromStream(code, input_stream): Error: No data has been read\n"); - return EXIT_FAILURE; - } - // It just not worth it to check this somewhere while disassebling or - // emitting. Odd size is just not supported. - if (code.occupied_size % 2) { - fprintf(stderr, "Error: code blob must be of even size\n"); - return EXIT_FAILURE; + if (trace_stream == nullptr) { + DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kRaw}; + assert(disasm_map); + return disasm_map; } // Read trace file into buffer DataBuffer trace_data{}; const size_t trace_size = ReadFromStream(trace_data, trace_stream); if (trace_size == 0) { fprintf(stderr, "ReadFromStream(trace_data, trace_stream): Error: No data has been read\n"); - return EXIT_FAILURE; + return nullptr; } // Parse trace file into map DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kTraced}; assert(disasm_map); - ParseTraceData(*disasm_map, trace_data); - // Disasm into output map - disasm_map->Disasm(code, s); - // Print output into output_stream - RenderDisassembly(output_stream, *disasm_map, code, s); - delete disasm_map; - return EXIT_SUCCESS; + ParseTraceData(*disasm_map, trace_data.View()); + return disasm_map; } -static int M68kDisasmAll(FILE *input_stream, FILE *output_stream, const Settings &s) +static int M68kDisasm( + FILE *input_stream, FILE *output_stream, FILE *trace_stream, const Settings &s) { - // Read machine code into buffer - DataBuffer code{}; - const size_t input_size = ReadFromStream(code, input_stream); + // Read input file into buffer + DataBuffer input{}; + const size_t input_size = ReadFromStream(input, input_stream); if (input_size == 0) { - fprintf(stderr, "ReadFromStream(code, input_stream): Error: No data has been read\n"); + fprintf(stderr, "ReadFromStream(input, input_stream): Error: No data has been read\n"); + return EXIT_FAILURE; + } + const ELF::Image elf(static_cast<DataBuffer&&>(input)); + if (s.bfd == BFDTarget::kELF && !elf.IsValid()) { + fprintf(stderr, "Error: ELF image is not valid: %s\n", elf.Error()); return EXIT_FAILURE; } - // It just not worth it to check this somewhere while disassebling or + const bool from_elf = s.bfd == BFDTarget::kELF || (s.bfd == BFDTarget::kAuto && elf.IsValid()); + const DataView code(from_elf ? elf.ProgramView() : elf.Data().View()); + assert(code.buffer != nullptr); + assert(code.size != 0); + // It is not worth it to check this somewhere while disassembling or // emitting. Odd size is just not supported. - if (code.occupied_size % 2) { - fprintf(stderr, "Error: code blob must be of even size\n"); + if (code.size % 2) { + fprintf(stderr, "M68kDisasm: Error: code blob must be of even size\n"); + return EXIT_FAILURE; + } + auto *disasm_map = NewDisasmMap(trace_stream); + if (disasm_map == nullptr) { return EXIT_FAILURE; } - // Create the map and disasseble - DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kRaw}; - assert(disasm_map); // Disasm into output map disasm_map->Disasm(code, s); // Print output into output_stream @@ -634,38 +633,44 @@ static bool ApplyFeature(Settings& s, const char *feature_arg) static void PrintUsage(FILE *s, const char *argv0) { // Please, keep all lines in 80 columns range when printed. - fprintf(s, "Usage: %s [options] [<input-file-name>]\n", argv0); - fprintf(s, "Options:\n"); - fprintf(s, " -h, --help, Show this message.\n"); - fprintf(s, " -o, --output, Where to write disassembly to (stdout if not set)\n"); - fprintf(s, " -t, --pc-trace, File containing PC trace\n"); - fprintf(s, " --indent, Specify instruction indentation, e.g. \"\t\",\n"); - fprintf(s, " Single tab is used by default.\n"); - fprintf(s, " -f, --feature=[no-]<feature>\n"); - fprintf(s, " Enable or disable (with \"no-\" prefix) a feature.\n"); - fprintf(s, " Available features described below under the\n"); - fprintf(s, " \"Feature flags\" section.\n"); - fprintf(s, " <input_file_name> Binary file with machine code (stdin if not set)\n"); - fprintf(s, "Feature flags:\n"); - fprintf(s, " rdc Print raw data comment.\n"); - fprintf(s, " labels Print labels above all places that have jumps from\n"); - fprintf(s, " somewhere.\n"); - fprintf(s, " rel-labels Use label instead of number on relative branch or call.\n"); - fprintf(s, " abs-labels Use label instead of number on absolute branch or call.\n"); - fprintf(s, " imm-labels Use label instead of number when immediate value moved\n"); - fprintf(s, " to address register.\n"); - fprintf(s, " short-ref-local-labels\n"); - fprintf(s, " Use local labels (numbers) for short jumps or loops.\n"); - fprintf(s, " Jump is considered short when it does not cross other\n"); - fprintf(s, " labels and has no calls.\n"); - fprintf(s, " export-labels Add `.globl` preamble to labels referenced two or more\n"); - fprintf(s, " times.\n"); - fprintf(s, " export-all-labels Add `.globl` preamble to all labels.\n"); - fprintf(s, " export-functions Add `.globl` and `.type @funciton` preamble to a label\n"); - fprintf(s, " referenced as a call.\n"); - fprintf(s, " xrefs-from Print xrefs comments above all places that have xrefs.\n"); - fprintf(s, " xrefs-to Print xrefs comments after all branch instructions.\n"); - fprintf(s, " imm-hex Print all immediate values as hexadecimal numbers.\n"); + fprintf(s, + "Usage: %s [options] <input-file-name>\n" + "Options:\n" + " -h, --help, Show this message.\n" + " -o, --output, Where to write disassembly to (stdout if not set)\n" + " -t, --pc-trace, File containing PC trace\n" + " --indent, Specify instruction indentation, e.g. \"\t\",\n" + " Single tab is used by default.\n" + " -f, --feature=[no-]<feature>\n" + " Enable or disable (with \"no-\" prefix) a feature.\n" + " Available features described below under the\n" + " \"Feature flags\" section.\n" + " -b, --bfd-target=bfdname\n" + " Specify target object format as `bfdname`. Will attempt\n" + " to detect automatically if not set. Only `auto,\n" + " `binary` and `elf` are currently supported.\n" + " <input_file_name> Binary or elf file with the machine code to disassemble\n" + "Feature flags:\n" + " rdc Print raw data comment.\n" + " labels Print labels above all places that have jumps from\n" + " somewhere.\n" + " rel-labels Use label instead of number on relative branch or call.\n" + " abs-labels Use label instead of number on absolute branch or call.\n" + " imm-labels Use label instead of number when immediate value moved\n" + " to address register.\n" + " short-ref-local-labels\n" + " Use local labels (numbers) for short jumps or loops.\n" + " Jump is considered short when it does not cross other\n" + " labels and has no calls.\n" + " export-labels Add `.globl` preamble to labels referenced two or more\n" + " times.\n" + " export-all-labels Add `.globl` preamble to all labels.\n" + " export-functions Add `.globl` and `.type @funciton` preamble to a label\n" + " referenced as a call.\n" + " xrefs-from Print xrefs comments above all places that have xrefs.\n" + " xrefs-to Print xrefs comments after all branch instructions.\n" + " imm-hex Print all immediate values as hexadecimal numbers.\n" + , argv0); } int main(int, char* argv[]) @@ -675,6 +680,7 @@ int main(int, char* argv[]) {"output", 'o', OPTPARSE_REQUIRED}, {"pc-trace", 't', OPTPARSE_REQUIRED}, {"feature", 'f', OPTPARSE_REQUIRED}, + {"bfd-target", 'b', OPTPARSE_REQUIRED}, {"indent", 80, OPTPARSE_REQUIRED}, {}, }; @@ -704,6 +710,26 @@ int main(int, char* argv[]) return EXIT_FAILURE; } break; + case 'b': + { + const auto *bfd_str = options.optarg; + if (0 == strcmp(bfd_str, "auto")) { + s.bfd = BFDTarget::kAuto; + } else if (0 == strcmp(bfd_str, "binary")) { + s.bfd = BFDTarget::kBinary; + } else if (0 == strcmp(bfd_str, "elf")) { + s.bfd = BFDTarget::kELF; + } else { + fprintf( + stderr, + "Unknown BFD target specified: \"%s\". " + "Refer to usage below to find correct BFD values.\n", + bfd_str); + PrintUsage(stderr, argv[0]); + return EXIT_FAILURE; + } + } + break; case 80: s.indent = options.optarg; break; @@ -723,7 +749,7 @@ int main(int, char* argv[]) } } // Open the files - FILE *input_stream = stdin; + FILE *input_stream = nullptr; FILE *output_stream = stdout; FILE *trace_stream = nullptr; if (input_file_name) { @@ -733,12 +759,17 @@ int main(int, char* argv[]) fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", input_file_name, err, strerror(err)); return EXIT_FAILURE; } + } else { + fprintf(stderr, "main: Error: no input file name specified, see usage below.\n"); + PrintUsage(stderr, argv[0]); + return EXIT_FAILURE; } if (output_file_name) { output_stream = fopen(output_file_name, "w"); if (output_stream == nullptr) { const int err = errno; fprintf(stderr, "main: fopen(\"%s\", \"w\"): Error (%d): \"%s\"\n", output_file_name, err, strerror(err)); + fclose(input_stream); return EXIT_FAILURE; } } @@ -747,13 +778,13 @@ int main(int, char* argv[]) if (trace_stream == nullptr) { const int err = errno; fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", trace_file_name, err, strerror(err)); + fclose(input_stream); + fclose(output_stream); return EXIT_FAILURE; } } // Run the program - const int ret = trace_stream - ? M68kDisasmByTrace(input_stream, output_stream, trace_stream, s) - : M68kDisasmAll(input_stream, output_stream, s); + const int ret = M68kDisasm(input_stream, output_stream, trace_stream, s); if (trace_stream != nullptr) { fclose(trace_stream); } @@ -1,3 +1,6 @@ +/* SPDX-License-Identifier: Unlicense + */ + MEMORY { ROM(rx) : ORIGIN = 0x00000000, LENGTH = 4M } @@ -1,8 +1,11 @@ # TODO +- Add support for `ELF` and `DWARF` formats to split an `ELF` file into multiple + original assembly files. These files may not be assembly files originally, but + they will become after decompilation. - Implement RAM symbol mapping from raw addresses found in the instructions like LEA, MOVE and address arithmetic instructions. Basically any direct RAM - address accessed directly may be mapped as symbol. A hashmap is most like + address accessed directly may be mapped as symbol. A hashmap is most likely necessary for this. - Implement CLI option that can be used to specify regions of RAM and IO registers. Custom ROM location and size is still not the case, only 4MiB at |