summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOxore <oxore@protonmail.com>2023-06-04 21:58:39 +0300
committerOxore <oxore@protonmail.com>2023-06-04 23:26:13 +0300
commita3f3fb052678b9cf1f80bbdc72c42afc3705ac0b (patch)
treef45953c256f4e463f073afcdc920c916afc4c0d1
parentb5c24afbc10a36f65e73d5ef2100da4ff173a109 (diff)
Add initial support of ELF files
-rw-r--r--.gitignore1
-rw-r--r--CMakeLists.txt1
-rw-r--r--Makefile33
-rw-r--r--common.h18
-rw-r--r--data_buffer.cpp4
-rw-r--r--data_buffer.h29
-rw-r--r--disasm.cpp104
-rw-r--r--disasm.h8
-rw-r--r--elf_format.h328
-rw-r--r--elf_image.cpp172
-rw-r--r--elf_image.h55
-rw-r--r--main.cpp199
-rw-r--r--test.ld3
-rw-r--r--todo.md5
14 files changed, 814 insertions, 146 deletions
diff --git a/.gitignore b/.gitignore
index 162ad75..9857a52 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@
prof_output
cmake[-_]build*/
compile_commands.json
+m68k-disasm
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2d47415..a7dd8b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,6 +26,7 @@ set(m68kdisasm_sources
main.cpp
data_buffer.cpp
disasm.cpp
+ elf_image.cpp
)
add_executable(m68k-disasm ${m68kdisasm_sources})
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2b6ee2f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: Unlicense
+
+WARNFLAGS = -Wall -Wextra -pedantic -Wlogical-op
+OPTFLAGS = -O2
+ARCHFLAGS =
+INCLUDES = lib
+_CFLAGS = $(CFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(ARCHFLAGS) $(OPTFLAGS) -pipe -g
+_CXXFLAGS = $(CXXFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(ARCHFLAGS) $(OPTFLAGS) -pipe -g
+LDSCRIPTS =
+_LDFLAGS = $(LDFLAGS) $(OPTFLAGS) $(addprefix -T,$(LDSCRIPTS))
+
+OBJECTS=main.o \
+    data_buffer.o \
+    elf_image.o \
+    disasm.o
+# Neither `all` nor `clean` creates a file of its own name, so both are phony.
+.PHONY: all clean
+all: m68k-disasm
+# Link step; the command is wrapped in `time` so its duration is printed.
+m68k-disasm: $(OBJECTS) $(LDSCRIPTS) Makefile
+	sh -c "time $(CXX) -o $@ $(_LDFLAGS) $(OBJECTS)"
+# Objects also depend on the Makefile, so changing flags rebuilds them.
+$(OBJECTS): Makefile
+
+%.o: %.c Makefile
+	sh -c "time $(CC) $(_CFLAGS) -c -o $@ $<"
+
+%.o: %.cpp Makefile
+	sh -c "time $(CXX) $(_CXXFLAGS) -c -o $@ $<"
+
+clean:
+	rm -rfv m68k-disasm $(OBJECTS)
+
diff --git a/common.h b/common.h
index 97dfe7c..dddde52 100644
--- a/common.h
+++ b/common.h
@@ -1,7 +1,16 @@
+#pragma once
+
/* SPDX-License-Identifier: Unlicense
*/
-#pragma once
+#include <cstddef>
+#include <cstdint>
+
+enum class BFDTarget { // Input format selector; stored in Settings::bfd
+ kAuto, // first enumerator, hence the value of a value-initialized `BFDTarget bfd{}`; presumably "detect the format" -- confirm against main.cpp
+ kBinary, // presumably a raw, headerless image -- confirm against main.cpp
+ kELF, // presumably an ELF container (see elf_image.cpp) -- confirm against main.cpp
+};
struct Settings {
bool raw_data_comment{};
@@ -16,6 +25,7 @@ struct Settings {
bool xrefs_to{};
bool xrefs_from{};
bool imm_hex{};
+ BFDTarget bfd{};
const char *indent{"\t"};
};
@@ -54,17 +64,17 @@ constexpr size_t kDisasmMapSizeElements = kRomSizeBytes / kInstructionSizeStepBy
static inline constexpr size_t Min(size_t a, size_t b) { return a < b ? a : b; }
-static inline constexpr uint16_t GetU16BE(uint8_t *buffer)
+static inline constexpr uint16_t GetU16BE(const uint8_t *buffer)
{
return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]);
}
-static inline constexpr int16_t GetI16BE(uint8_t *buffer)
+static inline constexpr int16_t GetI16BE(const uint8_t *buffer)
{
return (static_cast<uint16_t>(buffer[0]) << 8) | static_cast<uint16_t>(buffer[1]);
}
-static inline constexpr int32_t GetI32BE(uint8_t *buffer)
+static inline constexpr int32_t GetI32BE(const uint8_t *buffer)
{
return (static_cast<uint32_t>(buffer[0]) << 24) |
(static_cast<uint32_t>(buffer[1]) << 16) |
diff --git a/data_buffer.cpp b/data_buffer.cpp
index e691b97..33cb0b3 100644
--- a/data_buffer.cpp
+++ b/data_buffer.cpp
@@ -1,3 +1,6 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
#include "data_buffer.h"
#include <cassert>
@@ -5,6 +8,7 @@
void DataBuffer::Expand(size_t new_size)
{
+ assert(buffer);
if (new_size <= buffer_size) {
return;
}
diff --git a/data_buffer.h b/data_buffer.h
index c3d86e8..bc264d2 100644
--- a/data_buffer.h
+++ b/data_buffer.h
@@ -1,14 +1,41 @@
#pragma once
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "common.h"
+
#include <cstddef>
#include <cstdint>
+struct DataView { // Read-only (pointer, length) pair; has no destructor, so it never frees `buffer`
+ const uint8_t *const buffer{}; // first viewed byte; null in a default-constructed view
+ const size_t size{}; // number of bytes readable starting at `buffer`; 0 in a default-constructed view
+};
+
struct DataBuffer {
+ DataBuffer(){};
+ DataBuffer(const DataBuffer&) = delete;
+ constexpr DataBuffer(DataBuffer&& other)
+ : buffer(other.buffer)
+ , buffer_size(other.buffer_size)
+ , occupied_size(other.occupied_size)
+ {
+ other.occupied_size = 0;
+ other.buffer_size = 0;
+ other.buffer = nullptr;
+ };
static constexpr size_t kInitialSize = 4 * 1024;
uint8_t *buffer{new uint8_t[kInitialSize]};
size_t buffer_size{kInitialSize};
size_t occupied_size{};
void Expand(size_t new_size);
+ constexpr auto View(size_t offset = 0, size_t size = SIZE_MAX) const
+ {
+ if (offset >= occupied_size) {
+ return DataView{};
+ }
+ return DataView{buffer + offset, Min(occupied_size - offset, size)};
+ };
~DataBuffer();
};
-
diff --git a/disasm.cpp b/disasm.cpp
index 01af6ea..13333d4 100644
--- a/disasm.cpp
+++ b/disasm.cpp
@@ -27,16 +27,16 @@ enum class ShiftKind: int {
kRotate = 3,
};
-constexpr Arg FetchImmediate(const uint32_t address, const DataBuffer &code, const OpSize s)
+constexpr Arg FetchImmediate(const uint32_t address, const DataView &code, const OpSize s)
{
if (s == OpSize::kInvalid) {
return Arg{};
} else if (s == OpSize::kLong) {
- if (address + kInstructionSizeStepBytes < code.occupied_size) {
+ if (address + kInstructionSizeStepBytes < code.size) {
const int32_t value = GetI32BE(code.buffer + address);
return Arg::Immediate(value);
}
- } else if (address < code.occupied_size) {
+ } else if (address < code.size) {
const int16_t value = GetI16BE(code.buffer + address);
if (s == OpSize::kByte) {
// Technically it is impossible to have value lower that -128 in 8
@@ -54,7 +54,7 @@ constexpr Arg FetchImmediate(const uint32_t address, const DataBuffer &code, con
}
constexpr Arg FetchArg(
- const uint32_t address, const DataBuffer &code, const int m, const int xn, const OpSize s)
+ const uint32_t address, const DataView &code, const int m, const int xn, const OpSize s)
{
switch (m) {
case 0: // Dn
@@ -68,13 +68,13 @@ constexpr Arg FetchArg(
case 4: // -(An)
return Arg::AnAddrDecr(xn);
case 5: // (d16, An), Additional Word
- if (address < code.occupied_size) {
+ if (address < code.size) {
const int16_t d16 = GetI16BE(code.buffer + address);
return Arg::D16AnAddr(xn, d16);
}
break;
case 6: // (d8, An, Xi), Brief Extension Word
- if (address < code.occupied_size) {
+ if (address < code.size) {
const uint16_t briefext = GetU16BE(code.buffer + address);
if (briefext & 0x0700) {
// briefext must have zeros on 8, 9 an 10-th bits,
@@ -91,25 +91,25 @@ constexpr Arg FetchArg(
case 7:
switch (xn) {
case 0: // (xxx).W, Additional Word
- if (address < code.occupied_size) {
+ if (address < code.size) {
const int32_t w = GetI16BE(code.buffer + address);
return Arg::Word(w);
}
break;
case 1: // (xxx).L, Additional Long
- if (address + kInstructionSizeStepBytes < code.occupied_size) {
+ if (address + kInstructionSizeStepBytes < code.size) {
const int32_t l = GetI32BE(code.buffer + address);
return Arg::Long(l);
}
break;
case 2: // (d16, PC), Additional Word
- if (address < code.occupied_size) {
+ if (address < code.size) {
const int16_t d16 = GetI16BE(code.buffer + address);
return Arg::D16PCAddr(d16);
}
break;
case 3: // (d8, PC, Xi), Brief Extension Word
- if (address < code.occupied_size) {
+ if (address < code.size) {
const uint16_t briefext = GetU16BE(code.buffer + address);
if (briefext & 0x0700) {
// briefext must have zeros on 8, 9 an 10-th bits,
@@ -136,7 +136,7 @@ constexpr Arg FetchArg(
}
static Arg FetchArg(
- const uint32_t address, const DataBuffer &code, const uint16_t instr, const OpSize s)
+ const uint32_t address, const DataView &code, const uint16_t instr, const OpSize s)
{
const int addrmode = instr & 0x3f;
const int m = (addrmode >> 3) & 7;
@@ -151,7 +151,7 @@ static size_t disasm_verbatim(DisasmNode &node, const uint16_t instr)
}
static size_t disasm_jsr_jmp(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const OpSize opsize = OpSize::kWord;
const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize);
@@ -218,7 +218,7 @@ static size_t disasm_ext(DisasmNode &node, const OpSize opsize, const Arg arg)
}
static size_t disasm_ext_movem(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const auto dir = static_cast<MoveDirection>((instr >> 10) & 1);
const unsigned m = (instr >> 3) & 7;
@@ -227,7 +227,7 @@ static size_t disasm_ext_movem(
if (m == 0 && dir == MoveDirection::kRegisterToMemory) {
return disasm_ext(node, opsize, Arg::Dn(xn));
}
- if (node.address + kInstructionSizeStepBytes >= code.occupied_size) {
+ if (node.address + kInstructionSizeStepBytes >= code.size) {
// Not enough space for regmask, but maybe it is just EXT?
return disasm_verbatim(node, instr);
}
@@ -298,7 +298,7 @@ static size_t disasm_ext_movem(
}
static size_t disasm_lea(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const OpSize opsize = OpSize::kLong;
const auto addr = FetchArg(
@@ -338,7 +338,7 @@ static size_t disasm_lea(
}
static size_t disasm_chk(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const OpSize opsize = OpSize::kWord;
const auto src = FetchArg(
@@ -370,7 +370,7 @@ static size_t disasm_chk(
}
static size_t disasm_bra_bsr_bcc(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const int16_t dispmt0 = static_cast<int8_t>(instr & 0xff);
if (dispmt0 == -1) {
@@ -381,7 +381,7 @@ static size_t disasm_bra_bsr_bcc(
const auto opsize = dispmt0 ? OpSize::kShort : OpSize::kWord;
if (dispmt0 == 0) {
// Check the boundaries
- if (node.address + kInstructionSizeStepBytes >= code.occupied_size) {
+ if (node.address + kInstructionSizeStepBytes >= code.size) {
return disasm_verbatim(node, instr);
}
node.size = kInstructionSizeStepBytes * 2;
@@ -412,7 +412,7 @@ static OpCode OpCodeForBitOps(const unsigned opcode)
}
static size_t disasm_movep(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const unsigned dn = ((instr >> 9) & 7);
const unsigned an = instr & 7;
@@ -437,7 +437,7 @@ static size_t disasm_movep(
static size_t disasm_src_arg_bitops_movep(
DisasmNode &node,
const uint16_t instr,
- const DataBuffer &code,
+ const DataView &code,
const bool has_dn_src = true)
{
const unsigned m = (instr >> 3) & 7;
@@ -497,7 +497,7 @@ static size_t disasm_src_arg_bitops_movep(
return node.size = kInstructionSizeStepBytes + src.Size(opsize0) + dst.Size(opsize0);
}
-static size_t disasm_bitops(DisasmNode &n, const uint16_t i, const DataBuffer &c)
+static size_t disasm_bitops(DisasmNode &n, const uint16_t i, const DataView &c)
{
return disasm_src_arg_bitops_movep(n, i, c, false);
}
@@ -526,7 +526,7 @@ static OpCode OpCodeForLogicalImmediate(const unsigned opcode)
}
static size_t disasm_bitops_movep(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const bool has_source_reg = (instr >> 8) & 1;
if (has_source_reg) {
@@ -600,7 +600,7 @@ static size_t disasm_bitops_movep(
}
static size_t disasm_move_movea(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const int opsize_raw = (instr >> 12) & 3;
const OpSize opsize = (opsize_raw == 1)
@@ -684,7 +684,7 @@ static size_t disasm_move_movea(
}
static size_t disasm_move_from_sr(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const auto opsize = OpSize::kWord;
const auto dst = FetchArg(
@@ -714,7 +714,7 @@ static size_t disasm_move_from_sr(
}
static size_t disasm_move_to(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code, const ArgType reg)
+ DisasmNode &node, const uint16_t instr, const DataView &code, const ArgType reg)
{
const auto opsize = OpSize::kWord;
const auto src = FetchArg(
@@ -755,7 +755,7 @@ static OpCode opcode_for_negx_clr_neg_not(const unsigned opcode)
}
static size_t disasm_move_negx_clr_neg_not(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const auto opsize = static_cast<OpSize>((instr >> 6) & 3);
const unsigned opcode = (instr >> 9) & 3;
@@ -807,7 +807,7 @@ static size_t disasm_trivial(
}
static size_t disasm_tas(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const auto opsize = OpSize::kByte;
const auto a = FetchArg(
@@ -837,7 +837,7 @@ static size_t disasm_tas(
}
static size_t disasm_tst_tas_illegal(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const auto opsize = static_cast<OpSize>((instr >> 6) & 3);
const int m = (instr >> 3) & 7;
@@ -880,7 +880,7 @@ static size_t disasm_trap(DisasmNode &node, const uint16_t instr)
return node.size = kInstructionSizeStepBytes;
}
-static size_t disasm_link_unlink(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_link_unlink(DisasmNode &node, const uint16_t instr, const DataView &code)
{
const bool unlk = (instr >> 3) & 1;
const unsigned xn = instr & 7;
@@ -911,7 +911,7 @@ static size_t disasm_move_usp(DisasmNode &node, const uint16_t instr)
return node.size = kInstructionSizeStepBytes;
}
-static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const DataView &code)
{
const bool is_nbcd = !((instr >> 6) & 1);
const OpSize opsize0 = OpSize::kWord;
@@ -964,7 +964,7 @@ static size_t disasm_nbcd_swap_pea(DisasmNode &node, const uint16_t instr, const
return node.size = kInstructionSizeStepBytes + arg.Size(opsize0);
}
-static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataView &code)
{
const auto a = FetchImmediate(node.address + kInstructionSizeStepBytes, code, OpSize::kWord);
if (a.mode != AddrMode::kImmediate) {
@@ -974,7 +974,7 @@ static size_t disasm_stop(DisasmNode &node, const uint16_t instr, const DataBuff
return node.size = kInstructionSizeStepBytes * 2;
}
-static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataView &code)
{
if ((instr & 0xf900) == 0x4000) {
return disasm_move_negx_clr_neg_not(node, instr, code);
@@ -1018,7 +1018,7 @@ static size_t disasm_chunk_4(DisasmNode &node, const uint16_t instr, const DataB
}
static size_t disasm_addq_subq(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code, const OpSize opsize)
+ DisasmNode &node, const uint16_t instr, const DataView &code, const OpSize opsize)
{
const auto a = FetchArg(node.address + kInstructionSizeStepBytes, code, instr, opsize);
switch (a.mode) {
@@ -1053,9 +1053,9 @@ static size_t disasm_addq_subq(
return node.size = kInstructionSizeStepBytes + a.Size(opsize);
}
-static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code)
{
- if (node.address + kInstructionSizeStepBytes >= code.occupied_size) {
+ if (node.address + kInstructionSizeStepBytes >= code.size) {
return disasm_verbatim(node, instr);
}
const int16_t dispmt_raw = GetI16BE(code.buffer + node.address + kInstructionSizeStepBytes);
@@ -1072,7 +1072,7 @@ static size_t disasm_dbcc(DisasmNode &node, const uint16_t instr, const DataBuff
return node.size = kInstructionSizeStepBytes * 2;
}
-static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const DataView &code)
{
const OpSize opsize = OpSize::kWord;
const auto a = FetchArg(
@@ -1102,7 +1102,7 @@ static size_t disasm_scc_dbcc(DisasmNode &node, const uint16_t instr, const Data
return node.size = kInstructionSizeStepBytes + a.Size(opsize);
}
-static size_t disasm_addq_subq_scc_dbcc(DisasmNode &n, const uint16_t instr, const DataBuffer &c)
+static size_t disasm_addq_subq_scc_dbcc(DisasmNode &n, const uint16_t instr, const DataView &c)
{
const auto opsize = static_cast<OpSize>((instr >> 6) & 3);
if (opsize == OpSize::kInvalid) {
@@ -1128,7 +1128,7 @@ static size_t disasm_moveq(DisasmNode &node, const uint16_t instr)
static size_t disasm_divu_divs_mulu_muls(
DisasmNode &node,
const uint16_t instr,
- const DataBuffer &code,
+ const DataView &code,
const OpCode opcode)
{
const auto opsize = OpSize::kWord;
@@ -1181,7 +1181,7 @@ static size_t disasm_addx_subx_abcd_sbcd(
static size_t disasm_or_and(
DisasmNode &node,
const uint16_t instr,
- const DataBuffer &code,
+ const DataView &code,
const OpSize opsize,
const OpCode opcode)
{
@@ -1231,7 +1231,7 @@ static size_t disasm_or_and(
}
static size_t disasm_divu_divs_sbcd_or(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
// Also ensures that opsize == OpSize::kByte, i.e. 0b00
if ((instr & 0x1f0) == 0x100) {
@@ -1247,7 +1247,7 @@ static size_t disasm_divu_divs_sbcd_or(
}
static size_t disasm_adda_suba_cmpa(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code, const OpCode opcode)
+ DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode)
{
const OpSize opsize = static_cast<OpSize>(((instr >> 8) & 1) + 1);
const auto src = FetchArg(
@@ -1278,7 +1278,7 @@ static size_t disasm_adda_suba_cmpa(
static size_t disasm_add_sub_cmp(
DisasmNode &node,
const uint16_t instr,
- const DataBuffer &code,
+ const DataView &code,
const OpCode opcode,
const OpSize opsize,
const bool dir_to_addr)
@@ -1356,7 +1356,7 @@ static size_t disasm_cmpm(DisasmNode &node, const uint16_t instr)
return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
}
-static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataView &code)
{
const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
const auto addr = FetchArg(
@@ -1388,7 +1388,7 @@ static size_t disasm_eor(DisasmNode &node, const uint16_t instr, const DataBuffe
}
static size_t disasm_eor_cmpm_cmp_cmpa(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+ DisasmNode &node, const uint16_t instr, const DataView &code)
{
const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
if (opsize == OpSize::kInvalid) {
@@ -1423,7 +1423,7 @@ static size_t disasm_exg(DisasmNode &node, const uint16_t instr)
return node.size = kInstructionSizeStepBytes + src.Size(opsize) + dst.Size(opsize);
}
-static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataView &code)
{
if ((instr & 0x1f0) == 0x100) {
return disasm_addx_subx_abcd_sbcd(node, instr, OpCode::kABCD);
@@ -1442,7 +1442,7 @@ static size_t disasm_chunk_c(DisasmNode &node, const uint16_t instr, const DataB
}
static size_t disasm_add_sub_x_a(
- DisasmNode &node, const uint16_t instr, const DataBuffer &code, const OpCode opcode)
+ DisasmNode &node, const uint16_t instr, const DataView &code, const OpCode opcode)
{
const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
if (opsize == OpSize::kInvalid) {
@@ -1477,7 +1477,7 @@ static bool IsValidShiftKind(const ShiftKind k)
return static_cast<int>(k) < 4;
}
-static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const DataBuffer &code)
+static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const DataView &code)
{
const OpSize opsize = static_cast<OpSize>((instr >> 6) & 3);
const unsigned xn = instr & 7;
@@ -1529,7 +1529,7 @@ static size_t disasm_shift_rotate(DisasmNode &node, const uint16_t instr, const
return node.size = kInstructionSizeStepBytes + dst.Size(opsize);
}
-static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataBuffer &c)
+static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataView &c)
{
switch ((i & 0xf000) >> 12) {
case 0x0:
@@ -1569,10 +1569,10 @@ static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataBuffer &c)
return disasm_verbatim(n, i);
}
-size_t DisasmNode::Disasm(const DataBuffer &code)
+size_t DisasmNode::Disasm(const DataView &code)
{
// We assume that machine have no MMU and ROM data always starts with 0
- assert(this->address < code.occupied_size);
+ assert(this->address < code.size);
// It is possible to have multiple DisasmNode::Disasm() calls, and there is
// no point to disassemble it again if it already has opcode determined
if (this->op.opcode != OpCode::kNone) {
@@ -1591,10 +1591,10 @@ size_t DisasmNode::Disasm(const DataBuffer &code)
}
}
-size_t DisasmNode::DisasmAsRaw(const DataBuffer &code)
+size_t DisasmNode::DisasmAsRaw(const DataView &code)
{
// We assume that machine have no MMU and ROM data always starts with 0
- assert(this->address < code.occupied_size);
+ assert(this->address < code.size);
size = kInstructionSizeStepBytes;
ref_kinds = 0;
ref1_addr = 0;
diff --git a/disasm.h b/disasm.h
index 07640dc..be447a3 100644
--- a/disasm.h
+++ b/disasm.h
@@ -1,8 +1,8 @@
+#pragma once
+
/* SPDX-License-Identifier: Unlicense
*/
-#pragma once
-
#include "data_buffer.h"
#include "common.h"
@@ -383,8 +383,8 @@ struct DisasmNode {
/*! Disassembles instruction with arguments
* returns size of whole instruction with arguments in bytes
*/
- size_t Disasm(const DataBuffer &code);
- size_t DisasmAsRaw(const DataBuffer &code);
+ size_t Disasm(const DataView &code);
+ size_t DisasmAsRaw(const DataView &code);
void AddReferencedBy(uint32_t address, ReferenceType);
~DisasmNode();
};
diff --git a/elf_format.h b/elf_format.h
new file mode 100644
index 0000000..b5a268a
--- /dev/null
+++ b/elf_format.h
@@ -0,0 +1,328 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include <cstddef>
+#include <cstdint>
+
+namespace ELF {
+
+constexpr size_t kIdentSize = 16; // bytes in the identification block (Ident32Raw::FromBytes reads data[0..15])
+constexpr size_t kHeaderSize = kIdentSize + 36; // ident block plus the 36 bytes of fields read by Header32Raw::FromBytes
+constexpr size_t kMagicSize = 4; // length of the magic prefix tested by MagicIsValid
+constexpr size_t kProgramHeaderSize = 32; // eight 32-bit fields, as read by ProgramHeader32::FromBytes
+
+using Address = uint32_t; // type of the 32-bit address fields (Header32::entry, ProgramHeader32::vaddr and paddr)
+using Offset = uint32_t; // type of the 32-bit offset fields (phoff, shoff, ProgramHeader32::offset)
+
+enum class FileClass : uint8_t { // decoded form of identification byte 4 (see ParseFileClass)
+ kNone = 0,
+ k32 = 1,
+ k64 = 2,
+ kUnknown, // implicitly 3; ParseFileClass returns it for every byte other than 0, 1 or 2
+};
+
+enum class DataEncoding : uint8_t { // decoded form of identification byte 5; selects byte order in ParseU16/ParseU32
+ kNone = 0,
+ k2LSB = 1,
+ kLE = k2LSB, // alias: least significant byte first
+ k2MSB = 2,
+ kBE = k2MSB, // alias: most significant byte first
+ kUnknown, // implicitly 3; ParseDataEncoding returns it for every byte other than 0, 1 or 2
+};
+
+enum class Version : uint8_t { // decoded version value, used for both Ident32::version and Header32::version
+ kNone = 0,
+ kCurrent = 1,
+ kUnknown, // implicitly 2; ParseVersion returns it for every value other than 0 or 1
+};
+
+static constexpr inline auto ParseFileClass(const uint8_t file_class)
+{
+    // The three defined class bytes map onto their own enumerators;
+    // every other byte collapses to kUnknown.
+    const bool known = file_class == static_cast<uint8_t>(FileClass::kNone)
+        || file_class == static_cast<uint8_t>(FileClass::k32)
+        || file_class == static_cast<uint8_t>(FileClass::k64);
+    return known ? static_cast<FileClass>(file_class) : FileClass::kUnknown;
+}
+
+static constexpr inline auto ParseDataEncoding(const uint8_t data_encoding)
+{
+    // Guard-clause form: compare against each recognised encoding byte in turn.
+    using E = DataEncoding;
+    if (data_encoding == static_cast<uint8_t>(E::kNone)) { return E::kNone; }
+    if (data_encoding == static_cast<uint8_t>(E::k2LSB)) { return E::k2LSB; }
+    if (data_encoding == static_cast<uint8_t>(E::k2MSB)) { return E::k2MSB; }
+    return E::kUnknown;
+}
+
+static constexpr inline auto ParseVersion(const uint8_t version)
+{
+    // kNone (0) and kCurrent (1) are the only recognised values.
+    if (version == static_cast<uint8_t>(Version::kCurrent)) {
+        return Version::kCurrent;
+    }
+    return version == static_cast<uint8_t>(Version::kNone) ? Version::kNone : Version::kUnknown;
+}
+
+struct Ident32Raw { // The 16 identification bytes at the start of the header, kept as uninterpreted bytes
+ uint8_t magic[4]; // bytes 0..3; validity is tested by MagicIsValid
+ uint8_t file_class; // byte 4; decode with ParseFileClass
+ uint8_t data_encoding; // byte 5; decode with ParseDataEncoding
+ uint8_t version; // byte 6; decode with ParseVersion
+ uint8_t os_abi; // byte 7
+ uint8_t abi_version; // byte 8
+ uint8_t padding[7]; // bytes 9..15
+ static constexpr auto inline FromBytes(const uint8_t *data) // reads data[0..15]; the caller must guarantee 16 readable bytes
+ {
+ return Ident32Raw{
+ { data[0], data[1], data[2], data[3] },
+ data[4],
+ data[5],
+ data[6],
+ data[7],
+ data[8],
+ { data[9], data[10], data[11], data[12], data[13], data[14], data[15], },
+ };
+ }
+};
+
+struct Ident32 { // Identification block with class, encoding and version decoded into enums; the padding bytes are not kept
+ uint8_t magic[4];
+ FileClass file_class;
+ DataEncoding data_encoding;
+ Version version;
+ uint8_t os_abi;
+ uint8_t abi_version;
+ static constexpr inline auto FromBytes(const uint8_t *data) // reads data[0..8]; the caller must guarantee those bytes are readable
+ {
+ return Ident32{
+ { data[0], data[1], data[2], data[3] },
+ ParseFileClass(data[4]),
+ ParseDataEncoding(data[5]),
+ ParseVersion(data[6]),
+ data[7],
+ data[8],
+ };
+ }
+ static constexpr inline auto FromIdent32Raw(const Ident32Raw raw) // same decoding as FromBytes, starting from already extracted bytes
+ {
+ return Ident32{
+ { raw.magic[0], raw.magic[1], raw.magic[2], raw.magic[3] },
+ ParseFileClass(raw.file_class),
+ ParseDataEncoding(raw.data_encoding),
+ ParseVersion(raw.version),
+ raw.os_abi,
+ raw.abi_version,
+ };
+ }
+};
+
+enum class ObjectType : uint16_t { // decoded Header32::type (see ParseObjectType)
+ kNone = 0,
+ kRel = 1,
+ kExec = 2,
+ kDyn = 3,
+ kCore = 4,
+ kUnknown = 0x7fff, // result of ParseObjectType for every value that has no enumerator
+ kLoProc = 0xff00, // ParseObjectType matches this exact value only, not the whole range up to kHiProc
+ kHiProc = 0xffff,
+};
+
+enum class Machine : uint16_t { // decoded Header32::machine (see ParseMachine)
+ kNone = 0,
+ kM32 = 1,
+ kSPARC = 2,
+ k386 = 3,
+ k68k = 4,
+ k88k = 5,
+ k860 = 7, // note: no enumerator exists for 6
+ kMIPS = 8,
+ kUnknown, // implicitly 9; result of ParseMachine for every code not listed above
+};
+
+static constexpr inline uint16_t ParseU16(const uint8_t *d, DataEncoding e)
+{
+    // Choose which of the two bytes is the high one; anything but k2MSB reads as LSB-first.
+    const int hi = (e == DataEncoding::k2MSB) ? 0 : 1;
+    const int lo = 1 - hi;
+    return uint16_t(d[hi]) << 8 | d[lo];
+}
+
+static constexpr inline uint32_t ParseU32(const uint8_t *d, DataEncoding e)
+{
+    const int step = (e == DataEncoding::k2MSB) ? 1 : -1; // direction towards less significant bytes
+    const uint8_t *p = (step > 0) ? d : d + 3; // p[0] is always the most significant byte
+    return uint32_t(p[0]) << 24 | uint32_t(p[step]) << 16
+        | uint32_t(p[2 * step]) << 8 | p[3 * step];
+}
+
+static constexpr inline auto ParseObjectType(const uint16_t type)
+{
+    // Values 0..4 are the contiguous generic object kinds. From the
+    // processor-specific range only the two endpoint values are recognised.
+    // Everything else, 0x7fff included, is reported as kUnknown.
+    const bool known = type <= static_cast<uint16_t>(ObjectType::kCore)
+        || type == static_cast<uint16_t>(ObjectType::kLoProc)
+        || type == static_cast<uint16_t>(ObjectType::kHiProc);
+    if (!known) {
+        return ObjectType::kUnknown;
+    }
+    return static_cast<ObjectType>(type);
+}
+
+static constexpr inline auto ParseMachine(const uint16_t machine)
+{
+    // Every machine code this parser recognises (note that 6 is absent).
+    constexpr Machine kKnown[] = {
+        Machine::kNone, Machine::kM32, Machine::kSPARC, Machine::k386,
+        Machine::k68k, Machine::k88k, Machine::k860, Machine::kMIPS,
+    };
+    for (const Machine candidate : kKnown) {
+        if (machine == static_cast<uint16_t>(candidate)) {
+            return candidate;
+        }
+    }
+    return Machine::kUnknown;
+}
+
+struct Header32Raw { // 32-bit ELF file header: multi-byte fields converted to integers, but not yet mapped to enums
+ Ident32Raw ident;
+ uint16_t type; // decode with ParseObjectType
+ uint16_t machine; // decode with ParseMachine
+ uint32_t version; // 32 bits wide here, unlike the single-byte ident.version
+ Address entry;
+ Offset phoff;
+ Offset shoff;
+ uint32_t flags;
+ uint16_t ehsize;
+ uint16_t phentsize;
+ uint16_t phnum;
+ uint16_t shentsize;
+ uint16_t shnum;
+ uint16_t shstrndx;
+ static constexpr inline auto FromBytes(const uint8_t *data) // reads kHeaderSize bytes; the caller must guarantee they are readable
+ {
+ const auto ident = Ident32Raw::FromBytes(data);
+ const DataEncoding e = ParseDataEncoding(ident.data_encoding); // any encoding other than k2MSB is read LSB-first by ParseU16/ParseU32
+ return Header32Raw{
+ /* .ident */ ident,
+ /* .type */ ParseU16(data + kIdentSize + 0, e),
+ /* .machine */ ParseU16(data + kIdentSize + 2, e),
+ /* .version */ ParseU32(data + kIdentSize + 4, e),
+ /* .entry */ ParseU32(data + kIdentSize + 8, e),
+ /* .phoff */ ParseU32(data + kIdentSize + 12, e),
+ /* .shoff */ ParseU32(data + kIdentSize + 16, e),
+ /* .flags */ ParseU32(data + kIdentSize + 20, e),
+ /* .ehsize */ ParseU16(data + kIdentSize + 24, e),
+ /* .phentsize */ ParseU16(data + kIdentSize + 26, e),
+ /* .phnum */ ParseU16(data + kIdentSize + 28, e),
+ /* .shentsize */ ParseU16(data + kIdentSize + 30, e),
+ /* .shnum */ ParseU16(data + kIdentSize + 32, e),
+ /* .shstrndx */ ParseU16(data + kIdentSize + 34, e),
+ };
+ }
+};
+
+struct Header32 { // 32-bit ELF file header with ident, type, machine and version decoded into enums
+    Ident32 ident;
+    ObjectType type;
+    Machine machine;
+    Version version; // kUnknown when the 32-bit header field is not exactly 0 or 1
+    Address entry;
+    Offset phoff;
+    Offset shoff;
+    uint32_t flags;
+    uint16_t ehsize;
+    uint16_t phentsize;
+    uint16_t phnum;
+    uint16_t shentsize;
+    uint16_t shnum;
+    uint16_t shstrndx;
+    static constexpr inline auto FromBytes(const uint8_t *data) // reads kHeaderSize bytes; the caller must guarantee they are readable
+    {
+        const auto raw = Header32Raw::FromBytes(data);
+        return Header32{
+            Ident32::FromIdent32Raw(raw.ident),
+            ParseObjectType(raw.type),
+            ParseMachine(raw.machine),
+            raw.version > UINT8_MAX ? Version::kUnknown : ParseVersion(static_cast<uint8_t>(raw.version)), // 32-bit field: never truncate (0x101 must not read as kCurrent)
+            raw.entry,
+            raw.phoff,
+            raw.shoff,
+            raw.flags,
+            raw.ehsize,
+            raw.phentsize,
+            raw.phnum,
+            raw.shentsize,
+            raw.shnum,
+            raw.shstrndx,
+        };
+    }
+};
+
+enum class PHType : uint32_t { // segment kinds produced by ParsePHType; ProgramHeader32::type itself stays a raw uint32_t
+ kNull = 0,
+ kLoad = 1,
+ kDynamic = 2,
+ kInterp = 3,
+ kNote = 4,
+ kSHLIB = 5,
+ kProgramHeaderTable = 6,
+ kLoProc = 0x70000000, // ParsePHType matches this exact value only, not the whole range up to kHiProc
+ kHiProc = 0x7fffffff,
+ kUnknown, // implicitly 0x80000000; result of ParsePHType for every value not listed above
+};
+
+static constexpr inline auto ParsePHType(const uint32_t type)
+{
+    // Every segment type this parser recognises; from the processor-specific
+    // range only the two endpoint values are listed.
+    constexpr PHType kKnown[] = {
+        PHType::kNull, PHType::kLoad, PHType::kDynamic, PHType::kInterp, PHType::kNote,
+        PHType::kSHLIB, PHType::kProgramHeaderTable, PHType::kLoProc, PHType::kHiProc,
+    };
+    for (const PHType candidate : kKnown) {
+        if (type == static_cast<uint32_t>(candidate)) {
+            return candidate;
+        }
+    }
+    return PHType::kUnknown;
+}
+
+constexpr uint32_t kPHFlagX = 1 << 0; // bit 0; presumably the "execute" bit of ProgramHeader32::flags (PF_X in the ELF specification)
+constexpr uint32_t kPHFlagW = 1 << 1; // bit 1; presumably the "write" bit (PF_W)
+constexpr uint32_t kPHFlagR = 1 << 2; // bit 2; presumably the "read" bit (PF_R)
+
+struct ProgramHeader32 { // One 32-byte program header table entry, fields converted to integers
+ uint32_t type; // raw value; ParsePHType maps it to PHType
+ Offset offset;
+ Address vaddr;
+ Address paddr;
+ uint32_t filesz;
+ uint32_t memsz;
+ uint32_t flags; // presumably a combination of the kPHFlag* bits -- not interpreted in this struct
+ uint32_t align;
+ static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e) // reads kProgramHeaderSize (32) bytes using byte order e
+ {
+ return ProgramHeader32{
+ /* type */ ParseU32(data + 0, e),
+ /* offset */ ParseU32(data + 4, e),
+ /* vaddr */ ParseU32(data + 8, e),
+ /* paddr */ ParseU32(data + 12, e),
+ /* filesz */ ParseU32(data + 16, e),
+ /* memsz */ ParseU32(data + 20, e),
+ /* flags */ ParseU32(data + 24, e),
+ /* align */ ParseU32(data + 28, e),
+ };
+ }
+};
+
+static constexpr inline bool MagicIsValid(const uint8_t *m)
+{
+    return !(m[0] != 0x7f || m[1] != 'E' || m[2] != 'L' || m[3] != 'F'); // magic is 0x7f followed by "ELF"
+}
+
+};
diff --git a/elf_image.cpp b/elf_image.cpp
new file mode 100644
index 0000000..6db72f3
--- /dev/null
+++ b/elf_image.cpp
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "elf_image.h"
+
+#include <cassert>
+#include <cstdio>
+
+// Parses the raw program header table bytes in `d` into a new[]-allocated
+// array of ProgramHeader32, decoding every field with encoding `e`.
+//
+// Returns an empty table when `d` has no buffer or zero size. The size of `d`
+// is expected to be a multiple of kProgramHeaderSize. ELF::Image::~Image
+// releases the returned `headers` array with delete[].
+ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes(
+        const DataView &d, const DataEncoding e)
+{
+    const bool nothing_to_parse = (d.buffer == nullptr) || (d.size == 0);
+    if (nothing_to_parse) {
+        return ELF::ProgramHeader32Table{};
+    }
+    assert(d.size % kProgramHeaderSize == 0);
+    const size_t count = d.size / kProgramHeaderSize;
+    auto *const table = new ProgramHeader32[count];
+    assert(table != nullptr);
+    const auto *cursor = d.buffer;
+    for (size_t index = 0; index < count; index++, cursor += kProgramHeaderSize) {
+        table[index] = ProgramHeader32::FromBytes(cursor, e);
+    }
+    return ELF::ProgramHeader32Table{ table, count, };
+}
+
+// Checks whether `d` holds an ELF image that this tool is able to disassemble:
+// valid magic, "Current" version, Exec object type, Motorola 68k machine, a
+// program header table that fits into the data, segments that fit into the
+// data, code segments based at address zero and an entry point that belongs to
+// a loadable code segment.
+//
+// Returns nullptr when the image is acceptable. Otherwise returns a
+// description of the first problem found. The message buffer is produced by
+// open_memstream() and the caller has to release it with free().
+static char *ValidateELF(const DataView& d)
+{
+    char *error = nullptr;
+    size_t size = 0;
+    // open_memstream() publishes the buffer through `error` on fclose(), hence
+    // every exit path closes the stream before returning `error`.
+    FILE *s = open_memstream(&error, &size);
+    assert(s);
+    using namespace ELF;
+    if (d.size < kHeaderSize) {
+        fprintf(
+            s,
+            "data size (%zu) is lower than minimum ELF header size (%zu): "
+            "ELF header could not fit",
+            d.size,
+            kHeaderSize);
+        fclose(s);
+        return error;
+    }
+    // The raw header is kept only to print the original field values.
+    const auto header_raw = Header32Raw::FromBytes(d.buffer);
+    const auto header = Header32::FromBytes(d.buffer);
+    if (!MagicIsValid(header.ident.magic)) {
+        const uint8_t *m = header.ident.magic;
+        fprintf(
+            s,
+            "ELF Magic is invalid: expected [%02x %02x %02x %02x], got [%02x %02x %02x %02x]",
+            0x7f, 'E', 'L', 'F',
+            m[0], m[1], m[2], m[3]);
+        fclose(s);
+        return error;
+    }
+    if (header.ident.version != Version::kCurrent) {
+        fprintf(
+            s,
+            "version (0x%02x) of ELF header.ident.version is not supported, "
+            "only \"Current\" version (0x%02x) is supported",
+            header_raw.ident.version,
+            static_cast<int>(Version::kCurrent));
+        fclose(s);
+        return error;
+    }
+    if (header.version != Version::kCurrent) {
+        fprintf(
+            s,
+            "version (0x%02x) of ELF header.version is not supported, "
+            "only \"Current\" version (0x%02x) is supported",
+            header_raw.version,
+            static_cast<int>(Version::kCurrent));
+        fclose(s);
+        return error;
+    }
+    if (header.type != ObjectType::kExec) {
+        fprintf(
+            s,
+            "object type (0x%02x) is not supported, "
+            "only Exec (0x%02x) object type is supported",
+            header_raw.type,
+            static_cast<int>(ObjectType::kExec));
+        fclose(s);
+        return error;
+    }
+    if (header.machine != Machine::k68k) {
+        fprintf(
+            s,
+            "machine (0x%02x) is not supported, "
+            "only Motorola 68k (0x%02x) machine is supported",
+            header_raw.machine,
+            static_cast<int>(Machine::k68k));
+        fclose(s);
+        return error;
+    }
+    if (header.phentsize != kProgramHeaderSize) {
+        fprintf(
+            s,
+            "phentsize is invalid: expected (%zu), got (%zu)",
+            kProgramHeaderSize,
+            size_t(header.phentsize));
+        fclose(s);
+        return error;
+    }
+    // Offsets and sizes come from untrusted file contents. The sums are
+    // computed in 64 bits so that huge 32-bit values cannot wrap around and
+    // slip past the bounds checks below.
+    const uint64_t pht_end =
+        uint64_t(header.phoff) + uint64_t(header.phentsize) * header.phnum;
+    if (d.size < pht_end) {
+        fprintf(
+            s,
+            "data size (%zu) is lower than program header table end offset (%llu): "
+            "program header table could not fit",
+            d.size,
+            static_cast<unsigned long long>(pht_end));
+        fclose(s);
+        return error;
+    }
+    bool has_segment_with_entry = false;
+    for (size_t i = 0; i < header.phnum; i++) {
+        const auto ph = ProgramHeader32::FromBytes(
+                d.buffer + header.phoff + header.phentsize * i, header.ident.data_encoding);
+        const uint64_t segment_end = uint64_t(ph.offset) + ph.filesz;
+        if (d.size < segment_end) {
+            fprintf(
+                s,
+                "data size (%zu) is lower than pht[%zu] segment end offset (%llu): "
+                "segment could not fit",
+                d.size,
+                i,
+                static_cast<unsigned long long>(segment_end));
+            fclose(s);
+            return error;
+        }
+        // A code segment is readable and executable, but not writable
+        const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == (kPHFlagX | kPHFlagR);
+        if (ParsePHType(ph.type) == PHType::kLoad && is_code && ph.vaddr != 0) {
+            fprintf(
+                s,
+                "pht[%zu] segment is a code, but it's vaddr (0x%08x) is not zero: "
+                "non-zero base address is not supported",
+                i,
+                ph.vaddr);
+            fclose(s);
+            return error;
+        }
+        const uint64_t vaddr_end = uint64_t(ph.vaddr) + ph.memsz;
+        const bool contains_entry = header.entry >= ph.vaddr && header.entry < vaddr_end;
+        if (ParsePHType(ph.type) == PHType::kLoad && is_code && contains_entry) {
+            has_segment_with_entry = true;
+        }
+    }
+    if (!has_segment_with_entry) {
+        fprintf(s, "no code segments containing entry point (0x%08x) found", header.entry);
+        fclose(s);
+        return error;
+    }
+    // No problems found: discard the (empty) message buffer
+    fclose(s);
+    free(error);
+    return nullptr;
+}
+
+ELF::Image::Image(DataBuffer&& data) // Takes `data`, validates it as ELF and, if valid, parses the header and the program header table
+ : _data(static_cast<DataBuffer&&>(data)) // Cast back to an rvalue reference so `data` is passed on as an rvalue
+ , _error(ValidateELF(_data.View())) // nullptr when valid, otherwise a message that ~Image releases with free()
+ , _h(_error ? ELF::Header32{} : ELF::Header32::FromBytes(_data.View().buffer)) // Header is parsed only after successful validation
+ , _pht(_error
+ ? ELF::ProgramHeader32Table{}
+ : ELF::ProgramHeader32Table::FromBytes(
+ _data.View(_h.phoff, _h.phnum * kProgramHeaderSize), _h.ident.data_encoding)) // Uses _h, so _h must be initialized first
+{} // NOTE: each initializer reads the previous members, this relies on the member declaration order in class Image
+
+ELF::Image::~Image()
+{
+ if (_error) {
+ free(_error);
+ }
+ if (_pht.headers) {
+ delete [] _pht.headers;
+ }
+}
diff --git a/elf_image.h b/elf_image.h
new file mode 100644
index 0000000..b7c7123
--- /dev/null
+++ b/elf_image.h
@@ -0,0 +1,55 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "elf_format.h"
+#include "data_buffer.h"
+
+#include <cstdlib>
+
+namespace ELF {
+
+struct ProgramHeader32Table { // Pointer and element count describing an array of parsed program headers
+ const ProgramHeader32 *headers{}; // nullptr for an empty table; FromBytes allocates it with new[], ~Image deletes it
+ size_t size{}; // Number of elements in `headers` (not a byte count)
+ static ProgramHeader32Table FromBytes(const DataView &, DataEncoding); // Defined in elf_image.cpp
+};
+
+struct Segment { // NOTE(review): looks like a singly linked list node carrying a data view; no user of it is visible in this change
+ Segment *next{}; // Pointer to another Segment, nullptr by default
+ const DataView view{}; // Default-constructed DataView unless set at initialization
+};
+
+// Owns the bytes of an input file together with the result of validating and
+// parsing them as an ELF image.
+//
+// The object owns the validation error message (released with free()) and the
+// parsed program header array (released with delete[]), so copying is
+// explicitly forbidden: a copy would release the same pointers twice.
+class Image {
+    // NOTE: the constructor initializes members in this exact order and every
+    // member depends on the previous ones, do not reorder.
+    const DataBuffer _data;
+    char *const _error;
+    const Header32 _h;
+    const ProgramHeader32Table _pht;
+public:
+    explicit Image(DataBuffer&&);
+    ~Image();
+    Image(const Image&) = delete;
+    Image &operator=(const Image&) = delete;
+    // True when validation has found no problems.
+    constexpr bool IsValid() const { return _error == nullptr; }
+    // The whole input as it has been passed to the constructor.
+    constexpr const DataBuffer &Data() const { return _data; }
+    // Returns a view of the file contents of the first loadable code segment
+    // (readable, executable, not writable) that is based at address zero and
+    // contains the entry point. Returns an empty view when the image is not
+    // valid or no such segment exists.
+    constexpr const DataView ProgramView() const
+    {
+        if (!IsValid()) {
+            return DataView{};
+        }
+        for (size_t i = 0; i < _pht.size; i++) {
+            const auto &ph = _pht.headers[i];
+            const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) ==
+                (kPHFlagX | kPHFlagR);
+            const bool is_load = ParsePHType(ph.type) == PHType::kLoad;
+            const bool contains_entry = _h.entry >= ph.vaddr && _h.entry < ph.vaddr + ph.memsz;
+            if (is_load && is_code && ph.vaddr == 0 && contains_entry) {
+                return _data.View(ph.offset, ph.filesz);
+            }
+        }
+        return DataView{};
+    }
+    // Validation error message or nullptr when the image is valid. The string
+    // is owned by the Image and stays alive as long as the Image does.
+    constexpr const char *Error() const { return _error; }
+};
+
+}
diff --git a/main.cpp b/main.cpp
index 217766a..01e1895 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,8 +1,9 @@
/* SPDX-License-Identifier: Unlicense
*/
-#include "disasm.h"
+#include "elf_image.h"
#include "data_buffer.h"
+#include "disasm.h"
#include "common.h"
#define OPTPARSE_IMPLEMENTATION
@@ -32,7 +33,7 @@ class DisasmMap {
const uint32_t by_addr,
const uint32_t ref_addr,
const TracedNodeType type,
- const DataBuffer &code,
+ const DataView &code,
const ReferenceType ref_type);
constexpr bool canBeAllocated(const DisasmNode& node) const;
public:
@@ -47,7 +48,7 @@ public:
assert(_type == DisasmMapType::kTraced);
return nullptr != insertTracedNode(address, type);
}
- void Disasm(const DataBuffer &code, const Settings &);
+ void Disasm(const DataView &code, const Settings &);
DisasmMap(DisasmMapType type): _type(type) {}
~DisasmMap();
};
@@ -88,7 +89,7 @@ void DisasmMap::insertReferencedBy(
const uint32_t by_addr,
const uint32_t ref_addr,
const TracedNodeType type,
- const DataBuffer &code,
+ const DataView &code,
const ReferenceType ref_type)
{
auto *const ref_node = insertTracedNode(ref_addr, type);
@@ -140,10 +141,10 @@ static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask r
: ReferenceType::kBranch;
}
-void DisasmMap::Disasm(const DataBuffer &code, const Settings &s)
+void DisasmMap::Disasm(const DataView &code, const Settings &s)
{
DisasmNode *node;
- for (size_t i = 0; i < Min(kRomSizeBytes, code.occupied_size);) {
+ for (size_t i = 0; i < Min(kRomSizeBytes, code.size);) {
if (_type == DisasmMapType::kTraced) {
node = _map[i / kInstructionSizeStepBytes];
if (!node) {
@@ -171,7 +172,7 @@ void DisasmMap::Disasm(const DataBuffer &code, const Settings &s)
const bool has_ref1 = (node->ref_kinds & kRef1ImmMask)
? s.imm_labels
: (node->ref_kinds & kRef1Mask);
- const bool has_code_ref1 = node->ref1_addr < code.occupied_size && has_ref1;
+ const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1;
if (has_code_ref1) {
const TracedNodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask))
? TracedNodeType::kData : TracedNodeType::kInstruction;
@@ -179,7 +180,7 @@ void DisasmMap::Disasm(const DataBuffer &code, const Settings &s)
insertReferencedBy(node->address, node->ref1_addr, type, code, ref_type);
}
const bool has_ref2 = (node->ref_kinds & kRef2Mask);
- const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.occupied_size);
+ const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size);
if (has_code_ref2) {
const TracedNodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask))
? TracedNodeType::kData : TracedNodeType::kInstruction;
@@ -208,7 +209,7 @@ DisasmMap::~DisasmMap()
}
static size_t RenderRawDataComment(
- char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataBuffer &code)
+ char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code)
{
size_t overall_sz{};
for (size_t i = 0; i < instr_sz; i += kInstructionSizeStepBytes)
@@ -324,7 +325,7 @@ static constexpr const char *StringWihoutFristNChars(const char *str, const size
static void RenderNodeDisassembly(
FILE *const output,
const DisasmMap &disasm_map,
- const DataBuffer &code,
+ const DataView &code,
const Settings &s,
const DisasmNode &node)
{
@@ -451,9 +452,9 @@ static void RenderNodeDisassembly(
}
static void RenderDisassembly(
- FILE *const output, const DisasmMap &disasm_map, const DataBuffer &code, const Settings &s)
+ FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s)
{
- for (size_t i = 0; i < code.occupied_size;) {
+ for (size_t i = 0; i < code.size;) {
const DisasmNode *node = disasm_map.FindNodeByAddress(i);
if (node) {
RenderNodeDisassembly(output, disasm_map, code, s, *node);
@@ -467,18 +468,18 @@ static void RenderDisassembly(
}
}
-static void ParseTraceData(DisasmMap &disasm_map, const DataBuffer &trace_data)
+static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data)
{
// FIXME make a full blown parser with various radixes support and different
// trace types support
bool parse = true;
- for (size_t i = 0; i < trace_data.occupied_size; i++) {
+ for (size_t i = 0; i < trace_data.size; i++) {
if (trace_data.buffer[i] == '\n' || trace_data.buffer[i] == '\r') {
parse = true;
} else if (parse) {
errno = 0;
- char *startptr = reinterpret_cast<char *>(trace_data.buffer + i);
- char *endptr = startptr;
+ const char *startptr = reinterpret_cast<const char *>(trace_data.buffer + i);
+ char *endptr = nullptr;
const long address = strtol(startptr, &endptr, 10);
if ((address == LONG_MAX || address == LONG_MIN) && errno == ERANGE) {
// Parsing error, just skip
@@ -530,58 +531,56 @@ static size_t ReadFromStream(DataBuffer &db, FILE *stream)
return db.occupied_size;
}
-static int M68kDisasmByTrace(FILE *input_stream, FILE *output_stream, FILE *trace_stream, const Settings &s)
+static DisasmMap *NewDisasmMap(FILE *trace_stream) // Allocates a raw map when there is no trace, otherwise a traced map filled from the trace; nullptr if the trace is empty
{
- // Read machine code into buffer
- DataBuffer code{};
- const size_t input_size = ReadFromStream(code, input_stream);
- if (input_size == 0) {
- fprintf(stderr, "ReadFromStream(code, input_stream): Error: No data has been read\n");
- return EXIT_FAILURE;
- }
- // It just not worth it to check this somewhere while disassebling or
- // emitting. Odd size is just not supported.
- if (code.occupied_size % 2) {
- fprintf(stderr, "Error: code blob must be of even size\n");
- return EXIT_FAILURE;
+ if (trace_stream == nullptr) { // No trace given: every address is disassembled
+ DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kRaw};
+ assert(disasm_map);
+ return disasm_map; // NOTE(review): allocated with new, presumably the caller has to delete it - confirm
}
// Read trace file into buffer
DataBuffer trace_data{};
const size_t trace_size = ReadFromStream(trace_data, trace_stream);
if (trace_size == 0) {
fprintf(stderr, "ReadFromStream(trace_data, trace_stream): Error: No data has been read\n");
- return EXIT_FAILURE;
+ return nullptr; // Error has been reported already, the caller only has to check for nullptr
}
// Parse trace file into map
DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kTraced};
assert(disasm_map);
- ParseTraceData(*disasm_map, trace_data);
- // Disasm into output map
- disasm_map->Disasm(code, s);
- // Print output into output_stream
- RenderDisassembly(output_stream, *disasm_map, code, s);
- delete disasm_map;
- return EXIT_SUCCESS;
+ ParseTraceData(*disasm_map, trace_data.View()); // ParseTraceData takes a DataView, hence the View() call
+ return disasm_map;
}
-static int M68kDisasmAll(FILE *input_stream, FILE *output_stream, const Settings &s)
+static int M68kDisasm(
+ FILE *input_stream, FILE *output_stream, FILE *trace_stream, const Settings &s)
{
- // Read machine code into buffer
- DataBuffer code{};
- const size_t input_size = ReadFromStream(code, input_stream);
+ // Read input file into buffer
+ DataBuffer input{};
+ const size_t input_size = ReadFromStream(input, input_stream);
if (input_size == 0) {
- fprintf(stderr, "ReadFromStream(code, input_stream): Error: No data has been read\n");
+ fprintf(stderr, "ReadFromStream(input, input_stream): Error: No data has been read\n");
+ return EXIT_FAILURE;
+ }
+ const ELF::Image elf(static_cast<DataBuffer&&>(input));
+ if (s.bfd == BFDTarget::kELF && !elf.IsValid()) {
+ fprintf(stderr, "Error: ELF image is not valid: %s\n", elf.Error());
return EXIT_FAILURE;
}
- // It just not worth it to check this somewhere while disassebling or
+ const bool from_elf = s.bfd == BFDTarget::kELF || (s.bfd == BFDTarget::kAuto && elf.IsValid());
+ const DataView code(from_elf ? elf.ProgramView() : elf.Data().View());
+ assert(code.buffer != nullptr);
+ assert(code.size != 0);
+ // It is not worth it to check this somewhere while disassembling or
// emitting. Odd size is just not supported.
- if (code.occupied_size % 2) {
- fprintf(stderr, "Error: code blob must be of even size\n");
+ if (code.size % 2) {
+ fprintf(stderr, "M68kDisasm: Error: code blob must be of even size\n");
+ return EXIT_FAILURE;
+ }
+ auto *disasm_map = NewDisasmMap(trace_stream);
+ if (disasm_map == nullptr) {
return EXIT_FAILURE;
}
- // Create the map and disasseble
- DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kRaw};
- assert(disasm_map);
// Disasm into output map
disasm_map->Disasm(code, s);
// Print output into output_stream
@@ -634,38 +633,44 @@ static bool ApplyFeature(Settings& s, const char *feature_arg)
static void PrintUsage(FILE *s, const char *argv0)
{
// Please, keep all lines in 80 columns range when printed.
- fprintf(s, "Usage: %s [options] [<input-file-name>]\n", argv0);
- fprintf(s, "Options:\n");
- fprintf(s, " -h, --help, Show this message.\n");
- fprintf(s, " -o, --output, Where to write disassembly to (stdout if not set)\n");
- fprintf(s, " -t, --pc-trace, File containing PC trace\n");
- fprintf(s, " --indent, Specify instruction indentation, e.g. \"\t\",\n");
- fprintf(s, " Single tab is used by default.\n");
- fprintf(s, " -f, --feature=[no-]<feature>\n");
- fprintf(s, " Enable or disable (with \"no-\" prefix) a feature.\n");
- fprintf(s, " Available features described below under the\n");
- fprintf(s, " \"Feature flags\" section.\n");
- fprintf(s, " <input_file_name> Binary file with machine code (stdin if not set)\n");
- fprintf(s, "Feature flags:\n");
- fprintf(s, " rdc Print raw data comment.\n");
- fprintf(s, " labels Print labels above all places that have jumps from\n");
- fprintf(s, " somewhere.\n");
- fprintf(s, " rel-labels Use label instead of number on relative branch or call.\n");
- fprintf(s, " abs-labels Use label instead of number on absolute branch or call.\n");
- fprintf(s, " imm-labels Use label instead of number when immediate value moved\n");
- fprintf(s, " to address register.\n");
- fprintf(s, " short-ref-local-labels\n");
- fprintf(s, " Use local labels (numbers) for short jumps or loops.\n");
- fprintf(s, " Jump is considered short when it does not cross other\n");
- fprintf(s, " labels and has no calls.\n");
- fprintf(s, " export-labels Add `.globl` preamble to labels referenced two or more\n");
- fprintf(s, " times.\n");
- fprintf(s, " export-all-labels Add `.globl` preamble to all labels.\n");
- fprintf(s, " export-functions Add `.globl` and `.type @funciton` preamble to a label\n");
- fprintf(s, " referenced as a call.\n");
- fprintf(s, " xrefs-from Print xrefs comments above all places that have xrefs.\n");
- fprintf(s, " xrefs-to Print xrefs comments after all branch instructions.\n");
- fprintf(s, " imm-hex Print all immediate values as hexadecimal numbers.\n");
+ fprintf(s,
+ "Usage: %s [options] <input-file-name>\n"
+ "Options:\n"
+ " -h, --help, Show this message.\n"
+ " -o, --output, Where to write disassembly to (stdout if not set)\n"
+ " -t, --pc-trace, File containing PC trace\n"
+ " --indent, Specify instruction indentation, e.g. \"\t\",\n"
+ " Single tab is used by default.\n"
+ " -f, --feature=[no-]<feature>\n"
+ " Enable or disable (with \"no-\" prefix) a feature.\n"
+ " Available features described below under the\n"
+ " \"Feature flags\" section.\n"
+ " -b, --bfd-target=bfdname\n"
+ " Specify target object format as `bfdname`. Will attempt\n"
+ " to detect automatically if not set. Only `auto,\n"
+ " `binary` and `elf` are currently supported.\n"
+ " <input_file_name> Binary or elf file with the machine code to disassemble\n"
+ "Feature flags:\n"
+ " rdc Print raw data comment.\n"
+ " labels Print labels above all places that have jumps from\n"
+ " somewhere.\n"
+ " rel-labels Use label instead of number on relative branch or call.\n"
+ " abs-labels Use label instead of number on absolute branch or call.\n"
+ " imm-labels Use label instead of number when immediate value moved\n"
+ " to address register.\n"
+ " short-ref-local-labels\n"
+ " Use local labels (numbers) for short jumps or loops.\n"
+ " Jump is considered short when it does not cross other\n"
+ " labels and has no calls.\n"
+ " export-labels Add `.globl` preamble to labels referenced two or more\n"
+ " times.\n"
+ " export-all-labels Add `.globl` preamble to all labels.\n"
+ " export-functions Add `.globl` and `.type @funciton` preamble to a label\n"
+ " referenced as a call.\n"
+ " xrefs-from Print xrefs comments above all places that have xrefs.\n"
+ " xrefs-to Print xrefs comments after all branch instructions.\n"
+ " imm-hex Print all immediate values as hexadecimal numbers.\n"
+ , argv0);
}
int main(int, char* argv[])
@@ -675,6 +680,7 @@ int main(int, char* argv[])
{"output", 'o', OPTPARSE_REQUIRED},
{"pc-trace", 't', OPTPARSE_REQUIRED},
{"feature", 'f', OPTPARSE_REQUIRED},
+ {"bfd-target", 'b', OPTPARSE_REQUIRED},
{"indent", 80, OPTPARSE_REQUIRED},
{},
};
@@ -704,6 +710,26 @@ int main(int, char* argv[])
return EXIT_FAILURE;
}
break;
+ case 'b':
+ {
+ const auto *bfd_str = options.optarg;
+ if (0 == strcmp(bfd_str, "auto")) {
+ s.bfd = BFDTarget::kAuto;
+ } else if (0 == strcmp(bfd_str, "binary")) {
+ s.bfd = BFDTarget::kBinary;
+ } else if (0 == strcmp(bfd_str, "elf")) {
+ s.bfd = BFDTarget::kELF;
+ } else {
+ fprintf(
+ stderr,
+ "Unknown BFD target specified: \"%s\". "
+ "Refer to usage below to find correct BFD values.\n",
+ bfd_str);
+ PrintUsage(stderr, argv[0]);
+ return EXIT_FAILURE;
+ }
+ }
+ break;
case 80:
s.indent = options.optarg;
break;
@@ -723,7 +749,7 @@ int main(int, char* argv[])
}
}
// Open the files
- FILE *input_stream = stdin;
+ FILE *input_stream = nullptr;
FILE *output_stream = stdout;
FILE *trace_stream = nullptr;
if (input_file_name) {
@@ -733,12 +759,17 @@ int main(int, char* argv[])
fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", input_file_name, err, strerror(err));
return EXIT_FAILURE;
}
+ } else {
+ fprintf(stderr, "main: Error: no input file name specified, see usage below.\n");
+ PrintUsage(stderr, argv[0]);
+ return EXIT_FAILURE;
}
if (output_file_name) {
output_stream = fopen(output_file_name, "w");
if (output_stream == nullptr) {
const int err = errno;
fprintf(stderr, "main: fopen(\"%s\", \"w\"): Error (%d): \"%s\"\n", output_file_name, err, strerror(err));
+ fclose(input_stream);
return EXIT_FAILURE;
}
}
@@ -747,13 +778,13 @@ int main(int, char* argv[])
if (trace_stream == nullptr) {
const int err = errno;
fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", trace_file_name, err, strerror(err));
+ fclose(input_stream);
+ fclose(output_stream);
return EXIT_FAILURE;
}
}
// Run the program
- const int ret = trace_stream
- ? M68kDisasmByTrace(input_stream, output_stream, trace_stream, s)
- : M68kDisasmAll(input_stream, output_stream, s);
+ const int ret = M68kDisasm(input_stream, output_stream, trace_stream, s);
if (trace_stream != nullptr) {
fclose(trace_stream);
}
diff --git a/test.ld b/test.ld
index 22a08e2..f939414 100644
--- a/test.ld
+++ b/test.ld
@@ -1,3 +1,6 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
MEMORY {
ROM(rx) : ORIGIN = 0x00000000, LENGTH = 4M
}
diff --git a/todo.md b/todo.md
index 1dabd4f..4bfa52e 100644
--- a/todo.md
+++ b/todo.md
@@ -1,8 +1,11 @@
# TODO
+- Add support for `ELF` and `DWARF` formats to split an `ELF` file into multiple
+ original assembly files. These files may not have been assembly files
+ originally, but they will become assembly files after decompilation.
- Implement RAM symbol mapping from raw addresses found in the instructions like
LEA, MOVE and address arithmetic instructions. Basically any direct RAM
- address accessed directly may be mapped as symbol. A hashmap is most like
+ address accessed directly may be mapped as symbol. A hashmap is most likely
necessary for this.
- Implement CLI option that can be used to specify regions of RAM and IO
registers. Custom ROM location and size is still not the case, only 4MiB at