-rw-r--r--  CMakeLists.txt    1
-rw-r--r--  Makefile          1
-rw-r--r--  src/disasm.cpp  260
-rw-r--r--  src/disasm.h    232
-rw-r--r--  src/m68k.cpp     31
-rw-r--r--  src/m68k.h       61
-rw-r--r--  src/main.cpp    385
7 files changed, 498 insertions, 473 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 71863ed..2186d15 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,7 @@ set(common_debug_flags
add_executable(m68k-disasm
src/main.cpp
src/data_buffer.cpp
+ src/disasm.cpp
src/m68k.cpp
src/elf_image.cpp
)
diff --git a/Makefile b/Makefile
index 6a305b8..a1c05a0 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,7 @@ _LDFLAGS = $(LDFLAGS) $(OPTFLAGS) $(addprefix -T,$(LDSCRIPTS))
OBJECTS=main.o \
data_buffer.o \
+ disasm.o \
elf_image.o \
m68k.o
diff --git a/src/disasm.cpp b/src/disasm.cpp
new file mode 100644
index 0000000..bf72674
--- /dev/null
+++ b/src/disasm.cpp
@@ -0,0 +1,260 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "disasm.h"
+#include "m68k.h"
+
+#include <cassert>
+#include <cstring>
+#include <cerrno>
+
+void DisasmNode::AddReferencedBy(const uint32_t address_from, const ReferenceType ref_type)
+{
+ ReferenceNode *node{};
+ if (this->last_ref_by) {
+ node = this->last_ref_by;
+ } else {
+ node = new ReferenceNode{};
+ assert(node);
+ this->ref_by = this->last_ref_by = node;
+ }
+ node->refs[node->refs_count] = ReferenceRecord{ref_type, address_from};
+ node->refs_count++;
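+ // If the tail buffer just became full, chain a fresh one so the next
+ // reference lands there.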
+ if (node->refs_count >= kRefsCountPerBuffer) {
+ ReferenceNode *new_node = new ReferenceNode{};
+ assert(new_node);
+ node->next = new_node;
+ this->last_ref_by = new_node;
+ }
+}
+
+DisasmNode::~DisasmNode()
+{
+ ReferenceNode *ref{this->ref_by};
+ while (ref) {
+ ReferenceNode *prev = ref;
+ ref = ref->next;
+ delete prev;
+ }
+}
+
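+// M68k instructions are word-aligned, so the lowest address bit is dropped.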
+static constexpr uint32_t AlignInstructionAddress(const uint32_t address)
+{
+ return address & ~1UL;
+}
+
+DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type)
+{
+ auto *node = findNodeByAddress(address);
+ if (node) {
+ // Instruction nodes take precedence over data nodes. If a node that
+ // was previously accessed only as data now turns out to be an
+ // instruction, then it must become an instruction node.
+ if (IsInstruction(type) && !IsInstruction(node->type)) {
+ *const_cast<NodeType*>(&node->type) = type;
+ // Make sure it is OpCode::kNone so it will be properly disassembled
+ node->op = Op{};
+ }
+ return *node;
+ }
+ node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)});
+ assert(node);
+ _map[address / kInstructionSizeStepBytes] = node;
+ return *node;
+}
+
+DisasmNode &DisasmMap::insertReferencedBy(
+ const uint32_t by_addr,
+ const uint32_t ref_addr,
+ const NodeType type,
+ const ReferenceType ref_type)
+{
+ auto &ref_node = insertNode(ref_addr, type);
+ ref_node.AddReferencedBy(by_addr, ref_type);
+ return ref_node;
+}
+
+void DisasmMap::InsertNode(uint32_t address, NodeType type)
+{
+ assert(_type == DisasmMapType::kTraced);
+ insertNode(address, type);
+}
+
+constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t)
+{
+ if (t == ELF::Symbol32Type::kObject) {
+ return SymbolType::kObject;
+ }
+ if (t == ELF::Symbol32Type::kFunc) {
+ return SymbolType::kFunction;
+ }
+ return SymbolType::kNone;
+}
+
+static int cmpsym(const void *p1, const void *p2)
+{
+ const Symbol *sym1 = reinterpret_cast<const Symbol *>(p1);
+ const Symbol *sym2 = reinterpret_cast<const Symbol *>(p2);
+ if (sym1->address == sym2->address) {
+ return strcmp(sym1->name, sym2->name);
+ }
+ return sym1->address < sym2->address ? -1 : 1;
+}
+
+bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf)
+{
+ const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab");
+ if (!symtab.IsValid()) {
+ fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n");
+ return true;
+ }
+ FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size);
+ if (symtab_stream == nullptr) {
+ const int err = errno;
+ fprintf(stderr,
+ "open_memstream() for symtab failed: Error (%d): \"%s\"\n",
+ err, strerror(err));
+ return false;
+ }
+ const Symbol null_symbol{};
+ if (null_symbol.name != nullptr && *null_symbol.name != '\0') {
+ const size_t ret = fwrite(
+ &null_symbol, sizeof null_symbol, 1, symtab_stream);
+ (void) ret;
+ assert(ret == 1);
+ }
+ const size_t nentries = symtab.size/symtab.entsize;
+ for (size_t i = 0; i < nentries; i++) {
+ const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(i);
+ const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) ||
+ (elfsym.type() == ELF::Symbol32Type::kObject) ||
+ (elfsym.type() == ELF::Symbol32Type::kFunc);
+ if (has_proper_type) {
+ // XXX: Is it possible that it may have binding other than
+ // Symbol32Bind::kGlobal when it is kFunc?
+ // XXX: Yes, it is possible. It may be kLocal or kWeak for sure.
+ const auto type = SymbolTypeFromElf32SymbolType(elfsym.type());
+ const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size};
+ if (symbol.name != nullptr && *symbol.name != '\0') {
+ const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream);
+ (void) ret;
+ assert(ret == 1);
+ }
+ }
+ }
+ // No more symbols will be added, so the stream can be closed now.
+ fclose(symtab_stream);
+ // The RenderNodeDisassembly() function expects the symbol table to be
+ // sorted.
+ qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym);
+ return true;
+}
+
+static constexpr bool IsNextLikelyAnInstruction(const Op &op)
+{
+ return (op.opcode != OpCode::kNone &&
+ op.opcode != OpCode::kRaw &&
+ !IsBRA(op) &&
+ op.opcode != OpCode::kJMP &&
+ op.opcode != OpCode::kRTS &&
+ op.opcode != OpCode::kRTE &&
+ op.opcode != OpCode::kSTOP);
+}
+
+void DisasmMap::Disasm(
+ const DataView &code, const Settings &s, size_t at, bool nested)
+{
+ // Some of the logic of this function is covered by the integration tests in
+ // `test_walk_and_follow_jumps.bash`.
+ bool inside_code_span = nested;
+ while (at < Min(kRomSizeBytes, code.size)) {
+ DisasmNode *node;
+ if (_type == DisasmMapType::kTraced) {
+ node = _map[at / kInstructionSizeStepBytes];
+ if (!node) {
+ if (inside_code_span) {
+ node = &insertNode(at, NodeType::kTracedInstruction);
+ } else {
+ at += kInstructionSizeStepBytes;
+ continue;
+ }
+ }
+ } else {
+ node = &insertNode(at, NodeType::kTracedInstruction);
+ }
+ if (node->op.opcode == OpCode::kNone || inside_code_span) {
+ const auto size = node->Disasm(code);
+ assert(size >= kInstructionSizeStepBytes);
+ if (canBeAllocated(*node)) {
+ // Spread across the size
+ for (size_t o = kInstructionSizeStepBytes; o < size; o++) {
+ _map[(node->address + o) / kInstructionSizeStepBytes] = node;
+ }
+ } else {
+ node->DisasmAsRaw(code);
+ }
+ }
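+ // Keep tracing linearly only when walking is enabled and the current op
+ // is a real instruction that falls through.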
+ inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op);
+ if (nested && !inside_code_span) {
+ return;
+ }
+ at += node->size;
+ // NOTE: Not much information about the reference is passed further, so
+ // just don't add a reference to an immediate operand when s.imm_labels
+ // is disabled.
+ const bool has_ref1 = (node->ref_kinds & kRef1ImmMask)
+ ? s.imm_labels
+ : (node->ref_kinds & kRef1Mask);
+ const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1;
+ if (has_code_ref1) {
+ const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask))
+ ? NodeType::kData : NodeType::kRefInstruction;
+ const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds);
+ auto &ref_node = insertReferencedBy(
+ node->address, node->ref1_addr, type, ref_type);
+ if (ref_node.op.opcode == OpCode::kNone) {
+ if (s.follow_jumps) {
+ Disasm(code, s, ref_node.address, true);
+ } else {
+ ref_node.DisasmAsRaw(code);
+ }
+ }
+ }
+ const bool has_ref2 = (node->ref_kinds & kRef2Mask);
+ const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size);
+ if (has_code_ref2) {
+ const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask))
+ ? NodeType::kData : NodeType::kRefInstruction;
+ const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds);
+ auto &ref_node = insertReferencedBy(
+ node->address, node->ref2_addr, type, ref_type);
+ if (ref_node.op.opcode == OpCode::kNone) {
+ if (s.follow_jumps) {
+ Disasm(code, s, ref_node.address, true);
+ } else {
+ ref_node.DisasmAsRaw(code);
+ }
+ }
+ }
+ }
+}
+
+DisasmMap::~DisasmMap()
+{
+ for (size_t i = 0; i < kDisasmMapSizeElements; i++) {
+ auto *const node = _map[i];
+ if (!node) {
+ continue;
+ }
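+ // A multi-word node occupies several consecutive slots: clear them all
+ // and delete the node only once.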
+ const auto size = node->size / kInstructionSizeStepBytes;
+ for (size_t o = 0; o < size; o++) {
+ assert(_map[i + o] == node);
+ _map[i + o] = nullptr;
+ }
+ delete node;
+ i += size - 1;
+ }
+ if (_symtab != nullptr) {
+ free(_symtab);
+ }
+}
diff --git a/src/disasm.h b/src/disasm.h
new file mode 100644
index 0000000..90906ed
--- /dev/null
+++ b/src/disasm.h
@@ -0,0 +1,232 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "elf_image.h"
+#include "common.h"
+#include "m68k.h"
+
+#include <cstdint>
+#include <cstddef>
+
+enum class ReferenceType {
+ kUnknown = 0,
+ kCall,
+ kBranch,
+ kRead,
+ kWrite,
+};
+
+struct ReferenceRecord {
+ ReferenceType type{};
+ uint32_t address{};
+};
+
+constexpr size_t kRefsCountPerBuffer = 10;
+
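+/// A fixed-size bucket in a singly linked list that accumulates
+/// back references of a DisasmNode.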
+struct ReferenceNode {
+ ReferenceNode *next{};
+ ReferenceRecord refs[kRefsCountPerBuffer];
+ uint32_t refs_count{};
+};
+
+enum class NodeType {
+ kTracedInstruction,
+ kRefInstruction,
+ kData,
+};
+
+struct DisasmNode {
+ const NodeType type{};
+ /// Address of the instruction (PC value basically)
+ const uint32_t address{};
+ /// Instruction size in bytes
+ size_t size{kInstructionSizeStepBytes};
+ /// Indicates whether `ref_addr` should be interpreted and how
+ RefKindMask ref_kinds{};
+ /// Address of first argument reference
+ uint32_t ref1_addr{};
+ /// Address of second argument reference
+ uint32_t ref2_addr{};
+ ReferenceNode *ref_by{};
+ ReferenceNode *last_ref_by{};
+ Op op{};
+
+ /*! Disassembles instruction with arguments
+ * returns size of whole instruction with arguments in bytes
+ */
+ size_t Disasm(const DataView &code);
+ size_t DisasmAsRaw(const DataView &code);
+ void AddReferencedBy(uint32_t address, ReferenceType);
+ ~DisasmNode();
+};
+
+static constexpr inline bool IsInstruction(NodeType t)
+{
+ return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction;
+}
+
+enum class SymbolType: int {
+ kNone = 0,
+ kFunction,
+ kObject,
+};
+
+struct Symbol {
+ uint32_t address{};
+ SymbolType type{};
+ const char *name{};
+ size_t size{};
+};
+
+enum class DisasmMapType {
+ kTraced,
+ kRaw,
+};
+
+class DisasmMap {
+ const DisasmMapType _type;
+ DisasmNode *_map[kDisasmMapSizeElements]{};
+ Symbol *_symtab{};
+ size_t _symtab_size{};
+ constexpr DisasmNode *findNodeByAddress(uint32_t address) const;
+ constexpr size_t findFirstSymbolAtAddress(
+ uint32_t address, bool return_last_considered=false) const;
+ DisasmNode &insertNode(uint32_t address, NodeType);
+ void insertSymbol(uint32_t address, ReferenceType ref_type);
+ DisasmNode &insertReferencedBy(
+ const uint32_t by_addr,
+ const uint32_t ref_addr,
+ const NodeType type,
+ const ReferenceType ref_type);
+ constexpr bool canBeAllocated(const DisasmNode& node) const;
+ constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; }
+public:
+ constexpr const Symbol *Symtab() const { return _symtab; }
+ constexpr size_t SymbolsCount() const { return symbolsCount(); }
+ constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const;
+ constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const;
+ constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const
+ {
+ return findNodeByAddress(address);
+ };
+ void InsertNode(uint32_t address, NodeType type);
+ bool ApplySymbolsFromElf(const ELF::Image &);
+ void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false);
+ DisasmMap(DisasmMapType type): _type(type) {}
+ ~DisasmMap();
+};
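+
+// A minimal usage sketch (hypothetical `entry_point`, `settings` and `code`
+// values, where `code` is a DataView over the loaded binary):
+//
+//   DisasmMap disasm_map{DisasmMapType::kTraced};
+//   disasm_map.InsertNode(entry_point, NodeType::kTracedInstruction);
+//   disasm_map.Disasm(code, settings);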
+
+constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const
+{
+ if (address < kRomSizeBytes)
+ return _map[address / kInstructionSizeStepBytes];
+ return nullptr;
+}
+
+constexpr size_t DisasmMap::findFirstSymbolAtAddress(
+ uint32_t address, bool return_last_considered) const
+{
+ if (_symtab == nullptr || symbolsCount() < 1) {
+ return 0;
+ }
+ // A symbol at index 0 is a special null symbol and it must be skipped.
+ size_t start = 1, len = symbolsCount() - start, middle = start, index = 0;
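+ // Binary search over the sorted symtab. `index` is set only on an exact
+ // address match, unless return_last_considered requests the last
+ // position examined as a fallback.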
+ while (1) {
+ if (len == 0) {
+ if (return_last_considered && index == 0) {
+ index = start;
+ }
+ break;
+ }
+ middle = start + len / 2;
+ if (_symtab[middle].address >= address) {
+ if (_symtab[middle].address == address) {
+ index = middle;
+ }
+ // Look at the span right before the middle one on the next step
+ len = middle - start;
+ } else {
+ // Look at the span right after the middle one on the next step
+ len -= middle + 1 - start;
+ start = middle + 1;
+ }
+ }
+ return index;
+}
+
+static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length)
+{
+ return value >= at && value < at + length;
+}
+
+constexpr bool DisasmMap::HasSymbolsInRange(
+ uint32_t const address, size_t const length) const
+{
+ size_t index = findFirstSymbolAtAddress(address, true);
+ if (index == 0) {
+ // The symtab is empty
+ return false;
+ }
+ if (IsWithinRange(_symtab[index].address, address, length)) {
+ // The symbol is found right at the address, which is unlikely
+ return true;
+ }
+ if (_symtab[index].address < address) {
+ // Maybe the next symbol falls into the range?
+ if (index + 1 >= symbolsCount()) {
+ // No more symbols after the index
+ return false;
+ }
+ index++;
+ } else {
+ // Maybe the previous symbol falls into the range? (rather unlikely)
+ if (index < 2) {
+ // No more symbols before the index
+ return false;
+ }
+ index--;
+ }
+ if (IsWithinRange(_symtab[index].address, address, length)) {
+ return true;
+ }
+ return false;
+}
+
+constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const
+{
+ const auto size = node.size / kInstructionSizeStepBytes;
+ const auto *const node_real = findNodeByAddress(node.address);
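+ // Every word slot the node would span must be either free or already
+ // owned by this very node.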
+ for (size_t i = 1; i < size; i++) {
+ const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i];
+ if (ptr != nullptr && ptr != node_real) {
+ return false;
+ }
+ }
+ return true;
+}
+
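+// Maps the ref kind flag bits of the first argument to a reference type,
+// falling back to a branch reference.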
+static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds)
+{
+ return (ref_kinds & kRefCallMask)
+ ? ReferenceType::kCall
+ : (ref_kinds & kRef1ReadMask)
+ ? ReferenceType::kRead
+ : (ref_kinds & kRef1WriteMask)
+ ? ReferenceType::kWrite
+ : ReferenceType::kBranch;
+}
+
+static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds)
+{
+ // FIXME: AFAIK it is impossible for a call instruction to have a second
+ // argument, so the first condition could probably be dropped, but that needs testing.
+ return (ref_kinds & kRefCallMask)
+ ? ReferenceType::kCall
+ : (ref_kinds & kRef2ReadMask)
+ ? ReferenceType::kRead
+ : (ref_kinds & kRef2WriteMask)
+ ? ReferenceType::kWrite
+ : ReferenceType::kBranch;
+}
diff --git a/src/m68k.cpp b/src/m68k.cpp
index 254c898..9062fcf 100644
--- a/src/m68k.cpp
+++ b/src/m68k.cpp
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: Unlicense
*/
+#include "disasm.h"
#include "m68k.h"
#include "data_buffer.h"
#include "common.h"
@@ -1980,33 +1981,3 @@ int Op::FPrint(
return fprintf(stream, "%s%s", indent, mnemonic_str);
}
}
-
-void DisasmNode::AddReferencedBy(const uint32_t address_from, const ReferenceType ref_type)
-{
- ReferenceNode *node{};
- if (this->last_ref_by) {
- node = this->last_ref_by;
- } else {
- node = new ReferenceNode{};
- assert(node);
- this->ref_by = this->last_ref_by = node;
- }
- node->refs[node->refs_count] = ReferenceRecord{ref_type, address_from};
- node->refs_count++;
- if (node->refs_count >= kRefsCountPerBuffer) {
- ReferenceNode *new_node = new ReferenceNode{};
- assert(new_node);
- node->next = new_node;
- this->last_ref_by = new_node;
- }
-}
-
-DisasmNode::~DisasmNode()
-{
- ReferenceNode *ref{this->ref_by};
- while (ref) {
- ReferenceNode *prev = ref;
- ref = ref->next;
- delete prev;
- }
-}
diff --git a/src/m68k.h b/src/m68k.h
index 65429dc..6db0ebc 100644
--- a/src/m68k.h
+++ b/src/m68k.h
@@ -304,36 +304,6 @@ struct Arg {
uint32_t ref_addr = 0) const;
};
-enum class NodeType {
- kTracedInstruction,
- kRefInstruction,
- kData,
-};
-
-constexpr size_t kRefsCountPerBuffer = 10;
-
-constexpr size_t kMnemonicBufferSize = 10;
-constexpr size_t kArgsBufferSize = 80;
-
-enum class ReferenceType {
- kUnknown = 0,
- kCall,
- kBranch,
- kRead,
- kWrite,
-};
-
-struct ReferenceRecord {
- ReferenceType type{};
- uint32_t address{};
-};
-
-struct ReferenceNode {
- ReferenceNode *next{};
- ReferenceRecord refs[kRefsCountPerBuffer];
- uint32_t refs_count{};
-};
-
struct Op {
OpCode opcode{OpCode::kNone}; ///< Identifies instruction (mnemonic)
/// Size specifier, the suffix `b`, `w` or `l`
@@ -365,35 +335,8 @@ struct Op {
uint32_t ref2_addr = 0) const;
};
-struct DisasmNode {
- const NodeType type{};
- /// Address of the instruction (PC value basically)
- const uint32_t address{};
- /// Instruction size in bytes
- size_t size{kInstructionSizeStepBytes};
- /// Indicates whether `ref_addr` should be interpreted and how
- RefKindMask ref_kinds{};
- /// Address of first argument reference
- uint32_t ref1_addr{};
- /// Address of second argument reference
- uint32_t ref2_addr{};
- ReferenceNode *ref_by{};
- ReferenceNode *last_ref_by{};
- Op op{};
-
- /*! Disassembles instruction with arguments
- * returns size of whole instruction with arguments in bytes
- */
- size_t Disasm(const DataView &code);
- size_t DisasmAsRaw(const DataView &code);
- void AddReferencedBy(uint32_t address, ReferenceType);
- ~DisasmNode();
-};
-
-static constexpr inline bool IsInstruction(NodeType t)
-{
- return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction;
-}
+constexpr size_t kMnemonicBufferSize = 10;
+constexpr size_t kArgsBufferSize = 80;
static constexpr inline bool IsBRA(Op op)
{
diff --git a/src/main.cpp b/src/main.cpp
index 5fd86b9..278a34f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -3,7 +3,7 @@
#include "elf_image.h"
#include "data_buffer.h"
-#include "m68k.h"
+#include "disasm.h"
#include "common.h"
#define OPTPARSE_IMPLEMENTATION
@@ -28,389 +28,6 @@
#include <climits>
#include <sys/stat.h>
-enum class SymbolType: int {
- kNone = 0,
- kFunction,
- kObject,
-};
-
-struct Symbol {
- uint32_t address{};
- SymbolType type{};
- const char *name{};
- size_t size{};
-};
-
-enum class DisasmMapType {
- kTraced,
- kRaw,
-};
-
-class DisasmMap {
- const DisasmMapType _type;
- DisasmNode *_map[kDisasmMapSizeElements]{};
- Symbol *_symtab{};
- size_t _symtab_size{};
- constexpr DisasmNode *findNodeByAddress(uint32_t address) const;
- constexpr size_t findFirstSymbolAtAddress(
- uint32_t address, bool return_last_considered=false) const;
- DisasmNode &insertNode(uint32_t address, NodeType);
- void insertSymbol(uint32_t address, ReferenceType ref_type);
- DisasmNode &insertReferencedBy(
- const uint32_t by_addr,
- const uint32_t ref_addr,
- const NodeType type,
- const ReferenceType ref_type);
- constexpr bool canBeAllocated(const DisasmNode& node) const;
- constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; }
-public:
- constexpr const Symbol *Symtab() const { return _symtab; }
- constexpr size_t SymbolsCount() const { return symbolsCount(); }
- constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const;
- constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const;
- constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const
- {
- return findNodeByAddress(address);
- };
- void InsertNode(uint32_t address, NodeType type)
- {
- assert(_type == DisasmMapType::kTraced);
- insertNode(address, type);
- }
- bool ApplySymbolsFromElf(const ELF::Image &);
- void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false);
- DisasmMap(DisasmMapType type): _type(type) {}
- ~DisasmMap();
-};
-
-constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const
-{
- if (address < kRomSizeBytes)
- return _map[address / kInstructionSizeStepBytes];
- return nullptr;
-}
-
-constexpr size_t DisasmMap::findFirstSymbolAtAddress(
- uint32_t address, bool return_last_considered) const
-{
- if (_symtab == nullptr || symbolsCount() < 1) {
- return 0;
- }
- // A symbol at index 0 is a special null symbol and it must be skipped.
- size_t start = 1, len = symbolsCount() - start, middle = start, index = 0;
- while (1) {
- if (len == 0) {
- if (return_last_considered && index == 0) {
- index = start;
- }
- break;
- }
- middle = start + len / 2;
- if (_symtab[middle].address >= address) {
- if (_symtab[middle].address == address) {
- index = middle;
- }
- // Look at the span right before the middle one on the next step
- len = middle - start;
- } else {
- // Look at the span right after the middle one on the next step
- len -= middle + 1 - start;
- start = middle + 1;
- }
- }
- return index;
-}
-
-static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length)
-{
- return value >= at && value < at + length;
-}
-
-constexpr bool DisasmMap::HasSymbolsInRange(
- uint32_t const address, size_t const length) const
-{
- size_t index = findFirstSymbolAtAddress(address, true);
- if (index == 0) {
- // The symtab is empty
- return false;
- }
- if (IsWithinRange(_symtab[index].address, address, length)) {
- // The symbol is found right at the address, which is unlikely
- return true;
- }
- if (_symtab[index].address < address) {
- // Maybe the next symbol falls into the range?
- if (index + 1 >= symbolsCount()) {
- // No more symbols after the index
- return false;
- }
- index++;
- } else {
- // Maybe the previous symbol falls into the range? (unlikely at all)
- if (index < 2) {
- // No more symbols before the index
- return false;
- }
- index--;
- }
- if (IsWithinRange(_symtab[index].address, address, length)) {
- return true;
- }
- return false;
-}
-
-static constexpr uint32_t AlignInstructionAddress(const uint32_t address)
-{
- return address & ~1UL;
-}
-
-DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type)
-{
- auto *node = findNodeByAddress(address);
- if (node) {
- // Instruction nodes take precedence over data nodes. If a node that
- // was previously accessed only as data now turns out to be an
- // instruction, then it must become an instruction node.
- if (IsInstruction(type) && !IsInstruction(node->type)) {
- *const_cast<NodeType*>(&node->type) = type;
- // Make sure it is OpCode::kNone so it will be properly disassembled
- node->op = Op{};
- }
- return *node;
- }
- node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)});
- assert(node);
- _map[address / kInstructionSizeStepBytes] = node;
- return *node;
-}
-
-DisasmNode &DisasmMap::insertReferencedBy(
- const uint32_t by_addr,
- const uint32_t ref_addr,
- const NodeType type,
- const ReferenceType ref_type)
-{
- auto &ref_node = insertNode(ref_addr, type);
- ref_node.AddReferencedBy(by_addr, ref_type);
- return ref_node;
-}
-
-constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const
-{
- const auto size = node.size / kInstructionSizeStepBytes;
- const auto *const node_real = findNodeByAddress(node.address);
- for (size_t i = 1; i < size; i++) {
- const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i];
- if (ptr != nullptr && ptr != node_real) {
- return false;
- }
- }
- return true;
-}
-
-static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds)
-{
- return (ref_kinds & kRefCallMask)
- ? ReferenceType::kCall
- : (ref_kinds & kRef1ReadMask)
- ? ReferenceType::kRead
- : (ref_kinds & kRef1WriteMask)
- ? ReferenceType::kWrite
- : ReferenceType::kBranch;
-}
-
-static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds)
-{
- // FIXME: AFAIK it is impossible for a call instruction to have second
- // argument. I can probably drop the first condition, but it needs testing
- return (ref_kinds & kRefCallMask)
- ? ReferenceType::kCall
- : (ref_kinds & kRef2ReadMask)
- ? ReferenceType::kRead
- : (ref_kinds & kRef2WriteMask)
- ? ReferenceType::kWrite
- : ReferenceType::kBranch;
-}
-
-static constexpr bool IsNextLikelyAnInstruction(const Op &op)
-{
- return (op.opcode != OpCode::kNone &&
- op.opcode != OpCode::kRaw &&
- !IsBRA(op) &&
- op.opcode != OpCode::kJMP &&
- op.opcode != OpCode::kRTS &&
- op.opcode != OpCode::kRTE &&
- op.opcode != OpCode::kSTOP);
-}
-
-static int cmpsym(const void *p1, const void *p2)
-{
- const Symbol *sym1 = reinterpret_cast<const Symbol *>(p1);
- const Symbol *sym2 = reinterpret_cast<const Symbol *>(p2);
- if (sym1->address == sym2->address) {
- return strcmp(sym1->name, sym2->name);
- }
- return sym1->address < sym2->address ? -1 : 1;
-}
-
-constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t)
-{
- if (t == ELF::Symbol32Type::kObject) {
- return SymbolType::kObject;
- }
- if (t == ELF::Symbol32Type::kFunc) {
- return SymbolType::kFunction;
- }
- return SymbolType::kNone;
-}
-
-bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf)
-{
- const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab");
- if (!symtab.IsValid()) {
- fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n");
- return true;
- }
- FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size);
- if (symtab_stream == nullptr) {
- const int err = errno;
- fprintf(stderr,
- "open_memstream() for symtab failed: Error (%d): \"%s\"\n",
- err, strerror(err));
- return false;
- }
- const Symbol null_symbol{};
- if (null_symbol.name != nullptr && *null_symbol.name != '\0') {
- const size_t ret = fwrite(
- &null_symbol, sizeof null_symbol, 1, symtab_stream);
- (void) ret;
- assert(ret == 1);
- }
- const size_t nentries = symtab.size/symtab.entsize;
- for (size_t i = 0; i < nentries; i++) {
- const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(i);
- const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) ||
- (elfsym.type() == ELF::Symbol32Type::kObject) ||
- (elfsym.type() == ELF::Symbol32Type::kFunc);
- if (has_proper_type) {
- // XXX: Is it possible that it may have binding other than
- // Symbol32Bind::kGlobal when it is kFunc?
- // XXX: Yes, it is possible. It may be kLocal or kWeak for sure.
- const auto type = SymbolTypeFromElf32SymbolType(elfsym.type());
- const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size};
- if (symbol.name != nullptr && *symbol.name != '\0') {
- const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream);
- (void) ret;
- assert(ret == 1);
- }
- }
- }
- // No more symbols are going to be added further, so it may be closed now.
- fclose(symtab_stream);
- // The RenderNodeDisassembly() function expects the symbol table to be
- // sorted.
- qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym);
- return true;
-}
-
-void DisasmMap::Disasm(
- const DataView &code, const Settings &s, size_t at, bool nested)
-{
- // Some of logic of this function is covered by integration tests in
- // `test_walk_and_follow_jumps.bash`.
- bool inside_code_span = nested;
- while (at < Min(kRomSizeBytes, code.size)) {
- DisasmNode *node;
- if (_type == DisasmMapType::kTraced) {
- node = _map[at / kInstructionSizeStepBytes];
- if (!node) {
- if (inside_code_span) {
- node = &insertNode(at, NodeType::kTracedInstruction);
- } else {
- at += kInstructionSizeStepBytes;
- continue;
- }
- }
- } else {
- node = &insertNode(at, NodeType::kTracedInstruction);
- }
- if (node->op.opcode == OpCode::kNone || inside_code_span) {
- const auto size = node->Disasm(code);
- assert(size >= kInstructionSizeStepBytes);
- if (canBeAllocated(*node)) {
- // Spread across the size
- for (size_t o = kInstructionSizeStepBytes; o < size; o++) {
- _map[(node->address + o) / kInstructionSizeStepBytes] = node;
- }
- } else {
- node->DisasmAsRaw(code);
- }
- }
- inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op);
- if (nested && !inside_code_span) {
- return;
- }
- at += node->size;
- // NOTE: There is not much information about a reference passed further,
- // so just don't add a reference of immediate if s.imm_labels is false
- // enabled.
- const bool has_ref1 = (node->ref_kinds & kRef1ImmMask)
- ? s.imm_labels
- : (node->ref_kinds & kRef1Mask);
- const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1;
- if (has_code_ref1) {
- const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask))
- ? NodeType::kData : NodeType::kRefInstruction;
- const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds);
- auto &ref_node = insertReferencedBy(
- node->address, node->ref1_addr, type, ref_type);
- if (ref_node.op.opcode == OpCode::kNone) {
- if (s.follow_jumps) {
- Disasm(code, s, ref_node.address, true);
- } else {
- ref_node.DisasmAsRaw(code);
- }
- }
- }
- const bool has_ref2 = (node->ref_kinds & kRef2Mask);
- const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size);
- if (has_code_ref2) {
- const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask))
- ? NodeType::kData : NodeType::kRefInstruction;
- const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds);
- auto &ref_node = insertReferencedBy(
- node->address, node->ref2_addr, type, ref_type);
- if (ref_node.op.opcode == OpCode::kNone) {
- if (s.follow_jumps) {
- Disasm(code, s, ref_node.address, true);
- } else {
- ref_node.DisasmAsRaw(code);
- }
- }
- }
- }
-}
-
-DisasmMap::~DisasmMap()
-{
- for (size_t i = 0; i < kDisasmMapSizeElements; i++) {
- auto *const node = _map[i];
- if (!node) {
- continue;
- }
- const auto size = node->size / kInstructionSizeStepBytes;
- for (size_t o = 0; o < size; o++) {
- assert(_map[i + o] == node);
- _map[i + o] = nullptr;
- }
- delete node;
- i += size - 1;
- }
- if (_symtab != nullptr) {
- free(_symtab);
- }
-}
-
static size_t RenderRawDataComment(
char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code)
{