diff options
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | src/disasm.cpp | 260 | ||||
-rw-r--r-- | src/disasm.h | 232 | ||||
-rw-r--r-- | src/m68k.cpp | 31 | ||||
-rw-r--r-- | src/m68k.h | 61 | ||||
-rw-r--r-- | src/main.cpp | 385 |
7 files changed, 498 insertions, 473 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 71863ed..2186d15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,7 @@ set(common_debug_flags add_executable(m68k-disasm src/main.cpp src/data_buffer.cpp + src/disasm.cpp src/m68k.cpp src/elf_image.cpp ) @@ -14,6 +14,7 @@ _LDFLAGS = $(LDFLAGS) $(OPTFLAGS) $(addprefix -T,$(LDSCRIPTS)) OBJECTS=main.o \ data_buffer.o \ + disasm.o \ elf_image.o \ m68k.o diff --git a/src/disasm.cpp b/src/disasm.cpp new file mode 100644 index 0000000..bf72674 --- /dev/null +++ b/src/disasm.cpp @@ -0,0 +1,260 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "disasm.h" +#include "m68k.h" + +#include <cassert> +#include <cstring> +#include <cerrno> + +void DisasmNode::AddReferencedBy(const uint32_t address_from, const ReferenceType ref_type) +{ + ReferenceNode *node{}; + if (this->last_ref_by) { + node = this->last_ref_by; + } else { + node = new ReferenceNode{}; + assert(node); + this->ref_by = this->last_ref_by = node; + } + node->refs[node->refs_count] = ReferenceRecord{ref_type, address_from}; + node->refs_count++; + if (node->refs_count >= kRefsCountPerBuffer) { + ReferenceNode *new_node = new ReferenceNode{}; + assert(new_node); + node->next = new_node; + this->last_ref_by = new_node; + } +} + +DisasmNode::~DisasmNode() +{ + ReferenceNode *ref{this->ref_by}; + while (ref) { + ReferenceNode *prev = ref; + ref = ref->next; + delete prev; + } +} + +static constexpr uint32_t AlignInstructionAddress(const uint32_t address) +{ + return address & ~1UL; +} + +DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type) +{ + auto *node = findNodeByAddress(address); + if (node) { + // Instruction nodes take precedence over data nodes. If a node that + // was previously accessed only as data now turns out to be an + // instruction, then it must become an instruction node. + if (IsInstruction(type) && !IsInstruction(node->type)) { + *const_cast<NodeType*>(&node->type) = type; + // Make sure it is OpCode::kNone so it will be properly disassembled + node->op = Op{}; + } + return *node; + } + node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)}); + assert(node); + _map[address / kInstructionSizeStepBytes] = node; + return *node; +} + +DisasmNode &DisasmMap::insertReferencedBy( + const uint32_t by_addr, + const uint32_t ref_addr, + const NodeType type, + const ReferenceType ref_type) +{ + auto &ref_node = insertNode(ref_addr, type); + ref_node.AddReferencedBy(by_addr, ref_type); + return ref_node; +} + +void DisasmMap::InsertNode(uint32_t address, NodeType type) +{ + assert(_type == DisasmMapType::kTraced); + insertNode(address, type); +} + +constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t) +{ + if (t == ELF::Symbol32Type::kObject) { + return SymbolType::kObject; + } + if (t == ELF::Symbol32Type::kFunc) { + return SymbolType::kFunction; + } + return SymbolType::kNone; +} + +static int cmpsym(const void *p1, const void *p2) +{ + const Symbol *sym1 = reinterpret_cast<const Symbol *>(p1); + const Symbol *sym2 = reinterpret_cast<const Symbol *>(p2); + if (sym1->address == sym2->address) { + return strcmp(sym1->name, sym2->name); + } + return sym1->address < sym2->address ? -1 : 1; +} + +bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf) +{ + const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab"); + if (!symtab.IsValid()) { + fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n"); + return true; + } + FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size); + if (symtab_stream == nullptr) { + const int err = errno; + fprintf(stderr, + "open_memstream() for symtab failed: Error (%d): \"%s\"\n", + err, strerror(err)); + return false; + } + const Symbol null_symbol{}; + if (null_symbol.name != nullptr && *null_symbol.name != '\0') { + const size_t ret = fwrite( + &null_symbol, sizeof null_symbol, 1, symtab_stream); + (void) ret; + assert(ret == 1); + } + const size_t nentries = symtab.size/symtab.entsize; + for (size_t i = 0; i < nentries; i++) { + const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(i); + const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) || + (elfsym.type() == ELF::Symbol32Type::kObject) || + (elfsym.type() == ELF::Symbol32Type::kFunc); + if (has_proper_type) { + // XXX: Is it possible that it may have binding other than + // Symbol32Bind::kGlobal when it is kFunc? + // XXX: Yes, it is possible. It may be kLocal or kWeak for sure. + const auto type = SymbolTypeFromElf32SymbolType(elfsym.type()); + const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size}; + if (symbol.name != nullptr && *symbol.name != '\0') { + const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream); + (void) ret; + assert(ret == 1); + } + } + } + // No more symbols are going to be added further, so it may be closed now. + fclose(symtab_stream); + // The RenderNodeDisassembly() function expects the symbol table to be + // sorted. + qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym); + return true; +} + +static constexpr bool IsNextLikelyAnInstruction(const Op &op) +{ + return (op.opcode != OpCode::kNone && + op.opcode != OpCode::kRaw && + !IsBRA(op) && + op.opcode != OpCode::kJMP && + op.opcode != OpCode::kRTS && + op.opcode != OpCode::kRTE && + op.opcode != OpCode::kSTOP); +} + +void DisasmMap::Disasm( + const DataView &code, const Settings &s, size_t at, bool nested) +{ + // Some of logic of this function is covered by integration tests in + // `test_walk_and_follow_jumps.bash`. + bool inside_code_span = nested; + while (at < Min(kRomSizeBytes, code.size)) { + DisasmNode *node; + if (_type == DisasmMapType::kTraced) { + node = _map[at / kInstructionSizeStepBytes]; + if (!node) { + if (inside_code_span) { + node = &insertNode(at, NodeType::kTracedInstruction); + } else { + at += kInstructionSizeStepBytes; + continue; + } + } + } else { + node = &insertNode(at, NodeType::kTracedInstruction); + } + if (node->op.opcode == OpCode::kNone || inside_code_span) { + const auto size = node->Disasm(code); + assert(size >= kInstructionSizeStepBytes); + if (canBeAllocated(*node)) { + // Spread across the size + for (size_t o = kInstructionSizeStepBytes; o < size; o++) { + _map[(node->address + o) / kInstructionSizeStepBytes] = node; + } + } else { + node->DisasmAsRaw(code); + } + } + inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op); + if (nested && !inside_code_span) { + return; + } + at += node->size; + // NOTE: There is not much information about a reference passed further, + // so just don't add a reference of immediate if s.imm_labels is false + // enabled. + const bool has_ref1 = (node->ref_kinds & kRef1ImmMask) + ? s.imm_labels + : (node->ref_kinds & kRef1Mask); + const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1; + if (has_code_ref1) { + const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) + ? NodeType::kData : NodeType::kRefInstruction; + const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds); + auto &ref_node = insertReferencedBy( + node->address, node->ref1_addr, type, ref_type); + if (ref_node.op.opcode == OpCode::kNone) { + if (s.follow_jumps) { + Disasm(code, s, ref_node.address, true); + } else { + ref_node.DisasmAsRaw(code); + } + } + } + const bool has_ref2 = (node->ref_kinds & kRef2Mask); + const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size); + if (has_code_ref2) { + const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) + ? NodeType::kData : NodeType::kRefInstruction; + const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds); + auto &ref_node = insertReferencedBy( + node->address, node->ref2_addr, type, ref_type); + if (ref_node.op.opcode == OpCode::kNone) { + if (s.follow_jumps) { + Disasm(code, s, ref_node.address, true); + } else { + ref_node.DisasmAsRaw(code); + } + } + } + } +} + +DisasmMap::~DisasmMap() +{ + for (size_t i = 0; i < kDisasmMapSizeElements; i++) { + auto *const node = _map[i]; + if (!node) { + continue; + } + const auto size = node->size / kInstructionSizeStepBytes; + for (size_t o = 0; o < size; o++) { + assert(_map[i + o] == node); + _map[i + o] = nullptr; + } + delete node; + i += size - 1; + } + if (_symtab != nullptr) { + free(_symtab); + } +} diff --git a/src/disasm.h b/src/disasm.h new file mode 100644 index 0000000..90906ed --- /dev/null +++ b/src/disasm.h @@ -0,0 +1,232 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_image.h" +#include "common.h" +#include "m68k.h" + +#include <cstdint> +#include <cstddef> + +enum class ReferenceType { + kUnknown = 0, + kCall, + kBranch, + kRead, + kWrite, +}; + +struct ReferenceRecord { + ReferenceType type{}; + uint32_t address{}; +}; + +constexpr size_t kRefsCountPerBuffer = 10; + +struct ReferenceNode { + ReferenceNode *next{}; + ReferenceRecord refs[kRefsCountPerBuffer]; + uint32_t refs_count{}; +}; + +enum class NodeType { + kTracedInstruction, + kRefInstruction, + kData, +}; + +struct DisasmNode { + const NodeType type{}; + /// Address of the instruction (PC value basically) + const uint32_t address{}; + /// Instruction size in bytes + size_t size{kInstructionSizeStepBytes}; + /// Indicates whether `ref_addr` should be interpreted and how + RefKindMask ref_kinds{}; + /// Address of first argument reference + uint32_t ref1_addr{}; + /// Address of second argument reference + uint32_t ref2_addr{}; + ReferenceNode *ref_by{}; + ReferenceNode *last_ref_by{}; + Op op{}; + + /*! Disassembles instruction with arguments + * returns size of whole instruction with arguments in bytes + */ + size_t Disasm(const DataView &code); + size_t DisasmAsRaw(const DataView &code); + void AddReferencedBy(uint32_t address, ReferenceType); + ~DisasmNode(); +}; + +static constexpr inline bool IsInstruction(NodeType t) +{ + return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction; +} + +enum class SymbolType: int { + kNone = 0, + kFunction, + kObject, +}; + +struct Symbol { + uint32_t address{}; + SymbolType type{}; + const char *name{}; + size_t size{}; +}; + +enum class DisasmMapType { + kTraced, + kRaw, +}; + +class DisasmMap { + const DisasmMapType _type; + DisasmNode *_map[kDisasmMapSizeElements]{}; + Symbol *_symtab{}; + size_t _symtab_size{}; + constexpr DisasmNode *findNodeByAddress(uint32_t address) const; + constexpr size_t findFirstSymbolAtAddress( + uint32_t address, bool return_last_considered=false) const; + DisasmNode &insertNode(uint32_t address, NodeType); + void insertSymbol(uint32_t address, ReferenceType ref_type); + DisasmNode &insertReferencedBy( + const uint32_t by_addr, + const uint32_t ref_addr, + const NodeType type, + const ReferenceType ref_type); + constexpr bool canBeAllocated(const DisasmNode& node) const; + constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; } +public: + constexpr const Symbol *Symtab() const { return _symtab; } + constexpr size_t SymbolsCount() const { return symbolsCount(); } + constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const; + constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const; + constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const + { + return findNodeByAddress(address); + }; + void InsertNode(uint32_t address, NodeType type); + bool ApplySymbolsFromElf(const ELF::Image &); + void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); + DisasmMap(DisasmMapType type): _type(type) {} + ~DisasmMap(); +}; + +constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const +{ + if (address < kRomSizeBytes) + return _map[address / kInstructionSizeStepBytes]; + return nullptr; +} + +constexpr size_t DisasmMap::findFirstSymbolAtAddress( + uint32_t address, bool return_last_considered) const +{ + if (_symtab == nullptr || symbolsCount() < 1) { + return 0; + } + // A symbol at index 0 is a special null symbol and it must be skipped. + size_t start = 1, len = symbolsCount() - start, middle = start, index = 0; + while (1) { + if (len == 0) { + if (return_last_considered && index == 0) { + index = start; + } + break; + } + middle = start + len / 2; + if (_symtab[middle].address >= address) { + if (_symtab[middle].address == address) { + index = middle; + } + // Look at the span right before the middle one on the next step + len = middle - start; + } else { + // Look at the span right after the middle one on the next step + len -= middle + 1 - start; + start = middle + 1; + } + } + return index; +} + +static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length) +{ + return value >= at && value < at + length; +} + +constexpr bool DisasmMap::HasSymbolsInRange( + uint32_t const address, size_t const length) const +{ + size_t index = findFirstSymbolAtAddress(address, true); + if (index == 0) { + // The symtab is empty + return false; + } + if (IsWithinRange(_symtab[index].address, address, length)) { + // The symbol is found right at the address, which is unlikely + return true; + } + if (_symtab[index].address < address) { + // Maybe the next symbol falls into the range? + if (index + 1 >= symbolsCount()) { + // No more symbols after the index + return false; + } + index++; + } else { + // Maybe the previous symbol falls into the range? (unlikely at all) + if (index < 2) { + // No more symbols before the index + return false; + } + index--; + } + if (IsWithinRange(_symtab[index].address, address, length)) { + return true; + } + return false; +} + +constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const +{ + const auto size = node.size / kInstructionSizeStepBytes; + const auto *const node_real = findNodeByAddress(node.address); + for (size_t i = 1; i < size; i++) { + const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i]; + if (ptr != nullptr && ptr != node_real) { + return false; + } + } + return true; +} + +static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds) +{ + return (ref_kinds & kRefCallMask) + ? ReferenceType::kCall + : (ref_kinds & kRef1ReadMask) + ? ReferenceType::kRead + : (ref_kinds & kRef1WriteMask) + ? ReferenceType::kWrite + : ReferenceType::kBranch; +} + +static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) +{ + // FIXME: AFAIK it is impossible for a call instruction to have second + // argument. I can probably drop the first condition, but it needs testing + return (ref_kinds & kRefCallMask) + ? ReferenceType::kCall + : (ref_kinds & kRef2ReadMask) + ? ReferenceType::kRead + : (ref_kinds & kRef2WriteMask) + ? ReferenceType::kWrite + : ReferenceType::kBranch; +} diff --git a/src/m68k.cpp b/src/m68k.cpp index 254c898..9062fcf 100644 --- a/src/m68k.cpp +++ b/src/m68k.cpp @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: Unlicense */ +#include "disasm.h" #include "m68k.h" #include "data_buffer.h" #include "common.h" @@ -1980,33 +1981,3 @@ int Op::FPrint( return fprintf(stream, "%s%s", indent, mnemonic_str); } } - -void DisasmNode::AddReferencedBy(const uint32_t address_from, const ReferenceType ref_type) -{ - ReferenceNode *node{}; - if (this->last_ref_by) { - node = this->last_ref_by; - } else { - node = new ReferenceNode{}; - assert(node); - this->ref_by = this->last_ref_by = node; - } - node->refs[node->refs_count] = ReferenceRecord{ref_type, address_from}; - node->refs_count++; - if (node->refs_count >= kRefsCountPerBuffer) { - ReferenceNode *new_node = new ReferenceNode{}; - assert(new_node); - node->next = new_node; - this->last_ref_by = new_node; - } -} - -DisasmNode::~DisasmNode() -{ - ReferenceNode *ref{this->ref_by}; - while (ref) { - ReferenceNode *prev = ref; - ref = ref->next; - delete prev; - } -} @@ -304,36 +304,6 @@ struct Arg { uint32_t ref_addr = 0) const; }; -enum class NodeType { - kTracedInstruction, - kRefInstruction, - kData, -}; - -constexpr size_t kRefsCountPerBuffer = 10; - -constexpr size_t kMnemonicBufferSize = 10; -constexpr size_t kArgsBufferSize = 80; - -enum class ReferenceType { - kUnknown = 0, - kCall, - kBranch, - kRead, - kWrite, -}; - -struct ReferenceRecord { - ReferenceType type{}; - uint32_t address{}; -}; - -struct ReferenceNode { - ReferenceNode *next{}; - ReferenceRecord refs[kRefsCountPerBuffer]; - uint32_t refs_count{}; -}; - struct Op { OpCode opcode{OpCode::kNone}; ///< Identifies instruction (mnemonic) /// Size specifier, the suffix `b`, `w` or `l` @@ -365,35 +335,8 @@ struct Op { uint32_t ref2_addr = 0) const; }; -struct DisasmNode { - const NodeType type{}; - /// Address of the instruction (PC value basically) - const uint32_t address{}; - /// Instruction size in bytes - size_t size{kInstructionSizeStepBytes}; - /// Indicates whether `ref_addr` should be interpreted and how - RefKindMask ref_kinds{}; - /// Address of first argument reference - uint32_t ref1_addr{}; - /// Address of second argument reference - uint32_t ref2_addr{}; - ReferenceNode *ref_by{}; - ReferenceNode *last_ref_by{}; - Op op{}; - - /*! Disassembles instruction with arguments - * returns size of whole instruction with arguments in bytes - */ - size_t Disasm(const DataView &code); - size_t DisasmAsRaw(const DataView &code); - void AddReferencedBy(uint32_t address, ReferenceType); - ~DisasmNode(); -}; - -static constexpr inline bool IsInstruction(NodeType t) -{ - return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction; -} +constexpr size_t kMnemonicBufferSize = 10; +constexpr size_t kArgsBufferSize = 80; static constexpr inline bool IsBRA(Op op) { diff --git a/src/main.cpp b/src/main.cpp index 5fd86b9..278a34f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,7 +3,7 @@ #include "elf_image.h" #include "data_buffer.h" -#include "m68k.h" +#include "disasm.h" #include "common.h" #define OPTPARSE_IMPLEMENTATION @@ -28,389 +28,6 @@ #include <climits> #include <sys/stat.h> -enum class SymbolType: int { - kNone = 0, - kFunction, - kObject, -}; - -struct Symbol { - uint32_t address{}; - SymbolType type{}; - const char *name{}; - size_t size{}; -}; - -enum class DisasmMapType { - kTraced, - kRaw, -}; - -class DisasmMap { - const DisasmMapType _type; - DisasmNode *_map[kDisasmMapSizeElements]{}; - Symbol *_symtab{}; - size_t _symtab_size{}; - constexpr DisasmNode *findNodeByAddress(uint32_t address) const; - constexpr size_t findFirstSymbolAtAddress( - uint32_t address, bool return_last_considered=false) const; - DisasmNode &insertNode(uint32_t address, NodeType); - void insertSymbol(uint32_t address, ReferenceType ref_type); - DisasmNode &insertReferencedBy( - const uint32_t by_addr, - const uint32_t ref_addr, - const NodeType type, - const ReferenceType ref_type); - constexpr bool canBeAllocated(const DisasmNode& node) const; - constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; } -public: - constexpr const Symbol *Symtab() const { return _symtab; } - constexpr size_t SymbolsCount() const { return symbolsCount(); } - constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const; - constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const; - constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const - { - return findNodeByAddress(address); - }; - void InsertNode(uint32_t address, NodeType type) - { - assert(_type == DisasmMapType::kTraced); - insertNode(address, type); - } - bool ApplySymbolsFromElf(const ELF::Image &); - void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); - DisasmMap(DisasmMapType type): _type(type) {} - ~DisasmMap(); -}; - -constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const -{ - if (address < kRomSizeBytes) - return _map[address / kInstructionSizeStepBytes]; - return nullptr; -} - -constexpr size_t DisasmMap::findFirstSymbolAtAddress( - uint32_t address, bool return_last_considered) const -{ - if (_symtab == nullptr || symbolsCount() < 1) { - return 0; - } - // A symbol at index 0 is a special null symbol and it must be skipped. - size_t start = 1, len = symbolsCount() - start, middle = start, index = 0; - while (1) { - if (len == 0) { - if (return_last_considered && index == 0) { - index = start; - } - break; - } - middle = start + len / 2; - if (_symtab[middle].address >= address) { - if (_symtab[middle].address == address) { - index = middle; - } - // Look at the span right before the middle one on the next step - len = middle - start; - } else { - // Look at the span right after the middle one on the next step - len -= middle + 1 - start; - start = middle + 1; - } - } - return index; -} - -static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length) -{ - return value >= at && value < at + length; -} - -constexpr bool DisasmMap::HasSymbolsInRange( - uint32_t const address, size_t const length) const -{ - size_t index = findFirstSymbolAtAddress(address, true); - if (index == 0) { - // The symtab is empty - return false; - } - if (IsWithinRange(_symtab[index].address, address, length)) { - // The symbol is found right at the address, which is unlikely - return true; - } - if (_symtab[index].address < address) { - // Maybe the next symbol falls into the range? - if (index + 1 >= symbolsCount()) { - // No more symbols after the index - return false; - } - index++; - } else { - // Maybe the previous symbol falls into the range? (unlikely at all) - if (index < 2) { - // No more symbols before the index - return false; - } - index--; - } - if (IsWithinRange(_symtab[index].address, address, length)) { - return true; - } - return false; -} - -static constexpr uint32_t AlignInstructionAddress(const uint32_t address) -{ - return address & ~1UL; -} - -DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type) -{ - auto *node = findNodeByAddress(address); - if (node) { - // Instruction nodes take precedence over data nodes. If a node that - // was previously accessed only as data now turns out to be an - // instruction, then it must become an instruction node. - if (IsInstruction(type) && !IsInstruction(node->type)) { - *const_cast<NodeType*>(&node->type) = type; - // Make sure it is OpCode::kNone so it will be properly disassembled - node->op = Op{}; - } - return *node; - } - node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)}); - assert(node); - _map[address / kInstructionSizeStepBytes] = node; - return *node; -} - -DisasmNode &DisasmMap::insertReferencedBy( - const uint32_t by_addr, - const uint32_t ref_addr, - const NodeType type, - const ReferenceType ref_type) -{ - auto &ref_node = insertNode(ref_addr, type); - ref_node.AddReferencedBy(by_addr, ref_type); - return ref_node; -} - -constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const -{ - const auto size = node.size / kInstructionSizeStepBytes; - const auto *const node_real = findNodeByAddress(node.address); - for (size_t i = 1; i < size; i++) { - const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i]; - if (ptr != nullptr && ptr != node_real) { - return false; - } - } - return true; -} - -static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds) -{ - return (ref_kinds & kRefCallMask) - ? ReferenceType::kCall - : (ref_kinds & kRef1ReadMask) - ? ReferenceType::kRead - : (ref_kinds & kRef1WriteMask) - ? ReferenceType::kWrite - : ReferenceType::kBranch; -} - -static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) -{ - // FIXME: AFAIK it is impossible for a call instruction to have second - // argument. I can probably drop the first condition, but it needs testing - return (ref_kinds & kRefCallMask) - ? ReferenceType::kCall - : (ref_kinds & kRef2ReadMask) - ? ReferenceType::kRead - : (ref_kinds & kRef2WriteMask) - ? ReferenceType::kWrite - : ReferenceType::kBranch; -} - -static constexpr bool IsNextLikelyAnInstruction(const Op &op) -{ - return (op.opcode != OpCode::kNone && - op.opcode != OpCode::kRaw && - !IsBRA(op) && - op.opcode != OpCode::kJMP && - op.opcode != OpCode::kRTS && - op.opcode != OpCode::kRTE && - op.opcode != OpCode::kSTOP); -} - -static int cmpsym(const void *p1, const void *p2) -{ - const Symbol *sym1 = reinterpret_cast<const Symbol *>(p1); - const Symbol *sym2 = reinterpret_cast<const Symbol *>(p2); - if (sym1->address == sym2->address) { - return strcmp(sym1->name, sym2->name); - } - return sym1->address < sym2->address ? -1 : 1; -} - -constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t) -{ - if (t == ELF::Symbol32Type::kObject) { - return SymbolType::kObject; - } - if (t == ELF::Symbol32Type::kFunc) { - return SymbolType::kFunction; - } - return SymbolType::kNone; -} - -bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf) -{ - const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab"); - if (!symtab.IsValid()) { - fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n"); - return true; - } - FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size); - if (symtab_stream == nullptr) { - const int err = errno; - fprintf(stderr, - "open_memstream() for symtab failed: Error (%d): \"%s\"\n", - err, strerror(err)); - return false; - } - const Symbol null_symbol{}; - if (null_symbol.name != nullptr && *null_symbol.name != '\0') { - const size_t ret = fwrite( - &null_symbol, sizeof null_symbol, 1, symtab_stream); - (void) ret; - assert(ret == 1); - } - const size_t nentries = symtab.size/symtab.entsize; - for (size_t i = 0; i < nentries; i++) { - const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(i); - const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) || - (elfsym.type() == ELF::Symbol32Type::kObject) || - (elfsym.type() == ELF::Symbol32Type::kFunc); - if (has_proper_type) { - // XXX: Is it possible that it may have binding other than - // Symbol32Bind::kGlobal when it is kFunc? - // XXX: Yes, it is possible. It may be kLocal or kWeak for sure. - const auto type = SymbolTypeFromElf32SymbolType(elfsym.type()); - const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size}; - if (symbol.name != nullptr && *symbol.name != '\0') { - const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream); - (void) ret; - assert(ret == 1); - } - } - } - // No more symbols are going to be added further, so it may be closed now. - fclose(symtab_stream); - // The RenderNodeDisassembly() function expects the symbol table to be - // sorted. - qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym); - return true; -} - -void DisasmMap::Disasm( - const DataView &code, const Settings &s, size_t at, bool nested) -{ - // Some of logic of this function is covered by integration tests in - // `test_walk_and_follow_jumps.bash`. - bool inside_code_span = nested; - while (at < Min(kRomSizeBytes, code.size)) { - DisasmNode *node; - if (_type == DisasmMapType::kTraced) { - node = _map[at / kInstructionSizeStepBytes]; - if (!node) { - if (inside_code_span) { - node = &insertNode(at, NodeType::kTracedInstruction); - } else { - at += kInstructionSizeStepBytes; - continue; - } - } - } else { - node = &insertNode(at, NodeType::kTracedInstruction); - } - if (node->op.opcode == OpCode::kNone || inside_code_span) { - const auto size = node->Disasm(code); - assert(size >= kInstructionSizeStepBytes); - if (canBeAllocated(*node)) { - // Spread across the size - for (size_t o = kInstructionSizeStepBytes; o < size; o++) { - _map[(node->address + o) / kInstructionSizeStepBytes] = node; - } - } else { - node->DisasmAsRaw(code); - } - } - inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op); - if (nested && !inside_code_span) { - return; - } - at += node->size; - // NOTE: There is not much information about a reference passed further, - // so just don't add a reference of immediate if s.imm_labels is false - // enabled. - const bool has_ref1 = (node->ref_kinds & kRef1ImmMask) - ? s.imm_labels - : (node->ref_kinds & kRef1Mask); - const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1; - if (has_code_ref1) { - const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) - ? NodeType::kData : NodeType::kRefInstruction; - const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds); - auto &ref_node = insertReferencedBy( - node->address, node->ref1_addr, type, ref_type); - if (ref_node.op.opcode == OpCode::kNone) { - if (s.follow_jumps) { - Disasm(code, s, ref_node.address, true); - } else { - ref_node.DisasmAsRaw(code); - } - } - } - const bool has_ref2 = (node->ref_kinds & kRef2Mask); - const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size); - if (has_code_ref2) { - const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) - ? NodeType::kData : NodeType::kRefInstruction; - const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds); - auto &ref_node = insertReferencedBy( - node->address, node->ref2_addr, type, ref_type); - if (ref_node.op.opcode == OpCode::kNone) { - if (s.follow_jumps) { - Disasm(code, s, ref_node.address, true); - } else { - ref_node.DisasmAsRaw(code); - } - } - } - } -} - -DisasmMap::~DisasmMap() -{ - for (size_t i = 0; i < kDisasmMapSizeElements; i++) { - auto *const node = _map[i]; - if (!node) { - continue; - } - const auto size = node->size / kInstructionSizeStepBytes; - for (size_t o = 0; o < size; o++) { - assert(_map[i + o] == node); - _map[i + o] = nullptr; - } - delete node; - i += size - 1; - } - if (_symtab != nullptr) { - free(_symtab); - } -} - static size_t RenderRawDataComment( char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code) { |