Diffstat (limited to 'src/disasm.cpp')
-rw-r--r-- | src/disasm.cpp | 260 |
1 file changed, 260 insertions, 0 deletions
diff --git a/src/disasm.cpp b/src/disasm.cpp
new file mode 100644
index 0000000..bf72674
--- /dev/null
+++ b/src/disasm.cpp
@@ -0,0 +1,260 @@
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include "disasm.h"
+#include "m68k.h"
+
+#include <cassert>
+#include <cstring>
+#include <cerrno>
+
+void DisasmNode::AddReferencedBy(const uint32_t address_from, const ReferenceType ref_type)
+{
+    ReferenceNode *node{};
+    if (this->last_ref_by) {
+        node = this->last_ref_by;
+    } else {
+        node = new ReferenceNode{};
+        assert(node);
+        this->ref_by = this->last_ref_by = node;
+    }
+    node->refs[node->refs_count] = ReferenceRecord{ref_type, address_from};
+    node->refs_count++;
+    if (node->refs_count >= kRefsCountPerBuffer) {
+        ReferenceNode *new_node = new ReferenceNode{};
+        assert(new_node);
+        node->next = new_node;
+        this->last_ref_by = new_node;
+    }
+}
+
+DisasmNode::~DisasmNode()
+{
+    ReferenceNode *ref{this->ref_by};
+    while (ref) {
+        ReferenceNode *prev = ref;
+        ref = ref->next;
+        delete prev;
+    }
+}
+
+static constexpr uint32_t AlignInstructionAddress(const uint32_t address)
+{
+    return address & ~1UL;
+}
+
+DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type)
+{
+    auto *node = findNodeByAddress(address);
+    if (node) {
+        // Instruction nodes take precedence over data nodes. If a node that
+        // was previously accessed only as data now turns out to be an
+        // instruction, then it must become an instruction node.
+        if (IsInstruction(type) && !IsInstruction(node->type)) {
+            *const_cast<NodeType*>(&node->type) = type;
+            // Make sure it is OpCode::kNone so it will be properly disassembled
+            node->op = Op{};
+        }
+        return *node;
+    }
+    node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)});
+    assert(node);
+    _map[address / kInstructionSizeStepBytes] = node;
+    return *node;
+}
+
+DisasmNode &DisasmMap::insertReferencedBy(
+        const uint32_t by_addr,
+        const uint32_t ref_addr,
+        const NodeType type,
+        const ReferenceType ref_type)
+{
+    auto &ref_node = insertNode(ref_addr, type);
+    ref_node.AddReferencedBy(by_addr, ref_type);
+    return ref_node;
+}
+
+void DisasmMap::InsertNode(uint32_t address, NodeType type)
+{
+    assert(_type == DisasmMapType::kTraced);
+    insertNode(address, type);
+}
+
+constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t)
+{
+    if (t == ELF::Symbol32Type::kObject) {
+        return SymbolType::kObject;
+    }
+    if (t == ELF::Symbol32Type::kFunc) {
+        return SymbolType::kFunction;
+    }
+    return SymbolType::kNone;
+}
+
+static int cmpsym(const void *p1, const void *p2)
+{
+    const Symbol *sym1 = reinterpret_cast<const Symbol *>(p1);
+    const Symbol *sym2 = reinterpret_cast<const Symbol *>(p2);
+    if (sym1->address == sym2->address) {
+        return strcmp(sym1->name, sym2->name);
+    }
+    return sym1->address < sym2->address ? -1 : 1;
+}
+
+bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf)
+{
+    const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab");
+    if (!symtab.IsValid()) {
+        fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n");
+        return true;
+    }
+    FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size);
+    if (symtab_stream == nullptr) {
+        const int err = errno;
+        fprintf(stderr,
+                "open_memstream() for symtab failed: Error (%d): \"%s\"\n",
+                err, strerror(err));
+        return false;
+    }
+    const Symbol null_symbol{};
+    if (null_symbol.name != nullptr && *null_symbol.name != '\0') {
+        const size_t ret = fwrite(
+                &null_symbol, sizeof null_symbol, 1, symtab_stream);
+        (void) ret;
+        assert(ret == 1);
+    }
+    const size_t nentries = symtab.size/symtab.entsize;
+    for (size_t i = 0; i < nentries; i++) {
+        const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(i);
+        const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) ||
+            (elfsym.type() == ELF::Symbol32Type::kObject) ||
+            (elfsym.type() == ELF::Symbol32Type::kFunc);
+        if (has_proper_type) {
+            // XXX: Is it possible that it may have binding other than
+            // Symbol32Bind::kGlobal when it is kFunc?
+            // XXX: Yes, it is possible. It may be kLocal or kWeak for sure.
+            const auto type = SymbolTypeFromElf32SymbolType(elfsym.type());
+            const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size};
+            if (symbol.name != nullptr && *symbol.name != '\0') {
+                const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream);
+                (void) ret;
+                assert(ret == 1);
+            }
+        }
+    }
+    // No more symbols are going to be added, so the stream may be closed now.
+    fclose(symtab_stream);
+    // The RenderNodeDisassembly() function expects the symbol table to be
+    // sorted.
+    qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym);
+    return true;
+}
+
+static constexpr bool IsNextLikelyAnInstruction(const Op &op)
+{
+    return (op.opcode != OpCode::kNone &&
+            op.opcode != OpCode::kRaw &&
+            !IsBRA(op) &&
+            op.opcode != OpCode::kJMP &&
+            op.opcode != OpCode::kRTS &&
+            op.opcode != OpCode::kRTE &&
+            op.opcode != OpCode::kSTOP);
+}
+
+void DisasmMap::Disasm(
+        const DataView &code, const Settings &s, size_t at, bool nested)
+{
+    // Some of the logic of this function is covered by integration tests in
+    // `test_walk_and_follow_jumps.bash`.
+    bool inside_code_span = nested;
+    while (at < Min(kRomSizeBytes, code.size)) {
+        DisasmNode *node;
+        if (_type == DisasmMapType::kTraced) {
+            node = _map[at / kInstructionSizeStepBytes];
+            if (!node) {
+                if (inside_code_span) {
+                    node = &insertNode(at, NodeType::kTracedInstruction);
+                } else {
+                    at += kInstructionSizeStepBytes;
+                    continue;
+                }
+            }
+        } else {
+            node = &insertNode(at, NodeType::kTracedInstruction);
+        }
+        if (node->op.opcode == OpCode::kNone || inside_code_span) {
+            const auto size = node->Disasm(code);
+            assert(size >= kInstructionSizeStepBytes);
+            if (canBeAllocated(*node)) {
+                // Spread across the size
+                for (size_t o = kInstructionSizeStepBytes; o < size; o++) {
+                    _map[(node->address + o) / kInstructionSizeStepBytes] = node;
+                }
+            } else {
+                node->DisasmAsRaw(code);
+            }
+        }
+        inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op);
+        if (nested && !inside_code_span) {
+            return;
+        }
+        at += node->size;
+        // NOTE: There is not much information about a reference passed
+        // further, so just don't add a reference to an immediate operand if
+        // s.imm_labels is disabled.
+        const bool has_ref1 = (node->ref_kinds & kRef1ImmMask)
+            ? s.imm_labels
+            : (node->ref_kinds & kRef1Mask);
+        const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1;
+        if (has_code_ref1) {
+            const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask))
+                ? NodeType::kData : NodeType::kRefInstruction;
+            const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds);
+            auto &ref_node = insertReferencedBy(
+                    node->address, node->ref1_addr, type, ref_type);
+            if (ref_node.op.opcode == OpCode::kNone) {
+                if (s.follow_jumps) {
+                    Disasm(code, s, ref_node.address, true);
+                } else {
+                    ref_node.DisasmAsRaw(code);
+                }
+            }
+        }
+        const bool has_ref2 = (node->ref_kinds & kRef2Mask);
+        const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size);
+        if (has_code_ref2) {
+            const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask))
+                ? NodeType::kData : NodeType::kRefInstruction;
+            const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds);
+            auto &ref_node = insertReferencedBy(
+                    node->address, node->ref2_addr, type, ref_type);
+            if (ref_node.op.opcode == OpCode::kNone) {
+                if (s.follow_jumps) {
+                    Disasm(code, s, ref_node.address, true);
+                } else {
+                    ref_node.DisasmAsRaw(code);
+                }
+            }
+        }
+    }
+}
+
+DisasmMap::~DisasmMap()
+{
+    for (size_t i = 0; i < kDisasmMapSizeElements; i++) {
+        auto *const node = _map[i];
+        if (!node) {
+            continue;
+        }
+        const auto size = node->size / kInstructionSizeStepBytes;
+        for (size_t o = 0; o < size; o++) {
+            assert(_map[i + o] == node);
+            _map[i + o] = nullptr;
+        }
+        delete node;
+        i += size - 1;
+    }
+    if (_symtab != nullptr) {
+        free(_symtab);
+    }
+}
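For reference, here is a minimal sketch of how this new DisasmMap might be driven. Only Disasm(), InsertNode(), NodeType, DisasmMapType and the Settings flags (walk, follow_jumps, imm_labels) are visible in this diff; the DataView aggregate layout and the DisasmMap constructor are assumptions, so treat it as an illustration rather than the project's actual entry point:

    #include "disasm.h"

    // Hypothetical driver: seed a trace at offset 0, then let Disasm() walk
    // linearly and recurse into referenced branch/jump targets.
    // DataView{data, size} and DisasmMap{DisasmMapType::kTraced} are assumed
    // constructions; everything else appears in the diff above.
    static void DisasmWholeRom(const uint8_t *data, const size_t size)
    {
        const DataView code{data, size};
        Settings s{};
        s.walk = true;          // keep disassembling while the previous op can fall through
        s.follow_jumps = true;  // descend into referenced instructions
        DisasmMap map{DisasmMapType::kTraced};
        map.InsertNode(0, NodeType::kTracedInstruction); // entry point
        map.Disasm(code, s, 0, false);
    }

With walk disabled the loop only disassembles addresses that were explicitly traced; with follow_jumps disabled, reference targets are rendered as raw data via DisasmAsRaw() instead of being recursed into.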