diff options
Diffstat (limited to 'src/main.cpp')
-rw-r--r-- | src/main.cpp | 385 |
1 files changed, 1 insertions, 384 deletions
diff --git a/src/main.cpp b/src/main.cpp index 5fd86b9..278a34f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,7 +3,7 @@ #include "elf_image.h" #include "data_buffer.h" -#include "m68k.h" +#include "disasm.h" #include "common.h" #define OPTPARSE_IMPLEMENTATION @@ -28,389 +28,6 @@ #include <climits> #include <sys/stat.h> -enum class SymbolType: int { - kNone = 0, - kFunction, - kObject, -}; - -struct Symbol { - uint32_t address{}; - SymbolType type{}; - const char *name{}; - size_t size{}; -}; - -enum class DisasmMapType { - kTraced, - kRaw, -}; - -class DisasmMap { - const DisasmMapType _type; - DisasmNode *_map[kDisasmMapSizeElements]{}; - Symbol *_symtab{}; - size_t _symtab_size{}; - constexpr DisasmNode *findNodeByAddress(uint32_t address) const; - constexpr size_t findFirstSymbolAtAddress( - uint32_t address, bool return_last_considered=false) const; - DisasmNode &insertNode(uint32_t address, NodeType); - void insertSymbol(uint32_t address, ReferenceType ref_type); - DisasmNode &insertReferencedBy( - const uint32_t by_addr, - const uint32_t ref_addr, - const NodeType type, - const ReferenceType ref_type); - constexpr bool canBeAllocated(const DisasmNode& node) const; - constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; } -public: - constexpr const Symbol *Symtab() const { return _symtab; } - constexpr size_t SymbolsCount() const { return symbolsCount(); } - constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const; - constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const; - constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const - { - return findNodeByAddress(address); - }; - void InsertNode(uint32_t address, NodeType type) - { - assert(_type == DisasmMapType::kTraced); - insertNode(address, type); - } - bool ApplySymbolsFromElf(const ELF::Image &); - void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); - DisasmMap(DisasmMapType type): _type(type) {} - ~DisasmMap(); -}; - -constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const -{ - if (address < kRomSizeBytes) - return _map[address / kInstructionSizeStepBytes]; - return nullptr; -} - -constexpr size_t DisasmMap::findFirstSymbolAtAddress( - uint32_t address, bool return_last_considered) const -{ - if (_symtab == nullptr || symbolsCount() < 1) { - return 0; - } - // A symbol at index 0 is a special null symbol and it must be skipped. - size_t start = 1, len = symbolsCount() - start, middle = start, index = 0; - while (1) { - if (len == 0) { - if (return_last_considered && index == 0) { - index = start; - } - break; - } - middle = start + len / 2; - if (_symtab[middle].address >= address) { - if (_symtab[middle].address == address) { - index = middle; - } - // Look at the span right before the middle one on the next step - len = middle - start; - } else { - // Look at the span right after the middle one on the next step - len -= middle + 1 - start; - start = middle + 1; - } - } - return index; -} - -static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length) -{ - return value >= at && value < at + length; -} - -constexpr bool DisasmMap::HasSymbolsInRange( - uint32_t const address, size_t const length) const -{ - size_t index = findFirstSymbolAtAddress(address, true); - if (index == 0) { - // The symtab is empty - return false; - } - if (IsWithinRange(_symtab[index].address, address, length)) { - // The symbol is found right at the address, which is unlikely - return true; - } - if (_symtab[index].address < address) { - // Maybe the next symbol falls into the range? - if (index + 1 >= symbolsCount()) { - // No more symbols after the index - return false; - } - index++; - } else { - // Maybe the previous symbol falls into the range? (unlikely at all) - if (index < 2) { - // No more symbols before the index - return false; - } - index--; - } - if (IsWithinRange(_symtab[index].address, address, length)) { - return true; - } - return false; -} - -static constexpr uint32_t AlignInstructionAddress(const uint32_t address) -{ - return address & ~1UL; -} - -DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type) -{ - auto *node = findNodeByAddress(address); - if (node) { - // Instruction nodes take precedence over data nodes. If a node that - // was previously accessed only as data now turns out to be an - // instruction, then it must become an instruction node. - if (IsInstruction(type) && !IsInstruction(node->type)) { - *const_cast<NodeType*>(&node->type) = type; - // Make sure it is OpCode::kNone so it will be properly disassembled - node->op = Op{}; - } - return *node; - } - node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)}); - assert(node); - _map[address / kInstructionSizeStepBytes] = node; - return *node; -} - -DisasmNode &DisasmMap::insertReferencedBy( - const uint32_t by_addr, - const uint32_t ref_addr, - const NodeType type, - const ReferenceType ref_type) -{ - auto &ref_node = insertNode(ref_addr, type); - ref_node.AddReferencedBy(by_addr, ref_type); - return ref_node; -} - -constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const -{ - const auto size = node.size / kInstructionSizeStepBytes; - const auto *const node_real = findNodeByAddress(node.address); - for (size_t i = 1; i < size; i++) { - const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i]; - if (ptr != nullptr && ptr != node_real) { - return false; - } - } - return true; -} - -static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds) -{ - return (ref_kinds & kRefCallMask) - ? ReferenceType::kCall - : (ref_kinds & kRef1ReadMask) - ? ReferenceType::kRead - : (ref_kinds & kRef1WriteMask) - ? ReferenceType::kWrite - : ReferenceType::kBranch; -} - -static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) -{ - // FIXME: AFAIK it is impossible for a call instruction to have second - // argument. I can probably drop the first condition, but it needs testing - return (ref_kinds & kRefCallMask) - ? ReferenceType::kCall - : (ref_kinds & kRef2ReadMask) - ? ReferenceType::kRead - : (ref_kinds & kRef2WriteMask) - ? ReferenceType::kWrite - : ReferenceType::kBranch; -} - -static constexpr bool IsNextLikelyAnInstruction(const Op &op) -{ - return (op.opcode != OpCode::kNone && - op.opcode != OpCode::kRaw && - !IsBRA(op) && - op.opcode != OpCode::kJMP && - op.opcode != OpCode::kRTS && - op.opcode != OpCode::kRTE && - op.opcode != OpCode::kSTOP); -} - -static int cmpsym(const void *p1, const void *p2) -{ - const Symbol *sym1 = reinterpret_cast<const Symbol *>(p1); - const Symbol *sym2 = reinterpret_cast<const Symbol *>(p2); - if (sym1->address == sym2->address) { - return strcmp(sym1->name, sym2->name); - } - return sym1->address < sym2->address ? -1 : 1; -} - -constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t) -{ - if (t == ELF::Symbol32Type::kObject) { - return SymbolType::kObject; - } - if (t == ELF::Symbol32Type::kFunc) { - return SymbolType::kFunction; - } - return SymbolType::kNone; -} - -bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf) -{ - const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab"); - if (!symtab.IsValid()) { - fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n"); - return true; - } - FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size); - if (symtab_stream == nullptr) { - const int err = errno; - fprintf(stderr, - "open_memstream() for symtab failed: Error (%d): \"%s\"\n", - err, strerror(err)); - return false; - } - const Symbol null_symbol{}; - if (null_symbol.name != nullptr && *null_symbol.name != '\0') { - const size_t ret = fwrite( - &null_symbol, sizeof null_symbol, 1, symtab_stream); - (void) ret; - assert(ret == 1); - } - const size_t nentries = symtab.size/symtab.entsize; - for (size_t i = 0; i < nentries; i++) { - const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(i); - const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) || - (elfsym.type() == ELF::Symbol32Type::kObject) || - (elfsym.type() == ELF::Symbol32Type::kFunc); - if (has_proper_type) { - // XXX: Is it possible that it may have binding other than - // Symbol32Bind::kGlobal when it is kFunc? - // XXX: Yes, it is possible. It may be kLocal or kWeak for sure. - const auto type = SymbolTypeFromElf32SymbolType(elfsym.type()); - const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size}; - if (symbol.name != nullptr && *symbol.name != '\0') { - const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream); - (void) ret; - assert(ret == 1); - } - } - } - // No more symbols are going to be added further, so it may be closed now. - fclose(symtab_stream); - // The RenderNodeDisassembly() function expects the symbol table to be - // sorted. - qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym); - return true; -} - -void DisasmMap::Disasm( - const DataView &code, const Settings &s, size_t at, bool nested) -{ - // Some of logic of this function is covered by integration tests in - // `test_walk_and_follow_jumps.bash`. - bool inside_code_span = nested; - while (at < Min(kRomSizeBytes, code.size)) { - DisasmNode *node; - if (_type == DisasmMapType::kTraced) { - node = _map[at / kInstructionSizeStepBytes]; - if (!node) { - if (inside_code_span) { - node = &insertNode(at, NodeType::kTracedInstruction); - } else { - at += kInstructionSizeStepBytes; - continue; - } - } - } else { - node = &insertNode(at, NodeType::kTracedInstruction); - } - if (node->op.opcode == OpCode::kNone || inside_code_span) { - const auto size = node->Disasm(code); - assert(size >= kInstructionSizeStepBytes); - if (canBeAllocated(*node)) { - // Spread across the size - for (size_t o = kInstructionSizeStepBytes; o < size; o++) { - _map[(node->address + o) / kInstructionSizeStepBytes] = node; - } - } else { - node->DisasmAsRaw(code); - } - } - inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op); - if (nested && !inside_code_span) { - return; - } - at += node->size; - // NOTE: There is not much information about a reference passed further, - // so just don't add a reference of immediate if s.imm_labels is false - // enabled. - const bool has_ref1 = (node->ref_kinds & kRef1ImmMask) - ? s.imm_labels - : (node->ref_kinds & kRef1Mask); - const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1; - if (has_code_ref1) { - const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) - ? NodeType::kData : NodeType::kRefInstruction; - const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds); - auto &ref_node = insertReferencedBy( - node->address, node->ref1_addr, type, ref_type); - if (ref_node.op.opcode == OpCode::kNone) { - if (s.follow_jumps) { - Disasm(code, s, ref_node.address, true); - } else { - ref_node.DisasmAsRaw(code); - } - } - } - const bool has_ref2 = (node->ref_kinds & kRef2Mask); - const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size); - if (has_code_ref2) { - const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) - ? NodeType::kData : NodeType::kRefInstruction; - const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds); - auto &ref_node = insertReferencedBy( - node->address, node->ref2_addr, type, ref_type); - if (ref_node.op.opcode == OpCode::kNone) { - if (s.follow_jumps) { - Disasm(code, s, ref_node.address, true); - } else { - ref_node.DisasmAsRaw(code); - } - } - } - } -} - -DisasmMap::~DisasmMap() -{ - for (size_t i = 0; i < kDisasmMapSizeElements; i++) { - auto *const node = _map[i]; - if (!node) { - continue; - } - const auto size = node->size / kInstructionSizeStepBytes; - for (size_t o = 0; o < size; o++) { - assert(_map[i + o] == node); - _map[i + o] = nullptr; - } - delete node; - i += size - 1; - } - if (_symtab != nullptr) { - free(_symtab); - } -} - static size_t RenderRawDataComment( char *out, size_t out_sz, uint32_t address, size_t instr_sz, const DataView &code) { |