#pragma once /* SPDX-License-Identifier: Unlicense */ #include "elf_image.h" #include "common.h" #include "m68k.h" #include "tracetab.h" #include #include enum class ReferenceType { kUnknown = 0, kCall, kBranch, kRead, kWrite, }; struct ReferenceRecord { ReferenceRecord *next{}; ReferenceType type{}; uint32_t address{}; }; enum class DisasmMapType { kTraced, kRaw, }; enum class NodeType { kTracedInstruction, kRefInstruction, kRefData, }; static constexpr uint32_t AlignInstructionAddress(const uint32_t address) { return address & ~1UL; } struct DisasmNode { NodeType type{}; /// Address of the instruction (PC value basically) uint32_t address{}; /// Instruction size in bytes size_t size{selectSize(type, kInstructionSizeStepBytes)}; /// Indicates whether `ref_addr` should be interpreted and how RefKindMask ref_kinds{}; /// Address of first argument reference uint32_t ref1_addr{}; /// Address of second argument reference uint32_t ref2_addr{}; ReferenceRecord *ref_by{}; ReferenceRecord *last_ref_by{}; Op op{}; static DisasmNode Simple(NodeType t, uint32_t address) { return DisasmNode{ /* .type = */ t, /* .address = */ alignAddress(NodeType::kTracedInstruction, address), /* .size = */ selectSize(t, (address & 1) ? 1 : 2), /* .ref_kinds = */ 0, /* .ref1_addr = */ 0, /* .ref2_addr = */ 0, /* .ref_by = */ nullptr, /* .last_ref_by = */ nullptr, /* .op = */ selectOp(t, (address & 1) ? 1 : 2), }; } static DisasmNode TracedRaw(uint32_t address, uint16_t raw) { return DisasmNode{ /* .type = */ NodeType::kTracedInstruction, /* .address = */ alignAddress(NodeType::kTracedInstruction, address), /* .size = */ sizeof(raw), /* .ref_kinds = */ 0, /* .ref1_addr = */ 0, /* .ref2_addr = */ 0, /* .ref_by = */ nullptr, /* .last_ref_by = */ nullptr, /* .op = */ Op::Raw(raw), }; } static DisasmNode DataRaw8(uint32_t address, uint8_t raw) { return DisasmNode{ /* .type = */ NodeType::kRefData, /* .address = */ address, /* .size = */ sizeof(raw), /* .ref_kinds = */ 0, /* .ref1_addr = */ 0, /* .ref2_addr = */ 0, /* .ref_by = */ nullptr, /* .last_ref_by = */ nullptr, /* .op = */ Op::Raw8(raw), }; } /*! Disassembles instruction with arguments * returns size of whole instruction with arguments in bytes */ size_t Disasm(const DataView &code, const Settings &); size_t DisasmAsRaw(const DataView &code); void AddReferencedBy(uint32_t address, ReferenceType); void RemoveReferencedBy(uint32_t address); bool IsYetToBeHandled(DisasmMapType dmtype) { return op.opcode == OpCode::kNone || (dmtype == DisasmMapType::kRaw && op.opcode == OpCode::kRaw); } ~DisasmNode(); private: static constexpr uint32_t alignAddress(NodeType t, uint32_t address) { switch (t) { case NodeType::kTracedInstruction: case NodeType::kRefInstruction: return AlignInstructionAddress(address); case NodeType::kRefData: return address; } return address; } static constexpr uint32_t selectSize(NodeType t, size_t size) { switch (t) { case NodeType::kTracedInstruction: case NodeType::kRefInstruction: return kInstructionSizeStepBytes; case NodeType::kRefData: return size; } return 1; } static constexpr Op selectOp(NodeType t, size_t size) { switch (t) { case NodeType::kTracedInstruction: case NodeType::kRefInstruction: return Op{}; case NodeType::kRefData: return (size & 1) ? Op::Raw8(0) : Op::Raw(0); } return Op{}; } }; static constexpr inline bool IsInstruction(NodeType t) { return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction; } enum class SymbolType: int { kNone = 0, kFunction, kObject, }; struct Symbol { uint32_t address{}; SymbolType type{}; const char *name{}; size_t size{}; }; class DisasmMap { const DisasmMapType _type; DisasmNode **_map{static_cast(calloc(kRomSizeBytes, sizeof(*_map)))}; Symbol *_symtab{}; size_t _symtab_size{}; TraceTable _tt{}; constexpr DisasmNode *findNodeByAddress(uint32_t address) const; constexpr size_t findFirstSymbolAtAddress( uint32_t address, bool return_last_considered=false) const; DisasmNode &insertNode(uint32_t address, NodeType); DisasmNode &insertNodeQuickPeek(uint32_t address, NodeType); /** Merges \p secondary node with the \p primary node on the overlapping * span. * * If \p primary and \p secondary nodes overlap, then all the overlapping * address space of the target machine becomes assigned to \p primary node. * If \p primary node fully contains the space that belongs to \p secondary * node, then \p secondary node ceases to exist after the merge. All the * references pointing at the overlapping space are transferred from \p * secondary to \p primary node. * * \returns \p primary literally, so it is never reallocated. */ DisasmNode *mergeNodeOverlappingSpace(DisasmNode *primary, DisasmNode *secondary); DisasmNode &insertReferencedBy( const uint32_t by_addr, const uint32_t ref_addr, const NodeType type, const ReferenceType ref_type); constexpr bool canBeAllocated(const DisasmNode& node) const; constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; } void disasmQuickPeek(const DataView &code, const Settings &); void disasmProper(const DataView &code, const Settings &, size_t from=0, bool nested=false); public: constexpr const Symbol *Symtab() const { return _symtab; } constexpr size_t SymbolsCount() const { return symbolsCount(); } constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const; constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const; constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const { return findNodeByAddress(address); }; void InsertNode(uint32_t address, NodeType type); bool ApplySymbolsFromElf(const ELF::Image &); void ConsumeTraceTable(TraceTable &&); void Disasm(const DataView &code, const Settings &s) { if (_type == DisasmMapType::kTraced) { return disasmProper(code, s, 0, false); } return disasmQuickPeek(code, s); } DisasmMap(DisasmMapType type): _type(type) {} ~DisasmMap(); }; constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const { if (address < kRomSizeBytes) return _map[address]; return nullptr; } constexpr size_t DisasmMap::findFirstSymbolAtAddress( uint32_t address, bool return_last_considered) const { if (_symtab == nullptr || symbolsCount() < 1) { return 0; } // A symbol at index 0 is a special null symbol and it must be skipped. size_t start = 1, len = symbolsCount() - start, middle = start, index = 0; while (1) { if (len == 0) { if (return_last_considered && index == 0) { index = start; } break; } middle = start + len / 2; if (_symtab[middle].address >= address) { if (_symtab[middle].address == address) { index = middle; } // Look at the span right before the middle one on the next step len = middle - start; } else { // Look at the span right after the middle one on the next step len -= middle + 1 - start; start = middle + 1; } } return index; } static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length) { return value >= at && value < at + length; } constexpr bool DisasmMap::HasSymbolsInRange( uint32_t const address, size_t const length) const { size_t index = findFirstSymbolAtAddress(address, true); if (index == 0) { // The symtab is empty return false; } if (IsWithinRange(_symtab[index].address, address, length)) { // The symbol is found right at the address, which is unlikely return true; } if (_symtab[index].address < address) { // Maybe the next symbol falls into the range? if (index + 1 >= symbolsCount()) { // No more symbols after the index return false; } index++; } else { // Maybe the previous symbol falls into the range? (unlikely at all) if (index < 2) { // No more symbols before the index return false; } index--; } if (IsWithinRange(_symtab[index].address, address, length)) { return true; } return false; } constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const { const auto size = node.size; const auto *const node_real = findNodeByAddress(node.address); for (size_t i = 0; i < size; i++) { const auto *const ptr = _map[node.address + i]; if (ptr != nullptr && ptr != node_real) { return false; } } return true; } static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds) { return (ref_kinds & kRefCallMask) ? ReferenceType::kCall : (ref_kinds & kRef1ReadMask) ? ReferenceType::kRead : (ref_kinds & kRef1WriteMask) ? ReferenceType::kWrite : ReferenceType::kBranch; } static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) { // FIXME: AFAIK it is impossible for a call instruction to have second // argument. I can probably drop the first condition, but it needs testing return (ref_kinds & kRefCallMask) ? ReferenceType::kCall : (ref_kinds & kRef2ReadMask) ? ReferenceType::kRead : (ref_kinds & kRef2WriteMask) ? ReferenceType::kWrite : ReferenceType::kBranch; }