diff options
Diffstat (limited to 'src/disasm.h')
-rw-r--r-- | src/disasm.h | 232 |
1 files changed, 232 insertions, 0 deletions
diff --git a/src/disasm.h b/src/disasm.h new file mode 100644 index 0000000..90906ed --- /dev/null +++ b/src/disasm.h @@ -0,0 +1,232 @@ +#pragma once + +/* SPDX-License-Identifier: Unlicense + */ + +#include "elf_image.h" +#include "common.h" +#include "m68k.h" + +#include <cstdint> +#include <cstddef> + +enum class ReferenceType { + kUnknown = 0, + kCall, + kBranch, + kRead, + kWrite, +}; + +struct ReferenceRecord { + ReferenceType type{}; + uint32_t address{}; +}; + +constexpr size_t kRefsCountPerBuffer = 10; + +struct ReferenceNode { + ReferenceNode *next{}; + ReferenceRecord refs[kRefsCountPerBuffer]; + uint32_t refs_count{}; +}; + +enum class NodeType { + kTracedInstruction, + kRefInstruction, + kData, +}; + +struct DisasmNode { + const NodeType type{}; + /// Address of the instruction (PC value basically) + const uint32_t address{}; + /// Instruction size in bytes + size_t size{kInstructionSizeStepBytes}; + /// Indicates whether `ref_addr` should be interpreted and how + RefKindMask ref_kinds{}; + /// Address of first argument reference + uint32_t ref1_addr{}; + /// Address of second argument reference + uint32_t ref2_addr{}; + ReferenceNode *ref_by{}; + ReferenceNode *last_ref_by{}; + Op op{}; + + /*! Disassembles instruction with arguments + * returns size of whole instruction with arguments in bytes + */ + size_t Disasm(const DataView &code); + size_t DisasmAsRaw(const DataView &code); + void AddReferencedBy(uint32_t address, ReferenceType); + ~DisasmNode(); +}; + +static constexpr inline bool IsInstruction(NodeType t) +{ + return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction; +} + +enum class SymbolType: int { + kNone = 0, + kFunction, + kObject, +}; + +struct Symbol { + uint32_t address{}; + SymbolType type{}; + const char *name{}; + size_t size{}; +}; + +enum class DisasmMapType { + kTraced, + kRaw, +}; + +class DisasmMap { + const DisasmMapType _type; + DisasmNode *_map[kDisasmMapSizeElements]{}; + Symbol *_symtab{}; + size_t _symtab_size{}; + constexpr DisasmNode *findNodeByAddress(uint32_t address) const; + constexpr size_t findFirstSymbolAtAddress( + uint32_t address, bool return_last_considered=false) const; + DisasmNode &insertNode(uint32_t address, NodeType); + void insertSymbol(uint32_t address, ReferenceType ref_type); + DisasmNode &insertReferencedBy( + const uint32_t by_addr, + const uint32_t ref_addr, + const NodeType type, + const ReferenceType ref_type); + constexpr bool canBeAllocated(const DisasmNode& node) const; + constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; } +public: + constexpr const Symbol *Symtab() const { return _symtab; } + constexpr size_t SymbolsCount() const { return symbolsCount(); } + constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const; + constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const; + constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const + { + return findNodeByAddress(address); + }; + void InsertNode(uint32_t address, NodeType type); + bool ApplySymbolsFromElf(const ELF::Image &); + void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); + DisasmMap(DisasmMapType type): _type(type) {} + ~DisasmMap(); +}; + +constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const +{ + if (address < kRomSizeBytes) + return _map[address / kInstructionSizeStepBytes]; + return nullptr; +} + +constexpr size_t DisasmMap::findFirstSymbolAtAddress( + uint32_t address, bool return_last_considered) const +{ + if (_symtab == nullptr || symbolsCount() < 1) { + return 0; + } + // A symbol at index 0 is a special null symbol and it must be skipped. + size_t start = 1, len = symbolsCount() - start, middle = start, index = 0; + while (1) { + if (len == 0) { + if (return_last_considered && index == 0) { + index = start; + } + break; + } + middle = start + len / 2; + if (_symtab[middle].address >= address) { + if (_symtab[middle].address == address) { + index = middle; + } + // Look at the span right before the middle one on the next step + len = middle - start; + } else { + // Look at the span right after the middle one on the next step + len -= middle + 1 - start; + start = middle + 1; + } + } + return index; +} + +static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length) +{ + return value >= at && value < at + length; +} + +constexpr bool DisasmMap::HasSymbolsInRange( + uint32_t const address, size_t const length) const +{ + size_t index = findFirstSymbolAtAddress(address, true); + if (index == 0) { + // The symtab is empty + return false; + } + if (IsWithinRange(_symtab[index].address, address, length)) { + // The symbol is found right at the address, which is unlikely + return true; + } + if (_symtab[index].address < address) { + // Maybe the next symbol falls into the range? + if (index + 1 >= symbolsCount()) { + // No more symbols after the index + return false; + } + index++; + } else { + // Maybe the previous symbol falls into the range? (unlikely at all) + if (index < 2) { + // No more symbols before the index + return false; + } + index--; + } + if (IsWithinRange(_symtab[index].address, address, length)) { + return true; + } + return false; +} + +constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const +{ + const auto size = node.size / kInstructionSizeStepBytes; + const auto *const node_real = findNodeByAddress(node.address); + for (size_t i = 1; i < size; i++) { + const auto *const ptr = _map[node.address / kInstructionSizeStepBytes + i]; + if (ptr != nullptr && ptr != node_real) { + return false; + } + } + return true; +} + +static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask ref_kinds) +{ + return (ref_kinds & kRefCallMask) + ? ReferenceType::kCall + : (ref_kinds & kRef1ReadMask) + ? ReferenceType::kRead + : (ref_kinds & kRef1WriteMask) + ? ReferenceType::kWrite + : ReferenceType::kBranch; +} + +static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) +{ + // FIXME: AFAIK it is impossible for a call instruction to have second + // argument. I can probably drop the first condition, but it needs testing + return (ref_kinds & kRefCallMask) + ? ReferenceType::kCall + : (ref_kinds & kRef2ReadMask) + ? ReferenceType::kRead + : (ref_kinds & kRef2WriteMask) + ? ReferenceType::kWrite + : ReferenceType::kBranch; +} |