From 331cb906b0acf14e9ed17667c0bb1f2aafced198 Mon Sep 17 00:00:00 2001 From: Oxore Date: Sun, 2 Feb 2025 19:23:09 +0300 Subject: WIP merge nodes --- src/disasm.cpp | 89 +++++++++++++++++++++++++++++++++++++++++++--------------- src/disasm.h | 20 ++++++++++--- 2 files changed, 82 insertions(+), 27 deletions(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index d88a27e..a20cb92 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -23,6 +23,24 @@ void DisasmNode::AddReferencedBy(const uint32_t address_from, const ReferenceTyp this->last_ref_by = node; } +void DisasmNode::RemoveReferencedBy(const uint32_t address_from) +{ + ReferenceRecord *ref{this->ref_by}; + while (ref) { + ReferenceRecord *prev = ref; + ref = ref->next; + if (prev->address == address_from) { + if (this->ref_by == prev) { + this->ref_by = prev->next; + } + delete prev; + } + } + if (nullptr == this->ref_by) { + this->last_ref_by = nullptr; + } +} + DisasmNode::~DisasmNode() { ReferenceRecord *ref{this->ref_by}; @@ -49,7 +67,7 @@ DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type) // traced data support is yet to come. if (IsInstruction(type) && !IsInstruction(node->type)) { if (0 == (node->size & 1) && 0 == (node->address & 1)) { - *const_cast(&node->type) = type; + node->type = type; // Make sure it is OpCode::kNone so it will be properly disassembled node->op = Op{}; } @@ -85,7 +103,7 @@ DisasmNode &DisasmMap::insertNodeQuickPeek(uint32_t address, NodeType type) // traced data support is yet to come. if (IsInstruction(type) && !IsInstruction(node->type)) { if (0 == (node->size & 1) && 0 == (node->address & 1)) { - *const_cast(&node->type) = type; + node->type = type; // Make sure it is OpCode::kNone so it will be properly disassembled node->op = Op{}; } @@ -102,32 +120,57 @@ DisasmNode &DisasmMap::insertNodeQuickPeek(uint32_t address, NodeType type) return *node; } -DisasmNode *DisasmMap::mergeNodes(DisasmNode *primary, DisasmNode *secondary) +static bool WithinRange(uint32_t address, uint32_t range_address, uint32_t range_size) +{ + return address >= range_address && address < range_address + range_size; +} + +DisasmNode *DisasmMap::mergeNodeOverlappingSpace(DisasmNode *primary, DisasmNode *secondary) { ASSERT(primary->address < secondary->address); ASSERT(primary->address + primary->size >= secondary->address); - ASSERT(primary->address + primary->size >= secondary->address + secondary->size); - ReferenceNode *rnode{secondary->ref_by}; - while (rnode) { - for (size_t i = 0; i < rnode->refs_count; i--) { - primary->AddReferencedBy(rnode->refs[i].address, rnode->refs[i].type); + ReferenceRecord *ref{secondary->ref_by}; + while (ref) { + ReferenceRecord *prev = ref; + ref = ref->next; + if (WithinRange(prev->address, primary->address, primary->size)) { + primary->AddReferencedBy(prev->address, prev->type); + if (secondary->ref_by == prev) { + secondary->ref_by = prev->next; + } + delete prev; } - ReferenceNode *prev = rnode; - rnode = rnode->next; - delete prev; } - if (secondary.ref_kinds & kRef1Mask) { - DisasmNode *node = _map[secondary.ref1_addr]; + if (nullptr == secondary->ref_by) { + secondary->last_ref_by = nullptr; + } + if (secondary->ref_kinds & kRef1Mask) { + DisasmNode *node = _map[secondary->ref1_addr]; ASSERT(node); - node->RemoveReferencedBy(secondary.ref1_addr); + node->RemoveReferencedBy(secondary->ref1_addr); } - if (secondary.ref_kinds & kRef2Mask) { - DisasmNode *node = _map[secondary.ref2_addr]; + if (secondary->ref_kinds & kRef2Mask) { + DisasmNode *node = _map[secondary->ref2_addr]; ASSERT(node); - node->RemoveReferencedBy(secondary.ref2_addr); + node->RemoveReferencedBy(secondary->ref2_addr); + } + // Spread primary across the overlapping space + const size_t begin = secondary->address; + const size_t end = primary->address + primary->size; + for (size_t address = begin; address < end; address++) { + ASSERT(_map[address] == secondary); + _map[address] = primary; + } + if (secondary->address + secondary->size > end) { + // It is still alive, just shrunk + secondary->size = secondary->address + secondary->size - end; + secondary->address = end; + // Make sure it is OpCode::kNone so it will be properly disassembled + node->op = Op{} + } else { + // It is completely swallowed by the primary node + delete secondary; } - secondary->ref_by = secondary->last_ref_by = nullptr; - delete secondary; return primary; } @@ -344,12 +387,12 @@ void DisasmMap::disasmQuickPeek(const DataView &code, const Settings &s) // Spread across the size and merge if an intersection encountered const size_t address = node->address; const auto size = node->size; - for (size_t i = 0; i < size; i++) { - auto *const ptr = _map[node->address + i]; + for (size_t i = address; i < address + size; i++) { + auto *const ptr = _map[i]; if (ptr != nullptr && ptr != node) { - node = mergeNodes(node, ptr); + mergeNodeOverlappingSpace(node, ptr); } - _map[address + i] = node; + _map[i] = node; } at += node->size; // NOTE: There is not much information about a reference passed further, diff --git a/src/disasm.h b/src/disasm.h index a3c589b..39af8e3 100644 --- a/src/disasm.h +++ b/src/disasm.h @@ -42,9 +42,9 @@ static constexpr uint32_t AlignInstructionAddress(const uint32_t address) } struct DisasmNode { - const NodeType type{}; + NodeType type{}; /// Address of the instruction (PC value basically) - const uint32_t address{}; + uint32_t address{}; /// Instruction size in bytes size_t size{selectSize(type, kInstructionSizeStepBytes)}; /// Indicates whether `ref_addr` should be interpreted and how @@ -105,6 +105,7 @@ struct DisasmNode { size_t Disasm(const DataView &code, const Settings &); size_t DisasmAsRaw(const DataView &code); void AddReferencedBy(uint32_t address, ReferenceType); + void RemoveReferencedBy(uint32_t address); bool IsYetToBeHandled(DisasmMapType dmtype) { return op.opcode == OpCode::kNone || @@ -176,8 +177,19 @@ class DisasmMap { uint32_t address, bool return_last_considered=false) const; DisasmNode &insertNode(uint32_t address, NodeType); DisasmNode &insertNodeQuickPeek(uint32_t address, NodeType); - /// Returns primary, secondary ceases to exist - DisasmNode *mergeNodes(DisasmNode *primary, DisasmNode *secondary); + /** Merges \p secondary node with the \p primary node on the overlapping + * span. + * + * If \p primary and \p secondary nodes overlap, then all the overlapping + * address space of the target machine becomes assigned to \p primary node. + * If \p primary node fully contains the space that belongs to \p secondary + * node, then \p secondary node ceases to exist after the merge. All the + * references pointing at the overlapping space are transferred from \p + * secondary to \p primary node. + * + * \returns \p primary literally, so it is never reallocated. + */ + DisasmNode *mergeNodeOverlappingSpace(DisasmNode *primary, DisasmNode *secondary); DisasmNode &insertReferencedBy( const uint32_t by_addr, const uint32_t ref_addr, -- cgit v1.2.3