diff options
author | Oxore <oxore@protonmail.com> | 2024-02-04 19:51:35 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2024-02-04 19:51:35 +0300 |
commit | 9fd2eba95beb6c9ce6fb26e1442aa2f68aac9b1f (patch) | |
tree | 9d351968b9757ccca48212fc2687d6df996d4fa2 | |
parent | 853bda8e4c9210062a1b793f3499270c9e4cf532 (diff) |
Impl deep graph walk and distinct jump following option
This commit changes the default behavior which technically breaks
backwards compatibility. It would be a concern if somebody else besides
me is using this tool, but I doubt it would be the case, LOL.
-rw-r--r-- | Readme.md | 2 | ||||
-rw-r--r-- | common.h | 2 | ||||
-rw-r--r-- | disasm.cpp | 11 | ||||
-rw-r--r-- | disasm.h | 17 | ||||
-rw-r--r-- | main.cpp | 164 | ||||
-rw-r--r-- | test.bash | 2 | ||||
-rw-r--r-- | test_labels_referencing.bash | 2 | ||||
-rw-r--r-- | test_random.bash | 2 | ||||
-rw-r--r-- | test_walk_and_follow_jumps.bash | 200 |
9 files changed, 324 insertions, 78 deletions
@@ -94,7 +94,7 @@ trace table. Or better with labeled locations analysis and some fancy raw comments: ``` -./cmake-build/m68k-disasm -frdc -fxrefs-to -fxrefs-from -flabels -fabs-labels -frel-labels -fexport-labels -fexport-functions -fimm-hex -t pc-trace.txt -o disasm.S rom.bin +./cmake-build/m68k-disasm -frdc -fxrefs-to -fxrefs-from -flabels -fabs-labels -frel-labels -fexport-labels -fexport-functions -fimm-hex -follow-jumps -fwalk -t pc-trace.txt -o disasm.S rom.bin ``` It will produce `disasm.S` which you can modify and assemble as shown in @@ -25,6 +25,8 @@ struct Settings { bool xrefs_to{}; bool xrefs_from{}; bool imm_hex{}; + bool follow_jumps{}; + bool walk{}; BFDTarget bfd{}; const char *indent{"\t"}; }; @@ -1573,19 +1573,14 @@ static size_t m68k_disasm(DisasmNode &n, uint16_t i, const DataView &c) size_t DisasmNode::Disasm(const DataView &code) { - // We assume that machine have no MMU and ROM data always starts with 0 + // We assume that machine have no MMU and ROM data always starts at 0 assert(this->address < code.size); - // It is possible to have multiple DisasmNode::Disasm() calls, and there is - // no point to disassemble it again if it already has opcode determined - if (this->op.opcode != OpCode::kNone) { - return this->size; - } size = kInstructionSizeStepBytes; ref_kinds = 0; ref1_addr = 0; ref2_addr = 0; const uint16_t instr = GetU16BE(code.buffer + this->address); - if (this->type == TracedNodeType::kInstruction) { + if (IsInstruction(this->type)) { return m68k_disasm(*this, instr, code); } else { // Data should not be disassembled @@ -1595,7 +1590,7 @@ size_t DisasmNode::Disasm(const DataView &code) size_t DisasmNode::DisasmAsRaw(const DataView &code) { - // We assume that machine have no MMU and ROM data always starts with 0 + // We assume that machine have no MMU and ROM data always starts at 0 assert(this->address < code.size); size = kInstructionSizeStepBytes; ref_kinds = 0; @@ -304,8 +304,9 @@ struct Arg { uint32_t ref_addr = 0) const; }; -enum class TracedNodeType { - kInstruction, +enum class NodeType { + kTracedInstruction, + kRefInstruction, kData, }; @@ -365,7 +366,7 @@ struct Op { }; struct DisasmNode { - const TracedNodeType type{}; + const NodeType type{}; /// Address of the instruction (PC value basically) const uint32_t address{}; /// Instruction size in bytes @@ -388,3 +389,13 @@ struct DisasmNode { void AddReferencedBy(uint32_t address, ReferenceType); ~DisasmNode(); }; + +static constexpr inline bool IsInstruction(NodeType t) +{ + return t == NodeType::kTracedInstruction || t == NodeType::kRefInstruction; +} + +static constexpr inline bool IsBRA(Op op) +{ + return op.opcode == OpCode::kBcc && op.condition == Condition::kT; +} @@ -28,12 +28,11 @@ class DisasmMap { const DisasmMapType _type; DisasmNode *_map[kDisasmMapSizeElements]{}; constexpr DisasmNode *findNodeByAddress(uint32_t address) const; - DisasmNode *insertTracedNode(uint32_t address, TracedNodeType); - void insertReferencedBy( + DisasmNode &insertNode(uint32_t address, NodeType); + DisasmNode &insertReferencedBy( const uint32_t by_addr, const uint32_t ref_addr, - const TracedNodeType type, - const DataView &code, + const NodeType type, const ReferenceType ref_type); constexpr bool canBeAllocated(const DisasmNode& node) const; public: @@ -41,14 +40,12 @@ public: { return findNodeByAddress(address); }; - // Returns true if node inserted, false if node already exist and has not - // been changed - bool InsertTracedNode(uint32_t address, TracedNodeType type) + void InsertNode(uint32_t address, NodeType type) { assert(_type == DisasmMapType::kTraced); - return nullptr != insertTracedNode(address, type); + insertNode(address, type); } - void Disasm(const DataView &code, const Settings &); + void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); DisasmMap(DisasmMapType type): _type(type) {} ~DisasmMap(); }; @@ -65,45 +62,35 @@ static constexpr uint32_t AlignInstructionAddress(const uint32_t address) return address & ~1UL; } -DisasmNode *DisasmMap::insertTracedNode(const uint32_t address, const TracedNodeType type) +DisasmNode &DisasmMap::insertNode(const uint32_t address, const NodeType type) { auto *node = findNodeByAddress(address); if (node) { // Instruction nodes take precedence over data nodes. If a node that // was previously accessed only as data now turns out to be an // instruction, then it must become an instruction node. - if (type == TracedNodeType::kInstruction && node->type != TracedNodeType::kInstruction) { - *const_cast<TracedNodeType*>(&node->type) = type; + if (IsInstruction(type) && !IsInstruction(node->type)) { + *const_cast<NodeType*>(&node->type) = type; // Make sure it is OpCode::kNone so it will be properly disassembled node->op = Op{}; } - return node; + return *node; } node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)}); assert(node); _map[address / kInstructionSizeStepBytes] = node; - return node; + return *node; } -void DisasmMap::insertReferencedBy( +DisasmNode &DisasmMap::insertReferencedBy( const uint32_t by_addr, const uint32_t ref_addr, - const TracedNodeType type, - const DataView &code, + const NodeType type, const ReferenceType ref_type) { - auto *const ref_node = insertTracedNode(ref_addr, type); - const auto size = ref_node->Disasm(code); - assert(size >= kInstructionSizeStepBytes); - if (canBeAllocated(*ref_node)) { - // Spread across the size - for (size_t o = kInstructionSizeStepBytes; o < size; o++) { - _map[(ref_node->address + o) / kInstructionSizeStepBytes] = ref_node; - } - } else { - ref_node->DisasmAsRaw(code); - } - ref_node->AddReferencedBy(by_addr, ref_type); + auto &ref_node = insertNode(ref_addr, type); + ref_node.AddReferencedBy(by_addr, ref_type); + return ref_node; } constexpr bool DisasmMap::canBeAllocated(const DisasmNode& node) const @@ -141,31 +128,55 @@ static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask r : ReferenceType::kBranch; } -void DisasmMap::Disasm(const DataView &code, const Settings &s) +static constexpr bool IsNextLikelyAnInstruction(const Op &op) +{ + return (op.opcode != OpCode::kNone && + op.opcode != OpCode::kRaw && + !IsBRA(op) && + op.opcode != OpCode::kJMP && + op.opcode != OpCode::kRTS && + op.opcode != OpCode::kRTE && + op.opcode != OpCode::kSTOP); +} + +void DisasmMap::Disasm( + const DataView &code, const Settings &s, size_t at, bool nested) { - DisasmNode *node; - for (size_t i = 0; i < Min(kRomSizeBytes, code.size);) { + // Some of logic of this function is covered by integration tests in + // `test_walk_and_follow_jumps.bash`. + bool inside_code_span = nested; + while (at < Min(kRomSizeBytes, code.size)) { + DisasmNode *node; if (_type == DisasmMapType::kTraced) { - node = _map[i / kInstructionSizeStepBytes]; + node = _map[at / kInstructionSizeStepBytes]; if (!node) { - i += kInstructionSizeStepBytes; - continue; + if (inside_code_span) { + node = &insertNode(at, NodeType::kTracedInstruction); + } else { + at += kInstructionSizeStepBytes; + continue; + } } } else { - node = insertTracedNode(i, TracedNodeType::kInstruction); + node = &insertNode(at, NodeType::kTracedInstruction); } - const auto size = node->Disasm(code); - assert(size >= kInstructionSizeStepBytes); - if (canBeAllocated(*node)) { - // Spread across the size - for (size_t o = kInstructionSizeStepBytes; o < size; o++) { - _map[(node->address + o) / kInstructionSizeStepBytes] = node; + if (node->op.opcode == OpCode::kNone || inside_code_span) { + const auto size = node->Disasm(code); + assert(size >= kInstructionSizeStepBytes); + if (canBeAllocated(*node)) { + // Spread across the size + for (size_t o = kInstructionSizeStepBytes; o < size; o++) { + _map[(node->address + o) / kInstructionSizeStepBytes] = node; + } + } else { + node->DisasmAsRaw(code); } - } else { - node->DisasmAsRaw(code); } - // FIXME implement deep graph walk for DisasmMapType::kTraced case. - // + inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op); + if (nested && !inside_code_span) { + return; + } + at += node->size; // NOTE: There is not much information about a reference passed further, // so just don't add a reference of immediate if s.imm_labels is false // enabled. @@ -174,20 +185,35 @@ void DisasmMap::Disasm(const DataView &code, const Settings &s) : (node->ref_kinds & kRef1Mask); const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1; if (has_code_ref1) { - const TracedNodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) - ? TracedNodeType::kData : TracedNodeType::kInstruction; + const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask)) + ? NodeType::kData : NodeType::kRefInstruction; const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds); - insertReferencedBy(node->address, node->ref1_addr, type, code, ref_type); + auto &ref_node = insertReferencedBy( + node->address, node->ref1_addr, type, ref_type); + if (ref_node.op.opcode == OpCode::kNone) { + if (s.follow_jumps) { + Disasm(code, s, ref_node.address, true); + } else { + ref_node.DisasmAsRaw(code); + } + } } const bool has_ref2 = (node->ref_kinds & kRef2Mask); const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size); if (has_code_ref2) { - const TracedNodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) - ? TracedNodeType::kData : TracedNodeType::kInstruction; + const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask)) + ? NodeType::kData : NodeType::kRefInstruction; const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds); - insertReferencedBy(node->address, node->ref2_addr, type, code, ref_type); + auto &ref_node = insertReferencedBy( + node->address, node->ref2_addr, type, ref_type); + if (ref_node.op.opcode == OpCode::kNone) { + if (s.follow_jumps) { + Disasm(code, s, ref_node.address, true); + } else { + ref_node.DisasmAsRaw(code); + } + } } - i += node->size; } } @@ -376,7 +402,6 @@ static void RenderNodeDisassembly( arg.SNPrint(arg_str, kArgsBufferSize); fprintf(output, ", %s", arg_str); } - fprintf(output, "\n"); } else { const bool with_ref = node.ref_kinds && s.labels && (s.abs_labels || s.rel_labels); const auto *ref1 = (node.ref_kinds & kRef1Mask) @@ -493,7 +518,7 @@ static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data) exit(1); } else { // Valid value - disasm_map.InsertTracedNode(address, TracedNodeType::kInstruction); + disasm_map.InsertNode(address, NodeType::kTracedInstruction); } if (startptr != endptr) { i += endptr - startptr - 1; @@ -617,6 +642,8 @@ static bool ApplyFeature(Settings& s, const char *feature_arg) { &Settings::xrefs_from, "xrefs-from" }, { &Settings::xrefs_to, "xrefs-to" }, { &Settings::imm_hex, "imm-hex" }, + { &Settings::follow_jumps, "follow-jumps" }, + { &Settings::walk, "walk" }, }; constexpr size_t sizeof_no_prefix = (sizeof "no-") - 1; const bool disable = FeatureStringHasPrefixNo(feature_arg); @@ -670,6 +697,9 @@ static void PrintUsage(FILE *s, const char *argv0) " xrefs-from Print xrefs comments above all places that have xrefs.\n" " xrefs-to Print xrefs comments after all branch instructions.\n" " imm-hex Print all immediate values as hexadecimal numbers.\n" + " follow-jumps Follow jumps to statically known locations.\n" + " walk Try best to detect further instructions following known\n" + " traced locations without overcommitting.\n" , argv0); } @@ -753,7 +783,11 @@ int main(int, char* argv[]) FILE *output_stream = stdout; FILE *trace_stream = nullptr; if (input_file_name) { - input_stream = fopen(input_file_name, "r"); + if (0 == strcmp(input_file_name, "-")) { + input_stream = stdin; + } else { + input_stream = fopen(input_file_name, "r"); + } if (input_stream == nullptr) { const int err = errno; fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", input_file_name, err, strerror(err)); @@ -774,7 +808,15 @@ int main(int, char* argv[]) } } if (trace_file_name) { - trace_stream = fopen(trace_file_name, "r"); + if (0 == strcmp(trace_file_name, "-")) { + if (input_stream == stdin) { + fprintf(stderr, "error: trace stream and input stream cannot be both stdin\n"); + return EXIT_FAILURE; + } + trace_stream = stdin; + } else { + trace_stream = fopen(trace_file_name, "r"); + } if (trace_stream == nullptr) { const int err = errno; fprintf(stderr, "main: fopen(\"%s\", \"r\"): Error (%d): \"%s\"\n", trace_file_name, err, strerror(err)); @@ -788,11 +830,7 @@ int main(int, char* argv[]) if (trace_stream != nullptr) { fclose(trace_stream); } - if (output_stream != stdout) { - fclose(output_stream); - } - if (input_stream != stdin) { - fclose(input_stream); - } + fclose(output_stream); + fclose(input_stream); return ret; } @@ -7,7 +7,7 @@ AS=m68k-none-elf-as OBJCOPY=m68k-none-elf-objcopy LD="m68k-none-elf-ld -Ttest.ld" -DISASM="./cmake-build/m68k-disasm -fabs-labels -frel-labels -flabels -fimm-hex" +DISASM="./cmake-build/m68k-disasm -fabs-labels -frel-labels -flabels -fimm-hex -ffollow-jumps" TEST_DIR=/tmp/m68k-disasm-tests set -e diff --git a/test_labels_referencing.bash b/test_labels_referencing.bash index ffb66ca..978dbc7 100644 --- a/test_labels_referencing.bash +++ b/test_labels_referencing.bash @@ -7,7 +7,7 @@ AS=m68k-none-elf-as OBJCOPY=m68k-none-elf-objcopy LD="m68k-none-elf-ld -Ttest.ld" -DISASM="./cmake-build/m68k-disasm" +DISASM="./cmake-build/m68k-disasm -ffollow-jumps" TEST_DIR=/tmp/m68k-disasm-tests-labels-referencing set -e diff --git a/test_random.bash b/test_random.bash index accb975..6d2b17c 100644 --- a/test_random.bash +++ b/test_random.bash @@ -7,7 +7,7 @@ AS=m68k-none-elf-as OBJCOPY=m68k-none-elf-objcopy LD="m68k-none-elf-ld -Ttest.ld" -DISASM="./cmake-build/m68k-disasm -frdc -fxrefs-to -fxrefs-from -flabels -frel-labels -fabs-labels -fshort-ref-local-labels -fimm-hex" +DISASM="./cmake-build/m68k-disasm -frdc -fxrefs-to -fxrefs-from -flabels -frel-labels -fabs-labels -fshort-ref-local-labels -fimm-hex -ffollow-jumps" TEST_DIR=/tmp/m68k-disasm-random-tests set -e diff --git a/test_walk_and_follow_jumps.bash b/test_walk_and_follow_jumps.bash new file mode 100644 index 0000000..2f317b4 --- /dev/null +++ b/test_walk_and_follow_jumps.bash @@ -0,0 +1,200 @@ +#!/usr/bin/env bash +# +# SPDX-License-Identifier: Unlicense +# +# Tests against reference text for -ffollow-jumps and -fwalk features + +TEST_DIR=/tmp/m68k-disasm-follow-jumps-walk-tests +DISASM="./cmake-build/m68k-disasm -flabels -frel-labels -fabs-labels" + +set -e +CRED="\033[31m" +CGREEN="\033[32m" +CRST="\033[39m" + +rm -rf ${TEST_DIR} +mkdir -p ${TEST_DIR} + +OUTPUT_ASM="$TEST_DIR"/output.S +TRACE="$TEST_DIR"/trace.txt +REFERENCE="$TEST_DIR"/reference.S +REFERENCE_W="$TEST_DIR"/reference_w.S +REFERENCE_F="$TEST_DIR"/reference_f.S +REFERENCE_WF="$TEST_DIR"/reference_wf.S + +run_test_inner() { + local test_name=$1 + local disasm_args="$2" + local input="$3" + local reference="$4" + echo -ne "Test \"${test_name}\" ($disasm_args)... " + echo -ne "$input" | ${DISASM} --indent=' ' $disasm_args -t "$TRACE" -o "$OUTPUT_ASM" - + if ! diff --ignore-trailing-space "$reference" "$OUTPUT_ASM" >/dev/null 2>&1; then + echo -e "${CRED}FAIL${CRST}: output and reference text files do not match" + diff --color=always --unified --ignore-trailing-space "$reference" "$OUTPUT_ASM" || true + else + echo -e "${CGREEN}OK${CRST}" + fi +} + +run_test() { + local test_name=$1 + local input="$2" + local reference="$3" + local reference_w="$4" + local reference_f="$5" + local reference_wf="$6" + run_test_inner "$test_name" "" "$input" "$reference" + run_test_inner "$test_name" "-fwalk" "$input" "$reference_w" + run_test_inner "$test_name" "-ffollow-jumps" "$input" "$reference_f" + run_test_inner "$test_name" "-fwalk -ffollow-jumps" "$input" "$reference_wf" +} + + +echo -e "0" >"$TRACE" +cat >"$REFERENCE" << EOF + nop + .short 0x4e71 +EOF +cat >"$REFERENCE_W" << EOF + nop + nop +EOF +# $REFERENCE_F is same as $REFERENCE +# $REFERENCE_WF is same as $REFERENCE_W +run_test "linear nops, trace @0" "\x4e\x71\x4e\x71" \ + "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W" + + +cat >"$REFERENCE" << EOF + nop + .short 0x6002 + .short 0x4e71 + .short 0x4e71 +EOF +cat >"$REFERENCE_W" << EOF + nop + bras L00000006 + .short 0x4e71 +L00000006: + .short 0x4e71 +EOF +# $REFERENCE_F is same as $REFERENCE +cat >"$REFERENCE_WF" << EOF + nop + bras L00000006 + .short 0x4e71 +L00000006: + nop +EOF +run_test "nop and unconditional branch, trace @0" "\x4e\x71\x60\x02\x4e\x71\x4e\x71" \ + "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_WF" + + +cat >"$REFERENCE" << EOF + nop + .short 0x6602 + .short 0x4e71 + .short 0x4e71 +EOF +cat >"$REFERENCE_W" << EOF + nop + bnes L00000006 + nop +L00000006: + nop +EOF +# $REFERENCE_F is same as $REFERENCE +# $REFERENCE_WF is same as $REFERENCE_W +run_test "nop and conditional branch, trace @0" "\x4e\x71\x66\x02\x4e\x71\x4e\x71" \ + "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W" + + +cat >"$REFERENCE" << EOF + bnes L00000004 + .short 0x4e71 +L00000004: + .short 0x4e71 +EOF +cat >"$REFERENCE_W" << EOF + bnes L00000004 + nop +L00000004: + nop +EOF +cat >"$REFERENCE_F" << EOF + bnes L00000004 + .short 0x4e71 +L00000004: + nop +EOF +# $REFERENCE_WF is same as $REFERENCE_W +run_test "conditional branch, trace @0" "\x66\x02\x4e\x71\x4e\x71" \ + "$REFERENCE" "$REFERENCE_W" "$REFERENCE_F" "$REFERENCE_W" + + +cat >"$REFERENCE" << EOF + bras L00000004 + .short 0x4e71 +L00000004: + .short 0x4e71 +EOF +# $REFERENCE_W is same as $REFERENCE +cat >"$REFERENCE_F" << EOF + bras L00000004 + .short 0x4e71 +L00000004: + nop +EOF +# $REFERENCE_WF is same as $REFERENCE_F +run_test "unconditional branch, trace @0" "\x60\x02\x4e\x71\x4e\x71" \ + "$REFERENCE" "$REFERENCE" "$REFERENCE_F" "$REFERENCE_F" + + +echo -e "0\n2" >"$TRACE" +cat >"$REFERENCE" << EOF +L00000000: + nop + bnes L00000000 + .short 0x4e71 +EOF +cat >"$REFERENCE_W" << EOF +L00000000: + nop + bnes L00000000 + nop +EOF +# $REFERENCE_F is same as $REFERENCE +# $REFERENCE_WF is same as $REFERENCE_W +run_test "nop and conditional branch backwards, trace @0, @2" "\x4e\x71\x66\xfc\x4e\x71" \ + "$REFERENCE" "$REFERENCE_W" "$REFERENCE" "$REFERENCE_W" + + +echo -e "2" >"$TRACE" +cat >"$REFERENCE" << EOF +L00000000: + .short 0x4e71 + bnes L00000000 + .short 0x4e71 +EOF +cat >"$REFERENCE_W" << EOF +L00000000: + .short 0x4e71 + bnes L00000000 + nop +EOF +cat >"$REFERENCE_F" << EOF +L00000000: + nop + bnes L00000000 + .short 0x4e71 +EOF +cat >"$REFERENCE_WF" << EOF +L00000000: + nop + bnes L00000000 + nop +EOF +run_test "nop and conditional branch backwards, trace @2" "\x4e\x71\x66\xfc\x4e\x71" \ + "$REFERENCE" "$REFERENCE_W" "$REFERENCE_F" "$REFERENCE_WF" + |