summaryrefslogtreecommitdiff
path: root/src/disasm.cpp
blob: 847d11de6d7198fae124559bc783a4e3e0df6290 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
/* SPDX-License-Identifier: Unlicense
 */

#include "disasm.h"
#include "m68k.h"

#include <cassert>
#include <cstring>
#include <cerrno>

void DisasmNode::AddReferencedBy(const uint32_t address_from, const ReferenceType ref_type)
{
    ReferenceNode *node{};
    if (this->last_ref_by) {
        node = this->last_ref_by;
    } else {
        node = new ReferenceNode{};
        assert(node);
        this->ref_by = this->last_ref_by = node;
    }
    node->refs[node->refs_count] = ReferenceRecord{ref_type, address_from};
    node->refs_count++;
    if (node->refs_count >= kRefsCountPerBuffer) {
        ReferenceNode *new_node = new ReferenceNode{};
        assert(new_node);
        node->next = new_node;
        this->last_ref_by = new_node;
    }
}

DisasmNode::~DisasmNode()
{
    ReferenceNode *ref{this->ref_by};
    while (ref) {
        ReferenceNode *prev = ref;
        ref = ref->next;
        delete prev;
    }
}

static constexpr uint32_t AlignInstructionAddress(const uint32_t address)
{
    return address & ~1UL;
}

DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type)
{
    auto *node = findNodeByAddress(address);
    if (node) {
        // Instruction nodes take precedence over data nodes. If a node that
        // was previously accessed only as data now turns out to be an
        // instruction, then it must become an instruction node.
        if (IsInstruction(type) && !IsInstruction(node->type)) {
            *const_cast<NodeType*>(&node->type) = type;
            // Make sure it is OpCode::kNone so it will be properly disassembled
            node->op = Op{};
        }
        return *node;
    }
    node = new DisasmNode(DisasmNode{type, AlignInstructionAddress(address)});
    assert(node);
    _map[address / kInstructionSizeStepBytes] = node;
    return *node;
}

DisasmNode &DisasmMap::insertReferencedBy(
        const uint32_t by_addr,
        const uint32_t ref_addr,
        const NodeType type,
        const ReferenceType ref_type)
{
    auto &ref_node = insertNode(ref_addr, type);
    ref_node.AddReferencedBy(by_addr, ref_type);
    return ref_node;
}

void DisasmMap::InsertNode(uint32_t address, NodeType type)
{
    assert(_type == DisasmMapType::kTraced);
    insertNode(address, type);
}

constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t)
{
    if (t == ELF::Symbol32Type::kObject) {
        return SymbolType::kObject;
    }
    if (t == ELF::Symbol32Type::kFunc) {
        return SymbolType::kFunction;
    }
    return SymbolType::kNone;
}

static int cmpsym(const void *p1, const void *p2)
{
    const Symbol *sym1 = reinterpret_cast<const Symbol *>(p1);
    const Symbol *sym2 = reinterpret_cast<const Symbol *>(p2);
    if (sym1->address == sym2->address) {
        return strcmp(sym1->name, sym2->name);
    }
    return sym1->address < sym2->address ? -1 : 1;
}

bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf)
{
    const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab");
    if (!symtab.IsValid()) {
        fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n");
        return true;
    }
    FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size);
    if (symtab_stream == nullptr) {
        const int err = errno;
        fprintf(stderr,
                "open_memstream() for symtab failed: Error (%d): \"%s\"\n",
                err, strerror(err));
        return false;
    }
    const Symbol null_symbol{};
    if (null_symbol.name != nullptr && *null_symbol.name != '\0') {
        const size_t ret = fwrite(
                &null_symbol, sizeof null_symbol, 1, symtab_stream);
        (void) ret;
        assert(ret == 1);
    }
    const size_t nentries = symtab.size/symtab.entsize;
    for (size_t i = 0; i < nentries; i++) {
        const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(i);
        const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) ||
            (elfsym.type() == ELF::Symbol32Type::kObject) ||
            (elfsym.type() == ELF::Symbol32Type::kFunc);
        if (has_proper_type) {
            // XXX: Is it possible that it may have binding other than
            // Symbol32Bind::kGlobal when it is kFunc?
            // XXX: Yes, it is possible. It may be kLocal or kWeak for sure.
            const auto type = SymbolTypeFromElf32SymbolType(elfsym.type());
            const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size};
            if (symbol.name != nullptr && *symbol.name != '\0') {
                const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream);
                (void) ret;
                assert(ret == 1);
            }
        }
    }
    // No more symbols are going to be added further, so it may be closed now.
    fclose(symtab_stream);
    // The RenderNodeDisassembly() function expects the symbol table to be
    // sorted.
    qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym);
    return true;
}

static constexpr bool IsNextLikelyAnInstruction(const Op &op)
{
    return (op.opcode != OpCode::kNone &&
            op.opcode != OpCode::kRaw &&
            !IsBRA(op) &&
            op.opcode != OpCode::kJMP &&
            op.opcode != OpCode::kRTS &&
            op.opcode != OpCode::kRTE &&
            op.opcode != OpCode::kSTOP);
}

void DisasmMap::Disasm(
        const DataView &code, const Settings &s, size_t at, bool nested)
{
    // Some of logic of this function is covered by integration tests in
    // `test_walk_and_follow_jumps.bash`.
    bool inside_code_span = nested;
    while (at < Min(kRomSizeBytes, code.size)) {
        DisasmNode *node;
        if (_type == DisasmMapType::kTraced) {
            node = _map[at / kInstructionSizeStepBytes];
            if (!node) {
                if (inside_code_span) {
                    node = &insertNode(at, NodeType::kTracedInstruction);
                } else {
                    at += kInstructionSizeStepBytes;
                    continue;
                }
            }
        } else {
            node = &insertNode(at, NodeType::kTracedInstruction);
        }
        if (node->op.opcode == OpCode::kNone || inside_code_span) {
            const auto size = node->Disasm(code);
            assert(size >= kInstructionSizeStepBytes);
            if (canBeAllocated(*node)) {
                // Spread across the size
                for (size_t o = kInstructionSizeStepBytes; o < size; o++) {
                    _map[(node->address + o) / kInstructionSizeStepBytes] = node;
                }
            } else {
                node->DisasmAsRaw(code);
            }
        }
        inside_code_span = s.walk && IsNextLikelyAnInstruction(node->op);
        at += node->size;
        // NOTE: There is not much information about a reference passed further,
        // so just don't add a reference of immediate if s.imm_labels is false
        // enabled.
        const bool has_ref1 = (node->ref_kinds & kRef1ImmMask)
            ? s.imm_labels
            : (node->ref_kinds & kRef1Mask);
        const bool has_code_ref1 = node->ref1_addr < code.size && has_ref1;
        if (has_code_ref1) {
            const NodeType type = (node->ref_kinds & (kRef1ReadMask | kRef1WriteMask))
                ? NodeType::kData : NodeType::kRefInstruction;
            const auto ref_type = ReferenceTypeFromRefKindMask1(node->ref_kinds);
            auto &ref_node = insertReferencedBy(
                    node->address, node->ref1_addr, type, ref_type);
            if (ref_node.op.opcode == OpCode::kNone) {
                if (s.follow_jumps) {
                    Disasm(code, s, ref_node.address, true);
                } else {
                    ref_node.DisasmAsRaw(code);
                }
            }
        }
        const bool has_ref2 = (node->ref_kinds & kRef2Mask);
        const bool has_code_ref2 = (has_ref2 && node->ref2_addr < code.size);
        if (has_code_ref2) {
            const NodeType type = (node->ref_kinds & (kRef2ReadMask | kRef2WriteMask))
                ? NodeType::kData : NodeType::kRefInstruction;
            const auto ref_type = ReferenceTypeFromRefKindMask2(node->ref_kinds);
            auto &ref_node = insertReferencedBy(
                    node->address, node->ref2_addr, type, ref_type);
            if (ref_node.op.opcode == OpCode::kNone) {
                if (s.follow_jumps) {
                    Disasm(code, s, ref_node.address, true);
                } else {
                    ref_node.DisasmAsRaw(code);
                }
            }
        }
        if (nested && !inside_code_span) {
            return;
        }
    }
}

DisasmMap::~DisasmMap()
{
    for (size_t i = 0; i < kDisasmMapSizeElements; i++) {
        auto *const node = _map[i];
        if (!node) {
            continue;
        }
        const auto size = node->size / kInstructionSizeStepBytes;
        for (size_t o = 0; o < size; o++) {
            assert(_map[i + o] == node);
            _map[i + o] = nullptr;
        }
        delete node;
        i += size - 1;
    }
    if (_symtab != nullptr) {
        free(_symtab);
    }
}