diff options
author | Oxore <oxore@protonmail.com> | 2024-03-03 18:38:46 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2024-03-03 18:43:31 +0300 |
commit | c993531d0678de5e29c943fdbb912e1f20957765 (patch) | |
tree | 04c827e3a4f7b739c7bb0c655790bfd0e3401e2b | |
parent | 3ae20774096ddb42ea03142d0c55f9564da4ba50 (diff) |
Impl ELF symbols extraction
-rw-r--r-- | src/common.h | 1 | ||||
-rw-r--r-- | src/elf_format.h | 156 | ||||
-rw-r--r-- | src/elf_image.cpp | 229 | ||||
-rw-r--r-- | src/elf_image.h | 34 | ||||
-rw-r--r-- | src/main.cpp | 463 |
5 files changed, 771 insertions, 112 deletions
diff --git a/src/common.h b/src/common.h index 76fc956..f94bb65 100644 --- a/src/common.h +++ b/src/common.h @@ -27,6 +27,7 @@ struct Settings { bool imm_hex{}; bool follow_jumps{}; bool walk{}; + bool symbols{}; BFDTarget bfd{}; const char *indent{"\t"}; }; diff --git a/src/elf_format.h b/src/elf_format.h index b5a268a..c60ac64 100644 --- a/src/elf_format.h +++ b/src/elf_format.h @@ -12,6 +12,7 @@ constexpr size_t kIdentSize = 16; constexpr size_t kHeaderSize = kIdentSize + 36; constexpr size_t kMagicSize = 4; constexpr size_t kProgramHeaderSize = 32; +constexpr size_t kSectionHeaderSize = 40; using Address = uint32_t; using Offset = uint32_t; @@ -143,6 +144,13 @@ enum class Machine : uint16_t { kUnknown, }; +static constexpr inline uint8_t ParseU8(const uint8_t *d) { return *d; } + +static constexpr inline uint8_t ParseU8(const uint8_t *d, DataEncoding) +{ + return ParseU8(d); +} + static constexpr inline uint16_t ParseU16(const uint8_t *d, DataEncoding e) { if (e == DataEncoding::k2MSB) { @@ -194,15 +202,15 @@ struct Header32Raw { uint16_t machine; uint32_t version; Address entry; - Offset phoff; - Offset shoff; + Offset phoff; ///< Program header table's file offset in bytes + Offset shoff; ///< Section header table's file offset in bytes uint32_t flags; - uint16_t ehsize; + uint16_t ehsize; ///< ELF's header size in bytes uint16_t phentsize; uint16_t phnum; uint16_t shentsize; uint16_t shnum; - uint16_t shstrndx; + uint16_t shstrndx; ///< Index of .strtab in section header table static constexpr inline auto FromBytes(const uint8_t *data) { const auto ident = Ident32Raw::FromBytes(data); @@ -232,15 +240,15 @@ struct Header32 { Machine machine; Version version; Address entry; - Offset phoff; - Offset shoff; + Offset phoff; ///< Program header table's file offset in bytes + Offset shoff; ///< Section header table's file offset in bytes uint32_t flags; - uint16_t ehsize; + uint16_t ehsize; ///< ELF's header size in bytes uint16_t phentsize; uint16_t phnum; uint16_t shentsize; uint16_t shnum; - uint16_t shstrndx; + uint16_t shstrndx; ///< Index of .strtab in section header table static constexpr inline auto FromBytes(const uint8_t *data) { const auto raw = Header32Raw::FromBytes(data); @@ -308,14 +316,130 @@ struct ProgramHeader32 { static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e) { return ProgramHeader32{ - /* type */ ParseU32(data + 0, e), - /* offset */ ParseU32(data + 4, e), - /* vaddr */ ParseU32(data + 8, e), - /* paddr */ ParseU32(data + 12, e), - /* filesz */ ParseU32(data + 16, e), - /* memsz */ ParseU32(data + 20, e), - /* flags */ ParseU32(data + 24, e), - /* align */ ParseU32(data + 28, e), + /* .type = */ ParseU32(data + 0, e), + /* .offset = */ ParseU32(data + 4, e), + /* .vaddr = */ ParseU32(data + 8, e), + /* .paddr = */ ParseU32(data + 12, e), + /* .filesz = */ ParseU32(data + 16, e), + /* .memsz = */ ParseU32(data + 20, e), + /* .flags = */ ParseU32(data + 24, e), + /* .align = */ ParseU32(data + 28, e), + }; + } +}; + +enum class SectionHeader32Type: uint32_t { + kNull = 0, + kProgBits = 1, + kSymtab = 2, + kStrtab = 3, + kRela = 4, + kHash = 5, + kDynamic = 6, + kNote = 7, + kNobits = 8, + kRel = 9, + kShlib = 10, + kDynsym = 11, + kLoProc = 0x70000000, + kHiProc = 0x7fffffff, + kLoUser = 0x80000000, + kHiUser = 0xffffffff, +}; + +struct SectionHeader32 { + uint32_t name{}; + uint32_t type{}; + uint32_t flags{}; + Address addr{}; + Offset offset{}; + uint32_t size{}; ///< Size of whole section in bytes + uint32_t link{}; + uint32_t info{}; + uint32_t addralign{}; + uint32_t entsize{}; ///< Size of a single entry (every entry has same size) + static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e) + { + return SectionHeader32{ + /* .name = */ ParseU32(data + 0, e), + /* .type = */ ParseU32(data + 4, e), + /* .flags = */ ParseU32(data + 8, e), + /* .addr = */ ParseU32(data + 12, e), + /* .offset = */ ParseU32(data + 16, e), + /* .size = */ ParseU32(data + 20, e), + /* .link = */ ParseU32(data + 24, e), + /* .info = */ ParseU32(data + 28, e), + /* .addralign = */ ParseU32(data + 32, e), + /* .entsize = */ ParseU32(data + 36, e), + }; + } + constexpr bool IsValid(void) const { return name != 0; } +}; + +enum class Symbol32Bind: unsigned char { + kLocal = 0, + kGlobal = 1, + kWeak = 2, + kUnknown3 = 3, + kUnknown4 = 4, + kUnknown5 = 5, + kUnknown6 = 6, + kUnknown7 = 7, + kUnknown8 = 8, + kUnknown9 = 9, + kUnknown10 = 10, + kUnknown11 = 11, + kUnknown12 = 12, + kLoProc = 13, + kUnknown14 = 14, + kHiProc = 15, +}; + +enum class Symbol32Type: unsigned char { + kNoType = 0, + kObject = 1, + kFunc = 2, + kSection = 3, + kFile = 4, + kUnknown5 = 5, + kUnknown6 = 6, + kUnknown7 = 7, + kUnknown8 = 8, + kUnknown9 = 9, + kUnknown10 = 10, + kUnknown11 = 11, + kUnknown12 = 12, + kLoProc = 13, + kUnknown14 = 14, + kHiProc = 15, +}; + +struct Symbol32 { + const char *name{}; + uint32_t namendx{}; + Address value{}; ///< Value or address, e.g address of a variable in RAM + uint32_t size{}; ///< Size of a symbol, e.g length of a function, etc. + unsigned char info{}; + unsigned char other{}; + uint16_t shndx{}; ///< Index of a section the symbol belongs to + constexpr Symbol32Bind bind() const + { + return static_cast<Symbol32Bind>(info >> 4); + } + constexpr Symbol32Type type() const + { + return static_cast<Symbol32Type>(info & 0xf); + } + static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e) + { + return Symbol32{ + /* .name = */ nullptr, + /* .namendx = */ ParseU32(data + 0, e), + /* .value = */ ParseU32(data + 4, e), + /* .size = */ ParseU32(data + 8, e), + /* .info = */ ParseU8(data + 12, e), + /* .other = */ ParseU8(data + 13, e), + /* .shndx = */ ParseU16(data + 14, e), }; } }; diff --git a/src/elf_image.cpp b/src/elf_image.cpp index 6db72f3..6572a85 100644 --- a/src/elf_image.cpp +++ b/src/elf_image.cpp @@ -4,9 +4,18 @@ #include "elf_image.h" #include <cassert> +#include <cstdarg> #include <cstdio> +#include <cstring> -ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes( +#ifdef __GNUC__ +#define _PRINTF(strindex, first) __attribute__((format(printf, strindex, first))) +#else +#define _PRINTF(strindex, first) +#endif + + +ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromView( const DataView &d, const DataEncoding e) { if (d.buffer == nullptr || d.size == 0) { @@ -22,119 +31,198 @@ ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes( return ELF::ProgramHeader32Table{ headers, size, }; } +static _PRINTF(1, 2) char *Error(const char *fmt, ...) +{ + if (0 == strlen(fmt)) { + return nullptr; + } + char *error{}; + size_t size{}; + FILE *error_stream = open_memstream(&error, &size); + assert(error_stream); + va_list ap; + va_start(ap, fmt); + vfprintf(error_stream, fmt, ap); + va_end(ap); + fclose(error_stream); + assert(error != nullptr); + assert(*error != '\0'); + return error; +} + +static char *ValidateSectionHeader( + const DataView& d, + ELF::SectionHeader32 sh, + size_t shstrndx, + const char *shname) +{ + if (sh.offset >= d.size) { + return Error( + "Section header %zu (%s) offset is too big to fit into the file: " + "expected (<%zu), got (%zu)", + shstrndx, + shname, + d.size, + size_t(sh.offset)); + } + if (sh.size >= d.size - sh.offset) { + return Error( + "Section header %zu (%s) is too big to fit into the file: " + "expected (<%zu), got (%zu)", + shstrndx, + shname, + d.size - sh.offset, + size_t(sh.size)); + } + if (sh.entsize) { + if (sh.entsize > sh.size) { + return Error( + "Section header %zu (%s) entry size is too big to fit into " + "the table: expected (<%zu), got (%zu)", + shstrndx, + shname, + size_t(sh.size), + size_t(sh.entsize)); + } + size_t const remainder = sh.size % sh.entsize; + if (remainder) { + return Error( + "Section header %zu (%s) size is not multiple of entsize: " + "expected (%zu %% %zu == 0), got (%zu)", + shstrndx, + shname, + size_t(sh.size), + size_t(sh.entsize), + remainder); + } + } + return nullptr; +} + static char *ValidateELF(const DataView& d) { - char *error; - size_t size; - FILE *s = open_memstream(&error, &size); - assert(s); using namespace ELF; if (d.size < kHeaderSize) { - fprintf( - s, + return Error( "data size (%zu) is lower than minimum ELF header size (%zu): " "ELF header could not fit", d.size, kHeaderSize); - fclose(s); - return error; } const auto header_raw = Header32Raw::FromBytes(d.buffer); const auto header = Header32::FromBytes(d.buffer); if (!MagicIsValid(header.ident.magic)) { const uint8_t *m = header.ident.magic; - fprintf( - s, + return Error( "ELF Magic is invalid: expected [%02x %02x %02x %02x], got [%02x %02x %02x %02x]", 0x7f, 'E', 'L', 'F', m[0], m[1], m[2], m[3]); - fclose(s); - return error; } if (header.ident.version != Version::kCurrent) { - fprintf( - s, + return Error( "version (0x%02x) of ELF header.ident.version is not supported, " "only \"Current\" version (0x%02x) is supported", header_raw.ident.version, static_cast<int>(Version::kCurrent)); - fclose(s); - return error; } if (header.version != Version::kCurrent) { - fprintf( - s, + return Error( "version (0x%02x) of ELF header.version is not supported, " "only \"Current\" version (0x%02x) is supported", header_raw.version, static_cast<int>(Version::kCurrent)); - fclose(s); - return error; } if (header.type != ObjectType::kExec) { - fprintf( - s, + return Error( "object type (0x%02x) is not supported, " "only Exec (0x%02x) object type is supported", header_raw.type, static_cast<int>(ObjectType::kExec)); - fclose(s); - return error; } if (header.machine != Machine::k68k) { - fprintf( - s, + return Error( "machine (0x%02x) is not supported, " "only Motorola 68k (0x%02x) machine is supported", header_raw.machine, static_cast<int>(Machine::k68k)); - fclose(s); - return error; + } + if (header.ehsize > d.size) { + return Error( + "ELF header ehsize is too big to fit into the file: expected (<=%zu), got (%zu)", + size_t(d.size), + size_t(header.ehsize)); } if (header.phentsize != kProgramHeaderSize) { - fprintf( - s, + return Error( "phentsize is invalid: expected (%zu), got (%zu)", kProgramHeaderSize, size_t(header.phentsize)); - fclose(s); - return error; + } + if (header.shentsize != kSectionHeaderSize) { + return Error( + "shentsize is invalid: expected (%zu), got (%zu)", + kSectionHeaderSize, + size_t(header.shentsize)); + } + if (header.shoff < header.ehsize) { + return Error( + "shoff intersects with an ELF header: expected (>%zu), got (%zu)", + size_t(header.ehsize), + size_t(header.shoff)); + } + if (header.shoff >= d.size) { + return Error( + "shoff is too big for a file size: expected (<%zu), got (%zu)", + d.size, + size_t(header.shoff)); + } + if (header.shnum > (d.size - header.shoff) / header.shentsize) { + return Error( + "shnum is too big to fit shared headers table into the file: expected (<=%zu), got (%zu)", + (d.size - header.shoff) / header.shentsize, + size_t(header.shnum)); + } + if (header.shstrndx > header.shnum) { + return Error( + "shstrndx exceeds shared headers table entries count: expected (<%zu), got (%zu)", + size_t(header.shnum), + size_t(header.shstrndx)); + } + if (header.shstrndx) { + const auto shstrtab = ELF::SectionHeader32::FromBytes( + d.buffer + header.shoff + header.shstrndx * kSectionHeaderSize, + header.ident.data_encoding); + char *error = ValidateSectionHeader(d, shstrtab, header.shstrndx, ".shstrtab"); + if (error != nullptr) { + return error; + } } if (d.size < header.phoff + header.phentsize * header.phnum) { - fprintf( - s, + return Error( "data size (%zu) is lower than program header table end offset (%zu): " "program header table could not fit", d.size, size_t(header.phoff + header.phentsize * header.phnum)); - fclose(s); - return error; } bool has_segment_with_entry = false; for (size_t i = 0; i < header.phnum; i++) { const auto ph = ProgramHeader32::FromBytes( d.buffer + header.phoff + header.phentsize * i, header.ident.data_encoding); if (d.size < ph.offset + ph.filesz) { - fprintf( - s, + return Error( "data size (%zu) is lower than pht[%zu] segment end offset (%zu): " "segment could not fit", d.size, i, size_t(ph.offset + ph.filesz)); - fclose(s); - return error; } const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == (kPHFlagX | kPHFlagR); if (ParsePHType(ph.type) == PHType::kLoad && is_code && ph.vaddr != 0) { - fprintf( - s, + return Error( "pht[%zu] segment is a code, but it's vaddr (0x%08x) is not zero: " "non-zero base address is not supported", i, ph.vaddr); - fclose(s); - return error; } const bool contains_entry = header.entry >= ph.vaddr && header.entry < ph.vaddr + ph.memsz; if (ParsePHType(ph.type) == PHType::kLoad && is_code && contains_entry) { @@ -142,12 +230,8 @@ static char *ValidateELF(const DataView& d) } } if (!has_segment_with_entry) { - fprintf(s, "no code segments containing entry point (0x%08x) found", header.entry); - fclose(s); - return error; + return Error("no code segments containing entry point (0x%08x) found", header.entry); } - fclose(s); - free(error); return nullptr; } @@ -157,10 +241,53 @@ ELF::Image::Image(DataBuffer&& data) , _h(_error ? ELF::Header32{} : ELF::Header32::FromBytes(_data.View().buffer)) , _pht(_error ? ELF::ProgramHeader32Table{} - : ELF::ProgramHeader32Table::FromBytes( + : ELF::ProgramHeader32Table::FromView( _data.View(_h.phoff, _h.phnum * kProgramHeaderSize), _h.ident.data_encoding)) + , _shstrtab(_error + ? ELF::SectionHeader32{} + : ELF::SectionHeader32::FromBytes( + _data.buffer + _h.shoff + _h.shstrndx * kSectionHeaderSize, _h.ident.data_encoding)) + , _symtab(GetSectionHeaderByName(".symtab")) + , _strtab(GetSectionHeader(_symtab.link)) {} +ELF::SectionHeader32 ELF::Image::GetSectionHeaderByName(const char *name) const +{ + const uint32_t index = GetSectionHeaderIndexByName(name); + if (index == 0) { + return SectionHeader32{}; + } + const size_t offset = _h.shoff + kSectionHeaderSize * index; + return SectionHeader32::FromBytes(_data.buffer + offset, _h.ident.data_encoding); +} + +uint32_t ELF::Image::GetSectionHeaderIndexByName(const char *name) const +{ + if (!IsValid()) { + return 0; + } + if (name == nullptr) { + return 0; + } + if (!_shstrtab.IsValid()) { + return 0; + } + for (uint32_t index = 0; index < _h.shnum; index++) { + const size_t offset = _h.shoff + kSectionHeaderSize * index; + if (offset + kSectionHeaderSize > _data.buffer_size) { + return 0; + } + const auto header = SectionHeader32::FromBytes( + _data.buffer + offset, _h.ident.data_encoding); + const char *name_in_elf = reinterpret_cast<const char *>( + _data.buffer + _shstrtab.offset + header.name); + if (0 == strcmp(name, name_in_elf)) { + return index; + } + } + return 0; +} + ELF::Image::~Image() { if (_error) { diff --git a/src/elf_image.h b/src/elf_image.h index b7c7123..b753008 100644 --- a/src/elf_image.h +++ b/src/elf_image.h @@ -13,7 +13,7 @@ namespace ELF { struct ProgramHeader32Table { const ProgramHeader32 *headers{}; size_t size{}; - static ProgramHeader32Table FromBytes(const DataView &, DataEncoding); + static ProgramHeader32Table FromView(const DataView &, DataEncoding); }; struct Segment { @@ -26,6 +26,7 @@ class Image { char *const _error; const Header32 _h; const ProgramHeader32Table _pht; + const SectionHeader32 _shstrtab, _symtab, _strtab; public: explicit Image(DataBuffer&&); ~Image(); @@ -50,6 +51,37 @@ public: return DataView{}; }; constexpr const char *Error() const { return _error; } + ELF::SectionHeader32 GetSectionHeaderByName(const char *name) const; + constexpr const ELF::SectionHeader32 GetSectionHeader(uint32_t index) const + { + if (index > _h.shnum) { + return SectionHeader32{}; + } + const size_t offset = _h.shoff + kSectionHeaderSize * index; + if (offset + kSectionHeaderSize > _data.buffer_size) { + return SectionHeader32{}; + } + return SectionHeader32::FromBytes( + _data.buffer + offset, _h.ident.data_encoding); + } + uint32_t GetSectionHeaderIndexByName(const char *name) const; + constexpr ELF::Symbol32 GetSymbolByIndex(uint32_t index) const + { + if (!IsValid()) { + return Symbol32{}; + } + if (_symtab.entsize == 0 || index >= _symtab.size / _symtab.entsize) { + return Symbol32{}; + } + auto symbol = Symbol32::FromBytes( + _data.buffer + _symtab.offset + _symtab.entsize * index, + _h.ident.data_encoding); + if (symbol.namendx < _strtab.size && _data.buffer[_strtab.offset + _strtab.size] == '\0') { + symbol.name = reinterpret_cast<const char *>( + _data.buffer + _strtab.offset + symbol.namendx); + } + return symbol; + } }; } diff --git a/src/main.cpp b/src/main.cpp index 89aa2ea..2a9b312 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -27,6 +27,19 @@ #include <cerrno> #include <climits> +enum class SymbolType: int { + kNone = 0, + kFunction, + kObject, +}; + +struct Symbol { + uint32_t address{}; + SymbolType type{}; + const char *name{}; + size_t size{}; +}; + enum class DisasmMapType { kTraced, kRaw, @@ -35,15 +48,25 @@ enum class DisasmMapType { class DisasmMap { const DisasmMapType _type; DisasmNode *_map[kDisasmMapSizeElements]{}; + Symbol *_symtab{}; + size_t _symtab_size{}; constexpr DisasmNode *findNodeByAddress(uint32_t address) const; + constexpr size_t findFirstSymbolAtAddress( + uint32_t address, bool return_last_considered=false) const; DisasmNode &insertNode(uint32_t address, NodeType); + void insertSymbol(uint32_t address, ReferenceType ref_type); DisasmNode &insertReferencedBy( const uint32_t by_addr, const uint32_t ref_addr, const NodeType type, const ReferenceType ref_type); constexpr bool canBeAllocated(const DisasmNode& node) const; + constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; } public: + constexpr const Symbol *Symtab() const { return _symtab; } + constexpr size_t SymbolsCount() const { return symbolsCount(); } + constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const; + constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const; constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const { return findNodeByAddress(address); @@ -53,6 +76,7 @@ public: assert(_type == DisasmMapType::kTraced); insertNode(address, type); } + bool ApplySymbolsFromElf(const ELF::Image &); void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false); DisasmMap(DisasmMapType type): _type(type) {} ~DisasmMap(); @@ -65,12 +89,81 @@ constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const return nullptr; } +constexpr size_t DisasmMap::findFirstSymbolAtAddress( + uint32_t address, bool return_last_considered) const +{ + if (_symtab == nullptr || symbolsCount() < 1) { + return 0; + } + // A symbol at index 0 is a special null symbol and it must be skipped. + size_t start = 1, len = symbolsCount() - start, middle = start, index = 0; + while (1) { + if (len == 0) { + if (return_last_considered && index == 0) { + index = start; + } + break; + } + middle = start + len / 2; + if (_symtab[middle].address >= address) { + if (_symtab[middle].address == address) { + index = middle; + } + // Look at the span right before the middle one on the next step + len = middle - start; + } else { + // Look at the span right after the middle one on the next step + len -= middle + 1 - start; + start = middle + 1; + } + } + return index; +} + +static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length) +{ + return value >= at && value < at + length; +} + +constexpr bool DisasmMap::HasSymbolsInRange( + uint32_t const address, size_t const length) const +{ + size_t index = findFirstSymbolAtAddress(address, true); + if (index == 0) { + // The symtab is empty + return false; + } + if (IsWithinRange(_symtab[index].address, address, length)) { + // The symbol is found right at the address, which is unlikely + return true; + } + if (_symtab[index].address < address) { + // Maybe the next symbol falls into the range? + if (index + 1 >= symbolsCount()) { + // No more symbols after the index + return false; + } + index++; + } else { + // Maybe the previous symbol falls into the range? (unlikely at all) + if (index < 2) { + // No more symbols before the index + return false; + } + index--; + } + if (IsWithinRange(_symtab[index].address, address, length)) { + return true; + } + return false; +} + static constexpr uint32_t AlignInstructionAddress(const uint32_t address) { return address & ~1UL; } -DisasmNode &DisasmMap::insertNode(const uint32_t address, const NodeType type) +DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type) { auto *node = findNodeByAddress(address); if (node) { @@ -127,6 +220,8 @@ static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask r static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds) { + // FIXME: AFAIK it is impossible for a call instruction to have second + // argument. I can probably drop the first condition, but it needs testing return (ref_kinds & kRefCallMask) ? ReferenceType::kCall : (ref_kinds & kRef2ReadMask) @@ -147,6 +242,76 @@ static constexpr bool IsNextLikelyAnInstruction(const Op &op) op.opcode != OpCode::kSTOP); } +static int cmpsym(const void *p1, const void *p2) +{ + const Symbol *sym1 = reinterpret_cast<const Symbol *>(p1); + const Symbol *sym2 = reinterpret_cast<const Symbol *>(p2); + if (sym1->address == sym2->address) { + return strcmp(sym1->name, sym2->name); + } + return sym1->address < sym2->address ? -1 : 1; +} + +constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t) +{ + if (t == ELF::Symbol32Type::kObject) { + return SymbolType::kObject; + } + if (t == ELF::Symbol32Type::kFunc) { + return SymbolType::kFunction; + } + return SymbolType::kNone; +} + +bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf) +{ + const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab"); + if (!symtab.IsValid()) { + fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n"); + return true; + } + FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size); + if (symtab_stream == nullptr) { + const int err = errno; + fprintf(stderr, + "open_memstream() for symtab failed: Error (%d): \"%s\"\n", + err, strerror(err)); + return false; + } + const Symbol null_symbol{}; + if (null_symbol.name != nullptr && *null_symbol.name != '\0') { + const size_t ret = fwrite( + &null_symbol, sizeof null_symbol, 1, symtab_stream); + (void) ret; + assert(ret == 1); + } + const size_t nentries = symtab.size/symtab.entsize; + for (size_t i = 0; i < nentries; i++) { + const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(i); + const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) || + (elfsym.type() == ELF::Symbol32Type::kObject) || + (elfsym.type() == ELF::Symbol32Type::kFunc); + if (has_proper_type) { + // XXX: Is it possible that it may have binding other than + // Symbol32Bind::kGlobal when it is kFunc? + // XXX: Yes, it is possible. It may be kLocal or kWeak for sure. + const auto type = SymbolTypeFromElf32SymbolType(elfsym.type()); + const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size}; + if (symbol.name != nullptr && *symbol.name != '\0') { + const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream); + (void) ret; + assert(ret == 1); + } + } + } + // No more symbols are going to be added further, so it may be closed now. + fclose(symtab_stream); + // The RenderNodeDisassembly() function expects the symbol table to be + // sorted. + qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym); + return true; +} + void DisasmMap::Disasm( const DataView &code, const Settings &s, size_t at, bool nested) { @@ -240,6 +405,9 @@ DisasmMap::~DisasmMap() delete node; i += size - 1; } + if (_symtab != nullptr) { + free(_symtab); + } } static size_t RenderRawDataComment( @@ -312,6 +480,25 @@ static constexpr bool IsLocalLocation(const DisasmMap &disasm_map, const DisasmN { for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { for (size_t i = 0; i < ref->refs_count; i++) { + // Check symtab, because we may be crossing a symbol + const DisasmNode *ref_node = disasm_map.FindNodeByAddress(ref->refs[i].address); + if (ref_node != nullptr) { + // We won't cross a symbol at the address if the reference is + // backwards ('1b') and we will cross a symbol if the reference + // is forwards ('1f') - that's why we shift the range one + // instruction forward by adding a size to the address and the + // length. + // TODO write tests for it + uint32_t const address = (node.address < ref_node->address) + ? node.address + node.size + : ref_node->address + ref_node->size; + size_t const length = (node.address < ref_node->address) + ? ref_node->address + ref_node->size - (node.address + node.size) + : node.address + node.size - (ref_node->address + ref_node->size); + if (disasm_map.HasSymbolsInRange(address, length)) { + return false; + } + } const ReferenceRecord &ref_rec = ref->refs[i]; if (ref_rec.type == ReferenceType::kCall) { // Locals are definitely not made for calls @@ -356,47 +543,183 @@ static constexpr const char *StringWihoutFristNChars(const char *str, const size return str; } +constexpr const char *DisasmMap::GetFirstSuitableSymbol( + const DisasmNode &node, bool is_call) const +{ + const size_t index = findFirstSymbolAtAddress(node.address); + if (index == 0) { + return nullptr; + } + if (!is_call) { + return _symtab[index].name; + } + for (size_t i = index; i < symbolsCount() && _symtab[i].address == node.address; i++) { + if (_symtab[i].type == SymbolType::kFunction) { + return _symtab[i].name; + } + } + return nullptr; +} + +struct PendingObjectSize { + PendingObjectSize *next{}; + uint32_t at{}; + const char *name{}; +}; + +struct PendingObjectSizeList { + PendingObjectSize *_first{}, *_last{}; + void Add(uint32_t at, const char *name) + { + assert(name && *name); + // Last in first out + PendingObjectSize *pending = new PendingObjectSize{_first, at, name}; + assert(pending); + if (_last == nullptr) { + _last = pending; + } + _first = pending; + } + const char *TakeNext(uint32_t at) + { + for (PendingObjectSize *cur = _first, *prev = nullptr; cur;) { + // Last in first out + if (cur->at == at) { + const char *name = cur->name; + if (prev) { + prev->next = cur->next; + } else { + _first = cur->next; + } + if (_last == cur) { + _last = prev; + } + delete cur; + return name; + } + prev = cur; + cur = cur->next; + } + return nullptr; + } + ~PendingObjectSizeList() + { + while (_first) { + auto *cur = _first; + _first = _first->next; + delete cur; + } + _last = nullptr; + } +}; + +static constexpr const char *SymbolTypeToElfTypeString(SymbolType t) +{ + switch (t) { + case SymbolType::kNone: return nullptr; + case SymbolType::kFunction: return "function"; + case SymbolType::kObject: return "object"; + } + return nullptr; +} + static void RenderNodeDisassembly( FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s, - const DisasmNode &node) + const DisasmNode &node, + size_t &symbol_index, + PendingObjectSizeList &pending_size) { - if (node.ref_by) { - const bool is_local = IsLocalLocation(disasm_map, node); - if (s.labels && !(s.short_ref_local_labels && is_local)) { - const bool export_this_function = s.export_functions && HasCallReference(node); + for (const char *name = pending_size.TakeNext(node.address); name;) { + fprintf(output, "%s.size\t%s,.-%s\n", s.indent, name, name); + name = pending_size.TakeNext(node.address); + } + const size_t symtab_size = disasm_map.SymbolsCount(); + bool have_rendered_label_already = false; + bool have_rendered_function_label_already = false; + if (disasm_map.Symtab() != nullptr && symtab_size > 0) { + for (; symbol_index < symtab_size; symbol_index++) { + if (disasm_map.Symtab()[symbol_index].address >= node.address) { + break; + } + } + for (; symbol_index < symtab_size; symbol_index++) { + const auto &symbol = disasm_map.Symtab()[symbol_index]; + if (symbol.address != node.address) { + break; + } + if (symbol.name != nullptr || *symbol.name == '\0') { + fprintf(output, "\n%s.globl\t%s\n", s.indent, symbol.name); + if (symbol.type == SymbolType::kFunction) { + have_rendered_function_label_already = true; + } + const char *const type = SymbolTypeToElfTypeString(symbol.type); + if (type) { + fprintf(output, "%s.type\t%s, @%s\n", s.indent, symbol.name, type); + } + if (symbol.size > 0) { + pending_size.Add(node.address + symbol.size, symbol.name); + } + fprintf(output, "%s:\n", disasm_map.Symtab()[symbol_index].name); + have_rendered_label_already = true; + } + } + } + const bool is_local = s.short_ref_local_labels && IsLocalLocation(disasm_map, node); + do { + // Skip generating label or short jump label in-place in case if there + // are no referrers or we already have a suitable label from ELF's + // symtab or some other sources, that has been printed in the code + // section above. + if (node.ref_by == nullptr) { + break; + } + const bool have_call_reference = HasCallReference(node); + if (have_call_reference && have_rendered_function_label_already) { + break; + } + if (have_rendered_label_already) { + break; + } + // If we got here it must be that there is no suitable symbol found in + // the symtab, so it must be generated in-place. + constexpr auto generated_name_length = sizeof "L00000000"; + char name[generated_name_length + 1] = {0}; + snprintf(name, generated_name_length, "L%08x", node.address); + if (s.labels && !is_local) { + const bool export_this_function = s.export_functions && have_call_reference; const bool export_this_label = s.export_all_labels || (s.export_labels && node.ref_by && (node.ref_by->refs_count > 1)) || export_this_function; if (export_this_label) { - fprintf(output, "\n%s.globl\tL%08x\n", s.indent, node.address); + fprintf(output, "\n%s.globl\t%s\n", s.indent, name); if (export_this_function) { - fprintf(output, "%s.type\tL%08x, @function\n", s.indent, node.address); + fprintf(output, "%s.type\t%s, @function\n", s.indent, name); } } } - if (s.xrefs_from && !(s.short_ref_local_labels && is_local)) { - fprintf(output, "| XREFS:\n"); - for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { - if (ref->refs_count == 0) { - continue; - } - fprintf(output, "|"); - for (size_t i = 0; i < ref->refs_count; i++) { - const ReferenceRecord r = ref->refs[i]; - fprintf(output, " %s @%08x", ReferenceTypeToString(r.type), r.address); - } - fprintf(output, "\n"); - } - } if (s.labels) { - if (s.short_ref_local_labels && is_local) { + if (is_local) { fprintf(output, "1:%s", StringWihoutFristNChars(s.indent, (sizeof "1:") - 1)); } else { - fprintf(output, "L%08x:\n", node.address); + fprintf(output, "%s:\n", name); + } + } + } while (0); + if (s.xrefs_from && !(is_local && !have_rendered_label_already)) { + fprintf(output, "| XREFS:\n"); + for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) { + if (ref->refs_count == 0) { + continue; + } + fprintf(output, "|"); + for (size_t i = 0; i < ref->refs_count; i++) { + const ReferenceRecord r = ref->refs[i]; + fprintf(output, " %s @%08x", ReferenceTypeToString(r.type), r.address); } + fprintf(output, "\n"); } } assert(node.op.opcode != OpCode::kNone); @@ -430,20 +753,32 @@ static void RenderNodeDisassembly( : 0) | ((s.imm_labels && ref1) ? (node.ref_kinds & kRef1ImmMask) : 0) | (node.ref_kinds & (kRefDataMask | kRefPcRelFix2Bytes)); - const bool ref1_is_local = !ref1 || IsLocalLocation(disasm_map, *ref1); + const bool ref1_is_local = s.short_ref_local_labels && + ref1 && IsLocalLocation(disasm_map, *ref1); char ref1_label[32]{}; if (ref1) { - if (s.short_ref_local_labels && ref1_is_local) { + const bool is_call = + ReferenceType::kCall == ReferenceTypeFromRefKindMask1(ref_kinds); + const char *sym_name = disasm_map.GetFirstSuitableSymbol(*ref1, is_call); + if (sym_name) { + snprintf(ref1_label, (sizeof ref1_label), "%s", sym_name); + } else if (ref1_is_local) { const char dir = ref1_addr <= node.address ? 'b' : 'f'; snprintf(ref1_label, (sizeof ref1_label), "1%c", dir); } else { - snprintf(ref1_label, (sizeof ref1_label), "L%08x", ref1_addr); + snprintf(ref1_label, (sizeof ref1_label), "L%08x", ref1_addr); } } - const bool ref2_is_local = !ref2 || IsLocalLocation(disasm_map, *ref2); + const bool ref2_is_local = s.short_ref_local_labels && + ref2 && IsLocalLocation(disasm_map, *ref2); char ref2_label[32]{}; if (ref2) { - if (s.short_ref_local_labels && ref2_is_local) { + const bool is_call = + ReferenceType::kCall == ReferenceTypeFromRefKindMask2(ref_kinds); + const char *sym_name = disasm_map.GetFirstSuitableSymbol(*ref2, is_call); + if (sym_name) { + snprintf(ref2_label, (sizeof ref2_label), "%s", sym_name); + } else if (ref2_is_local) { const char dir = ref2_addr <= node.address ? 'b' : 'f'; snprintf(ref2_label, (sizeof ref2_label), "1%c", dir); } else { @@ -461,12 +796,11 @@ static void RenderNodeDisassembly( ref1_addr, ref2_addr); const bool ref1_from_imm_ok = ((node.ref_kinds & kRef1ImmMask) ? s.imm_labels : true); - if (s.xrefs_to && !(s.short_ref_local_labels && ref1_is_local) && ref1_from_imm_ok) - { - fprintf(output, " | L%08x", ref1_addr); + if (s.xrefs_to && ref1 && !ref1_is_local && ref1_from_imm_ok) { + fprintf(output, " | XREF1 @%08x", ref1_addr); } - if (s.xrefs_to && !(s.short_ref_local_labels && ref2_is_local)) { - fprintf(output, " | L%08x", ref2_addr); + if (s.xrefs_to && ref2 && !ref2_is_local) { + fprintf(output, " | XREF2 @%08x", ref2_addr); } } else { node.op.FPrint(output, s.indent, s.imm_hex); @@ -484,21 +818,54 @@ static void RenderNodeDisassembly( fprintf(output, "\n"); } +static void RenderNonCodeSymbols( + FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s) +{ + const size_t symtab_size = disasm_map.SymbolsCount(); + for (size_t i = 0; i < symtab_size; i++) { + const auto &symbol = disasm_map.Symtab()[i]; + if (symbol.address <= code.size) { + continue; + } + fprintf(output, "\n%s.globl\t%s\n", s.indent, symbol.name); + const char *const type = SymbolTypeToElfTypeString(symbol.type); + if (type) { + fprintf(output, "%s.type\t%s, @%s\n", s.indent, symbol.name, type); + } + fprintf(output, "%s = 0x%08x\n", symbol.name, symbol.address); + if (symbol.size) { + fprintf(output, "%s.size\t%s, 0x%zx\n", s.indent, symbol.name, symbol.size); + } + } +} + static void RenderDisassembly( FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s) { - for (size_t i = 0; i < code.size;) { + // This list is used to track all places where ".size fnname, .-fnname" + // directives must be put. + PendingObjectSizeList pending_size{}; + // sym_i starts with 1 because 0 is a special null symbol + for (size_t i = 0, sym_i = 1; i < code.size;) { + const DisasmNode raw = DisasmNode{ + /* .type = */ NodeType::kTracedInstruction, + /* .address = */ static_cast<uint32_t>(i), + /* .size = */ 2, + /* .ref_kinds = */ 0, + /* .ref1_addr = */ 0, + /* .ref2_addr = */ 0, + /* .ref_by = */ nullptr, + /* .last_ref_by = */ nullptr, + /* .op = */ Op::Raw(GetU16BE(code.buffer + i)), + }; const DisasmNode *node = disasm_map.FindNodeByAddress(i); - if (node) { - RenderNodeDisassembly(output, disasm_map, code, s, *node); - i += node->size; - } else { - auto raw = Op::Raw(GetU16BE(code.buffer + i)); - raw.FPrint(output, s.indent, s.imm_hex); - fprintf(output, "\n"); - i += kInstructionSizeStepBytes; + if (node == nullptr) { + node = &raw; } + RenderNodeDisassembly(output, disasm_map, code, s, *node, sym_i, pending_size); + i += node->size; } + RenderNonCodeSymbols(output, disasm_map, code, s); } static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data) @@ -580,7 +947,7 @@ static DisasmMap *NewDisasmMap(FILE *trace_stream) } // Parse trace file into map DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kTraced}; - assert(disasm_map); + assert(disasm_map != nullptr); ParseTraceData(*disasm_map, trace_data.View()); return disasm_map; } @@ -614,6 +981,11 @@ static int M68kDisasm( if (disasm_map == nullptr) { return EXIT_FAILURE; } + if (from_elf && s.symbols) { + if (false == disasm_map->ApplySymbolsFromElf(elf)) { + return EXIT_FAILURE; + } + } // Disasm into output map disasm_map->Disasm(code, s); // Print output into output_stream @@ -652,6 +1024,7 @@ static bool ApplyFeature(Settings& s, const char *feature_arg) { &Settings::imm_hex, "imm-hex" }, { &Settings::follow_jumps, "follow-jumps" }, { &Settings::walk, "walk" }, + { &Settings::symbols, "symbols" }, }; constexpr size_t sizeof_no_prefix = (sizeof "no-") - 1; const bool disable = FeatureStringHasPrefixNo(feature_arg); @@ -708,6 +1081,8 @@ static void PrintUsage(FILE *s, const char *argv0) " follow-jumps Follow jumps to statically known locations.\n" " walk Try best to detect further instructions following known\n" " traced locations without overcommitting.\n" + " symbols Extract and apply symbols from input file if available.\n" + " ELF symbols only are currently supported.\n" , argv0); } |