summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOxore <oxore@protonmail.com>2024-03-03 18:38:46 +0300
committerOxore <oxore@protonmail.com>2024-03-03 18:43:31 +0300
commitc993531d0678de5e29c943fdbb912e1f20957765 (patch)
tree04c827e3a4f7b739c7bb0c655790bfd0e3401e2b
parent3ae20774096ddb42ea03142d0c55f9564da4ba50 (diff)
Impl ELF symbols extraction
-rw-r--r--src/common.h1
-rw-r--r--src/elf_format.h156
-rw-r--r--src/elf_image.cpp229
-rw-r--r--src/elf_image.h34
-rw-r--r--src/main.cpp463
5 files changed, 771 insertions, 112 deletions
diff --git a/src/common.h b/src/common.h
index 76fc956..f94bb65 100644
--- a/src/common.h
+++ b/src/common.h
@@ -27,6 +27,7 @@ struct Settings {
bool imm_hex{};
bool follow_jumps{};
bool walk{};
+ bool symbols{};
BFDTarget bfd{};
const char *indent{"\t"};
};
diff --git a/src/elf_format.h b/src/elf_format.h
index b5a268a..c60ac64 100644
--- a/src/elf_format.h
+++ b/src/elf_format.h
@@ -12,6 +12,7 @@ constexpr size_t kIdentSize = 16;
constexpr size_t kHeaderSize = kIdentSize + 36;
constexpr size_t kMagicSize = 4;
constexpr size_t kProgramHeaderSize = 32;
+constexpr size_t kSectionHeaderSize = 40;
using Address = uint32_t;
using Offset = uint32_t;
@@ -143,6 +144,13 @@ enum class Machine : uint16_t {
kUnknown,
};
+/// Reads one byte at `d`. A single octet has no byte order.
+static constexpr inline uint8_t ParseU8(const uint8_t *d) { return d[0]; }
+
+/// Overload with the same (data, encoding) shape as ParseU16(); the
+/// encoding argument is accepted and ignored.
+static constexpr inline uint8_t ParseU8(const uint8_t *d, DataEncoding)
+{
+    return *d;
+}
+
static constexpr inline uint16_t ParseU16(const uint8_t *d, DataEncoding e)
{
if (e == DataEncoding::k2MSB) {
@@ -194,15 +202,15 @@ struct Header32Raw {
uint16_t machine;
uint32_t version;
Address entry;
- Offset phoff;
- Offset shoff;
+ Offset phoff; ///< Program header table's file offset in bytes
+ Offset shoff; ///< Section header table's file offset in bytes
uint32_t flags;
- uint16_t ehsize;
+ uint16_t ehsize; ///< ELF's header size in bytes
uint16_t phentsize;
uint16_t phnum;
uint16_t shentsize;
uint16_t shnum;
- uint16_t shstrndx;
+ uint16_t shstrndx; ///< Index of .shstrtab (section name string table) in section header table
static constexpr inline auto FromBytes(const uint8_t *data)
{
const auto ident = Ident32Raw::FromBytes(data);
@@ -232,15 +240,15 @@ struct Header32 {
Machine machine;
Version version;
Address entry;
- Offset phoff;
- Offset shoff;
+ Offset phoff; ///< Program header table's file offset in bytes
+ Offset shoff; ///< Section header table's file offset in bytes
uint32_t flags;
- uint16_t ehsize;
+ uint16_t ehsize; ///< ELF's header size in bytes
uint16_t phentsize;
uint16_t phnum;
uint16_t shentsize;
uint16_t shnum;
- uint16_t shstrndx;
+ uint16_t shstrndx; ///< Index of .shstrtab (section name string table) in section header table
static constexpr inline auto FromBytes(const uint8_t *data)
{
const auto raw = Header32Raw::FromBytes(data);
@@ -308,14 +316,130 @@ struct ProgramHeader32 {
static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e)
{
return ProgramHeader32{
- /* type */ ParseU32(data + 0, e),
- /* offset */ ParseU32(data + 4, e),
- /* vaddr */ ParseU32(data + 8, e),
- /* paddr */ ParseU32(data + 12, e),
- /* filesz */ ParseU32(data + 16, e),
- /* memsz */ ParseU32(data + 20, e),
- /* flags */ ParseU32(data + 24, e),
- /* align */ ParseU32(data + 28, e),
+ /* .type = */ ParseU32(data + 0, e),
+ /* .offset = */ ParseU32(data + 4, e),
+ /* .vaddr = */ ParseU32(data + 8, e),
+ /* .paddr = */ ParseU32(data + 12, e),
+ /* .filesz = */ ParseU32(data + 16, e),
+ /* .memsz = */ ParseU32(data + 20, e),
+ /* .flags = */ ParseU32(data + 24, e),
+ /* .align = */ ParseU32(data + 28, e),
+ };
+ }
+};
+
+/// Named values for a 32-bit section header type.
+/// NOTE(review): presumably these mirror the SHT_* constants of the ELF
+/// specification and describe SectionHeader32::type, which is stored as a
+/// plain uint32_t - confirm against the spec.
+/// The kLo*/kHi* pairs are range bounds rather than individual types.
+enum class SectionHeader32Type: uint32_t {
+ kNull = 0,
+ kProgBits = 1,
+ kSymtab = 2,
+ kStrtab = 3,
+ kRela = 4,
+ kHash = 5,
+ kDynamic = 6,
+ kNote = 7,
+ kNobits = 8,
+ kRel = 9,
+ kShlib = 10,
+ kDynsym = 11,
+ kLoProc = 0x70000000,
+ kHiProc = 0x7fffffff,
+ kLoUser = 0x80000000,
+ kHiUser = 0xffffffff,
+};
+
+/// One entry of the 32-bit section header table, decoded into host byte
+/// order. All fields are value-initialized to zero by default.
+struct SectionHeader32 {
+ uint32_t name{};
+ uint32_t type{};
+ uint32_t flags{};
+ Address addr{};
+ Offset offset{};
+ uint32_t size{}; ///< Size of whole section in bytes
+ uint32_t link{};
+ uint32_t info{};
+ uint32_t addralign{};
+ uint32_t entsize{}; ///< Size of a single entry (every entry has same size)
+ /// Decodes a header from ten consecutive 32-bit words starting at `data`
+ /// (40 bytes are read, matching kSectionHeaderSize), using encoding `e`.
+ /// The caller must guarantee that all 40 bytes are readable.
+ static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e)
+ {
+ return SectionHeader32{
+ /* .name = */ ParseU32(data + 0, e),
+ /* .type = */ ParseU32(data + 4, e),
+ /* .flags = */ ParseU32(data + 8, e),
+ /* .addr = */ ParseU32(data + 12, e),
+ /* .offset = */ ParseU32(data + 16, e),
+ /* .size = */ ParseU32(data + 20, e),
+ /* .link = */ ParseU32(data + 24, e),
+ /* .info = */ ParseU32(data + 28, e),
+ /* .addralign = */ ParseU32(data + 32, e),
+ /* .entsize = */ ParseU32(data + 36, e),
+ };
+ }
+ /// True when the `name` field is non-zero. A default-constructed header
+ /// is therefore reported as invalid.
+ /// NOTE(review): a real section whose name index is 0 would also be
+ /// reported as invalid - confirm this heuristic is intended.
+ constexpr bool IsValid(void) const { return name != 0; }
+};
+
+/// Symbol binding: the value of the upper 4 bits of a symbol's `info` byte
+/// (see Symbol32::bind()). All 16 possible values are enumerated so that
+/// any 4-bit value converts to a named enumerator.
+enum class Symbol32Bind: unsigned char {
+ kLocal = 0,
+ kGlobal = 1,
+ kWeak = 2,
+ kUnknown3 = 3,
+ kUnknown4 = 4,
+ kUnknown5 = 5,
+ kUnknown6 = 6,
+ kUnknown7 = 7,
+ kUnknown8 = 8,
+ kUnknown9 = 9,
+ kUnknown10 = 10,
+ kUnknown11 = 11,
+ kUnknown12 = 12,
+ kLoProc = 13,
+ kUnknown14 = 14,
+ kHiProc = 15,
+};
+
+/// Symbol type: the value of the lower 4 bits of a symbol's `info` byte
+/// (see Symbol32::type()). All 16 possible values are enumerated so that
+/// any 4-bit value converts to a named enumerator.
+enum class Symbol32Type: unsigned char {
+ kNoType = 0,
+ kObject = 1,
+ kFunc = 2,
+ kSection = 3,
+ kFile = 4,
+ kUnknown5 = 5,
+ kUnknown6 = 6,
+ kUnknown7 = 7,
+ kUnknown8 = 8,
+ kUnknown9 = 9,
+ kUnknown10 = 10,
+ kUnknown11 = 11,
+ kUnknown12 = 12,
+ kLoProc = 13,
+ kUnknown14 = 14,
+ kHiProc = 15,
+};
+
+/// One decoded 32-bit symbol table entry.
+struct Symbol32 {
+ /// Resolved name string. FromBytes() always leaves it null; it is meant
+ /// to be filled in afterwards by whoever has access to the string table.
+ const char *name{};
+ uint32_t namendx{}; ///< Offset of the name inside the string table
+ Address value{}; ///< Value or address, e.g. address of a variable in RAM
+ uint32_t size{}; ///< Size of a symbol, e.g. length of a function, etc.
+ unsigned char info{}; ///< Packed binding (high nibble) and type (low nibble)
+ unsigned char other{};
+ uint16_t shndx{}; ///< Index of a section the symbol belongs to
+ /// Binding, taken from the upper 4 bits of `info`.
+ constexpr Symbol32Bind bind() const
+ {
+ return static_cast<Symbol32Bind>(info >> 4);
+ }
+ /// Type, taken from the lower 4 bits of `info`.
+ constexpr Symbol32Type type() const
+ {
+ return static_cast<Symbol32Type>(info & 0xf);
+ }
+ /// Decodes an entry from the 16 bytes starting at `data` using encoding
+ /// `e`. The caller must guarantee that all 16 bytes are readable.
+ static constexpr inline auto FromBytes(const uint8_t *data, const DataEncoding e)
+ {
+ return Symbol32{
+ /* .name = */ nullptr,
+ /* .namendx = */ ParseU32(data + 0, e),
+ /* .value = */ ParseU32(data + 4, e),
+ /* .size = */ ParseU32(data + 8, e),
+ /* .info = */ ParseU8(data + 12, e),
+ /* .other = */ ParseU8(data + 13, e),
+ /* .shndx = */ ParseU16(data + 14, e),
};
}
};
diff --git a/src/elf_image.cpp b/src/elf_image.cpp
index 6db72f3..6572a85 100644
--- a/src/elf_image.cpp
+++ b/src/elf_image.cpp
@@ -4,9 +4,18 @@
#include "elf_image.h"
#include <cassert>
+#include <cstdarg>
#include <cstdio>
+#include <cstring>
-ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes(
+// Marks a function as printf-like so that compilers defining __GNUC__ check
+// the format string against the variadic arguments. `strindex` is the
+// 1-based position of the format string, `first` is the position of the
+// first variadic argument. Expands to nothing on other compilers.
+// NOTE(review): identifiers starting with an underscore followed by an
+// uppercase letter are reserved in C++; consider renaming.
+#ifdef __GNUC__
+#define _PRINTF(strindex, first) __attribute__((format(printf, strindex, first)))
+#else
+#define _PRINTF(strindex, first)
+#endif
+
+
+ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromView(
const DataView &d, const DataEncoding e)
{
if (d.buffer == nullptr || d.size == 0) {
@@ -22,119 +31,198 @@ ELF::ProgramHeader32Table ELF::ProgramHeader32Table::FromBytes(
return ELF::ProgramHeader32Table{ headers, size, };
}
+/// Formats a printf-style error message into a newly allocated C string.
+///
+/// @param fmt printf-style format string followed by its arguments.
+/// @return nullptr when `fmt` is null or empty (meaning "no error");
+///         otherwise a malloc-allocated, NUL-terminated message that the
+///         caller owns and must release with free().
+///
+/// Callers treat a null return as "no error", so an allocation failure must
+/// not silently turn into nullptr: a fixed fallback message is returned
+/// instead of relying on assert(), which is compiled out in NDEBUG builds.
+static _PRINTF(1, 2) char *Error(const char *fmt, ...)
+{
+    if (fmt == nullptr || *fmt == '\0') {
+        return nullptr;
+    }
+    char *error{};
+    size_t size{};
+    FILE *error_stream = open_memstream(&error, &size);
+    if (error_stream == nullptr) {
+        return strdup("failed to allocate memory for an error message");
+    }
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(error_stream, fmt, ap);
+    va_end(ap);
+    // The buffer pointer and size become valid only after fclose().
+    fclose(error_stream);
+    if (error == nullptr) {
+        return strdup("failed to allocate memory for an error message");
+    }
+    assert(*error != '\0');
+    return error;
+}
+
+/// Checks that the section described by `sh` lies inside the file `d` and
+/// that its entry size, if any, is consistent with its total size.
+///
+/// @param d        view of the whole ELF file
+/// @param sh       section header to validate
+/// @param shstrndx index of `sh` in the section header table; used only in
+///                 error messages
+/// @param shname   human readable section name; used only in error messages
+/// @return nullptr when the header is fine, otherwise an allocated error
+///         message produced by Error() that the caller owns.
+static char *ValidateSectionHeader(
+        const DataView& d,
+        ELF::SectionHeader32 sh,
+        size_t shstrndx,
+        const char *shname)
+{
+    if (sh.offset >= d.size) {
+        return Error(
+                "Section header %zu (%s) offset is too big to fit into the file: "
+                "expected (<%zu), got (%zu)",
+                shstrndx,
+                shname,
+                d.size,
+                size_t(sh.offset));
+    }
+    // A section may end exactly at the end of the file, so only a size that
+    // is strictly greater than the remaining space is an error.
+    const size_t available = d.size - sh.offset;
+    if (sh.size > available) {
+        return Error(
+                "Section header %zu (%s) is too big to fit into the file: "
+                "expected (<=%zu), got (%zu)",
+                shstrndx,
+                shname,
+                available,
+                size_t(sh.size));
+    }
+    if (sh.entsize == 0) {
+        // The section is not a table of fixed-size entries.
+        return nullptr;
+    }
+    if (sh.entsize > sh.size) {
+        return Error(
+                "Section header %zu (%s) entry size is too big to fit into "
+                "the table: expected (<=%zu), got (%zu)",
+                shstrndx,
+                shname,
+                size_t(sh.size),
+                size_t(sh.entsize));
+    }
+    size_t const remainder = sh.size % sh.entsize;
+    if (remainder) {
+        return Error(
+                "Section header %zu (%s) size is not multiple of entsize: "
+                "expected (%zu %% %zu == 0), got (%zu)",
+                shstrndx,
+                shname,
+                size_t(sh.size),
+                size_t(sh.entsize),
+                remainder);
+    }
+    return nullptr;
+}
+
static char *ValidateELF(const DataView& d)
{
- char *error;
- size_t size;
- FILE *s = open_memstream(&error, &size);
- assert(s);
using namespace ELF;
if (d.size < kHeaderSize) {
- fprintf(
- s,
+ return Error(
"data size (%zu) is lower than minimum ELF header size (%zu): "
"ELF header could not fit",
d.size,
kHeaderSize);
- fclose(s);
- return error;
}
const auto header_raw = Header32Raw::FromBytes(d.buffer);
const auto header = Header32::FromBytes(d.buffer);
if (!MagicIsValid(header.ident.magic)) {
const uint8_t *m = header.ident.magic;
- fprintf(
- s,
+ return Error(
"ELF Magic is invalid: expected [%02x %02x %02x %02x], got [%02x %02x %02x %02x]",
0x7f, 'E', 'L', 'F',
m[0], m[1], m[2], m[3]);
- fclose(s);
- return error;
}
if (header.ident.version != Version::kCurrent) {
- fprintf(
- s,
+ return Error(
"version (0x%02x) of ELF header.ident.version is not supported, "
"only \"Current\" version (0x%02x) is supported",
header_raw.ident.version,
static_cast<int>(Version::kCurrent));
- fclose(s);
- return error;
}
if (header.version != Version::kCurrent) {
- fprintf(
- s,
+ return Error(
"version (0x%02x) of ELF header.version is not supported, "
"only \"Current\" version (0x%02x) is supported",
header_raw.version,
static_cast<int>(Version::kCurrent));
- fclose(s);
- return error;
}
if (header.type != ObjectType::kExec) {
- fprintf(
- s,
+ return Error(
"object type (0x%02x) is not supported, "
"only Exec (0x%02x) object type is supported",
header_raw.type,
static_cast<int>(ObjectType::kExec));
- fclose(s);
- return error;
}
if (header.machine != Machine::k68k) {
- fprintf(
- s,
+ return Error(
"machine (0x%02x) is not supported, "
"only Motorola 68k (0x%02x) machine is supported",
header_raw.machine,
static_cast<int>(Machine::k68k));
- fclose(s);
- return error;
+ }
+ if (header.ehsize > d.size) {
+ return Error(
+ "ELF header ehsize is too big to fit into the file: expected (<=%zu), got (%zu)",
+ size_t(d.size),
+ size_t(header.ehsize));
}
if (header.phentsize != kProgramHeaderSize) {
- fprintf(
- s,
+ return Error(
"phentsize is invalid: expected (%zu), got (%zu)",
kProgramHeaderSize,
size_t(header.phentsize));
- fclose(s);
- return error;
+ }
+ if (header.shentsize != kSectionHeaderSize) {
+ return Error(
+ "shentsize is invalid: expected (%zu), got (%zu)",
+ kSectionHeaderSize,
+ size_t(header.shentsize));
+ }
+ if (header.shoff < header.ehsize) {
+ return Error(
+ "shoff intersects with an ELF header: expected (>%zu), got (%zu)",
+ size_t(header.ehsize),
+ size_t(header.shoff));
+ }
+ if (header.shoff >= d.size) {
+ return Error(
+ "shoff is too big for a file size: expected (<%zu), got (%zu)",
+ d.size,
+ size_t(header.shoff));
+ }
+ if (header.shnum > (d.size - header.shoff) / header.shentsize) {
+ return Error(
+ "shnum is too big to fit shared headers table into the file: expected (<=%zu), got (%zu)",
+ (d.size - header.shoff) / header.shentsize,
+ size_t(header.shnum));
+ }
+ if (header.shstrndx > header.shnum) {
+ return Error(
+ "shstrndx exceeds shared headers table entries count: expected (<%zu), got (%zu)",
+ size_t(header.shnum),
+ size_t(header.shstrndx));
+ }
+ if (header.shstrndx) {
+ const auto shstrtab = ELF::SectionHeader32::FromBytes(
+ d.buffer + header.shoff + header.shstrndx * kSectionHeaderSize,
+ header.ident.data_encoding);
+ char *error = ValidateSectionHeader(d, shstrtab, header.shstrndx, ".shstrtab");
+ if (error != nullptr) {
+ return error;
+ }
}
if (d.size < header.phoff + header.phentsize * header.phnum) {
- fprintf(
- s,
+ return Error(
"data size (%zu) is lower than program header table end offset (%zu): "
"program header table could not fit",
d.size,
size_t(header.phoff + header.phentsize * header.phnum));
- fclose(s);
- return error;
}
bool has_segment_with_entry = false;
for (size_t i = 0; i < header.phnum; i++) {
const auto ph = ProgramHeader32::FromBytes(
d.buffer + header.phoff + header.phentsize * i, header.ident.data_encoding);
if (d.size < ph.offset + ph.filesz) {
- fprintf(
- s,
+ return Error(
"data size (%zu) is lower than pht[%zu] segment end offset (%zu): "
"segment could not fit",
d.size,
i,
size_t(ph.offset + ph.filesz));
- fclose(s);
- return error;
}
const bool is_code = (ph.flags & (kPHFlagX | kPHFlagW | kPHFlagR)) == (kPHFlagX | kPHFlagR);
if (ParsePHType(ph.type) == PHType::kLoad && is_code && ph.vaddr != 0) {
- fprintf(
- s,
+ return Error(
"pht[%zu] segment is a code, but it's vaddr (0x%08x) is not zero: "
"non-zero base address is not supported",
i,
ph.vaddr);
- fclose(s);
- return error;
}
const bool contains_entry = header.entry >= ph.vaddr && header.entry < ph.vaddr + ph.memsz;
if (ParsePHType(ph.type) == PHType::kLoad && is_code && contains_entry) {
@@ -142,12 +230,8 @@ static char *ValidateELF(const DataView& d)
}
}
if (!has_segment_with_entry) {
- fprintf(s, "no code segments containing entry point (0x%08x) found", header.entry);
- fclose(s);
- return error;
+ return Error("no code segments containing entry point (0x%08x) found", header.entry);
}
- fclose(s);
- free(error);
return nullptr;
}
@@ -157,10 +241,53 @@ ELF::Image::Image(DataBuffer&& data)
, _h(_error ? ELF::Header32{} : ELF::Header32::FromBytes(_data.View().buffer))
, _pht(_error
? ELF::ProgramHeader32Table{}
- : ELF::ProgramHeader32Table::FromBytes(
+ : ELF::ProgramHeader32Table::FromView(
_data.View(_h.phoff, _h.phnum * kProgramHeaderSize), _h.ident.data_encoding))
+ , _shstrtab(_error
+ ? ELF::SectionHeader32{}
+ : ELF::SectionHeader32::FromBytes(
+ _data.buffer + _h.shoff + _h.shstrndx * kSectionHeaderSize, _h.ident.data_encoding))
+ , _symtab(GetSectionHeaderByName(".symtab"))
+ , _strtab(GetSectionHeader(_symtab.link))
{}
+/// Returns the section header whose name equals `name`, or a
+/// default-constructed (invalid) header when no such section is found.
+ELF::SectionHeader32 ELF::Image::GetSectionHeaderByName(const char *name) const
+{
+    // Index 0 doubles as the "not found" result of the lookup.
+    if (const uint32_t found = GetSectionHeaderIndexByName(name)) {
+        const size_t position = _h.shoff + kSectionHeaderSize * found;
+        return SectionHeader32::FromBytes(
+                _data.buffer + position, _h.ident.data_encoding);
+    }
+    return SectionHeader32{};
+}
+
+/// Finds the index of the first section whose name equals `name`.
+///
+/// @return the section index, or 0 when the image is invalid, `name` is
+///         null, the section name string table is unusable, or no section
+///         matches. Index 0 is therefore never reported as a match.
+///
+/// The file is untrusted input: every name offset is checked against the
+/// bounds of the section name string table before any byte is compared, so
+/// a malformed file cannot cause a read past the table or the buffer.
+uint32_t ELF::Image::GetSectionHeaderIndexByName(const char *name) const
+{
+    if (!IsValid() || name == nullptr || !_shstrtab.IsValid()) {
+        return 0;
+    }
+    const size_t file_size = _data.buffer_size;
+    if (_shstrtab.offset > file_size || _shstrtab.size > file_size - _shstrtab.offset) {
+        // The string table itself does not fit into the file.
+        return 0;
+    }
+    // Compare the name together with its terminating NUL.
+    const size_t name_size = strlen(name) + 1;
+    for (uint32_t index = 0; index < _h.shnum; index++) {
+        const size_t offset = _h.shoff + kSectionHeaderSize * index;
+        if (offset + kSectionHeaderSize > file_size) {
+            return 0;
+        }
+        const auto header = SectionHeader32::FromBytes(
+                _data.buffer + offset, _h.ident.data_encoding);
+        if (header.name >= _shstrtab.size || name_size > _shstrtab.size - header.name) {
+            // The name offset is outside the table, or the table has too
+            // few bytes left for `name` to match.
+            continue;
+        }
+        const auto *name_in_elf = _data.buffer + _shstrtab.offset + header.name;
+        if (0 == memcmp(name, name_in_elf, name_size)) {
+            return index;
+        }
+    }
+    return 0;
+}
+
ELF::Image::~Image()
{
if (_error) {
diff --git a/src/elf_image.h b/src/elf_image.h
index b7c7123..b753008 100644
--- a/src/elf_image.h
+++ b/src/elf_image.h
@@ -13,7 +13,7 @@ namespace ELF {
struct ProgramHeader32Table {
const ProgramHeader32 *headers{};
size_t size{};
- static ProgramHeader32Table FromBytes(const DataView &, DataEncoding);
+ static ProgramHeader32Table FromView(const DataView &, DataEncoding);
};
struct Segment {
@@ -26,6 +26,7 @@ class Image {
char *const _error;
const Header32 _h;
const ProgramHeader32Table _pht;
+ const SectionHeader32 _shstrtab, _symtab, _strtab;
public:
explicit Image(DataBuffer&&);
~Image();
@@ -50,6 +51,37 @@ public:
return DataView{};
};
constexpr const char *Error() const { return _error; }
+ ELF::SectionHeader32 GetSectionHeaderByName(const char *name) const;
+ /// Returns the section header at `index` of the section header table.
+ ///
+ /// A default-constructed (invalid) header is returned when `index` is
+ /// not below the number of section headers or when the entry does not
+ /// fit into the file buffer.
+ constexpr const ELF::SectionHeader32 GetSectionHeader(uint32_t index) const
+ {
+     // Valid indices are [0, shnum); `index == shnum` is already one past
+     // the end of the table.
+     if (index >= _h.shnum) {
+         return SectionHeader32{};
+     }
+     const size_t offset = _h.shoff + kSectionHeaderSize * index;
+     if (offset > _data.buffer_size || _data.buffer_size - offset < kSectionHeaderSize) {
+         return SectionHeader32{};
+     }
+     return SectionHeader32::FromBytes(
+             _data.buffer + offset, _h.ident.data_encoding);
+ }
+ uint32_t GetSectionHeaderIndexByName(const char *name) const;
+ /// Returns the symbol at `index` of the symbol table.
+ ///
+ /// A default-constructed symbol is returned when the image is invalid,
+ /// `index` is out of range, or the entry does not fit into the file
+ /// buffer. The `name` of the returned symbol points into the image
+ /// buffer (it is not owned by the symbol) and stays null when the string
+ /// table is unusable or the name offset is outside of it.
+ constexpr ELF::Symbol32 GetSymbolByIndex(uint32_t index) const
+ {
+     if (!IsValid()) {
+         return Symbol32{};
+     }
+     if (_symtab.entsize == 0 || index >= _symtab.size / _symtab.entsize) {
+         return Symbol32{};
+     }
+     // Symbol32::FromBytes() reads bytes [0, 16) of the entry.
+     constexpr size_t kSymbolSize = 16;
+     const size_t file_size = _data.buffer_size;
+     const size_t entry_pos = size_t(_symtab.entsize) * index;
+     if (_symtab.offset > file_size ||
+         entry_pos > file_size - _symtab.offset ||
+         file_size - _symtab.offset - entry_pos < kSymbolSize)
+     {
+         return Symbol32{};
+     }
+     auto symbol = Symbol32::FromBytes(
+             _data.buffer + _symtab.offset + entry_pos,
+             _h.ident.data_encoding);
+     // A name is safe to hand out as a C string only if the string table
+     // lies inside the file and its LAST byte (offset + size - 1, not
+     // offset + size) is a NUL terminator.
+     const bool strtab_usable = _strtab.size > 0 &&
+         _strtab.offset <= file_size &&
+         _strtab.size <= file_size - _strtab.offset &&
+         _data.buffer[size_t(_strtab.offset) + _strtab.size - 1] == '\0';
+     if (strtab_usable && symbol.namendx < _strtab.size) {
+         symbol.name = reinterpret_cast<const char *>(
+                 _data.buffer + _strtab.offset + symbol.namendx);
+     }
+     return symbol;
+ }
};
}
diff --git a/src/main.cpp b/src/main.cpp
index 89aa2ea..2a9b312 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -27,6 +27,19 @@
#include <cerrno>
#include <climits>
+/// Kind of a symbol as tracked by the disassembler. kNone is the
+/// value-initialized default.
+enum class SymbolType: int {
+ kNone = 0,
+ kFunction,
+ kObject,
+};
+
+/// A symbol as stored in the disassembler's own symbol table.
+struct Symbol {
+ uint32_t address{}; ///< Address the symbol is attached to
+ SymbolType type{};
+ /// Symbol name as a C string; a plain pointer that this struct never
+ /// frees.
+ const char *name{};
+ size_t size{}; ///< Size of the symbol, 0 when unknown
+};
+
enum class DisasmMapType {
kTraced,
kRaw,
@@ -35,15 +48,25 @@ enum class DisasmMapType {
class DisasmMap {
const DisasmMapType _type;
DisasmNode *_map[kDisasmMapSizeElements]{};
+ Symbol *_symtab{};
+ size_t _symtab_size{};
constexpr DisasmNode *findNodeByAddress(uint32_t address) const;
+ constexpr size_t findFirstSymbolAtAddress(
+ uint32_t address, bool return_last_considered=false) const;
DisasmNode &insertNode(uint32_t address, NodeType);
+ void insertSymbol(uint32_t address, ReferenceType ref_type);
DisasmNode &insertReferencedBy(
const uint32_t by_addr,
const uint32_t ref_addr,
const NodeType type,
const ReferenceType ref_type);
constexpr bool canBeAllocated(const DisasmNode& node) const;
+ constexpr size_t symbolsCount() const { return _symtab_size / sizeof *_symtab; }
public:
+ constexpr const Symbol *Symtab() const { return _symtab; }
+ constexpr size_t SymbolsCount() const { return symbolsCount(); }
+ constexpr const char *GetFirstSuitableSymbol(const DisasmNode &, bool is_call) const;
+ constexpr bool HasSymbolsInRange(uint32_t at, size_t length) const;
constexpr const DisasmNode *FindNodeByAddress(uint32_t address) const
{
return findNodeByAddress(address);
@@ -53,6 +76,7 @@ public:
assert(_type == DisasmMapType::kTraced);
insertNode(address, type);
}
+ bool ApplySymbolsFromElf(const ELF::Image &);
void Disasm(const DataView &code, const Settings &, size_t from=0, bool nested=false);
DisasmMap(DisasmMapType type): _type(type) {}
~DisasmMap();
@@ -65,12 +89,81 @@ constexpr DisasmNode *DisasmMap::findNodeByAddress(uint32_t address) const
return nullptr;
}
+/// Binary-searches the address-sorted symbol table for the first symbol
+/// located exactly at `address`.
+///
+/// @return index of the first matching symbol, or 0 when there is none.
+///         When `return_last_considered` is set and nothing matches, the
+///         position where the search stopped is returned instead: the
+///         index of the first symbol with a greater address. Be aware that
+///         this position may be equal to symbolsCount(), i.e. one past the
+///         last symbol, and must be bounds-checked before use.
+constexpr size_t DisasmMap::findFirstSymbolAtAddress(
+        uint32_t address, bool return_last_considered) const
+{
+    const size_t count = symbolsCount();
+    if (_symtab == nullptr || count < 1) {
+        return 0;
+    }
+    // A symbol at index 0 is a special null symbol and it must be skipped,
+    // so the half-open search window [lo, hi) starts at 1.
+    size_t lo = 1, hi = count, found = 0;
+    while (lo < hi) {
+        const size_t mid = lo + (hi - lo) / 2;
+        if (_symtab[mid].address < address) {
+            // Everything up to and including `mid` is too low
+            lo = mid + 1;
+            continue;
+        }
+        if (_symtab[mid].address == address) {
+            found = mid;
+        }
+        // Keep looking to the left for an earlier symbol
+        hi = mid;
+    }
+    if (found == 0 && return_last_considered) {
+        return lo;
+    }
+    return found;
+}
+
+/// Tells whether `value` lies in the half-open range [at, at + length).
+///
+/// The distance `value - at` is compared against `length` instead of
+/// computing `at + length`, which could wrap around on hosts where size_t
+/// is 32 bits wide. An empty range (length == 0) contains nothing.
+static constexpr bool IsWithinRange(uint32_t const value, uint32_t at, size_t length)
+{
+    return value >= at && static_cast<size_t>(value - at) < length;
+}
+
+/// Tells whether at least one symbol has its address inside the half-open
+/// range [address, address + length).
+///
+/// findFirstSymbolAtAddress(address, true) yields the index of the first
+/// symbol whose address is not below `address` (an exact match if there is
+/// one), or symbolsCount() when every symbol lies below `address`. The
+/// symbol table is sorted, so that symbol is the only candidate: if it is
+/// not in the range then no symbol is. The index is bounds-checked first,
+/// because using the one-past-the-end result would read outside the table.
+constexpr bool DisasmMap::HasSymbolsInRange(
+        uint32_t const address, size_t const length) const
+{
+    const size_t index = findFirstSymbolAtAddress(address, true);
+    if (index == 0) {
+        // The symtab is empty
+        return false;
+    }
+    if (index >= symbolsCount()) {
+        // Every symbol is located below the address
+        return false;
+    }
+    return IsWithinRange(_symtab[index].address, address, length);
+}
+
static constexpr uint32_t AlignInstructionAddress(const uint32_t address)
{
return address & ~1UL;
}
-DisasmNode &DisasmMap::insertNode(const uint32_t address, const NodeType type)
+DisasmNode &DisasmMap::insertNode(uint32_t address, NodeType type)
{
auto *node = findNodeByAddress(address);
if (node) {
@@ -127,6 +220,8 @@ static constexpr ReferenceType ReferenceTypeFromRefKindMask1(const RefKindMask r
static constexpr ReferenceType ReferenceTypeFromRefKindMask2(const RefKindMask ref_kinds)
{
+ // FIXME: AFAIK it is impossible for a call instruction to have second
+ // argument. I can probably drop the first condition, but it needs testing
return (ref_kinds & kRefCallMask)
? ReferenceType::kCall
: (ref_kinds & kRef2ReadMask)
@@ -147,6 +242,76 @@ static constexpr bool IsNextLikelyAnInstruction(const Op &op)
op.opcode != OpCode::kSTOP);
}
+/// qsort() comparator for Symbol: orders by address first and, for equal
+/// addresses, by name. Both names must be non-null.
+static int cmpsym(const void *p1, const void *p2)
+{
+    const Symbol &lhs = *static_cast<const Symbol *>(p1);
+    const Symbol &rhs = *static_cast<const Symbol *>(p2);
+    if (lhs.address != rhs.address) {
+        return lhs.address < rhs.address ? -1 : 1;
+    }
+    return strcmp(lhs.name, rhs.name);
+}
+
+/// Maps an ELF symbol type onto the disassembler's SymbolType. Only object
+/// and function symbols have a counterpart; everything else is kNone.
+constexpr SymbolType SymbolTypeFromElf32SymbolType(const ELF::Symbol32Type &t)
+{
+    switch (t) {
+    case ELF::Symbol32Type::kFunc:
+        return SymbolType::kFunction;
+    case ELF::Symbol32Type::kObject:
+        return SymbolType::kObject;
+    default:
+        return SymbolType::kNone;
+    }
+}
+
+/// Builds the disassembler's symbol table from the ".symtab" section of
+/// `elf`. Only named symbols of type "no type", "object" or "function" are
+/// taken. The resulting table is sorted by address, then by name.
+///
+/// @return false only when the in-memory stream for the table cannot be
+///         created. A missing or malformed ".symtab" is not an error:
+///         a warning is printed and true is returned.
+bool DisasmMap::ApplySymbolsFromElf(const ELF::Image &elf)
+{
+    const ELF::SectionHeader32 symtab = elf.GetSectionHeaderByName(".symtab");
+    if (!symtab.IsValid()) {
+        fprintf(stderr, "Warning: \".symtab\" is invalid, skipping symbols\n");
+        return true;
+    }
+    if (symtab.entsize == 0) {
+        // The entry count below is size / entsize, so a zero entsize from a
+        // malformed file must be rejected before dividing.
+        fprintf(stderr, "Warning: \".symtab\" has zero entsize, skipping symbols\n");
+        return true;
+    }
+    if (_symtab != nullptr) {
+        // Do not leak a table built by a previous call.
+        free(_symtab);
+        _symtab = nullptr;
+        _symtab_size = 0;
+    }
+    FILE *symtab_stream = open_memstream(reinterpret_cast<char**>(&_symtab), &_symtab_size);
+    if (symtab_stream == nullptr) {
+        const int err = errno;
+        fprintf(stderr,
+                "open_memstream() for symtab failed: Error (%d): \"%s\"\n",
+                err, strerror(err));
+        return false;
+    }
+    // NOTE(review): the original code tried to put a null symbol at index 0
+    // here, but guarded the write with a condition that is always false, so
+    // nothing was ever written. findFirstSymbolAtAddress() skips index 0 as
+    // if it were that null symbol, which means the lowest-sorted real
+    // symbol is never found by address lookups. Behavior is kept as is;
+    // fixing it needs a coordinated change of the lookup, the comparator
+    // and the renderer.
+    const size_t nentries = symtab.size / symtab.entsize;
+    for (size_t i = 0; i < nentries; i++) {
+        const ELF::Symbol32 elfsym = elf.GetSymbolByIndex(static_cast<uint32_t>(i));
+        const bool has_proper_type = (elfsym.type() == ELF::Symbol32Type::kNoType) ||
+            (elfsym.type() == ELF::Symbol32Type::kObject) ||
+            (elfsym.type() == ELF::Symbol32Type::kFunc);
+        if (!has_proper_type) {
+            continue;
+        }
+        // The binding is deliberately not checked: a function may be
+        // global, but it may be local or weak as well.
+        const auto type = SymbolTypeFromElf32SymbolType(elfsym.type());
+        const auto symbol = Symbol{elfsym.value, type, elfsym.name, elfsym.size};
+        if (symbol.name != nullptr && *symbol.name != '\0') {
+            const size_t ret = fwrite(&symbol, sizeof symbol, 1, symtab_stream);
+            (void) ret;
+            assert(ret == 1);
+        }
+    }
+    // No more symbols are going to be added further, so it may be closed now.
+    fclose(symtab_stream);
+    // The RenderNodeDisassembly() function expects the symbol table to be
+    // sorted.
+    qsort(_symtab, symbolsCount(), sizeof *_symtab, cmpsym);
+    return true;
+}
+
void DisasmMap::Disasm(
const DataView &code, const Settings &s, size_t at, bool nested)
{
@@ -240,6 +405,9 @@ DisasmMap::~DisasmMap()
delete node;
i += size - 1;
}
+ if (_symtab != nullptr) {
+ free(_symtab);
+ }
}
static size_t RenderRawDataComment(
@@ -312,6 +480,25 @@ static constexpr bool IsLocalLocation(const DisasmMap &disasm_map, const DisasmN
{
for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) {
for (size_t i = 0; i < ref->refs_count; i++) {
+ // Check symtab, because we may be crossing a symbol
+ const DisasmNode *ref_node = disasm_map.FindNodeByAddress(ref->refs[i].address);
+ if (ref_node != nullptr) {
+ // We won't cross a symbol at the address if the reference is
+ // backwards ('1b') and we will cross a symbol if the reference
+ // is forwards ('1f') - that's why we shift the range one
+ // instruction forward by adding a size to the address and the
+ // length.
+ // TODO write tests for it
+ uint32_t const address = (node.address < ref_node->address)
+ ? node.address + node.size
+ : ref_node->address + ref_node->size;
+ size_t const length = (node.address < ref_node->address)
+ ? ref_node->address + ref_node->size - (node.address + node.size)
+ : node.address + node.size - (ref_node->address + ref_node->size);
+ if (disasm_map.HasSymbolsInRange(address, length)) {
+ return false;
+ }
+ }
const ReferenceRecord &ref_rec = ref->refs[i];
if (ref_rec.type == ReferenceType::kCall) {
// Locals are definitely not made for calls
@@ -356,47 +543,183 @@ static constexpr const char *StringWihoutFristNChars(const char *str, const size
return str;
}
+/// Picks a symbol name to be used as a label for `node`.
+///
+/// For a non-call reference the first symbol at the node's address is
+/// used. For a call only a function symbol at that address is acceptable.
+/// Returns nullptr when there is no suitable symbol.
+constexpr const char *DisasmMap::GetFirstSuitableSymbol(
+        const DisasmNode &node, bool is_call) const
+{
+    const size_t first = findFirstSymbolAtAddress(node.address);
+    if (first == 0) {
+        return nullptr;
+    }
+    if (is_call) {
+        const size_t count = symbolsCount();
+        // Walk all the symbols sharing the node's address
+        for (size_t i = first; i < count; i++) {
+            const Symbol &candidate = _symtab[i];
+            if (candidate.address != node.address) {
+                break;
+            }
+            if (candidate.type == SymbolType::kFunction) {
+                return candidate.name;
+            }
+        }
+        return nullptr;
+    }
+    return _symtab[first].name;
+}
+
+/// A pending ".size" directive: `name` has to be emitted once the output
+/// reaches the address `at`. `name` is not owned by the node.
+struct PendingObjectSize {
+    PendingObjectSize *next{};
+    uint32_t at{};
+    const char *name{};
+};
+
+/// Singly linked list of pending ".size" directives.
+///
+/// The list owns its nodes and frees them in the destructor, hence it is
+/// made non-copyable: a copy would share the nodes and free them twice.
+struct PendingObjectSizeList {
+    PendingObjectSize *_first{}, *_last{};
+    PendingObjectSizeList() = default;
+    PendingObjectSizeList(const PendingObjectSizeList &) = delete;
+    PendingObjectSizeList &operator=(const PendingObjectSizeList &) = delete;
+    /// Registers `name` to be taken at the address `at`. `name` must be a
+    /// non-empty string that outlives the list entry.
+    void Add(uint32_t at, const char *name)
+    {
+        assert(name && *name);
+        // Last in first out: the new node becomes the head. `new` throws
+        // on failure, so the result needs no null check.
+        PendingObjectSize *pending = new PendingObjectSize{_first, at, name};
+        if (_last == nullptr) {
+            _last = pending;
+        }
+        _first = pending;
+    }
+    /// Removes the most recently added entry registered for the address
+    /// `at` and returns its name, or nullptr when there is none.
+    const char *TakeNext(uint32_t at)
+    {
+        PendingObjectSize *prev = nullptr;
+        for (PendingObjectSize *cur = _first; cur; prev = cur, cur = cur->next) {
+            if (cur->at != at) {
+                continue;
+            }
+            const char *name = cur->name;
+            // Unlink the node from the chain
+            if (prev) {
+                prev->next = cur->next;
+            } else {
+                _first = cur->next;
+            }
+            if (_last == cur) {
+                _last = prev;
+            }
+            delete cur;
+            return name;
+        }
+        return nullptr;
+    }
+    ~PendingObjectSizeList()
+    {
+        while (_first) {
+            auto *cur = _first;
+            _first = _first->next;
+            delete cur;
+        }
+        _last = nullptr;
+    }
+};
+
+/// Returns the word used after '@' in an assembler ".type" directive for a
+/// symbol type, or nullptr when the type has no such word.
+static constexpr const char *SymbolTypeToElfTypeString(SymbolType t)
+{
+    if (t == SymbolType::kFunction) {
+        return "function";
+    }
+    if (t == SymbolType::kObject) {
+        return "object";
+    }
+    return nullptr;
+}
+
static void RenderNodeDisassembly(
FILE *const output,
const DisasmMap &disasm_map,
const DataView &code,
const Settings &s,
- const DisasmNode &node)
+ const DisasmNode &node,
+ size_t &symbol_index,
+ PendingObjectSizeList &pending_size)
{
- if (node.ref_by) {
- const bool is_local = IsLocalLocation(disasm_map, node);
- if (s.labels && !(s.short_ref_local_labels && is_local)) {
- const bool export_this_function = s.export_functions && HasCallReference(node);
+ for (const char *name = pending_size.TakeNext(node.address); name;) {
+ fprintf(output, "%s.size\t%s,.-%s\n", s.indent, name, name);
+ name = pending_size.TakeNext(node.address);
+ }
+ const size_t symtab_size = disasm_map.SymbolsCount();
+ bool have_rendered_label_already = false;
+ bool have_rendered_function_label_already = false;
+ if (disasm_map.Symtab() != nullptr && symtab_size > 0) {
+ for (; symbol_index < symtab_size; symbol_index++) {
+ if (disasm_map.Symtab()[symbol_index].address >= node.address) {
+ break;
+ }
+ }
+ for (; symbol_index < symtab_size; symbol_index++) {
+ const auto &symbol = disasm_map.Symtab()[symbol_index];
+ if (symbol.address != node.address) {
+ break;
+ }
+ if (symbol.name != nullptr || *symbol.name == '\0') {
+ fprintf(output, "\n%s.globl\t%s\n", s.indent, symbol.name);
+ if (symbol.type == SymbolType::kFunction) {
+ have_rendered_function_label_already = true;
+ }
+ const char *const type = SymbolTypeToElfTypeString(symbol.type);
+ if (type) {
+ fprintf(output, "%s.type\t%s, @%s\n", s.indent, symbol.name, type);
+ }
+ if (symbol.size > 0) {
+ pending_size.Add(node.address + symbol.size, symbol.name);
+ }
+ fprintf(output, "%s:\n", disasm_map.Symtab()[symbol_index].name);
+ have_rendered_label_already = true;
+ }
+ }
+ }
+ const bool is_local = s.short_ref_local_labels && IsLocalLocation(disasm_map, node);
+ do {
+ // Skip generating label or short jump label in-place in case if there
+ // are no referrers or we already have a suitable label from ELF's
+ // symtab or some other sources, that has been printed in the code
+ // section above.
+ if (node.ref_by == nullptr) {
+ break;
+ }
+ const bool have_call_reference = HasCallReference(node);
+ if (have_call_reference && have_rendered_function_label_already) {
+ break;
+ }
+ if (have_rendered_label_already) {
+ break;
+ }
+ // If we got here it must be that there is no suitable symbol found in
+ // the symtab, so it must be generated in-place.
+ constexpr auto generated_name_length = sizeof "L00000000";
+ char name[generated_name_length + 1] = {0};
+ snprintf(name, generated_name_length, "L%08x", node.address);
+ if (s.labels && !is_local) {
+ const bool export_this_function = s.export_functions && have_call_reference;
const bool export_this_label = s.export_all_labels ||
(s.export_labels && node.ref_by && (node.ref_by->refs_count > 1)) ||
export_this_function;
if (export_this_label) {
- fprintf(output, "\n%s.globl\tL%08x\n", s.indent, node.address);
+ fprintf(output, "\n%s.globl\t%s\n", s.indent, name);
if (export_this_function) {
- fprintf(output, "%s.type\tL%08x, @function\n", s.indent, node.address);
+ fprintf(output, "%s.type\t%s, @function\n", s.indent, name);
}
}
}
- if (s.xrefs_from && !(s.short_ref_local_labels && is_local)) {
- fprintf(output, "| XREFS:\n");
- for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) {
- if (ref->refs_count == 0) {
- continue;
- }
- fprintf(output, "|");
- for (size_t i = 0; i < ref->refs_count; i++) {
- const ReferenceRecord r = ref->refs[i];
- fprintf(output, " %s @%08x", ReferenceTypeToString(r.type), r.address);
- }
- fprintf(output, "\n");
- }
- }
if (s.labels) {
- if (s.short_ref_local_labels && is_local) {
+ if (is_local) {
fprintf(output, "1:%s", StringWihoutFristNChars(s.indent, (sizeof "1:") - 1));
} else {
- fprintf(output, "L%08x:\n", node.address);
+ fprintf(output, "%s:\n", name);
+ }
+ }
+ } while (0);
+ if (s.xrefs_from && !(is_local && !have_rendered_label_already)) {
+ fprintf(output, "| XREFS:\n");
+ for (const ReferenceNode *ref{node.ref_by}; ref; ref = ref->next) {
+ if (ref->refs_count == 0) {
+ continue;
+ }
+ fprintf(output, "|");
+ for (size_t i = 0; i < ref->refs_count; i++) {
+ const ReferenceRecord r = ref->refs[i];
+ fprintf(output, " %s @%08x", ReferenceTypeToString(r.type), r.address);
}
+ fprintf(output, "\n");
}
}
assert(node.op.opcode != OpCode::kNone);
@@ -430,20 +753,32 @@ static void RenderNodeDisassembly(
: 0) |
((s.imm_labels && ref1) ? (node.ref_kinds & kRef1ImmMask) : 0) |
(node.ref_kinds & (kRefDataMask | kRefPcRelFix2Bytes));
- const bool ref1_is_local = !ref1 || IsLocalLocation(disasm_map, *ref1);
+ const bool ref1_is_local = s.short_ref_local_labels &&
+ ref1 && IsLocalLocation(disasm_map, *ref1);
char ref1_label[32]{};
if (ref1) {
- if (s.short_ref_local_labels && ref1_is_local) {
+ const bool is_call =
+ ReferenceType::kCall == ReferenceTypeFromRefKindMask1(ref_kinds);
+ const char *sym_name = disasm_map.GetFirstSuitableSymbol(*ref1, is_call);
+ if (sym_name) {
+ snprintf(ref1_label, (sizeof ref1_label), "%s", sym_name);
+ } else if (ref1_is_local) {
const char dir = ref1_addr <= node.address ? 'b' : 'f';
snprintf(ref1_label, (sizeof ref1_label), "1%c", dir);
} else {
- snprintf(ref1_label, (sizeof ref1_label), "L%08x", ref1_addr);
+ snprintf(ref1_label, (sizeof ref1_label), "L%08x", ref1_addr);
}
}
- const bool ref2_is_local = !ref2 || IsLocalLocation(disasm_map, *ref2);
+ const bool ref2_is_local = s.short_ref_local_labels &&
+ ref2 && IsLocalLocation(disasm_map, *ref2);
char ref2_label[32]{};
if (ref2) {
- if (s.short_ref_local_labels && ref2_is_local) {
+ const bool is_call =
+ ReferenceType::kCall == ReferenceTypeFromRefKindMask2(ref_kinds);
+ const char *sym_name = disasm_map.GetFirstSuitableSymbol(*ref2, is_call);
+ if (sym_name) {
+ snprintf(ref2_label, (sizeof ref2_label), "%s", sym_name);
+ } else if (ref2_is_local) {
const char dir = ref2_addr <= node.address ? 'b' : 'f';
snprintf(ref2_label, (sizeof ref2_label), "1%c", dir);
} else {
@@ -461,12 +796,11 @@ static void RenderNodeDisassembly(
ref1_addr,
ref2_addr);
const bool ref1_from_imm_ok = ((node.ref_kinds & kRef1ImmMask) ? s.imm_labels : true);
- if (s.xrefs_to && !(s.short_ref_local_labels && ref1_is_local) && ref1_from_imm_ok)
- {
- fprintf(output, " | L%08x", ref1_addr);
+ if (s.xrefs_to && ref1 && !ref1_is_local && ref1_from_imm_ok) {
+ fprintf(output, " | XREF1 @%08x", ref1_addr);
}
- if (s.xrefs_to && !(s.short_ref_local_labels && ref2_is_local)) {
- fprintf(output, " | L%08x", ref2_addr);
+ if (s.xrefs_to && ref2 && !ref2_is_local) {
+ fprintf(output, " | XREF2 @%08x", ref2_addr);
}
} else {
node.op.FPrint(output, s.indent, s.imm_hex);
@@ -484,21 +818,54 @@ static void RenderNodeDisassembly(
fprintf(output, "\n");
}
+// Emits an absolute definition for every symbol whose address is greater than
+// code.size: a `.globl` line, a `.type` line when the symbol type maps to an
+// ELF type string, a `name = 0x...` assignment and a `.size` line when the
+// symbol size is non-zero.
+// NOTE(review): a symbol located exactly at code.size is skipped here too --
+// confirm that it is rendered elsewhere.
+static void RenderNonCodeSymbols(
+        FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s)
+{
+    for (size_t idx = 0, total = disasm_map.SymbolsCount(); idx != total; ++idx) {
+        const auto &sym = disasm_map.Symtab()[idx];
+        if (sym.address > code.size) {
+            fprintf(output, "\n%s.globl\t%s\n", s.indent, sym.name);
+            if (const char *const elf_type = SymbolTypeToElfTypeString(sym.type)) {
+                fprintf(output, "%s.type\t%s, @%s\n", s.indent, sym.name, elf_type);
+            }
+            fprintf(output, "%s = 0x%08x\n", sym.name, sym.address);
+            if (sym.size != 0) {
+                fprintf(output, "%s.size\t%s, 0x%zx\n", s.indent, sym.name, sym.size);
+            }
+        }
+    }
+}
+
// Renders the disassembly of the whole code image into `output`.
//
// This comment describes the function as it stands after this change (the
// `+` lines). The loop walks `code` from offset 0 while the offset is below
// code.size. At each offset the node returned by
// disasm_map.FindNodeByAddress() is rendered with RenderNodeDisassembly();
// when there is none, a fallback node of type NodeType::kTracedInstruction
// and size 2, holding Op::Raw() of the 16-bit word read by GetU16BE() at that
// offset, is rendered instead. The offset then advances by the size of the
// rendered node. RenderNodeDisassembly() also receives the symbol cursor
// `sym_i` and the `pending_size` list. After the loop RenderNonCodeSymbols()
// is called with the same output, map, code and settings.
//
// NOTE(review): the fallback node is built on every iteration, so
// GetU16BE(code.buffer + i) runs even when a node exists; presumably at least
// two bytes are always available at every visited offset -- confirm.
static void RenderDisassembly(
FILE *const output, const DisasmMap &disasm_map, const DataView &code, const Settings &s)
{
- for (size_t i = 0; i < code.size;) {
+ // This list is used to track all places where ".size fnname, .-fnname"
+ // directives must be put.
+ PendingObjectSizeList pending_size{};
+ // sym_i starts with 1 because 0 is a special null symbol
+ for (size_t i = 0, sym_i = 1; i < code.size;) {
+ const DisasmNode raw = DisasmNode{
+ /* .type = */ NodeType::kTracedInstruction,
+ /* .address = */ static_cast<uint32_t>(i),
+ /* .size = */ 2,
+ /* .ref_kinds = */ 0,
+ /* .ref1_addr = */ 0,
+ /* .ref2_addr = */ 0,
+ /* .ref_by = */ nullptr,
+ /* .last_ref_by = */ nullptr,
+ /* .op = */ Op::Raw(GetU16BE(code.buffer + i)),
+ };
const DisasmNode *node = disasm_map.FindNodeByAddress(i);
- if (node) {
- RenderNodeDisassembly(output, disasm_map, code, s, *node);
- i += node->size;
- } else {
- auto raw = Op::Raw(GetU16BE(code.buffer + i));
- raw.FPrint(output, s.indent, s.imm_hex);
- fprintf(output, "\n");
- i += kInstructionSizeStepBytes;
+ if (node == nullptr) {
+ node = &raw;
}
+ RenderNodeDisassembly(output, disasm_map, code, s, *node, sym_i, pending_size);
+ i += node->size;
}
+ RenderNonCodeSymbols(output, disasm_map, code, s);
}
static void ParseTraceData(DisasmMap &disasm_map, const DataView &trace_data)
@@ -580,7 +947,7 @@ static DisasmMap *NewDisasmMap(FILE *trace_stream)
}
// Parse trace file into map
DisasmMap *disasm_map = new DisasmMap{DisasmMapType::kTraced};
- assert(disasm_map);
+ assert(disasm_map != nullptr);
ParseTraceData(*disasm_map, trace_data.View());
return disasm_map;
}
@@ -614,6 +981,11 @@ static int M68kDisasm(
if (disasm_map == nullptr) {
return EXIT_FAILURE;
}
+ if (from_elf && s.symbols) {
+ if (false == disasm_map->ApplySymbolsFromElf(elf)) {
+ return EXIT_FAILURE;
+ }
+ }
// Disasm into output map
disasm_map->Disasm(code, s);
// Print output into output_stream
@@ -652,6 +1024,7 @@ static bool ApplyFeature(Settings& s, const char *feature_arg)
{ &Settings::imm_hex, "imm-hex" },
{ &Settings::follow_jumps, "follow-jumps" },
{ &Settings::walk, "walk" },
+ { &Settings::symbols, "symbols" },
};
constexpr size_t sizeof_no_prefix = (sizeof "no-") - 1;
const bool disable = FeatureStringHasPrefixNo(feature_arg);
@@ -708,6 +1081,8 @@ static void PrintUsage(FILE *s, const char *argv0)
" follow-jumps Follow jumps to statically known locations.\n"
" walk Try best to detect further instructions following known\n"
" traced locations without overcommitting.\n"
+ " symbols Extract and apply symbols from input file if available.\n"
+ " ELF symbols only are currently supported.\n"
, argv0);
}