diff options
author | Oxore <oxore@protonmail.com> | 2025-01-03 17:07:00 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2025-01-07 14:39:01 +0300 |
commit | cb96278e25140cfcc1afc22df2102bcf3b6ae38c (patch) | |
tree | 9e93bd8a5fb4d5fbc177924b6b25ca8cd04e7fd7 /src/tracetab.h | |
parent | 810dc87cd5173f8cfc81c774fd49cf8f928a9ae8 (diff) |
Impl extended trace table format parser
Diffstat (limited to 'src/tracetab.h')
-rw-r--r-- | src/tracetab.h | 273 |
1 files changed, 273 insertions, 0 deletions
#pragma once

/* SPDX-License-Identifier: Unlicense
 */

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <new>

/*! Kinds of data a data trace node (or a table element) may be made of.
 *
 * These data types may be nested via the kTable type.
 */
enum class DataTypeKind {
    kBlob = 0,
    kStr,
    kStrz,
    kTable,
    kPtr,
    kU32,
    kU16,
    kU8,
};

/*! Returns the size in bytes of a single element of kind \p k.
 *
 * Byte-granular kinds (kBlob, kStr, kStrz, kU8) yield 1, kU16 yields 2, kPtr
 * and kU32 yield 4. kTable has no base size of its own and yields 0, as does
 * any value that is not a named enumerator.
 */
constexpr size_t DataTypeBaseSize(const DataTypeKind k)
{
    switch (k) {
    case DataTypeKind::kBlob:
    case DataTypeKind::kStr:
    case DataTypeKind::kStrz:
    case DataTypeKind::kU8:
        return 1;
    case DataTypeKind::kTable:
        return 0; ///< Not applicable
    case DataTypeKind::kPtr:
    case DataTypeKind::kU32:
        return 4;
    case DataTypeKind::kU16:
        return 2;
    }
    // Reached only for values outside of the enumerators listed above.
    return 0;
}

/*! Kinds of nodes a trace table may contain.
 */
enum class TraceNodeKind {
    kPc = 0,
    kFunction,
    kData,
};

struct DataType {
    /*! Designates a type of a data object.
     */
    DataTypeKind kind{};

    /*! Count of elements in the data type.
     *
     * Any data type may be declared as a repeated sequence of elements of the
     * same type. This field is needed to represent it. When a type is declared
     * in the trace table, the count field belongs to the type rather than to a
     * node, i.e. "strz 12" means that the count field will contain 12 and in
     * this case it means 12 bytes, so it is context dependent. Another useful
     * example is a table defined like this "[ptr, u32 3] 16" which means 16
     * entries, each containing a pointer and three values of `u32`. The count
     * value will then contain 16, but to figure out how many bytes the table
     * will take, one has to traverse all nested types, get their count and
     * consider base type size, which means that a single `ptr` takes 4 bytes,
     * as well as `u32` takes 4 bytes. So the total table size in bytes will be
     * 16 * (4 + 4 * 3) = 256.
     *
     * To be clear, for each type the following base sizes are unambiguously
     * defined in this disassembler:
     *   kPtr `ptr`: 4 bytes;
     *   kU32 `u32`: 4 bytes;
     *   kU16 `u16`: 2 bytes;
     *   kU8 `u8`: 1 byte.
     *
     * Or you may use the DataTypeBaseSize function or the BaseSize method to
     * get the base type size in bytes.
     *
     * When DataTypeKind is kStrz (`strz`) this field represents the length of
     * the string including the null terminator.
     *
     * This field may contain zero.
     *
     * Full node size in bytes is available in the TraceNode.size field.
     */
    uint32_t count{};

    /*! Designates a type of an element or composition of the table.
     *
     * Only interpreted if kind is kTable. It holds an index into the type
     * table of the containing trace table. The type pointed to by the index
     * designates a type of an element of the table this node represents.
     */
    size_t nested_idx{};

    /*! Designates a number of types in a nested composition of the table.
     *
     * Only interpreted if kind is kTable. It holds the number of types that
     * must be interpreted in the data type table that compose a single element
     * of the table.
     */
    size_t nested_num{};

    /*! Compares kind, nested_idx and nested_num.
     *
     * The count field does not take part in the comparison.
     * NOTE(review): it is not visible from this header whether leaving count
     * out is deliberate - confirm before relying on it.
     */
    bool operator==(const DataType &other) const
    {
        return kind == other.kind &&
            nested_idx == other.nested_idx &&
            nested_num == other.nested_num;
    }
    bool operator!=(const DataType &other) const { return !(*this == other); }

    /*! Size in bytes of a single element of this type, see DataTypeBaseSize.
     */
    constexpr size_t BaseSize() const { return DataTypeBaseSize(kind); }
};

struct TraceNode {
    TraceNodeKind kind{};

    /*! Designates a type of a data trace node.
     *
     * Only interpreted if TraceNode::kind is kData.
     */
    DataType data_type{};

    /*! Size of data/function, if applicable.
     *
     * When kind is kPc it is not applicable.
     *
     * When kind is kFunction this field represents the function size in bytes.
     *
     * When kind is kData this field always represents the object size in
     * bytes. If the underlying type of the object is a table and the count of
     * elements is needed instead, then the DataType.count field may be used to
     * get it.
     *
     * When DataTypeKind is kStrz (`strz`) this field represents the length of
     * the string including the null terminator.
     */
    uint32_t size{};

    /*! Virtual offset of the traced location.
     *
     * By coincidence this also would be an offset inside the Sega Mega
     * Drive/Genesis game binary in case it refers to a ROM location. But it
     * may contain a RAM location as well, which is located outside of the
     * possible cartridge code region range.
     */
    uint32_t address{};

    /*! Object name represented by an index into the shared string array.
     *
     * The shared string list may be found in the TraceTable object. The value
     * of 0 means no name assigned. The name ends with a null terminator and
     * follows the alphanumeric regex: "[a-zA-Z_][0-9a-zA-Z_]*".
     */
    size_t name{};

    /*! Comment line(s) represented by an index into the shared string array.
     *
     * The shared string list may be found in the TraceTable object. The value
     * of 0 means no comment assigned. The comment ends with a null terminator
     * and may contain line feeds which correspond to the original line feeds
     * read from the trace table file. No comment start marks '#' are preserved
     * in the comment strings.
     */
    size_t comment{};

    /*! Makes a kPc node located at \p address_, every other field is zero.
     */
    static constexpr auto Pc(uint32_t address_)
    {
        TraceNode n{};
        n.address = address_;
        return n;
    }

    /*! Compares kind, size and address, plus data_type for kData nodes.
     *
     * The name and comment fields do not take part in the comparison.
     */
    bool operator==(const TraceNode &other) const
    {
        if (!(kind == other.kind && size == other.size && address == other.address)) {
            return false;
        }
        // The data type is meaningful for data nodes only.
        return (kind == TraceNodeKind::kData) ? data_type == other.data_type : true;
    }
    bool operator!=(const TraceNode &other) const { return !(*this == other); }
};

/*! Parsed trace table: types, nodes and the strings they refer to.
 *
 * The object is neither copyable nor move constructible, it may only be move
 * assigned. The destructor is defined out of line.
 * NOTE(review): presumably the destructor releases the three arrays, which
 * makes this object their owner - confirm against its definition.
 */
class TraceTable {
    /*! A shared table of all types used in the parsed trace table.
     *
     * When a data trace node has type of DataTypeKind::kTable, it will refer to
     * one or more types that in a combination represent a single table element.
     * Instead of specifying the type explicitly it refers to it by an index
     * inside this types array. Types may go crazy complex in theory, and this
     * approach with indexes into this array makes it possible to build an
     * abstract syntax tree of types representing an object.
     */
    DataType *_types{};

    /*! Size of the _types array.
     */
    size_t _types_count{};

    /*! A shared table of all nodes in the parsed trace table.
     */
    TraceNode *_nodes{};

    /*! Size of the _nodes array.
     */
    size_t _nodes_count{};

    /*! A shared list of strings separated by null terminators.
     *
     * Used for object names like traced PC locations, functions or tables, as
     * well as for comments.
     */
    char *_shstr{};

public:
    /*! Makes an empty table: null arrays and zero counts.
     */
    constexpr TraceTable(){}

    /*! Makes a table out of already built arrays, the pointers are stored
     * as is.
     */
    constexpr TraceTable(
            DataType *types, size_t types_size, TraceNode *nodes, size_t nodes_count, char *shstr)
        : _types(types)
        , _types_count(types_size)
        , _nodes(nodes)
        , _nodes_count(nodes_count)
        , _shstr(shstr)
    {}
    constexpr TraceTable(const TraceTable&) = delete;
    constexpr TraceTable(TraceTable&& other) = delete;
    ~TraceTable();
    constexpr TraceTable &operator=(const TraceTable &other) = delete;

    /*! Takes over the contents of \p other and leaves \p other empty.
     *
     * Whatever this object held before is handed to the destructor first.
     * Self-assignment is harmless: the contents are parked in locals, the
     * (by then empty) object is destroyed and gets the same contents back.
     */
    TraceTable &operator=(TraceTable &&other) noexcept
    {
        // Grab everything before touching `this`, because `other` may be
        // the very same object.
        DataType *const types = other._types;
        const size_t types_count = other._types_count;
        TraceNode *const nodes = other._nodes;
        const size_t nodes_count = other._nodes_count;
        char *const shstr = other._shstr;
        other._types = nullptr;
        other._types_count = 0;
        other._nodes = nullptr;
        other._nodes_count = 0;
        other._shstr = nullptr;
        // The explicit destructor call ends the lifetime of *this, so the
        // members must not be merely assigned afterwards. A new object is
        // created in the same storage instead, which also keeps the
        // destructor call that eventually follows well defined.
        this->~TraceTable();
        ::new (static_cast<void *>(this))
            TraceTable(types, types_count, nodes, nodes_count, shstr);
        return *this;
    }

    /*! Node at \p index, no bounds checking is performed.
     */
    constexpr const TraceNode &Node(size_t index) const { return _nodes[index]; }

    /*! Type at \p index, no bounds checking is performed.
     */
    constexpr const DataType &Type(size_t index) const { return _types[index]; }

    /*! String that starts at offset \p index of the shared string list.
     *
     * No bounds checking is performed.
     */
    constexpr const char *Shstr(size_t index = 0) const { return _shstr + index; }
    constexpr size_t TypesCount() const { return _types_count; }
    constexpr size_t NodesCount() const { return _nodes_count; }
    friend bool ParseTraceData(
            TraceTable &output,
            const void *trace_data,
            size_t trace_data_size,
            FILE *errstream,
            const char *trace_file_name);
};

/*! Parses \p trace_data into the \p output trace table.
 *
 * The \p trace_file_name is purely informative and used for error reporting
 * purposes only. When \p trace_file_name is nullptr, "<stdin>" is used in the
 * error reporting, that is directed to the \p errstream. When \p errstream is
 * nullptr, no error messages will be printed in case of failure and function
 * will return false silently without even touching the \p output value.
 *
 * \p trace_data_size simply tells the size of the \p trace_data buffer.
 *
 * \returns true on success.
 */
bool ParseTraceData(
        TraceTable &output,
        const void *trace_data,
        size_t trace_data_size,
        FILE *errstream = nullptr,
        const char *trace_file_name = nullptr);