#pragma once

/* SPDX-License-Identifier: Unlicense */

#include <cstddef>
#include <cstdint>
#include <cstdio>

/*! Kinds of data a traced data object may consist of.
 *
 * These data types may be nested via the kTable type.
 */
enum class DataTypeKind {
    kBlob = 0,
    kStr,
    kStrz,
    kTable,
    kPtr,
    kU32,
    kU16,
    kU8,
};

/*! Returns the size in bytes of a single element of the kind \p k.
 *
 * kTable has no base size of its own, so 0 is returned for it. A value that
 * matches none of the enumerators yields 0 as well.
 */
constexpr size_t DataTypeBaseSize(const DataTypeKind k)
{
    switch (k) {
    case DataTypeKind::kBlob:
    case DataTypeKind::kStr:
    case DataTypeKind::kStrz:
    case DataTypeKind::kU8:
        return 1;
    case DataTypeKind::kTable:
        return 0; ///< Not applicable
    case DataTypeKind::kPtr:
    case DataTypeKind::kU32:
        return 4;
    case DataTypeKind::kU16:
        return 2;
    }
    return 0;
}

/*! Kinds of locations a trace node may describe. */
enum class TraceNodeKind {
    kPc = 0,
    kFunction,
    kData,
};

/*! Describes the type of a data object or of a part of a table element. */
struct DataType {
    /*! Designates a type of a data object. */
    DataTypeKind kind{};

    /*! Count of elements in the data type.
     *
     * Any data type may be declared as a repeated sequence of elements of
     * the same type. This field is needed to represent it. When a type is
     * declared in the trace table, the count field belongs to the type rather
     * than to a node, i.e. "strz 12" means that the count field will contain
     * 12 and in this case it means 12 bytes, so it is context dependent.
     * Another useful example is a table defined like this "[ptr, u32 3] 16"
     * which means 16 entries, each containing a pointer and three values of
     * `u32`. The count value will then contain 16, but to figure out how many
     * bytes the table will take, one has to traverse all nested types, get
     * their count and consider the base type size, which means that a single
     * `ptr` takes 4 bytes, as well as `u32` takes 4 bytes. So the total table
     * size in bytes will be 16 * (4 + 4 * 3) = 256.
     *
     * To be clear, for each type the following base sizes are unambiguously
     * defined in this disassembler:
     *   kPtr `ptr`: 4 bytes;
     *   kU32 `u32`: 4 bytes;
     *   kU16 `u16`: 2 bytes;
     *   kU8 `u8`: 1 byte.
     *
     * Or you may use the DataTypeBaseSize function or the BaseSize method to
     * get the base type size in bytes.
     *
     * When DataTypeKind is kStrz (`strz`) this field represents the length of
     * the string including the null terminator.
     *
     * This field may contain zero.
     *
     * Full node size in bytes is available in the TraceNode.size field.
     */
    uint32_t count{};

    /*! Designates a type of an element or composition of the table.
     *
     * Only interpreted if kind is kTable. It holds an index into the type
     * table of the containing trace table. The type pointed to by the index
     * designates a type of an element of the table this node represents.
     */
    size_t nested_idx{};

    /*! Designates a number of types in a nested composition of the table.
     *
     * Only interpreted if kind is kTable. It holds the number of types that
     * must be interpreted in the data type table that compose a single element
     * of the table.
     */
    size_t nested_num{};

    /*! Compares kind, nested_idx and nested_num.
     *
     * NOTE(review): `count` does not take part in the comparison; confirm
     * that this is intentional before relying on it.
     */
    bool operator==(const DataType &other) const
    {
        return kind == other.kind && nested_idx == other.nested_idx &&
            nested_num == other.nested_num;
    }

    bool operator!=(const DataType &other) const { return !(*this == other); }

    /*! Size in bytes of a single element of this type's kind. */
    constexpr size_t BaseSize() const { return DataTypeBaseSize(kind); }
};

/*! A single entry of the trace table: a PC location, a function or data. */
struct TraceNode {
    TraceNodeKind kind{};

    /*! Designates a type of a data trace node.
     *
     * Only interpreted if TraceNode::kind is kData.
     */
    DataType data_type{};

    /*! Size of data/function, if applicable.
     *
     * When kind is kPc it is not applicable.
     *
     * When kind is kFunction this field represents function size in bytes.
     *
     * When kind is kData this field always represents object size in bytes.
     * If the underlying type of the object is a table and the count of
     * elements is needed instead, then the DataType.count field may be used
     * to get it.
     *
     * When DataTypeKind is kStrz (`strz`) this field represents the length of
     * the string including the null terminator.
     */
    uint32_t size{};

    /*! Virtual offset of the traced location.
     *
     * By coincidence this also would be an offset inside the Sega Mega
     * Drive/Genesis game binary in case it refers to a ROM location. But it
     * may contain a RAM location as well, which is located outside of the
     * possible cartridge code region range.
     */
    uint32_t address{};

    /*! Object name represented by an index into the shared string array.
     *
     * The shared string list may be found in the TraceTable object. The value
     * of 0 means no name assigned. The name ends with a null terminator and
     * follows the alphanumeric regex: "[a-zA-Z_][0-9a-zA-Z_]*".
     */
    size_t name{};

    /*! Comment line(s) represented by an index into the shared string array.
     *
     * The shared string list may be found in the TraceTable object. The value
     * of 0 means no comment assigned. The comment ends with a null terminator
     * and may contain line feeds which correspond to the original line feeds
     * read from the trace table file. No comment start marks '#' are preserved
     * in the comment strings.
     */
    size_t comment{};

    /*! Creates a node located at \p address_.
     *
     * Every other field keeps its default value, so the kind is kPc (the
     * enumerator with the value 0).
     */
    static constexpr auto Pc(uint32_t address_)
    {
        TraceNode n{};
        n.address = address_;
        return n;
    }

    /*! Compares kind, size and address, plus data_type for kData nodes.
     *
     * The name and comment fields do not take part in the comparison.
     */
    bool operator==(const TraceNode &other) const
    {
        if (!(kind == other.kind && size == other.size && address == other.address)) {
            return false;
        }
        // The data type is only meaningful for data nodes.
        return (kind == TraceNodeKind::kData) ? data_type == other.data_type : true;
    }

    bool operator!=(const TraceNode &other) const { return !(*this == other); }
};

/*! Parsed trace table: shared arrays of types, nodes and strings. */
class TraceTable {
    /*! A shared table of all types used in the parsed trace table.
     *
     * When a data trace node has type of DataTypeKind::kTable, it will refer to
     * one or more types that in a combination represent a single table element.
     * Instead of specifying the type explicitly it refers to it by an index
     * inside this types array. Types may go crazy complex in theory, and this
     * approach with indexes into this array makes it possible to build an
     * abstract syntax tree of types representing an object.
     */
    DataType *_types{};

    /*! Size of the _types array. */
    size_t _types_count{};

    /*! A shared table of all nodes in the parsed trace table. */
    TraceNode *_nodes{};

    /*! Size of the _nodes array. */
    size_t _nodes_count{};

    /*! A shared list of strings separated by null terminators.
     *
     * Used for object names like traced PC locations, functions or tables, as
     * well as for comments.
     */
    char *_shstr{};

public:
    /*! Creates an empty table: null arrays and zero counts. */
    constexpr TraceTable(){}

    /*! Creates a table that refers to the given arrays.
     *
     * The pointers are stored as is, nothing is copied.
     */
    constexpr TraceTable(
            DataType *types,
            size_t types_size,
            TraceNode *nodes,
            size_t nodes_count,
            char *shstr)
        : _types(types)
        , _types_count(types_size)
        , _nodes(nodes)
        , _nodes_count(nodes_count)
        , _shstr(shstr)
    {}

    constexpr TraceTable(const TraceTable&) = delete;
    constexpr TraceTable(TraceTable&& other) = delete;

    ~TraceTable();

    constexpr TraceTable &operator=(const TraceTable &other) = delete;

    /*! Takes over the arrays of \p other and leaves \p other empty.
     *
     * The arrays this table referred to before the assignment are handed to a
     * scratch object, so that the destructor (defined elsewhere) disposes of
     * them right away. This avoids explicitly calling the destructor on
     * `*this` and then writing to an object whose lifetime has ended.
     *
     * Self-assignment is a no-op.
     */
    TraceTable &operator=(TraceTable &&other) noexcept
    {
        if (this != &other) {
            // Destroyed at the end of this scope, taking the old arrays along.
            TraceTable previous(_types, _types_count, _nodes, _nodes_count, _shstr);
            _types = other._types;
            _types_count = other._types_count;
            _nodes = other._nodes;
            _nodes_count = other._nodes_count;
            _shstr = other._shstr;
            other._types = nullptr;
            other._types_count = 0;
            other._nodes = nullptr;
            other._nodes_count = 0;
            other._shstr = nullptr;
        }
        return *this;
    }

    /*! Returns the node at \p index. No bounds checking is performed. */
    constexpr const TraceNode &Node(size_t index) const { return _nodes[index]; }

    /*! Returns the type at \p index. No bounds checking is performed. */
    constexpr const DataType &Type(size_t index) const { return _types[index]; }

    /*! Returns a pointer into the shared string list at offset \p index. */
    constexpr const char *Shstr(size_t index = 0) const { return _shstr + index; }

    /*! Number of entries in the types array. */
    constexpr size_t TypesCount() const { return _types_count; }

    /*! Number of entries in the nodes array. */
    constexpr size_t NodesCount() const { return _nodes_count; }

    friend bool ParseTraceData(
            TraceTable &output,
            const void *trace_data,
            size_t trace_data_size,
            FILE *errstream,
            const char *trace_file_name);
};

/*! Parses \p trace_data into the \p output trace table.
 *
 * The \p trace_file_name is purely informative and used for error reporting
 * purposes only. When \p trace_file_name is nullptr, a placeholder name is
 * used in the error reporting, which is directed to the \p errstream. When
 * \p errstream is nullptr, no error messages will be printed in case of
 * failure and the function will return false silently without even touching
 * the \p output value.
 *
 * \p trace_data_size simply tells the size of the \p trace_data buffer.
 *
 * \returns true on success.
 */
bool ParseTraceData(
        TraceTable &output,
        const void *trace_data,
        size_t trace_data_size,
        FILE *errstream = nullptr,
        const char *trace_file_name = nullptr);