summaryrefslogtreecommitdiff
path: root/src/tracetab.h
diff options
context:
space:
mode:
authorOxore <oxore@protonmail.com>2025-01-03 17:07:00 +0300
committerOxore <oxore@protonmail.com>2025-01-07 14:39:01 +0300
commitcb96278e25140cfcc1afc22df2102bcf3b6ae38c (patch)
tree9e93bd8a5fb4d5fbc177924b6b25ca8cd04e7fd7 /src/tracetab.h
parent810dc87cd5173f8cfc81c774fd49cf8f928a9ae8 (diff)
Impl extended trace table format parser
Diffstat (limited to 'src/tracetab.h')
-rw-r--r--src/tracetab.h273
1 files changed, 273 insertions, 0 deletions
diff --git a/src/tracetab.h b/src/tracetab.h
new file mode 100644
index 0000000..6d2ad35
--- /dev/null
+++ b/src/tracetab.h
@@ -0,0 +1,273 @@
+#pragma once
+
+/* SPDX-License-Identifier: Unlicense
+ */
+
+#include <cstdint>
+#include <cstdio>
+
+// Kinds of data a data type may describe. Kinds may be nested via the kTable kind.
+enum class DataTypeKind {
+ kBlob = 0, ///< Blob (presumably opaque bytes -- confirm); base size is 1 byte.
+ kStr, ///< String; base size is 1 byte (presumably without terminator, unlike kStrz -- confirm).
+ kStrz, ///< `strz`: string whose count includes the null terminator; base size is 1 byte.
+ kTable, ///< Table composed of nested types; a base size is not applicable.
+ kPtr, ///< `ptr`: 4 bytes.
+ kU32, ///< `u32`: 4 bytes.
+ kU16, ///< `u16`: 2 bytes.
+ kU8, ///< `u8`: 1 byte.
+};
+
+/*! Returns the size in bytes of one element of the kind \p k.
+ *
+ * The result is 0 for DataTypeKind::kTable, for which a base size is not
+ * applicable, and also for any value that is not a named enumerator.
+ */
+constexpr size_t DataTypeBaseSize(const DataTypeKind k)
+{
+    // No `default` label on purpose: every enumerator is listed explicitly.
+    size_t bytes = 0;
+    switch (k) {
+    case DataTypeKind::kU8:
+    case DataTypeKind::kBlob:
+    case DataTypeKind::kStr:
+    case DataTypeKind::kStrz:
+        bytes = 1;
+        break;
+    case DataTypeKind::kU16:
+        bytes = 2;
+        break;
+    case DataTypeKind::kU32:
+    case DataTypeKind::kPtr:
+        bytes = 4;
+        break;
+    case DataTypeKind::kTable:
+        break; ///< Not applicable
+    }
+    return bytes;
+}
+
+enum class TraceNodeKind { // Selects what a TraceNode describes.
+ kPc = 0, ///< Traced PC location; TraceNode::size is not applicable.
+ kFunction, ///< Function; TraceNode::size holds the function size in bytes.
+ kData, ///< Data object; TraceNode::data_type is interpreted, TraceNode::size is in bytes.
+};
+
+/*! Description of the type of a data object found in the trace table.
+ */
+struct DataType {
+    /*! Kind of the data object.
+     */
+    DataTypeKind kind{};
+
+    /*! Number of elements of this type.
+     *
+     * Any data type may be declared as a repeated sequence of elements of the
+     * same type, and this field records the repetition. In the trace table
+     * the count belongs to the type rather than to the node: "strz 12" stores
+     * 12 here, which for `strz` means 12 bytes, so the meaning depends on the
+     * kind. For a table such as "[ptr, u32 3] 16" the field holds 16, the
+     * number of entries, each made of one pointer and three `u32` values. To
+     * find how many bytes such a table takes, one has to traverse all nested
+     * types and combine their counts with their base sizes: a `ptr` takes
+     * 4 bytes and a `u32` takes 4 bytes, so the total is
+     * 16 * (4 + 4 * 3) = 256 bytes.
+     *
+     * Base sizes unambiguously defined in this disassembler:
+     *   kPtr `ptr`: 4 bytes;
+     *   kU32 `u32`: 4 bytes;
+     *   kU16 `u16`: 2 bytes;
+     *   kU8 `u8`: 1 byte.
+     *
+     * The same values are available from the DataTypeBaseSize function and
+     * from the BaseSize method.
+     *
+     * When kind is kStrz (`strz`) this field is the length of the string
+     * including the null terminator.
+     *
+     * Zero is a valid value.
+     *
+     * The full node size in bytes is available in the TraceNode::size field.
+     */
+    uint32_t count{};
+
+    /*! Index of the first nested type of a table element.
+     *
+     * Only interpreted if kind is kTable. It is an index into the type table
+     * of the containing trace table; the type found there designates the type
+     * of an element of the table this type represents.
+     */
+    size_t nested_idx{};
+
+    /*! Number of nested types that compose one table element.
+     *
+     * Only interpreted if kind is kTable. It tells how many entries of the
+     * data type table must be interpreted to form a single element of the
+     * table.
+     */
+    size_t nested_num{};
+
+    /*! Compares kind, nested_idx and nested_num.
+     *
+     * The count field does not take part in the comparison.
+     */
+    bool operator==(const DataType &other) const
+    {
+        if (kind != other.kind) {
+            return false;
+        }
+        if (nested_idx != other.nested_idx) {
+            return false;
+        }
+        return nested_num == other.nested_num;
+    }
+    bool operator!=(const DataType &other) const { return !operator==(other); }
+
+    /*! Size in bytes of a single element of this kind, see DataTypeBaseSize.
+     */
+    constexpr size_t BaseSize() const { return DataTypeBaseSize(kind); }
+};
+
+/*! A single entry of the trace table: a PC location, a function or a data
+ * object.
+ */
+struct TraceNode {
+    TraceNodeKind kind{};
+
+    /*! Type of the data object.
+     *
+     * Only interpreted if kind is TraceNodeKind::kData.
+     */
+    DataType data_type{};
+
+    /*! Size of the data object or function, if applicable.
+     *
+     * Not applicable when kind is kPc.
+     *
+     * When kind is kFunction this field is the function size in bytes.
+     *
+     * When kind is kData this field is always the object size in bytes. If
+     * the underlying type is a table and the number of elements is wanted
+     * instead, it is available in the DataType::count field.
+     *
+     * When the data type kind is kStrz (`strz`) this field is the length of
+     * the string including the null terminator.
+     */
+    uint32_t size{};
+
+    /*! Virtual offset of the traced location.
+     *
+     * By coincidence this is also the offset inside the Sega Mega
+     * Drive/Genesis game binary when it refers to a ROM location. It may
+     * refer to a RAM location as well, which lies outside of the possible
+     * cartridge code region range.
+     */
+    uint32_t address{};
+
+    /*! Object name, as an index into the shared string array.
+     *
+     * The shared string list is held by the TraceTable object. The value 0
+     * means that no name is assigned. The name ends with a null terminator
+     * and follows the alphanumeric regex: "[a-zA-Z_][0-9a-zA-Z_]*".
+     */
+    size_t name{};
+
+    /*! Comment line(s), as an index into the shared string array.
+     *
+     * The shared string list is held by the TraceTable object. The value 0
+     * means that no comment is assigned. The comment ends with a null
+     * terminator and may contain line feeds, which correspond to the original
+     * line feeds read from the trace table file. The comment start marks '#'
+     * are not preserved in the comment strings.
+     */
+    size_t comment{};
+
+    /*! Makes a node that only has its address set to \p address_.
+     *
+     * Every other field keeps its default value, so kind is
+     * TraceNodeKind::kPc.
+     */
+    static constexpr auto Pc(uint32_t address_)
+    {
+        TraceNode node{};
+        node.address = address_;
+        return node;
+    }
+
+    /*! Compares kind, size and address, and additionally data_type when kind
+     * is kData.
+     *
+     * The name and comment fields do not take part in the comparison.
+     */
+    bool operator==(const TraceNode &other) const
+    {
+        if (kind != other.kind || size != other.size || address != other.address) {
+            return false;
+        }
+        if (kind != TraceNodeKind::kData) {
+            return true;
+        }
+        return data_type == other.data_type;
+    }
+    bool operator!=(const TraceNode &other) const { return !operator==(other); }
+};
+
+/*! Holder of a parsed trace table: its types, its nodes and its shared
+ * strings.
+ *
+ * The object can neither be copied nor move-constructed; contents are
+ * transferred with move assignment only. The destructor is defined outside of
+ * this header; move assignment relies on it to dispose of the arrays that
+ * were held before the assignment.
+ */
+class TraceTable {
+    /*! A shared table of all types used in the parsed trace table.
+     *
+     * When a data trace node has a type of DataTypeKind::kTable, it refers to
+     * one or more types that in combination represent a single table element.
+     * Instead of specifying the type explicitly it refers to it by an index
+     * inside this types array. Types may become arbitrarily complex in theory,
+     * and this approach with indexes into this array makes it possible to
+     * build an abstract syntax tree of types representing an object.
+     */
+    DataType *_types{};
+
+    /*! Size of the _types array.
+     */
+    size_t _types_count{};
+
+    /*! A shared table of all nodes in the parsed trace table.
+     */
+    TraceNode *_nodes{};
+
+    /*! Size of the _nodes array.
+     */
+    size_t _nodes_count{};
+
+    /*! A shared list of strings separated by null terminators.
+     *
+     * Used for object names like traced PC locations, functions or tables, as
+     * well as for comments.
+     */
+    char *_shstr{};
+
+public:
+    /*! Creates an empty table: all pointers are null and all counts are zero.
+     */
+    constexpr TraceTable(){}
+
+    /*! Creates a table that holds the given arrays.
+     *
+     * \p types_size is the number of elements in \p types and \p nodes_count
+     * is the number of elements in \p nodes.
+     */
+    constexpr TraceTable(
+            DataType *types, size_t types_size, TraceNode *nodes, size_t nodes_count, char *shstr)
+        : _types(types)
+        , _types_count(types_size)
+        , _nodes(nodes)
+        , _nodes_count(nodes_count)
+        , _shstr(shstr)
+    {}
+    constexpr TraceTable(const TraceTable&) = delete;
+    constexpr TraceTable(TraceTable&& other) = delete;
+    ~TraceTable();
+    constexpr TraceTable &operator=(const TraceTable &other) = delete;
+
+    /*! Takes over the contents of \p other and leaves \p other empty.
+     *
+     * Whatever this object held before is disposed of by the destructor.
+     * Assigning an object to itself changes nothing.
+     */
+    TraceTable &operator=(TraceTable &&other) noexcept
+    {
+        if (this == &other) {
+            return *this;
+        }
+        // Pass the currently held arrays to a scoped temporary so that its
+        // destructor disposes of them when this function returns. Calling
+        // `this->~TraceTable()` instead would end the lifetime of `*this`,
+        // and writing to its members afterwards is undefined behaviour.
+        TraceTable previous(_types, _types_count, _nodes, _nodes_count, _shstr);
+        _types = other._types;
+        _types_count = other._types_count;
+        _nodes = other._nodes;
+        _nodes_count = other._nodes_count;
+        _shstr = other._shstr;
+        // The source must not dispose of what it no longer holds.
+        other._types = nullptr;
+        other._types_count = 0;
+        other._nodes = nullptr;
+        other._nodes_count = 0;
+        other._shstr = nullptr;
+        return *this;
+    }
+
+    /*! Node at \p index. No bounds checking is performed.
+     */
+    constexpr const TraceNode &Node(size_t index) const { return _nodes[index]; }
+
+    /*! Type at \p index. No bounds checking is performed.
+     */
+    constexpr const DataType &Type(size_t index) const { return _types[index]; }
+
+    /*! Pointer into the shared string list at offset \p index.
+     *
+     * No bounds checking is performed.
+     */
+    constexpr const char *Shstr(size_t index = 0) const { return _shstr + index; }
+
+    /*! Size of the types array.
+     */
+    constexpr size_t TypesCount() const { return _types_count; }
+
+    /*! Size of the nodes array.
+     */
+    constexpr size_t NodesCount() const { return _nodes_count; }
+
+    friend bool ParseTraceData(
+            TraceTable &output,
+            const void *trace_data,
+            size_t trace_data_size,
+            FILE *errstream,
+            const char *trace_file_name);
+};
+
+/*! Parses \p trace_data into the \p output trace table.
+ *
+ * The \p trace_file_name is purely informative and is used for error reporting
+ * purposes only. When \p trace_file_name is nullptr, "<stdin>" is used in the
+ * error reporting, which is directed to the \p errstream. When \p errstream is
+ * nullptr, no error messages will be printed in case of failure and the
+ * function will return false silently without even touching the \p output
+ * value.
+ *
+ * \p trace_data_size simply tells the size of the \p trace_data buffer.
+ *
+ * \returns true on success.
+ */
+bool ParseTraceData(
+ TraceTable &output,
+ const void *trace_data,
+ size_t trace_data_size,
+ FILE *errstream = nullptr,
+ const char *trace_file_name = nullptr);