diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/debug.h | 45 | ||||
-rw-r--r-- | src/tracetab.cpp | 1073 | ||||
-rw-r--r-- | src/tracetab.h | 273 | ||||
-rw-r--r-- | src/tracetab_tests.cpp | 317 | ||||
-rw-r--r-- | src/unit_tests_main.cpp | 8 | ||||
-rw-r--r-- | src/vec.cpp | 113 | ||||
-rw-r--r-- | src/vec.h | 83 | ||||
-rw-r--r-- | src/vec_tests.cpp | 50 |
8 files changed, 1962 insertions, 0 deletions
/*! Printable representation of every possible byte value.
 *
 * Indexed by a byte converted to `unsigned char`. Printable ASCII characters
 * map to themselves, except for `"`, `<`, `>` and `\` which get a leading
 * backslash. Tab, LF and CR map to `\t`, `\n` and `\r`. Every other byte maps
 * to its `\xNN` hexadecimal escape.
 *
 * All 256 entries must be present: a missing trailing initializer is silently
 * value-initialized to `nullptr`, and users of the table pass entries directly
 * to `strlen()` and `%s`. The entry for 0xff used to be missing.
 */
const char *const g_escape_table[256] = {
    "\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07",
    "\\x08", "\\t", "\\n", "\\x0b", "\\x0c", "\\r", "\\x0e", "\\x0f",
    "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
    "\\x18", "\\x19", "\\x1a", "\\x1b", "\\x1c", "\\x1d", "\\x1e", "\\x1f",
    " ", "!", "\\\"", "#", "$", "%", "&", "'",
    "(", ")", "*", "+", ",", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", ";", "\\<", "=", "\\>", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "[", "\\\\", "]", "^", "_",
    "`", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "{", "|", "}", "~", "\\x7f",
    "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
    "\\x88", "\\x89", "\\x8a", "\\x8b", "\\x8c", "\\x8d", "\\x8e", "\\x8f",
    "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
    "\\x98", "\\x99", "\\x9a", "\\x9b", "\\x9c", "\\x9d", "\\x9e", "\\x9f",
    "\\xa0", "\\xa1", "\\xa2", "\\xa3", "\\xa4", "\\xa5", "\\xa6", "\\xa7",
    "\\xa8", "\\xa9", "\\xaa", "\\xab", "\\xac", "\\xad", "\\xae", "\\xaf",
    "\\xb0", "\\xb1", "\\xb2", "\\xb3", "\\xb4", "\\xb5", "\\xb6", "\\xb7",
    "\\xb8", "\\xb9", "\\xba", "\\xbb", "\\xbc", "\\xbd", "\\xbe", "\\xbf",
    "\\xc0", "\\xc1", "\\xc2", "\\xc3", "\\xc4", "\\xc5", "\\xc6", "\\xc7",
    "\\xc8", "\\xc9", "\\xca", "\\xcb", "\\xcc", "\\xcd", "\\xce", "\\xcf",
    "\\xd0", "\\xd1", "\\xd2", "\\xd3", "\\xd4", "\\xd5", "\\xd6", "\\xd7",
    "\\xd8", "\\xd9", "\\xda", "\\xdb", "\\xdc", "\\xdd", "\\xde", "\\xdf",
    "\\xe0", "\\xe1", "\\xe2", "\\xe3", "\\xe4", "\\xe5", "\\xe6", "\\xe7",
    "\\xe8", "\\xe9", "\\xea", "\\xeb", "\\xec", "\\xed", "\\xee", "\\xef",
    "\\xf0", "\\xf1", "\\xf2", "\\xf3", "\\xf4", "\\xf5", "\\xf6", "\\xf7",
    "\\xf8", "\\xf9", "\\xfa", "\\xfb", "\\xfc", "\\xfd", "\\xfe", "\\xff",
};
&out_size); + if (nullptr == f) { + return out; + } + for (size_t i = 0; i < len; i++) { + const char *escaped = g_escape_table[static_cast<unsigned char>(in[i])]; + const int res = fwrite(escaped, strlen(escaped), 1, f); + ASSERT(res == 1), (void)res; + } + fclose(f); + return out; +} + +enum class TokenKind { + /// Used to indicate end of data + kNone = 0, + /// Used to indicate unexpected end of data or char + kError, + /// "0x[0-9a-fA-f]+" + kNumHex, + /// "0|[1-9][0-9]*" + kNumDec, + /// "0[0-7]+" + kNumOct, + /// "0b[01]+" + kNumBin, + /// "[a-zA-Z_][0-9a-zA-Z_]*" + kAlphaNum, + /// "," + kComma, + /// "[" + kLBracket, + /// "]" + kRBracket, + /// "#.*$" + kComment, + /// "\" + kBackslash, + /// "\n|\r|\r\n" + kNewLine, +}; + +struct Token { + using T = Token; + using K = TokenKind; + K kind{}; + size_t pos{}, len{}; + constexpr const char *Str() const { + switch(kind) { + case K::kNone: return "<None>"; + case K::kError: return "<Error>"; + case K::kNumHex: return "hexadecimal numeric"; + case K::kNumDec: return "decimal numeric"; + case K::kNumOct: return "octal numeric"; + case K::kNumBin: return "binary numeric"; + case K::kAlphaNum: return "alphanumeric"; + case K::kComma: return "`,`"; + case K::kLBracket: return "`[`"; + case K::kRBracket: return "`]`"; + case K::kComment: return "comment"; + case K::kBackslash: return "backslash"; + case K::kNewLine: return "newline"; + } + UNREACHABLE(); + return "<undefined>"; + } + constexpr bool IsNum() const + { + return kind == K::kNumHex || kind == K::kNumDec || kind == K::kNumOct || kind == K::kNumBin; + } + static constexpr T None(size_t pos) { return T{ K::kNone, pos, 0}; } + static constexpr T NumHex(size_t pos, size_t len) { return T{ K::kNumHex, pos, len}; } + static constexpr T NumDec(size_t pos, size_t len) { return T{ K::kNumDec, pos, len}; } + static constexpr T NumOct(size_t pos, size_t len) { return T{ K::kNumOct, pos, len}; } + static constexpr T NumBin(size_t pos, size_t len) { return T{ 
K::kNumBin, pos, len}; } + static constexpr T AlphaNum(size_t pos, size_t len) { return T{ K::kAlphaNum, pos, len}; } + static constexpr T Comma(size_t pos) { return T{ K::kComma, pos, 1}; } + static constexpr T LBracket(size_t pos) { return T{ K::kLBracket, pos, 1}; } + static constexpr T RBracket(size_t pos) { return T{ K::kRBracket, pos, 1}; } + static constexpr T Comment(size_t pos, size_t len) { return T{ K::kComment, pos, len}; } + static constexpr T Backslash(size_t pos) { return T{ K::kBackslash, pos, 1}; } +}; + +static const char *ParseTypeFromStr( + const char *input, size_t len, TraceNodeKind &k, DataType &dt) +{ + struct { + const char *str; + DataTypeKind type; + } data[] { + { "blob", DataTypeKind::kBlob, }, + { "str", DataTypeKind::kStr, }, + { "strz", DataTypeKind::kStrz, }, + }; + const char *intable[] { "ptr", "u8", "u16", "u32", }; + static char err[256]; + TRACE("Token: \"%.*s\"", static_cast<int>(len), input); + if (0 == strncmp("fn", input, len)) { + k = TraceNodeKind::kFunction; + return nullptr; + } else if (0 == strncmp("pc", input, len)) { + k = TraceNodeKind::kPc; + return nullptr; + } + for (size_t i = 0; i < (sizeof data) / (sizeof *data); i++) { + if (0 == strncmp(data[i].str, input, len)) { + k = TraceNodeKind::kData; + dt = DataType{ data[i].type }; + return nullptr; + } + } + for (size_t i = 0; i < (sizeof intable) / (sizeof *intable); i++) { + const char *t = intable[i]; + if (0 == strncmp(t, input, len)) { + snprintf( + err, sizeof err, + "`%s` trace node type only allowed in a table, " + "use `[%s]` syntax instead", t, t); + return err; + } + } + snprintf(err, sizeof err, "unknown trace node type"); + return err; +} + +static const char *ParseTypeFromToken( + const Token &t, const char *input, TraceNodeKind &k, DataType &dt) +{ + return ParseTypeFromStr(input + t.pos, t.len, k, dt); +} + +static const char *ParseTableTypeFromStr( + const char *input, size_t len, DataType &dt) +{ + struct { + const char *str; + DataTypeKind type; 
+ } data[] { + { "ptr", DataTypeKind::kPtr, }, + { "u32", DataTypeKind::kU32, }, + { "u16", DataTypeKind::kU16, }, + { "u8", DataTypeKind::kU8, }, + }; + static char err[256]; + TRACE("Token: \"%.*s\"", static_cast<int>(len), input); + for (size_t i = 0; i < (sizeof data) / (sizeof *data); i++) { + if (0 == strncmp(data[i].str, input, len)) { + dt = DataType{ data[i].type, 1 }; + return nullptr; + } + } + snprintf(err, sizeof err, "unknown table type"); + return err; +} + +static const char *ParseTableTypeFromToken( + const Token &t, const char *input, DataType &dt) +{ + return ParseTableTypeFromStr(input + t.pos, t.len, dt); +} + +enum class TokenizerState { + kFree = 0, + kNumAmbiguous, + kNumHexStillNoDigits, + kNumHex, + kNumDec, + kNumOct, + kNumBinStillNoDigits, + kNumBin, + kAlphaNum, + kComment, + kNewLine, + kError, +}; + +struct LinePosInfo { + size_t lineno; + size_t col; + size_t pos; +}; + +class Tokenizer { + FILE *_errstream{}; + const char *_filename{}; + size_t _pos{}, _token_pos{}; + TokenizerState _state{}; + TokenKind handleChar(char c, bool is_last); + TokenKind errorExpect(void); + char lookAhead() const { return in[_pos + 1]; } + void printError(const LinePosInfo l, size_t underline_len, const char *str); +public: + const char *in{}; + size_t in_size{}; + Tokenizer(){} + Tokenizer(const void *input, size_t size, FILE *errstream, const char *filename) + : _errstream(errstream) + , _filename(filename) + , in(static_cast<const char *>(input)) + , in_size(size) + {} + Token Next() + { + while (_pos < in_size) { + const char c = in[_pos]; + const TokenKind tk = handleChar(c, _pos + 1 >= in_size); + _pos++; + if (tk != TokenKind::kNone) { + return Token{ tk, _token_pos, _pos - _token_pos }; + } + } + return Token::None(_pos); + } + LinePosInfo GetLinePosInfo(const size_t pos) + { + LinePosInfo l{}; + bool cr = false; + for (size_t i = 0; i < pos; i++) { + const char c = in[i]; + if (c == '\r') { + cr = true; + l.pos = i + 1; + l.lineno++; + l.col = 
/*! Returns the number of characters that precede the first line break.
 *
 * The scan stops at the first LF, CR or NUL terminator, whichever comes
 * first, so `str` has to be NUL-terminated.
 */
static size_t FindLineLength(const char *const str)
{
    // strcspn() stops at the terminator on its own, so only the two line
    // break characters have to be listed in the reject set.
    return strcspn(str, "\n\r");
}
_filename : "<stdin>"; + fprintf(_errstream, "%s:%zu:%zu: error: %s\n", name, l.lineno, l.col, err); + const char *const line = in + l.pos; + const int line_length = FindLineLength(line); + fprintf(_errstream, "%5lu | %.*s\n", l.lineno + 1, line_length, line); + fputs(" | ", _errstream); + for (size_t i = 0; i < l.col; i++) { + if (in[l.pos + i] == '\t') { + fputc('\t', _errstream); + } else { + fputc(' ', _errstream); + } + } + fputc('^', _errstream); + for (size_t i = 1; i < underline_len; i++) { + fputc('~', _errstream); + } + fputc('\n', _errstream); +} + +TokenKind Tokenizer::errorExpect(void) +{ + _state = TokenizerState::kError; + size_t size{}; + char *errstr{}; + FILE *memstream = open_memstream(&errstr, &size); + if (nullptr == memstream) { + return TokenKind::kError; + } + fprintf( + memstream, + "unexpected char `%s`, expected %s\n", + g_escape_table[static_cast<unsigned char>(in[_pos])], + TokenizerStateToExpectedText(_state)); + fclose(memstream); + PrintError(errstr); + free(errstr); + return TokenKind::kError; +} + +static constexpr bool IsSpace(char c) +{ + return c == ' ' || c == '\t'; +} + +static constexpr bool IsNewLine(char c) +{ + return c == '\n' || c == '\r'; +} + +static constexpr bool IsValidSequenceBreaker(char c) +{ + return c == ',' || c == '[' || c == ']' || c == '#' || c == '\\' + || IsNewLine(c) || IsSpace(c); +} + +static constexpr bool IsAlphabetic(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_'); +} + +static constexpr bool IsAlphaNumeric(char c) +{ + return IsAlphabetic(c) || (c >= '0' && c <= '9'); +} + +static constexpr bool IsDecimal(char c) +{ + return (c >= '0' && c <= '9'); +} + +static constexpr bool IsHexadecimal(char c) +{ + return IsDecimal(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); +} + +static constexpr bool IsOctal(char c) +{ + return (c >= '0' && c <= '7'); +} + +static constexpr bool IsBinary(char c) +{ + return c == '0' || c == '1'; +} + +TokenKind 
Tokenizer::handleChar(const char c, const bool is_last) +{ + switch (_state) { + case TokenizerState::kFree: + TRACE("kFree %zu", _pos); + _token_pos = _pos; + if (c == '\n') { + return TokenKind::kNewLine; + } else if (c == '\r') { + if (is_last || '\n' != lookAhead()) { + return TokenKind::kNewLine; + } + _state = TokenizerState::kNewLine; + return TokenKind::kNone; + } else if (c == ',') { + return TokenKind::kComma; + } else if (c == '[') { + return TokenKind::kLBracket; + } else if (c == ']') { + return TokenKind::kRBracket; + } else if (c == '#') { + if (is_last || IsNewLine(lookAhead())) { + return TokenKind::kComment; + } + _state = TokenizerState::kComment; + return TokenKind::kNone; + } else if (c == '\\') { + return TokenKind::kBackslash; + } else if (c == '0') { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + return TokenKind::kNumDec; + } + _state = TokenizerState::kNumAmbiguous; + return TokenKind::kNone; + } else if (IsDecimal(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + return TokenKind::kNumDec; + } + _state = TokenizerState::kNumDec; + return TokenKind::kNone; + } else if (IsAlphaNumeric(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + return TokenKind::kAlphaNum; + } + _state = TokenizerState::kAlphaNum; + return TokenKind::kNone; + } else if (IsSpace(c)) { + // Skip + return TokenKind::kNone; + } + return errorExpect(); + case TokenizerState::kNumAmbiguous: + TRACE("kNumAmbiguous %zu", _pos); + ASSERT(!IsValidSequenceBreaker(c)); + if (c == 'b' || c == 'B') { + _state = TokenizerState::kNumBinStillNoDigits; + return TokenKind::kNone; + } else if (c == 'x' || c == 'X') { + _state = TokenizerState::kNumHexStillNoDigits; + return TokenKind::kNone; + } else if (IsOctal(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kNumOct; + } + _state = TokenizerState::kNumOct; + return TokenKind::kNone; + } + return errorExpect(); + case 
TokenizerState::kNumHexStillNoDigits: + TRACE("kNumHexStillNoDigits %zu", _pos); + if (IsHexadecimal(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kNumHex; + } + _state = TokenizerState::kNumHex; + return TokenKind::kNone; + } + return errorExpect(); + case TokenizerState::kNumHex: + TRACE("kNumHex %zu", _pos); + ASSERT(!IsValidSequenceBreaker(c)); + if (IsHexadecimal(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kNumHex; + } + return TokenKind::kNone; + } + return errorExpect(); + case TokenizerState::kNumDec: + TRACE("kNumDec %zu", _pos); + ASSERT(!IsValidSequenceBreaker(c)); + if (IsDecimal(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kNumDec; + } + return TokenKind::kNone; + } + return errorExpect(); + case TokenizerState::kNumOct: + TRACE("kNumOct %zu", _pos); + ASSERT(!IsValidSequenceBreaker(c)); + if (IsOctal(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kNumOct; + } + return TokenKind::kNone; + } + return errorExpect(); + case TokenizerState::kNumBinStillNoDigits: + TRACE("kNumBinStillNoDigits %zu", _pos); + if (IsBinary(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kNumBin; + } + _state = TokenizerState::kNumBin; + return TokenKind::kNone; + } + return errorExpect(); + case TokenizerState::kNumBin: + TRACE("kNumBin %zu", _pos); + ASSERT(!IsValidSequenceBreaker(c)); + if (IsBinary(c)) { + if (is_last || IsValidSequenceBreaker(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kNumBin; + } + return TokenKind::kNone; + } + return errorExpect(); + case TokenizerState::kAlphaNum: + TRACE("kAlphaNum %zu", _pos); + ASSERT(!IsValidSequenceBreaker(c)); + if (IsAlphaNumeric(c)) { + if (is_last || 
IsValidSequenceBreaker(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kAlphaNum; + } + return TokenKind::kNone; + } + return errorExpect(); + case TokenizerState::kComment: + TRACE("kComment %zu", _pos); + if (is_last || IsNewLine(lookAhead())) { + _state = TokenizerState::kFree; + return TokenKind::kComment; + } + return TokenKind::kNone; + case TokenizerState::kNewLine: + TRACE("kNewLine %zu", _pos); + ASSERT(c == '\n'); + _state = TokenizerState::kFree; + return TokenKind::kNewLine; + case TokenizerState::kError: + TRACE("kError %zu", _pos); + UNREACHABLE(); + return TokenKind::kNone; + } + UNREACHABLE(); + return TokenKind::kNone; +} + +enum class ParserState { + kAddress = 0, + kType, + kTableType, + kTableTypeCountOrSep, + kTableSep, + kCountOrName, + kName, + kEscapeEndOfLine, + kEndOfLine, + kError, +}; + +enum class ParseResult { + kOk = 0, + kFinished, + kError, +}; + +struct Statement { + uint32_t address_value{}; + bool has_comment{}; + TraceNodeKind trace_node_kind{}; + DataType data_type{}; + uint32_t size_value{}; + size_t name_index{}; + size_t comment_index{}; +}; + +class Parser { + Statement _stmt; + FILE *_shstr{}; + ParserState _state{}; + ParserState _saved_state{}; // For kEscapeEndOfLine + TokenKind _prev_token_kind{}; + bool _error{}; + ParseResult handleToken(const Token &); + ParseResult errorExpect(const Token &t, const char *expected); + ParseResult error(const Token &t, const char *text); + size_t addToShstr(const char *str, const size_t len) + { + if (nullptr == _shstr) { + return false; + } + fflush(_shstr); + ASSERT(shstr_size); + const size_t index = shstr_size; + if (len) { + ASSERT(shstr[shstr_size - 1] == '\000'); // Make sure it is sealed + const int res = fwrite(str, len, 1, _shstr); + ASSERT(res == 1), (void)res; + } + fflush(_shstr); + ASSERT(shstr_size); + if (shstr[shstr_size - 1] != '\000') { + const int res = fwrite("\000", 1, 1, _shstr); + ASSERT(res == 1), (void) res; + } + return index; + } + 
size_t addToShstrRaw(const char *str, const size_t len) + { + if (nullptr == _shstr) { + return false; + } + fflush(_shstr); + ASSERT(shstr_size); + const size_t index = shstr_size; + if (len) { + const int res = fwrite(str, len, 1, _shstr); + ASSERT(res == 1), (void) res; + } + return index; + } + size_t addNameTokenToShstr(const Token &t) + { + const char *str = tokenizer.in + t.pos; + const size_t len = t.len; + ASSERT(len); + if (__TRACE) { + char *estr = escapeStr(tokenizer.in + t.pos, static_cast<int>(t.len)); + if (estr) { + TRACE("Name token: \"%s\"", estr); + free(estr); + } + } + return addToShstr(str, len); + } + size_t addComment(const Token &t) + { + const char *str = tokenizer.in + t.pos + 1; + const size_t len = t.len - 1; + ASSERT(len); + if (__TRACE) { + char *estr = escapeStr(tokenizer.in + t.pos, static_cast<int>(t.len)); + if (estr) { + TRACE("Comment token: \"%s\"", estr); + free(estr); + } + } + return addToShstrRaw(str, len); + } + void appendCommentNewline(void) + { + // Newlines are simulated instead of using original ones, because it is + // easier to split multiline comment when printing if newlines used are + // uniform (only "\n"), when original ones may be mixed, i.e. "\r\n", + // "\n" and even "\r" inside a single multiline comment. + addToShstrRaw("\n", (sizeof "\n") - 1); + } + void sealComment(void) { addToShstr(nullptr, 0); } + void addTraceNode(Statement &&stmt) + { + nodes.PushBack(TraceNode{ + stmt.trace_node_kind, + stmt.data_type, + stmt.size_value, + stmt.address_value, + stmt.name_index, + stmt.comment_index, + }); + stmt = Statement(); + } +public: + Tokenizer tokenizer{}; + Vec<DataType> types; + Vec<TraceNode> nodes; + char *shstr{}; + size_t shstr_size{}; + Parser(const void *input, size_t size, FILE *errstream, const char *filename) + : _shstr(open_memstream(&shstr, &shstr_size)) + , tokenizer(input, size, errstream, filename) + { + // Write a single zero so any index 0 name or comment will point to it. 
/*! Converts a numeric token into a 32-bit unsigned value.
 *
 * Understands the same notations the tokenizer does: decimal, `0x`
 * hexadecimal, leading zero octal and `0b` binary.
 *
 * No thorough validation of the digits is done here, because the tokenizer
 * is trusted to get it right. The conversion stops at the first character
 * that is not a digit of the detected base, so the token must be followed by
 * such a character or by a NUL terminator.
 *
 * \param str   Beginning of the token.
 * \param len   Length of the token in bytes.
 * \param value Receives the converted number. Left untouched on failure.
 * \returns `false` if the token is empty or the number does not fit into
 *          32 bits, `true` otherwise.
 */
static bool ParseNumeric(const char *str, size_t len, uint32_t &value)
{
    if (len < 1) {
        return false;
    }
    // The binary prefix is not understood by the strto* family when the base
    // is auto-detected, so it has to be skipped by hand.
    const bool is_binary =
        len > 2 && str[0] == '0' && (str[1] == 'b' || str[1] == 'B');
    errno = 0;
    const unsigned long long parsed = is_binary
        ? strtoull(str + 2, nullptr, 2)
        : strtoull(str, nullptr, 0);
    // On overflow the conversion saturates and sets errno to ERANGE, it does
    // not return zero. The explicit range check is needed on top of that,
    // because the conversion type is wider than the 32-bit result.
    if (errno != 0 || parsed > UINT32_MAX) {
        return false;
    }
    value = static_cast<uint32_t>(parsed);
    return true;
}
+ if (t.kind == TokenKind::kBackslash && _state != ParserState::kEscapeEndOfLine) { + _saved_state = _state; + _state = ParserState::kEscapeEndOfLine; + return ParseResult::kOk; + } + switch (_state) { + case ParserState::kAddress: + if (t.kind == TokenKind::kComment) { + ASSERT(t.len); + if (_stmt.has_comment) { + appendCommentNewline(); + addComment(t); + } else { + _stmt.comment_index = addComment(t); + _stmt.has_comment = true; + } + return ParseResult::kOk; + } if (t.IsNum()) { + if (false == ParseNumeric(tokenizer.in + t.pos, t.len, _stmt.address_value)) { + return error(t, "number is too big"); + } + sealComment(); + _state = ParserState::kType; + return ParseResult::kOk; + } else if (IsEndOfStatement(t.kind)) { + ASSERT(t.kind != TokenKind::kComment); + if (t.kind == TokenKind::kNone) { + sealComment(); + } + if (t.kind == TokenKind::kNewLine && _prev_token_kind == TokenKind::kNewLine) { + // An empty line separating comments means that a comment above it + // does not belong to any trace node statement, hence we should + // reset the heading comment parsing state and seal what has + // been parsed so far. + _stmt.has_comment = false; + // It is easier to just seal the string than undo what has been + // written to it so far. + sealComment(); + } + return (t.kind == TokenKind::kNone) + ? 
ParseResult::kFinished : ParseResult::kOk; + } + return errorExpect(t, "any numeric token"); + case ParserState::kType: + if (t.kind == TokenKind::kAlphaNum) { + const char *err = ParseTypeFromToken( + t, tokenizer.in, _stmt.trace_node_kind, _stmt.data_type); + if (err) { + return error(t, err); + } + _state = ParserState::kCountOrName; + return ParseResult::kOk; + } else if (t.kind == TokenKind::kLBracket) { + _stmt.trace_node_kind = TraceNodeKind::kData; + _stmt.data_type = DataType{ DataTypeKind::kTable, 0, types.Size() }; + _state = ParserState::kTableType; + return ParseResult::kOk; + } else if (IsEndOfStatement(t.kind)) { + addTraceNode(static_cast<Statement &&>(_stmt)); + _state = ParserState::kAddress; + return (t.kind == TokenKind::kNone) + ? ParseResult::kFinished : ParseResult::kOk; + } + return errorExpect(t, "alphanumeric token"); + case ParserState::kTableType: + if (t.kind == TokenKind::kAlphaNum) { + DataType data_type{}; + const char *err = ParseTableTypeFromToken(t, tokenizer.in, data_type); + if (err) { + return error(t, err); + } + types.PushBack(data_type); + _stmt.data_type.nested_num++; + _state = ParserState::kTableTypeCountOrSep; + return ParseResult::kOk; + } else if (_stmt.data_type.nested_num) { + // Allow closing bracket after comma, but only when the table + // already contains at least a single type, i.e. empty table is not + // allowed. 
+ if (t.kind == TokenKind::kRBracket) { + _state = ParserState::kCountOrName; + return ParseResult::kOk; + } + return errorExpect(t, "`]` or table type alphanumeric token, one of: `ptr`, `u32`, `u16` or `u8`"); + } + return errorExpect(t, "table type alphanumeric token, one of: `ptr`, `u32`, `u16` or `u8`"); + case ParserState::kTableTypeCountOrSep: + if (t.IsNum()) { + uint32_t value{}; + if (false == ParseNumeric(tokenizer.in + t.pos, t.len, value)) { + return error(t, "number is too big"); + } + ASSERT(types.Size()); + types[types.Size() - 1].count = value; + _state = ParserState::kTableSep; + return ParseResult::kOk; + } else if (t.kind == TokenKind::kComma) { + _state = ParserState::kTableType; + return ParseResult::kOk; + } else if (_stmt.data_type.nested_num) { + // Allow closing bracket after comma, but only when the table + // already contains at least a single type, i.e. empty table is not + // allowed. + if (t.kind == TokenKind::kRBracket) { + _state = ParserState::kCountOrName; + return ParseResult::kOk; + } + return errorExpect(t, "`]`, `,`, number or table type alphanumeric token, one of: `ptr`, `u32`, `u16` or `u8`"); + } + return errorExpect(t, "`,`, number or table type alphanumeric token, one of: `ptr`, `u32`, `u16` or `u8`"); + case ParserState::kTableSep: + if (t.kind == TokenKind::kComma) { + _state = ParserState::kTableType; + return ParseResult::kOk; + } else if (t.kind == TokenKind::kRBracket) { + _state = ParserState::kCountOrName; + return ParseResult::kOk; + } + return errorExpect(t, "`,` or `]`"); + case ParserState::kCountOrName: + if (t.IsNum()) { + uint32_t value{}; + if (false == ParseNumeric(tokenizer.in + t.pos, t.len, value)) { + return error(t, "number is too big"); + } + if (_stmt.trace_node_kind == TraceNodeKind::kData) { + _stmt.data_type.count = value; + if (_stmt.data_type.kind == DataTypeKind::kTable) { + uint32_t table_entry_size = 0; + for (size_t i = 0; i < _stmt.data_type.nested_num; i++) { + const auto &type = types[i 
+ _stmt.data_type.nested_idx]; + ASSERT(type.count); + table_entry_size += type.count * type.BaseSize(); + } + _stmt.size_value = value * table_entry_size; + } else { + _stmt.size_value = _stmt.data_type.count * _stmt.data_type.BaseSize(); + } + } else { + _stmt.size_value = value; + } + _state = ParserState::kName; + return ParseResult::kOk; + } else if (t.kind == TokenKind::kAlphaNum) { + _stmt.name_index = addNameTokenToShstr(t); + // Maybe there is something that should be done with the count here + // like setting it to 1 in case if it is kTable, but for now let's + // just face the fact that it turns out to be zero when count field + // is omitted and move on. + ASSERT(0 == _stmt.size_value); + _state = ParserState::kEndOfLine; + return ParseResult::kOk; + } else if (IsEndOfStatement(t.kind)) { + addTraceNode(static_cast<Statement &&>(_stmt)); + _state = ParserState::kAddress; + return (t.kind == TokenKind::kNone) + ? ParseResult::kFinished : ParseResult::kOk; + } + return errorExpect(t, "any numeric token"); + case ParserState::kName: + if (t.kind == TokenKind::kAlphaNum) { + _stmt.name_index = addNameTokenToShstr(t); + _state = ParserState::kEndOfLine; + return ParseResult::kOk; + } else if (IsEndOfStatement(t.kind)) { + addTraceNode(static_cast<Statement &&>(_stmt)); + _state = ParserState::kAddress; + return (t.kind == TokenKind::kNone) + ? ParseResult::kFinished : ParseResult::kOk; + } + return errorExpect(t, "alphanumeric token"); + case ParserState::kEscapeEndOfLine: + if (t.kind == TokenKind::kComment) { + // Comments are allowed at the end of the line when it supposed to + // be escaped with a backslash, but these comments don't get + // attached to the trace node, so skip these side comments. + return ParseResult::kOk; + } else if (IsEndOfStatement(t.kind)) { + // Restore the parsing state to continue parsing the trace node + // expression. 
+ _state = _saved_state; + return ParseResult::kOk; + } + // But backslash may not be inserted anywhere you wish, it is only + // allowed before newlines or comments + return errorExpect(t, "Comment, EOF, CR, LF or CRLF"); + case ParserState::kEndOfLine: + if (t.kind == TokenKind::kComment) { + // Side comments are skipped + return ParseResult::kOk; + } else if (IsEndOfStatement(t.kind)) { + addTraceNode(static_cast<Statement &&>(_stmt)); + _state = ParserState::kAddress; + return (t.kind == TokenKind::kNone) + ? ParseResult::kFinished : ParseResult::kOk; + } + return errorExpect(t, "Comment, EOF, CR, LF or CRLF"); + case ParserState::kError: + // Error recovery: just skip everything to the end of the line, but + // honoring the escape symbol '\' (see the top of the function) + if (IsEndOfStatement(t.kind)) { + _stmt = Statement(); + _state = ParserState::kAddress; + return (t.kind == TokenKind::kNone) + ? ParseResult::kFinished : ParseResult::kOk; + } + return ParseResult::kOk; + } + UNREACHABLE(); + return ParseResult::kError; +} + +bool ParseTraceData( + TraceTable &output, + const void *trace_data, + size_t trace_data_size, + FILE *errstream, + const char *trace_file_name) +{ + Parser p(trace_data, trace_data_size, errstream, trace_file_name); + const ParseResult res = p.Parse(); + ASSERT(res != ParseResult::kOk); + if (ParseResult::kFinished != res) { + return false; + } + const auto types_count = p.types.Size(); + const auto nodes_count = p.nodes.Size(); + const auto shstr_size = p.shstr_size; + auto *shstr = p.shstr; + if (shstr_size <= 1) { + // Nothing has been written to the shstr array, because it contains only + // a single char which is a zero, that is written before the parsing + // even starts. 
// These data types may be nested via the kTable type.
enum class DataTypeKind {
    kBlob = 0,
    kStr,
    kStrz,
    kTable,
    kPtr,
    kU32,
    kU16,
    kU8,
};

/*! Returns the size in bytes of a single element of the kind \p k.
 *
 * kTable has no base size of its own, so 0 is returned for it.
 */
constexpr size_t DataTypeBaseSize(const DataTypeKind k)
{
    switch (k) {
    case DataTypeKind::kBlob:
    case DataTypeKind::kStr:
    case DataTypeKind::kStrz:
    case DataTypeKind::kU8:
        return 1;
    case DataTypeKind::kTable:
        return 0; ///< Not applicable
    case DataTypeKind::kPtr:
    case DataTypeKind::kU32:
        return 4;
    case DataTypeKind::kU16:
        return 2;
    }
    return 0;
}

enum class TraceNodeKind {
    kPc = 0,
    kFunction,
    kData,
};

struct DataType {
    /*! Designates a type of a data object.
     */
    DataTypeKind kind{};

    /*! Count of elements in the data type.
     *
     * Any data type may be declared as a repeated sequence of elements of the
     * same type. This field is needed to represent it. When a type is
     * declared in the trace table, the count field belongs to the type rather
     * than to a node, i.e. "strz 12" means that the count field will contain
     * 12 and in this case it means 12 bytes, so it is context dependent.
     * Another useful example is a table defined like this "[ptr, u32 3] 16"
     * which means 16 entries, each containing a pointer and three values of
     * `u32`. The count value will then contain 16, but to figure out how many
     * bytes the table will take, one has to traverse all nested types, get
     * their count and consider the base type size, which means that a single
     * `ptr` takes 4 bytes, as well as `u32` takes 4 bytes. So the total table
     * size in bytes will be 16 * (4 + 4 * 3) = 256.
     *
     * To be clear, for each type the following base sizes are unambiguously
     * defined in this disassembler:
     *   kPtr `ptr`: 4 bytes;
     *   kU32 `u32`: 4 bytes;
     *   kU16 `u16`: 2 bytes;
     *   kU8 `u8`: 1 byte.
     *
     * Or you may use the DataTypeBaseSize function or the BaseSize method to
     * get the base type size in bytes.
     *
     * When DataTypeKind is kStrz (`strz`) this field represents the length of
     * the string including the null terminator.
     *
     * This field may contain zero.
     *
     * Full node size in bytes is available in the TraceNode.size field.
     */
    uint32_t count{};

    /*! Designates a type of an element or composition of the table.
     *
     * Only interpreted if kind is kTable. It holds an index into the type
     * table of the containing trace table. The type pointed to by the index
     * designates a type of an element of the table this node represents.
     */
    size_t nested_idx{};

    /*! Designates a number of types in a nested composition of the table.
     *
     * Only interpreted if kind is kTable. It holds the number of types that
     * must be interpreted in the data type table that compose a single
     * element of the table.
     */
    size_t nested_num{};

    /*! Compares kind, nested_idx and nested_num.
     *
     * NOTE(review): `count` does not take part in the comparison -- confirm
     * that this is intentional.
     */
    bool operator==(const DataType &other) const
    {
        return kind == other.kind &&
            nested_idx == other.nested_idx &&
            nested_num == other.nested_num;
    }
    bool operator!=(const DataType &other) const { return !(*this == other); }
    /*! Size in bytes of a single element of this type, see DataTypeBaseSize.
     */
    constexpr size_t BaseSize() const { return DataTypeBaseSize(kind); }
};

struct TraceNode {
    TraceNodeKind kind{};

    /*! Designates a type of a data trace node.
     *
     * Only interpreted if TraceNode::kind is kData.
     */
    DataType data_type{};

    /*! Size of data/function, if applicable.
     *
     * When kind is kPc it is not applicable.
     *
     * When kind is kFunction this field represents the function size in
     * bytes.
     *
     * When kind is kData this field always represents the object size in
     * bytes. If the underlying type of the object is a table and the count of
     * elements is needed instead, then the DataType.count field may be used
     * to get it.
     *
     * When DataTypeKind is kStrz (`strz`) this field represents the length of
     * the string including the null terminator.
     */
    uint32_t size{};

    /*! Virtual offset of the traced location.
     *
     * By coincidence this also would be an offset inside the Sega Mega
     * Drive/Genesis game binary in case it refers to a ROM location. But it
     * may contain a RAM location as well, which is located outside of the
     * possible cartridge code region range.
     */
    uint32_t address{};

    /*! Object name represented by an index into the shared string array.
     *
     * The shared string list may be found in the TraceTable object. The value
     * of 0 means no name assigned. The name ends with a null terminator and
     * follows the alphanumeric regex: "[a-zA-Z_][0-9a-zA-Z_]*".
     */
    size_t name{};

    /*! Comment line(s) represented by an index into the shared string array.
     *
     * The shared string list may be found in the TraceTable object. The value
     * of 0 means no comment assigned. The comment ends with a null terminator
     * and may contain line feeds which correspond to the original line feeds
     * read from the trace table file. No comment start marks '#' are
     * preserved in the comment strings.
     */
    size_t comment{};

    /*! Builds a kPc node that has only the address set.
     */
    static constexpr auto Pc(uint32_t address_)
    {
        TraceNode n{};
        n.address = address_;
        return n;
    }
    /*! Compares kind, size and address, plus data_type for kData nodes.
     *
     * The name and comment indexes are not compared.
     */
    bool operator==(const TraceNode &other) const
    {
        if (!(kind == other.kind && size == other.size && address == other.address)) {
            return false;
        }
        return (kind == TraceNodeKind::kData) ? data_type == other.data_type : true;
    }
    bool operator!=(const TraceNode &other) const { return !(*this == other); }
};

class TraceTable {
    /*! A shared table of all types used in the parsed trace table.
     *
     * When a data trace node has type of DataTypeKind::kTable, it will refer
     * to one or more types that in a combination represent a single table
     * element. Instead of specifying the type explicitly it refers to it by
     * an index inside this types array. Types may go crazy complex in theory,
     * and this approach with indexes into this array makes it possible to
     * build an abstract syntax tree of types representing an object.
     */
    DataType *_types{};

    /*! Size of the _types array.
     */
    size_t _types_count{};

    /*! A shared table of all nodes in the parsed trace table.
     */
    TraceNode *_nodes{};

    /*! Size of the _nodes array.
     */
    size_t _nodes_count{};

    /*! A shared list of strings separated by null terminators.
     *
     * Used for object names like traced PC locations, functions or tables, as
     * well as for comments.
     */
    char *_shstr{};

public:
    constexpr TraceTable(){}
    /*! Takes the given arrays and the string buffer as they are.
     */
    constexpr TraceTable(
            DataType *types, size_t types_size, TraceNode *nodes, size_t nodes_count, char *shstr)
        : _types(types)
        , _types_count(types_size)
        , _nodes(nodes)
        , _nodes_count(nodes_count)
        , _shstr(shstr)
    {}
    constexpr TraceTable(const TraceTable&) = delete;
    constexpr TraceTable(TraceTable&& other) = delete;
    ~TraceTable();
    constexpr TraceTable &operator=(const TraceTable &other) = delete;
    /*! Takes over the contents of \p other and leaves \p other empty.
     *
     * Whatever this table has been holding before is released. Assigning a
     * table to itself keeps it intact.
     */
    TraceTable &operator=(TraceTable &&other)
    {
        if (this == &other) {
            return *this;
        }
        // The previous contents are handed to a scoped table which releases
        // them when it goes out of scope. This avoids running the destructor
        // on `*this` and then keeping on using the object.
        TraceTable previous(_types, _types_count, _nodes, _nodes_count, _shstr);
        _types = other._types;
        _types_count = other._types_count;
        _nodes = other._nodes;
        _nodes_count = other._nodes_count;
        _shstr = other._shstr;
        other._types = nullptr;
        other._types_count = 0;
        other._nodes = nullptr;
        other._nodes_count = 0;
        other._shstr = nullptr;
        return *this;
    }
    constexpr const TraceNode &Node(size_t index) const { return _nodes[index]; }
    constexpr const DataType &Type(size_t index) const { return _types[index]; }
    constexpr const char *Shstr(size_t index = 0) const { return _shstr + index; }
    constexpr size_t TypesCount() const { return _types_count; }
    constexpr size_t NodesCount() const { return _nodes_count; }
    friend bool ParseTraceData(
            TraceTable &output,
            const void *trace_data,
            size_t trace_data_size,
            FILE *errstream,
            const char *trace_file_name);
};

/*! Parses \p trace_data into the \p output trace table.
 *
 * The \p trace_file_name is purely informative and used for error reporting
 * purposes only. When \p trace_file_name is nullptr, "<stdin>" is used in the
 * error reporting, that is directed to the \p errstream. When \p errstream is
 * nullptr, no error messages will be printed in case of failure and the
 * function will return false silently without even touching the \p output
 * value.
 *
 * \p trace_data_size simply tells the size of the \p trace_data buffer.
 *
 * \returns true on success.
 */
bool ParseTraceData(
        TraceTable &output,
        const void *trace_data,
        size_t trace_data_size,
        FILE *errstream = nullptr,
        const char *trace_file_name = nullptr);
+ * + * \p trace_data_size simply tells the size of the \p trace_data buffer. + * + * \returns true on success. + */ +bool ParseTraceData( + TraceTable &output, + const void *trace_data, + size_t trace_data_size, + FILE *errstream = nullptr, + const char *trace_file_name = nullptr); diff --git a/src/tracetab_tests.cpp b/src/tracetab_tests.cpp new file mode 100644 index 0000000..e8d0710 --- /dev/null +++ b/src/tracetab_tests.cpp @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#include "tracetab.h" +#include "doctest/doctest.h" +#include "debug.h" + +#include <cstring> + +#define STRING_SLICE(s) StringSlice{ s, (sizeof s) - 1u } + +struct StringSlice { + const char *str; + size_t len; +}; + +TEST_CASE("PC trace") { + const auto data = STRING_SLICE("512\n0x202\n0b1000000100\n01006\n"); + TraceTable tt{}; + const bool ok = ParseTraceData(tt, data.str, data.len, stderr); + REQUIRE(ok); + CHECK_EQ(tt.TypesCount(), 0); + CHECK_EQ(tt.NodesCount(), 4); + CHECK_EQ(tt.Shstr(), nullptr); + CHECK_EQ(tt.Node(0), TraceNode::Pc(512)); + CHECK_EQ(tt.Node(1), TraceNode::Pc(0x202)); + CHECK_EQ(tt.Node(2), TraceNode::Pc(0b1000000100)); + CHECK_EQ(tt.Node(3), TraceNode::Pc(01006)); +} + +TEST_CASE("pc, fn and mixed newlines") { + const auto data = STRING_SLICE( + "0x200 pc __start\r\n\r0x491ac fn 30 printf\n\n\n0x49c78 fn 100 fputs"); + TraceTable tt{}; + const bool ok = ParseTraceData(tt, data.str, data.len, stderr); + REQUIRE(ok); + CHECK_EQ(tt.TypesCount(), 0); + REQUIRE_EQ(tt.NodesCount(), 3); + CHECK_NE(tt.Shstr(), nullptr); + { + const auto n = tt.Node(0); + CHECK_EQ(n.kind, TraceNodeKind::kPc); + CHECK_EQ(n.address, 0x200); + CHECK_EQ(n.size, 0); + CHECK_NE(n.name, 0); + TRACE("tt.Shstr(n.name) = %s\n", tt.Shstr(n.name)); + CHECK_EQ(0, strcmp("__start", tt.Shstr(n.name))); + } + { + const auto n = tt.Node(1); + CHECK_EQ(n.kind, TraceNodeKind::kFunction); + CHECK_EQ(n.address, 0x491ac); + CHECK_EQ(n.size, 30); + CHECK_NE(n.name, 0); + TRACE("tt.Shstr(n.name) = 
%s\n", tt.Shstr(n.name)); + CHECK_EQ(0, strcmp("printf", tt.Shstr(n.name))); + } + { + const auto n = tt.Node(2); + CHECK_EQ(n.kind, TraceNodeKind::kFunction); + CHECK_EQ(n.address, 0x49c78); + CHECK_EQ(n.size, 100); + CHECK_NE(n.name, 0); + TRACE("tt.Shstr(n.name) = %s\n", tt.Shstr(n.name)); + CHECK_EQ(0, strcmp("fputs", tt.Shstr(n.name))); + } +} + +TEST_CASE("str, strz and blob") { + const auto data = STRING_SLICE( + "0x100 str 0x10 smd_header_sega_genesis\n" + "0xbc1b strz 13 windtrap_wsa\n" + "0x29bb0 blob 256 copy_of_smd_header\n"); + TraceTable tt{}; + const bool ok = ParseTraceData(tt, data.str, data.len, stderr); + REQUIRE(ok); + CHECK_EQ(tt.TypesCount(), 0); + REQUIRE_EQ(tt.NodesCount(), 3); + CHECK_NE(tt.Shstr(), nullptr); + { + const auto n = tt.Node(0); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x100); + CHECK_NE(n.name, 0); + CHECK_EQ(0, strcmp("smd_header_sega_genesis", tt.Shstr(n.name))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kStr); + CHECK_EQ(n.data_type.count, 0x10); + CHECK_EQ(n.data_type.count * n.data_type.BaseSize(), n.size); + CHECK_EQ(0x10, n.size); + } + { + const auto n = tt.Node(1); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0xbc1b); + CHECK_NE(n.name, 0); + CHECK_EQ(0, strcmp("windtrap_wsa", tt.Shstr(n.name))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kStrz); + CHECK_EQ(n.data_type.count, 13); + CHECK_EQ(n.data_type.count * n.data_type.BaseSize(), n.size); + CHECK_EQ(13, n.size); + } + { + const auto n = tt.Node(2); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x29bb0); + CHECK_NE(n.name, 0); + CHECK_EQ(0, strcmp("copy_of_smd_header", tt.Shstr(n.name))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kBlob); + CHECK_EQ(n.data_type.count, 256); + CHECK_EQ(n.data_type.count * n.data_type.BaseSize(), n.size); + CHECK_EQ(256, n.size); + } +} + +TEST_CASE("tables") { + const auto data = STRING_SLICE( + "0x4 [ptr] 63\n" + "0x87f44 [ptr,u32,] 19 soundtest_music_name_ptrs\n" + 
"0x88124 [ptr,u32] passwords_ptrs\n"); + TraceTable tt{}; + const bool ok = ParseTraceData(tt, data.str, data.len, stderr); + REQUIRE(ok); + REQUIRE_NE(tt.TypesCount(), 0); + REQUIRE_EQ(tt.NodesCount(), 3); + CHECK_NE(tt.Shstr(), nullptr); + { + const auto n = tt.Node(0); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x4); + CHECK_EQ(n.name, 0); + CHECK_EQ(n.data_type.kind, DataTypeKind::kTable); + CHECK_EQ(n.data_type.count, 63); + REQUIRE_EQ(n.data_type.nested_num, 1); + const auto ptr_type = tt.Type(n.data_type.nested_idx); + CHECK_EQ(ptr_type.kind, DataTypeKind::kPtr); + CHECK_EQ(ptr_type.count, 1); + CHECK_EQ(n.data_type.count * (ptr_type.count * ptr_type.BaseSize()), n.size); + CHECK_NE(0, n.size); + } + { + const auto n = tt.Node(1); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x87f44); + CHECK_NE(n.name, 0); + CHECK_EQ(0, strcmp("soundtest_music_name_ptrs", tt.Shstr(n.name))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kTable); + CHECK_EQ(n.data_type.count, 19); + REQUIRE_EQ(n.data_type.nested_num, 2); + const auto ptr_type = tt.Type(n.data_type.nested_idx); + CHECK_EQ(ptr_type.kind, DataTypeKind::kPtr); + CHECK_EQ(ptr_type.count, 1); + const auto u32_type = tt.Type(n.data_type.nested_idx + 1); + CHECK_EQ(u32_type.kind, DataTypeKind::kU32); + CHECK_EQ(u32_type.count, 1); + CHECK_EQ( + n.data_type.count * (ptr_type.count * ptr_type.BaseSize() + + u32_type.count * u32_type.BaseSize()), + n.size); + CHECK_NE(0, n.size); + } + { + const auto n = tt.Node(2); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x88124); + CHECK_NE(n.name, 0); + CHECK_EQ(0, strcmp("passwords_ptrs", tt.Shstr(n.name))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kTable); + CHECK_EQ(n.data_type.count, 0); + REQUIRE_EQ(n.data_type.nested_num, 2); + const auto ptr_type = tt.Type(n.data_type.nested_idx); + CHECK_EQ(ptr_type.kind, DataTypeKind::kPtr); + CHECK_EQ(ptr_type.count, 1); + const auto u32_type = tt.Type(n.data_type.nested_idx + 
1); + CHECK_EQ(u32_type.kind, DataTypeKind::kU32); + CHECK_EQ(u32_type.count, 1); + CHECK_EQ( + n.data_type.count * (ptr_type.count * ptr_type.BaseSize() + + u32_type.count * u32_type.BaseSize()), + 0); + CHECK_EQ(0, n.size); + } +} + +TEST_CASE("table types with count specified") { + const auto data = STRING_SLICE( + "0x88 [ptr 4,u16 2, u8 4] 14 something_idk\n"); + TraceTable tt{}; + const bool ok = ParseTraceData(tt, data.str, data.len, stderr); + REQUIRE(ok); + REQUIRE_NE(tt.TypesCount(), 0); + REQUIRE_EQ(tt.NodesCount(), 1); + CHECK_NE(tt.Shstr(), nullptr); + { + const auto n = tt.Node(0); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x88); + CHECK_NE(n.name, 0); + CHECK_EQ(0, strcmp("something_idk", tt.Shstr(n.name))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kTable); + CHECK_EQ(n.data_type.count, 14); + REQUIRE_EQ(n.data_type.nested_num, 3); + const auto ptr_type = tt.Type(n.data_type.nested_idx); + CHECK_EQ(ptr_type.kind, DataTypeKind::kPtr); + CHECK_EQ(ptr_type.count, 4); + const auto u16_type = tt.Type(n.data_type.nested_idx + 1); + CHECK_EQ(u16_type.kind, DataTypeKind::kU16); + CHECK_EQ(u16_type.count, 2); + const auto u8_type = tt.Type(n.data_type.nested_idx + 2); + CHECK_EQ(u8_type.kind, DataTypeKind::kU8); + CHECK_EQ(u8_type.count, 4); + CHECK_EQ( + n.data_type.count * + (ptr_type.count * ptr_type.BaseSize() + + u16_type.count * u16_type.BaseSize() + + u8_type.count * u8_type.BaseSize()), + n.size); + CHECK_NE(0, n.size); + } +} + +TEST_CASE("multiline trace node with comments") { + const auto data = STRING_SLICE( + "0x100 \\\n" + " [\\# side comments are ignored\n" + " ptr \\\n" + " 4\\\n" + " ,\\\n" + " u32\\#another random comment\n" + " ] \\\n" + " 4\\# and another one\n" + " trace_node#proper trailing comment\n"); + TraceTable tt{}; + const bool ok = ParseTraceData(tt, data.str, data.len, stderr); + REQUIRE(ok); + REQUIRE_NE(tt.TypesCount(), 0); + REQUIRE_EQ(tt.NodesCount(), 1); + CHECK_NE(tt.Shstr(), nullptr); + { + const 
auto n = tt.Node(0); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x100); + CHECK_NE(n.name, 0); + CHECK_EQ(0, strcmp("trace_node", tt.Shstr(n.name))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kTable); + CHECK_EQ(n.data_type.count, 4); + REQUIRE_EQ(n.data_type.nested_num, 2); + const auto ptr_type = tt.Type(n.data_type.nested_idx); + CHECK_EQ(ptr_type.kind, DataTypeKind::kPtr); + CHECK_EQ(ptr_type.count, 4); + const auto u32_type = tt.Type(n.data_type.nested_idx + 1); + CHECK_EQ(u32_type.kind, DataTypeKind::kU32); + CHECK_EQ(u32_type.count, 1); + CHECK_EQ( + n.data_type.count * + (ptr_type.count * ptr_type.BaseSize() + + u32_type.count * u32_type.BaseSize()), + n.size); + CHECK_NE(0, n.size); + } +} + +TEST_CASE("multiline trace node with comments") { + const auto data = STRING_SLICE( + "# Heading comments are appended to the trace node.\r" + "# And they may be multiline!\n" + "# The newlines in the heading comment are preserved\r\n" + "# if there is more than one line.\n" + "0x100 str 8 trace_node #side comments are ignored\n" + "#Just a stray comment without trace node because empty line follows\n" + "\n" + "# Single line heading comment won't preserve trailing newline symbol\n" + "0x200 strz 4 trace_node2 # side comments are ignored\n" + "#Just a comment without trace node"); + TraceTable tt{}; + const bool ok = ParseTraceData(tt, data.str, data.len, stderr); + REQUIRE(ok); + REQUIRE_EQ(tt.TypesCount(), 0); + REQUIRE_EQ(tt.NodesCount(), 2); + CHECK_NE(tt.Shstr(), nullptr); + { + const auto n = tt.Node(0); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x100); + CHECK_NE(n.name, 0); + TRACE("name = %s", tt.Shstr(n.name)); + CHECK_EQ(0, strcmp("trace_node", tt.Shstr(n.name))); + CHECK_NE(n.comment, 0); + TRACE("comment = %s", tt.Shstr(n.comment)); + CHECK_EQ(0, strcmp( + " Heading comments are appended to the trace node.\n" + " And they may be multiline!\n" + " The newlines in the heading comment are preserved\n" + " if there 
is more than one line.", + tt.Shstr(n.comment))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kStr); + CHECK_EQ(n.data_type.count, 8); + CHECK_EQ(n.data_type.count * n.data_type.BaseSize(), n.size); + CHECK_EQ(8, n.size); + } + { + const auto n = tt.Node(1); + CHECK_EQ(n.kind, TraceNodeKind::kData); + CHECK_EQ(n.address, 0x200); + CHECK_NE(n.name, 0); + TRACE("n.name = %s", tt.Shstr(n.name)); + CHECK_EQ(0, strcmp("trace_node2", tt.Shstr(n.name))); + CHECK_NE(n.comment, 0); + TRACE("comment = %s", tt.Shstr(n.comment)); + CHECK_EQ(0, strcmp( + " Single line heading comment won't preserve trailing newline symbol", + tt.Shstr(n.comment))); + CHECK_EQ(n.data_type.kind, DataTypeKind::kStrz); + CHECK_EQ(n.data_type.count, 4); + CHECK_EQ(n.data_type.count * n.data_type.BaseSize(), n.size); + CHECK_EQ(4, n.size); + } +} diff --git a/src/unit_tests_main.cpp b/src/unit_tests_main.cpp new file mode 100644 index 0000000..8d46240 --- /dev/null +++ b/src/unit_tests_main.cpp @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: Unlicense + */ + +#define DOCTEST_CONFIG_NO_EXCEPTIONS +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include <doctest/doctest.h> + +/* These aren't the Tests you're looking for... 
/*! A data vector implementation.
 *
 * The sole purpose of this vector implementation is to not use std::vector,
 * because it greatly increases compilation times even if you just type
 * `#include <vector>`. And since there are no exceptions enabled in the
 * m68k-disasm project, I consider it to be fine to have an implementation this
 * bold. I also don't need all the features of the original std::vector and
 * this implementation provides only a small portion of them.
 *
 * The storage is obtained with calloc() and elements are put into it by
 * assignment, so an all-zero-bytes T has to be a valid target of assignment
 * and destruction.
 * */
template <typename T>
class Vec {
public:
    using size_type = decltype(sizeof 0);
private:
    size_type _size{};
    size_type _capacity{};
    T *_d{};
    /*! Returns zero filled storage for \p count elements.
     *
     * Returns nullptr when \p count is zero, aborts when out of memory.
     */
    static T *allocate(size_type count);
    /*! Makes sure there is room for \p by more elements.
     */
    void expand(size_type by);
    /*! Destroys all the elements, frees the storage and makes the vector
     * empty.
     */
    void reset();
public:
    constexpr explicit Vec() {}
    Vec(const Vec& other);
    /*! Copies \p nmemb elements starting at \p data.
     */
    Vec(const T *data, size_type nmemb);
    /*! Copies as many whole elements as fit into \p size bytes at \p data.
     */
    Vec(const void *data, size_type size);
    constexpr Vec(Vec&& other)
        : _size(other._size), _capacity(other._capacity), _d(other._d)
    {
        other._d = nullptr;
        other._capacity = other._size = 0;
    }
    Vec &operator=(const Vec &other);
    Vec &operator=(Vec &&other)
    {
        // In case `other` points to `this` we have to store its state on the
        // stack before releasing the contents of `this`.
        const size_type size = other._size;
        const size_type capacity = other._capacity;
        T *const d = other._d;
        other._d = nullptr;
        other._capacity = other._size = 0;
        reset();
        _size = size;
        _capacity = capacity;
        _d = d;
        return *this;
    }
    ~Vec();
    /*! Appends a copy of \p value.
     *
     * \p value must not refer to an element of this vector, because the
     * storage may be reallocated before the copy is made.
     */
    Vec<T> &PushBack(const T &value)
    {
        expand(1);
        _d[_size++] = static_cast<const T &>(value);
        return *this;
    }
    /*! Appends \p value by moving from it, same restriction as above.
     */
    Vec<T> &PushBack(T &&value)
    {
        expand(1);
        _d[_size++] = static_cast<T &&>(value);
        return *this;
    }
    /*! Removes the last element and returns it. The vector must not be empty.
     */
    T PopBack(void) { return static_cast<T &&>(_d[--_size]); }
    /*! Gives the storage away and leaves the vector empty.
     *
     * The returned pointer comes from calloc() and must be released with
     * free(). Read Size() before calling this, it is zero afterwards.
     */
    constexpr T *Extract(void)
    {
        T *d = _d;
        _d = nullptr;
        _size = _capacity = 0;
        return d;
    }
    constexpr size_type Size(void) const { return _size; }
    constexpr size_type Capacity(void) const { return _capacity; }
    constexpr T *begin(void) { return _d; }
    constexpr T *end(void) { return _d + _size; }
    constexpr const T& operator[](size_type index) const { return *(_d + index); }
    constexpr T& operator[](size_type index) { return *(_d + index); }
    bool operator==(const Vec& other) const;
    bool operator!=(const Vec& other) const
    {
        return !(*this == other);
    }
};

template <typename T>
T *Vec<T>::allocate(size_type count)
{
    if (count == 0) {
        // calloc(0, ...) is allowed to return a null pointer, which must not
        // be mistaken for an allocation failure.
        return nullptr;
    }
    T *d{static_cast<T *>(calloc(count, sizeof (T)))};
    if (d == nullptr) {
        // There are no exceptions in this project and nothing sensible can be
        // done without memory. Unlike assert() this check survives NDEBUG.
        abort();
    }
    return d;
}

template <typename T>
void Vec<T>::reset()
{
    for (size_type i = 0; i < _size; i++) {
        _d[i].~T();
    }
    free(_d);
    _d = nullptr;
    _capacity = _size = 0;
}

template <typename T>
void Vec<T>::expand(size_type by)
{
    const size_type needed = _size + by;
    if (needed <= _capacity) {
        return;
    }
    // Grow by one half. Integer arithmetic gives the same numbers as
    // multiplying by 1.5f used to, but does not lose precision on big sizes.
    const size_type new_capacity = needed + needed / 2;
    T *d = allocate(new_capacity);
    for (size_type i = 0; i < _size; i++) {
        d[i] = static_cast<T &&>(_d[i]);
        // The moved-from element is not going to be used anymore
        _d[i].~T();
    }
    free(_d);
    _d = d;
    _capacity = new_capacity;
}

template <typename T>
Vec<T>::Vec(const Vec& other)
    : _size(other._size)
    , _capacity(other._size)
    , _d(allocate(_capacity))
{
    for (size_type i = 0; i < _size; i++) {
        _d[i] = other._d[i];
    }
}

template <typename T>
Vec<T>::Vec(const T *data, size_type nmemb)
    : _size(nmemb)
    , _capacity(nmemb)
    , _d(allocate(_capacity))
{
    for (size_type i = 0; i < _size; i++) {
        _d[i] = data[i];
    }
}

template <typename T>
Vec<T>::Vec(const void *data, size_type size)
    : _size(size / sizeof (T))
    , _capacity(size / sizeof (T))
    , _d(allocate(_capacity))
{
    for (size_type i = 0; i < _size; i++) {
        _d[i] = static_cast<const T *>(data)[i];
    }
}

template <typename T>
Vec<T>::~Vec()
{
    reset();
}

template <typename T>
Vec<T> &Vec<T>::operator=(const Vec &other)
{
    if (this == &other) {
        // Releasing the contents first would leave nothing to copy from
        return *this;
    }
    reset();
    expand(other._size);
    for (size_type i = 0; i < other._size; i++) {
        _d[i] = other._d[i];
    }
    _size = other._size;
    return *this;
}

template <typename T>
bool Vec<T>::operator==(const Vec& other) const
{
    if (_size != other._size) {
        return false;
    }
    for (size_type i = 0; i < _size; i++) {
        if (_d[i] != other._d[i]) {
            return false;
        }
    }
    return true;
}
vecs") { + Vec<int> vi{}; + vi.PushBack(10); + Vec<Vec<int>> vv{}; + CHECK_EQ(vv.Size(), 0); + vv.PushBack(vi); + vv.PushBack(vi); + CHECK_EQ(vv.Size(), 2); + CHECK_EQ(vi, vv[1]); + CHECK_EQ(vi, vv[0]); + Vec<Vec<int>> vv2{vv}; + CHECK_EQ(vi.Size(), vv[0].Size()); + CHECK_EQ(vi, vv[0]); + const auto popvi = vv.PopBack(); + CHECK_EQ(vi.Size(), popvi.Size()); + CHECK_EQ(vv2.Size(), 2); +} |