diff options
author | Oxore <oxore@protonmail.com> | 2023-10-21 07:24:48 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-10-21 07:24:48 +0300 |
commit | 6529ed369f4da544c31aa1db0b23fa4eb4663013 (patch) | |
tree | 87026787bc121faa721823ad65c53f92d73214da | |
parent | ad54e3c0a58e98927ec7e43418d593412ba6fa9f (diff) |
Impl expr numbers parsing and function type and size
-rw-r--r-- | main.c | 459 | ||||
-rwxr-xr-x | test.sh | 1 | ||||
-rw-r--r-- | tests/test_sat.S | 7 |
3 files changed, 385 insertions, 82 deletions
@@ -23,17 +23,23 @@ #endif #if defined(__GNUC__) || defined(__clang__) +#if !defined(NDEBUG) +#define UNREACHABLE() assert(false) +#else #define UNREACHABLE __builtin_unreachable +#endif #else #define UNREACHABLE() #endif #define E_NIMPL "not implemented" #define E_UNREACH "unreachable code reached" -#define E_EXPR "'(', ')', unary operator, binary operator, number or symbol" +#define E_EXPR "'(', ')', unary operator, binary operator, number or identifier" #define E_EXPR_NONREG "symbol that is not a register when parsing expression" -#define E_EXPR_OPEN "'(', number or symbol" -#define E_EXPR_CLOSE "')', '+', '-', '+', '/' or symbol" +#define E_EXPR_OPEN_WITH_ID "'-', '(', number or identifier" +#define E_EXPR_CLOSE_WITH_ID "')', '+', '-', '+', '/' or identifier" +#define E_EXPR_OPEN "'-', '(' or number" +#define E_EXPR_CLOSE "')', '+', '-', '+' or '/'" #define E_DN "D0, ...D7" #define E_AN "A0, ...A7, SP" #define E_AN_DN E_AN " or " E_DN @@ -60,16 +66,28 @@ #define E_DIRECTIVE "directive" #define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL #define E_UNKNOWN_DRC "unknown directive" +#define E_NUM "number" #define E_STR "string" #define E_ID "identifier" +#define E_ID_NUM_DOT E_ID ", " E_NUM " or '.'" #define E_NESTED_DEF "nested .def ... .endef blocks are illegal" #define E_NMATCH_ENDEF ".endef directive without matching .def" +#define E_MULTIPLE_VAL "multiple .val directives specified" +#define E_MULTIPLE_SCL "multiple .scl directives specified" +#define E_MULTIPLE_TYPE "multiple .type directives specified" +#define E_MAX_NESTING "maximum expression nesting level reached" #define ERR 0 #define OK 1 #define CONTINUE 2 - #define BCC_S_MAX_BACKWARDS 126 +#define EXPR_NESTING_MAX 10 +#define PARS_EXPR_FLAG_ALLOW_ID 1 + +// These are from Sierra's FILE_FMT.H +#define C_EFCN -1 /* physical end of function */ +#define DT_FCN 2 /* function */ + enum token_type { TT_NONE = 0, @@ -111,6 +129,7 @@ enum token_type { struct token { enum token_type type; + uint32_t value; ///< For TT_NUMOCT, TT_NUMDEC, TT_NUMHEX size_t offset; size_t length; }; @@ -151,6 +170,7 @@ struct lex { // State variables enum lex_state state; enum lex_error error; + uint32_t current_number_value; size_t cursor; size_t tok_offset; bool inside_line; @@ -172,6 +192,7 @@ enum stmt_type { ST_ASSIGNMENT, ST_COMMENT, ST_DIRECTIVE, + ST_META_SAT, ///< Not a real statement, just an accumulation of .def .endef block }; enum mnemonic { @@ -392,8 +413,10 @@ enum args_count { ARGS_COUNT_2, }; -struct expr_tokens_span { +struct expr { size_t first_token, num_tokens; + int32_t value; + bool value_is_resolved; }; struct arg { @@ -402,7 +425,7 @@ struct arg { uint8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An) uint8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it has 0x8 mask set if An enum opsize briefext_size; - struct expr_tokens_span expr; + struct expr expr; size_t first_token, num_tokens; ///< Argument tokens span }; @@ -418,23 +441,25 @@ struct directive { size_t first_token, num_tokens; ///< Directive arguments tokens span }; +/// Symbol Attribute Table (SAT, a `.def ... .endef` block) +struct sat { + struct expr def_arg; + struct expr val_arg; + struct expr scl_arg; + struct expr type_arg; +}; + struct stmt { enum stmt_type type; + uint32_t addr; union { struct instruction instruction; struct directive directive; + struct sat sat; }; size_t label_token; size_t first_token, num_tokens; // Statement tokens span, may be NULL size_t comment_token; - uint32_t addr; -}; - -struct symbol { - size_t offset; // Byte offset in continuous null terminated symbol buffer - // Instead of strcmp every item in symtab we can compare hashes and get O(N) - // for search. - uint32_t hash; }; enum pars_error { @@ -481,10 +506,8 @@ struct pars { // State size_t cur_tok_id; enum pars_error error; - /*! - * SAT stands for Symbol Attribute Table - */ bool in_sat; ///< Indicates whether inside `.def ... .endef` block or not + struct sat sat; // Statement table FILE *stmttab_stream; struct stmt *stmttab; @@ -502,6 +525,7 @@ struct pars { struct assem { const struct pars *pars; + struct sat sat; }; static int pars_parse_arg_inside_parens( @@ -663,6 +687,9 @@ const struct mnemonic_meta { static int pars_directive_skip(struct pars *, enum directive_type, size_t); static int pars_directive_handler_def(struct pars *, enum directive_type, size_t); static int pars_directive_handler_endef(struct pars *, enum directive_type, size_t); +static int pars_directive_handler_scl(struct pars *, enum directive_type, size_t); +static int pars_directive_handler_type(struct pars *, enum directive_type, size_t); +static int pars_directive_handler_val(struct pars *, enum directive_type, size_t); const struct directive_description { const char *str; @@ -709,7 +736,7 @@ const struct directive_description { { "packed", pars_directive_skip, }, { "page", pars_directive_skip, }, { "reorg", pars_directive_skip, }, - { "scl", pars_directive_skip, }, + { "scl", pars_directive_handler_scl, }, { "section", pars_directive_skip, }, { "short", pars_directive_skip, }, { "single", pars_directive_skip, }, @@ -719,8 +746,8 @@ const struct directive_description { { "tag", pars_directive_skip, }, { "text", pars_directive_skip, }, { "tsection", pars_directive_skip, }, - { "type", pars_directive_skip, }, - { "val", pars_directive_skip, }, + { "type", pars_directive_handler_type, }, + { "val", pars_directive_handler_val, }, { "word", pars_directive_skip, }, { "xdef", pars_directive_skip, }, { "xref", pars_directive_skip, }, @@ -800,6 +827,17 @@ static int fprint_string_escaped( return written; } +static int hex_digit_to_int(char c) +{ + if (c >= 'a' && c <= 'f') { + return c - 'a'; + } + if (c >= 'A' && c <= 'F') { + return c - 'A'; + } + return c - '0'; +} + static const char *token_type_to_string(const enum token_type type) { switch (type) { @@ -1032,15 +1070,19 @@ static int lex_handle_next(struct lex *const self, const int c) self->state = LS_ID; } else if (c == '0') { self->tok_offset = self->cursor; + self->current_number_value = 0; self->state = LS_NUMOCTHEX; } else if (is_dec(c)) { self->tok_offset = self->cursor; + self->current_number_value = c - '0'; self->state = LS_NUMDEC; } else if (c == '@') { self->tok_offset = self->cursor; + self->current_number_value = 0; self->state = LS_NUMOCT; } else if (c == '$') { self->tok_offset = self->cursor; + self->current_number_value = 0; self->state = LS_NUMHEX; } else if (c == '"') { self->tok_offset = self->cursor; @@ -1058,59 +1100,59 @@ static int lex_handle_next(struct lex *const self, const int c) self->tok_offset = self->cursor; self->state = LS_DOT; } else if (c == ',') { - lex_yield_token(self, &(struct token){TT_COMMA, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_COMMA, 0, self->cursor, 1}); } else if (c == '(') { - lex_yield_token(self, &(struct token){TT_LPAREN, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_LPAREN, 0, self->cursor, 1}); } else if (c == ')') { - lex_yield_token(self, &(struct token){TT_RPAREN, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_RPAREN, 0, self->cursor, 1}); } else if (c == '[') { - lex_yield_token(self, &(struct token){TT_LBRACKET, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_LBRACKET, 0, self->cursor, 1}); } else if (c == ']') { - lex_yield_token(self, &(struct token){TT_RBRACKET, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_RBRACKET, 0, self->cursor, 1}); } else if (c == '{') { - lex_yield_token(self, &(struct token){TT_LBRACE, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_LBRACE, 0, self->cursor, 1}); } else if (c == '{') { - lex_yield_token(self, &(struct token){TT_RBRACE, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_RBRACE, 0, self->cursor, 1}); } else if (c == '+') { - lex_yield_token(self, &(struct token){TT_PLUS, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_PLUS, 0, self->cursor, 1}); } else if (c == '-') { - lex_yield_token(self, &(struct token){TT_MINUS, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_MINUS, 0, self->cursor, 1}); } else if (c == '*') { if (self->inside_line) { lex_yield_token( - self, &(struct token){TT_ASTERISK, self->cursor, 1}); + self, &(struct token){TT_ASTERISK, 0, self->cursor, 1}); } else { self->tok_offset = self->cursor; self->state = LS_COMMENT_ASTERISK; } } else if (c == '/') { - lex_yield_token(self, &(struct token){TT_SLASH, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_SLASH, 0, self->cursor, 1}); } else if (c == '=') { self->tok_offset = self->cursor; self->state = LS_EQ; } else if (c == ':') { - lex_yield_token(self, &(struct token){TT_COLON, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_COLON, 0, self->cursor, 1}); } else if (c == '%') { - lex_yield_token(self, &(struct token){TT_PERCENT, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_PERCENT, 0, self->cursor, 1}); } else if (c == '#') { - lex_yield_token(self, &(struct token){TT_HASH, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_HASH, 0, self->cursor, 1}); } else if (c == '!') { - lex_yield_token(self, &(struct token){TT_BANG, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_BANG, 0, self->cursor, 1}); } else if (c == '~') { - lex_yield_token(self, &(struct token){TT_TILDE, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_TILDE, 0, self->cursor, 1}); } else if (c == '&') { - lex_yield_token(self, &(struct token){TT_AMPERSAND, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_AMPERSAND, 0, self->cursor, 1}); } else if (c == '|') { - lex_yield_token(self, &(struct token){TT_PIPE, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_PIPE, 0, self->cursor, 1}); } else if (c == '^') { - lex_yield_token(self, &(struct token){TT_CAP, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_CAP, 0, self->cursor, 1}); } else if (c == '\r') { self->tok_offset = self->cursor; self->state = LS_CR; } else if (c == '\n') { - lex_yield_token(self, &(struct token){TT_NEWLINE, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_NEWLINE, 0, self->cursor, 1}); } else if (c == '\\') { - lex_yield_token(self, &(struct token){TT_ESCAPE, self->cursor, 1}); + lex_yield_token(self, &(struct token){TT_ESCAPE, 0, self->cursor, 1}); } else if (c == ' ' || c == '\t') { // ignore spaces and tabs } else if (c == EOF) { @@ -1123,8 +1165,8 @@ static int lex_handle_next(struct lex *const self, const int c) break; case LS_CR: // Accumulate CRLF into single token { - const size_t size = c == '\n' ? 2 : 1; // 2 for CRLF, 1 for just CR - const struct token token = {TT_NEWLINE, self->tok_offset, size}; + const size_t length = c == '\n' ? 2 : 1; // 2 for CRLF, 1 for just CR + const struct token token = {TT_NEWLINE, 0, self->tok_offset, length}; lex_yield_token(self, &token); self->state = LS_FREE; if (c != '\n') { @@ -1136,7 +1178,7 @@ static int lex_handle_next(struct lex *const self, const int c) case LS_LSHIFT: if (c == '<') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_LSHIFT, self->tok_offset, length}; + const struct token token = {TT_LSHIFT, 0, self->tok_offset, length}; lex_yield_token(self, &token); self->state = LS_FREE; } else { @@ -1146,7 +1188,7 @@ static int lex_handle_next(struct lex *const self, const int c) case LS_RSHIFT: if (c == '>') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_RSHIFT, self->tok_offset, length}; + const struct token token = {TT_RSHIFT, 0, self->tok_offset, length}; lex_yield_token(self, &token); self->state = LS_FREE; } else { @@ -1157,7 +1199,7 @@ static int lex_handle_next(struct lex *const self, const int c) { const size_t length = (c == '=') ? 2 : 1; const enum token_type type = (c == '=') ? TT_EQ_DOUBLE : TT_EQ; - const struct token token = {type, self->tok_offset, length}; + const struct token token = {type, 0, self->tok_offset, length}; lex_yield_token(self, &token); } self->state = LS_FREE; @@ -1170,7 +1212,7 @@ static int lex_handle_next(struct lex *const self, const int c) if (is_alphanum(c) || c == '_') { self->state = LS_DOT_ID; } else { - lex_yield_token(self, &(struct token){TT_DOT, self->tok_offset, 1}); + lex_yield_token(self, &(struct token){TT_DOT, 0, self->tok_offset, 1}); self->state = LS_FREE; return lex_handle_next(self, c); } @@ -1178,7 +1220,7 @@ static int lex_handle_next(struct lex *const self, const int c) case LS_DOT_ID: if (!is_alphanum(c) && c != '_') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_DOT_ID, self->tok_offset, length}; + const struct token token = {TT_DOT_ID, 0, self->tok_offset, length}; lex_yield_token(self, &token); self->state = LS_FREE; return lex_handle_next(self, c); @@ -1187,7 +1229,7 @@ static int lex_handle_next(struct lex *const self, const int c) case LS_ID: if (!is_alphanum(c) && c != '_') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_ID, self->tok_offset, length}; + const struct token token = {TT_ID, 0, self->tok_offset, length}; lex_yield_token(self, &token); self->state = LS_FREE; return lex_handle_next(self, c); @@ -1198,11 +1240,12 @@ static int lex_handle_next(struct lex *const self, const int c) self->state = LS_NUMHEX; } else if (is_oct(c)) { self->state = LS_NUMOCT; + return lex_handle_next(self, c); } else if (is_alphabetic(c) || c == '_') { return lex_yield_error(self, c); } else { assert((self->cursor - self->tok_offset) == 1); - const struct token token = {TT_NUMDEC, self->tok_offset, 1}; + const struct token token = {TT_NUMDEC, 0, self->tok_offset, 1}; lex_yield_token(self, &token); // It was just zero, handle this char in LS_FREE state then self->state = LS_FREE; @@ -1210,11 +1253,16 @@ static int lex_handle_next(struct lex *const self, const int c) } break; case LS_NUMOCT: - if (is_alphabetic(c) || c == '_') { + if (is_oct(c)) { + self->current_number_value <<= 3; + self->current_number_value |= c - '0'; + } else if (is_alphabetic(c) || c == '_') { return lex_yield_error(self, c); - } else if (!is_oct(c)) { + } else { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_NUMOCT, self->tok_offset, length}; + const struct token token = { + TT_NUMOCT, self->current_number_value, self->tok_offset, length + }; lex_yield_token(self, &token); // This token is finished, handle this char in LS_FREE state self->state = LS_FREE; @@ -1223,13 +1271,16 @@ static int lex_handle_next(struct lex *const self, const int c) break; case LS_NUMHEX: if (is_hex(c)) { - // Keep calm + self->current_number_value <<= 4; + self->current_number_value |= hex_digit_to_int(c); } else if (is_alphabetic(c) || c == '_') { // Panik! return lex_yield_error(self, c); } else { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_NUMHEX, self->tok_offset, length}; + const struct token token = { + TT_NUMHEX, self->current_number_value, self->tok_offset, length + }; lex_yield_token(self, &token); // This token is finished, handle this char in LS_FREE state self->state = LS_FREE; @@ -1237,11 +1288,16 @@ static int lex_handle_next(struct lex *const self, const int c) } break; case LS_NUMDEC: - if (is_alphabetic(c) || c == '_') { + if (is_dec(c)) { + self->current_number_value *= 10; + self->current_number_value += c - '0'; + } else if (is_alphabetic(c) || c == '_') { return lex_yield_error(self, c); - } else if (!is_dec(c)) { + } else { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_NUMDEC, self->tok_offset, length}; + const struct token token = { + TT_NUMDEC, self->current_number_value, self->tok_offset, length + }; lex_yield_token(self, &token); // This token is finished, handle this char in LS_FREE state self->state = LS_FREE; @@ -1253,7 +1309,7 @@ static int lex_handle_next(struct lex *const self, const int c) self->state = LS_STRING_ESC; } else if (c == '"') { const size_t length = self->cursor - self->tok_offset + 1; - const struct token token = {TT_STRING, self->tok_offset, length}; + const struct token token = {TT_STRING, 0, self->tok_offset, length}; lex_yield_token(self, &token); // This token is finished self->state = LS_FREE; @@ -1265,7 +1321,7 @@ static int lex_handle_next(struct lex *const self, const int c) case LS_COMMENT_ASTERISK: if (c == '\r' || c == '\n') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_COMMENT_ASTERISK, self->tok_offset, length}; + const struct token token = {TT_COMMENT_ASTERISK, 0, self->tok_offset, length}; lex_yield_token(self, &token); // This token is finished, handle this char in LS_FREE state self->state = LS_FREE; @@ -1275,7 +1331,7 @@ static int lex_handle_next(struct lex *const self, const int c) case LS_COMMENT_SEMICOLON: if (c == '\r' || c == '\n') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_COMMENT_SEMICOLON, self->tok_offset, length}; + const struct token token = {TT_COMMENT_SEMICOLON, 0, self->tok_offset, length}; lex_yield_token(self, &token); // This token is finished, handle this char in LS_FREE state self->state = LS_FREE; @@ -1309,7 +1365,7 @@ static int lex_next(struct lex *const self, FILE *const stream) } if (c == EOF) { // Add a hidden EOF token of 0 size - lex_yield_token(self, &(struct token){TT_NONE, self->cursor, 0}); + lex_yield_token(self, &(struct token){TT_NONE, 0, self->cursor, 0}); break; } } @@ -1483,6 +1539,7 @@ static const char *stmt_type_to_string(const enum stmt_type type) case ST_ASSIGNMENT: return "ASSIGNMENT"; case ST_COMMENT: return "COMMENT"; case ST_DIRECTIVE: return "DIRECTIVE"; + case ST_META_SAT: return "META_SAT"; } return "_UNKNOWN"; } @@ -1508,7 +1565,7 @@ static int fprint_tokens( static void fprint_expr( const struct lex *const lex, - const struct expr_tokens_span *const expr, + const struct expr *const expr, FILE *const s) { fputc('[', s); @@ -1757,6 +1814,8 @@ static size_t pars_commit(struct pars *const self) static void pars_skip_to_newline(struct pars *const self) { + // Reset state In case of inside of the .def .. .endef block + self->in_sat = false; while (!pars_is_eof_reached(self)) { const struct token nl = pars_peek(self); pars_commit(self); @@ -1954,13 +2013,55 @@ static struct token_recognition pars_recognize_token( return (struct token_recognition){0}; } +struct expr_value { + enum token_type operator; + int32_t value; + bool negative; + bool bit_inverted; +}; + +static int32_t apply_binary_operator(int32_t a, int32_t b, enum token_type op) +{ + switch (op) { + case TT_ASTERISK: + return a * b; + case TT_SLASH: + return a / b; + case TT_LSHIFT: + return a << b; + case TT_RSHIFT: + return a >> b; + case TT_AMPERSAND: + return a & b; + case TT_PIPE: + return a | b; + case TT_CAP: + return a ^ b; + case TT_NONE: + case TT_PLUS: + return a + b; + default: + break; + } + UNREACHABLE(); + assert(false); +} + static int pars_parse_expr( - struct pars *const self, struct expr_tokens_span *const expr) + struct pars *const self, + struct expr *const expr, + int flags) { // This function is called only when expression is expected unconditionally, // so if the first token cannot be a part of expression, then error must be // yielded. const size_t first_token_id = self->cur_tok_id; + const char *const e_expr_open = (flags & PARS_EXPR_FLAG_ALLOW_ID) + ? E_EXPR_OPEN_WITH_ID : E_EXPR_OPEN; + const char *const e_expr_close = (flags & PARS_EXPR_FLAG_ALLOW_ID) + ? E_EXPR_CLOSE_WITH_ID : E_EXPR_CLOSE; + struct expr_value stack[EXPR_NESTING_MAX] = {{0}}; + bool value_is_resolved = true; unsigned nesting = 0; // Otherwise expect open parenthesis, number, or unary operator. bool expect_close_or_binary = false; @@ -1970,7 +2071,7 @@ static int pars_parse_expr( assert(pars_is_eof_reached(self)); return pars_yield_error_eof( self, - expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN); + expect_close_or_binary ? e_expr_close : e_expr_open); } break; } @@ -1980,45 +2081,80 @@ static int pars_parse_expr( if (nesting == 0) { break; } - return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); + return pars_yield_error(self, self->cur_tok_id, e_expr_close); } else { nesting++; + if (nesting >= EXPR_NESTING_MAX) { + return pars_yield_error_msg(self, self->cur_tok_id, E_MAX_NESTING); + } } } else if (token.type == TT_MINUS) { // Minus is both unary and binary operator, so it does not care // about expression parsing state expect_close_or_binary = false; + stack[nesting].negative = !stack[nesting].negative; } else if (token.type == TT_TILDE) { if (expect_close_or_binary) { - return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); + return pars_yield_error(self, self->cur_tok_id, e_expr_close); } + stack[nesting].bit_inverted = !stack[nesting].bit_inverted; } else if (token.type == TT_ID) { + if (0 == (flags & PARS_EXPR_FLAG_ALLOW_ID)) { + return pars_yield_error( + self, + self->cur_tok_id, + expect_close_or_binary ? e_expr_close : e_expr_open); + } + value_is_resolved = false; if (expect_close_or_binary) { - return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); + return pars_yield_error(self, self->cur_tok_id, e_expr_close); } if (pars_recognize_token(self, token).type == RTT_REG) { return pars_yield_error(self, self->cur_tok_id, E_EXPR_NONREG); } expect_close_or_binary = true; + stack[nesting].operator = TT_NONE; + stack[nesting].negative = false; } else if (token_is_number(token.type)) { if (expect_close_or_binary) { - return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); + return pars_yield_error(self, self->cur_tok_id, e_expr_close); } expect_close_or_binary = true; + int32_t value = token.value; + if (stack[nesting].negative) { + value = -value; + } + if (stack[nesting].bit_inverted) { + value = ~value; + } + stack[nesting].value = apply_binary_operator( + stack[nesting].value, value, stack[nesting].operator); + stack[nesting].operator = TT_NONE; } else if (token_is_binary_operator(token.type)) { if (!expect_close_or_binary) { - return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN); + return pars_yield_error(self, self->cur_tok_id, e_expr_open); } expect_close_or_binary = false; + stack[nesting].operator = token.type; } else if (token.type == TT_RPAREN) { if (!expect_close_or_binary) { - return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN); + return pars_yield_error(self, self->cur_tok_id, e_expr_open); } if (nesting == 0) { // This is not my closing parenthesis, should stop break; } nesting--; + int32_t value = stack[nesting + 1].value; + if (stack[nesting].negative) { + value = -value; + } + if (stack[nesting].bit_inverted) { + value = ~value; + } + stack[nesting].value = apply_binary_operator( + stack[nesting].value, value, stack[nesting].operator); + stack[nesting].operator = TT_NONE; } else { if (nesting == 0 && expect_close_or_binary) { break; @@ -2026,14 +2162,16 @@ static int pars_parse_expr( return pars_yield_error( self, self->cur_tok_id, - expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN); + expect_close_or_binary ? e_expr_close : e_expr_open); } pars_commit(self); } assert(first_token_id != self->cur_tok_id); - *expr = (struct expr_tokens_span){ + *expr = (struct expr){ .first_token = first_token_id, .num_tokens = self->cur_tok_id - first_token_id, + .value = stack[nesting].value, + .value_is_resolved = value_is_resolved, }; return OK; } @@ -2100,6 +2238,13 @@ static int pars_finish_directive( .num_tokens = self->cur_tok_id - first_token, }; pars_put_stmt(self, &stmt); + if (directive.type == DT_ENDEF) { + struct stmt stmt = { + .type = ST_META_SAT, + .sat = self->sat, + }; + pars_put_stmt(self, &stmt); + } return OK; } @@ -2131,14 +2276,16 @@ static int pars_directive_handler_def( const size_t label_id) { const size_t name_token = self->cur_tok_id - 1; - const struct token token = pars_peek(self); - if (token.type != TT_ID) { + const struct token arg_token = pars_peek(self); + if (arg_token.type != TT_ID) { return pars_yield_error(self, self->cur_tok_id, E_ID); } if (self->in_sat) { return pars_yield_error_msg(self, self->cur_tok_id, E_NESTED_DEF); } + self->sat = (struct sat){0}; self->in_sat = true; + self->sat.def_arg = (struct expr){self->cur_tok_id, 1, 0, false}; const struct directive directive = { drc, name_token, pars_commit(self), 1 }; return pars_finish_directive(self, label_id, directive); } @@ -2157,6 +2304,86 @@ static int pars_directive_handler_endef( return pars_finish_directive(self, label_id, directive); } +static int pars_directive_handler_scl( + struct pars *const self, + const enum directive_type drc, + const size_t label_id) +{ + const size_t name_token = self->cur_tok_id - 1; + if (!self->in_sat) { + return pars_yield_error_msg(self, self->cur_tok_id, E_NMATCH_ENDEF); + } + if (self->sat.scl_arg.first_token) { + return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_SCL); + } + struct expr expr = {0}; + const int ret = pars_parse_expr(self, &expr, 0); + if (ret != OK) { + return ret; + } + assert(expr.value_is_resolved); + const struct directive directive = { + drc, name_token, expr.first_token, expr.num_tokens + }; + self->sat.scl_arg = expr; + return pars_finish_directive(self, label_id, directive); +} + +static int pars_directive_handler_type( + struct pars *const self, + const enum directive_type drc, + const size_t label_id) +{ + const size_t name_token = self->cur_tok_id - 1; + if (!self->in_sat) { + return pars_yield_error_msg(self, self->cur_tok_id, E_NMATCH_ENDEF); + } + if (self->sat.type_arg.first_token) { + return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_TYPE); + } + struct expr expr = {0}; + const int ret = pars_parse_expr(self, &expr, 0); + if (ret != OK) { + return ret; + } + assert(expr.value_is_resolved); + const struct directive directive = { + drc, name_token, expr.first_token, expr.num_tokens + }; + self->sat.type_arg = expr; + return pars_finish_directive(self, label_id, directive); +} + +static int pars_directive_handler_val( + struct pars *const self, + const enum directive_type drc, + const size_t label_id) +{ + const size_t name_token = self->cur_tok_id - 1; + const struct token arg_token = pars_peek(self); + if (!self->in_sat) { + return pars_yield_error_msg(self, self->cur_tok_id, E_NMATCH_ENDEF); + } + if (self->sat.val_arg.first_token) { + return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_VAL); + } + struct expr expr = {self->cur_tok_id, 1, 0, false}; + if (arg_token.type == TT_ID || arg_token.type == TT_DOT) { + pars_commit(self); + } else { + const int ret = pars_parse_expr(self, &expr, 0); + if (ret != OK) { + return ret; + } + assert(expr.value_is_resolved); + } + const struct directive directive = { + drc, name_token, expr.first_token, expr.num_tokens + }; + self->sat.val_arg = expr; + return pars_finish_directive(self, label_id, directive); +} + static int pars_parse_direc(struct pars *const self, const size_t label_id) { const struct token dotid = pars_peek(self); @@ -2259,7 +2486,7 @@ static int pars_parse_arg_starts_with_minus( } } // Otherwise it is expression - either prefix or standalone - const int ret = pars_parse_expr(self, &arg->expr); + const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID); if (ret != OK) { return ret; } @@ -2362,7 +2589,7 @@ static int pars_parse_arg_inside_parens_single_item( } } } else if (arg->expr.first_token == 0) { - const int ret = pars_parse_expr(self, &arg->expr); + const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID); if (ret != OK) { return ret; } @@ -2617,7 +2844,7 @@ static int pars_parse_arg( if (token0.type == TT_HASH) { // Definitely an immediate value expression pars_commit(self); - const int ret = pars_parse_expr(self, &arg->expr); + const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID); if (ret != OK) { return ret; } @@ -2629,7 +2856,7 @@ static int pars_parse_arg( return pars_parse_arg_starts_with_minus(self, arg); } else if (token0.type == TT_TILDE || token_is_number(token0.type)) { // Tilde is unary operation, so it must be an expression - const int ret = pars_parse_expr(self, &arg->expr); + const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID); if (ret != OK) { return ret; } @@ -2680,7 +2907,7 @@ static int pars_parse_arg( arg->num_tokens = self->cur_tok_id - first_token_id; return OK; } else { - const int ret = pars_parse_expr(self, &arg->expr); + const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID); if (ret != OK) { return ret; } @@ -2984,7 +3211,7 @@ static void emit_token_id( static void emit_expr( const struct lex *const lex, - const struct expr_tokens_span *const expr, + const struct expr *const expr, FILE *const s) { for (size_t i = 0; i < expr->num_tokens; i++) { @@ -3166,6 +3393,41 @@ static void emit_directive_short( } } +static void emit_directive_size_of_function( + const struct lex *const lex, + const size_t function_name_token_id, + FILE *const s) +{ + const struct token name_token = lex->tokbuf[function_name_token_id]; + const char *name = lex->input + name_token.offset; + int len = name_token.length; + if (*name == '_') { + // Strip leading underscore + // FIXME It should be sort of an option that may be disabled. + name++; + len--; + } + fprintf(s, "\t.size\t%.*s, .-%.*s", len, name, len, name); +} + +static void emit_directive_type( + const struct lex *const lex, + const size_t function_name_token_id, + const char *const type_str, + FILE *const s) +{ + const struct token name_token = lex->tokbuf[function_name_token_id]; + const char *name = lex->input + name_token.offset; + int len = name_token.length; + if (*name == '_') { + // Strip leading underscore + // FIXME It should be sort of an option that may be disabled. + name++; + len--; + } + fprintf(s, "\t.type\t%.*s, @%s", len, name, type_str); +} + static enum opsize assem_resolve_bcc( struct assem *const self, const size_t stmt_number) @@ -3225,6 +3487,28 @@ static struct res { uint32_t value; bool ok; } assem_find_symbol( return (struct res){ .ok = false }; } +static void assem_emit_meta_sat( + struct assem *const self, + const struct sat *const sat, + FILE *const stream) +{ + const struct lex *const lex = self->pars->lex; + if (0 == sat->val_arg.first_token) { + return; + } + if (sat->scl_arg.value == -1) { + emit_directive_size_of_function( + lex, sat->def_arg.first_token, stream); + fprintf(stream, "\n\n"); + } + // Check if first derived type (2 bits at offset 4) is function + if (((sat->type_arg.value >> 4) & 3) == DT_FCN) { + emit_directive_type( + lex, sat->def_arg.first_token, "function", stream); + fprintf(stream, "\n"); + } +} + static int assem_emit(struct assem *const self, FILE *const stream) { const struct lex *const lex = self->pars->lex; @@ -3240,6 +3524,7 @@ static int assem_emit(struct assem *const self, FILE *const stream) } } for (size_t i = 1; i < pars->stmttab_size / (sizeof *pars->stmttab); i++) { + bool line_is_empty = true; const struct stmt *stmt = pars->stmttab + i; if (stmt->label_token) { const struct token token = lex->tokbuf[stmt->label_token]; @@ -3249,6 +3534,7 @@ static int assem_emit(struct assem *const self, FILE *const stream) } else { fprintf(stream, "%.*s:", (int)token.length, lex->input + token.offset); } + line_is_empty = false; } if (stmt->type == ST_INSTRUCTION) { const struct instruction instr = stmt->instruction; @@ -3272,6 +3558,7 @@ static int assem_emit(struct assem *const self, FILE *const stream) emit_arg(lex, &instr.arg2, stream); } } + line_is_empty = false; } else if (stmt->type == ST_DIRECTIVE) { const struct directive *dir = &stmt->directive; switch (dir->type) { @@ -3280,14 +3567,17 @@ static int assem_emit(struct assem *const self, FILE *const stream) case DT_GLOBL: case DT_TEXT: emit_directive_same(lex, dir, stream); + line_is_empty = false; break; case DT_ASCII: case DT_BYTE: emit_directive_byte(lex, dir, stream); + line_is_empty = false; break; case DT_SHORT: case DT_WORD: emit_directive_short(lex, dir, stream); + line_is_empty = false; break; default: break; @@ -3303,7 +3593,12 @@ static int assem_emit(struct assem *const self, FILE *const stream) fprintf(stream, " | @%08x", res.value); } } - fprintf(stream, "\n"); + if (!line_is_empty) { + fprintf(stream, "\n"); + } + if (stmt->type == ST_META_SAT) { + assem_emit_meta_sat(self, &stmt->sat, stream); + } } return OK; } @@ -14,4 +14,5 @@ echo "asm68 -l -Q -o test1.o test1.S" >>"$dosbuild_dir/build.bat" echo "asm68 -l -Q -o test2.o test2.S" >>"$dosbuild_dir/build.bat" echo "asm68 -l -Q -o test3.o test3.S" >>"$dosbuild_dir/build.bat" echo "asm68 -l -Q -o test4.o test4.S" >>"$dosbuild_dir/build.bat" +echo "asm68 -l -Q -o test_sat.o test_sat.S" >>"$dosbuild_dir/build.bat" dosemu -quiet -K "$dosbuild_dir" -E'build.bat' diff --git a/tests/test_sat.S b/tests/test_sat.S new file mode 100644 index 0000000..32ada7b --- /dev/null +++ b/tests/test_sat.S @@ -0,0 +1,7 @@ + .def _fn\ .val _fn\ .scl 3\ .type 0x10024\ .endef +_fn: + nop + .def _fn\ .val .\ .scl -1\ .endef + .def _fn2\ .val -1\ .endef + .def _fn2\ .val 0x100\ .endef + .def _fn2\ .val 0\ .endef |