diff options
author | Oxore <oxore@protonmail.com> | 2023-06-26 22:04:39 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-06-26 22:47:40 +0300 |
commit | dc93b017b6c380d7c255ac448f51c10e68d71264 (patch) | |
tree | c900089e9a660ffe91c4822a6911b36b43ca9d52 | |
parent | 2dd7bd6ca24b6a28ff36c5c441442d5885c0b611 (diff) |
Impl expr validation, fix lex error test
-rw-r--r-- | main.c | 180 |
1 files changed, 118 insertions, 62 deletions
@@ -30,7 +30,10 @@ #define E_UNIMPL "unimplemented" #define E_UNREACH "unreachable code reached" -#define E_EXPR "expression token" +#define E_EXPR "'(', ')', unary operator, binary operator, number or symbol" +#define E_EXPR_NONREG "symbol that is not a register" +#define E_EXPR_OPEN "'(', number or symbol" +#define E_EXPR_CLOSE "')', '+', '-', '+', '/' or symbol" #define E_EA_PART "D0, ...D7, A0, ...A7, SP, PC or full expression" #define E_EA_PART_NOT_AN "D0, ...D7, PC or full expression" #define E_EA_PART_NOT_EXPR "D0, ...D7 A0, ...A7, SP, or PC" @@ -40,6 +43,7 @@ #define E_ARG "valid instruction argument" #define E_MNEMONIC "valid instruction mnemonic" #define E_INSN_SIZE_SPEC "'.s', '.b', '.w' or '.l'" +#define E_ADDR_SIZE_SPEC "'.b', '.w' or '.l'" #define E_ARGS_COUNT "invalid arguments count" #define E_NL "new line '\\n', '\\r\\n' or '\\r'" #define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC @@ -413,6 +417,9 @@ struct assem { const struct pars *pars; }; +static int pars_parse_arg_inside_parens( + struct pars *const self, struct arg *const arg); + const char *const g_escape_table[256] = { "\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07", "\\x08", "\\t", "\\n", "\\x0b", "\\x0c", "\\r", "\\x0e", "\\x0f", "\\x10", @@ -610,6 +617,15 @@ static bool token_is_number(const enum token_type type) return type == TT_NUMHEX || type == TT_NUMDEC || type == TT_NUMOCT; } +static bool token_is_binary_operator(const enum token_type type) +{ + return type == TT_PLUS || type == TT_MINUS || + type == TT_ASTERISK || type == TT_SLASH || type == TT_PERCENT || + type == TT_RSHIFT || type == TT_RSHIFT || + type == TT_AMPERSAND || type == TT_CAP || + type == TT_PIPE || type == TT_BANG; +} + static bool token_is_regmask_delimiter(const enum token_type type) { return type == TT_SLASH || type == TT_MINUS; @@ -745,7 +761,7 @@ static const char *lex_state_error_string( "'-', '=', ':', '%', '#', ' ', '\\t', '\\r', '\\n', '\\r\\n' " "or EOF"; case LS_NUMHEX: - return "';', '[0-9a-zA-Z]' , ',', '.', '(', ')', '+', " + return "';', '[0-9a-fA-F]' , ',', '.', '(', ')', '+', " "'-', '=', ':', '%', '#', ' ', '\\t', '\\r', '\\n', '\\r\\n' " "or EOF"; case LS_NUMDEC: @@ -801,6 +817,17 @@ static struct line_pos_info lex_get_line_pos_info( return l; } +static size_t find_line_length(const char *const str) +{ + for (size_t i = 0;; i++) { + const char c = str[i]; + if (c == '\n' || c == '\r' || c == '\000') { + return i; + } + } + return 0; +} + static int lex_yield_error(struct lex *const self, const int c) { fflush(self->input_stream); @@ -825,7 +852,9 @@ static int lex_yield_error(struct lex *const self, const int c) lex_state_error_string(self->state, self->inside_line)); fputs(g_escape_table[c_char], stderr); fputs("'\n", stderr); - fprintf(stderr, "%5lu | %s\n", l.line_num + 1, self->input + l.line_offset); + const char *const line = self->input + l.line_offset; + const size_t line_length = find_line_length(line); + fprintf(stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line); fputs(" | ", stderr); for (size_t i = 0; i < l.column_num; i++) { if (self->input[l.line_offset + i] == '\t') { @@ -1127,7 +1156,6 @@ static int lex_next(struct lex *const self, FILE *const stream) if (c == EOF) { // Add a hidden EOF token of 0 size lex_yield_token(self, &(struct token){TT_NONE, self->cursor, 0}); - self->tokens_count--; break; } } @@ -1399,17 +1427,6 @@ static int fwrite_stmt(const struct stmt *const stmt, FILE *const stream) return res; } -static size_t find_line_length(const char *const str) -{ - for (size_t i = 0;; i++) { - const char c = str[i]; - if (c == '\n' || c == '\r' || c == '\000') { - return i; - } - } - return 0; -} - static int pars_yield_error_msg( struct pars *const self, const size_t token_id, @@ -1426,7 +1443,7 @@ static int pars_yield_error_msg( msg); const char *const line = self->lex->input + l.line_offset; const size_t line_length = find_line_length(line); - fprintf( stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line); + fprintf(stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line); fputs(" | ", stderr); for (size_t i = 0; i < l.column_num; i++) { if (self->lex->input[l.line_offset + i] == '\t') { @@ -1525,33 +1542,6 @@ enum opsize get_opsize_from_specifier(const char size_specifier) return OPSIZE_NONE; } -static bool is_expression_token(const enum token_type type) -{ - switch (type) { - case TT_PLUS: return true; - case TT_MINUS: return true; - case TT_ASTERISK: return true; - case TT_SLASH: return true; - case TT_PERCENT: return true; - case TT_LSHIFT: return true; - case TT_RSHIFT: return true; - case TT_HASH: return true; - case TT_BANG: return true; - case TT_TILDE: return true; - case TT_AMPERSAND: return true; - case TT_PIPE: return true; - case TT_CAP: return true; - case TT_ID: return true; - case TT_NUMDEC: return true; - case TT_NUMOCT: return true; - case TT_NUMHEX: return true; - case TT_LPAREN: return true; - case TT_RPAREN: return true; - default: return false; - } - return false; -} - static struct token pars_peek(const struct pars *const self) { return self->lex->tokbuf[self->cur_tok_id]; @@ -1666,33 +1656,71 @@ static int pars_parse_expr( // yielded. const size_t first_token_id = self->cur_tok_id; unsigned nesting = 0; + // Otherwise expect open parenthesis, number, or unary operator. + bool expect_close_or_binary = false; while (!pars_is_eof_reached(self)) { const struct token token = pars_peek(self); if (token.type == TT_LPAREN) { - nesting++; + if (expect_close_or_binary) { + if (nesting == 0) { + break; + } + return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); + } else { + nesting++; + } + } else if (token.type == TT_MINUS) { + // Minus is both unary and binary operator, so it does not care + // about expression parsing state + expect_close_or_binary = false; + } else if (token.type == TT_TILDE) { + if (expect_close_or_binary) { + return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); + } + } else if (token.type == TT_ID) { + if (expect_close_or_binary) { + return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); + } + if (pars_recognize_token(self, token).type == RTT_REG) { + return pars_yield_error(self, self->cur_tok_id, E_EXPR_NONREG); + } + expect_close_or_binary = true; + } else if (token_is_number(token.type)) { + if (expect_close_or_binary) { + return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); + } + expect_close_or_binary = true; + } else if (token_is_binary_operator(token.type)) { + if (!expect_close_or_binary) { + return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN); + } + expect_close_or_binary = false; } else if (token.type == TT_RPAREN) { + if (!expect_close_or_binary) { + return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN); + } if (nesting == 0) { // This is not my closing parenthesis, should stop break; } nesting--; - } else if (is_expression_token(token.type)) { - // TODO parse expression } else { - if (nesting > 0) { - return pars_yield_error(self, self->cur_tok_id, E_EXPR); + if (nesting == 0) { + break; } - break; + return pars_yield_error( + self, + self->cur_tok_id, + expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN); } pars_commit(self); } + assert(first_token_id != self->cur_tok_id); if (nesting != 0) { - return pars_yield_error_nesting( - self, first_token_id, self->cur_tok_id - first_token_id); - } - if (first_token_id == self->cur_tok_id) { - // Nothing has been parsed but expression expected - return pars_yield_error(self, self->cur_tok_id, E_EXPR); + assert(pars_is_eof_reached(self)); + return pars_yield_error_eof( + self, + expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN); } *expr = (struct expr_tokens_span){ .first_token = first_token_id, @@ -1709,8 +1737,36 @@ static int pars_parse_arg_after_prefix_expr( // - Standalone expression // - Standalone expression with size suffix like ".l" // - Prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn) - (void) self; - (void) arg; + if (pars_is_eof_reached(self)) { + // It was a standalone expression without size suffix, yield an + // argument from here + arg->type = ARG_ADDR_UNSPEC; + return OK; + } + const struct token token0 = pars_peek(self); + if (token0.type == TT_NEWLINE) { + // It was a standalone expression without size suffix, yield an + // argument from here + arg->type = ARG_ADDR_UNSPEC; + return OK; + } if (token0.type == TT_DOT_ID) { + // It must be a size specifier, or error otherwise + const size_t size_spec_id = pars_commit(self); + if (token0.length != 2) { + return pars_yield_error(self, size_spec_id, E_ADDR_SIZE_SPEC); + } + const enum opsize addrsize = + get_opsize_from_specifier(self->lex->input[token0.offset + 1]); + if (addrsize == OPSIZE_NONE || addrsize == OPSIZE_S) { + return pars_yield_error(self, size_spec_id, E_ADDR_SIZE_SPEC); + } + arg->type = addrsize == OPSIZE_L ? ARG_ADDR_LONG : ARG_ADDR_WORD; + return OK; + } else if (token0.type == TT_LPAREN) { + // It was a prefix expression for (An), (PC), (An,Xn) or (PC,Xn) + pars_commit(self); + return pars_parse_arg_inside_parens(self, arg); + } return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL); } @@ -1824,10 +1880,10 @@ static int pars_parse_arg_inside_parens( pars_commit(self); } } else if (arg->expr.first_token == 0) { - const int ret = pars_parse_expr(self, &arg->expr); - if (ret != OK) { - return ret; - } + const int ret = pars_parse_expr(self, &arg->expr); + if (ret != OK) { + return ret; + } } else { return pars_yield_error(self, self->cur_tok_id, E_EA_PART_NOT_EXPR); } @@ -2147,7 +2203,7 @@ static int pars_parse_instruction( if (size_spec.length != 2) { return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC); } - const size_t opsize = + const enum opsize opsize = get_opsize_from_specifier(self->lex->input[size_spec.offset + 1]); if (opsize == OPSIZE_NONE) { return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC); |