diff options
author | Oxore <oxore@protonmail.com> | 2023-06-26 01:18:11 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-06-26 01:18:11 +0300 |
commit | e46fb8f882b00fe9d24b2dc8810b033164c4b10c (patch) | |
tree | eb07378620d9abbf19efd169dfd0045e3f43d1cd | |
parent | b4f2fd813384a918b617e17aa394f72f8779a0a0 (diff) |
Fix complex addr mode parsing, improve error messages
-rw-r--r-- | main.c | 202 |
1 files changed, 132 insertions, 70 deletions
@@ -28,6 +28,24 @@ #define UNREACHABLE() #endif +#define E_UNIMPL "unimplemented" +#define E_UNREACH "unreachable code reached" +#define E_EXPR "expression token" +#define E_EA_PART "An, Dn, PC or full expression" +#define E_EA_PART_NOT_AN "Dn, PC or full expression" +#define E_EA_PART_NOT_EXPR "An, Dn or PC" +#define E_EA_PART_DELIM "',' or ')'" +#define E_EA_INVALID "invalid addressing mode" +#define E_DN_AN "Dn or An" +#define E_ARG "valid instruction argument" +#define E_MNEMONIC "valid instruction mnemonic" +#define E_INSN_SIZE_SPEC "'.s', '.b', '.w' or '.l'" +#define E_ARGS_COUNT "invalid arguments count" +#define E_NL "new line '\\n', '\\r\\n' or '\\r'" +#define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC +#define E_DIRECTIVE "directive" +#define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL + #define ERR 0 #define OK 1 #define CONTINUE 2 @@ -123,6 +141,7 @@ struct lex { FILE *tokbuf_stream; struct token *tokbuf; size_t tokbuf_size; + size_t tokens_count; }; enum stmt_type { @@ -700,6 +719,7 @@ static void lex_yield_token(struct lex *const self, const struct token *const to { self->inside_line = (token->type != TT_NEWLINE) && (token->type != TT_ESCAPE); fwrite_token(token, self->tokbuf_stream); + self->tokens_count++; } static const char *lex_state_error_string( @@ -1102,10 +1122,12 @@ static int lex_next(struct lex *const self, FILE *const stream) if (OK == ret) { return OK; } else if (ERR == ret) { - // TODO handle errors return ERR; } if (c == EOF) { + // Add a hidden EOF token of 0 size + lex_yield_token(self, &(struct token){TT_NONE, self->cursor, 0}); + self->tokens_count--; break; } } @@ -1229,9 +1251,7 @@ static int pars_init(struct pars *const self, const struct lex *const lex) static bool pars_is_eof_reached(const struct pars *const self) { - const size_t tokens_count = self->lex->tokbuf_size / - (sizeof *self->lex->tokbuf); - return self->cur_tok_id >= tokens_count; + return self->cur_tok_id >= self->lex->tokens_count; } static const char *stmt_type_to_string(const enum stmt_type type) @@ -1370,20 +1390,20 @@ static size_t find_line_length(const char *const str) return 0; } -static int pars_yield_error_str( +static int pars_yield_error_msg( struct pars *const self, - const struct line_pos_info l, - const char *const found, - const size_t found_length) + const size_t token_id, + const char *const msg) { + const struct token token = self->lex->tokbuf[token_id]; + const struct line_pos_info l = + lex_get_line_pos_info(self->lex, token.offset); fprintf( stderr, - "<stdin>:%lu:%lu: parsing error: expected %s, found '%.*s'\n", + "<stdin>:%lu:%lu: parsing error: %s\n", l.line_num + 1, l.column_num + 1, - "<_unspecified_token_list>", - (int)found_length, - found); + msg); const char *const line = self->lex->input + l.line_offset; const size_t line_length = find_line_length(line); fprintf( stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line); @@ -1399,13 +1419,50 @@ static int pars_yield_error_str( return ERR; } -static int pars_yield_error(struct pars *const self, const size_t token_id) +static int pars_yield_error_expected_str( + struct pars *const self, + const struct line_pos_info l, + const char *const found, + const size_t found_length, + const char *const expected) +{ + fprintf( + stderr, + "<stdin>:%lu:%lu: parsing error: expected %s, found '", + l.line_num + 1, + l.column_num + 1, + expected); + fprint_string_escaped(found, found_length, stderr); + fputs("'\n", stderr); + const char *const line = self->lex->input + l.line_offset; + const size_t line_length = find_line_length(line); + fprintf( stderr, "%5lu | %.*s\n", l.line_num + 1, (int)line_length, line); + fputs(" | ", stderr); + for (size_t i = 0; i < l.column_num; i++) { + if (self->lex->input[l.line_offset + i] == '\t') { + fputc('\t', stderr); + } else { + fputc(' ', stderr); + } + } + fputc('^', stderr); + for (size_t i = 1; i < found_length; i++) { + fputc('~', stderr); + } + fputc('\n', stderr); + return ERR; +} + +static int pars_yield_error( + struct pars *const self, + const size_t token_id, + const char *const expected) { const struct token token = self->lex->tokbuf[token_id]; const struct line_pos_info l = lex_get_line_pos_info(self->lex, token.offset); const char *const found = self->lex->input + token.offset; - return pars_yield_error_str(self, l, found, token.length); + return pars_yield_error_expected_str(self, l, found, token.length, expected); } static int pars_yield_error_nesting( @@ -1419,10 +1476,14 @@ static int pars_yield_error_nesting( return ERR; } -static int pars_yield_error_eof(struct pars *const self) +static int pars_yield_error_eof( + struct pars *const self, const char *const expected) { - (void) self; - return ERR; + const struct token token = self->lex->tokbuf[self->cur_tok_id]; + const struct line_pos_info l = + lex_get_line_pos_info(self->lex, token.offset); + return pars_yield_error_expected_str( + self, l, "EOF", (sizeof "EOF") - 1, expected); } static int pars_parse_direc( @@ -1430,7 +1491,7 @@ static int pars_parse_direc( { (void) self; (void) dot; - return pars_yield_error(self, self->cur_tok_id); + return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL); } enum opsize get_opsize_from_specifier(const char size_specifier) @@ -1599,7 +1660,7 @@ static int pars_parse_expr( // TODO parse expression } else { if (nesting > 0) { - return pars_yield_error(self, self->cur_tok_id); + return pars_yield_error(self, self->cur_tok_id, E_EXPR); } break; } @@ -1611,7 +1672,7 @@ static int pars_parse_expr( } if (first_token_id == self->cur_tok_id) { // Nothing has been parsed but expression expected - return pars_yield_error(self, self->cur_tok_id); + return pars_yield_error(self, self->cur_tok_id, E_EXPR); } *expr = (struct expr_tokens_span){ .first_token = first_token_id, @@ -1630,7 +1691,7 @@ static int pars_parse_arg_after_prefix_expr( // - Prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn) (void) self; (void) arg; - return pars_yield_error(self, self->cur_tok_id); + return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL); } static int pars_parse_arg_starts_with_minus( @@ -1642,14 +1703,14 @@ static int pars_parse_arg_starts_with_minus( if (pars_is_eof_reached(self)) { pars_commit(self); // The minus token // Just single minus is invalid expression - return pars_yield_error_eof(self); + return pars_yield_error_eof(self, "'(' or expression"); } if (pars_peek_more(self, 1).type == TT_LPAREN) { // It is still either expression or -(An) if (pars_is_eof_reached(self)) { // "-(" is invalid expression pars_commit(self), pars_commit(self); // Commit "-" and "(" - return pars_yield_error_eof(self); + return pars_yield_error_eof(self, "An or expression"); } const struct token token2 = pars_peek_more(self, 2); if (token2.type == TT_ID) { @@ -1671,7 +1732,7 @@ static int pars_parse_arg_starts_with_minus( return OK; } else { // But it has to be a closing parenthesis! - return pars_yield_error(self, rparen_id); + return pars_yield_error(self, rparen_id, "')'"); } } } @@ -1706,7 +1767,7 @@ static int pars_parse_arg_inside_parens( unsigned parts = arg->expr.first_token ? 1 : 0; while (parts < 3) { if (pars_is_eof_reached(self)) { - return pars_yield_error_eof(self); + return pars_yield_error_eof(self, E_EA_PART); } const struct token token0 = pars_peek(self); if (token0.type == TT_ID) { @@ -1727,7 +1788,7 @@ static int pars_parse_arg_inside_parens( an2_found = true; an2 = r.reg_num; } else { - return pars_yield_error(self, pars_commit(self)); + return pars_yield_error(self, pars_commit(self), E_EA_PART_NOT_AN); } break; case REG_PC: @@ -1738,20 +1799,21 @@ static int pars_parse_arg_inside_parens( case REG_SR: case REG_CCR: case REG_USP: - return pars_yield_error(self, pars_commit(self)); + return pars_yield_error(self, pars_commit(self), E_EA_PART); } pars_commit(self); - parts++; } + } else if (arg->expr.first_token == 0) { + const int ret = pars_parse_expr(self, &arg->expr); + if (ret != OK) { + return ret; + } } else { - const int ret = pars_parse_expr(self, &arg->expr); - if (ret != OK) { - return ret; - } - parts++; + return pars_yield_error(self, self->cur_tok_id, E_EA_PART_NOT_EXPR); } + parts++; if (pars_is_eof_reached(self)) { - return pars_yield_error_eof(self); + return pars_yield_error_eof(self, E_EA_PART_DELIM); } const struct token delim = pars_peek(self); const size_t delim_id = pars_commit(self); @@ -1770,7 +1832,7 @@ static int pars_parse_arg_inside_parens( break; } } else { - return pars_yield_error(self, delim_id); + return pars_yield_error(self, delim_id, E_EA_PART); } } if (parts == 1 && an1_found) { @@ -1807,20 +1869,18 @@ static int pars_parse_arg_inside_parens( // It is (d8,PC,Xn) assert((an1_found && !dn_found) || (!an1_found && dn_found)); arg->type = ARG_PC_ADDR_8_XI; - if (an1_found) { - arg->xi = -an1; - } else if (dn_found) { - arg->xi = dn; - } + arg->xi = an1_found ? -an1 : dn; arg->num_tokens = self->cur_tok_id - arg->first_token; + return OK; } else if (parts == 3 && an1_found && arg->expr.first_token && (an2_found || dn_found)) { // It is (d8,An,Xn) assert((an1_found && !dn_found) || (!an1_found && dn_found)); arg->type = ARG_AN_ADDR_8_XI; arg->xi = an2_found ? -an2 : dn; arg->num_tokens = self->cur_tok_id - arg->first_token; + return OK; } - return pars_yield_error(self, self->cur_tok_id); + return pars_yield_error_msg(self, self->cur_tok_id, E_EA_INVALID); } static int pars_parse_arg_regmask( @@ -1831,7 +1891,7 @@ static int pars_parse_arg_regmask( (void) self; (void) arg; // Very much TODO. - return pars_yield_error(self, ++self->cur_tok_id); + return pars_yield_error_msg(self, ++self->cur_tok_id, E_UNIMPL); } static int pars_parse_arg( @@ -1876,7 +1936,7 @@ static int pars_parse_arg( switch (r.reg) { case REG_NONE: UNREACHABLE(); - return pars_yield_error(self, first_token_id); + return pars_yield_error_msg(self, first_token_id, E_UNREACH); case REG_DN: if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) { // Note: the register is not committed @@ -1894,7 +1954,7 @@ static int pars_parse_arg( arg->xn = r.reg_num; break; case REG_PC: - return pars_yield_error(self, first_token_id); + return pars_yield_error(self, first_token_id, E_DN_AN); case REG_SR: arg->type = ARG_SR; break; @@ -1932,7 +1992,7 @@ static int pars_yield_instruction( const enum mnemonic mnemonic = get_mnemonic_from_identifier( self->lex->input + mnemonic_token.offset, mnemonic_token.length); if (mnemonic == MN_NONE) { - return pars_yield_error(self, mnemonic_id); + return pars_yield_error(self, mnemonic_id, E_MNEMONIC); } if (arg2->type != ARG_NONE) { assert(arg1->type != ARG_NONE); @@ -1948,24 +2008,24 @@ static int pars_yield_instruction( break; case ARGS_COUNT_0: if (arg1->type != ARG_NONE) { - return pars_yield_error(self, arg1->first_token); + return pars_yield_error_msg(self, arg1->first_token, E_ARGS_COUNT); } break; case ARGS_COUNT_1: if (arg1->type == ARG_NONE) { - return pars_yield_error(self, mnemonic_id); + return pars_yield_error_msg(self, mnemonic_id, E_ARGS_COUNT); } else if (arg2->type != ARG_NONE) { - return pars_yield_error(self, arg2->first_token); + return pars_yield_error_msg(self, arg2->first_token, E_ARGS_COUNT); } break; case ARGS_COUNT_1_2: if (arg1->type == ARG_NONE) { - return pars_yield_error(self, mnemonic_id); + return pars_yield_error_msg(self, mnemonic_id, E_ARGS_COUNT); } break; case ARGS_COUNT_2: if (arg1->type == ARG_NONE || arg2->type == ARG_NONE) { - return pars_yield_error(self, mnemonic_id); + return pars_yield_error_msg(self, mnemonic_id, E_ARGS_COUNT); } break; } @@ -2010,7 +2070,7 @@ static int pars_parse_instruction_comment( const size_t nl_id = pars_commit(self); const struct token nl = self->lex->tokbuf[nl_id]; if (nl.type != TT_NEWLINE) { - return pars_yield_error(self, nl_id); + return pars_yield_error(self, nl_id, E_NL); } } return pars_yield_instruction( @@ -2030,16 +2090,20 @@ static int pars_parse_instruction_args( return res1; } if (arg1.type != ARG_NONE) { - if (pars_is_eof_reached(self)) { - return pars_yield_instruction( - self, label_id, 0, mnemonic_id, opsize, &arg1, NULL); - } - if (pars_peek(self).type == TT_COMMA) { - pars_commit(self); - // Try parse second argument - const int res2 = pars_parse_arg(self, &arg2); - if (res2 != OK) { - return res2; + if (!pars_is_eof_reached(self)) { + if (pars_peek(self).type == TT_COMMA) { + pars_commit(self); + // Try parse second argument + if (pars_is_eof_reached(self)) { + return pars_yield_error_eof(self, E_ARG); + } + if (pars_peek(self).type == TT_NEWLINE) { + return pars_yield_error(self, self->cur_tok_id, E_ARG); + } + const int res2 = pars_parse_arg(self, &arg2); + if (res2 != OK) { + return res2; + } } } } @@ -2054,19 +2118,19 @@ static int pars_parse_instruction( const size_t mnemonic_id) { if (pars_is_eof_reached(self)) { - return pars_yield_error_eof(self); + return pars_yield_error_eof(self, E_MNEMONIC); } const struct token size_spec = pars_peek(self); if (size_spec.type == TT_DOT_ID) { const size_t size_spec_id = pars_commit(self); // Size specifier if (size_spec.length != 2) { - return pars_yield_error(self, size_spec_id); + return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC); } const size_t opsize = get_opsize_from_specifier(self->lex->input[size_spec.offset + 1]); if (opsize == OPSIZE_NONE) { - return pars_yield_error(self, size_spec_id); + return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC); } return pars_parse_instruction_args(self, label_id, mnemonic_id, opsize); } @@ -2079,7 +2143,7 @@ static int pars_parse_assignment( { (void) label_id; (void) symbol_id; - return pars_yield_error(self, self->cur_tok_id); + return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL); } static int pars_yield_label_comment( @@ -2112,7 +2176,7 @@ static int pars_parse_labeled_statement( return pars_yield_label_comment(self, label_id, 0); } else if (token1.type == TT_ID) { if (pars_is_eof_reached(self)) { - return pars_yield_error_eof(self); + return pars_yield_error_eof(self, E_LABELED_STMT); } const struct token token2 = pars_peek(self); if (!label_id && token2.type == TT_COLON) { @@ -2126,7 +2190,7 @@ static int pars_parse_labeled_statement( } else if (token1.type == TT_DOT_ID) { return pars_parse_direc(self, &token1); } - return pars_yield_error(self, token1_id); + return pars_yield_error(self, token1_id, E_STMT_BEGIN); } static int pars_parse_statement(struct pars *const self) @@ -2140,14 +2204,12 @@ static int pars_parse_statement(struct pars *const self) */ static int pars_run(struct pars *const self) { - const size_t tokens_count = self->lex->tokbuf_size / - (sizeof *self->lex->tokbuf); // Skip dummy token at position 0 self->cur_tok_id = 1; // Leave dummy statement at position 0 fwrite_stmt(&(struct stmt){0}, self->stmttab_stream); int ret = OK; - while (self->cur_tok_id < tokens_count) { + while (self->cur_tok_id < self->lex->tokens_count) { ret = pars_parse_statement(self); if (ret != OK) { break; |