diff options
author | Oxore <oxore@protonmail.com> | 2023-06-25 23:13:44 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-06-25 23:13:58 +0300 |
commit | e882f7f28ae38ad6b42f8d558d30a0bafd8e5b32 (patch) | |
tree | f0b3a6c47adcb6bc08b7b5a1e954bb4728fa771f | |
parent | 66bfe8a24d9b1ca83d45396a9f9c962379d1895e (diff) |
WIP: Impl some complex addressing modes parsing
-rw-r--r-- | main.c | 665 |
1 files changed, 533 insertions, 132 deletions
@@ -274,8 +274,9 @@ enum arg_type { ARG_ADDR_LONG, ARG_ADDR_UNSPEC, ARG_PC_ADDR_16, - ARG_PC_ADDR_8_XN, + ARG_PC_ADDR_8_XI, ARG_IMMEDIATE, + ARG_REGMASK, ARG_SR, ARG_CCR, ARG_USP, @@ -285,35 +286,21 @@ enum arg_type { enum args_count { ARGS_COUNT_UNKNOWN = 0, ARGS_COUNT_0, - ARGS_COUNT_0_1, - ARGS_COUNT_0_1_2, - ARGS_COUNT_0_2, ARGS_COUNT_1, ARGS_COUNT_1_2, ARGS_COUNT_2, }; -struct arg_16 { - int16_t d; - int8_t an; -}; - -struct arg_8 { - int8_t d; - int8_t an; - int8_t xi; -}; - -union arg_contents { - int8_t xn; // For Dn, An, (An), -(An), (An)+ - struct arg_16 arg_16; // For (d16,An) and (d16,PC) - struct arg_8 arg_8; // For (d8,An,Xi) and (d8,PC,Xn) +struct expr_tokens_span { + size_t first_token, num_tokens; }; struct arg { - enum arg_type arg_type; - union arg_contents arg_contents; - size_t first_token, num_tokens; // Expression tokens span, may be NULL + enum arg_type type; + int8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An) + int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it is negative if An + struct expr_tokens_span expr; + size_t first_token, num_tokens; ///< Argument tokens span }; struct instruction { @@ -356,6 +343,34 @@ enum pars_error { PE_SOME, }; +enum reg_type { + REG_NONE = 0, + REG_DN, + REG_AN, + REG_PC, + REG_SR, + REG_CCR, + REG_USP, +}; + +enum recognized_token_type { + RTT_NONE = 0, + RTT_REG, // TT_ID + RTT_NUMBER, // TT_NUMHEX, TT_NUMOCT and TT_NUMDEC +}; + +struct token_recognition { + enum recognized_token_type type; + union { + struct { + enum reg_type reg; + int8_t reg_num; + }; // For RTT_REG + int32_t number; // For RTT_NUMBER + size_t symbol_id; // For TT_ID and TT_DOT_ID, see (struct pars).symtab + }; +}; + struct pars { const struct lex *lex; // State @@ -571,6 +586,16 @@ static int printed_size(const char c) return 1; } +static bool token_is_number(const enum token_type type) +{ + return type == TT_NUMHEX || type == TT_NUMDEC || type == TT_NUMOCT; +} + +static bool token_is_regmask_delimiter(const enum token_type type) +{ + 
return type == TT_SLASH || type == TT_MINUS; +} + static int fprint_string_escaped( const char *const str, const size_t length, FILE *const stream) { @@ -1174,8 +1199,9 @@ static const char *arg_type_to_string(const enum arg_type type) case ARG_ADDR_LONG: return "(xxx).l"; case ARG_ADDR_UNSPEC: return "(xxx).?"; case ARG_PC_ADDR_16: return "(d16,PC)"; - case ARG_PC_ADDR_8_XN: return "(d8,PC,Xn)"; + case ARG_PC_ADDR_8_XI: return "(d8,PC,Xn)"; case ARG_IMMEDIATE: return "#imm"; + case ARG_REGMASK: return "REGMASK"; case ARG_SR: return "SR"; case ARG_CCR: return "CCR"; case ARG_USP: return "USP"; @@ -1226,40 +1252,41 @@ static void fprint_arg( const struct arg *const arg, FILE *const s) { - fprintf(s, "(%s", arg_type_to_string(arg->arg_type)); - switch (arg->arg_type) { + fprintf(s, "(%s", arg_type_to_string(arg->type)); + switch (arg->type) { case ARG_NONE: case ARG_DN: case ARG_AN: case ARG_AN_ADDR: case ARG_AN_ADDR_INCR: case ARG_AN_ADDR_DECR: - fprintf(s, " reg %d", arg->arg_contents.xn); + fprintf(s, " reg %d", arg->xn); break; case ARG_AN_ADDR_16: - fprintf(s, " reg %d", arg->arg_contents.arg_16.an); - fprintf(s, " d16 %d", arg->arg_contents.arg_16.d); + fprintf(s, " reg %d", arg->xn); + fprintf(s, " d16 [see raw]"); // TODO print expr tokens break; case ARG_AN_ADDR_8_XI: - fprintf(s, " reg %d", arg->arg_contents.arg_8.an); - fprintf(s, " d8 %d", arg->arg_contents.arg_8.d); - fprintf(s, " xi %d", arg->arg_contents.arg_8.xi); + fprintf(s, " reg %d", arg->xn); + fprintf(s, " d8 [see raw]"); // TODO print expr tokens + fprintf(s, " xi %d", arg->xi); break; case ARG_ADDR_WORD: case ARG_ADDR_LONG: case ARG_ADDR_UNSPEC: - fprintf(s, " addr [see raw]"); + fprintf(s, " addr [see raw]"); // TODO print expr tokens break; case ARG_PC_ADDR_16: - fprintf(s, " d16 [see raw]"); + fprintf(s, " d16 [see raw]"); // TODO print expr tokens break; - case ARG_PC_ADDR_8_XN: - fprintf(s, " d8 [see raw]"); - fprintf(s, " xn %d", arg->arg_contents.arg_8.xi); + case ARG_PC_ADDR_8_XI: + 
fprintf(s, " d8 [see raw]"); // TODO print expr tokens + fprintf(s, " xi %d", arg->xi); break; case ARG_IMMEDIATE: - fprintf(s, " value [see raw]"); + fprintf(s, " value [see raw]"); // TODO print expr tokens break; + case ARG_REGMASK: case ARG_SR: case ARG_CCR: case ARG_USP: @@ -1294,13 +1321,13 @@ static int fprint_stmt( if (stmt->type == ST_INSTRUCTION) { fprintf(s, "\n\t(mnemonic \"%s\")", mnemonic_to_string(stmt->instruction.mnemonic)); fprintf(s, "\n\t(size %s)", opsize_to_string(stmt->instruction.opsize)); - if (stmt->instruction.arg1.arg_type != ARG_NONE) { + if (stmt->instruction.arg1.type != ARG_NONE) { fprintf(s, "\n\t(arg1 "); fprint_arg(lex, &stmt->instruction.arg1, s); fprintf(s, ")"); } - if (stmt->instruction.arg2.arg_type != ARG_NONE) { - assert(stmt->instruction.arg1.arg_type != ARG_NONE); + if (stmt->instruction.arg2.type != ARG_NONE) { + assert(stmt->instruction.arg1.type != ARG_NONE); fprintf(s, "\n\t(arg2 "); fprint_arg(lex, &stmt->instruction.arg2, s); fprintf(s, ")"); @@ -1444,27 +1471,129 @@ static bool is_expression_token(const enum token_type type) return false; } -static int pars_parse_arg( - struct pars *const self, struct arg *const arg) +static struct token pars_peek(const struct pars *const self) { - const size_t tokens_count = self->lex->tokbuf_size / - (sizeof *self->lex->tokbuf); - const size_t first_token_id = self->cur_tok_id; - int nesting = 0; - int commas = 0; - enum arg_type arg_type = ARG_EXPR; - while (self->cur_tok_id < tokens_count) { - const size_t token_id = self->cur_tok_id; // Peek - const struct token token = self->lex->tokbuf[token_id]; - if (nesting == 1 && token.type == TT_COMMA) { - if (commas >= 2) { - return pars_yield_error(self, self->cur_tok_id); - } else { - commas++; + return self->lex->tokbuf[self->cur_tok_id]; +} + +static struct token pars_peek_more( + const struct pars *const self, const size_t more) +{ + return self->lex->tokbuf[self->cur_tok_id + more]; +} + +static size_t pars_commit(struct pars 
*const self) +{ + return self->cur_tok_id++; +} + +static bool is_pc(const char *const str) +{ + return (str[0] == 'p' && str[1] == 'c') || + (str[0] == 'P' && str[1] == 'C'); +} + +static bool is_sp(const char *const str) +{ + return (str[0] == 's' && str[1] == 'p') || + (str[0] == 'S' && str[1] == 'P'); +} + +static bool is_sr(const char *const str) +{ + return (str[0] == 's' && str[1] == 'r') || + (str[0] == 'S' && str[1] == 'R'); +} + +static bool is_ccr(const char *const str) +{ + return + ((str[0] == 'c' && str[1] == 'c' && str[2] == 'r') || + (str[0] == 'C' && str[1] == 'C' && str[2] == 'R')); +} + +static bool is_usp(const char *const str) +{ + return + ((str[0] == 'u' && str[1] == 's' && str[2] == 'p') || + (str[0] == 'U' && str[1] == 'S' && str[2] == 'P')); +} + +static struct token_recognition pars_recognize_token( + const struct pars *const self, const struct token token) +{ + const char *const str = self->lex->input + token.offset; + if (token.type == TT_ID) { + if (token.length == 2) { + if (tolower(str[0]) == 'a' && is_dec(str[1])) { + return (struct token_recognition){ + .type = RTT_REG, + .reg = REG_AN, + .reg_num = str[1] - '0', + }; + } else if (tolower(str[0]) == 'd' && is_dec(str[1])) { + return (struct token_recognition){ + .type = RTT_REG, + .reg = REG_DN, + .reg_num = str[1] - '0', + }; + } else if (is_sp(str)) { + return (struct token_recognition){ + .type = RTT_REG, + .reg = REG_AN, + .reg_num = 7, + }; + } else if (is_pc(str)) { + return (struct token_recognition){ + .type = RTT_REG, + .reg = REG_PC, + }; + } else if (is_sr(str)) { + return (struct token_recognition){ + .type = RTT_REG, + .reg = REG_SR, + }; } - } else if (token.type == TT_LPAREN) { + } else if (token.length == 3) { + if (is_ccr(str)) { + return (struct token_recognition){ + .type = RTT_REG, + .reg = REG_CCR, + }; + } else if (is_usp(str)) { + return (struct token_recognition){ + .type = RTT_REG, + .reg = REG_USP, + }; + } + } + } else if (token.type == TT_NUMDEC) { + // 
TODO + } else if (token.type == TT_NUMOCT) { + // TODO + } else if (token.type == TT_NUMHEX) { + // TODO + } + return (struct token_recognition){0}; +} + +static int pars_parse_expr( + struct pars *const self, struct expr_tokens_span *const expr) +{ + // This function is called only when expression is expected unconditionally, + // so if the first token cannot be a part of expression, then error must be + // yielded. + const size_t first_token_id = self->cur_tok_id; + unsigned nesting = 0; + while (!pars_is_eof_reached(self)) { + const struct token token = pars_peek(self); + if (token.type == TT_LPAREN) { nesting++; } else if (token.type == TT_RPAREN) { + if (nesting == 0) { + // This is not my closing parenthesis, should stop + break; + } nesting--; } else if (is_expression_token(token.type)) { // TODO parse expression @@ -1474,22 +1603,318 @@ static int pars_parse_arg( } break; } - self->cur_tok_id++; // Commit + pars_commit(self); } if (nesting != 0) { return pars_yield_error_nesting( self, first_token_id, self->cur_tok_id - first_token_id); } if (first_token_id == self->cur_tok_id) { - // Nothing has been parsed - *arg = (struct arg){0}; - } else { - *arg = (struct arg){ - .arg_type = arg_type, - // TODO arg_contents - .first_token = first_token_id, - .num_tokens = self->cur_tok_id - first_token_id, - }; + // Nothing has been parsed but expression expected + return pars_yield_error(self, self->cur_tok_id); + } + *expr = (struct expr_tokens_span){ + .first_token = first_token_id, + .num_tokens = self->cur_tok_id - first_token_id, + }; + return OK; +} + +static int pars_parse_arg_after_prefix_expr( + struct pars *const self, struct arg *const arg) +{ + // At this point a single expression has been parsed and committed. 
+ // It can be one of: + // - Standalone expression + // - Standalone expression with size suffix like ".l" + // - Prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn) + (void) self; + (void) arg; + return pars_yield_error(self, self->cur_tok_id); +} + +static int pars_parse_arg_starts_with_minus( + struct pars *const self, struct arg *const arg) +{ + // At this point cur_tok_id points to the minus that has been peeked, but + // not committed. + const size_t first_token_id = self->cur_tok_id; + if (pars_is_eof_reached(self)) { + pars_commit(self); // The minus token + // Just single minus is invalid expression + return pars_yield_error_eof(self); + } + if (pars_peek_more(self, 1).type == TT_LPAREN) { + // It is still either expression or -(An) + if (pars_is_eof_reached(self)) { + // "-(" is invalid expression + pars_commit(self), pars_commit(self); // Commit "-" and "(" + return pars_yield_error_eof(self); + } + const struct token token2 = pars_peek_more(self, 2); + if (token2.type == TT_ID) { + struct token_recognition r = pars_recognize_token(self, token2); + if (r.type == RTT_REG && r.reg == REG_AN) { + // It is definitely -(An). Commit all previous tokens and + // expect closing parenthesis. + self->cur_tok_id += 3; + const size_t rparen_id = pars_commit(self); + const struct token rparen = self->lex->tokbuf[rparen_id]; + if (rparen.type == TT_RPAREN) { + // Perfect! + *arg = (struct arg){ + .type = ARG_AN_ADDR_DECR, + .xn = r.reg_num, + .first_token = first_token_id, + .num_tokens = self->cur_tok_id - first_token_id, + }; + return OK; + } else { + // But it has to be a closing parenthesis! 
+ return pars_yield_error(self, rparen_id); + } + } + } + } + // Otherwise it is expression - either prefix or standalone + const int ret = pars_parse_expr(self, &arg->expr); + if (ret != OK) { + return ret; + } + return pars_parse_arg_after_prefix_expr(self, arg); +} + +static int pars_parse_arg_inside_parens( + struct pars *const self, struct arg *const arg) +{ + // At this point cur_tok_id points after the first opening parenthesis that + // has been parsed (committed). + // It can be + // - (expr)(An) + // - (expr)(An,Xi) or (expr)(Xi,An) + // - (expr)(PC,Xi) or (expr)(Xi,PC) + // - (An) or (An)+ + // - (An,expr) or (expr,An) + // - (PC,expr) or (expr,PC) + // - (An,expr,Xi), (An,Xi,expr), (expr,An,Xi), (expr,Xi,An), (Xi,expr,An) or + // (Xi,An,expr) + // - (PC,expr,Xi), (PC,Xi,expr), (expr,PC,Xi), (expr,Xi,PC), (Xi,expr,PC) or + // (Xi,PC,expr) + bool an1_found = false, an2_found = false, dn_found = false; + bool pc_found = false; + int8_t an1 = 0, an2 = 0, dn = 0; + unsigned parts = arg->expr.first_token ? 1 : 0; + while (parts < 3) { + if (pars_is_eof_reached(self)) { + return pars_yield_error_eof(self); + } + const struct token token0 = pars_peek(self); + if (token0.type == TT_ID) { + // It it may be An/Dn/PC register + struct token_recognition r = pars_recognize_token(self, token0); + if (r.type == RTT_REG) { + // This is definitely a register or regmask. 
+ switch (r.reg) { + case REG_DN: + dn_found = true; + dn = r.reg_num; + break; + case REG_AN: + if (!an1_found) { + an1_found = true; + an1 = r.reg_num; + } else if (!an2_found) { + an2_found = true; + an2 = r.reg_num; + } else { + return pars_yield_error(self, pars_commit(self)); + } + break; + case REG_PC: + pc_found = true; + break; + case REG_NONE: + UNREACHABLE(); + case REG_SR: + case REG_CCR: + case REG_USP: + return pars_yield_error(self, pars_commit(self)); + } + pars_commit(self); + parts++; + } + } else { + const int ret = pars_parse_expr(self, &arg->expr); + if (ret != OK) { + return ret; + } + parts++; + } + if (pars_is_eof_reached(self)) { + return pars_yield_error_eof(self); + } + const struct token delim = pars_peek(self); + const size_t delim_id = pars_commit(self); + if (delim.type == TT_COMMA) { + continue; + } else if (delim.type == TT_RPAREN) { + if (parts == 1 && arg->expr.first_token) { + assert(!an1_found && !an2_found && !dn_found && !pc_found); + // It turns out we are inside of expression, so this closing + // parenthesis is part of it. Let's accumulate it and move + // on. 
+ arg->expr.first_token--; + arg->expr.num_tokens += 2; + return pars_parse_arg_after_prefix_expr(self, arg); + } else { + break; + } + } else { + return pars_yield_error(self, delim_id); + } + } + if (parts == 1 && an1_found) { + // It is either (An) or (An)+ + assert(!pc_found && !dn_found && !arg->expr.first_token); + if (pars_is_eof_reached(self)) { + arg->type = ARG_AN_ADDR; + } else { + const struct token plus = pars_peek(self); + if (plus.type == TT_PLUS) { + pars_commit(self); + arg->type = ARG_AN_ADDR_INCR; + } else { + arg->type = ARG_AN_ADDR; + } + } + arg->xn = an1; + arg->num_tokens = self->cur_tok_id - arg->first_token; + return OK; + } else if (parts == 2 && an1_found && arg->expr.first_token) { + // It is (An,d16) or (d16,An) + assert(!an2_found && !pc_found && !dn_found); + arg->type = ARG_AN_ADDR_16; + arg->xn = an1; + arg->num_tokens = self->cur_tok_id - arg->first_token; + return OK; + } else if (parts == 2 && pc_found && arg->expr.first_token) { + // It is (PC,d16) or (d16,PC) + assert(!an1_found && !an2_found && !dn_found); + arg->type = ARG_PC_ADDR_16; + arg->num_tokens = self->cur_tok_id - arg->first_token; + return OK; + } else if (parts == 3 && pc_found && arg->expr.first_token && (an1_found || dn_found)) { + // It is (d8,PC,Xn) + assert((an1_found && !dn_found) || (!an1_found && dn_found)); + arg->type = ARG_PC_ADDR_8_XI; + // Index register number is stored negated when it is An + arg->xi = an1_found ? -an1 : dn; + arg->num_tokens = self->cur_tok_id - arg->first_token; + return OK; + } else if (parts == 3 && an1_found && arg->expr.first_token && (an2_found || dn_found)) { + // It is (d8,An,Xn) + assert((an2_found && !dn_found) || (!an2_found && dn_found)); + arg->type = ARG_AN_ADDR_8_XI; + arg->xn = an1; + arg->xi = an2_found ? 
-an2 : dn; + arg->num_tokens = self->cur_tok_id - arg->first_token; + return OK; + } + return pars_yield_error(self, self->cur_tok_id); +} + +static int pars_parse_arg_regmask( + struct pars *const self, struct arg *const arg) +{ + // At this point cur_tok_id points to the register token that has been + // peeked, but not committed. + (void) self; + (void) arg; + // Very much TODO. + return pars_yield_error(self, ++self->cur_tok_id); +} + +static int pars_parse_arg( + struct pars *const self, struct arg *const arg) +{ + if (pars_is_eof_reached(self)) { + return OK; + } + const size_t first_token_id = self->cur_tok_id; + arg->first_token = first_token_id; + const struct token token0 = pars_peek(self); + if (token0.type == TT_HASH) { + // Definitely an immediate value expression + pars_commit(self); + const int ret = pars_parse_expr(self, &arg->expr); + if (ret != OK) { + return ret; + } + arg->type = ARG_IMMEDIATE; + arg->num_tokens = self->cur_tok_id - first_token_id; + return OK; + } else if (token0.type == TT_MINUS) { + // It is either expression or -(An) + return pars_parse_arg_starts_with_minus(self, arg); + } else if (token0.type == TT_TILDE || token_is_number(token0.type)) { + // Tilde is unary operation, so it must be an expression + const int ret = pars_parse_expr(self, &arg->expr); + if (ret != OK) { + return ret; + } + return pars_parse_arg_after_prefix_expr(self, arg); + } else if (token0.type == TT_LPAREN) { + // It is either expression or addressing mode (An) / (An)+ / (d16,An) / + // (d8,An,Xn) / (d8,PC,Xn) / (d16,PC) + pars_commit(self); + return pars_parse_arg_inside_parens(self, arg); + } else if (token0.type == TT_ID) { + // It is either expression, regmask or just An/Dn/PC/SR/SP/CCR register + struct token_recognition r = pars_recognize_token(self, token0); + if (r.type == RTT_REG) { + // This is definitely a register or regmask. 
+ switch (r.reg) { + case REG_NONE: + UNREACHABLE(); + return pars_yield_error(self, first_token_id); + case REG_DN: + if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) { + // Note: the register is not committed + return pars_parse_arg_regmask(self, arg); + } + arg->type = ARG_DN; + arg->xn = r.reg_num; + break; + case REG_AN: + if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) { + // Note: the register is not committed + return pars_parse_arg_regmask(self, arg); + } + arg->type = ARG_AN; + arg->xn = r.reg_num; + break; + case REG_PC: + return pars_yield_error(self, first_token_id); + case REG_SR: + arg->type = ARG_SR; + break; + case REG_CCR: + arg->type = ARG_CCR; + break; + case REG_USP: + arg->type = ARG_USP; + break; + } + pars_commit(self); + arg->num_tokens = self->cur_tok_id - first_token_id; + return OK; + } else { + const int ret = pars_parse_expr(self, &arg->expr); + if (ret != OK) { + return ret; + } + return pars_parse_arg_after_prefix_expr(self, arg); + } } return OK; } @@ -1509,8 +1934,11 @@ static int pars_yield_instruction( if (mnemonic == MN_NONE) { return pars_yield_error(self, mnemonic_id); } - if (arg2) { - assert(arg1); + if (arg2->type != ARG_NONE) { + assert(arg1->type != ARG_NONE); + } + if (arg1->type == ARG_NONE) { + assert(arg2->type == ARG_NONE); } const enum args_count args_count = get_args_count_for_mnemonic(mnemonic); // Validate instruction arguments count @@ -1519,36 +1947,24 @@ static int pars_yield_instruction( UNREACHABLE(); break; case ARGS_COUNT_0: - if (arg1) { + if (arg1->type != ARG_NONE) { return pars_yield_error(self, arg1->first_token); } break; - case ARGS_COUNT_0_1: - if (arg2) { - return pars_yield_error(self, arg2->first_token); - } - break; - case ARGS_COUNT_0_1_2: - break; - case ARGS_COUNT_0_2: - if (arg1 && !arg2) { - return pars_yield_error(self, mnemonic_id); - } - break; case ARGS_COUNT_1: - if (!arg1) { + if (arg1->type == ARG_NONE) { return pars_yield_error(self, mnemonic_id); - } 
else if (arg2) { + } else if (arg2->type != ARG_NONE) { return pars_yield_error(self, arg2->first_token); } break; case ARGS_COUNT_1_2: - if (!arg1) { + if (arg1->type == ARG_NONE) { return pars_yield_error(self, mnemonic_id); } break; case ARGS_COUNT_2: - if (!arg1 || !arg2) { + if (arg1->type == ARG_NONE || arg2->type == ARG_NONE) { return pars_yield_error(self, mnemonic_id); } break; @@ -1582,21 +1998,19 @@ static int pars_parse_instruction_comment( size_t comment_id = 0; if (!pars_is_eof_reached(self)) { // Try parse comment - const size_t token1_id = self->cur_tok_id; // Peek comment - const struct token token1 = self->lex->tokbuf[token1_id]; + const struct token token1 = pars_peek(self); const bool is_comment = token1.type == TT_COMMENT_ASTERISK || token1.type == TT_COMMENT_SEMICOLON; if (is_comment) { - self->cur_tok_id++; // Commit comment - comment_id = token1_id; + comment_id = pars_commit(self); } - if (!pars_is_eof_reached(self)) { - // Handle new line - const size_t nl_id = self->cur_tok_id++; // Commit new line - const struct token nl = self->lex->tokbuf[nl_id]; - if (nl.type != TT_NEWLINE) { - return pars_yield_error(self, nl_id); - } + } + if (!pars_is_eof_reached(self)) { + // There must be a new line if not EOF + const size_t nl_id = pars_commit(self); + const struct token nl = self->lex->tokbuf[nl_id]; + if (nl.type != TT_NEWLINE) { + return pars_yield_error(self, nl_id); } } return pars_yield_instruction( @@ -1609,35 +2023,25 @@ static int pars_parse_instruction_args( const size_t mnemonic_id, const enum opsize opsize) { - struct arg arg1, arg2; + struct arg arg1 = {0}, arg2 = {0}; // Try parse first argument const int res1 = pars_parse_arg(self, &arg1); if (res1 != OK) { return res1; } - if (arg1.arg_type == ARG_NONE) { - return pars_parse_instruction_comment( - self, label_id, mnemonic_id, opsize, NULL, NULL); - } - if (pars_is_eof_reached(self)) { - return pars_yield_instruction( - self, label_id, 0, mnemonic_id, opsize, &arg1, NULL); - } - 
const size_t comma_id = self->cur_tok_id; // Peek comma - const struct token comma = self->lex->tokbuf[comma_id]; - if (comma.type != TT_COMMA) { - return pars_parse_instruction_comment( - self, label_id, mnemonic_id, opsize, NULL, NULL); - } - self->cur_tok_id++; // Commit comma - // Try parse second argument - const int res2 = pars_parse_arg(self, &arg2); - if (res2 != OK) { - return res2; - } - if (pars_is_eof_reached(self)) { - return pars_yield_instruction( - self, label_id, 0, mnemonic_id, opsize, &arg1, &arg2); + if (arg1.type != ARG_NONE) { + if (pars_is_eof_reached(self)) { + return pars_yield_instruction( + self, label_id, 0, mnemonic_id, opsize, &arg1, &arg2); + } + if (pars_peek(self).type == TT_COMMA) { + pars_commit(self); + // Try parse second argument + const int res2 = pars_parse_arg(self, &arg2); + if (res2 != OK) { + return res2; + } + } } // Finish parsing instruction, expect comment or newline return pars_parse_instruction_comment( @@ -1652,10 +2056,9 @@ static int pars_parse_instruction( if (pars_is_eof_reached(self)) { return pars_yield_error_eof(self); } - const size_t size_spec_id = self->cur_tok_id; // Peek - const struct token size_spec = self->lex->tokbuf[size_spec_id]; + const struct token size_spec = pars_peek(self); if (size_spec.type == TT_DOT_ID) { - self->cur_tok_id++; // Commit + const size_t size_spec_id = pars_commit(self); // Size specifier if (size_spec.length != 2) { return pars_yield_error(self, size_spec_id); @@ -1699,7 +2102,7 @@ static int pars_yield_label_comment( static int pars_parse_labeled_statement( struct pars *const self, const size_t label_id) { - const size_t token1_id = self->cur_tok_id++; + const size_t token1_id = pars_commit(self); const struct token token1 = self->lex->tokbuf[token1_id]; const bool is_comment = token1.type == TT_COMMENT_ASTERISK || token1.type == TT_COMMENT_SEMICOLON; @@ -1711,17 +2114,15 @@ static int pars_parse_labeled_statement( if (pars_is_eof_reached(self)) { return 
pars_yield_error_eof(self); } - const size_t token2_id = self->cur_tok_id; // Peek - const struct token token2 = self->lex->tokbuf[token2_id]; + const struct token token2 = pars_peek(self); if (!label_id && token2.type == TT_COLON) { - self->cur_tok_id++; // Commit + pars_commit(self); return pars_parse_labeled_statement(self, token1_id); } else if (token2.type == TT_EQ || token2.type == TT_EQ_DOUBLE) { - self->cur_tok_id++; // Commit - return pars_parse_assignment(self, label_id, token2_id); - } else { - return pars_parse_instruction(self, label_id, token1_id); + pars_commit(self); + return pars_parse_assignment(self, label_id, token1_id); } + return pars_parse_instruction(self, label_id, token1_id); } else if (token1.type == TT_DOT_ID) { return pars_parse_direc(self, &token1); } |