diff options
| author | Oxore <oxore@protonmail.com> | 2023-06-25 23:13:44 +0300 | 
|---|---|---|
| committer | Oxore <oxore@protonmail.com> | 2023-06-25 23:13:58 +0300 | 
| commit | e882f7f28ae38ad6b42f8d558d30a0bafd8e5b32 (patch) | |
| tree | f0b3a6c47adcb6bc08b7b5a1e954bb4728fa771f | |
| parent | 66bfe8a24d9b1ca83d45396a9f9c962379d1895e (diff) | |
WIP: Impl some complex addressing modes parsing
| -rw-r--r-- | main.c | 665 | 
1 files changed, 533 insertions, 132 deletions
| @@ -274,8 +274,9 @@ enum arg_type {      ARG_ADDR_LONG,      ARG_ADDR_UNSPEC,      ARG_PC_ADDR_16, -    ARG_PC_ADDR_8_XN, +    ARG_PC_ADDR_8_XI,      ARG_IMMEDIATE, +    ARG_REGMASK,      ARG_SR,      ARG_CCR,      ARG_USP, @@ -285,35 +286,21 @@ enum arg_type {  enum args_count {      ARGS_COUNT_UNKNOWN = 0,      ARGS_COUNT_0, -    ARGS_COUNT_0_1, -    ARGS_COUNT_0_1_2, -    ARGS_COUNT_0_2,      ARGS_COUNT_1,      ARGS_COUNT_1_2,      ARGS_COUNT_2,  }; -struct arg_16 { -    int16_t d; -    int8_t an; -}; - -struct arg_8 { -    int8_t d; -    int8_t an; -    int8_t xi; -}; - -union arg_contents { -    int8_t xn; // For Dn, An, (An), -(An), (An)+ -    struct arg_16 arg_16; // For (d16,An) and (d16,PC) -    struct arg_8 arg_8; // For (d8,An,Xi) and (d8,PC,Xn) +struct expr_tokens_span { +    size_t first_token, num_tokens;  };  struct arg { -    enum arg_type arg_type; -    union arg_contents arg_contents; -    size_t first_token, num_tokens; // Expression tokens span, may be NULL +    enum arg_type type; +    int8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An) +    int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it is negative if An +    struct expr_tokens_span expr; +    size_t first_token, num_tokens; ///< Argument tokens span  };  struct instruction { @@ -356,6 +343,34 @@ enum pars_error {      PE_SOME,  }; +enum reg_type { +    REG_NONE = 0, +    REG_DN, +    REG_AN, +    REG_PC, +    REG_SR, +    REG_CCR, +    REG_USP, +}; + +enum recognized_token_type { +    RTT_NONE = 0, +    RTT_REG, // TT_ID +    RTT_NUMBER, // TT_NUMHEX, TT_NUMOCT and TT_NUMDEC +}; + +struct token_recognition { +    enum recognized_token_type type; +    union { +        struct { +            enum reg_type reg; +            int8_t reg_num; +        }; // For RTT_REG +        int32_t number; // For TT_ID +        size_t symbol_id; // For TT_ID and TT_DOT_ID, see (struct pars).symtab +    }; +}; +  struct pars {      const struct lex *lex;      // State @@ -571,6 +586,16 @@ static int printed_size(const char c)      return 1;  } +static bool token_is_number(const enum token_type type) +{ +    return type == TT_NUMHEX || type == TT_NUMDEC || type == TT_NUMOCT; +} + +static bool token_is_regmask_delimiter(const enum token_type type) +{ +    return type == TT_SLASH || type == TT_MINUS; +} +  static int fprint_string_escaped(          const char *const str, const size_t length, FILE *const stream)  { @@ -1174,8 +1199,9 @@ static const char *arg_type_to_string(const enum arg_type type)      case ARG_ADDR_LONG: return "(xxx).l";      case ARG_ADDR_UNSPEC: return "(xxx).?";      case ARG_PC_ADDR_16: return "(d16,PC)"; -    case ARG_PC_ADDR_8_XN: return "(d8,PC,Xn)"; +    case ARG_PC_ADDR_8_XI: return "(d8,PC,Xn)";      case ARG_IMMEDIATE: return "#imm"; +    case ARG_REGMASK: return "REGMASK";      case ARG_SR: return "SR";      case ARG_CCR: return "CCR";      case ARG_USP: return "USP"; @@ -1226,40 +1252,41 @@ static void fprint_arg(          const struct arg *const arg,          FILE *const s)  { -    fprintf(s, "(%s", arg_type_to_string(arg->arg_type)); -    switch (arg->arg_type) { +    fprintf(s, "(%s", arg_type_to_string(arg->type)); +    switch (arg->type) {      case ARG_NONE:      case ARG_DN:      case ARG_AN:      case ARG_AN_ADDR:      case ARG_AN_ADDR_INCR:      case ARG_AN_ADDR_DECR: -        fprintf(s, " reg %d", arg->arg_contents.xn); +        fprintf(s, " reg %d", arg->xn);          break;      case ARG_AN_ADDR_16: -        fprintf(s, " reg %d", arg->arg_contents.arg_16.an); -        fprintf(s, " d16 %d", arg->arg_contents.arg_16.d); +        fprintf(s, " reg %d", arg->xn); +        fprintf(s, " d16 [see raw]"); // TODO print expr tokens          break;      case ARG_AN_ADDR_8_XI: -        fprintf(s, " reg %d", arg->arg_contents.arg_8.an); -        fprintf(s, " d8 %d", arg->arg_contents.arg_8.d); -        fprintf(s, " xi %d", arg->arg_contents.arg_8.xi); +        fprintf(s, " reg %d", arg->xn); +        fprintf(s, " d8 [see raw]"); // TODO print expr tokens +        fprintf(s, " xi %d", arg->xi);          break;      case ARG_ADDR_WORD:      case ARG_ADDR_LONG:      case ARG_ADDR_UNSPEC: -        fprintf(s, " addr [see raw]"); +        fprintf(s, " addr [see raw]"); // TODO print expr tokens          break;      case ARG_PC_ADDR_16: -        fprintf(s, " d16 [see raw]"); +        fprintf(s, " d16 [see raw]"); // TODO print expr tokens          break; -    case ARG_PC_ADDR_8_XN: -        fprintf(s, " d8 [see raw]"); -        fprintf(s, " xn %d", arg->arg_contents.arg_8.xi); +    case ARG_PC_ADDR_8_XI: +        fprintf(s, " d8 [see raw]"); // TODO print expr tokens +        fprintf(s, " xi %d", arg->xi);          break;      case ARG_IMMEDIATE: -        fprintf(s, " value [see raw]"); +        fprintf(s, " value [see raw]"); // TODO print expr tokens          break; +    case ARG_REGMASK:      case ARG_SR:      case ARG_CCR:      case ARG_USP: @@ -1294,13 +1321,13 @@ static int fprint_stmt(      if (stmt->type == ST_INSTRUCTION) {          fprintf(s, "\n\t(mnemonic \"%s\")", mnemonic_to_string(stmt->instruction.mnemonic));          fprintf(s, "\n\t(size %s)", opsize_to_string(stmt->instruction.opsize)); -        if (stmt->instruction.arg1.arg_type != ARG_NONE) { +        if (stmt->instruction.arg1.type != ARG_NONE) {              fprintf(s, "\n\t(arg1 ");              fprint_arg(lex, &stmt->instruction.arg1, s);              fprintf(s, ")");          } -        if (stmt->instruction.arg2.arg_type != ARG_NONE) { -            assert(stmt->instruction.arg1.arg_type != ARG_NONE); +        if (stmt->instruction.arg2.type != ARG_NONE) { +            assert(stmt->instruction.arg1.type != ARG_NONE);              fprintf(s, "\n\t(arg2 ");              fprint_arg(lex, &stmt->instruction.arg2, s);              fprintf(s, ")"); @@ -1444,27 +1471,129 @@ static bool is_expression_token(const enum token_type type)      return false;  } -static int pars_parse_arg( -        struct pars *const self, struct arg *const arg) +static struct token pars_peek(const struct pars *const self)  { -    const size_t tokens_count = self->lex->tokbuf_size / -        (sizeof *self->lex->tokbuf); -    const size_t first_token_id = self->cur_tok_id; -    int nesting = 0; -    int commas = 0; -    enum arg_type arg_type = ARG_EXPR; -    while (self->cur_tok_id < tokens_count) { -        const size_t token_id = self->cur_tok_id; // Peek -        const struct token token = self->lex->tokbuf[token_id]; -        if (nesting == 1 && token.type == TT_COMMA) { -            if (commas >= 2) { -                return pars_yield_error(self, self->cur_tok_id); -            } else { -                commas++; +    return self->lex->tokbuf[self->cur_tok_id]; +} + +static struct token pars_peek_more( +        const struct pars *const self, const size_t more) +{ +    return self->lex->tokbuf[self->cur_tok_id + more]; +} + +static size_t pars_commit(struct pars *const self) +{ +    return self->cur_tok_id++; +} + +static bool is_pc(const char *const str) +{ +    return (str[0] == 'p' && str[1] == 'c') || +        (str[0] == 'P' && str[1] == 'C'); +} + +static bool is_sp(const char *const str) +{ +    return (str[0] == 's' && str[1] == 'p') || +        (str[0] == 'S' && str[1] == 'P'); +} + +static bool is_sr(const char *const str) +{ +    return (str[0] == 's' && str[1] == 'r') || +        (str[0] == 'S' && str[1] == 'R'); +} + +static bool is_ccr(const char *const str) +{ +    return +        ((str[0] == 'c' && str[1] == 'c' && str[2] == 'r') || +        (str[0] == 'C' && str[1] == 'C' && str[2] == 'R')); +} + +static bool is_usp(const char *const str) +{ +    return +        ((str[0] == 'u' && str[1] == 's' && str[2] == 'p') || +        (str[0] == 'U' && str[1] == 'S' && str[2] == 'P')); +} + +static struct token_recognition pars_recognize_token( +        const struct pars *const self, const struct token token) +{ +    const char *const str = self->lex->input + token.offset; +    if (token.type == TT_ID) { +        if (token.length == 2) { +            if (tolower(str[0]) == 'a' && is_dec(str[1])) { +                return (struct token_recognition){ +                    .type = RTT_REG, +                    .reg = REG_AN, +                    .reg_num = str[1] - '0', +                }; +            } else if (tolower(str[0]) == 'd' && is_dec(str[1])) { +                return (struct token_recognition){ +                    .type = RTT_REG, +                    .reg = REG_DN, +                    .reg_num = str[1] - '0', +                }; +            } else if (is_sp(str)) { +                return (struct token_recognition){ +                    .type = RTT_REG, +                    .reg = REG_AN, +                    .reg_num = 7, +                }; +            } else if (is_pc(str)) { +                return (struct token_recognition){ +                    .type = RTT_REG, +                    .reg = REG_PC, +                }; +            } else if (is_sr(str)) { +                return (struct token_recognition){ +                    .type = RTT_REG, +                    .reg = REG_SR, +                };              } -        } else if (token.type == TT_LPAREN) { +        } else if (token.length == 3) { +            if (is_ccr(str)) { +                return (struct token_recognition){ +                    .type = RTT_REG, +                    .reg = REG_DN, +                }; +            } else if (is_usp(str)) { +                return (struct token_recognition){ +                    .type = RTT_REG, +                    .reg = REG_USP, +                }; +            } +        } +    } else if (token.type == TT_NUMDEC) { +        // TODO +    } else if (token.type == TT_NUMOCT) { +        // TODO +    } else if (token.type == TT_NUMHEX) { +        // TODO +    } +    return (struct token_recognition){0}; +} + +static int pars_parse_expr( +        struct pars *const self, struct expr_tokens_span *const expr) +{ +    // This function is called only when expression is expected unconditionally, +    // so if the first token cannot be a part of expression, then error must be +    // yielded. +    const size_t first_token_id = self->cur_tok_id; +    unsigned nesting = 0; +    while (!pars_is_eof_reached(self)) { +        const struct token token = pars_peek(self); +        if (token.type == TT_LPAREN) {              nesting++;          } else if (token.type == TT_RPAREN) { +            if (nesting == 0) { +                // This is not my closing parenthesis, should stop +                break; +            }              nesting--;          } else if (is_expression_token(token.type)) {              // TODO parse expression @@ -1474,22 +1603,318 @@ static int pars_parse_arg(              }              break;          } -        self->cur_tok_id++; // Commit +        pars_commit(self);      }      if (nesting != 0) {          return pars_yield_error_nesting(                  self, first_token_id, self->cur_tok_id - first_token_id);      }      if (first_token_id == self->cur_tok_id) { -        // Nothing has been parsed -        *arg = (struct arg){0}; -    } else { -        *arg = (struct arg){ -            .arg_type = arg_type, -            // TODO arg_contents -            .first_token = first_token_id, -            .num_tokens = self->cur_tok_id - first_token_id, -        }; +        // Nothing has been parsed but expression expected +        return pars_yield_error(self, self->cur_tok_id); +    } +    *expr = (struct expr_tokens_span){ +        .first_token = first_token_id, +        .num_tokens = self->cur_tok_id - first_token_id, +    }; +    return OK; +} + +static int pars_parse_arg_after_prefix_expr( +        struct pars *const self, struct arg *const arg) +{ +    // At this point a single expression has been parsed and committed. +    // It can be one of: +    // - Standalone expression +    // - Standalone expression with size suffix like ".l" +    // - Prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn) +    (void) self; +    (void) arg; +    return pars_yield_error(self, self->cur_tok_id); +} + +static int pars_parse_arg_starts_with_minus( +        struct pars *const self, struct arg *const arg) +{ +    // At this point cur_tok_id points to the minus that has been peeked, but +    // not committed. +    const size_t first_token_id = self->cur_tok_id; +    if (pars_is_eof_reached(self)) { +        pars_commit(self); // The minus token +        // Just single minus is invalid expression +        return pars_yield_error_eof(self); +    } +    if (pars_peek_more(self, 1).type == TT_LPAREN) { +        // It is still either expression or -(An) +        if (pars_is_eof_reached(self)) { +            // "-(" is invalid expression +            pars_commit(self), pars_commit(self); // Commit "-" and "(" +            return pars_yield_error_eof(self); +        } +        const struct token token2 = pars_peek_more(self, 2); +        if (token2.type == TT_ID) { +            struct token_recognition r = pars_recognize_token(self, token2); +            if (r.type == RTT_REG && r.reg == REG_AN) { +                // It is definitely -(An). Commit all previous tokens and +                // expect closing parenthesis. +                self->cur_tok_id += 3; +                const size_t rparen_id = pars_commit(self); +                const struct token rparen = self->lex->tokbuf[rparen_id]; +                if (rparen.type == TT_RPAREN) { +                    // Perfect! +                    *arg = (struct arg){ +                        .type = ARG_AN_ADDR_DECR, +                        .xn = r.reg_num, +                        .first_token = first_token_id, +                        .num_tokens = self->cur_tok_id - first_token_id, +                    }; +                    return OK; +                } else { +                    // But it has to be a closing parenthesis! +                    return pars_yield_error(self, rparen_id); +                } +            } +        } +    } +    // Otherwise it is expression - either prefix or standalone +    const int ret = pars_parse_expr(self, &arg->expr); +    if (ret != OK) { +        return ret; +    } +    return pars_parse_arg_after_prefix_expr(self, arg); +} + +static int pars_parse_arg_inside_parens( +        struct pars *const self, struct arg *const arg) +{ +    // At this point cur_tok_id points after the first opening parenthesis that +    // has been parsed (committed). +    // It can be +    // - (expr)(An) +    // - (expr)(An,Xi) or (expr)(Xi,An) +    // - (expr)(PC,Xi) or (expr)(Xi,PC) +    // - (An) or (An)+ +    // - (An,expr) or (expr,An) +    // - (PC,expr) or (expr,PC) +    // - (An,expr,Xi), (An,Xi,expr), (expr,An,Xi), (expr,Xi,An), (Xi,expr,An) or +    // (Xi,An,expr) +    // - (PC,expr,Xi), (PC,Xi,expr), (expr,PC,Xi), (expr,Xi,PC), (Xi,expr,PC) or +    // (Xi,PC,expr) +    bool an1_found = false, an2_found = false, dn_found = false; +    bool pc_found = false; +    int8_t an1 = 0, an2 = 0, dn = 0; +    unsigned parts = arg->expr.first_token ? 1 : 0; +    while (parts < 3) { +        if (pars_is_eof_reached(self)) { +            return pars_yield_error_eof(self); +        } +        const struct token token0 = pars_peek(self); +        if (token0.type == TT_ID) { +            // It it may be An/Dn/PC register +            struct token_recognition r = pars_recognize_token(self, token0); +            if (r.type == RTT_REG) { +                // This is definitely a register or regmask. +                switch (r.reg) { +                case REG_DN: +                    dn_found = true; +                    dn = r.reg_num; +                    break; +                case REG_AN: +                    if (!an1_found) { +                        an1_found = true; +                        an1 = r.reg_num; +                    } else if (!an2_found) { +                        an2_found = true; +                        an2 = r.reg_num; +                    } else { +                        return pars_yield_error(self, pars_commit(self)); +                    } +                    break; +                case REG_PC: +                    pc_found = true; +                    break; +                case REG_NONE: +                    UNREACHABLE(); +                case REG_SR: +                case REG_CCR: +                case REG_USP: +                    return pars_yield_error(self, pars_commit(self)); +                } +                pars_commit(self); +                parts++; +            } +        } else { +            const int ret = pars_parse_expr(self, &arg->expr); +            if (ret != OK) { +                return ret; +            } +            parts++; +        } +        if (pars_is_eof_reached(self)) { +            return pars_yield_error_eof(self); +        } +        const struct token delim = pars_peek(self); +        const size_t delim_id = pars_commit(self); +        if (delim.type == TT_COMMA) { +            continue; +        } else if (delim.type == TT_RPAREN) { +            if (parts == 1 && arg->expr.first_token) { +                assert(!an1_found && !an2_found && !dn_found && !pc_found); +                // It turns out we are inside of expression, so this closing +                // parenthesis is part of it. Let's accumulate it and move +                // on. +                arg->expr.first_token--; +                arg->expr.num_tokens += 2; +                return pars_parse_arg_after_prefix_expr(self, arg); +            } else { +                break; +            } +        } else { +            return pars_yield_error(self, delim_id); +        } +    } +    if (parts == 1 && an1_found) { +        // It is either (An) or (An)+ +        assert(!pc_found && !dn_found && !arg->expr.first_token); +        if (pars_is_eof_reached(self)) { +            arg->type = ARG_AN_ADDR; +        } else { +            const struct token plus = pars_peek(self); +            if (plus.type == TT_PLUS) { +                pars_commit(self); +                arg->type = ARG_AN_ADDR_INCR; +            } else { +                arg->type = ARG_AN_ADDR; +            } +        } +        arg->xn = an1; +        arg->num_tokens = self->cur_tok_id - arg->first_token; +        return OK; +    } else if (parts == 2 && an1_found && arg->expr.first_token) { +        // It is (An,d16) or (d16,An) +        assert(!an2_found && !pc_found && !dn_found); +        arg->type = ARG_AN_ADDR_16; +        arg->xn = an1; +        arg->num_tokens = self->cur_tok_id - arg->first_token; +        return OK; +    } else if (parts == 2 && pc_found && arg->expr.first_token) { +        // It is (PC,d16) or (d16,PC) +        assert(!an1_found && !an2_found && !dn_found); +        arg->type = ARG_PC_ADDR_16; +        arg->num_tokens = self->cur_tok_id - arg->first_token; +        return OK; +    } else if (parts == 3 && pc_found && arg->expr.first_token && (an1_found || dn_found)) { +        // It is (d8,PC,Xn) +        assert((an1_found && !dn_found) || (!an1_found && dn_found)); +        arg->type = ARG_PC_ADDR_8_XI; +        if (an1_found) { +            arg->xi = -an1; +        } else if (dn_found) { +            arg->xi = dn; +        } +        arg->num_tokens = self->cur_tok_id - arg->first_token; +    } else if (parts == 3 && an1_found && arg->expr.first_token && (an2_found || dn_found)) { +        // It is (d8,An,Xn) +        assert((an1_found && !dn_found) || (!an1_found && dn_found)); +        arg->type = ARG_AN_ADDR_8_XI; +        arg->xi = an2_found ? -an2 : dn; +        arg->num_tokens = self->cur_tok_id - arg->first_token; +    } +    return pars_yield_error(self, self->cur_tok_id); +} + +static int pars_parse_arg_regmask( +        struct pars *const self, struct arg *const arg) +{ +    // At this point cur_tok_id points to the register token that has been +    // peeked, but not committed. +    (void) self; +    (void) arg; +    // Very much TODO. +    return pars_yield_error(self, ++self->cur_tok_id); +} + +static int pars_parse_arg( +        struct pars *const self, struct arg *const arg) +{ +    if (pars_is_eof_reached(self)) { +        return OK; +    } +    const size_t first_token_id = self->cur_tok_id; +    arg->first_token = first_token_id; +    const struct token token0 = pars_peek(self); +    if (token0.type == TT_HASH) { +        // Definitely an immediate value expression +        pars_commit(self); +        const int ret = pars_parse_expr(self, &arg->expr); +        if (ret != OK) { +            return ret; +        } +        arg->type = ARG_IMMEDIATE; +        arg->num_tokens = self->cur_tok_id - first_token_id; +        return OK; +    } else if (token0.type == TT_MINUS) { +        // It is either expression or -(An) +        return pars_parse_arg_starts_with_minus(self, arg); +    } else if (token0.type == TT_TILDE || token_is_number(token0.type)) { +        // Tilde is unary operation, so it must be an expression +        const int ret = pars_parse_expr(self, &arg->expr); +        if (ret != OK) { +            return ret; +        } +        return pars_parse_arg_after_prefix_expr(self, arg); +    } else if (token0.type == TT_LPAREN) { +        // It is either expression or addressing mode (An) / (An)+ / (d16,An) / +        // (d8,An,Xn) / (d8,PC,Xn) / (d16,An) +        pars_commit(self); +        return pars_parse_arg_inside_parens(self, arg); +    } else if (token0.type == TT_ID) { +        // It is either expression, regmask or just An/Dn/PC/SR/SP/CCR register +        struct token_recognition r = pars_recognize_token(self, token0); +        if (r.type == RTT_REG) { +            // This is definitely a register or regmask. +            switch (r.reg) { +            case REG_NONE: +                UNREACHABLE(); +                return pars_yield_error(self, first_token_id); +            case REG_DN: +                if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) { +                    // Note: the register is not committed +                    return pars_parse_arg_regmask(self, arg); +                } +                arg->type = ARG_DN; +                arg->xn = r.reg_num; +                break; +            case REG_AN: +                if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) { +                    // Note: the register is not committed +                    return pars_parse_arg_regmask(self, arg); +                } +                arg->type = ARG_AN; +                arg->xn = r.reg_num; +                break; +            case REG_PC: +                return pars_yield_error(self, first_token_id); +            case REG_SR: +                arg->type = ARG_SR; +                break; +            case REG_CCR: +                arg->type = ARG_CCR; +                break; +            case REG_USP: +                arg->type = ARG_USP; +                break; +            } +            pars_commit(self); +            arg->num_tokens = self->cur_tok_id - first_token_id; +            return OK; +        } else { +            const int ret = pars_parse_expr(self, &arg->expr); +            if (ret != OK) { +                return ret; +            } +            return pars_parse_arg_after_prefix_expr(self, arg); +        }      }      return OK;  } @@ -1509,8 +1934,11 @@ static int pars_yield_instruction(      if (mnemonic == MN_NONE) {          return pars_yield_error(self, mnemonic_id);      } -    if (arg2) { -        assert(arg1); +    if (arg2->type != ARG_NONE) { +        assert(arg1->type != ARG_NONE); +    } +    if (arg1->type == ARG_NONE) { +        assert(arg1->type == ARG_NONE);      }      const enum args_count args_count = get_args_count_for_mnemonic(mnemonic);      // Validate instruction arguments count @@ -1519,36 +1947,24 @@ static int pars_yield_instruction(          UNREACHABLE();          break;      case ARGS_COUNT_0: -        if (arg1) { +        if (arg1->type != ARG_NONE) {              return pars_yield_error(self, arg1->first_token);          }          break; -    case ARGS_COUNT_0_1: -        if (arg2) { -            return pars_yield_error(self, arg2->first_token); -        } -        break; -    case ARGS_COUNT_0_1_2: -        break; -    case ARGS_COUNT_0_2: -        if (arg1 && !arg2) { -            return pars_yield_error(self, mnemonic_id); -        } -        break;      case ARGS_COUNT_1: -        if (!arg1) { +        if (arg1->type == ARG_NONE) {              return pars_yield_error(self, mnemonic_id); -        } else if (arg2) { +        } else if (arg2->type != ARG_NONE) {              return pars_yield_error(self, arg2->first_token);          }          break;      case ARGS_COUNT_1_2: -        if (!arg1) { +        if (arg1->type == ARG_NONE) {              return pars_yield_error(self, mnemonic_id);          }          break;      case ARGS_COUNT_2: -        if (!arg1 || !arg2) { +        if (arg1->type == ARG_NONE || arg2->type == ARG_NONE) {              return pars_yield_error(self, mnemonic_id);          }          break; @@ -1582,21 +1998,19 @@ static int pars_parse_instruction_comment(      size_t comment_id = 0;      if (!pars_is_eof_reached(self)) {          // Try parse comment -        const size_t token1_id = self->cur_tok_id; // Peek comment -        const struct token token1 = self->lex->tokbuf[token1_id]; +        const struct token token1 = pars_peek(self);          const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||              token1.type == TT_COMMENT_SEMICOLON;          if (is_comment) { -            self->cur_tok_id++; // Commit comment -            comment_id = token1_id; +            comment_id = pars_commit(self);          } -        if (!pars_is_eof_reached(self)) { -            // Handle new line -            const size_t nl_id = self->cur_tok_id++; // Commit new line -            const struct token nl = self->lex->tokbuf[nl_id]; -            if (nl.type != TT_NEWLINE) { -                return pars_yield_error(self, nl_id); -            } +    } +    if (!pars_is_eof_reached(self)) { +        // There must be a new line if not EOF +        const size_t nl_id = pars_commit(self); +        const struct token nl = self->lex->tokbuf[nl_id]; +        if (nl.type != TT_NEWLINE) { +            return pars_yield_error(self, nl_id);          }      }      return pars_yield_instruction( @@ -1609,35 +2023,25 @@ static int pars_parse_instruction_args(          const size_t mnemonic_id,          const enum opsize opsize)  { -    struct arg arg1, arg2; +    struct arg arg1 = {0}, arg2 = {0};      // Try parse first argument      const int res1 = pars_parse_arg(self, &arg1);      if (res1 != OK) {          return res1;      } -    if (arg1.arg_type == ARG_NONE) { -        return pars_parse_instruction_comment( -                self, label_id, mnemonic_id, opsize, NULL, NULL); -    } -    if (pars_is_eof_reached(self)) { -        return pars_yield_instruction( -                self, label_id, 0, mnemonic_id, opsize, &arg1, NULL); -    } -    const size_t comma_id = self->cur_tok_id; // Peek comma -    const struct token comma = self->lex->tokbuf[comma_id]; -    if (comma.type != TT_COMMA) { -        return pars_parse_instruction_comment( -                self, label_id, mnemonic_id, opsize, NULL, NULL); -    } -    self->cur_tok_id++; // Commit comma -    // Try parse second argument -    const int res2 = pars_parse_arg(self, &arg2); -    if (res2 != OK) { -        return res2; -    } -    if (pars_is_eof_reached(self)) { -        return pars_yield_instruction( -                self, label_id, 0, mnemonic_id, opsize, &arg1, &arg2); +    if (arg1.type != ARG_NONE) { +        if (pars_is_eof_reached(self)) { +            return pars_yield_instruction( +                    self, label_id, 0, mnemonic_id, opsize, &arg1, NULL); +        } +        if (pars_peek(self).type == TT_COMMA) { +            pars_commit(self); +            // Try parse second argument +            const int res2 = pars_parse_arg(self, &arg2); +            if (res2 != OK) { +                return res2; +            } +        }      }      // Finish parsing instruction, expect comment or newline      return pars_parse_instruction_comment( @@ -1652,10 +2056,9 @@ static int pars_parse_instruction(      if (pars_is_eof_reached(self)) {          return pars_yield_error_eof(self);      } -    const size_t size_spec_id = self->cur_tok_id; // Peek -    const struct token size_spec = self->lex->tokbuf[size_spec_id]; +    const struct token size_spec = pars_peek(self);      if (size_spec.type == TT_DOT_ID) { -        self->cur_tok_id++; // Commit +        const size_t size_spec_id = pars_commit(self);          // Size specifier          if (size_spec.length != 2) {              return pars_yield_error(self, size_spec_id); @@ -1699,7 +2102,7 @@ static int pars_yield_label_comment(  static int pars_parse_labeled_statement(          struct pars *const self, const size_t label_id)  { -    const size_t token1_id = self->cur_tok_id++; +    const size_t token1_id = pars_commit(self);      const struct token token1 = self->lex->tokbuf[token1_id];      const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||          token1.type == TT_COMMENT_SEMICOLON; @@ -1711,17 +2114,15 @@ static int pars_parse_labeled_statement(          if (pars_is_eof_reached(self)) {              return pars_yield_error_eof(self);          } -        const size_t token2_id = self->cur_tok_id; // Peek -        const struct token token2 = self->lex->tokbuf[token2_id]; +        const struct token token2 = pars_peek(self);          if (!label_id && token2.type == TT_COLON) { -            self->cur_tok_id++; // Commit +            pars_commit(self);              return pars_parse_labeled_statement(self, token1_id);          } else if (token2.type == TT_EQ || token2.type == TT_EQ_DOUBLE) { -            self->cur_tok_id++; // Commit -            return pars_parse_assignment(self, label_id, token2_id); -        } else { -            return pars_parse_instruction(self, label_id, token1_id); +            pars_commit(self); +            return pars_parse_assignment(self, label_id, token1_id);          } +        return pars_parse_instruction(self, label_id, token1_id);      } else if (token1.type == TT_DOT_ID) {          return pars_parse_direc(self, &token1);      } | 
