WIP: Impl some complex addressing modes parsing

author: Oxore <oxore@protonmail.com> 2023-06-25 23:13:44 +0300
committer: Oxore <oxore@protonmail.com> 2023-06-25 23:13:58 +0300
commit: e882f7f28ae38ad6b42f8d558d30a0bafd8e5b32 (patch)
tree: f0b3a6c47adcb6bc08b7b5a1e954bb4728fa771f
parent: 66bfe8a24d9b1ca83d45396a9f9c962379d1895e (diff)
1 files changed, 533 insertions, 132 deletions
diff --git a/main.c b/main.c
index 77df6cb..4e343cb 100644
--- a/main.c
+++ b/main.c
@@ -274,8 +274,9 @@ enum arg_type {
     ARG_ADDR_LONG,
     ARG_ADDR_UNSPEC,
     ARG_PC_ADDR_16,
-    ARG_PC_ADDR_8_XN,
+    ARG_PC_ADDR_8_XI,
     ARG_IMMEDIATE,
+    ARG_REGMASK,
     ARG_SR,
     ARG_CCR,
     ARG_USP,
@@ -285,35 +286,21 @@ enum arg_type {
 enum args_count {
     ARGS_COUNT_UNKNOWN = 0,
     ARGS_COUNT_0,
-    ARGS_COUNT_0_1,
-    ARGS_COUNT_0_1_2,
-    ARGS_COUNT_0_2,
     ARGS_COUNT_1,
     ARGS_COUNT_1_2,
     ARGS_COUNT_2,
 };
 
-struct arg_16 {
-    int16_t d;
-    int8_t an;
-};
-
-struct arg_8 {
-    int8_t d;
-    int8_t an;
-    int8_t xi;
-};
-
-union arg_contents {
-    int8_t xn; // For Dn, An, (An), -(An), (An)+
-    struct arg_16 arg_16; // For (d16,An) and (d16,PC)
-    struct arg_8 arg_8; // For (d8,An,Xi) and (d8,PC,Xn)
+struct expr_tokens_span {
+    size_t first_token, num_tokens;
 };
 
 struct arg {
-    enum arg_type arg_type;
-    union arg_contents arg_contents;
-    size_t first_token, num_tokens; // Expression tokens span, may be NULL
+    enum arg_type type;
+    int8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An)
+    int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it is negative if An
+    struct expr_tokens_span expr;
+    size_t first_token, num_tokens; ///< Argument tokens span
 };
 
 struct instruction {
@@ -356,6 +343,34 @@ enum pars_error {
     PE_SOME,
 };
 
+enum reg_type {
+    REG_NONE = 0,
+    REG_DN,
+    REG_AN,
+    REG_PC,
+    REG_SR,
+    REG_CCR,
+    REG_USP,
+};
+
+enum recognized_token_type {
+    RTT_NONE = 0,
+    RTT_REG, // TT_ID
+    RTT_NUMBER, // TT_NUMHEX, TT_NUMOCT and TT_NUMDEC
+};
+
+struct token_recognition {
+    enum recognized_token_type type;
+    union {
+        struct {
+            enum reg_type reg;
+            int8_t reg_num;
+        }; // For RTT_REG
+        int32_t number; // For RTT_NUMBER
+        size_t symbol_id; // For TT_ID and TT_DOT_ID, see (struct pars).symtab
+    };
+};
+
 struct pars {
     const struct lex *lex;
     // State
@@ -571,6 +586,16 @@ static int printed_size(const char c)
     return 1;
 }
 
+static bool token_is_number(const enum token_type type)
+{
+    return type == TT_NUMHEX || type == TT_NUMDEC || type == TT_NUMOCT;
+}
+
+static bool token_is_regmask_delimiter(const enum token_type type)
+{
+    return type == TT_SLASH || type == TT_MINUS;
+}
+
 static int fprint_string_escaped(
         const char *const str, const size_t length, FILE *const stream)
 {
@@ -1174,8 +1199,9 @@ static const char *arg_type_to_string(const enum arg_type type)
     case ARG_ADDR_LONG: return "(xxx).l";
     case ARG_ADDR_UNSPEC: return "(xxx).?";
     case ARG_PC_ADDR_16: return "(d16,PC)";
-    case ARG_PC_ADDR_8_XN: return "(d8,PC,Xn)";
+    case ARG_PC_ADDR_8_XI: return "(d8,PC,Xn)";
     case ARG_IMMEDIATE: return "#imm";
+    case ARG_REGMASK: return "REGMASK";
     case ARG_SR: return "SR";
     case ARG_CCR: return "CCR";
     case ARG_USP: return "USP";
@@ -1226,40 +1252,41 @@ static void fprint_arg(
         const struct arg *const arg,
         FILE *const s)
 {
-    fprintf(s, "(%s", arg_type_to_string(arg->arg_type));
-    switch (arg->arg_type) {
+    fprintf(s, "(%s", arg_type_to_string(arg->type));
+    switch (arg->type) {
     case ARG_NONE:
     case ARG_DN:
     case ARG_AN:
     case ARG_AN_ADDR:
     case ARG_AN_ADDR_INCR:
     case ARG_AN_ADDR_DECR:
-        fprintf(s, " reg %d", arg->arg_contents.xn);
+        fprintf(s, " reg %d", arg->xn);
         break;
     case ARG_AN_ADDR_16:
-        fprintf(s, " reg %d", arg->arg_contents.arg_16.an);
-        fprintf(s, " d16 %d", arg->arg_contents.arg_16.d);
+        fprintf(s, " reg %d", arg->xn);
+        fprintf(s, " d16 [see raw]"); // TODO print expr tokens
         break;
     case ARG_AN_ADDR_8_XI:
-        fprintf(s, " reg %d", arg->arg_contents.arg_8.an);
-        fprintf(s, " d8 %d", arg->arg_contents.arg_8.d);
-        fprintf(s, " xi %d", arg->arg_contents.arg_8.xi);
+        fprintf(s, " reg %d", arg->xn);
+        fprintf(s, " d8 [see raw]"); // TODO print expr tokens
+        fprintf(s, " xi %d", arg->xi);
         break;
     case ARG_ADDR_WORD:
     case ARG_ADDR_LONG:
     case ARG_ADDR_UNSPEC:
-        fprintf(s, " addr [see raw]");
+        fprintf(s, " addr [see raw]"); // TODO print expr tokens
         break;
     case ARG_PC_ADDR_16:
-        fprintf(s, " d16 [see raw]");
+        fprintf(s, " d16 [see raw]"); // TODO print expr tokens
         break;
-    case ARG_PC_ADDR_8_XN:
-        fprintf(s, " d8 [see raw]");
-        fprintf(s, " xn %d", arg->arg_contents.arg_8.xi);
+    case ARG_PC_ADDR_8_XI:
+        fprintf(s, " d8 [see raw]"); // TODO print expr tokens
+        fprintf(s, " xi %d", arg->xi);
         break;
     case ARG_IMMEDIATE:
-        fprintf(s, " value [see raw]");
+        fprintf(s, " value [see raw]"); // TODO print expr tokens
         break;
+    case ARG_REGMASK:
     case ARG_SR:
     case ARG_CCR:
     case ARG_USP:
@@ -1294,13 +1321,13 @@ static int fprint_stmt(
     if (stmt->type == ST_INSTRUCTION) {
         fprintf(s, "\n\t(mnemonic \"%s\")", mnemonic_to_string(stmt->instruction.mnemonic));
         fprintf(s, "\n\t(size %s)", opsize_to_string(stmt->instruction.opsize));
-        if (stmt->instruction.arg1.arg_type != ARG_NONE) {
+        if (stmt->instruction.arg1.type != ARG_NONE) {
             fprintf(s, "\n\t(arg1 ");
             fprint_arg(lex, &stmt->instruction.arg1, s);
             fprintf(s, ")");
         }
-        if (stmt->instruction.arg2.arg_type != ARG_NONE) {
-            assert(stmt->instruction.arg1.arg_type != ARG_NONE);
+        if (stmt->instruction.arg2.type != ARG_NONE) {
+            assert(stmt->instruction.arg1.type != ARG_NONE);
             fprintf(s, "\n\t(arg2 ");
             fprint_arg(lex, &stmt->instruction.arg2, s);
             fprintf(s, ")");
@@ -1444,27 +1471,129 @@ static bool is_expression_token(const enum token_type type)
     return false;
 }
 
-static int pars_parse_arg(
-        struct pars *const self, struct arg *const arg)
+static struct token pars_peek(const struct pars *const self)
 {
-    const size_t tokens_count = self->lex->tokbuf_size /
-        (sizeof *self->lex->tokbuf);
-    const size_t first_token_id = self->cur_tok_id;
-    int nesting = 0;
-    int commas = 0;
-    enum arg_type arg_type = ARG_EXPR;
-    while (self->cur_tok_id < tokens_count) {
-        const size_t token_id = self->cur_tok_id; // Peek
-        const struct token token = self->lex->tokbuf[token_id];
-        if (nesting == 1 && token.type == TT_COMMA) {
-            if (commas >= 2) {
-                return pars_yield_error(self, self->cur_tok_id);
-            } else {
-                commas++;
+    return self->lex->tokbuf[self->cur_tok_id];
+}
+
+static struct token pars_peek_more(
+        const struct pars *const self, const size_t more)
+{
+    return self->lex->tokbuf[self->cur_tok_id + more];
+}
+
+static size_t pars_commit(struct pars *const self)
+{
+    return self->cur_tok_id++;
+}
+
+static bool is_pc(const char *const str)
+{
+    return (str[0] == 'p' && str[1] == 'c') ||
+        (str[0] == 'P' && str[1] == 'C');
+}
+
+static bool is_sp(const char *const str)
+{
+    return (str[0] == 's' && str[1] == 'p') ||
+        (str[0] == 'S' && str[1] == 'P');
+}
+
+static bool is_sr(const char *const str)
+{
+    return (str[0] == 's' && str[1] == 'r') ||
+        (str[0] == 'S' && str[1] == 'R');
+}
+
+static bool is_ccr(const char *const str)
+{
+    return
+        ((str[0] == 'c' && str[1] == 'c' && str[2] == 'r') ||
+        (str[0] == 'C' && str[1] == 'C' && str[2] == 'R'));
+}
+
+static bool is_usp(const char *const str)
+{
+    return
+        ((str[0] == 'u' && str[1] == 's' && str[2] == 'p') ||
+        (str[0] == 'U' && str[1] == 'S' && str[2] == 'P'));
+}
+
+static struct token_recognition pars_recognize_token(
+        const struct pars *const self, const struct token token)
+{
+    const char *const str = self->lex->input + token.offset;
+    if (token.type == TT_ID) {
+        if (token.length == 2) {
+            if (tolower(str[0]) == 'a' && is_dec(str[1]) && str[1] <= '7') {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_AN,
+                    .reg_num = str[1] - '0',
+                };
+            } else if (tolower(str[0]) == 'd' && is_dec(str[1]) && str[1] <= '7') {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_DN,
+                    .reg_num = str[1] - '0',
+                };
+            } else if (is_sp(str)) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_AN,
+                    .reg_num = 7,
+                };
+            } else if (is_pc(str)) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_PC,
+                };
+            } else if (is_sr(str)) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_SR,
+                };
             }
-        } else if (token.type == TT_LPAREN) {
+        } else if (token.length == 3) {
+            if (is_ccr(str)) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_CCR,
+                };
+            } else if (is_usp(str)) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_USP,
+                };
+            }
+        }
+    } else if (token.type == TT_NUMDEC) {
+        // TODO recognize decimal literal as RTT_NUMBER
+    } else if (token.type == TT_NUMOCT) {
+        // TODO recognize octal literal as RTT_NUMBER
+    } else if (token.type == TT_NUMHEX) {
+        // TODO recognize hexadecimal literal as RTT_NUMBER
+    }
+    return (struct token_recognition){0};
+}
+
+static int pars_parse_expr(
+        struct pars *const self, struct expr_tokens_span *const expr)
+{
+    // This function is called only when expression is expected unconditionally,
+    // so if the first token cannot be a part of expression, then error must be
+    // yielded.
+    const size_t first_token_id = self->cur_tok_id;
+    unsigned nesting = 0;
+    while (!pars_is_eof_reached(self)) {
+        const struct token token = pars_peek(self);
+        if (token.type == TT_LPAREN) {
             nesting++;
         } else if (token.type == TT_RPAREN) {
+            if (nesting == 0) {
+                // This is not my closing parenthesis, should stop
+                break;
+            }
             nesting--;
         } else if (is_expression_token(token.type)) {
             // TODO parse expression
@@ -1474,22 +1603,318 @@ static int pars_parse_arg(
             }
             break;
         }
-        self->cur_tok_id++; // Commit
+        pars_commit(self);
     }
     if (nesting != 0) {
         return pars_yield_error_nesting(
                 self, first_token_id, self->cur_tok_id - first_token_id);
     }
     if (first_token_id == self->cur_tok_id) {
-        // Nothing has been parsed
-        *arg = (struct arg){0};
-    } else {
-        *arg = (struct arg){
-            .arg_type = arg_type,
-            // TODO arg_contents
-            .first_token = first_token_id,
-            .num_tokens = self->cur_tok_id - first_token_id,
-        };
+        // Nothing has been parsed but expression expected
+        return pars_yield_error(self, self->cur_tok_id);
+    }
+    *expr = (struct expr_tokens_span){
+        .first_token = first_token_id,
+        .num_tokens = self->cur_tok_id - first_token_id,
+    };
+    return OK;
+}
+
+static int pars_parse_arg_after_prefix_expr(
+        struct pars *const self, struct arg *const arg)
+{
+    // At this point a single expression has been parsed and committed.
+    // It can be one of:
+    // - Standalone expression
+    // - Standalone expression with size suffix like ".l"
+    // - Prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn)
+    (void) self;
+    (void) arg;
+    return pars_yield_error(self, self->cur_tok_id);
+}
+
+static int pars_parse_arg_starts_with_minus(
+        struct pars *const self, struct arg *const arg)
+{
+    // At this point cur_tok_id points to the minus that has been peeked, but
+    // not committed.
+    const size_t first_token_id = self->cur_tok_id;
+    if (pars_is_eof_reached(self)) {
+        pars_commit(self); // The minus token
+        // Just single minus is invalid expression
+        return pars_yield_error_eof(self);
+    }
+    if (pars_peek_more(self, 1).type == TT_LPAREN) {
+        // It is still either expression or -(An)
+        if (pars_is_eof_reached(self)) {
+            // "-(" is invalid expression
+            pars_commit(self), pars_commit(self); // Commit "-" and "("
+            return pars_yield_error_eof(self);
+        }
+        const struct token token2 = pars_peek_more(self, 2);
+        if (token2.type == TT_ID) {
+            struct token_recognition r = pars_recognize_token(self, token2);
+            if (r.type == RTT_REG && r.reg == REG_AN) {
+                // It is definitely -(An). Commit all previous tokens and
+                // expect closing parenthesis.
+                self->cur_tok_id += 3;
+                const size_t rparen_id = pars_commit(self);
+                const struct token rparen = self->lex->tokbuf[rparen_id];
+                if (rparen.type == TT_RPAREN) {
+                    // Perfect!
+                    *arg = (struct arg){
+                        .type = ARG_AN_ADDR_DECR,
+                        .xn = r.reg_num,
+                        .first_token = first_token_id,
+                        .num_tokens = self->cur_tok_id - first_token_id,
+                    };
+                    return OK;
+                } else {
+                    // But it has to be a closing parenthesis!
+                    return pars_yield_error(self, rparen_id);
+                }
+            }
+        }
+    }
+    // Otherwise it is expression - either prefix or standalone
+    const int ret = pars_parse_expr(self, &arg->expr);
+    if (ret != OK) {
+        return ret;
+    }
+    return pars_parse_arg_after_prefix_expr(self, arg);
+}
+
+static int pars_parse_arg_inside_parens(
+        struct pars *const self, struct arg *const arg)
+{
+    // At this point cur_tok_id points after the first opening parenthesis that
+    // has been parsed (committed).
+    // It can be
+    // - (expr)(An)
+    // - (expr)(An,Xi) or (expr)(Xi,An)
+    // - (expr)(PC,Xi) or (expr)(Xi,PC)
+    // - (An) or (An)+
+    // - (An,expr) or (expr,An)
+    // - (PC,expr) or (expr,PC)
+    // - (An,expr,Xi), (An,Xi,expr), (expr,An,Xi), (expr,Xi,An), (Xi,expr,An) or
+    // (Xi,An,expr)
+    // - (PC,expr,Xi), (PC,Xi,expr), (expr,PC,Xi), (expr,Xi,PC), (Xi,expr,PC) or
+    // (Xi,PC,expr)
+    bool an1_found = false, an2_found = false, dn_found = false;
+    bool pc_found = false;
+    int8_t an1 = 0, an2 = 0, dn = 0;
+    unsigned parts = arg->expr.first_token ? 1 : 0;
+    while (parts < 3) {
+        if (pars_is_eof_reached(self)) {
+            return pars_yield_error_eof(self);
+        }
+        const struct token token0 = pars_peek(self);
+        if (token0.type == TT_ID) {
+            // It may be an An/Dn/PC register
+            struct token_recognition r = pars_recognize_token(self, token0);
+            if (r.type == RTT_REG) {
+                // This is definitely a register or regmask.
+                switch (r.reg) {
+                case REG_DN:
+                    dn_found = true;
+                    dn = r.reg_num;
+                    break;
+                case REG_AN:
+                    if (!an1_found) {
+                        an1_found = true;
+                        an1 = r.reg_num;
+                    } else if (!an2_found) {
+                        an2_found = true;
+                        an2 = r.reg_num;
+                    } else {
+                        return pars_yield_error(self, pars_commit(self));
+                    }
+                    break;
+                case REG_PC:
+                    pc_found = true;
+                    break;
+                case REG_NONE:
+                    UNREACHABLE();
+                case REG_SR:
+                case REG_CCR:
+                case REG_USP:
+                    return pars_yield_error(self, pars_commit(self));
+                }
+                pars_commit(self);
+                parts++;
+            }
+        } else {
+            const int ret = pars_parse_expr(self, &arg->expr);
+            if (ret != OK) {
+                return ret;
+            }
+            parts++;
+        }
+        if (pars_is_eof_reached(self)) {
+            return pars_yield_error_eof(self);
+        }
+        const struct token delim = pars_peek(self);
+        const size_t delim_id = pars_commit(self);
+        if (delim.type == TT_COMMA) {
+            continue;
+        } else if (delim.type == TT_RPAREN) {
+            if (parts == 1 && arg->expr.first_token) {
+                assert(!an1_found && !an2_found && !dn_found && !pc_found);
+                // It turns out we are inside of expression, so this closing
+                // parenthesis is part of it. Let's accumulate it and move
+                // on.
+                arg->expr.first_token--;
+                arg->expr.num_tokens += 2;
+                return pars_parse_arg_after_prefix_expr(self, arg);
+            } else {
+                break;
+            }
+        } else {
+            return pars_yield_error(self, delim_id);
+        }
+    }
+    if (parts == 1 && an1_found) {
+        // It is either (An) or (An)+
+        assert(!pc_found && !dn_found && !arg->expr.first_token);
+        if (pars_is_eof_reached(self)) {
+            arg->type = ARG_AN_ADDR;
+        } else {
+            const struct token plus = pars_peek(self);
+            if (plus.type == TT_PLUS) {
+                pars_commit(self);
+                arg->type = ARG_AN_ADDR_INCR;
+            } else {
+                arg->type = ARG_AN_ADDR;
+            }
+        }
+        arg->xn = an1;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    } else if (parts == 2 && an1_found && arg->expr.first_token) {
+        // It is (An,d16) or (d16,An)
+        assert(!an2_found && !pc_found && !dn_found);
+        arg->type = ARG_AN_ADDR_16;
+        arg->xn = an1;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    } else if (parts == 2 && pc_found && arg->expr.first_token) {
+        // It is (PC,d16) or (d16,PC)
+        assert(!an1_found && !an2_found && !dn_found);
+        arg->type = ARG_PC_ADDR_16;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    } else if (parts == 3 && pc_found && arg->expr.first_token && (an1_found || dn_found)) {
+        // It is (d8,PC,Xi), the index is either An (stored negated) or Dn
+        assert((an1_found && !dn_found) || (!an1_found && dn_found));
+        arg->type = ARG_PC_ADDR_8_XI;
+        arg->xi = an1_found ? -an1 : dn;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    } else if (parts == 3 && an1_found && arg->expr.first_token && (an2_found || dn_found)) {
+        // It is (d8,An,Xi): an1 is the base, the index is either an2 or dn
+        assert((an2_found && !dn_found) || (!an2_found && dn_found));
+        arg->type = ARG_AN_ADDR_8_XI;
+        arg->xn = an1;
+        arg->xi = an2_found ? -an2 : dn;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    }
+    // None of the known addressing modes matched the collected parts
+    return pars_yield_error(self, self->cur_tok_id);
+}
+
+static int pars_parse_arg_regmask(
+        struct pars *const self, struct arg *const arg)
+{
+    // At this point cur_tok_id points to the register token that has been
+    // peeked, but not committed.
+    (void) self;
+    (void) arg;
+    // Very much TODO.
+    return pars_yield_error(self, ++self->cur_tok_id);
+}
+
+static int pars_parse_arg(
+        struct pars *const self, struct arg *const arg)
+{
+    if (pars_is_eof_reached(self)) {
+        return OK;
+    }
+    const size_t first_token_id = self->cur_tok_id;
+    arg->first_token = first_token_id;
+    const struct token token0 = pars_peek(self);
+    if (token0.type == TT_HASH) {
+        // Definitely an immediate value expression
+        pars_commit(self);
+        const int ret = pars_parse_expr(self, &arg->expr);
+        if (ret != OK) {
+            return ret;
+        }
+        arg->type = ARG_IMMEDIATE;
+        arg->num_tokens = self->cur_tok_id - first_token_id;
+        return OK;
+    } else if (token0.type == TT_MINUS) {
+        // It is either expression or -(An)
+        return pars_parse_arg_starts_with_minus(self, arg);
+    } else if (token0.type == TT_TILDE || token_is_number(token0.type)) {
+        // Tilde is unary operation, so it must be an expression
+        const int ret = pars_parse_expr(self, &arg->expr);
+        if (ret != OK) {
+            return ret;
+        }
+        return pars_parse_arg_after_prefix_expr(self, arg);
+    } else if (token0.type == TT_LPAREN) {
+        // It is either expression or addressing mode (An) / (An)+ / (d16,An) /
+        // (d8,An,Xn) / (d8,PC,Xn) / (d16,An)
+        pars_commit(self);
+        return pars_parse_arg_inside_parens(self, arg);
+    } else if (token0.type == TT_ID) {
+        // It is either expression, regmask or just An/Dn/PC/SR/SP/CCR register
+        struct token_recognition r = pars_recognize_token(self, token0);
+        if (r.type == RTT_REG) {
+            // This is definitely a register or regmask.
+            switch (r.reg) {
+            case REG_NONE:
+                UNREACHABLE();
+                return pars_yield_error(self, first_token_id);
+            case REG_DN:
+                if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) {
+                    // Note: the register is not committed
+                    return pars_parse_arg_regmask(self, arg);
+                }
+                arg->type = ARG_DN;
+                arg->xn = r.reg_num;
+                break;
+            case REG_AN:
+                if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) {
+                    // Note: the register is not committed
+                    return pars_parse_arg_regmask(self, arg);
+                }
+                arg->type = ARG_AN;
+                arg->xn = r.reg_num;
+                break;
+            case REG_PC:
+                return pars_yield_error(self, first_token_id);
+            case REG_SR:
+                arg->type = ARG_SR;
+                break;
+            case REG_CCR:
+                arg->type = ARG_CCR;
+                break;
+            case REG_USP:
+                arg->type = ARG_USP;
+                break;
+            }
+            pars_commit(self);
+            arg->num_tokens = self->cur_tok_id - first_token_id;
+            return OK;
+        } else {
+            const int ret = pars_parse_expr(self, &arg->expr);
+            if (ret != OK) {
+                return ret;
+            }
+            return pars_parse_arg_after_prefix_expr(self, arg);
+        }
     }
     return OK;
 }
@@ -1509,8 +1934,11 @@ static int pars_yield_instruction(
     if (mnemonic == MN_NONE) {
         return pars_yield_error(self, mnemonic_id);
     }
-    if (arg2) {
-        assert(arg1);
+    if (arg2->type != ARG_NONE) {
+        assert(arg1->type != ARG_NONE);
+    }
+    if (arg1->type == ARG_NONE) {
+        assert(arg1->type == ARG_NONE);
     }
     const enum args_count args_count = get_args_count_for_mnemonic(mnemonic);
     // Validate instruction arguments count
@@ -1519,36 +1947,24 @@ static int pars_yield_instruction(
         UNREACHABLE();
         break;
     case ARGS_COUNT_0:
-        if (arg1) {
+        if (arg1->type != ARG_NONE) {
             return pars_yield_error(self, arg1->first_token);
         }
         break;
-    case ARGS_COUNT_0_1:
-        if (arg2) {
-            return pars_yield_error(self, arg2->first_token);
-        }
-        break;
-    case ARGS_COUNT_0_1_2:
-        break;
-    case ARGS_COUNT_0_2:
-        if (arg1 && !arg2) {
-            return pars_yield_error(self, mnemonic_id);
-        }
-        break;
     case ARGS_COUNT_1:
-        if (!arg1) {
+        if (arg1->type == ARG_NONE) {
             return pars_yield_error(self, mnemonic_id);
-        } else if (arg2) {
+        } else if (arg2->type != ARG_NONE) {
             return pars_yield_error(self, arg2->first_token);
         }
         break;
     case ARGS_COUNT_1_2:
-        if (!arg1) {
+        if (arg1->type == ARG_NONE) {
             return pars_yield_error(self, mnemonic_id);
         }
         break;
     case ARGS_COUNT_2:
-        if (!arg1 || !arg2) {
+        if (arg1->type == ARG_NONE || arg2->type == ARG_NONE) {
             return pars_yield_error(self, mnemonic_id);
         }
         break;
@@ -1582,21 +1998,19 @@ static int pars_parse_instruction_comment(
     size_t comment_id = 0;
     if (!pars_is_eof_reached(self)) {
         // Try parse comment
-        const size_t token1_id = self->cur_tok_id; // Peek comment
-        const struct token token1 = self->lex->tokbuf[token1_id];
+        const struct token token1 = pars_peek(self);
         const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||
             token1.type == TT_COMMENT_SEMICOLON;
         if (is_comment) {
-            self->cur_tok_id++; // Commit comment
-            comment_id = token1_id;
+            comment_id = pars_commit(self);
         }
-        if (!pars_is_eof_reached(self)) {
-            // Handle new line
-            const size_t nl_id = self->cur_tok_id++; // Commit new line
-            const struct token nl = self->lex->tokbuf[nl_id];
-            if (nl.type != TT_NEWLINE) {
-                return pars_yield_error(self, nl_id);
-            }
+    }
+    if (!pars_is_eof_reached(self)) {
+        // There must be a new line if not EOF
+        const size_t nl_id = pars_commit(self);
+        const struct token nl = self->lex->tokbuf[nl_id];
+        if (nl.type != TT_NEWLINE) {
+            return pars_yield_error(self, nl_id);
         }
     }
     return pars_yield_instruction(
@@ -1609,35 +2023,25 @@ static int pars_parse_instruction_args(
         const size_t mnemonic_id,
         const enum opsize opsize)
 {
-    struct arg arg1, arg2;
+    struct arg arg1 = {0}, arg2 = {0};
     // Try parse first argument
     const int res1 = pars_parse_arg(self, &arg1);
     if (res1 != OK) {
         return res1;
     }
-    if (arg1.arg_type == ARG_NONE) {
-        return pars_parse_instruction_comment(
-                self, label_id, mnemonic_id, opsize, NULL, NULL);
-    }
-    if (pars_is_eof_reached(self)) {
-        return pars_yield_instruction(
-                self, label_id, 0, mnemonic_id, opsize, &arg1, NULL);
-    }
-    const size_t comma_id = self->cur_tok_id; // Peek comma
-    const struct token comma = self->lex->tokbuf[comma_id];
-    if (comma.type != TT_COMMA) {
-        return pars_parse_instruction_comment(
-                self, label_id, mnemonic_id, opsize, NULL, NULL);
-    }
-    self->cur_tok_id++; // Commit comma
-    // Try parse second argument
-    const int res2 = pars_parse_arg(self, &arg2);
-    if (res2 != OK) {
-        return res2;
-    }
-    if (pars_is_eof_reached(self)) {
-        return pars_yield_instruction(
-                self, label_id, 0, mnemonic_id, opsize, &arg1, &arg2);
+    if (arg1.type != ARG_NONE) {
+        if (pars_is_eof_reached(self)) {
+            return pars_yield_instruction(
+                    self, label_id, 0, mnemonic_id, opsize, &arg1, NULL);
+        }
+        if (pars_peek(self).type == TT_COMMA) {
+            pars_commit(self);
+            // Try parse second argument
+            const int res2 = pars_parse_arg(self, &arg2);
+            if (res2 != OK) {
+                return res2;
+            }
+        }
     }
     // Finish parsing instruction, expect comment or newline
     return pars_parse_instruction_comment(
@@ -1652,10 +2056,9 @@ static int pars_parse_instruction(
     if (pars_is_eof_reached(self)) {
         return pars_yield_error_eof(self);
     }
-    const size_t size_spec_id = self->cur_tok_id; // Peek
-    const struct token size_spec = self->lex->tokbuf[size_spec_id];
+    const struct token size_spec = pars_peek(self);
     if (size_spec.type == TT_DOT_ID) {
-        self->cur_tok_id++; // Commit
+        const size_t size_spec_id = pars_commit(self);
         // Size specifier
         if (size_spec.length != 2) {
             return pars_yield_error(self, size_spec_id);
@@ -1699,7 +2102,7 @@ static int pars_yield_label_comment(
 static int pars_parse_labeled_statement(
         struct pars *const self, const size_t label_id)
 {
-    const size_t token1_id = self->cur_tok_id++;
+    const size_t token1_id = pars_commit(self);
     const struct token token1 = self->lex->tokbuf[token1_id];
     const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||
         token1.type == TT_COMMENT_SEMICOLON;
@@ -1711,17 +2114,15 @@ static int pars_parse_labeled_statement(
         if (pars_is_eof_reached(self)) {
             return pars_yield_error_eof(self);
         }
-        const size_t token2_id = self->cur_tok_id; // Peek
-        const struct token token2 = self->lex->tokbuf[token2_id];
+        const struct token token2 = pars_peek(self);
         if (!label_id && token2.type == TT_COLON) {
-            self->cur_tok_id++; // Commit
+            pars_commit(self);
             return pars_parse_labeled_statement(self, token1_id);
         } else if (token2.type == TT_EQ || token2.type == TT_EQ_DOUBLE) {
-            self->cur_tok_id++; // Commit
-            return pars_parse_assignment(self, label_id, token2_id);
-        } else {
-            return pars_parse_instruction(self, label_id, token1_id);
+            pars_commit(self);
+            return pars_parse_assignment(self, label_id, token1_id);
         }
+        return pars_parse_instruction(self, label_id, token1_id);
     } else if (token1.type == TT_DOT_ID) {
         return pars_parse_direc(self, &token1);
     }
author	Oxore <oxore@protonmail.com>	2023-06-25 23:13:44 +0300
committer	Oxore <oxore@protonmail.com>	2023-06-25 23:13:58 +0300
commit	e882f7f28ae38ad6b42f8d558d30a0bafd8e5b32 (patch)
tree	f0b3a6c47adcb6bc08b7b5a1e954bb4728fa771f
parent	66bfe8a24d9b1ca83d45396a9f9c962379d1895e (diff)