Impl regmask parsing

author: Oxore <oxore@protonmail.com> 2023-06-27 00:18:25 +0300
committer: Oxore <oxore@protonmail.com> 2023-06-27 00:18:25 +0300
commit: 2b230a858d7f4a928e2b43b3501fb18016247b14 (patch)
tree: cea923a4065752e9710502098765bf486ba2b785
parent: dc93b017b6c380d7c255ac448f51c10e68d71264 (diff)
1 files changed, 165 insertions, 57 deletions
diff --git a/main.c b/main.c
index 76aaf7d..b38594f 100644
--- a/main.c
+++ b/main.c
@@ -31,21 +31,28 @@
 #define E_UNIMPL "unimplemented"
 #define E_UNREACH "unreachable code reached"
 #define E_EXPR "'(', ')', unary operator, binary operator, number or symbol"
-#define E_EXPR_NONREG "symbol that is not a register"
+#define E_EXPR_NONREG "symbol that is not a register when parsing expression"
 #define E_EXPR_OPEN "'(', number or symbol"
 #define E_EXPR_CLOSE "')', '+', '-', '+', '/' or symbol"
+#define E_DN "D0, ...D7"
+#define E_AN "A0, ...A7, SP"
+#define E_AN_DN E_AN " or " E_DN
+#define E_REGMASK_DELIM "'/' or '-'"
+#define E_REGMASK_TOKEN "'/', '-', " E_AN_DN
+#define E_REGMASK_ASCEND "registers in register mask range must be specified " \
+    "in ascending order"
 #define E_EA_PART "D0, ...D7, A0, ...A7, SP, PC or full expression"
 #define E_EA_PART_NOT_AN "D0, ...D7, PC or full expression"
-#define E_EA_PART_NOT_EXPR "D0, ...D7 A0, ...A7, SP, or PC"
+#define E_EA_PART_NOT_EXPR E_AN ", " E_DN ", or PC"
 #define E_EA_PART_DELIM "',' or ')'"
 #define E_EA_INVALID "invalid addressing mode"
-#define E_DN_AN "D0, ...D7, A0, ...A7 or SP"
 #define E_ARG "valid instruction argument"
 #define E_MNEMONIC "valid instruction mnemonic"
 #define E_INSN_SIZE_SPEC "'.s', '.b', '.w' or '.l'"
 #define E_ADDR_SIZE_SPEC "'.b', '.w' or '.l'"
 #define E_ARGS_COUNT "invalid arguments count"
-#define E_NL "new line '\\n', '\\r\\n' or '\\r'"
+#define E_NL "new line, which is '\\n', '\\r\\n' or '\\r'"
+#define E_INSTR_END "',', comment or " E_NL
 #define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC
 #define E_DIRECTIVE "directive"
 #define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL
@@ -320,8 +327,9 @@ struct expr_tokens_span {
 
 struct arg {
     enum arg_type type;
-    int8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An)
-    int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it has 0x8 mask set if An
+    uint16_t regmask; ///< For ARG_REGMASK (movem only): bits 0-7 are D0-D7, bits 8-15 are A0-A7
+    uint8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An)
+    uint8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it has 0x8 mask set if An
     struct expr_tokens_span expr;
     size_t first_token, num_tokens; ///< Argument tokens span
 };
@@ -387,7 +395,7 @@ struct token_recognition {
     union {
         struct {
             enum reg_type reg;
-            int8_t reg_num;
+            uint8_t reg_num;
         }; // For RTT_REG
         int32_t number; // For TT_ID
         size_t symbol_id; // For TT_ID and TT_DOT_ID, see (struct pars).symtab
@@ -1355,6 +1363,23 @@ static void fprint_arg(
         fprintf(s, " value "), fprint_expr(lex, &arg->expr, s);
         break;
     case ARG_REGMASK:
+        fprintf(s, " regs [");
+        {
+            bool leading_space = false;
+            for (unsigned i = 0; i < 8; i++) {
+                if (arg->regmask & (1 << i)) {
+                    fprintf(s, "%sd%d", leading_space ? " " : "", i);
+                    leading_space = true;
+                }
+            }
+            for (unsigned i = 0; i < 8; i++) {
+                if (arg->regmask & (1 << (i + 8))) {
+                    fprintf(s, "%sa%d", leading_space ? " " : "", i);
+                    leading_space = true;
+                }
+            }
+        }
+        fprintf(s, "]");
     case ARG_SR:
     case ARG_CCR:
     case ARG_USP:
@@ -1502,17 +1527,6 @@ static int pars_yield_error(
     return pars_yield_error_expected_str(self, l, found, token.length, expected);
 }
 
-static int pars_yield_error_nesting(
-        struct pars *const self,
-        const size_t expression_start_token_id,
-        const size_t expression_length_tokens)
-{
-    (void) self;
-    (void) expression_start_token_id;
-    (void) expression_length_tokens;
-    return ERR;
-}
-
 static int pars_yield_error_eof(
         struct pars *const self, const char *const expected)
 {
@@ -1839,7 +1853,7 @@ static int pars_parse_arg_inside_parens(
     // (Xi,PC,expr)
     bool an1_found = false, an2_found = false, dn_found = false;
     bool pc_found = false;
-    int8_t an1 = 0, an2 = 0, dn = 0;
+    uint8_t an1 = 0, an2 = 0, dn = 0;
     unsigned parts = arg->expr.first_token ? 1 : 0;
     while (parts < 3) {
         if (pars_is_eof_reached(self)) {
@@ -1964,10 +1978,113 @@ static int pars_parse_arg_regmask(
 {
     // At this point cur_tok_id points to the register token that has been
     // peeked, but not committed.
-    (void) self;
-    (void) arg;
-    // Very much TODO.
-    return pars_yield_error_msg(self, ++self->cur_tok_id, E_UNIMPL);
+    bool range = false, delimiter = true, range_an = false; // range: '-' seen; delimiter: at start or after '/'; range_an: `reg` is An
+    uint16_t regmask = 0; // Bits 0-7 are D0-D7, bits 8-15 are A0-A7
+    bool reg_found = false; // A register is pending in `reg`, not yet in regmask
+    uint8_t reg = 0; // Number of the pending register (also the range start)
+    while (1) {
+        if (pars_is_eof_reached(self)) {
+            if (range) { // Input ended right after '-'
+                return pars_yield_error_eof(self, range_an ? E_AN : E_DN);
+            }
+            return OK; // NOTE(review): arg->type and arg->regmask are not stored on this path - confirm intended
+        }
+        const struct token token = pars_peek(self);
+        if (token.type == TT_ID) {
+            struct token_recognition r = pars_recognize_token(self, token);
+            if (r.type == RTT_REG) {
+                if (r.reg == REG_AN) {
+                    if (range) { // This An closes a range started by `reg`
+                        assert(reg_found);
+                        if (!range_an) { // Range was started by Dn
+                            return pars_yield_error(
+                                    self, self->cur_tok_id, E_AN);
+                        }
+                        if (r.reg_num < reg) { // Range end must not precede its start
+                            return pars_yield_error_msg(
+                                    self, self->cur_tok_id, E_REGMASK_ASCEND);
+                        }
+                        range = false;
+                        for (int i = reg; i <= r.reg_num; i++) { // Inclusive range
+                            regmask |= 1 << (i + 8); // An occupy bits 8-15
+                        }
+                        reg_found = false;
+                    } else if (delimiter) { // First register or one following '/'
+                        delimiter = false;
+                        reg_found = true;
+                        reg = r.reg_num;
+                        range_an = true;
+                    } else { // Register not preceded by '/' or '-'
+                        return pars_yield_error(
+                                self, self->cur_tok_id, E_REGMASK_DELIM);
+                    }
+                } else if (r.reg == REG_DN) {
+                    if (range) { // This Dn closes a range started by `reg`
+                        assert(reg_found);
+                        if (range_an) { // Range was started by An
+                            return pars_yield_error(
+                                    self, self->cur_tok_id, E_DN);
+                        }
+                        if (r.reg_num < reg) { // Range end must not precede its start
+                            return pars_yield_error_msg(
+                                    self, self->cur_tok_id, E_REGMASK_ASCEND);
+                        }
+                        range = false;
+                        for (int i = reg; i <= r.reg_num; i++) { // Inclusive range
+                            regmask |= 1 << i; // Dn occupy bits 0-7
+                        }
+                        reg_found = false;
+                    } else if (delimiter) { // First register or one following '/'
+                        delimiter = false;
+                        reg_found = true;
+                        reg = r.reg_num;
+                        range_an = false;
+                    } else { // Register not preceded by '/' or '-'
+                        return pars_yield_error(
+                                self, self->cur_tok_id, E_REGMASK_DELIM);
+                    }
+                } else { // Recognized register, but neither An nor Dn
+                    return pars_yield_error(
+                            self, self->cur_tok_id,
+                            (range || delimiter) ? E_AN_DN : E_REGMASK_DELIM);
+                }
+            } else { // Identifier that is not a register
+                return pars_yield_error(
+                        self, self->cur_tok_id,
+                        (range || delimiter) ? E_AN_DN : E_REGMASK_DELIM);
+            }
+        } else if (token.type == TT_SLASH) {
+            if (range || delimiter) { // A register is expected here, not '/'
+                return pars_yield_error(self, self->cur_tok_id, E_AN_DN);
+            }
+            if (reg_found) { // Fold the pending single register into the mask
+                reg_found = false;
+                regmask |= 1 << (reg + (range_an ? 8 : 0));
+            }
+            delimiter = true;
+        } else if (token.type == TT_MINUS) {
+            if (range || delimiter) { // A register is expected here, not '-'
+                return pars_yield_error(self, self->cur_tok_id, E_AN_DN);
+            }
+            range = true; // NOTE(review): also reachable right after a finished range ("d0-d3-d5"), where reg_found is false and assert(reg_found) would fire
+        } else if (regmask) { // NOTE(review): a lone pending register (regmask still 0) takes the error branch below - confirm callers never hit that
+            // Do not commit here because it is not ours token
+            if (reg_found) { // Fold the pending single register into the mask
+                reg_found = false;
+                regmask |= 1 << (reg + (range_an ? 8 : 0));
+            }
+            arg->type = ARG_REGMASK;
+            arg->regmask = regmask;
+            return OK;
+        } else {
+            return pars_yield_error(
+                    self, self->cur_tok_id,
+                    (range || delimiter) ? E_AN_DN : E_REGMASK_DELIM);
+        }
+        pars_commit(self); // Accept the peeked token handled by a branch above
+    }
+    UNREACHABLE(); // The loop above has no break, it only exits via return
+    return pars_yield_error_msg(self, ++self->cur_tok_id, E_UNREACH);
 }
 
 static int pars_parse_arg(
@@ -2030,7 +2147,7 @@ static int pars_parse_arg(
                 arg->xn = r.reg_num;
                 break;
             case REG_PC:
-                return pars_yield_error(self, first_token_id, E_DN_AN);
+                return pars_yield_error(self, first_token_id, E_AN_DN);
             case REG_SR:
                 arg->type = ARG_SR;
                 break;
@@ -2123,36 +2240,6 @@ static int pars_yield_instruction(
     return OK;
 }
 
-static int pars_parse_instruction_comment(
-        struct pars *const self,
-        const size_t label_id,
-        const size_t mnemonic_id,
-        const enum opsize opsize,
-        const struct arg *const arg1,
-        const struct arg *const arg2)
-{
-    size_t comment_id = 0;
-    if (!pars_is_eof_reached(self)) {
-        // Try parse comment
-        const struct token token1 = pars_peek(self);
-        const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||
-            token1.type == TT_COMMENT_SEMICOLON;
-        if (is_comment) {
-            comment_id = pars_commit(self);
-        }
-    }
-    if (!pars_is_eof_reached(self)) {
-        // There must be a new line if not EOF
-        const size_t nl_id = pars_commit(self);
-        const struct token nl = self->lex->tokbuf[nl_id];
-        if (nl.type != TT_NEWLINE) {
-            return pars_yield_error(self, nl_id, E_NL);
-        }
-    }
-    return pars_yield_instruction(
-            self, label_id, comment_id, mnemonic_id, opsize, arg1, arg2);
-}
-
 static int pars_parse_instruction_args(
         struct pars *const self,
         const size_t label_id,
@@ -2167,7 +2254,8 @@ static int pars_parse_instruction_args(
     }
     if (arg1.type != ARG_NONE) {
         if (!pars_is_eof_reached(self)) {
-            if (pars_peek(self).type == TT_COMMA) {
+            const struct token token = pars_peek(self);
+            if (token.type == TT_COMMA) {
                 pars_commit(self);
                 // Try parse second argument
                 if (pars_is_eof_reached(self)) {
@@ -2180,12 +2268,32 @@ static int pars_parse_instruction_args(
                 if (res2 != OK) {
                     return res2;
                 }
+            } else if (token.type != TT_COMMENT_SEMICOLON && token.type != TT_NEWLINE) {
+                return pars_yield_error(self, self->cur_tok_id, E_INSTR_END);
             }
         }
     }
     // Finish parsing instruction, expect comment or newline
-    return pars_parse_instruction_comment(
-            self, label_id, mnemonic_id, opsize, &arg1, &arg2);
+    size_t comment_id = 0;
+    if (!pars_is_eof_reached(self)) {
+        // Try parse comment
+        const struct token token1 = pars_peek(self);
+        const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||
+            token1.type == TT_COMMENT_SEMICOLON;
+        if (is_comment) {
+            comment_id = pars_commit(self);
+        }
+    }
+    if (!pars_is_eof_reached(self)) {
+        // There must be a new line if not EOF
+        const size_t nl_id = pars_commit(self);
+        const struct token nl = self->lex->tokbuf[nl_id];
+        if (nl.type != TT_NEWLINE) {
+            return pars_yield_error(self, nl_id, E_NL);
+        }
+    }
+    return pars_yield_instruction(
+            self, label_id, comment_id, mnemonic_id, opsize, &arg1, &arg2);
 }
 
 static int pars_parse_instruction(
author	Oxore <oxore@protonmail.com>	2023-06-27 00:18:25 +0300
committer	Oxore <oxore@protonmail.com>	2023-06-27 00:18:25 +0300
commit	2b230a858d7f4a928e2b43b3501fb18016247b14 (patch)
tree	cea923a4065752e9710502098765bf486ba2b785
parent	dc93b017b6c380d7c255ac448f51c10e68d71264 (diff)