Fix reg parsing, refactor error messages, add more sanitizers

author: Oxore <oxore@protonmail.com> 2023-06-26 01:49:24 +0300
committer: Oxore <oxore@protonmail.com> 2023-06-26 01:49:24 +0300
commit: 2dd7bd6ca24b6a28ff36c5c441442d5885c0b611 (patch)
tree: 27f14cd43512559ecb6d3bbe15be218937f6ea5b
parent: e46fb8f882b00fe9d24b2dc8810b033164c4b10c (diff)
2 files changed, 41 insertions, 21 deletions
diff --git a/Makefile b/Makefile
index ef687ba..7267fbf 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 
 WARNFLAGS = -Wall -Wextra -pedantic -Wlogical-op
 INCLUDES = lib
-_FLAGS = -O2 -fsanitize=unreachable -fsanitize=address
+_FLAGS = -O0 -fsanitize=bounds-strict,unreachable,address,undefined
 _CFLAGS = $(CFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(_FLAGS) -pipe -g
 _CXXFLAGS = $(CXXFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(_FLAGS) -pipe -g
 LDSCRIPTS =
diff --git a/main.c b/main.c
index c134010..f9e0cfe 100644
--- a/main.c
+++ b/main.c
@@ -31,12 +31,12 @@
 #define E_UNIMPL "unimplemented"
 #define E_UNREACH "unreachable code reached"
 #define E_EXPR "expression token"
-#define E_EA_PART "An, Dn, PC or full expression"
-#define E_EA_PART_NOT_AN "Dn, PC or full expression"
-#define E_EA_PART_NOT_EXPR "An, Dn or PC"
+#define E_EA_PART "D0, ...D7, A0, ...A7, SP, PC or full expression"
+#define E_EA_PART_NOT_AN "D0, ...D7, PC or full expression"
+#define E_EA_PART_NOT_EXPR "D0, ...D7, A0, ...A7, SP or PC"
 #define E_EA_PART_DELIM "',' or ')'"
 #define E_EA_INVALID "invalid addressing mode"
-#define E_DN_AN "Dn or An"
+#define E_DN_AN "D0, ...D7, A0, ...A7 or SP"
 #define E_ARG "valid instruction argument"
 #define E_MNEMONIC "valid instruction mnemonic"
 #define E_INSN_SIZE_SPEC "'.s', '.b', '.w' or '.l'"
@@ -317,7 +317,7 @@ struct expr_tokens_span {
 struct arg {
     enum arg_type type;
     int8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An)
-    int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it is negative if An
+    int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi): bits 0-2 hold the register number, bit 3 (0x8) is set if Xi is An
     struct expr_tokens_span expr;
     size_t first_token, num_tokens; ///< Argument tokens span
 };
@@ -1267,6 +1267,22 @@ static const char *stmt_type_to_string(const enum stmt_type type)
     return "_UNKNOWN";
 }
 
+/// Print the source text of the tokens of `expr` to `s`, enclosed in '[' and ']'.
+static void fprint_expr(
+        const struct lex *const lex,
+        const struct expr_tokens_span *const expr,
+        FILE *const s)
+{
+    const size_t first = expr->first_token;
+    fputc('[', s);
+    // Output stops at the first newline token found inside the span.
+    for (size_t n = 0; n < expr->num_tokens && lex->tokbuf[first + n].type != TT_NEWLINE; n++) {
+        const struct token *const tok = &lex->tokbuf[first + n];
+        fprintf(s, "%.*s", (int)tok->length, lex->input + tok->offset);
+    }
+    fputc(']', s);
+}
+
 static void fprint_arg(
         const struct lex *const lex,
         const struct arg *const arg,
@@ -1276,35 +1292,39 @@ static void fprint_arg(
     switch (arg->type) {
     case ARG_NONE:
     case ARG_DN:
+        fprintf(s, " reg [d%d]", arg->xn);
+        break;
     case ARG_AN:
     case ARG_AN_ADDR:
     case ARG_AN_ADDR_INCR:
     case ARG_AN_ADDR_DECR:
-        fprintf(s, " reg %d", arg->xn);
+        fprintf(s, " reg [a%d]", arg->xn);
         break;
     case ARG_AN_ADDR_16:
-        fprintf(s, " reg %d", arg->xn);
-        fprintf(s, " d16 [see raw]"); // TODO print expr tokens
+        fprintf(s, " reg [a%d]", arg->xn);
+        fprintf(s, " d16 "), fprint_expr(lex, &arg->expr, s);
         break;
     case ARG_AN_ADDR_8_XI:
-        fprintf(s, " reg %d", arg->xn);
-        fprintf(s, " d8 [see raw]"); // TODO print expr tokens
-        fprintf(s, " xi %d", arg->xi);
+        fprintf(s, " reg [a%d]", arg->xn);
+        fprintf(s, " d8 "), fprint_expr(lex, &arg->expr, s);
+        fprintf(s, " xi [%c%d]", arg->xi & 0x8 ? 'a' : 'd', arg->xi & 0x7);
         break;
     case ARG_ADDR_WORD:
     case ARG_ADDR_LONG:
     case ARG_ADDR_UNSPEC:
-        fprintf(s, " addr [see raw]"); // TODO print expr tokens
+        fprintf(s, " addr "), fprint_expr(lex, &arg->expr, s);
         break;
     case ARG_PC_ADDR_16:
-        fprintf(s, " d16 [see raw]"); // TODO print expr tokens
+        fprintf(s, " reg [pc]");
+        fprintf(s, " d16 "), fprint_expr(lex, &arg->expr, s);
         break;
     case ARG_PC_ADDR_8_XI:
-        fprintf(s, " d8 [see raw]"); // TODO print expr tokens
-        fprintf(s, " xi %d", arg->xi);
+        fprintf(s, " reg [pc]");
+        fprintf(s, " d8 "), fprint_expr(lex, &arg->expr, s);
+        fprintf(s, " xi [%c%d]", arg->xi & 0x8 ? 'a' : 'd', arg->xi & 0x7);
         break;
     case ARG_IMMEDIATE:
-        fprintf(s, " value [see raw]"); // TODO print expr tokens
+        fprintf(s, " value "), fprint_expr(lex, &arg->expr, s);
         break;
     case ARG_REGMASK:
     case ARG_SR:
@@ -1586,13 +1606,13 @@ static struct token_recognition pars_recognize_token(
     const char *const str = self->lex->input + token.offset;
     if (token.type == TT_ID) {
         if (token.length == 2) {
-            if (tolower(str[0]) == 'a' && is_dec(str[1])) {
+            if (tolower(str[0]) == 'a' && is_oct(str[1])) {
                 return (struct token_recognition){
                     .type = RTT_REG,
                     .reg = REG_AN,
                     .reg_num = str[1] - '0',
                 };
-            } else if (tolower(str[0]) == 'd' && is_dec(str[1])) {
+            } else if (tolower(str[0]) == 'd' && is_oct(str[1])) {
                 return (struct token_recognition){
                     .type = RTT_REG,
                     .reg = REG_DN,
@@ -1869,14 +1889,14 @@ static int pars_parse_arg_inside_parens(
         // It is (d8,PC,Xn)
         assert((an1_found && !dn_found) || (!an1_found && dn_found));
         arg->type = ARG_PC_ADDR_8_XI;
-        arg->xi = an1_found ? -an1 : dn;
+        arg->xi = an1_found ? (an1 | 0x8) : dn;
         arg->num_tokens = self->cur_tok_id - arg->first_token;
         return OK;
     } else if (parts == 3 && an1_found && arg->expr.first_token && (an2_found || dn_found)) {
         // It is (d8,An,Xn)
         assert((an1_found && !dn_found) || (!an1_found && dn_found));
         arg->type = ARG_AN_ADDR_8_XI;
-        arg->xi = an2_found ? -an2 : dn;
+        arg->xi = an2_found ? (an2 | 0x8) : dn;
         arg->num_tokens = self->cur_tok_id - arg->first_token;
         return OK;
     }
author	Oxore <oxore@protonmail.com>	2023-06-26 01:49:24 +0300
committer	Oxore <oxore@protonmail.com>	2023-06-26 01:49:24 +0300
commit	2dd7bd6ca24b6a28ff36c5c441442d5885c0b611 (patch)
tree	27f14cd43512559ecb6d3bbe15be218937f6ea5b
parent	e46fb8f882b00fe9d24b2dc8810b033164c4b10c (diff)