Impl .def/.endef, fix instructions, ignore parsing errors

author: Oxore <oxore@protonmail.com> 2023-06-28 22:12:39 +0300
committer: Oxore <oxore@protonmail.com> 2023-06-28 22:14:01 +0300
commit: 04e380b927ee78f5474d1d87fa59383771c720aa (patch)
tree: 743f490104d19a0cbe2e92a4477d768bd0ce5719
parent: 43d1ce1201bb1f86345a9f74bbcf53e492f1dc70 (diff)
1 files changed, 98 insertions, 84 deletions
diff --git a/main.c b/main.c
index 51be920..db4dca3 100644
--- a/main.c
+++ b/main.c
@@ -59,6 +59,9 @@
 #define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL
 #define E_UNKNOWN_DRC "unknown directive"
 #define E_STR "string"
+#define E_ID "identifier"
+#define E_NESTED_DEF "nested .def ... .endef blocks are illegal"
+#define E_NMATCH_ENDEF ".endef directive without matching .def"
 
 #define ERR 0
 #define OK 1
@@ -189,7 +192,7 @@ enum mnemonic {
     MN_BLE,
     MN_BLS,
     MN_BLT,
-    MN_BMT,
+    MN_BMI,
     MN_BNE,
     MN_BPL,
     MN_BVC,
@@ -197,6 +200,7 @@ enum mnemonic {
     MN_BCHG,
     MN_BCLR,
     MN_BSET,
+    MN_BTST,
     MN_CHK,
     MN_CLR,
     MN_CMP,
@@ -214,7 +218,7 @@ enum mnemonic {
     MN_DBLE,
     MN_DBLS,
     MN_DBLT,
-    MN_DBMT,
+    MN_DBMI,
     MN_DBNE,
     MN_DBPL,
     MN_DBVC,
@@ -465,6 +469,7 @@ struct pars {
     // State
     size_t cur_tok_id;
     enum pars_error error;
+    bool in_sat; /// Indicates whether inside .def ... .endef block or not
     // Statement table
     FILE *stmttab_stream;
     struct stmt *stmttab;
@@ -541,7 +546,7 @@ const struct mnemonic_meta {
     { "ble",    ARGS_COUNT_1 },
     { "bls",    ARGS_COUNT_1 },
     { "blt",    ARGS_COUNT_1 },
-    { "bmt",    ARGS_COUNT_1 },
+    { "bmi",    ARGS_COUNT_1 },
     { "bne",    ARGS_COUNT_1 },
     { "bpl",    ARGS_COUNT_1 },
     { "bvc",    ARGS_COUNT_1 },
@@ -549,6 +554,7 @@ const struct mnemonic_meta {
     { "bchg",   ARGS_COUNT_2 },
     { "bclr",   ARGS_COUNT_2 },
     { "bset",   ARGS_COUNT_2 },
+    { "btst",   ARGS_COUNT_2 },
     { "chk",    ARGS_COUNT_2 },
     { "clr",    ARGS_COUNT_1 },
     { "cmp",    ARGS_COUNT_2 },
@@ -566,7 +572,7 @@ const struct mnemonic_meta {
     { "dble",   ARGS_COUNT_2 },
     { "dbls",   ARGS_COUNT_2 },
     { "dblt",   ARGS_COUNT_2 },
-    { "dbmt",   ARGS_COUNT_2 },
+    { "dbmi",   ARGS_COUNT_2 },
     { "dbne",   ARGS_COUNT_2 },
     { "dbpl",   ARGS_COUNT_2 },
     { "dbvc",   ARGS_COUNT_2 },
@@ -638,15 +644,15 @@ const struct mnemonic_meta {
 };
 
 static int pars_directive_skip(struct pars *, enum directive_type, size_t);
-static int pars_directive_handler_align(struct pars *, enum directive_type, size_t);
-static int pars_directive_handler_file(struct pars *, enum directive_type, size_t);
+static int pars_directive_handler_def(struct pars *, enum directive_type, size_t);
+static int pars_directive_handler_endef(struct pars *, enum directive_type, size_t);
 
 const struct directive_description {
     const char *str;
     int (*handler)(struct pars *, enum directive_type, size_t lable_id);
 } g_directives[DIRECTIVES_COUNT] = {
     { "",         NULL, },
-    { "align",    pars_directive_handler_align, },
+    { "align",    pars_directive_skip, },
     { "ascii",    pars_directive_skip, },
     { "bin",      pars_directive_skip, },
     { "bsection", pars_directive_skip, },
@@ -655,7 +661,7 @@ const struct directive_description {
     { "cmnt",     pars_directive_skip, },
     { "comm",     pars_directive_skip, },
     { "data",     pars_directive_skip, },
-    { "def",      pars_directive_skip, },
+    { "def",      pars_directive_handler_def, },
     { "dim",      pars_directive_skip, },
     { "double",   pars_directive_skip, },
     { "dsection", pars_directive_skip, },
@@ -664,12 +670,12 @@ const struct directive_description {
     { "else",     pars_directive_skip, },
     { "end",      pars_directive_skip, },
     { "endc",     pars_directive_skip, },
-    { "endef",    pars_directive_skip, },
+    { "endef",    pars_directive_handler_endef, },
     { "endif",    pars_directive_skip, },
     { "ends",     pars_directive_skip, },
     { "extend",   pars_directive_skip, },
     { "extern",   pars_directive_skip, },
-    { "file",     pars_directive_handler_file, },
+    { "file",     pars_directive_skip, },
     { "fill",     pars_directive_skip, },
     { "float",    pars_directive_skip, },
     { "fpdata",   pars_directive_skip, },
@@ -820,7 +826,7 @@ static const char *token_type_to_string(const enum token_type type)
     return "_UNKNOWN";
 }
 
-static int fprint_tok(const char *const input, struct token *token, FILE *const stream)
+static int fprint_token_debug(const char *const input, struct token *token, FILE *const stream)
 {
     int res = fprintf(stream, "%s<", token_type_to_string(token->type));
     if (res == -1) {
@@ -1560,7 +1566,7 @@ static void fprint_arg(
     fprintf(s, "])");
 }
 
-static int fprint_stmt(
+static int fprint_stmt_debug(
         const struct lex *const lex,
         struct stmt *const stmt,
         FILE *const s)
@@ -1597,13 +1603,15 @@ static int fprint_stmt(
                 s,
                 "\n\t(name \"%s\")",
                 directive_to_string(stmt->directive.type));
-        fprintf(s, "\n\t(arg (raw-tokens [");
-        fprint_tokens(
-                lex,
-                stmt->directive.first_token,
-                stmt->directive.num_tokens,
-                s);
-        fprintf(s, "]))");
+        if (stmt->directive.first_token && stmt->directive.num_tokens) {
+            fprintf(s, "\n\t(arg (raw-tokens [");
+            fprint_tokens(
+                    lex,
+                    stmt->directive.first_token,
+                    stmt->directive.num_tokens,
+                    s);
+            fprintf(s, "]))");
+        }
     }
     if (stmt->comment_token) {
         const struct token comment = lex->tokbuf[stmt->comment_token];
@@ -1626,6 +1634,33 @@ static int fwrite_stmt(const struct stmt *const stmt, FILE *const stream)
     return res;
 }
 
+static struct token pars_peek(const struct pars *const self)
+{ // Returns a copy of tokbuf[cur_tok_id]; cur_tok_id is left unchanged.
+    return self->lex->tokbuf[self->cur_tok_id];
+}
+
+static struct token pars_peek_more(
+        const struct pars *const self, const size_t more)
+{ // Returns a copy of tokbuf[cur_tok_id + more]; the index is not range-checked here.
+    return self->lex->tokbuf[self->cur_tok_id + more];
+}
+
+static size_t pars_commit(struct pars *const self)
+{ // Advances cur_tok_id by one and returns the value it had before the step.
+    return self->cur_tok_id++;
+}
+
+// Consume tokens through the next TT_NEWLINE, or until pars_is_eof_reached().
+static void pars_skip_to_newline(struct pars *const self)
+{
+    bool newline_consumed = false;
+    while (!newline_consumed && !pars_is_eof_reached(self)) {
+        // pars_commit() yields the id of the token it has just stepped over.
+        const size_t id = pars_commit(self);
+        newline_consumed = self->lex->tokbuf[id].type == TT_NEWLINE;
+    }
+}
+
 static int pars_yield_error_msg(
         struct pars *const self,
         const size_t token_id,
@@ -1652,6 +1687,7 @@ static int pars_yield_error_msg(
         }
     }
     fputs("^\n", stderr);
+    pars_skip_to_newline(self);
     return ERR;
 }
 
@@ -1686,6 +1722,7 @@ static int pars_yield_error_expected_str(
         fputc('~', stderr);
     }
     fputc('\n', stderr);
+    pars_skip_to_newline(self);
     return ERR;
 }
 
@@ -1722,22 +1759,6 @@ enum opsize get_opsize_from_specifier(const char size_specifier)
     return OPSIZE_NONE;
 }
 
-static struct token pars_peek(const struct pars *const self)
-{
-    return self->lex->tokbuf[self->cur_tok_id];
-}
-
-static struct token pars_peek_more(
-        const struct pars *const self, const size_t more)
-{
-    return self->lex->tokbuf[self->cur_tok_id + more];
-}
-
-static size_t pars_commit(struct pars *const self)
-{
-    return self->cur_tok_id++;
-}
-
 static bool is_pc(const char *const str)
 {
     return (str[0] == 'p' && str[1] == 'c') ||
@@ -1912,8 +1933,10 @@ static int pars_parse_expr(
     return OK;
 }
 
-static int pars_parse_comment_and_newline(
-        struct pars *const self, size_t *const output_comment_id)
+static int pars_parse_comment_and_newline2(
+        struct pars *const self,
+        size_t *const output_comment_id,
+        const bool allow_escape)
 {
     size_t comment_id = 0;
     if (!pars_is_eof_reached(self)) {
@@ -1929,7 +1952,7 @@ static int pars_parse_comment_and_newline(
         // There must be a new line if not EOF
         const size_t nl_id = pars_commit(self);
         const struct token nl = self->lex->tokbuf[nl_id];
-        if (nl.type != TT_NEWLINE) {
+        if (nl.type != TT_NEWLINE && !(allow_escape && nl.type == TT_ESCAPE)) {
             return pars_yield_error(
                     self, nl_id, comment_id ? E_NL : E_COMMENT_NL);
         }
@@ -1938,14 +1961,27 @@ static int pars_parse_comment_and_newline(
     return OK;
 }
 
+static int pars_parse_comment_and_newline(
+        struct pars *const self, size_t *const output_comment_id)
+{ // Forwards to pars_parse_comment_and_newline2() with allow_escape = false.
+    return pars_parse_comment_and_newline2(self, output_comment_id, false);
+}
+
 static int pars_finish_directive(
         struct pars *const self,
         const size_t label_id,
         const struct directive directive)
 {
-    // Finish parsing instruction, expect comment or newline
+    // Finish parsing directive, expect comment or newline, or even escape
+    // symbol in some cases.
+    const bool allow_escape = directive.type == DT_DEF ||
+        directive.type == DT_DIM || directive.type == DT_LINE ||
+        directive.type == DT_SCL || directive.type == DT_SIZE ||
+        directive.type == DT_TAG || directive.type == DT_TYPE ||
+        directive.type == DT_VAL ;
     size_t comment_id = 0;
-    const int ret = pars_parse_comment_and_newline(self, &comment_id);
+    const int ret = pars_parse_comment_and_newline2(
+            self, &comment_id, allow_escape);
     if (ret != OK) {
         return ret;
     }
@@ -1978,47 +2014,37 @@ static int pars_directive_skip(
         pars_commit(self);
         num_tokens++;
     }
-    const struct directive directive = {
-        .type = drc,
-        .first_token = first_token,
-        .num_tokens = num_tokens,
-    };
+    const struct directive directive = { drc, first_token, num_tokens };
     return pars_finish_directive(self, label_id, directive);
 }
 
-static int pars_directive_handler_align(
+static int pars_directive_handler_def( // handler registered for "def" in g_directives
         struct pars *const self,
         const enum directive_type drc,
         const size_t label_id)
 {
-    struct expr_tokens_span expr;
-    const int ret = pars_parse_expr(self, &expr);
-    if (ret != OK) {
-        return ret;
+    const struct token token = pars_peek(self);
+    if (token.type != TT_ID) { // the directive's operand must be an identifier
+        return pars_yield_error(self, self->cur_tok_id, E_ID);
     }
-    const struct directive directive = {
-        .type = drc,
-        .first_token = expr.first_token,
-        .num_tokens = expr.num_tokens,
-    };
+    if (self->in_sat) { // a previous .def has not been closed by .endef yet
+        return pars_yield_error_msg(self, self->cur_tok_id, E_NESTED_DEF);
+    }
+    self->in_sat = true; // reset to false by the .endef handler
+    const struct directive directive = { drc, pars_commit(self), 1 }; // consume the identifier
     return pars_finish_directive(self, label_id, directive);
 }
 
-static int pars_directive_handler_file(
+static int pars_directive_handler_endef( // handler registered for "endef" in g_directives
         struct pars *const self,
         const enum directive_type drc,
         const size_t label_id)
 {
-    const struct token filename = pars_peek(self);
-    if (filename.type != TT_STRING) {
-        return pars_yield_error(self, self->cur_tok_id, E_STR);
+    if (!self->in_sat) { // no .def block is currently open
+        return pars_yield_error_msg(self, self->cur_tok_id, E_NMATCH_ENDEF);
     }
-    const size_t first_token = pars_commit(self);
-    const struct directive directive = {
-        .type = drc,
-        .first_token = first_token,
-        .num_tokens = 1
-    };
+    self->in_sat = false; // leave the block entered by the .def handler
+    const struct directive directive = { drc, 0, 0 }; // no argument tokens are consumed here
     return pars_finish_directive(self, label_id, directive);
 }
 
@@ -2566,22 +2592,9 @@ static int pars_parse_instruction_args(
     }
     // Finish parsing instruction, expect comment or newline
     size_t comment_id = 0;
-    if (!pars_is_eof_reached(self)) {
-        // Try parse comment
-        const struct token token1 = pars_peek(self);
-        const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||
-            token1.type == TT_COMMENT_SEMICOLON;
-        if (is_comment) {
-            comment_id = pars_commit(self);
-        }
-    }
-    if (!pars_is_eof_reached(self)) {
-        // There must be a new line if not EOF
-        const size_t nl_id = pars_commit(self);
-        const struct token nl = self->lex->tokbuf[nl_id];
-        if (nl.type != TT_NEWLINE) {
-            return pars_yield_error(self, nl_id, E_NL);
-        }
+    const int ret = pars_parse_comment_and_newline(self, &comment_id);
+    if (ret != OK) {
+        return ret;
     }
     return pars_yield_instruction(
             self, label_id, comment_id, mnemonic_id, opsize, &arg1, &arg2);
@@ -2688,7 +2701,8 @@ static int pars_run(struct pars *const self)
     while (self->cur_tok_id < self->lex->tokens_count) {
         ret = pars_parse_statement(self);
         if (ret != OK) {
-            break;
+            // Don't really care about parsing errors right now
+            ret = OK;
         }
     }
     fflush(self->stmttab_stream);
@@ -2727,12 +2741,12 @@ static int assem_emit(struct assem *const self, FILE *const stream)
     const struct pars *const pars = self->pars;
     if (TRACE_LEXER) {
         for (size_t i = 1; i < lex->tokbuf_size / (sizeof *lex->tokbuf); i++) {
-            fprint_tok(lex->input, &lex->tokbuf[i], stream);
+            fprint_token_debug(lex->input, &lex->tokbuf[i], stream);
         }
     }
     if (TRACE_PARSER) {
         for (size_t i = 1; i < pars->stmttab_size / (sizeof *pars->stmttab); i++) {
-            fprint_stmt(lex, pars->stmttab + i, stream);
+            fprint_stmt_debug(lex, pars->stmttab + i, stream);
         }
     }
     return OK;
author	Oxore <oxore@protonmail.com>	2023-06-28 22:12:39 +0300
committer	Oxore <oxore@protonmail.com>	2023-06-28 22:14:01 +0300
commit	04e380b927ee78f5474d1d87fa59383771c720aa (patch)
tree	743f490104d19a0cbe2e92a4477d768bd0ce5719
parent	43d1ce1201bb1f86345a9f74bbcf53e492f1dc70 (diff)