Impl expr numbers parsing and function type and size

author: Oxore <oxore@protonmail.com> 2023-10-21 07:24:48 +0300
committer: Oxore <oxore@protonmail.com> 2023-10-21 07:24:48 +0300
commit: 6529ed369f4da544c31aa1db0b23fa4eb4663013 (patch)
tree: 87026787bc121faa721823ad65c53f92d73214da
parent: ad54e3c0a58e98927ec7e43418d593412ba6fa9f (diff)
3 files changed, 385 insertions, 82 deletions
diff --git a/main.c b/main.c
index 5277aad..289af3c 100644
--- a/main.c
+++ b/main.c
@@ -23,17 +23,23 @@
 #endif
 
 #if defined(__GNUC__) || defined(__clang__)
+#if !defined(NDEBUG)
+#define UNREACHABLE() assert(false)
+#else
 #define UNREACHABLE __builtin_unreachable
+#endif
 #else
 #define UNREACHABLE()
 #endif
 
 #define E_NIMPL "not implemented"
 #define E_UNREACH "unreachable code reached"
-#define E_EXPR "'(', ')', unary operator, binary operator, number or symbol"
+#define E_EXPR "'(', ')', unary operator, binary operator, number or identifier"
 #define E_EXPR_NONREG "symbol that is not a register when parsing expression"
-#define E_EXPR_OPEN "'(', number or symbol"
-#define E_EXPR_CLOSE "')', '+', '-', '+', '/' or symbol"
+#define E_EXPR_OPEN_WITH_ID "'-', '(', number or identifier" // Operand expected
+#define E_EXPR_CLOSE_WITH_ID "')', '+', '-', '*', '/' or identifier"
+#define E_EXPR_OPEN "'-', '(' or number" // Operand expected, identifiers not allowed
+#define E_EXPR_CLOSE "')', '+', '-', '*' or '/'"
 #define E_DN "D0, ...D7"
 #define E_AN "A0, ...A7, SP"
 #define E_AN_DN E_AN " or " E_DN
@@ -60,16 +66,28 @@
 #define E_DIRECTIVE "directive"
 #define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL
 #define E_UNKNOWN_DRC "unknown directive"
+#define E_NUM "number"
 #define E_STR "string"
 #define E_ID "identifier"
+#define E_ID_NUM_DOT E_ID ", " E_NUM " or '.'"
 #define E_NESTED_DEF "nested .def ... .endef blocks are illegal"
 #define E_NMATCH_ENDEF ".endef directive without matching .def"
+#define E_MULTIPLE_VAL "multiple .val directives specified"
+#define E_MULTIPLE_SCL "multiple .scl directives specified"
+#define E_MULTIPLE_TYPE "multiple .type directives specified"
+#define E_MAX_NESTING "maximum expression nesting level reached"
 
 #define ERR 0
 #define OK 1
 #define CONTINUE 2
-
 #define BCC_S_MAX_BACKWARDS 126
+#define EXPR_NESTING_MAX 10
+#define PARS_EXPR_FLAG_ALLOW_ID 1
+
+// These are from Sierra's FILE_FMT.H
+#define C_EFCN	    -1	    /* physical end of function		 */
+#define DT_FCN	    2	    /* function		 */
+
 
 enum token_type {
     TT_NONE = 0,
@@ -111,6 +129,7 @@ enum token_type {
 
 struct token {
     enum token_type type;
+    uint32_t value; ///< For TT_NUMOCT, TT_NUMDEC, TT_NUMHEX
     size_t offset;
     size_t length;
 };
@@ -151,6 +170,7 @@ struct lex {
     // State variables
     enum lex_state state;
     enum lex_error error;
+    uint32_t current_number_value;
     size_t cursor;
     size_t tok_offset;
     bool inside_line;
@@ -172,6 +192,7 @@ enum stmt_type {
     ST_ASSIGNMENT,
     ST_COMMENT,
     ST_DIRECTIVE,
+    ST_META_SAT, ///< Not a real statement, just an accumulation of .def .endef block
 };
 
 enum mnemonic {
@@ -392,8 +413,10 @@ enum args_count {
     ARGS_COUNT_2,
 };
 
-struct expr_tokens_span {
+struct expr {
     size_t first_token, num_tokens;
+    int32_t value;
+    bool value_is_resolved;
 };
 
 struct arg {
@@ -402,7 +425,7 @@ struct arg {
     uint8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An)
     uint8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it has 0x8 mask set if An
     enum opsize briefext_size;
-    struct expr_tokens_span expr;
+    struct expr expr;
     size_t first_token, num_tokens; ///< Argument tokens span
 };
 
@@ -418,23 +441,25 @@ struct directive {
     size_t first_token, num_tokens; ///< Directive arguments tokens span
 };
 
+/// Symbol Attribute Table (SAT, a `.def ... .endef` block)
+struct sat {
+    struct expr def_arg; ///< Token span of the identifier given to `.def`
+    struct expr val_arg; ///< `.val` argument; first_token == 0 until `.val` is seen
+    struct expr scl_arg; ///< `.scl` argument; first_token == 0 until `.scl` is seen
+    struct expr type_arg; ///< `.type` argument; first_token == 0 until `.type` is seen
+};
+
 struct stmt {
     enum stmt_type type;
+    uint32_t addr;
     union {
         struct instruction instruction;
         struct directive directive;
+        struct sat sat;
     };
     size_t label_token;
     size_t first_token, num_tokens; // Statement tokens span, may be NULL
     size_t comment_token;
-    uint32_t addr;
-};
-
-struct symbol {
-    size_t offset; // Byte offset in continuous null terminated symbol buffer
-    // Instead of strcmp every item in symtab we can compare hashes and get O(N)
-    // for search.
-    uint32_t hash;
 };
 
 enum pars_error {
@@ -481,10 +506,8 @@ struct pars {
     // State
     size_t cur_tok_id;
     enum pars_error error;
-    /*!
-     * SAT stands for Symbol Attribute Table
-     */
     bool in_sat; ///< Indicates whether inside `.def ... .endef` block or not
+    struct sat sat;
     // Statement table
     FILE *stmttab_stream;
     struct stmt *stmttab;
@@ -502,6 +525,7 @@ struct pars {
 
 struct assem {
     const struct pars *pars;
+    struct sat sat;
 };
 
 static int pars_parse_arg_inside_parens(
@@ -663,6 +687,9 @@ const struct mnemonic_meta {
 static int pars_directive_skip(struct pars *, enum directive_type, size_t);
 static int pars_directive_handler_def(struct pars *, enum directive_type, size_t);
 static int pars_directive_handler_endef(struct pars *, enum directive_type, size_t);
+static int pars_directive_handler_scl(struct pars *, enum directive_type, size_t);
+static int pars_directive_handler_type(struct pars *, enum directive_type, size_t);
+static int pars_directive_handler_val(struct pars *, enum directive_type, size_t);
 
 const struct directive_description {
     const char *str;
@@ -709,7 +736,7 @@ const struct directive_description {
     { "packed",   pars_directive_skip, },
     { "page",     pars_directive_skip, },
     { "reorg",    pars_directive_skip, },
-    { "scl",      pars_directive_skip, },
+    { "scl",      pars_directive_handler_scl,  },
     { "section",  pars_directive_skip, },
     { "short",    pars_directive_skip, },
     { "single",   pars_directive_skip, },
@@ -719,8 +746,8 @@ const struct directive_description {
     { "tag",      pars_directive_skip, },
     { "text",     pars_directive_skip, },
     { "tsection", pars_directive_skip, },
-    { "type",     pars_directive_skip, },
-    { "val",      pars_directive_skip, },
+    { "type",     pars_directive_handler_type, },
+    { "val",      pars_directive_handler_val,  },
     { "word",     pars_directive_skip, },
     { "xdef",     pars_directive_skip, },
     { "xref",     pars_directive_skip, },
@@ -800,6 +827,17 @@ static int fprint_string_escaped(
     return written;
 }
 
+/// Map a hex digit char to 0..15; expects a valid digit (lexer checks is_hex first).
+static int hex_digit_to_int(char c)
+{
+    if (c >= 'a' && c <= 'f') {
+        return c - 'a' + 10; // Letters start at ten, not at zero
+    } else if (c >= 'A' && c <= 'F') {
+        return c - 'A' + 10;
+    }
+    return c - '0'; // Decimal digit
+}
+
 static const char *token_type_to_string(const enum token_type type)
 {
     switch (type) {
@@ -1032,15 +1070,19 @@ static int lex_handle_next(struct lex *const self, const int c)
             self->state = LS_ID;
         } else if (c == '0') {
             self->tok_offset = self->cursor;
+            self->current_number_value = 0;
             self->state = LS_NUMOCTHEX;
         } else if (is_dec(c)) {
             self->tok_offset = self->cursor;
+            self->current_number_value = c - '0';
             self->state = LS_NUMDEC;
         } else if (c == '@') {
             self->tok_offset = self->cursor;
+            self->current_number_value = 0;
             self->state = LS_NUMOCT;
         } else if (c == '$') {
             self->tok_offset = self->cursor;
+            self->current_number_value = 0;
             self->state = LS_NUMHEX;
         } else if (c == '"') {
             self->tok_offset = self->cursor;
@@ -1058,59 +1100,59 @@ static int lex_handle_next(struct lex *const self, const int c)
             self->tok_offset = self->cursor;
             self->state = LS_DOT;
         } else if (c == ',') {
-            lex_yield_token(self, &(struct token){TT_COMMA, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_COMMA, 0, self->cursor, 1});
         } else if (c == '(') {
-            lex_yield_token(self, &(struct token){TT_LPAREN, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_LPAREN, 0, self->cursor, 1});
         } else if (c == ')') {
-            lex_yield_token(self, &(struct token){TT_RPAREN, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_RPAREN, 0, self->cursor, 1});
         } else if (c == '[') {
-            lex_yield_token(self, &(struct token){TT_LBRACKET, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_LBRACKET, 0, self->cursor, 1});
         } else if (c == ']') {
-            lex_yield_token(self, &(struct token){TT_RBRACKET, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_RBRACKET, 0, self->cursor, 1});
         } else if (c == '{') {
-            lex_yield_token(self, &(struct token){TT_LBRACE, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_LBRACE, 0, self->cursor, 1});
         } else if (c == '{') {
-            lex_yield_token(self, &(struct token){TT_RBRACE, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_RBRACE, 0, self->cursor, 1});
         } else if (c == '+') {
-            lex_yield_token(self, &(struct token){TT_PLUS, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_PLUS, 0, self->cursor, 1});
         } else if (c == '-') {
-            lex_yield_token(self, &(struct token){TT_MINUS, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_MINUS, 0, self->cursor, 1});
         } else if (c == '*') {
             if (self->inside_line) {
                 lex_yield_token(
-                        self, &(struct token){TT_ASTERISK, self->cursor, 1});
+                        self, &(struct token){TT_ASTERISK, 0, self->cursor, 1});
             } else {
                 self->tok_offset = self->cursor;
                 self->state = LS_COMMENT_ASTERISK;
             }
         } else if (c == '/') {
-            lex_yield_token(self, &(struct token){TT_SLASH, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_SLASH, 0, self->cursor, 1});
         } else if (c == '=') {
             self->tok_offset = self->cursor;
             self->state = LS_EQ;
         } else if (c == ':') {
-            lex_yield_token(self, &(struct token){TT_COLON, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_COLON, 0, self->cursor, 1});
         } else if (c == '%') {
-            lex_yield_token(self, &(struct token){TT_PERCENT, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_PERCENT, 0, self->cursor, 1});
         } else if (c == '#') {
-            lex_yield_token(self, &(struct token){TT_HASH, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_HASH, 0, self->cursor, 1});
         } else if (c == '!') {
-            lex_yield_token(self, &(struct token){TT_BANG, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_BANG, 0, self->cursor, 1});
         } else if (c == '~') {
-            lex_yield_token(self, &(struct token){TT_TILDE, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_TILDE, 0, self->cursor, 1});
         } else if (c == '&') {
-            lex_yield_token(self, &(struct token){TT_AMPERSAND, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_AMPERSAND, 0, self->cursor, 1});
         } else if (c == '|') {
-            lex_yield_token(self, &(struct token){TT_PIPE, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_PIPE, 0, self->cursor, 1});
         } else if (c == '^') {
-            lex_yield_token(self, &(struct token){TT_CAP, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_CAP, 0, self->cursor, 1});
         } else if (c == '\r') {
             self->tok_offset = self->cursor;
             self->state = LS_CR;
         } else if (c == '\n') {
-            lex_yield_token(self, &(struct token){TT_NEWLINE, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_NEWLINE, 0, self->cursor, 1});
         } else if (c == '\\') {
-            lex_yield_token(self, &(struct token){TT_ESCAPE, self->cursor, 1});
+            lex_yield_token(self, &(struct token){TT_ESCAPE, 0, self->cursor, 1});
         } else if (c == ' ' || c == '\t') {
             // ignore spaces and tabs
         } else if (c == EOF) {
@@ -1123,8 +1165,8 @@ static int lex_handle_next(struct lex *const self, const int c)
         break;
     case LS_CR: // Accumulate CRLF into single token
         {
-            const size_t size = c == '\n' ? 2 : 1; // 2 for CRLF, 1 for just CR
-            const struct token token = {TT_NEWLINE, self->tok_offset, size};
+            const size_t length = c == '\n' ? 2 : 1; // 2 for CRLF, 1 for just CR
+            const struct token token = {TT_NEWLINE, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
             self->state = LS_FREE;
             if (c != '\n') {
@@ -1136,7 +1178,7 @@ static int lex_handle_next(struct lex *const self, const int c)
     case LS_LSHIFT:
         if (c == '<') {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_LSHIFT, self->tok_offset, length};
+            const struct token token = {TT_LSHIFT, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
             self->state = LS_FREE;
         } else {
@@ -1146,7 +1188,7 @@ static int lex_handle_next(struct lex *const self, const int c)
     case LS_RSHIFT:
         if (c == '>') {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_RSHIFT, self->tok_offset, length};
+            const struct token token = {TT_RSHIFT, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
             self->state = LS_FREE;
         } else {
@@ -1157,7 +1199,7 @@ static int lex_handle_next(struct lex *const self, const int c)
         {
             const size_t length = (c == '=') ? 2 : 1;
             const enum token_type type = (c == '=') ? TT_EQ_DOUBLE : TT_EQ;
-            const struct token token = {type, self->tok_offset, length};
+            const struct token token = {type, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
         }
         self->state = LS_FREE;
@@ -1170,7 +1212,7 @@ static int lex_handle_next(struct lex *const self, const int c)
         if (is_alphanum(c) || c == '_') {
             self->state = LS_DOT_ID;
         } else {
-            lex_yield_token(self, &(struct token){TT_DOT, self->tok_offset, 1});
+            lex_yield_token(self, &(struct token){TT_DOT, 0, self->tok_offset, 1});
             self->state = LS_FREE;
             return lex_handle_next(self, c);
         }
@@ -1178,7 +1220,7 @@ static int lex_handle_next(struct lex *const self, const int c)
     case LS_DOT_ID:
         if (!is_alphanum(c) && c != '_') {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_DOT_ID, self->tok_offset, length};
+            const struct token token = {TT_DOT_ID, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
             self->state = LS_FREE;
             return lex_handle_next(self, c);
@@ -1187,7 +1229,7 @@ static int lex_handle_next(struct lex *const self, const int c)
     case LS_ID:
         if (!is_alphanum(c) && c != '_') {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_ID, self->tok_offset, length};
+            const struct token token = {TT_ID, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
             self->state = LS_FREE;
             return lex_handle_next(self, c);
@@ -1198,11 +1240,12 @@ static int lex_handle_next(struct lex *const self, const int c)
             self->state = LS_NUMHEX;
         } else if (is_oct(c)) {
             self->state = LS_NUMOCT;
+            return lex_handle_next(self, c);
         } else if (is_alphabetic(c) || c == '_') {
             return lex_yield_error(self, c);
         } else {
             assert((self->cursor - self->tok_offset) == 1);
-            const struct token token = {TT_NUMDEC, self->tok_offset, 1};
+            const struct token token = {TT_NUMDEC, 0, self->tok_offset, 1};
             lex_yield_token(self, &token);
             // It was just zero, handle this char in LS_FREE state then
             self->state = LS_FREE;
@@ -1210,11 +1253,16 @@ static int lex_handle_next(struct lex *const self, const int c)
         }
         break;
     case LS_NUMOCT:
-        if (is_alphabetic(c) || c == '_') {
+        if (is_oct(c)) {
+            self->current_number_value <<= 3;
+            self->current_number_value |= c - '0';
+        } else if (is_alphabetic(c) || c == '_') {
             return lex_yield_error(self, c);
-        } else if (!is_oct(c)) {
+        } else {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_NUMOCT, self->tok_offset, length};
+            const struct token token = {
+                TT_NUMOCT, self->current_number_value, self->tok_offset, length
+            };
             lex_yield_token(self, &token);
             // This token is finished, handle this char in LS_FREE state
             self->state = LS_FREE;
@@ -1223,13 +1271,16 @@ static int lex_handle_next(struct lex *const self, const int c)
         break;
     case LS_NUMHEX:
         if (is_hex(c)) {
-            // Keep calm
+            self->current_number_value <<= 4;
+            self->current_number_value |= hex_digit_to_int(c);
         } else if (is_alphabetic(c) || c == '_') {
             // Panik!
             return lex_yield_error(self, c);
         } else {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_NUMHEX, self->tok_offset, length};
+            const struct token token = {
+                TT_NUMHEX, self->current_number_value, self->tok_offset, length
+            };
             lex_yield_token(self, &token);
             // This token is finished, handle this char in LS_FREE state
             self->state = LS_FREE;
@@ -1237,11 +1288,16 @@ static int lex_handle_next(struct lex *const self, const int c)
         }
         break;
     case LS_NUMDEC:
-        if (is_alphabetic(c) || c == '_') {
+        if (is_dec(c)) {
+            self->current_number_value *= 10;
+            self->current_number_value += c - '0';
+        } else if (is_alphabetic(c) || c == '_') {
             return lex_yield_error(self, c);
-        } else if (!is_dec(c)) {
+        } else {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_NUMDEC, self->tok_offset, length};
+            const struct token token = {
+                TT_NUMDEC, self->current_number_value, self->tok_offset, length
+            };
             lex_yield_token(self, &token);
             // This token is finished, handle this char in LS_FREE state
             self->state = LS_FREE;
@@ -1253,7 +1309,7 @@ static int lex_handle_next(struct lex *const self, const int c)
             self->state = LS_STRING_ESC;
         } else if (c == '"') {
             const size_t length = self->cursor - self->tok_offset + 1;
-            const struct token token = {TT_STRING, self->tok_offset, length};
+            const struct token token = {TT_STRING, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
             // This token is finished
             self->state = LS_FREE;
@@ -1265,7 +1321,7 @@ static int lex_handle_next(struct lex *const self, const int c)
     case LS_COMMENT_ASTERISK:
         if (c == '\r' || c == '\n') {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_COMMENT_ASTERISK, self->tok_offset, length};
+            const struct token token = {TT_COMMENT_ASTERISK, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
             // This token is finished, handle this char in LS_FREE state
             self->state = LS_FREE;
@@ -1275,7 +1331,7 @@ static int lex_handle_next(struct lex *const self, const int c)
     case LS_COMMENT_SEMICOLON:
         if (c == '\r' || c == '\n') {
             const size_t length = self->cursor - self->tok_offset;
-            const struct token token = {TT_COMMENT_SEMICOLON, self->tok_offset, length};
+            const struct token token = {TT_COMMENT_SEMICOLON, 0, self->tok_offset, length};
             lex_yield_token(self, &token);
             // This token is finished, handle this char in LS_FREE state
             self->state = LS_FREE;
@@ -1309,7 +1365,7 @@ static int lex_next(struct lex *const self, FILE *const stream)
         }
         if (c == EOF) {
             // Add a hidden EOF token of 0 size
-            lex_yield_token(self, &(struct token){TT_NONE, self->cursor, 0});
+            lex_yield_token(self, &(struct token){TT_NONE, 0, self->cursor, 0});
             break;
         }
     }
@@ -1483,6 +1539,7 @@ static const char *stmt_type_to_string(const enum stmt_type type)
     case ST_ASSIGNMENT: return "ASSIGNMENT";
     case ST_COMMENT: return "COMMENT";
     case ST_DIRECTIVE: return "DIRECTIVE";
+    case ST_META_SAT: return "META_SAT";
     }
     return "_UNKNOWN";
 }
@@ -1508,7 +1565,7 @@ static int fprint_tokens(
 
 static void fprint_expr(
         const struct lex *const lex,
-        const struct expr_tokens_span *const expr,
+        const struct expr *const expr,
         FILE *const s)
 {
     fputc('[', s);
@@ -1757,6 +1814,8 @@ static size_t pars_commit(struct pars *const self)
 
 static void pars_skip_to_newline(struct pars *const self)
 {
+    // Reset state In case of inside of the .def .. .endef block
+    self->in_sat = false;
     while (!pars_is_eof_reached(self)) {
         const struct token nl = pars_peek(self);
         pars_commit(self);
@@ -1954,13 +2013,55 @@ static struct token_recognition pars_recognize_token(
     return (struct token_recognition){0};
 }
 
+struct expr_value { // Evaluation state of one parenthesis nesting level
+    enum token_type operator; ///< Binary operator awaiting its right operand, TT_NONE if none
+    int32_t value; ///< Value accumulated so far on this level
+    bool negative; ///< Toggled by every '-'; negates the next operand when set
+    bool bit_inverted; ///< Toggled by every '~'; inverts the next operand when set
+};
+
+/// Fold `b` into `a` with `op` (TT_NONE acts as '+'); wraps mod 2^32, never traps.
+static int32_t apply_binary_operator(int32_t a, int32_t b, enum token_type op)
+{
+    switch (op) {
+    case TT_ASTERISK: // Unsigned arithmetic, because signed overflow is UB
+        return (int32_t)((uint32_t)a * (uint32_t)b);
+    case TT_SLASH: // x / 0 yields 0 (NOTE(review): caller should diagnose it)
+        return (b == 0) ? 0 : (b == -1) ? (int32_t)((uint32_t)0 - (uint32_t)a) : a / b;
+    case TT_LSHIFT: // Shift count outside 0..31 is UB in C, yield 0 instead
+        return ((uint32_t)b < 32) ? (int32_t)((uint32_t)a << b) : 0;
+    case TT_RSHIFT:
+        return ((uint32_t)b < 32) ? a >> b : 0;
+    case TT_AMPERSAND:
+        return a & b;
+    case TT_PIPE:
+        return a | b;
+    case TT_CAP:
+        return a ^ b;
+    case TT_NONE: // No pending operator: plain accumulation
+    case TT_PLUS:
+        return (int32_t)((uint32_t)a + (uint32_t)b);
+    default:
+        UNREACHABLE(); // Presumably only TT_NONE or a binary operator gets here
+    }
+    return 0; // Keeps the function well-formed where UNREACHABLE() is a no-op
+}
+
 static int pars_parse_expr(
-        struct pars *const self, struct expr_tokens_span *const expr)
+        struct pars *const self,
+        struct expr *const expr,
+        int flags)
 {
     // This function is called only when expression is expected unconditionally,
     // so if the first token cannot be a part of expression, then error must be
     // yielded.
     const size_t first_token_id = self->cur_tok_id;
+    const char *const e_expr_open = (flags & PARS_EXPR_FLAG_ALLOW_ID)
+        ? E_EXPR_OPEN_WITH_ID : E_EXPR_OPEN;
+    const char *const e_expr_close = (flags & PARS_EXPR_FLAG_ALLOW_ID)
+        ? E_EXPR_CLOSE_WITH_ID : E_EXPR_CLOSE;
+    struct expr_value stack[EXPR_NESTING_MAX] = {{0}};
+    bool value_is_resolved = true;
     unsigned nesting = 0;
     // Otherwise expect open parenthesis, number, or unary operator.
     bool expect_close_or_binary = false;
@@ -1970,7 +2071,7 @@ static int pars_parse_expr(
                 assert(pars_is_eof_reached(self));
                 return pars_yield_error_eof(
                         self,
-                        expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN);
+                        expect_close_or_binary ? e_expr_close : e_expr_open);
             }
             break;
         }
@@ -1980,45 +2081,80 @@ static int pars_parse_expr(
                 if (nesting == 0) {
                     break;
                 }
-                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE);
+                return pars_yield_error(self, self->cur_tok_id, e_expr_close);
             } else {
                 nesting++;
+                if (nesting >= EXPR_NESTING_MAX) {
+                    return pars_yield_error_msg(self, self->cur_tok_id, E_MAX_NESTING);
+                }
             }
         } else if (token.type == TT_MINUS) {
             // Minus is both unary and binary operator, so it does not care
             // about expression parsing state
             expect_close_or_binary = false;
+            stack[nesting].negative = !stack[nesting].negative;
         } else if (token.type == TT_TILDE) {
             if (expect_close_or_binary) {
-                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE);
+                return pars_yield_error(self, self->cur_tok_id, e_expr_close);
             }
+            stack[nesting].bit_inverted = !stack[nesting].bit_inverted;
         } else if (token.type == TT_ID) {
+            if (0 == (flags & PARS_EXPR_FLAG_ALLOW_ID)) {
+                return pars_yield_error(
+                        self,
+                        self->cur_tok_id,
+                        expect_close_or_binary ? e_expr_close : e_expr_open);
+            }
+            value_is_resolved = false;
             if (expect_close_or_binary) {
-                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE);
+                return pars_yield_error(self, self->cur_tok_id, e_expr_close);
             }
             if (pars_recognize_token(self, token).type == RTT_REG) {
                 return pars_yield_error(self, self->cur_tok_id, E_EXPR_NONREG);
             }
             expect_close_or_binary = true;
+            stack[nesting].operator = TT_NONE;
+            stack[nesting].negative = false;
         } else if (token_is_number(token.type)) {
             if (expect_close_or_binary) {
-                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE);
+                return pars_yield_error(self, self->cur_tok_id, e_expr_close);
             }
             expect_close_or_binary = true;
+            int32_t value = token.value;
+            if (stack[nesting].negative) {
+                value = -value;
+            }
+            if (stack[nesting].bit_inverted) {
+                value = ~value;
+            }
+            stack[nesting].value = apply_binary_operator(
+                    stack[nesting].value, value, stack[nesting].operator);
+            stack[nesting].operator = TT_NONE;
         } else if (token_is_binary_operator(token.type)) {
             if (!expect_close_or_binary) {
-                return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN);
+                return pars_yield_error(self, self->cur_tok_id, e_expr_open);
             }
             expect_close_or_binary = false;
+            stack[nesting].operator = token.type;
         } else if (token.type == TT_RPAREN) {
             if (!expect_close_or_binary) {
-                return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN);
+                return pars_yield_error(self, self->cur_tok_id, e_expr_open);
             }
             if (nesting == 0) {
                 // This is not my closing parenthesis, should stop
                 break;
             }
             nesting--;
+            int32_t value = stack[nesting + 1].value;
+            if (stack[nesting].negative) {
+                value = -value;
+            }
+            if (stack[nesting].bit_inverted) {
+                value = ~value;
+            }
+            stack[nesting].value = apply_binary_operator(
+                    stack[nesting].value, value, stack[nesting].operator);
+            stack[nesting].operator = TT_NONE;
         } else {
             if (nesting == 0 && expect_close_or_binary) {
                 break;
@@ -2026,14 +2162,16 @@ static int pars_parse_expr(
             return pars_yield_error(
                     self,
                     self->cur_tok_id,
-                    expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN);
+                    expect_close_or_binary ? e_expr_close : e_expr_open);
         }
         pars_commit(self);
     }
     assert(first_token_id != self->cur_tok_id);
-    *expr = (struct expr_tokens_span){
+    *expr = (struct expr){
         .first_token = first_token_id,
         .num_tokens = self->cur_tok_id - first_token_id,
+        .value = stack[nesting].value,
+        .value_is_resolved = value_is_resolved,
     };
     return OK;
 }
@@ -2100,6 +2238,13 @@ static int pars_finish_directive(
         .num_tokens = self->cur_tok_id - first_token,
     };
     pars_put_stmt(self, &stmt);
+    if (directive.type == DT_ENDEF) {
+        struct stmt stmt = {
+            .type = ST_META_SAT,
+            .sat = self->sat,
+        };
+        pars_put_stmt(self, &stmt);
+    }
     return OK;
 }
 
@@ -2131,14 +2276,16 @@ static int pars_directive_handler_def(
         const size_t label_id)
 {
     const size_t name_token = self->cur_tok_id - 1;
-    const struct token token = pars_peek(self);
-    if (token.type != TT_ID) {
+    const struct token arg_token = pars_peek(self);
+    if (arg_token.type != TT_ID) {
         return pars_yield_error(self, self->cur_tok_id, E_ID);
     }
     if (self->in_sat) {
         return pars_yield_error_msg(self, self->cur_tok_id, E_NESTED_DEF);
     }
+    self->sat = (struct sat){0};
     self->in_sat = true;
+    self->sat.def_arg = (struct expr){self->cur_tok_id, 1, 0, false};
     const struct directive directive = { drc, name_token, pars_commit(self), 1 };
     return pars_finish_directive(self, label_id, directive);
 }
@@ -2157,6 +2304,86 @@ static int pars_directive_handler_endef(
     return pars_finish_directive(self, label_id, directive);
 }
 
+/// Parse `.scl <expr>` and store the expression in the SAT being accumulated.
+static int pars_directive_handler_scl(
+        struct pars *const self,
+        const enum directive_type drc,
+        const size_t label_id)
+{
+    const size_t name_token = self->cur_tok_id - 1;
+    if (!self->in_sat) { // in_sat is raised by the `.def` handler
+        return pars_yield_error_msg(
+                self, self->cur_tok_id, ".scl directive without matching .def");
+    }
+    if (self->sat.scl_arg.first_token) { // Nonzero span start: already given
+        return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_SCL);
+    }
+    struct expr expr = {0};
+    const int ret = pars_parse_expr(self, &expr, 0); // 0: identifiers rejected
+    if (ret != OK) {
+        return ret;
+    }
+    assert(expr.value_is_resolved);
+    const struct directive d = {drc, name_token, expr.first_token, expr.num_tokens};
+    self->sat.scl_arg = expr;
+    return pars_finish_directive(self, label_id, d);
+}
+
+/// Parse `.type <expr>` and store the expression in the SAT being accumulated.
+static int pars_directive_handler_type(
+        struct pars *const self,
+        const enum directive_type drc,
+        const size_t label_id)
+{
+    const size_t name_token = self->cur_tok_id - 1;
+    if (!self->in_sat) { // in_sat is raised by the `.def` handler
+        return pars_yield_error_msg(
+                self, self->cur_tok_id, ".type directive without matching .def");
+    }
+    if (self->sat.type_arg.first_token) { // Nonzero span start: already given
+        return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_TYPE);
+    }
+    struct expr expr = {0};
+    const int ret = pars_parse_expr(self, &expr, 0); // 0: identifiers rejected
+    if (ret != OK) {
+        return ret;
+    }
+    assert(expr.value_is_resolved);
+    const struct directive d = {drc, name_token, expr.first_token, expr.num_tokens};
+    self->sat.type_arg = expr;
+    return pars_finish_directive(self, label_id, d);
+}
+
+/// Parse `.val <identifier | '.' | expr>` and store the argument in the SAT being built.
+static int pars_directive_handler_val(
+        struct pars *const self,
+        const enum directive_type drc,
+        const size_t label_id)
+{
+    const size_t name_token = self->cur_tok_id - 1;
+    const struct token arg_token = pars_peek(self);
+    if (!self->in_sat) { // in_sat is raised by the `.def` handler
+        return pars_yield_error_msg(
+                self, self->cur_tok_id, ".val directive without matching .def");
+    }
+    if (self->sat.val_arg.first_token) { // Nonzero span start: already given
+        return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_VAL);
+    }
+    struct expr expr = {self->cur_tok_id, 1, 0, false}; // One unresolved token
+    if (arg_token.type == TT_ID || arg_token.type == TT_DOT) {
+        pars_commit(self); // Symbolic value: take the single token as is
+    } else {
+        const int ret = pars_parse_expr(self, &expr, 0); // 0: identifiers rejected
+        if (ret != OK) {
+            return ret;
+        }
+        assert(expr.value_is_resolved);
+    }
+    const struct directive d = {drc, name_token, expr.first_token, expr.num_tokens};
+    self->sat.val_arg = expr;
+    return pars_finish_directive(self, label_id, d);
+}
+
 static int pars_parse_direc(struct pars *const self, const size_t label_id)
 {
     const struct token dotid = pars_peek(self);
@@ -2259,7 +2486,7 @@ static int pars_parse_arg_starts_with_minus(
         }
     }
     // Otherwise it is expression - either prefix or standalone
-    const int ret = pars_parse_expr(self, &arg->expr);
+    const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);
     if (ret != OK) {
         return ret;
     }
@@ -2362,7 +2589,7 @@ static int pars_parse_arg_inside_parens_single_item(
             }
         }
     } else if (arg->expr.first_token == 0) {
-        const int ret = pars_parse_expr(self, &arg->expr);
+        const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);
         if (ret != OK) {
             return ret;
         }
@@ -2617,7 +2844,7 @@ static int pars_parse_arg(
     if (token0.type == TT_HASH) {
         // Definitely an immediate value expression
         pars_commit(self);
-        const int ret = pars_parse_expr(self, &arg->expr);
+        const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);
         if (ret != OK) {
             return ret;
         }
@@ -2629,7 +2856,7 @@ static int pars_parse_arg(
         return pars_parse_arg_starts_with_minus(self, arg);
     } else if (token0.type == TT_TILDE || token_is_number(token0.type)) {
         // Tilde is unary operation, so it must be an expression
-        const int ret = pars_parse_expr(self, &arg->expr);
+        const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);
         if (ret != OK) {
             return ret;
         }
@@ -2680,7 +2907,7 @@ static int pars_parse_arg(
             arg->num_tokens = self->cur_tok_id - first_token_id;
             return OK;
         } else {
-            const int ret = pars_parse_expr(self, &arg->expr);
+            const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);
             if (ret != OK) {
                 return ret;
             }
@@ -2984,7 +3211,7 @@ static void emit_token_id(
 
 static void emit_expr(
         const struct lex *const lex,
-        const struct expr_tokens_span *const expr,
+        const struct expr *const expr,
         FILE *const s)
 {
     for (size_t i = 0; i < expr->num_tokens; i++) {
@@ -3166,6 +3393,41 @@ static void emit_directive_short(
     }
 }
 
+// Writes "\t.size\tNAME, .-NAME" to `s` without a trailing newline. NAME is the
+// text of token `function_name_token_id` taken from lex->input, minus one
+// leading underscore if the token starts with one.
+static void emit_directive_size_of_function(
+        const struct lex *const lex,
+        const size_t function_name_token_id,
+        FILE *const s)
+{
+    const struct token tok = lex->tokbuf[function_name_token_id];
+    // Strip leading underscore
+    // FIXME It should be sort of an option that may be disabled.
+    const int skip = (lex->input[tok.offset] == '_');
+    const char *const sym = lex->input + tok.offset + skip;
+    const int sym_len = (int)tok.length - skip;
+    fprintf(s, "\t.size\t%.*s, .-%.*s", sym_len, sym, sym_len, sym);
+}
+
+// Writes "\t.type\tNAME, @TYPE" to `s` without a trailing newline. NAME is the
+// text of token `function_name_token_id`, minus one leading underscore if any;
+// TYPE is `type_str` verbatim.
+static void emit_directive_type(
+        const struct lex *const lex,
+        const size_t function_name_token_id,
+        const char *const type_str,
+        FILE *const s)
+{
+    const struct token tok = lex->tokbuf[function_name_token_id];
+    // Strip leading underscore
+    // FIXME It should be sort of an option that may be disabled.
+    const int skip = (lex->input[tok.offset] == '_');
+    const char *const sym = lex->input + tok.offset + skip;
+    const int sym_len = (int)tok.length - skip;
+    fprintf(s, "\t.type\t%.*s, @%s", sym_len, sym, type_str);
+}
+
 static enum opsize assem_resolve_bcc(
         struct assem *const self,
         const size_t stmt_number)
@@ -3225,6 +3487,28 @@ static struct res { uint32_t value; bool ok; } assem_find_symbol(
     return (struct res){ .ok = false };
 }
 
+// Emits extra directives for a sat statement that has a .val argument:
+// .size when the .scl value is -1, .type ..., @function when type says so.
+static void assem_emit_meta_sat(
+        struct assem *const self,
+        const struct sat *const sat,
+        FILE *const stream)
+{
+    const struct lex *const lex = self->pars->lex;
+    if (sat->val_arg.first_token == 0) {
+        return; // no .val argument recorded, nothing to emit
+    }
+    if (-1 == sat->scl_arg.value) {
+        emit_directive_size_of_function(lex, sat->def_arg.first_token, stream);
+        fputs("\n\n", stream);
+    }
+    // The first derived type lives in 2 bits at offset 4 of the type value
+    if (DT_FCN == ((sat->type_arg.value >> 4) & 3)) {
+        emit_directive_type(lex, sat->def_arg.first_token, "function", stream);
+        fputs("\n", stream);
+    }
+}
+
 static int assem_emit(struct assem *const self, FILE *const stream)
 {
     const struct lex *const lex = self->pars->lex;
@@ -3240,6 +3524,7 @@ static int assem_emit(struct assem *const self, FILE *const stream)
         }
     }
     for (size_t i = 1; i < pars->stmttab_size / (sizeof *pars->stmttab); i++) {
+        bool line_is_empty = true;
         const struct stmt *stmt = pars->stmttab + i;
         if (stmt->label_token) {
             const struct token token = lex->tokbuf[stmt->label_token];
@@ -3249,6 +3534,7 @@ static int assem_emit(struct assem *const self, FILE *const stream)
             } else {
                 fprintf(stream, "%.*s:", (int)token.length, lex->input + token.offset);
             }
+            line_is_empty = false;
         }
         if (stmt->type == ST_INSTRUCTION) {
             const struct instruction instr = stmt->instruction;
@@ -3272,6 +3558,7 @@ static int assem_emit(struct assem *const self, FILE *const stream)
                     emit_arg(lex, &instr.arg2, stream);
                 }
             }
+            line_is_empty = false;
         } else if (stmt->type == ST_DIRECTIVE) {
             const struct directive *dir = &stmt->directive;
             switch (dir->type) {
@@ -3280,14 +3567,17 @@ static int assem_emit(struct assem *const self, FILE *const stream)
             case DT_GLOBL:
             case DT_TEXT:
                 emit_directive_same(lex, dir, stream);
+                line_is_empty = false;
                 break;
             case DT_ASCII:
             case DT_BYTE:
                 emit_directive_byte(lex, dir, stream);
+                line_is_empty = false;
                 break;
             case DT_SHORT:
             case DT_WORD:
                 emit_directive_short(lex, dir, stream);
+                line_is_empty = false;
                 break;
             default:
                 break;
@@ -3303,7 +3593,12 @@ static int assem_emit(struct assem *const self, FILE *const stream)
                 fprintf(stream, " | @%08x", res.value);
             }
         }
-        fprintf(stream, "\n");
+        if (!line_is_empty) {
+            fprintf(stream, "\n");
+        }
+        if (stmt->type == ST_META_SAT) {
+            assem_emit_meta_sat(self, &stmt->sat, stream);
+        }
     }
     return OK;
 }
diff --git a/test.sh b/test.sh
index 5fe9d5c..a5bb47c 100755
--- a/test.sh
+++ b/test.sh
@@ -14,4 +14,5 @@ echo "asm68 -l -Q -o test1.o test1.S" >>"$dosbuild_dir/build.bat"
 echo "asm68 -l -Q -o test2.o test2.S" >>"$dosbuild_dir/build.bat"
 echo "asm68 -l -Q -o test3.o test3.S" >>"$dosbuild_dir/build.bat"
 echo "asm68 -l -Q -o test4.o test4.S" >>"$dosbuild_dir/build.bat"
+echo "asm68 -l -Q -o test_sat.o test_sat.S" >>"$dosbuild_dir/build.bat"
 dosemu -quiet -K "$dosbuild_dir" -E'build.bat'
diff --git a/tests/test_sat.S b/tests/test_sat.S
new file mode 100644
index 0000000..32ada7b
--- /dev/null
+++ b/tests/test_sat.S
@@ -0,0 +1,7 @@
+	.def	_fn\	.val	_fn\	.scl	3\	.type	0x10024\	.endef
+_fn:
+	nop
+	.def	_fn\	.val	.\	.scl	-1\	.endef
+	.def	_fn2\	.val	-1\	.endef
+	.def	_fn2\	.val	0x100\	.endef
+	.def	_fn2\	.val	0\	.endef
author	Oxore <oxore@protonmail.com>	2023-10-21 07:24:48 +0300
committer	Oxore <oxore@protonmail.com>	2023-10-21 07:24:48 +0300
commit	6529ed369f4da544c31aa1db0b23fa4eb4663013 (patch)
tree	87026787bc121faa721823ad65c53f92d73214da
parent	ad54e3c0a58e98927ec7e43418d593412ba6fa9f (diff)