diff options
| author | Oxore <oxore@protonmail.com> | 2023-10-21 07:24:48 +0300 | 
|---|---|---|
| committer | Oxore <oxore@protonmail.com> | 2023-10-21 07:24:48 +0300 | 
| commit | 6529ed369f4da544c31aa1db0b23fa4eb4663013 (patch) | |
| tree | 87026787bc121faa721823ad65c53f92d73214da | |
| parent | ad54e3c0a58e98927ec7e43418d593412ba6fa9f (diff) | |
Impl expr numbers parsing and function type and size
| -rw-r--r-- | main.c | 459 | ||||
| -rwxr-xr-x | test.sh | 1 | ||||
| -rw-r--r-- | tests/test_sat.S | 7 | 
3 files changed, 385 insertions, 82 deletions
| @@ -23,17 +23,23 @@  #endif  #if defined(__GNUC__) || defined(__clang__) +#if !defined(NDEBUG) +#define UNREACHABLE() assert(false) +#else  #define UNREACHABLE __builtin_unreachable +#endif  #else  #define UNREACHABLE()  #endif  #define E_NIMPL "not implemented"  #define E_UNREACH "unreachable code reached" -#define E_EXPR "'(', ')', unary operator, binary operator, number or symbol" +#define E_EXPR "'(', ')', unary operator, binary operator, number or identifier"  #define E_EXPR_NONREG "symbol that is not a register when parsing expression" -#define E_EXPR_OPEN "'(', number or symbol" -#define E_EXPR_CLOSE "')', '+', '-', '+', '/' or symbol" +#define E_EXPR_OPEN_WITH_ID "'-', '(', number or identifier" +#define E_EXPR_CLOSE_WITH_ID "')', '+', '-', '+', '/' or identifier" +#define E_EXPR_OPEN "'-', '(' or number" +#define E_EXPR_CLOSE "')', '+', '-', '+' or '/'"  #define E_DN "D0, ...D7"  #define E_AN "A0, ...A7, SP"  #define E_AN_DN E_AN " or " E_DN @@ -60,16 +66,28 @@  #define E_DIRECTIVE "directive"  #define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL  #define E_UNKNOWN_DRC "unknown directive" +#define E_NUM "number"  #define E_STR "string"  #define E_ID "identifier" +#define E_ID_NUM_DOT E_ID ", " E_NUM " or '.'"  #define E_NESTED_DEF "nested .def ... .endef blocks are illegal"  #define E_NMATCH_ENDEF ".endef directive without matching .def" +#define E_MULTIPLE_VAL "multiple .val directives specified" +#define E_MULTIPLE_SCL "multiple .scl directives specified" +#define E_MULTIPLE_TYPE "multiple .type directives specified" +#define E_MAX_NESTING "maximum expression nesting level reached"  #define ERR 0  #define OK 1  #define CONTINUE 2 -  #define BCC_S_MAX_BACKWARDS 126 +#define EXPR_NESTING_MAX 10 +#define PARS_EXPR_FLAG_ALLOW_ID 1 + +// These are from Sierra's FILE_FMT.H +#define C_EFCN	    -1	    /* physical end of function		 */ +#define DT_FCN	    2	    /* function		 */ +  enum token_type {      TT_NONE = 0, @@ -111,6 +129,7 @@ enum token_type {  struct token {      enum token_type type; +    uint32_t value; ///< For TT_NUMOCT, TT_NUMDEC, TT_NUMHEX      size_t offset;      size_t length;  }; @@ -151,6 +170,7 @@ struct lex {      // State variables      enum lex_state state;      enum lex_error error; +    uint32_t current_number_value;      size_t cursor;      size_t tok_offset;      bool inside_line; @@ -172,6 +192,7 @@ enum stmt_type {      ST_ASSIGNMENT,      ST_COMMENT,      ST_DIRECTIVE, +    ST_META_SAT, ///< Not a real statement, just an accumulation of .def .endef block  };  enum mnemonic { @@ -392,8 +413,10 @@ enum args_count {      ARGS_COUNT_2,  }; -struct expr_tokens_span { +struct expr {      size_t first_token, num_tokens; +    int32_t value; +    bool value_is_resolved;  };  struct arg { @@ -402,7 +425,7 @@ struct arg {      uint8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An)      uint8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it has 0x8 mask set if An      enum opsize briefext_size; -    struct expr_tokens_span expr; +    struct expr expr;      size_t first_token, num_tokens; ///< Argument tokens span  }; @@ -418,23 +441,25 @@ struct directive {      size_t first_token, num_tokens; ///< Directive arguments tokens span  }; +/// Symbol Attribute Table (SAT, a `.def ... .endef` block) +struct sat { +    struct expr def_arg; +    struct expr val_arg; +    struct expr scl_arg; +    struct expr type_arg; +}; +  struct stmt {      enum stmt_type type; +    uint32_t addr;      union {          struct instruction instruction;          struct directive directive; +        struct sat sat;      };      size_t label_token;      size_t first_token, num_tokens; // Statement tokens span, may be NULL      size_t comment_token; -    uint32_t addr; -}; - -struct symbol { -    size_t offset; // Byte offset in continuous null terminated symbol buffer -    // Instead of strcmp every item in symtab we can compare hashes and get O(N) -    // for search. -    uint32_t hash;  };  enum pars_error { @@ -481,10 +506,8 @@ struct pars {      // State      size_t cur_tok_id;      enum pars_error error; -    /*! -     * SAT stands for Symbol Attribute Table -     */      bool in_sat; ///< Indicates whether inside `.def ... .endef` block or not +    struct sat sat;      // Statement table      FILE *stmttab_stream;      struct stmt *stmttab; @@ -502,6 +525,7 @@ struct pars {  struct assem {      const struct pars *pars; +    struct sat sat;  };  static int pars_parse_arg_inside_parens( @@ -663,6 +687,9 @@ const struct mnemonic_meta {  static int pars_directive_skip(struct pars *, enum directive_type, size_t);  static int pars_directive_handler_def(struct pars *, enum directive_type, size_t);  static int pars_directive_handler_endef(struct pars *, enum directive_type, size_t); +static int pars_directive_handler_scl(struct pars *, enum directive_type, size_t); +static int pars_directive_handler_type(struct pars *, enum directive_type, size_t); +static int pars_directive_handler_val(struct pars *, enum directive_type, size_t);  const struct directive_description {      const char *str; @@ -709,7 +736,7 @@ const struct directive_description {      { "packed",   pars_directive_skip, },      { "page",     pars_directive_skip, },      { "reorg",    pars_directive_skip, }, -    { "scl",      pars_directive_skip, }, +    { "scl",      pars_directive_handler_scl,  },      { "section",  pars_directive_skip, },      { "short",    pars_directive_skip, },      { "single",   pars_directive_skip, }, @@ -719,8 +746,8 @@ const struct directive_description {      { "tag",      pars_directive_skip, },      { "text",     pars_directive_skip, },      { "tsection", pars_directive_skip, }, -    { "type",     pars_directive_skip, }, -    { "val",      pars_directive_skip, }, +    { "type",     pars_directive_handler_type, }, +    { "val",      pars_directive_handler_val,  },      { "word",     pars_directive_skip, },      { "xdef",     pars_directive_skip, },      { "xref",     pars_directive_skip, }, @@ -800,6 +827,17 @@ static int fprint_string_escaped(      return written;  } +static int hex_digit_to_int(char c) +{ +    if (c >= 'a' && c <= 'f') { +        return c - 'a'; +    } +    if (c >= 'A' && c <= 'F') { +        return c - 'A'; +    } +    return c - '0'; +} +  static const char *token_type_to_string(const enum token_type type)  {      switch (type) { @@ -1032,15 +1070,19 @@ static int lex_handle_next(struct lex *const self, const int c)              self->state = LS_ID;          } else if (c == '0') {              self->tok_offset = self->cursor; +            self->current_number_value = 0;              self->state = LS_NUMOCTHEX;          } else if (is_dec(c)) {              self->tok_offset = self->cursor; +            self->current_number_value = c - '0';              self->state = LS_NUMDEC;          } else if (c == '@') {              self->tok_offset = self->cursor; +            self->current_number_value = 0;              self->state = LS_NUMOCT;          } else if (c == '$') {              self->tok_offset = self->cursor; +            self->current_number_value = 0;              self->state = LS_NUMHEX;          } else if (c == '"') {              self->tok_offset = self->cursor; @@ -1058,59 +1100,59 @@ static int lex_handle_next(struct lex *const self, const int c)              self->tok_offset = self->cursor;              self->state = LS_DOT;          } else if (c == ',') { -            lex_yield_token(self, &(struct token){TT_COMMA, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_COMMA, 0, self->cursor, 1});          } else if (c == '(') { -            lex_yield_token(self, &(struct token){TT_LPAREN, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_LPAREN, 0, self->cursor, 1});          } else if (c == ')') { -            lex_yield_token(self, &(struct token){TT_RPAREN, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_RPAREN, 0, self->cursor, 1});          } else if (c == '[') { -            lex_yield_token(self, &(struct token){TT_LBRACKET, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_LBRACKET, 0, self->cursor, 1});          } else if (c == ']') { -            lex_yield_token(self, &(struct token){TT_RBRACKET, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_RBRACKET, 0, self->cursor, 1});          } else if (c == '{') { -            lex_yield_token(self, &(struct token){TT_LBRACE, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_LBRACE, 0, self->cursor, 1});          } else if (c == '{') { -            lex_yield_token(self, &(struct token){TT_RBRACE, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_RBRACE, 0, self->cursor, 1});          } else if (c == '+') { -            lex_yield_token(self, &(struct token){TT_PLUS, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_PLUS, 0, self->cursor, 1});          } else if (c == '-') { -            lex_yield_token(self, &(struct token){TT_MINUS, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_MINUS, 0, self->cursor, 1});          } else if (c == '*') {              if (self->inside_line) {                  lex_yield_token( -                        self, &(struct token){TT_ASTERISK, self->cursor, 1}); +                        self, &(struct token){TT_ASTERISK, 0, self->cursor, 1});              } else {                  self->tok_offset = self->cursor;                  self->state = LS_COMMENT_ASTERISK;              }          } else if (c == '/') { -            lex_yield_token(self, &(struct token){TT_SLASH, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_SLASH, 0, self->cursor, 1});          } else if (c == '=') {              self->tok_offset = self->cursor;              self->state = LS_EQ;          } else if (c == ':') { -            lex_yield_token(self, &(struct token){TT_COLON, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_COLON, 0, self->cursor, 1});          } else if (c == '%') { -            lex_yield_token(self, &(struct token){TT_PERCENT, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_PERCENT, 0, self->cursor, 1});          } else if (c == '#') { -            lex_yield_token(self, &(struct token){TT_HASH, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_HASH, 0, self->cursor, 1});          } else if (c == '!') { -            lex_yield_token(self, &(struct token){TT_BANG, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_BANG, 0, self->cursor, 1});          } else if (c == '~') { -            lex_yield_token(self, &(struct token){TT_TILDE, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_TILDE, 0, self->cursor, 1});          } else if (c == '&') { -            lex_yield_token(self, &(struct token){TT_AMPERSAND, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_AMPERSAND, 0, self->cursor, 1});          } else if (c == '|') { -            lex_yield_token(self, &(struct token){TT_PIPE, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_PIPE, 0, self->cursor, 1});          } else if (c == '^') { -            lex_yield_token(self, &(struct token){TT_CAP, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_CAP, 0, self->cursor, 1});          } else if (c == '\r') {              self->tok_offset = self->cursor;              self->state = LS_CR;          } else if (c == '\n') { -            lex_yield_token(self, &(struct token){TT_NEWLINE, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_NEWLINE, 0, self->cursor, 1});          } else if (c == '\\') { -            lex_yield_token(self, &(struct token){TT_ESCAPE, self->cursor, 1}); +            lex_yield_token(self, &(struct token){TT_ESCAPE, 0, self->cursor, 1});          } else if (c == ' ' || c == '\t') {              // ignore spaces and tabs          } else if (c == EOF) { @@ -1123,8 +1165,8 @@ static int lex_handle_next(struct lex *const self, const int c)          break;      case LS_CR: // Accumulate CRLF into single token          { -            const size_t size = c == '\n' ? 2 : 1; // 2 for CRLF, 1 for just CR -            const struct token token = {TT_NEWLINE, self->tok_offset, size}; +            const size_t length = c == '\n' ? 2 : 1; // 2 for CRLF, 1 for just CR +            const struct token token = {TT_NEWLINE, 0, self->tok_offset, length};              lex_yield_token(self, &token);              self->state = LS_FREE;              if (c != '\n') { @@ -1136,7 +1178,7 @@ static int lex_handle_next(struct lex *const self, const int c)      case LS_LSHIFT:          if (c == '<') {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_LSHIFT, self->tok_offset, length}; +            const struct token token = {TT_LSHIFT, 0, self->tok_offset, length};              lex_yield_token(self, &token);              self->state = LS_FREE;          } else { @@ -1146,7 +1188,7 @@ static int lex_handle_next(struct lex *const self, const int c)      case LS_RSHIFT:          if (c == '>') {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_RSHIFT, self->tok_offset, length}; +            const struct token token = {TT_RSHIFT, 0, self->tok_offset, length};              lex_yield_token(self, &token);              self->state = LS_FREE;          } else { @@ -1157,7 +1199,7 @@ static int lex_handle_next(struct lex *const self, const int c)          {              const size_t length = (c == '=') ? 2 : 1;              const enum token_type type = (c == '=') ? TT_EQ_DOUBLE : TT_EQ; -            const struct token token = {type, self->tok_offset, length}; +            const struct token token = {type, 0, self->tok_offset, length};              lex_yield_token(self, &token);          }          self->state = LS_FREE; @@ -1170,7 +1212,7 @@ static int lex_handle_next(struct lex *const self, const int c)          if (is_alphanum(c) || c == '_') {              self->state = LS_DOT_ID;          } else { -            lex_yield_token(self, &(struct token){TT_DOT, self->tok_offset, 1}); +            lex_yield_token(self, &(struct token){TT_DOT, 0, self->tok_offset, 1});              self->state = LS_FREE;              return lex_handle_next(self, c);          } @@ -1178,7 +1220,7 @@ static int lex_handle_next(struct lex *const self, const int c)      case LS_DOT_ID:          if (!is_alphanum(c) && c != '_') {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_DOT_ID, self->tok_offset, length}; +            const struct token token = {TT_DOT_ID, 0, self->tok_offset, length};              lex_yield_token(self, &token);              self->state = LS_FREE;              return lex_handle_next(self, c); @@ -1187,7 +1229,7 @@ static int lex_handle_next(struct lex *const self, const int c)      case LS_ID:          if (!is_alphanum(c) && c != '_') {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_ID, self->tok_offset, length}; +            const struct token token = {TT_ID, 0, self->tok_offset, length};              lex_yield_token(self, &token);              self->state = LS_FREE;              return lex_handle_next(self, c); @@ -1198,11 +1240,12 @@ static int lex_handle_next(struct lex *const self, const int c)              self->state = LS_NUMHEX;          } else if (is_oct(c)) {              self->state = LS_NUMOCT; +            return lex_handle_next(self, c);          } else if (is_alphabetic(c) || c == '_') {              return lex_yield_error(self, c);          } else {              assert((self->cursor - self->tok_offset) == 1); -            const struct token token = {TT_NUMDEC, self->tok_offset, 1}; +            const struct token token = {TT_NUMDEC, 0, self->tok_offset, 1};              lex_yield_token(self, &token);              // It was just zero, handle this char in LS_FREE state then              self->state = LS_FREE; @@ -1210,11 +1253,16 @@ static int lex_handle_next(struct lex *const self, const int c)          }          break;      case LS_NUMOCT: -        if (is_alphabetic(c) || c == '_') { +        if (is_oct(c)) { +            self->current_number_value <<= 3; +            self->current_number_value |= c - '0'; +        } else if (is_alphabetic(c) || c == '_') {              return lex_yield_error(self, c); -        } else if (!is_oct(c)) { +        } else {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_NUMOCT, self->tok_offset, length}; +            const struct token token = { +                TT_NUMOCT, self->current_number_value, self->tok_offset, length +            };              lex_yield_token(self, &token);              // This token is finished, handle this char in LS_FREE state              self->state = LS_FREE; @@ -1223,13 +1271,16 @@ static int lex_handle_next(struct lex *const self, const int c)          break;      case LS_NUMHEX:          if (is_hex(c)) { -            // Keep calm +            self->current_number_value <<= 4; +            self->current_number_value |= hex_digit_to_int(c);          } else if (is_alphabetic(c) || c == '_') {              // Panik!              return lex_yield_error(self, c);          } else {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_NUMHEX, self->tok_offset, length}; +            const struct token token = { +                TT_NUMHEX, self->current_number_value, self->tok_offset, length +            };              lex_yield_token(self, &token);              // This token is finished, handle this char in LS_FREE state              self->state = LS_FREE; @@ -1237,11 +1288,16 @@ static int lex_handle_next(struct lex *const self, const int c)          }          break;      case LS_NUMDEC: -        if (is_alphabetic(c) || c == '_') { +        if (is_dec(c)) { +            self->current_number_value *= 10; +            self->current_number_value += c - '0'; +        } else if (is_alphabetic(c) || c == '_') {              return lex_yield_error(self, c); -        } else if (!is_dec(c)) { +        } else {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_NUMDEC, self->tok_offset, length}; +            const struct token token = { +                TT_NUMDEC, self->current_number_value, self->tok_offset, length +            };              lex_yield_token(self, &token);              // This token is finished, handle this char in LS_FREE state              self->state = LS_FREE; @@ -1253,7 +1309,7 @@ static int lex_handle_next(struct lex *const self, const int c)              self->state = LS_STRING_ESC;          } else if (c == '"') {              const size_t length = self->cursor - self->tok_offset + 1; -            const struct token token = {TT_STRING, self->tok_offset, length}; +            const struct token token = {TT_STRING, 0, self->tok_offset, length};              lex_yield_token(self, &token);              // This token is finished              self->state = LS_FREE; @@ -1265,7 +1321,7 @@ static int lex_handle_next(struct lex *const self, const int c)      case LS_COMMENT_ASTERISK:          if (c == '\r' || c == '\n') {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_COMMENT_ASTERISK, self->tok_offset, length}; +            const struct token token = {TT_COMMENT_ASTERISK, 0, self->tok_offset, length};              lex_yield_token(self, &token);              // This token is finished, handle this char in LS_FREE state              self->state = LS_FREE; @@ -1275,7 +1331,7 @@ static int lex_handle_next(struct lex *const self, const int c)      case LS_COMMENT_SEMICOLON:          if (c == '\r' || c == '\n') {              const size_t length = self->cursor - self->tok_offset; -            const struct token token = {TT_COMMENT_SEMICOLON, self->tok_offset, length}; +            const struct token token = {TT_COMMENT_SEMICOLON, 0, self->tok_offset, length};              lex_yield_token(self, &token);              // This token is finished, handle this char in LS_FREE state              self->state = LS_FREE; @@ -1309,7 +1365,7 @@ static int lex_next(struct lex *const self, FILE *const stream)          }          if (c == EOF) {              // Add a hidden EOF token of 0 size -            lex_yield_token(self, &(struct token){TT_NONE, self->cursor, 0}); +            lex_yield_token(self, &(struct token){TT_NONE, 0, self->cursor, 0});              break;          }      } @@ -1483,6 +1539,7 @@ static const char *stmt_type_to_string(const enum stmt_type type)      case ST_ASSIGNMENT: return "ASSIGNMENT";      case ST_COMMENT: return "COMMENT";      case ST_DIRECTIVE: return "DIRECTIVE"; +    case ST_META_SAT: return "META_SAT";      }      return "_UNKNOWN";  } @@ -1508,7 +1565,7 @@ static int fprint_tokens(  static void fprint_expr(          const struct lex *const lex, -        const struct expr_tokens_span *const expr, +        const struct expr *const expr,          FILE *const s)  {      fputc('[', s); @@ -1757,6 +1814,8 @@ static size_t pars_commit(struct pars *const self)  static void pars_skip_to_newline(struct pars *const self)  { +    // Reset state In case of inside of the .def .. .endef block +    self->in_sat = false;      while (!pars_is_eof_reached(self)) {          const struct token nl = pars_peek(self);          pars_commit(self); @@ -1954,13 +2013,55 @@ static struct token_recognition pars_recognize_token(      return (struct token_recognition){0};  } +struct expr_value { +    enum token_type operator; +    int32_t value; +    bool negative; +    bool bit_inverted; +}; + +static int32_t apply_binary_operator(int32_t a, int32_t b, enum token_type op) +{ +    switch (op) { +    case TT_ASTERISK: +        return a * b; +    case TT_SLASH: +        return a / b; +    case TT_LSHIFT: +        return a << b; +    case TT_RSHIFT: +        return a >> b; +    case TT_AMPERSAND: +        return a & b; +    case TT_PIPE: +        return a | b; +    case TT_CAP: +        return a ^ b; +    case TT_NONE: +    case TT_PLUS: +        return a + b; +    default: +        break; +    } +    UNREACHABLE(); +    assert(false); +} +  static int pars_parse_expr( -        struct pars *const self, struct expr_tokens_span *const expr) +        struct pars *const self, +        struct expr *const expr, +        int flags)  {      // This function is called only when expression is expected unconditionally,      // so if the first token cannot be a part of expression, then error must be      // yielded.      const size_t first_token_id = self->cur_tok_id; +    const char *const e_expr_open = (flags & PARS_EXPR_FLAG_ALLOW_ID) +        ? E_EXPR_OPEN_WITH_ID : E_EXPR_OPEN; +    const char *const e_expr_close = (flags & PARS_EXPR_FLAG_ALLOW_ID) +        ? E_EXPR_CLOSE_WITH_ID : E_EXPR_CLOSE; +    struct expr_value stack[EXPR_NESTING_MAX] = {{0}}; +    bool value_is_resolved = true;      unsigned nesting = 0;      // Otherwise expect open parenthesis, number, or unary operator.      bool expect_close_or_binary = false; @@ -1970,7 +2071,7 @@ static int pars_parse_expr(                  assert(pars_is_eof_reached(self));                  return pars_yield_error_eof(                          self, -                        expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN); +                        expect_close_or_binary ? e_expr_close : e_expr_open);              }              break;          } @@ -1980,45 +2081,80 @@ static int pars_parse_expr(                  if (nesting == 0) {                      break;                  } -                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); +                return pars_yield_error(self, self->cur_tok_id, e_expr_close);              } else {                  nesting++; +                if (nesting >= EXPR_NESTING_MAX) { +                    return pars_yield_error_msg(self, self->cur_tok_id, E_MAX_NESTING); +                }              }          } else if (token.type == TT_MINUS) {              // Minus is both unary and binary operator, so it does not care              // about expression parsing state              expect_close_or_binary = false; +            stack[nesting].negative = !stack[nesting].negative;          } else if (token.type == TT_TILDE) {              if (expect_close_or_binary) { -                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); +                return pars_yield_error(self, self->cur_tok_id, e_expr_close);              } +            stack[nesting].bit_inverted = !stack[nesting].bit_inverted;          } else if (token.type == TT_ID) { +            if (0 == (flags & PARS_EXPR_FLAG_ALLOW_ID)) { +                return pars_yield_error( +                        self, +                        self->cur_tok_id, +                        expect_close_or_binary ? e_expr_close : e_expr_open); +            } +            value_is_resolved = false;              if (expect_close_or_binary) { -                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); +                return pars_yield_error(self, self->cur_tok_id, e_expr_close);              }              if (pars_recognize_token(self, token).type == RTT_REG) {                  return pars_yield_error(self, self->cur_tok_id, E_EXPR_NONREG);              }              expect_close_or_binary = true; +            stack[nesting].operator = TT_NONE; +            stack[nesting].negative = false;          } else if (token_is_number(token.type)) {              if (expect_close_or_binary) { -                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); +                return pars_yield_error(self, self->cur_tok_id, e_expr_close);              }              expect_close_or_binary = true; +            int32_t value = token.value; +            if (stack[nesting].negative) { +                value = -value; +            } +            if (stack[nesting].bit_inverted) { +                value = ~value; +            } +            stack[nesting].value = apply_binary_operator( +                    stack[nesting].value, value, stack[nesting].operator); +            stack[nesting].operator = TT_NONE;          } else if (token_is_binary_operator(token.type)) {              if (!expect_close_or_binary) { -                return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN); +                return pars_yield_error(self, self->cur_tok_id, e_expr_open);              }              expect_close_or_binary = false; +            stack[nesting].operator = token.type;          } else if (token.type == TT_RPAREN) {              if (!expect_close_or_binary) { -                return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN); +                return pars_yield_error(self, self->cur_tok_id, e_expr_open);              }              if (nesting == 0) {                  // This is not my closing parenthesis, should stop                  break;              }              nesting--; +            int32_t value = stack[nesting + 1].value; +            if (stack[nesting].negative) { +                value = -value; +            } +            if (stack[nesting].bit_inverted) { +                value = ~value; +            } +            stack[nesting].value = apply_binary_operator( +                    stack[nesting].value, value, stack[nesting].operator); +            stack[nesting].operator = TT_NONE;          } else {              if (nesting == 0 && expect_close_or_binary) {                  break; @@ -2026,14 +2162,16 @@ static int pars_parse_expr(              return pars_yield_error(                      self,                      self->cur_tok_id, -                    expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN); +                    expect_close_or_binary ? e_expr_close : e_expr_open);          }          pars_commit(self);      }      assert(first_token_id != self->cur_tok_id); -    *expr = (struct expr_tokens_span){ +    *expr = (struct expr){          .first_token = first_token_id,          .num_tokens = self->cur_tok_id - first_token_id, +        .value = stack[nesting].value, +        .value_is_resolved = value_is_resolved,      };      return OK;  } @@ -2100,6 +2238,13 @@ static int pars_finish_directive(          .num_tokens = self->cur_tok_id - first_token,      };      pars_put_stmt(self, &stmt); +    if (directive.type == DT_ENDEF) { +        struct stmt stmt = { +            .type = ST_META_SAT, +            .sat = self->sat, +        }; +        pars_put_stmt(self, &stmt); +    }      return OK;  } @@ -2131,14 +2276,16 @@ static int pars_directive_handler_def(          const size_t label_id)  {      const size_t name_token = self->cur_tok_id - 1; -    const struct token token = pars_peek(self); -    if (token.type != TT_ID) { +    const struct token arg_token = pars_peek(self); +    if (arg_token.type != TT_ID) {          return pars_yield_error(self, self->cur_tok_id, E_ID);      }      if (self->in_sat) {          return pars_yield_error_msg(self, self->cur_tok_id, E_NESTED_DEF);      } +    self->sat = (struct sat){0};      self->in_sat = true; +    self->sat.def_arg = (struct expr){self->cur_tok_id, 1, 0, false};      const struct directive directive = { drc, name_token, pars_commit(self), 1 };      return pars_finish_directive(self, label_id, directive);  } @@ -2157,6 +2304,86 @@ static int pars_directive_handler_endef(      return pars_finish_directive(self, label_id, directive);  } +static int pars_directive_handler_scl( +        struct pars *const self, +        const enum directive_type drc, +        const size_t label_id) +{ +    const size_t name_token = self->cur_tok_id - 1; +    if (!self->in_sat) { +        return pars_yield_error_msg(self, self->cur_tok_id, E_NMATCH_ENDEF); +    } +    if (self->sat.scl_arg.first_token) { +        return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_SCL); +    } +    struct expr expr = {0}; +    const int ret = pars_parse_expr(self, &expr, 0); +    if (ret != OK) { +        return ret; +    } +    assert(expr.value_is_resolved); +    const struct directive directive = { +        drc, name_token, expr.first_token, expr.num_tokens +    }; +    self->sat.scl_arg = expr; +    return pars_finish_directive(self, label_id, directive); +} + +static int pars_directive_handler_type( +        struct pars *const self, +        const enum directive_type drc, +        const size_t label_id) +{ +    const size_t name_token = self->cur_tok_id - 1; +    if (!self->in_sat) { +        return pars_yield_error_msg(self, self->cur_tok_id, E_NMATCH_ENDEF); +    } +    if (self->sat.type_arg.first_token) { +        return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_TYPE); +    } +    struct expr expr = {0}; +    const int ret = pars_parse_expr(self, &expr, 0); +    if (ret != OK) { +        return ret; +    } +    assert(expr.value_is_resolved); +    const struct directive directive = { +        drc, name_token, expr.first_token, expr.num_tokens +    }; +    self->sat.type_arg = expr; +    return pars_finish_directive(self, label_id, directive); +} + +static int pars_directive_handler_val( +        struct pars *const self, +        const enum directive_type drc, +        const size_t label_id) +{ +    const size_t name_token = self->cur_tok_id - 1; +    const struct token arg_token = pars_peek(self); +    if (!self->in_sat) { +        return pars_yield_error_msg(self, self->cur_tok_id, E_NMATCH_ENDEF); +    } +    if (self->sat.val_arg.first_token) { +        return pars_yield_error_msg(self, self->cur_tok_id, E_MULTIPLE_VAL); +    } +    struct expr expr = {self->cur_tok_id, 1, 0, false}; +    if (arg_token.type == TT_ID || arg_token.type == TT_DOT) { +        pars_commit(self); +    } else { +        const int ret = pars_parse_expr(self, &expr, 0); +        if (ret != OK) { +            return ret; +        } +        assert(expr.value_is_resolved); +    } +    const struct directive directive = { +        drc, name_token, expr.first_token, expr.num_tokens +    }; +    self->sat.val_arg = expr; +    return pars_finish_directive(self, label_id, directive); +} +  static int pars_parse_direc(struct pars *const self, const size_t label_id)  {      const struct token dotid = pars_peek(self); @@ -2259,7 +2486,7 @@ static int pars_parse_arg_starts_with_minus(          }      }      // Otherwise it is expression - either prefix or standalone -    const int ret = pars_parse_expr(self, &arg->expr); +    const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);      if (ret != OK) {          return ret;      } @@ -2362,7 +2589,7 @@ static int pars_parse_arg_inside_parens_single_item(              }          }      } else if (arg->expr.first_token == 0) { -        const int ret = pars_parse_expr(self, &arg->expr); +        const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);          if (ret != OK) {              return ret;          } @@ -2617,7 +2844,7 @@ static int pars_parse_arg(      if (token0.type == TT_HASH) {          // Definitely an immediate value expression          pars_commit(self); -        const int ret = pars_parse_expr(self, &arg->expr); +        const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);          if (ret != OK) {              return ret;          } @@ -2629,7 +2856,7 @@ static int pars_parse_arg(          return pars_parse_arg_starts_with_minus(self, arg);      } else if (token0.type == TT_TILDE || token_is_number(token0.type)) {          // Tilde is unary operation, so it must be an expression -        const int ret = pars_parse_expr(self, &arg->expr); +        const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);          if (ret != OK) {              return ret;          } @@ -2680,7 +2907,7 @@ static int pars_parse_arg(              arg->num_tokens = self->cur_tok_id - first_token_id;              return OK;          } else { -            const int ret = pars_parse_expr(self, &arg->expr); +            const int ret = pars_parse_expr(self, &arg->expr, PARS_EXPR_FLAG_ALLOW_ID);              if (ret != OK) {                  return ret;              } @@ -2984,7 +3211,7 @@ static void emit_token_id(  static void emit_expr(          const struct lex *const lex, -        const struct expr_tokens_span *const expr, +        const struct expr *const expr,          FILE *const s)  {      for (size_t i = 0; i < expr->num_tokens; i++) { @@ -3166,6 +3393,41 @@ static void emit_directive_short(      }  } +static void emit_directive_size_of_function( +        const struct lex *const lex, +        const size_t function_name_token_id, +        FILE *const s) +{ +    const struct token name_token = lex->tokbuf[function_name_token_id]; +    const char *name = lex->input + name_token.offset; +    int len = name_token.length; +    if (*name == '_') { +        // Strip leading underscore +        // FIXME It should be sort of an option that may be disabled. +        name++; +        len--; +    } +    fprintf(s, "\t.size\t%.*s, .-%.*s", len, name, len, name); +} + +static void emit_directive_type( +        const struct lex *const lex, +        const size_t function_name_token_id, +        const char *const type_str, +        FILE *const s) +{ +    const struct token name_token = lex->tokbuf[function_name_token_id]; +    const char *name = lex->input + name_token.offset; +    int len = name_token.length; +    if (*name == '_') { +        // Strip leading underscore +        // FIXME It should be sort of an option that may be disabled. +        name++; +        len--; +    } +    fprintf(s, "\t.type\t%.*s, @%s", len, name, type_str); +} +  static enum opsize assem_resolve_bcc(          struct assem *const self,          const size_t stmt_number) @@ -3225,6 +3487,28 @@ static struct res { uint32_t value; bool ok; } assem_find_symbol(      return (struct res){ .ok = false };  } +static void assem_emit_meta_sat( +        struct assem *const self, +        const struct sat *const sat, +        FILE *const stream) +{ +    const struct lex *const lex = self->pars->lex; +    if (0 == sat->val_arg.first_token) { +        return; +    } +    if (sat->scl_arg.value == -1) { +        emit_directive_size_of_function( +                lex, sat->def_arg.first_token, stream); +        fprintf(stream, "\n\n"); +    } +    // Check if first derived type (2 bits at offset 4) is function +    if (((sat->type_arg.value >> 4) & 3) == DT_FCN) { +        emit_directive_type( +                lex, sat->def_arg.first_token, "function", stream); +        fprintf(stream, "\n"); +    } +} +  static int assem_emit(struct assem *const self, FILE *const stream)  {      const struct lex *const lex = self->pars->lex; @@ -3240,6 +3524,7 @@ static int assem_emit(struct assem *const self, FILE *const stream)          }      }      for (size_t i = 1; i < pars->stmttab_size / (sizeof *pars->stmttab); i++) { +        bool line_is_empty = true;          const struct stmt *stmt = pars->stmttab + i;          if (stmt->label_token) {              const struct token token = lex->tokbuf[stmt->label_token]; @@ -3249,6 +3534,7 @@ static int assem_emit(struct assem *const self, FILE *const stream)              } else {                  fprintf(stream, "%.*s:", (int)token.length, lex->input + token.offset);              } +            line_is_empty = false;          }          if (stmt->type == ST_INSTRUCTION) {              const struct instruction instr = stmt->instruction; @@ -3272,6 +3558,7 @@ static int assem_emit(struct assem *const self, FILE *const stream)                      emit_arg(lex, &instr.arg2, stream);                  }              } +            line_is_empty = false;          } else if (stmt->type == ST_DIRECTIVE) {              const struct directive *dir = &stmt->directive;              switch (dir->type) { @@ -3280,14 +3567,17 @@ static int assem_emit(struct assem *const self, FILE *const stream)              case DT_GLOBL:              case DT_TEXT:                  emit_directive_same(lex, dir, stream); +                line_is_empty = false;                  break;              case DT_ASCII:              case DT_BYTE:                  emit_directive_byte(lex, dir, stream); +                line_is_empty = false;                  break;              case DT_SHORT:              case DT_WORD:                  emit_directive_short(lex, dir, stream); +                line_is_empty = false;                  break;              default:                  break; @@ -3303,7 +3593,12 @@ static int assem_emit(struct assem *const self, FILE *const stream)                  fprintf(stream, " | @%08x", res.value);              }          } -        fprintf(stream, "\n"); +        if (!line_is_empty) { +            fprintf(stream, "\n"); +        } +        if (stmt->type == ST_META_SAT) { +            assem_emit_meta_sat(self, &stmt->sat, stream); +        }      }      return OK;  } @@ -14,4 +14,5 @@ echo "asm68 -l -Q -o test1.o test1.S" >>"$dosbuild_dir/build.bat"  echo "asm68 -l -Q -o test2.o test2.S" >>"$dosbuild_dir/build.bat"  echo "asm68 -l -Q -o test3.o test3.S" >>"$dosbuild_dir/build.bat"  echo "asm68 -l -Q -o test4.o test4.S" >>"$dosbuild_dir/build.bat" +echo "asm68 -l -Q -o test_sat.o test_sat.S" >>"$dosbuild_dir/build.bat"  dosemu -quiet -K "$dosbuild_dir" -E'build.bat' diff --git a/tests/test_sat.S b/tests/test_sat.S new file mode 100644 index 0000000..32ada7b --- /dev/null +++ b/tests/test_sat.S @@ -0,0 +1,7 @@ +	.def	_fn\	.val	_fn\	.scl	3\	.type	0x10024\	.endef +_fn: +	nop +	.def	_fn\	.val	.\	.scl	-1\	.endef +	.def	_fn2\	.val	-1\	.endef +	.def	_fn2\	.val	0x100\	.endef +	.def	_fn2\	.val	0\	.endef | 
