diff options
| -rw-r--r-- | main.c | 180 | 
1 files changed, 118 insertions, 62 deletions
| @@ -30,7 +30,10 @@  #define E_UNIMPL "unimplemented"  #define E_UNREACH "unreachable code reached" -#define E_EXPR "expression token" +#define E_EXPR "'(', ')', unary operator, binary operator, number or symbol" +#define E_EXPR_NONREG "symbol that is not a register" +#define E_EXPR_OPEN "'(', number or symbol" +#define E_EXPR_CLOSE "')', '+', '-', '*', '/' or symbol"  #define E_EA_PART "D0, ...D7, A0, ...A7, SP, PC or full expression"  #define E_EA_PART_NOT_AN "D0, ...D7, PC or full expression"  #define E_EA_PART_NOT_EXPR "D0, ...D7 A0, ...A7, SP, or PC" @@ -40,6 +43,7 @@  #define E_ARG "valid instruction argument"  #define E_MNEMONIC "valid instruction mnemonic"  #define E_INSN_SIZE_SPEC "'.s', '.b', '.w' or '.l'" +#define E_ADDR_SIZE_SPEC "'.b', '.w' or '.l'"  #define E_ARGS_COUNT "invalid arguments count"  #define E_NL "new line '\\n', '\\r\\n' or '\\r'"  #define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC @@ -413,6 +417,9 @@ struct assem {      const struct pars *pars;  }; +static int pars_parse_arg_inside_parens( +        struct pars *const self, struct arg *const arg); +  const char *const g_escape_table[256] = {      "\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07",      "\\x08", "\\t", "\\n", "\\x0b", "\\x0c", "\\r", "\\x0e", "\\x0f", "\\x10", @@ -610,6 +617,15 @@ static bool token_is_number(const enum token_type type)      return type == TT_NUMHEX || type == TT_NUMDEC || type == TT_NUMOCT;  } +static bool token_is_binary_operator(const enum token_type type) +{ +    return type == TT_PLUS || type == TT_MINUS || +        type == TT_ASTERISK || type == TT_SLASH || type == TT_PERCENT || +        type == TT_LSHIFT || type == TT_RSHIFT || +        type == TT_AMPERSAND || type == TT_CAP || +        type == TT_PIPE || type == TT_BANG; +} +  static bool token_is_regmask_delimiter(const enum token_type type)  {      return type == TT_SLASH || type == TT_MINUS; @@ -745,7 +761,7 @@ static const char *lex_state_error_string(              
"'-', '=', ':', '%', '#', ' ', '\\t', '\\r', '\\n', '\\r\\n' "              "or EOF";      case LS_NUMHEX: -        return "';', '[0-9a-zA-Z]' , ',', '.', '(', ')', '+', " +        return "';', '[0-9a-fA-F]' , ',', '.', '(', ')', '+', "              "'-', '=', ':', '%', '#', ' ', '\\t', '\\r', '\\n', '\\r\\n' "              "or EOF";      case LS_NUMDEC: @@ -801,6 +817,17 @@ static struct line_pos_info lex_get_line_pos_info(      return l;  } +static size_t find_line_length(const char *const str) +{ +    for (size_t i = 0;; i++) { +        const char c = str[i]; +        if (c == '\n' || c == '\r' || c == '\000') { +            return i; +        } +    } +    return 0; +} +  static int lex_yield_error(struct lex *const self, const int c)  {      fflush(self->input_stream); @@ -825,7 +852,9 @@ static int lex_yield_error(struct lex *const self, const int c)              lex_state_error_string(self->state, self->inside_line));      fputs(g_escape_table[c_char], stderr);      fputs("'\n", stderr); -    fprintf(stderr, "%5lu | %s\n", l.line_num + 1, self->input + l.line_offset); +    const char *const line = self->input + l.line_offset; +    const size_t line_length = find_line_length(line); +    fprintf(stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line);      fputs("      | ", stderr);      for (size_t i = 0; i < l.column_num; i++) {          if (self->input[l.line_offset + i] == '\t') { @@ -1127,7 +1156,6 @@ static int lex_next(struct lex *const self, FILE *const stream)          if (c == EOF) {              // Add a hidden EOF token of 0 size              lex_yield_token(self, &(struct token){TT_NONE, self->cursor, 0}); -            self->tokens_count--;              break;          }      } @@ -1399,17 +1427,6 @@ static int fwrite_stmt(const struct stmt *const stmt, FILE *const stream)      return res;  } -static size_t find_line_length(const char *const str) -{ -    for (size_t i = 0;; i++) { -        const char c = str[i]; -        if (c == '\n' || c == 
'\r' || c == '\000') { -            return i; -        } -    } -    return 0; -} -  static int pars_yield_error_msg(          struct pars *const self,          const size_t token_id, @@ -1426,7 +1443,7 @@ static int pars_yield_error_msg(              msg);      const char *const line = self->lex->input + l.line_offset;      const size_t line_length = find_line_length(line); -    fprintf( stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line); +    fprintf(stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line);      fputs("      | ", stderr);      for (size_t i = 0; i < l.column_num; i++) {          if (self->lex->input[l.line_offset + i] == '\t') { @@ -1525,33 +1542,6 @@ enum opsize get_opsize_from_specifier(const char size_specifier)      return OPSIZE_NONE;  } -static bool is_expression_token(const enum token_type type) -{ -    switch (type) { -    case TT_PLUS: return true; -    case TT_MINUS: return true; -    case TT_ASTERISK: return true; -    case TT_SLASH: return true; -    case TT_PERCENT: return true; -    case TT_LSHIFT: return true; -    case TT_RSHIFT: return true; -    case TT_HASH: return true; -    case TT_BANG: return true; -    case TT_TILDE: return true; -    case TT_AMPERSAND: return true; -    case TT_PIPE: return true; -    case TT_CAP: return true; -    case TT_ID: return true; -    case TT_NUMDEC: return true; -    case TT_NUMOCT: return true; -    case TT_NUMHEX: return true; -    case TT_LPAREN: return true; -    case TT_RPAREN: return true; -    default: return false; -    } -    return false; -} -  static struct token pars_peek(const struct pars *const self)  {      return self->lex->tokbuf[self->cur_tok_id]; @@ -1666,33 +1656,71 @@ static int pars_parse_expr(      // yielded.      const size_t first_token_id = self->cur_tok_id;      unsigned nesting = 0; +    // Otherwise expect open parenthesis, number, or unary operator. 
+    bool expect_close_or_binary = false;      while (!pars_is_eof_reached(self)) {          const struct token token = pars_peek(self);          if (token.type == TT_LPAREN) { -            nesting++; +            if (expect_close_or_binary) { +                if (nesting == 0) { +                    break; +                } +                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); +            } else { +                nesting++; +            } +        } else if (token.type == TT_MINUS) { +            // Minus is both unary and binary operator, so it does not care +            // about expression parsing state +            expect_close_or_binary = false; +        } else if (token.type == TT_TILDE) { +            if (expect_close_or_binary) { +                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); +            } +        } else if (token.type == TT_ID) { +            if (expect_close_or_binary) { +                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); +            } +            if (pars_recognize_token(self, token).type == RTT_REG) { +                return pars_yield_error(self, self->cur_tok_id, E_EXPR_NONREG); +            } +            expect_close_or_binary = true; +        } else if (token_is_number(token.type)) { +            if (expect_close_or_binary) { +                return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE); +            } +            expect_close_or_binary = true; +        } else if (token_is_binary_operator(token.type)) { +            if (!expect_close_or_binary) { +                return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN); +            } +            expect_close_or_binary = false;          } else if (token.type == TT_RPAREN) { +            if (!expect_close_or_binary) { +                return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN); +            }              if (nesting == 0) {                  // This is not my closing 
parenthesis, should stop                  break;              }              nesting--; -        } else if (is_expression_token(token.type)) { -            // TODO parse expression          } else { -            if (nesting > 0) { -                return pars_yield_error(self, self->cur_tok_id, E_EXPR); +            if (nesting == 0) { +                break;              } -            break; +            return pars_yield_error( +                    self, +                    self->cur_tok_id, +                    expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN);          }          pars_commit(self);      } +    assert(first_token_id != self->cur_tok_id);      if (nesting != 0) { -        return pars_yield_error_nesting( -                self, first_token_id, self->cur_tok_id - first_token_id); -    } -    if (first_token_id == self->cur_tok_id) { -        // Nothing has been parsed but expression expected -        return pars_yield_error(self, self->cur_tok_id, E_EXPR); +        assert(pars_is_eof_reached(self)); +        return pars_yield_error_eof( +                self, +                expect_close_or_binary ? 
E_EXPR_CLOSE : E_EXPR_OPEN);      }      *expr = (struct expr_tokens_span){          .first_token = first_token_id, @@ -1709,8 +1737,36 @@ static int pars_parse_arg_after_prefix_expr(      // - Standalone expression      // - Standalone expression with size suffix like ".l"      // - Prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn) -    (void) self; -    (void) arg; +    if (pars_is_eof_reached(self)) { +        // It was a standalone expression without size suffix, yield an +        // argument from here +        arg->type = ARG_ADDR_UNSPEC; +        return OK; +    } +    const struct token token0 = pars_peek(self); +    if (token0.type == TT_NEWLINE) { +        // It was a standalone expression without size suffix, yield an +        // argument from here +        arg->type = ARG_ADDR_UNSPEC; +        return OK; +    } if (token0.type == TT_DOT_ID) { +        // It must be a size specifier, or error otherwise +        const size_t size_spec_id = pars_commit(self); +        if (token0.length != 2) { +            return pars_yield_error(self, size_spec_id, E_ADDR_SIZE_SPEC); +        } +        const enum opsize addrsize = +            get_opsize_from_specifier(self->lex->input[token0.offset + 1]); +        if (addrsize == OPSIZE_NONE || addrsize == OPSIZE_S) { +            return pars_yield_error(self, size_spec_id, E_ADDR_SIZE_SPEC); +        } +        arg->type = addrsize == OPSIZE_L ? 
ARG_ADDR_LONG : ARG_ADDR_WORD; +        return OK; +    } else if (token0.type == TT_LPAREN) { +        // It was a prefix expression for (An), (PC), (An,Xn) or (PC,Xn) +        pars_commit(self); +        return pars_parse_arg_inside_parens(self, arg); +    }      return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL);  } @@ -1824,10 +1880,10 @@ static int pars_parse_arg_inside_parens(                  pars_commit(self);              }          } else if (arg->expr.first_token == 0) { -                const int ret = pars_parse_expr(self, &arg->expr); -                if (ret != OK) { -                    return ret; -                } +            const int ret = pars_parse_expr(self, &arg->expr); +            if (ret != OK) { +                return ret; +            }          } else {              return pars_yield_error(self, self->cur_tok_id, E_EA_PART_NOT_EXPR);          } @@ -2147,7 +2203,7 @@ static int pars_parse_instruction(          if (size_spec.length != 2) {              return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC);          } -        const size_t opsize = +        const enum opsize opsize =              get_opsize_from_specifier(self->lex->input[size_spec.offset + 1]);          if (opsize == OPSIZE_NONE) {              return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC); | 
