summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Oxore <oxore@protonmail.com> 2023-06-26 22:04:39 +0300
committer Oxore <oxore@protonmail.com> 2023-06-26 22:47:40 +0300
commit dc93b017b6c380d7c255ac448f51c10e68d71264 (patch)
tree c900089e9a660ffe91c4822a6911b36b43ca9d52
parent 2dd7bd6ca24b6a28ff36c5c441442d5885c0b611 (diff)
Impl expr validation, fix lex error test
-rw-r--r-- main.c 180
1 files changed, 118 insertions, 62 deletions
diff --git a/main.c b/main.c
index f9e0cfe..76aaf7d 100644
--- a/main.c
+++ b/main.c
@@ -30,7 +30,10 @@
#define E_UNIMPL "unimplemented"
#define E_UNREACH "unreachable code reached"
-#define E_EXPR "expression token"
+#define E_EXPR "'(', ')', unary operator, binary operator, number or symbol"
+#define E_EXPR_NONREG "symbol that is not a register"
+#define E_EXPR_OPEN "'(', number or symbol"
+#define E_EXPR_CLOSE "')', '+', '-', '*', '/' or symbol"
#define E_EA_PART "D0, ...D7, A0, ...A7, SP, PC or full expression"
#define E_EA_PART_NOT_AN "D0, ...D7, PC or full expression"
#define E_EA_PART_NOT_EXPR "D0, ...D7 A0, ...A7, SP, or PC"
@@ -40,6 +43,7 @@
#define E_ARG "valid instruction argument"
#define E_MNEMONIC "valid instruction mnemonic"
#define E_INSN_SIZE_SPEC "'.s', '.b', '.w' or '.l'"
+#define E_ADDR_SIZE_SPEC "'.b', '.w' or '.l'"
#define E_ARGS_COUNT "invalid arguments count"
#define E_NL "new line '\\n', '\\r\\n' or '\\r'"
#define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC
@@ -413,6 +417,9 @@ struct assem {
const struct pars *pars;
};
+static int pars_parse_arg_inside_parens(
+ struct pars *const self, struct arg *const arg);
+
const char *const g_escape_table[256] = {
"\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07",
"\\x08", "\\t", "\\n", "\\x0b", "\\x0c", "\\r", "\\x0e", "\\x0f", "\\x10",
@@ -610,6 +617,15 @@ static bool token_is_number(const enum token_type type)
return type == TT_NUMHEX || type == TT_NUMDEC || type == TT_NUMOCT;
}
+static bool token_is_binary_operator(const enum token_type type)
+{ // Returns true when `type` is one of the operator tokens listed below.
+ return type == TT_PLUS || type == TT_MINUS ||
+ type == TT_ASTERISK || type == TT_SLASH || type == TT_PERCENT ||
+ type == TT_LSHIFT || type == TT_RSHIFT || // both shift directions
+ type == TT_AMPERSAND || type == TT_CAP ||
+ type == TT_PIPE || type == TT_BANG;
+}
+
static bool token_is_regmask_delimiter(const enum token_type type)
{
return type == TT_SLASH || type == TT_MINUS;
@@ -745,7 +761,7 @@ static const char *lex_state_error_string(
"'-', '=', ':', '%', '#', ' ', '\\t', '\\r', '\\n', '\\r\\n' "
"or EOF";
case LS_NUMHEX:
- return "';', '[0-9a-zA-Z]' , ',', '.', '(', ')', '+', "
+ return "';', '[0-9a-fA-F]' , ',', '.', '(', ')', '+', "
"'-', '=', ':', '%', '#', ' ', '\\t', '\\r', '\\n', '\\r\\n' "
"or EOF";
case LS_NUMDEC:
@@ -801,6 +817,17 @@ static struct line_pos_info lex_get_line_pos_info(
return l;
}
+static size_t find_line_length(const char *const str) // length of the first line in str
+{
+ for (size_t i = 0;; i++) { // no loop condition: the only exit is the return below
+ const char c = str[i];
+ if (c == '\n' || c == '\r' || c == '\000') { // line break or NUL terminator
+ return i; // number of characters before the terminator
+ }
+ }
+ return 0; // never reached, the loop above exits only via return
+}
+
static int lex_yield_error(struct lex *const self, const int c)
{
fflush(self->input_stream);
@@ -825,7 +852,9 @@ static int lex_yield_error(struct lex *const self, const int c)
lex_state_error_string(self->state, self->inside_line));
fputs(g_escape_table[c_char], stderr);
fputs("'\n", stderr);
- fprintf(stderr, "%5lu | %s\n", l.line_num + 1, self->input + l.line_offset);
+ const char *const line = self->input + l.line_offset;
+ const size_t line_length = find_line_length(line);
+ fprintf(stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line);
fputs(" | ", stderr);
for (size_t i = 0; i < l.column_num; i++) {
if (self->input[l.line_offset + i] == '\t') {
@@ -1127,7 +1156,6 @@ static int lex_next(struct lex *const self, FILE *const stream)
if (c == EOF) {
// Add a hidden EOF token of 0 size
lex_yield_token(self, &(struct token){TT_NONE, self->cursor, 0});
- self->tokens_count--;
break;
}
}
@@ -1399,17 +1427,6 @@ static int fwrite_stmt(const struct stmt *const stmt, FILE *const stream)
return res;
}
-static size_t find_line_length(const char *const str)
-{
- for (size_t i = 0;; i++) {
- const char c = str[i];
- if (c == '\n' || c == '\r' || c == '\000') {
- return i;
- }
- }
- return 0;
-}
-
static int pars_yield_error_msg(
struct pars *const self,
const size_t token_id,
@@ -1426,7 +1443,7 @@ static int pars_yield_error_msg(
msg);
const char *const line = self->lex->input + l.line_offset;
const size_t line_length = find_line_length(line);
- fprintf( stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line);
+ fprintf(stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line);
fputs(" | ", stderr);
for (size_t i = 0; i < l.column_num; i++) {
if (self->lex->input[l.line_offset + i] == '\t') {
@@ -1525,33 +1542,6 @@ enum opsize get_opsize_from_specifier(const char size_specifier)
return OPSIZE_NONE;
}
-static bool is_expression_token(const enum token_type type)
-{
- switch (type) {
- case TT_PLUS: return true;
- case TT_MINUS: return true;
- case TT_ASTERISK: return true;
- case TT_SLASH: return true;
- case TT_PERCENT: return true;
- case TT_LSHIFT: return true;
- case TT_RSHIFT: return true;
- case TT_HASH: return true;
- case TT_BANG: return true;
- case TT_TILDE: return true;
- case TT_AMPERSAND: return true;
- case TT_PIPE: return true;
- case TT_CAP: return true;
- case TT_ID: return true;
- case TT_NUMDEC: return true;
- case TT_NUMOCT: return true;
- case TT_NUMHEX: return true;
- case TT_LPAREN: return true;
- case TT_RPAREN: return true;
- default: return false;
- }
- return false;
-}
-
static struct token pars_peek(const struct pars *const self)
{
return self->lex->tokbuf[self->cur_tok_id];
@@ -1666,33 +1656,71 @@ static int pars_parse_expr(
// yielded.
const size_t first_token_id = self->cur_tok_id;
unsigned nesting = 0;
+ // Otherwise expect open parenthesis, number, or unary operator.
+ bool expect_close_or_binary = false;
while (!pars_is_eof_reached(self)) {
const struct token token = pars_peek(self);
if (token.type == TT_LPAREN) {
- nesting++;
+ if (expect_close_or_binary) {
+ if (nesting == 0) {
+ break;
+ }
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE);
+ } else {
+ nesting++;
+ }
+ } else if (token.type == TT_MINUS) {
+ // Minus is both unary and binary operator, so it does not care
+ // about expression parsing state
+ expect_close_or_binary = false;
+ } else if (token.type == TT_TILDE) {
+ if (expect_close_or_binary) {
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE);
+ }
+ } else if (token.type == TT_ID) {
+ if (expect_close_or_binary) {
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE);
+ }
+ if (pars_recognize_token(self, token).type == RTT_REG) {
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR_NONREG);
+ }
+ expect_close_or_binary = true;
+ } else if (token_is_number(token.type)) {
+ if (expect_close_or_binary) {
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR_CLOSE);
+ }
+ expect_close_or_binary = true;
+ } else if (token_is_binary_operator(token.type)) {
+ if (!expect_close_or_binary) {
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN);
+ }
+ expect_close_or_binary = false;
} else if (token.type == TT_RPAREN) {
+ if (!expect_close_or_binary) {
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR_OPEN);
+ }
if (nesting == 0) {
// This is not my closing parenthesis, should stop
break;
}
nesting--;
- } else if (is_expression_token(token.type)) {
- // TODO parse expression
} else {
- if (nesting > 0) {
- return pars_yield_error(self, self->cur_tok_id, E_EXPR);
+ if (nesting == 0) {
+ break;
}
- break;
+ return pars_yield_error(
+ self,
+ self->cur_tok_id,
+ expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN);
}
pars_commit(self);
}
+ assert(first_token_id != self->cur_tok_id);
if (nesting != 0) {
- return pars_yield_error_nesting(
- self, first_token_id, self->cur_tok_id - first_token_id);
- }
- if (first_token_id == self->cur_tok_id) {
- // Nothing has been parsed but expression expected
- return pars_yield_error(self, self->cur_tok_id, E_EXPR);
+ assert(pars_is_eof_reached(self));
+ return pars_yield_error_eof(
+ self,
+ expect_close_or_binary ? E_EXPR_CLOSE : E_EXPR_OPEN);
}
*expr = (struct expr_tokens_span){
.first_token = first_token_id,
@@ -1709,8 +1737,36 @@ static int pars_parse_arg_after_prefix_expr(
// - Standalone expression
// - Standalone expression with size suffix like ".l"
// - Prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn)
- (void) self;
- (void) arg;
+ if (pars_is_eof_reached(self)) {
+ // It was a standalone expression without size suffix, yield an
+ // argument from here
+ arg->type = ARG_ADDR_UNSPEC;
+ return OK;
+ }
+ const struct token token0 = pars_peek(self);
+ if (token0.type == TT_NEWLINE) {
+ // It was a standalone expression without size suffix, yield an
+ // argument from here
+ arg->type = ARG_ADDR_UNSPEC;
+ return OK;
+ } if (token0.type == TT_DOT_ID) {
+ // It must be a size specifier, or error otherwise
+ const size_t size_spec_id = pars_commit(self);
+ if (token0.length != 2) {
+ return pars_yield_error(self, size_spec_id, E_ADDR_SIZE_SPEC);
+ }
+ const enum opsize addrsize =
+ get_opsize_from_specifier(self->lex->input[token0.offset + 1]);
+ if (addrsize == OPSIZE_NONE || addrsize == OPSIZE_S) {
+ return pars_yield_error(self, size_spec_id, E_ADDR_SIZE_SPEC);
+ }
+ arg->type = addrsize == OPSIZE_L ? ARG_ADDR_LONG : ARG_ADDR_WORD;
+ return OK;
+ } else if (token0.type == TT_LPAREN) {
+ // It was a prefix expression for (An), (PC), (An,Xn) or (PC,Xn)
+ pars_commit(self);
+ return pars_parse_arg_inside_parens(self, arg);
+ }
return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL);
}
@@ -1824,10 +1880,10 @@ static int pars_parse_arg_inside_parens(
pars_commit(self);
}
} else if (arg->expr.first_token == 0) {
- const int ret = pars_parse_expr(self, &arg->expr);
- if (ret != OK) {
- return ret;
- }
+ const int ret = pars_parse_expr(self, &arg->expr);
+ if (ret != OK) {
+ return ret;
+ }
} else {
return pars_yield_error(self, self->cur_tok_id, E_EA_PART_NOT_EXPR);
}
@@ -2147,7 +2203,7 @@ static int pars_parse_instruction(
if (size_spec.length != 2) {
return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC);
}
- const size_t opsize =
+ const enum opsize opsize =
get_opsize_from_specifier(self->lex->input[size_spec.offset + 1]);
if (opsize == OPSIZE_NONE) {
return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC);