summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOxore <oxore@protonmail.com>2023-06-26 01:18:11 +0300
committerOxore <oxore@protonmail.com>2023-06-26 01:18:11 +0300
commite46fb8f882b00fe9d24b2dc8810b033164c4b10c (patch)
treeeb07378620d9abbf19efd169dfd0045e3f43d1cd
parentb4f2fd813384a918b617e17aa394f72f8779a0a0 (diff)
Fix complex addr mode parsing, improve error messages
-rw-r--r--main.c202
1 files changed, 132 insertions, 70 deletions
diff --git a/main.c b/main.c
index 4e343cb..c134010 100644
--- a/main.c
+++ b/main.c
@@ -28,6 +28,24 @@
#define UNREACHABLE()
#endif
// Text fragments for parser diagnostics introduced by this change.
// In the hunks below they are used in two ways:
//  - as the whole message given to pars_yield_error_msg()
//    (E_UNIMPL, E_UNREACH, E_EA_INVALID, E_ARGS_COUNT);
//  - as the `expected` description given to pars_yield_error() or
//    pars_yield_error_eof(), which is printed after the word "expected".
+#define E_UNIMPL "unimplemented"
+#define E_UNREACH "unreachable code reached"
+#define E_EXPR "expression token"
+#define E_EA_PART "An, Dn, PC or full expression"
+#define E_EA_PART_NOT_AN "Dn, PC or full expression"
+#define E_EA_PART_NOT_EXPR "An, Dn or PC"
+#define E_EA_PART_DELIM "',' or ')'"
+#define E_EA_INVALID "invalid addressing mode"
+#define E_DN_AN "Dn or An"
+#define E_ARG "valid instruction argument"
+#define E_MNEMONIC "valid instruction mnemonic"
+#define E_INSN_SIZE_SPEC "'.s', '.b', '.w' or '.l'"
+#define E_ARGS_COUNT "invalid arguments count"
// The doubled backslashes make the message show the escape sequences
// literally instead of embedding real control characters.
+#define E_NL "new line '\\n', '\\r\\n' or '\\r'"
// The two composite messages below rely on adjacent string literal
// concatenation to reuse E_MNEMONIC, E_DIRECTIVE and E_NL.
+#define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC
+#define E_DIRECTIVE "directive"
+#define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL
+
#define ERR 0
#define OK 1
#define CONTINUE 2
@@ -123,6 +141,7 @@ struct lex {
FILE *tokbuf_stream;
struct token *tokbuf;
size_t tokbuf_size;
+ size_t tokens_count;
};
enum stmt_type {
@@ -700,6 +719,7 @@ static void lex_yield_token(struct lex *const self, const struct token *const to
{
self->inside_line = (token->type != TT_NEWLINE) && (token->type != TT_ESCAPE);
fwrite_token(token, self->tokbuf_stream);
+ self->tokens_count++;
}
static const char *lex_state_error_string(
@@ -1102,10 +1122,12 @@ static int lex_next(struct lex *const self, FILE *const stream)
if (OK == ret) {
return OK;
} else if (ERR == ret) {
- // TODO handle errors
return ERR;
}
if (c == EOF) {
+ // Add a hidden EOF token of 0 size
+ lex_yield_token(self, &(struct token){TT_NONE, self->cursor, 0});
+ self->tokens_count--;
break;
}
}
@@ -1229,9 +1251,7 @@ static int pars_init(struct pars *const self, const struct lex *const lex)
// Returns true when the parser cursor (cur_tok_id) is at or past the number
// of tokens counted by the lexer.
//
// This change replaces the limit computed from tokbuf_size / sizeof *tokbuf
// with the lexer's tokens_count field. In lex_next the zero-size token
// appended at EOF is yielded and tokens_count is decremented right after, so
// that token is not included in the limit used here.
static bool pars_is_eof_reached(const struct pars *const self)
{
- const size_t tokens_count = self->lex->tokbuf_size /
- (sizeof *self->lex->tokbuf);
- return self->cur_tok_id >= tokens_count;
+ return self->cur_tok_id >= self->lex->tokens_count;
}
static const char *stmt_type_to_string(const enum stmt_type type)
@@ -1370,20 +1390,20 @@ static size_t find_line_length(const char *const str)
return 0;
}
-static int pars_yield_error_str(
+static int pars_yield_error_msg(
struct pars *const self,
- const struct line_pos_info l,
- const char *const found,
- const size_t found_length)
+ const size_t token_id,
+ const char *const msg)
{
+ const struct token token = self->lex->tokbuf[token_id];
+ const struct line_pos_info l =
+ lex_get_line_pos_info(self->lex, token.offset);
fprintf(
stderr,
- "<stdin>:%lu:%lu: parsing error: expected %s, found '%.*s'\n",
+ "<stdin>:%lu:%lu: parsing error: %s\n",
l.line_num + 1,
l.column_num + 1,
- "<_unspecified_token_list>",
- (int)found_length,
- found);
+ msg);
const char *const line = self->lex->input + l.line_offset;
const size_t line_length = find_line_length(line);
fprintf( stderr, "%5lu | %.*s\n", l.line_num, (int)line_length, line);
@@ -1399,13 +1419,50 @@ static int pars_yield_error_str(
return ERR;
}
-static int pars_yield_error(struct pars *const self, const size_t token_id)
// Prints an "expected ..., found '...'" parsing diagnostic to stderr and
// returns ERR.
//
// Output, in order: a "<stdin>:LINE:COL: parsing error: ..." header, the
// offending source line prefixed with its line number, and a marker row that
// places '^' under column l.column_num followed by '~' characters.
//
// self         - parser; only self->lex->input is read here.
// l            - position of the offending text. line_num and column_num are
//                printed with +1 (presumably they are zero-based - confirm
//                against lex_get_line_pos_info).
// found        - text shown as "found"; written via fprint_string_escaped.
// found_length - number of bytes of `found` to show and to underline.
// expected     - description inserted after the word "expected".
+static int pars_yield_error_expected_str(
+ struct pars *const self,
+ const struct line_pos_info l,
+ const char *const found,
+ const size_t found_length,
+ const char *const expected)
+{
+ fprintf(
+ stderr,
+ "<stdin>:%lu:%lu: parsing error: expected %s, found '",
+ l.line_num + 1,
+ l.column_num + 1,
+ expected);
+ fprint_string_escaped(found, found_length, stderr);
+ fputs("'\n", stderr);
+ const char *const line = self->lex->input + l.line_offset;
+ const size_t line_length = find_line_length(line);
+ fprintf( stderr, "%5lu | %.*s\n", l.line_num + 1, (int)line_length, line);
+ fputs(" | ", stderr);
// Pad up to the error column. Tabs found in the source line are copied
// as tabs rather than replaced by a single space (presumably so the marker
// stays aligned with the echoed line when tabs are expanded).
+ for (size_t i = 0; i < l.column_num; i++) {
+ if (self->lex->input[l.line_offset + i] == '\t') {
+ fputc('\t', stderr);
+ } else {
+ fputc(' ', stderr);
+ }
+ }
// One '^' followed by found_length - 1 '~' characters; the loop starts at 1,
// so a found_length of 0 still prints the single '^'.
+ fputc('^', stderr);
+ for (size_t i = 1; i < found_length; i++) {
+ fputc('~', stderr);
+ }
+ fputc('\n', stderr);
+ return ERR;
+}
+
// Reports a parsing error for the token with index token_id.
//
// The token is read from the lexer's tokbuf, its line/column is resolved from
// the token offset with lex_get_line_pos_info, and the token's own text
// (token.length bytes of the input starting at token.offset) is reported as
// the "found" part of the message.
//
// self     - parser whose lexer provides the token buffer and the input text.
// token_id - index into self->lex->tokbuf; not range-checked here.
// expected - description of what the parser expected at this position.
//
// Returns the result of pars_yield_error_expected_str.
+static int pars_yield_error(
+ struct pars *const self,
+ const size_t token_id,
+ const char *const expected)
{
const struct token token = self->lex->tokbuf[token_id];
const struct line_pos_info l =
lex_get_line_pos_info(self->lex, token.offset);
const char *const found = self->lex->input + token.offset;
- return pars_yield_error_str(self, l, found, token.length);
+ return pars_yield_error_expected_str(self, l, found, token.length, expected);
}
static int pars_yield_error_nesting(
@@ -1419,10 +1476,14 @@ static int pars_yield_error_nesting(
return ERR;
}
// Reports that the input ended where `expected` was required.
//
// Before this change the function ignored `self` and only returned ERR; now
// it prints a diagnostic whose "found" text is the literal string "EOF".
//
// The position is taken from the token at self->cur_tok_id. lex_next appends
// a zero-size token at EOF that is not included in tokens_count; presumably
// that is the token read here when the cursor equals tokens_count - confirm
// against the callers.
//
// self     - parser; cur_tok_id selects the token used for the position.
// expected - description of what the parser expected instead of EOF.
//
// Returns the result of pars_yield_error_expected_str.
-static int pars_yield_error_eof(struct pars *const self)
+static int pars_yield_error_eof(
+ struct pars *const self, const char *const expected)
{
- (void) self;
- return ERR;
+ const struct token token = self->lex->tokbuf[self->cur_tok_id];
+ const struct line_pos_info l =
+ lex_get_line_pos_info(self->lex, token.offset);
// (sizeof "EOF") - 1 is the length of the literal without its terminating NUL.
+ return pars_yield_error_expected_str(
+ self, l, "EOF", (sizeof "EOF") - 1, expected);
}
static int pars_parse_direc(
@@ -1430,7 +1491,7 @@ static int pars_parse_direc(
{
(void) self;
(void) dot;
- return pars_yield_error(self, self->cur_tok_id);
+ return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL);
}
enum opsize get_opsize_from_specifier(const char size_specifier)
@@ -1599,7 +1660,7 @@ static int pars_parse_expr(
// TODO parse expression
} else {
if (nesting > 0) {
- return pars_yield_error(self, self->cur_tok_id);
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR);
}
break;
}
@@ -1611,7 +1672,7 @@ static int pars_parse_expr(
}
if (first_token_id == self->cur_tok_id) {
// Nothing has been parsed but expression expected
- return pars_yield_error(self, self->cur_tok_id);
+ return pars_yield_error(self, self->cur_tok_id, E_EXPR);
}
*expr = (struct expr_tokens_span){
.first_token = first_token_id,
@@ -1630,7 +1691,7 @@ static int pars_parse_arg_after_prefix_expr(
// - Prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn)
(void) self;
(void) arg;
- return pars_yield_error(self, self->cur_tok_id);
+ return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL);
}
static int pars_parse_arg_starts_with_minus(
@@ -1642,14 +1703,14 @@ static int pars_parse_arg_starts_with_minus(
if (pars_is_eof_reached(self)) {
pars_commit(self); // The minus token
// Just single minus is invalid expression
- return pars_yield_error_eof(self);
+ return pars_yield_error_eof(self, "'(' or expression");
}
if (pars_peek_more(self, 1).type == TT_LPAREN) {
// It is still either expression or -(An)
if (pars_is_eof_reached(self)) {
// "-(" is invalid expression
pars_commit(self), pars_commit(self); // Commit "-" and "("
- return pars_yield_error_eof(self);
+ return pars_yield_error_eof(self, "An or expression");
}
const struct token token2 = pars_peek_more(self, 2);
if (token2.type == TT_ID) {
@@ -1671,7 +1732,7 @@ static int pars_parse_arg_starts_with_minus(
return OK;
} else {
// But it has to be a closing parenthesis!
- return pars_yield_error(self, rparen_id);
+ return pars_yield_error(self, rparen_id, "')'");
}
}
}
@@ -1706,7 +1767,7 @@ static int pars_parse_arg_inside_parens(
unsigned parts = arg->expr.first_token ? 1 : 0;
while (parts < 3) {
if (pars_is_eof_reached(self)) {
- return pars_yield_error_eof(self);
+ return pars_yield_error_eof(self, E_EA_PART);
}
const struct token token0 = pars_peek(self);
if (token0.type == TT_ID) {
@@ -1727,7 +1788,7 @@ static int pars_parse_arg_inside_parens(
an2_found = true;
an2 = r.reg_num;
} else {
- return pars_yield_error(self, pars_commit(self));
+ return pars_yield_error(self, pars_commit(self), E_EA_PART_NOT_AN);
}
break;
case REG_PC:
@@ -1738,20 +1799,21 @@ static int pars_parse_arg_inside_parens(
case REG_SR:
case REG_CCR:
case REG_USP:
- return pars_yield_error(self, pars_commit(self));
+ return pars_yield_error(self, pars_commit(self), E_EA_PART);
}
pars_commit(self);
- parts++;
}
+ } else if (arg->expr.first_token == 0) {
+ const int ret = pars_parse_expr(self, &arg->expr);
+ if (ret != OK) {
+ return ret;
+ }
} else {
- const int ret = pars_parse_expr(self, &arg->expr);
- if (ret != OK) {
- return ret;
- }
- parts++;
+ return pars_yield_error(self, self->cur_tok_id, E_EA_PART_NOT_EXPR);
}
+ parts++;
if (pars_is_eof_reached(self)) {
- return pars_yield_error_eof(self);
+ return pars_yield_error_eof(self, E_EA_PART_DELIM);
}
const struct token delim = pars_peek(self);
const size_t delim_id = pars_commit(self);
@@ -1770,7 +1832,7 @@ static int pars_parse_arg_inside_parens(
break;
}
} else {
- return pars_yield_error(self, delim_id);
+ return pars_yield_error(self, delim_id, E_EA_PART);
}
}
if (parts == 1 && an1_found) {
@@ -1807,20 +1869,18 @@ static int pars_parse_arg_inside_parens(
// It is (d8,PC,Xn)
assert((an1_found && !dn_found) || (!an1_found && dn_found));
arg->type = ARG_PC_ADDR_8_XI;
- if (an1_found) {
- arg->xi = -an1;
- } else if (dn_found) {
- arg->xi = dn;
- }
+ arg->xi = an1_found ? -an1 : dn;
arg->num_tokens = self->cur_tok_id - arg->first_token;
+ return OK;
} else if (parts == 3 && an1_found && arg->expr.first_token && (an2_found || dn_found)) {
// It is (d8,An,Xn)
assert((an1_found && !dn_found) || (!an1_found && dn_found));
arg->type = ARG_AN_ADDR_8_XI;
arg->xi = an2_found ? -an2 : dn;
arg->num_tokens = self->cur_tok_id - arg->first_token;
+ return OK;
}
- return pars_yield_error(self, self->cur_tok_id);
+ return pars_yield_error_msg(self, self->cur_tok_id, E_EA_INVALID);
}
static int pars_parse_arg_regmask(
@@ -1831,7 +1891,7 @@ static int pars_parse_arg_regmask(
(void) self;
(void) arg;
// Very much TODO.
- return pars_yield_error(self, ++self->cur_tok_id);
+ return pars_yield_error_msg(self, ++self->cur_tok_id, E_UNIMPL);
}
static int pars_parse_arg(
@@ -1876,7 +1936,7 @@ static int pars_parse_arg(
switch (r.reg) {
case REG_NONE:
UNREACHABLE();
- return pars_yield_error(self, first_token_id);
+ return pars_yield_error_msg(self, first_token_id, E_UNREACH);
case REG_DN:
if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) {
// Note: the register is not committed
@@ -1894,7 +1954,7 @@ static int pars_parse_arg(
arg->xn = r.reg_num;
break;
case REG_PC:
- return pars_yield_error(self, first_token_id);
+ return pars_yield_error(self, first_token_id, E_DN_AN);
case REG_SR:
arg->type = ARG_SR;
break;
@@ -1932,7 +1992,7 @@ static int pars_yield_instruction(
const enum mnemonic mnemonic = get_mnemonic_from_identifier(
self->lex->input + mnemonic_token.offset, mnemonic_token.length);
if (mnemonic == MN_NONE) {
- return pars_yield_error(self, mnemonic_id);
+ return pars_yield_error(self, mnemonic_id, E_MNEMONIC);
}
if (arg2->type != ARG_NONE) {
assert(arg1->type != ARG_NONE);
@@ -1948,24 +2008,24 @@ static int pars_yield_instruction(
break;
case ARGS_COUNT_0:
if (arg1->type != ARG_NONE) {
- return pars_yield_error(self, arg1->first_token);
+ return pars_yield_error_msg(self, arg1->first_token, E_ARGS_COUNT);
}
break;
case ARGS_COUNT_1:
if (arg1->type == ARG_NONE) {
- return pars_yield_error(self, mnemonic_id);
+ return pars_yield_error_msg(self, mnemonic_id, E_ARGS_COUNT);
} else if (arg2->type != ARG_NONE) {
- return pars_yield_error(self, arg2->first_token);
+ return pars_yield_error_msg(self, arg2->first_token, E_ARGS_COUNT);
}
break;
case ARGS_COUNT_1_2:
if (arg1->type == ARG_NONE) {
- return pars_yield_error(self, mnemonic_id);
+ return pars_yield_error_msg(self, mnemonic_id, E_ARGS_COUNT);
}
break;
case ARGS_COUNT_2:
if (arg1->type == ARG_NONE || arg2->type == ARG_NONE) {
- return pars_yield_error(self, mnemonic_id);
+ return pars_yield_error_msg(self, mnemonic_id, E_ARGS_COUNT);
}
break;
}
@@ -2010,7 +2070,7 @@ static int pars_parse_instruction_comment(
const size_t nl_id = pars_commit(self);
const struct token nl = self->lex->tokbuf[nl_id];
if (nl.type != TT_NEWLINE) {
- return pars_yield_error(self, nl_id);
+ return pars_yield_error(self, nl_id, E_NL);
}
}
return pars_yield_instruction(
@@ -2030,16 +2090,20 @@ static int pars_parse_instruction_args(
return res1;
}
if (arg1.type != ARG_NONE) {
- if (pars_is_eof_reached(self)) {
- return pars_yield_instruction(
- self, label_id, 0, mnemonic_id, opsize, &arg1, NULL);
- }
- if (pars_peek(self).type == TT_COMMA) {
- pars_commit(self);
- // Try parse second argument
- const int res2 = pars_parse_arg(self, &arg2);
- if (res2 != OK) {
- return res2;
+ if (!pars_is_eof_reached(self)) {
+ if (pars_peek(self).type == TT_COMMA) {
+ pars_commit(self);
+ // Try parse second argument
+ if (pars_is_eof_reached(self)) {
+ return pars_yield_error_eof(self, E_ARG);
+ }
+ if (pars_peek(self).type == TT_NEWLINE) {
+ return pars_yield_error(self, self->cur_tok_id, E_ARG);
+ }
+ const int res2 = pars_parse_arg(self, &arg2);
+ if (res2 != OK) {
+ return res2;
+ }
}
}
}
@@ -2054,19 +2118,19 @@ static int pars_parse_instruction(
const size_t mnemonic_id)
{
if (pars_is_eof_reached(self)) {
- return pars_yield_error_eof(self);
+ return pars_yield_error_eof(self, E_MNEMONIC);
}
const struct token size_spec = pars_peek(self);
if (size_spec.type == TT_DOT_ID) {
const size_t size_spec_id = pars_commit(self);
// Size specifier
if (size_spec.length != 2) {
- return pars_yield_error(self, size_spec_id);
+ return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC);
}
const size_t opsize =
get_opsize_from_specifier(self->lex->input[size_spec.offset + 1]);
if (opsize == OPSIZE_NONE) {
- return pars_yield_error(self, size_spec_id);
+ return pars_yield_error(self, size_spec_id, E_INSN_SIZE_SPEC);
}
return pars_parse_instruction_args(self, label_id, mnemonic_id, opsize);
}
@@ -2079,7 +2143,7 @@ static int pars_parse_assignment(
{
(void) label_id;
(void) symbol_id;
- return pars_yield_error(self, self->cur_tok_id);
+ return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL);
}
static int pars_yield_label_comment(
@@ -2112,7 +2176,7 @@ static int pars_parse_labeled_statement(
return pars_yield_label_comment(self, label_id, 0);
} else if (token1.type == TT_ID) {
if (pars_is_eof_reached(self)) {
- return pars_yield_error_eof(self);
+ return pars_yield_error_eof(self, E_LABELED_STMT);
}
const struct token token2 = pars_peek(self);
if (!label_id && token2.type == TT_COLON) {
@@ -2126,7 +2190,7 @@ static int pars_parse_labeled_statement(
} else if (token1.type == TT_DOT_ID) {
return pars_parse_direc(self, &token1);
}
- return pars_yield_error(self, token1_id);
+ return pars_yield_error(self, token1_id, E_STMT_BEGIN);
}
static int pars_parse_statement(struct pars *const self)
@@ -2140,14 +2204,12 @@ static int pars_parse_statement(struct pars *const self)
*/
static int pars_run(struct pars *const self)
{
- const size_t tokens_count = self->lex->tokbuf_size /
- (sizeof *self->lex->tokbuf);
// Skip dummy token at position 0
self->cur_tok_id = 1;
// Leave dummy statement at position 0
fwrite_stmt(&(struct stmt){0}, self->stmttab_stream);
int ret = OK;
- while (self->cur_tok_id < tokens_count) {
+ while (self->cur_tok_id < self->lex->tokens_count) {
ret = pars_parse_statement(self);
if (ret != OK) {
break;