summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOxore <oxore@protonmail.com>2023-06-25 23:13:44 +0300
committerOxore <oxore@protonmail.com>2023-06-25 23:13:58 +0300
commite882f7f28ae38ad6b42f8d558d30a0bafd8e5b32 (patch)
treef0b3a6c47adcb6bc08b7b5a1e954bb4728fa771f
parent66bfe8a24d9b1ca83d45396a9f9c962379d1895e (diff)
WIP: Impl some complex addressing modes parsing
-rw-r--r--main.c665
1 files changed, 533 insertions, 132 deletions
diff --git a/main.c b/main.c
index 77df6cb..4e343cb 100644
--- a/main.c
+++ b/main.c
@@ -274,8 +274,9 @@ enum arg_type {
ARG_ADDR_LONG,
ARG_ADDR_UNSPEC,
ARG_PC_ADDR_16,
- ARG_PC_ADDR_8_XN,
+ ARG_PC_ADDR_8_XI,
ARG_IMMEDIATE,
+ ARG_REGMASK,
ARG_SR,
ARG_CCR,
ARG_USP,
@@ -285,35 +286,21 @@ enum arg_type {
enum args_count {
ARGS_COUNT_UNKNOWN = 0,
ARGS_COUNT_0,
- ARGS_COUNT_0_1,
- ARGS_COUNT_0_1_2,
- ARGS_COUNT_0_2,
ARGS_COUNT_1,
ARGS_COUNT_1_2,
ARGS_COUNT_2,
};
-struct arg_16 {
- int16_t d;
- int8_t an;
-};
-
-struct arg_8 {
- int8_t d;
- int8_t an;
- int8_t xi;
-};
-
-union arg_contents {
- int8_t xn; // For Dn, An, (An), -(An), (An)+
- struct arg_16 arg_16; // For (d16,An) and (d16,PC)
- struct arg_8 arg_8; // For (d8,An,Xi) and (d8,PC,Xn)
+struct expr_tokens_span {
+ size_t first_token, num_tokens;
};
struct arg {
- enum arg_type arg_type;
- union arg_contents arg_contents;
- size_t first_token, num_tokens; // Expression tokens span, may be NULL
+ enum arg_type type;
+ int8_t xn; ///< Register number for Dn, An, (An), -(An), (An)+, (d16,An); also printed as the base register of (d8,An,Xi)
+ // NOTE(review): the parser stores an address register index as the negated
+ // register number, so A0 becomes 0 and cannot be told apart from D0.
+ int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it is negative if An
+ struct expr_tokens_span expr; ///< Expression tokens span; the parser treats first_token == 0 as "no expression"
+ size_t first_token, num_tokens; ///< Argument tokens span
};
struct instruction {
@@ -356,6 +343,34 @@ enum pars_error {
PE_SOME,
};
+// Kind of register named by a token, as reported by pars_recognize_token().
+enum reg_type {
+ REG_NONE = 0, ///< Value of a zero-initialized recognition result
+ REG_DN, ///< "dN" data register
+ REG_AN, ///< "aN" address register; "sp" is reported as number 7
+ REG_PC, ///< "pc"
+ REG_SR, ///< "sr"
+ REG_CCR, ///< "ccr"
+ REG_USP, ///< "usp"
+};
+
+// Selects which member of the union inside struct token_recognition is valid.
+enum recognized_token_type {
+ RTT_NONE = 0, // Token was not recognized (zero-initialized result)
+ RTT_REG, // TT_ID
+ RTT_NUMBER, // TT_NUMHEX, TT_NUMOCT and TT_NUMDEC; not produced yet, number recognition is still TODO
+};
+
+// Result of classifying a single token, see pars_recognize_token().
+struct token_recognition {
+ enum recognized_token_type type;
+ union {
+ struct {
+ enum reg_type reg;
+ int8_t reg_num; // Set for REG_DN and REG_AN only, left zero otherwise
+ }; // For RTT_REG
+ int32_t number; // For RTT_NUMBER
+ // NOTE(review): no recognized_token_type value selects symbol_id yet.
+ size_t symbol_id; // For TT_ID and TT_DOT_ID, see (struct pars).symtab
+ };
+};
+
struct pars {
const struct lex *lex;
// State
@@ -571,6 +586,16 @@ static int printed_size(const char c)
return 1;
}
+/// Tells whether the token type is one of the numeric literal kinds
+/// (hexadecimal, decimal or octal).
+static bool token_is_number(const enum token_type type)
+{
+    switch (type) {
+    case TT_NUMHEX:
+    case TT_NUMDEC:
+    case TT_NUMOCT:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/// Tells whether the token type can separate registers inside a register
+/// mask: a slash or a minus.
+static bool token_is_regmask_delimiter(const enum token_type type)
+{
+    switch (type) {
+    case TT_SLASH:
+    case TT_MINUS:
+        return true;
+    default:
+        return false;
+    }
+}
+
static int fprint_string_escaped(
const char *const str, const size_t length, FILE *const stream)
{
@@ -1174,8 +1199,9 @@ static const char *arg_type_to_string(const enum arg_type type)
case ARG_ADDR_LONG: return "(xxx).l";
case ARG_ADDR_UNSPEC: return "(xxx).?";
case ARG_PC_ADDR_16: return "(d16,PC)";
- case ARG_PC_ADDR_8_XN: return "(d8,PC,Xn)";
+ case ARG_PC_ADDR_8_XI: return "(d8,PC,Xn)";
case ARG_IMMEDIATE: return "#imm";
+ case ARG_REGMASK: return "REGMASK";
case ARG_SR: return "SR";
case ARG_CCR: return "CCR";
case ARG_USP: return "USP";
@@ -1226,40 +1252,41 @@ static void fprint_arg(
const struct arg *const arg,
FILE *const s)
{
- fprintf(s, "(%s", arg_type_to_string(arg->arg_type));
- switch (arg->arg_type) {
+ fprintf(s, "(%s", arg_type_to_string(arg->type));
+ switch (arg->type) {
case ARG_NONE:
case ARG_DN:
case ARG_AN:
case ARG_AN_ADDR:
case ARG_AN_ADDR_INCR:
case ARG_AN_ADDR_DECR:
- fprintf(s, " reg %d", arg->arg_contents.xn);
+ fprintf(s, " reg %d", arg->xn);
break;
case ARG_AN_ADDR_16:
- fprintf(s, " reg %d", arg->arg_contents.arg_16.an);
- fprintf(s, " d16 %d", arg->arg_contents.arg_16.d);
+ fprintf(s, " reg %d", arg->xn);
+ fprintf(s, " d16 [see raw]"); // TODO print expr tokens
break;
case ARG_AN_ADDR_8_XI:
- fprintf(s, " reg %d", arg->arg_contents.arg_8.an);
- fprintf(s, " d8 %d", arg->arg_contents.arg_8.d);
- fprintf(s, " xi %d", arg->arg_contents.arg_8.xi);
+ fprintf(s, " reg %d", arg->xn);
+ fprintf(s, " d8 [see raw]"); // TODO print expr tokens
+ fprintf(s, " xi %d", arg->xi);
break;
case ARG_ADDR_WORD:
case ARG_ADDR_LONG:
case ARG_ADDR_UNSPEC:
- fprintf(s, " addr [see raw]");
+ fprintf(s, " addr [see raw]"); // TODO print expr tokens
break;
case ARG_PC_ADDR_16:
- fprintf(s, " d16 [see raw]");
+ fprintf(s, " d16 [see raw]"); // TODO print expr tokens
break;
- case ARG_PC_ADDR_8_XN:
- fprintf(s, " d8 [see raw]");
- fprintf(s, " xn %d", arg->arg_contents.arg_8.xi);
+ case ARG_PC_ADDR_8_XI:
+ fprintf(s, " d8 [see raw]"); // TODO print expr tokens
+ fprintf(s, " xi %d", arg->xi);
break;
case ARG_IMMEDIATE:
- fprintf(s, " value [see raw]");
+ fprintf(s, " value [see raw]"); // TODO print expr tokens
break;
+ case ARG_REGMASK:
case ARG_SR:
case ARG_CCR:
case ARG_USP:
@@ -1294,13 +1321,13 @@ static int fprint_stmt(
if (stmt->type == ST_INSTRUCTION) {
fprintf(s, "\n\t(mnemonic \"%s\")", mnemonic_to_string(stmt->instruction.mnemonic));
fprintf(s, "\n\t(size %s)", opsize_to_string(stmt->instruction.opsize));
- if (stmt->instruction.arg1.arg_type != ARG_NONE) {
+ if (stmt->instruction.arg1.type != ARG_NONE) {
fprintf(s, "\n\t(arg1 ");
fprint_arg(lex, &stmt->instruction.arg1, s);
fprintf(s, ")");
}
- if (stmt->instruction.arg2.arg_type != ARG_NONE) {
- assert(stmt->instruction.arg1.arg_type != ARG_NONE);
+ if (stmt->instruction.arg2.type != ARG_NONE) {
+ assert(stmt->instruction.arg1.type != ARG_NONE);
fprintf(s, "\n\t(arg2 ");
fprint_arg(lex, &stmt->instruction.arg2, s);
fprintf(s, ")");
@@ -1444,27 +1471,129 @@ static bool is_expression_token(const enum token_type type)
return false;
}
-static int pars_parse_arg(
- struct pars *const self, struct arg *const arg)
+// Returns a copy of the token at the current position without consuming it.
+// The read is not bounds-checked, so the caller has to rule out EOF first.
+static struct token pars_peek(const struct pars *const self)
{
- const size_t tokens_count = self->lex->tokbuf_size /
- (sizeof *self->lex->tokbuf);
- const size_t first_token_id = self->cur_tok_id;
- int nesting = 0;
- int commas = 0;
- enum arg_type arg_type = ARG_EXPR;
- while (self->cur_tok_id < tokens_count) {
- const size_t token_id = self->cur_tok_id; // Peek
- const struct token token = self->lex->tokbuf[token_id];
- if (nesting == 1 && token.type == TT_COMMA) {
- if (commas >= 2) {
- return pars_yield_error(self, self->cur_tok_id);
- } else {
- commas++;
+ return self->lex->tokbuf[self->cur_tok_id];
+}
+
+/// Returns a copy of the token `more` positions ahead of the current one
+/// without consuming anything.
+///
+/// Looking past the end of the token buffer yields a zero-initialized token
+/// instead of reading out of bounds, so callers may compare the returned type
+/// against the token type they are looking for without a prior EOF check.
+///
+/// NOTE(review): assumes `tokbuf_size` is a size in bytes and that token type
+/// 0 is not a meaningful token type -- confirm against the lexer.
+static struct token pars_peek_more(
+        const struct pars *const self, const size_t more)
+{
+    const size_t tokens_count =
+        self->lex->tokbuf_size / (sizeof *self->lex->tokbuf);
+    // Written as a subtraction so that a huge `more` cannot wrap the index
+    if (self->cur_tok_id >= tokens_count ||
+            more >= tokens_count - self->cur_tok_id) {
+        return (struct token){0};
+    }
+    return self->lex->tokbuf[self->cur_tok_id + more];
+}
+
+/// Consumes the current token: advances the parser by one token and returns
+/// the index of the token that has just been consumed.
+static size_t pars_commit(struct pars *const self)
+{
+    const size_t committed_id = self->cur_tok_id;
+    self->cur_tok_id = committed_id + 1;
+    return committed_id;
+}
+
+/// Checks that the first two characters spell "pc" or "PC" (mixed case is
+/// rejected). The caller is responsible for checking the token length.
+static bool is_pc(const char *const str)
+{
+    if (str[0] == 'p') {
+        return str[1] == 'c';
+    }
+    if (str[0] == 'P') {
+        return str[1] == 'C';
+    }
+    return false;
+}
+
+/// Checks that the first two characters spell "sp" or "SP" (mixed case is
+/// rejected). The caller is responsible for checking the token length.
+static bool is_sp(const char *const str)
+{
+    if (str[0] == 's') {
+        return str[1] == 'p';
+    }
+    if (str[0] == 'S') {
+        return str[1] == 'P';
+    }
+    return false;
+}
+
+/// Checks that the first two characters spell "sr" or "SR" (mixed case is
+/// rejected). The caller is responsible for checking the token length.
+static bool is_sr(const char *const str)
+{
+    if (str[0] == 's') {
+        return str[1] == 'r';
+    }
+    if (str[0] == 'S') {
+        return str[1] == 'R';
+    }
+    return false;
+}
+
+/// Checks that the first three characters spell "ccr" or "CCR" (mixed case is
+/// rejected). The caller is responsible for checking the token length.
+static bool is_ccr(const char *const str)
+{
+    if (str[0] == 'c') {
+        return str[1] == 'c' && str[2] == 'r';
+    }
+    if (str[0] == 'C') {
+        return str[1] == 'C' && str[2] == 'R';
+    }
+    return false;
+}
+
+/// Checks that the first three characters spell "usp" or "USP" (mixed case is
+/// rejected). The caller is responsible for checking the token length.
+static bool is_usp(const char *const str)
+{
+    if (str[0] == 'u') {
+        return str[1] == 's' && str[2] == 'p';
+    }
+    if (str[0] == 'U') {
+        return str[1] == 'S' && str[2] == 'P';
+    }
+    return false;
+}
+
+/// Classifies a single token as a register name when possible.
+///
+/// Only TT_ID tokens are examined so far:
+/// - length 2: "aN"/"dN" with N in 0..7 (any letter case), "sp" (reported as
+///   REG_AN number 7), "pc" and "sr";
+/// - length 3: "ccr" and "usp".
+/// Number tokens are not recognized yet (TODO). Everything else yields a
+/// zero-initialized result, i.e. `type == RTT_NONE`.
+static struct token_recognition pars_recognize_token(
+        const struct pars *const self, const struct token token)
+{
+    const char *const str = self->lex->input + token.offset;
+    if (token.type == TT_ID) {
+        if (token.length == 2) {
+            // There are only eight registers of each kind, so "a8", "d9" and
+            // the like are ordinary identifiers, not registers.
+            const bool is_reg_digit = is_dec(str[1]) && str[1] <= '7';
+            // <ctype.h> functions need a value representable as unsigned char
+            const int letter = tolower((unsigned char)str[0]);
+            if (letter == 'a' && is_reg_digit) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_AN,
+                    .reg_num = str[1] - '0',
+                };
+            } else if (letter == 'd' && is_reg_digit) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_DN,
+                    .reg_num = str[1] - '0',
+                };
+            } else if (is_sp(str)) {
+                // SP is an alias of A7
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_AN,
+                    .reg_num = 7,
+                };
+            } else if (is_pc(str)) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_PC,
+                };
+            } else if (is_sr(str)) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_SR,
+                };
+            }
- } else if (token.type == TT_LPAREN) {
+        } else if (token.length == 3) {
+            if (is_ccr(str)) {
+                // Must be REG_CCR: reporting REG_DN here would turn "ccr"
+                // into data register D0.
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_CCR,
+                };
+            } else if (is_usp(str)) {
+                return (struct token_recognition){
+                    .type = RTT_REG,
+                    .reg = REG_USP,
+                };
+            }
+        }
+    } else if (token.type == TT_NUMDEC) {
+        // TODO
+    } else if (token.type == TT_NUMOCT) {
+        // TODO
+    } else if (token.type == TT_NUMHEX) {
+        // TODO
+    }
+    return (struct token_recognition){0};
+}
+
+static int pars_parse_expr(
+ struct pars *const self, struct expr_tokens_span *const expr)
+{
+ // This function is called only when expression is expected unconditionally,
+ // so if the first token cannot be a part of expression, then error must be
+ // yielded.
+ const size_t first_token_id = self->cur_tok_id;
+ unsigned nesting = 0;
+ while (!pars_is_eof_reached(self)) {
+ const struct token token = pars_peek(self);
+ if (token.type == TT_LPAREN) {
nesting++;
} else if (token.type == TT_RPAREN) {
+ if (nesting == 0) {
+ // This is not my closing parenthesis, should stop
+ break;
+ }
nesting--;
} else if (is_expression_token(token.type)) {
// TODO parse expression
@@ -1474,22 +1603,318 @@ static int pars_parse_arg(
}
break;
}
- self->cur_tok_id++; // Commit
+ pars_commit(self);
}
if (nesting != 0) {
return pars_yield_error_nesting(
self, first_token_id, self->cur_tok_id - first_token_id);
}
if (first_token_id == self->cur_tok_id) {
- // Nothing has been parsed
- *arg = (struct arg){0};
- } else {
- *arg = (struct arg){
- .arg_type = arg_type,
- // TODO arg_contents
- .first_token = first_token_id,
- .num_tokens = self->cur_tok_id - first_token_id,
- };
+ // Nothing has been parsed but expression expected
+ return pars_yield_error(self, self->cur_tok_id);
+ }
+ *expr = (struct expr_tokens_span){
+ .first_token = first_token_id,
+ .num_tokens = self->cur_tok_id - first_token_id,
+ };
+ return OK;
+}
+
+/// Continues parsing an argument once a leading expression has been parsed
+/// and committed. That expression may turn out to be:
+/// - a standalone expression,
+/// - a standalone expression with a size suffix like ".l",
+/// - a prefix expression followed by (An), (PC), (An,Xn) or (PC,Xn).
+///
+/// Not implemented yet: always yields an error at the current token.
+static int pars_parse_arg_after_prefix_expr(
+        struct pars *const self, struct arg *const arg)
+{
+    (void) arg; // Nothing is stored into the argument yet
+    const size_t unexpected_id = self->cur_tok_id;
+    return pars_yield_error(self, unexpected_id);
+}
+
+/// Parses an argument whose first token is a minus: either the -(An)
+/// addressing mode or an expression that begins with unary minus.
+///
+/// On entry cur_tok_id points to the minus, which has been peeked but not
+/// committed. On success `arg` is filled for -(An); for an expression the
+/// work is handed over to pars_parse_expr() and
+/// pars_parse_arg_after_prefix_expr().
+///
+/// NOTE(review): assumes `tokbuf_size` is a size in bytes -- confirm against
+/// the lexer.
+static int pars_parse_arg_starts_with_minus(
+        struct pars *const self, struct arg *const arg)
+{
+    const size_t first_token_id = self->cur_tok_id;
+    const size_t tokens_count =
+        self->lex->tokbuf_size / (sizeof *self->lex->tokbuf);
+    // Number of tokens available starting from the minus, inclusive. The
+    // lookahead below must be checked against it: checking for EOF at the
+    // current position says nothing about the tokens after the minus.
+    const size_t tokens_left =
+        first_token_id < tokens_count ? tokens_count - first_token_id : 0;
+    if (tokens_left < 2) {
+        pars_commit(self); // The minus token
+        // Just single minus is invalid expression
+        return pars_yield_error_eof(self);
+    }
+    if (pars_peek_more(self, 1).type == TT_LPAREN) {
+        // It is still either expression or -(An)
+        if (tokens_left < 3) {
+            // "-(" is invalid expression
+            pars_commit(self); // "-"
+            pars_commit(self); // "("
+            return pars_yield_error_eof(self);
+        }
+        const struct token token2 = pars_peek_more(self, 2);
+        if (token2.type == TT_ID) {
+            const struct token_recognition r =
+                pars_recognize_token(self, token2);
+            if (r.type == RTT_REG && r.reg == REG_AN) {
+                // It is definitely -(An). Commit "-", "(" and "An", then
+                // expect closing parenthesis.
+                self->cur_tok_id += 3;
+                if (pars_is_eof_reached(self)) {
+                    // "-(An" with nothing after it
+                    return pars_yield_error_eof(self);
+                }
+                const size_t rparen_id = pars_commit(self);
+                const struct token rparen = self->lex->tokbuf[rparen_id];
+                if (rparen.type != TT_RPAREN) {
+                    // But it has to be a closing parenthesis!
+                    return pars_yield_error(self, rparen_id);
+                }
+                *arg = (struct arg){
+                    .type = ARG_AN_ADDR_DECR,
+                    .xn = r.reg_num,
+                    .first_token = first_token_id,
+                    .num_tokens = self->cur_tok_id - first_token_id,
+                };
+                return OK;
+            }
+        }
+    }
+    // Otherwise it is expression - either prefix or standalone
+    const int ret = pars_parse_expr(self, &arg->expr);
+    if (ret != OK) {
+        return ret;
+    }
+    return pars_parse_arg_after_prefix_expr(self, arg);
+}
+
+/// Parses the contents of a parenthesized argument, up to and including the
+/// closing parenthesis (and the trailing "+" of (An)+).
+///
+/// On entry cur_tok_id points right after the opening parenthesis, which has
+/// already been committed, and arg->first_token is set by the caller.
+/// The argument can be one of:
+/// - (expr)(An)
+/// - (expr)(An,Xi) or (expr)(Xi,An)
+/// - (expr)(PC,Xi) or (expr)(Xi,PC)
+/// - (An) or (An)+
+/// - (An,expr) or (expr,An)
+/// - (PC,expr) or (expr,PC)
+/// - (An,expr,Xi), (An,Xi,expr), (expr,An,Xi), (expr,Xi,An), (Xi,expr,An) or
+///   (Xi,An,expr)
+/// - (PC,expr,Xi), (PC,Xi,expr), (expr,PC,Xi), (expr,Xi,PC), (Xi,expr,PC) or
+///   (Xi,PC,expr)
+///
+/// A parenthesized group that holds nothing but an expression is treated as
+/// the beginning of an expression and handed over to
+/// pars_parse_arg_after_prefix_expr().
+///
+/// Returns OK with `arg` filled in, or the result of yielding an error.
+static int pars_parse_arg_inside_parens(
+        struct pars *const self, struct arg *const arg)
+{
+    bool an1_found = false, an2_found = false, dn_found = false;
+    bool pc_found = false;
+    int8_t an1 = 0, an2 = 0, dn = 0;
+    // A nonzero expr.first_token means an expression is already present
+    unsigned parts = arg->expr.first_token ? 1 : 0;
+    while (parts < 3) {
+        if (pars_is_eof_reached(self)) {
+            return pars_yield_error_eof(self);
+        }
+        const struct token token0 = pars_peek(self);
+        // Only identifiers may be An/Dn/PC registers
+        const struct token_recognition r = token0.type == TT_ID
+            ? pars_recognize_token(self, token0)
+            : (struct token_recognition){0};
+        if (r.type == RTT_REG) {
+            switch (r.reg) {
+            case REG_DN:
+                dn_found = true;
+                dn = r.reg_num;
+                break;
+            case REG_AN:
+                if (!an1_found) {
+                    an1_found = true;
+                    an1 = r.reg_num;
+                } else if (!an2_found) {
+                    an2_found = true;
+                    an2 = r.reg_num;
+                } else {
+                    return pars_yield_error(self, pars_commit(self));
+                }
+                break;
+            case REG_PC:
+                pc_found = true;
+                break;
+            case REG_NONE:
+                UNREACHABLE();
+                // fallthrough
+            case REG_SR:
+            case REG_CCR:
+            case REG_USP:
+                return pars_yield_error(self, pars_commit(self));
+            }
+            pars_commit(self);
+        } else {
+            // Anything that is not a register, including an identifier that
+            // names a symbol, has to be the expression part.
+            const int ret = pars_parse_expr(self, &arg->expr);
+            if (ret != OK) {
+                return ret;
+            }
+        }
+        parts++;
+        if (pars_is_eof_reached(self)) {
+            return pars_yield_error_eof(self);
+        }
+        const struct token delim = pars_peek(self);
+        const size_t delim_id = pars_commit(self);
+        if (delim.type == TT_COMMA) {
+            if (parts >= 3) {
+                // No addressing mode has more than three parts, and leaving
+                // the loop here would skip the closing parenthesis.
+                return pars_yield_error(self, delim_id);
+            }
+            continue;
+        } else if (delim.type == TT_RPAREN) {
+            if (parts == 1 && arg->expr.first_token) {
+                assert(!an1_found && !an2_found && !dn_found && !pc_found);
+                // It turns out we are inside of expression, so both
+                // parentheses are part of it. Let's accumulate them and move
+                // on.
+                arg->expr.first_token--;
+                arg->expr.num_tokens += 2;
+                return pars_parse_arg_after_prefix_expr(self, arg);
+            }
+            break;
+        } else {
+            return pars_yield_error(self, delim_id);
+        }
+    }
+    // NOTE(review): an address register used as index is stored negated, so
+    // A0 becomes 0 and is indistinguishable from D0.
+    if (parts == 1 && an1_found) {
+        // It is either (An) or (An)+
+        assert(!pc_found && !dn_found && !arg->expr.first_token);
+        arg->type = ARG_AN_ADDR;
+        if (!pars_is_eof_reached(self) && pars_peek(self).type == TT_PLUS) {
+            pars_commit(self);
+            arg->type = ARG_AN_ADDR_INCR;
+        }
+        arg->xn = an1;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    } else if (parts == 2 && an1_found && arg->expr.first_token) {
+        // It is (An,d16) or (d16,An)
+        assert(!an2_found && !pc_found && !dn_found);
+        arg->type = ARG_AN_ADDR_16;
+        arg->xn = an1;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    } else if (parts == 2 && pc_found && arg->expr.first_token) {
+        // It is (PC,d16) or (d16,PC)
+        assert(!an1_found && !an2_found && !dn_found);
+        arg->type = ARG_PC_ADDR_16;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    } else if (parts == 3 && pc_found && arg->expr.first_token
+            && (an1_found || dn_found)) {
+        // It is (d8,PC,Xi): exactly one index register, either An or Dn
+        assert(!an2_found && an1_found != dn_found);
+        arg->type = ARG_PC_ADDR_8_XI;
+        arg->xi = an1_found ? -an1 : dn;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    } else if (parts == 3 && an1_found && arg->expr.first_token
+            && (an2_found || dn_found)) {
+        // It is (d8,An,Xi): the first An is the base, the index is either
+        // the second An or Dn
+        assert(!pc_found && an2_found != dn_found);
+        arg->type = ARG_AN_ADDR_8_XI;
+        arg->xn = an1;
+        arg->xi = an2_found ? -an2 : dn;
+        arg->num_tokens = self->cur_tok_id - arg->first_token;
+        return OK;
+    }
+    return pars_yield_error(self, self->cur_tok_id);
+}
+
+/// Parses a register mask argument. On entry cur_tok_id points to the first
+/// register token, which has been peeked but not committed.
+///
+/// Very much TODO: for now it skips that token and yields an error at the
+/// token following it.
+static int pars_parse_arg_regmask(
+        struct pars *const self, struct arg *const arg)
+{
+    (void) arg; // Nothing is stored into the argument yet
+    self->cur_tok_id += 1;
+    return pars_yield_error(self, self->cur_tok_id);
+}
+
+// Parses one instruction argument starting at the current token and fills
+// `arg` with its type, register fields, expression span and token span.
+// The first token decides which specialized parser takes over:
+// "#" -> immediate, "-" -> -(An) or expression, "~" or a number -> expression,
+// "(" -> addressing mode or expression, identifier -> register, regmask or
+// expression.
+// Returns OK and leaves `arg` untouched at EOF; returns OK with only
+// arg->first_token set when the first token cannot start an argument.
+static int pars_parse_arg(
+ struct pars *const self, struct arg *const arg)
+{
+ if (pars_is_eof_reached(self)) {
+ return OK;
+ }
+ const size_t first_token_id = self->cur_tok_id;
+ arg->first_token = first_token_id;
+ const struct token token0 = pars_peek(self);
+ if (token0.type == TT_HASH) {
+ // Definitely an immediate value expression
+ pars_commit(self);
+ const int ret = pars_parse_expr(self, &arg->expr);
+ if (ret != OK) {
+ return ret;
+ }
+ arg->type = ARG_IMMEDIATE;
+ arg->num_tokens = self->cur_tok_id - first_token_id;
+ return OK;
+ } else if (token0.type == TT_MINUS) {
+ // It is either expression or -(An)
+ return pars_parse_arg_starts_with_minus(self, arg);
+ } else if (token0.type == TT_TILDE || token_is_number(token0.type)) {
+ // Tilde is unary operation, so it must be an expression
+ const int ret = pars_parse_expr(self, &arg->expr);
+ if (ret != OK) {
+ return ret;
+ }
+ return pars_parse_arg_after_prefix_expr(self, arg);
+ } else if (token0.type == TT_LPAREN) {
+ // It is either expression or addressing mode (An) / (An)+ / (d16,An) /
+ // (d8,An,Xn) / (d8,PC,Xn) / (d16,An)
+ pars_commit(self);
+ return pars_parse_arg_inside_parens(self, arg);
+ } else if (token0.type == TT_ID) {
+ // It is either expression, regmask or just An/Dn/PC/SR/SP/CCR register
+ struct token_recognition r = pars_recognize_token(self, token0);
+ if (r.type == RTT_REG) {
+ // This is definitely a register or regmask.
+ switch (r.reg) {
+ case REG_NONE:
+ UNREACHABLE();
+ return pars_yield_error(self, first_token_id);
+ case REG_DN:
+ // NOTE(review): this one-token lookahead is done without an EOF check
+ // in this function.
+ if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) {
+ // Note: the register is not committed
+ return pars_parse_arg_regmask(self, arg);
+ }
+ arg->type = ARG_DN;
+ arg->xn = r.reg_num;
+ break;
+ case REG_AN:
+ // NOTE(review): same unchecked one-token lookahead as for REG_DN.
+ if (token_is_regmask_delimiter(pars_peek_more(self, 1).type)) {
+ // Note: the register is not committed
+ return pars_parse_arg_regmask(self, arg);
+ }
+ arg->type = ARG_AN;
+ arg->xn = r.reg_num;
+ break;
+ case REG_PC:
+ // A bare PC is not accepted as an argument
+ return pars_yield_error(self, first_token_id);
+ case REG_SR:
+ arg->type = ARG_SR;
+ break;
+ case REG_CCR:
+ arg->type = ARG_CCR;
+ break;
+ case REG_USP:
+ arg->type = ARG_USP;
+ break;
+ }
+ // Commit the single register token
+ pars_commit(self);
+ arg->num_tokens = self->cur_tok_id - first_token_id;
+ return OK;
+ } else {
+ // An identifier that is not a register starts an expression
+ const int ret = pars_parse_expr(self, &arg->expr);
+ if (ret != OK) {
+ return ret;
+ }
+ return pars_parse_arg_after_prefix_expr(self, arg);
+ }
+ }
return OK;
}
@@ -1509,8 +1934,11 @@ static int pars_yield_instruction(
if (mnemonic == MN_NONE) {
return pars_yield_error(self, mnemonic_id);
}
- if (arg2) {
- assert(arg1);
+ if (arg2->type != ARG_NONE) {
+ assert(arg1->type != ARG_NONE);
+ }
+ if (arg1->type == ARG_NONE) {
+ assert(arg1->type == ARG_NONE);
}
const enum args_count args_count = get_args_count_for_mnemonic(mnemonic);
// Validate instruction arguments count
@@ -1519,36 +1947,24 @@ static int pars_yield_instruction(
UNREACHABLE();
break;
case ARGS_COUNT_0:
- if (arg1) {
+ if (arg1->type != ARG_NONE) {
return pars_yield_error(self, arg1->first_token);
}
break;
- case ARGS_COUNT_0_1:
- if (arg2) {
- return pars_yield_error(self, arg2->first_token);
- }
- break;
- case ARGS_COUNT_0_1_2:
- break;
- case ARGS_COUNT_0_2:
- if (arg1 && !arg2) {
- return pars_yield_error(self, mnemonic_id);
- }
- break;
case ARGS_COUNT_1:
- if (!arg1) {
+ if (arg1->type == ARG_NONE) {
return pars_yield_error(self, mnemonic_id);
- } else if (arg2) {
+ } else if (arg2->type != ARG_NONE) {
return pars_yield_error(self, arg2->first_token);
}
break;
case ARGS_COUNT_1_2:
- if (!arg1) {
+ if (arg1->type == ARG_NONE) {
return pars_yield_error(self, mnemonic_id);
}
break;
case ARGS_COUNT_2:
- if (!arg1 || !arg2) {
+ if (arg1->type == ARG_NONE || arg2->type == ARG_NONE) {
return pars_yield_error(self, mnemonic_id);
}
break;
@@ -1582,21 +1998,19 @@ static int pars_parse_instruction_comment(
size_t comment_id = 0;
if (!pars_is_eof_reached(self)) {
// Try parse comment
- const size_t token1_id = self->cur_tok_id; // Peek comment
- const struct token token1 = self->lex->tokbuf[token1_id];
+ const struct token token1 = pars_peek(self);
const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||
token1.type == TT_COMMENT_SEMICOLON;
if (is_comment) {
- self->cur_tok_id++; // Commit comment
- comment_id = token1_id;
+ comment_id = pars_commit(self);
}
- if (!pars_is_eof_reached(self)) {
- // Handle new line
- const size_t nl_id = self->cur_tok_id++; // Commit new line
- const struct token nl = self->lex->tokbuf[nl_id];
- if (nl.type != TT_NEWLINE) {
- return pars_yield_error(self, nl_id);
- }
+ }
+ if (!pars_is_eof_reached(self)) {
+ // There must be a new line if not EOF
+ const size_t nl_id = pars_commit(self);
+ const struct token nl = self->lex->tokbuf[nl_id];
+ if (nl.type != TT_NEWLINE) {
+ return pars_yield_error(self, nl_id);
}
}
return pars_yield_instruction(
@@ -1609,35 +2023,25 @@ static int pars_parse_instruction_args(
const size_t mnemonic_id,
const enum opsize opsize)
{
- struct arg arg1, arg2;
+ struct arg arg1 = {0}, arg2 = {0};
// Try parse first argument
const int res1 = pars_parse_arg(self, &arg1);
if (res1 != OK) {
return res1;
}
- if (arg1.arg_type == ARG_NONE) {
- return pars_parse_instruction_comment(
- self, label_id, mnemonic_id, opsize, NULL, NULL);
- }
- if (pars_is_eof_reached(self)) {
- return pars_yield_instruction(
- self, label_id, 0, mnemonic_id, opsize, &arg1, NULL);
- }
- const size_t comma_id = self->cur_tok_id; // Peek comma
- const struct token comma = self->lex->tokbuf[comma_id];
- if (comma.type != TT_COMMA) {
- return pars_parse_instruction_comment(
- self, label_id, mnemonic_id, opsize, NULL, NULL);
- }
- self->cur_tok_id++; // Commit comma
- // Try parse second argument
- const int res2 = pars_parse_arg(self, &arg2);
- if (res2 != OK) {
- return res2;
- }
- if (pars_is_eof_reached(self)) {
- return pars_yield_instruction(
- self, label_id, 0, mnemonic_id, opsize, &arg1, &arg2);
+ if (arg1.type != ARG_NONE) {
+ if (pars_is_eof_reached(self)) {
+ return pars_yield_instruction(
+ self, label_id, 0, mnemonic_id, opsize, &arg1, NULL);
+ }
+ if (pars_peek(self).type == TT_COMMA) {
+ pars_commit(self);
+ // Try parse second argument
+ const int res2 = pars_parse_arg(self, &arg2);
+ if (res2 != OK) {
+ return res2;
+ }
+ }
}
// Finish parsing instruction, expect comment or newline
return pars_parse_instruction_comment(
@@ -1652,10 +2056,9 @@ static int pars_parse_instruction(
if (pars_is_eof_reached(self)) {
return pars_yield_error_eof(self);
}
- const size_t size_spec_id = self->cur_tok_id; // Peek
- const struct token size_spec = self->lex->tokbuf[size_spec_id];
+ const struct token size_spec = pars_peek(self);
if (size_spec.type == TT_DOT_ID) {
- self->cur_tok_id++; // Commit
+ const size_t size_spec_id = pars_commit(self);
// Size specifier
if (size_spec.length != 2) {
return pars_yield_error(self, size_spec_id);
@@ -1699,7 +2102,7 @@ static int pars_yield_label_comment(
static int pars_parse_labeled_statement(
struct pars *const self, const size_t label_id)
{
- const size_t token1_id = self->cur_tok_id++;
+ const size_t token1_id = pars_commit(self);
const struct token token1 = self->lex->tokbuf[token1_id];
const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||
token1.type == TT_COMMENT_SEMICOLON;
@@ -1711,17 +2114,15 @@ static int pars_parse_labeled_statement(
if (pars_is_eof_reached(self)) {
return pars_yield_error_eof(self);
}
- const size_t token2_id = self->cur_tok_id; // Peek
- const struct token token2 = self->lex->tokbuf[token2_id];
+ const struct token token2 = pars_peek(self);
if (!label_id && token2.type == TT_COLON) {
- self->cur_tok_id++; // Commit
+ pars_commit(self);
return pars_parse_labeled_statement(self, token1_id);
} else if (token2.type == TT_EQ || token2.type == TT_EQ_DOUBLE) {
- self->cur_tok_id++; // Commit
- return pars_parse_assignment(self, label_id, token2_id);
- } else {
- return pars_parse_instruction(self, label_id, token1_id);
+ pars_commit(self);
+ return pars_parse_assignment(self, label_id, token1_id);
}
+ return pars_parse_instruction(self, label_id, token1_id);
} else if (token1.type == TT_DOT_ID) {
return pars_parse_direc(self, &token1);
}