summary refs log tree commit diff
diff options
context:
space:
mode:
author Oxore <oxore@protonmail.com> 2023-06-25 13:27:05 +0300
committer Oxore <oxore@protonmail.com> 2023-06-25 13:27:05 +0300
commit 5ccc7b6c240bd927305bbacf4fb0a102de8f1147 (patch)
tree 621f3077e57ba1fa35328d941e69a1bdb339c6a0
parent f4666450e21bf4558ace3c93eb474f062a0fda4b (diff)
Impl lexing TT_DOT_ID to support parsing directives
-rw-r--r-- main.c 103
1 files changed, 67 insertions, 36 deletions
diff --git a/main.c b/main.c
index 2bdadc3..dee5171 100644
--- a/main.c
+++ b/main.c
@@ -46,8 +46,8 @@ enum token_type {
TT_EQ_DOUBLE,
TT_COLON,
TT_PERCENT,
- TT_LEFT_SHIFT,
- TT_RIGHT_SHIFT,
+ TT_LSHIFT,
+ TT_RSHIFT,
TT_HASH,
TT_BANG,
TT_TILDE,
@@ -55,7 +55,8 @@ enum token_type {
TT_PIPE,
TT_CAP,
TT_STRING,
- TT_IDENTIFIER,
+ TT_ID,
+ TT_DOT_ID,
TT_NUMDEC,
TT_NUMOCT,
TT_NUMHEX,
@@ -83,10 +84,12 @@ enum lex_error {
enum lex_state {
LS_FREE = 0,
LS_CR,
- LS_LEFT_SHIFT,
- LS_RIGHT_SHIFT,
+ LS_LSHIFT,
+ LS_RSHIFT,
LS_EQ,
- LS_IDENTIFIER,
+ LS_DOT,
+ LS_DOT_ID,
+ LS_ID,
LS_NUMOCTHEX,
LS_NUMOCT,
LS_NUMHEX,
@@ -302,7 +305,7 @@ struct arg_8 {
};
union arg_contents {
- int32_t imm, addr, xn;
+ int8_t xn; // For Dn, An, (An), -(An), (An)+
struct arg_16 arg_16; // For (d16,An) and (d16,PC)
struct arg_8 arg_8; // For (d8,An,Xi) and (d8,PC,Xn)
};
@@ -598,8 +601,8 @@ static const char *token_type_to_string(const enum token_type type)
case TT_EQ_DOUBLE: return "EQ_DOUBLE";
case TT_COLON: return "COLON";
case TT_PERCENT: return "PERCENT";
- case TT_LEFT_SHIFT: return "LEFT_SHIFT";
- case TT_RIGHT_SHIFT: return "RIGHT_SHIFT";
+ case TT_LSHIFT: return "LSHIFT";
+ case TT_RSHIFT: return "RSHIFT";
case TT_HASH: return "HASH";
case TT_BANG: return "BANG";
case TT_TILDE: return "TILDE";
@@ -607,7 +610,8 @@ static const char *token_type_to_string(const enum token_type type)
case TT_PIPE: return "PIPE";
case TT_CAP: return "CAP";
case TT_STRING: return "STRING";
- case TT_IDENTIFIER: return "IDENTIFIER";
+ case TT_ID: return "ID";
+ case TT_DOT_ID: return "DOT_ID";
case TT_NUMDEC: return "NUMDEC";
case TT_NUMOCT: return "NUMOCT";
case TT_NUMHEX: return "NUMHEX";
@@ -703,13 +707,15 @@ static const char *lex_state_error_string(
return "';', '[0-9]' , ',', '.', '(', ')', '+', "
"'-', '=', ':', '%', '#', ' ', '\\t', '\\r', '\\n', '\\r\\n' "
"or EOF";
- case LS_LEFT_SHIFT:
+ case LS_LSHIFT:
return "'<'";
- case LS_RIGHT_SHIFT:
+ case LS_RSHIFT:
return "'>'";
case LS_CR:
case LS_EQ:
- case LS_IDENTIFIER:
+ case LS_DOT:
+ case LS_DOT_ID:
+ case LS_ID:
case LS_STRING:
case LS_STRING_ESC:
case LS_COMMENT_ASTERISK:
@@ -795,7 +801,7 @@ static int lex_handle_next(struct lex *const self, const int c)
case LS_FREE:
if (is_alphabetic(c) || c == '_') {
self->tok_offset = self->cursor;
- self->state = LS_IDENTIFIER;
+ self->state = LS_ID;
} else if (c == '0') {
self->tok_offset = self->cursor;
self->state = LS_NUMOCTHEX;
@@ -816,14 +822,15 @@ static int lex_handle_next(struct lex *const self, const int c)
self->state = LS_COMMENT_SEMICOLON;
} else if (c == '<') {
self->tok_offset = self->cursor;
- self->state = LS_LEFT_SHIFT;
+ self->state = LS_LSHIFT;
} else if (c == '>') {
self->tok_offset = self->cursor;
- self->state = LS_RIGHT_SHIFT;
+ self->state = LS_RSHIFT;
+ } else if (c == '.') {
+ self->tok_offset = self->cursor;
+ self->state = LS_DOT;
} else if (c == ',') {
lex_yield_token(self, &(struct token){TT_COMMA, self->cursor, 1});
- } else if (c == '.') {
- lex_yield_token(self, &(struct token){TT_DOT, self->cursor, 1});
} else if (c == '(') {
lex_yield_token(self, &(struct token){TT_LPAREN, self->cursor, 1});
} else if (c == ')') {
@@ -898,20 +905,20 @@ static int lex_handle_next(struct lex *const self, const int c)
}
}
break;
- case LS_LEFT_SHIFT:
+ case LS_LSHIFT:
if (c == '<') {
const size_t length = self->cursor - self->tok_offset;
- const struct token token = {TT_LEFT_SHIFT, self->tok_offset, length};
+ const struct token token = {TT_LSHIFT, self->tok_offset, length};
lex_yield_token(self, &token);
self->state = LS_FREE;
} else {
return lex_yield_error(self, c);
}
break;
- case LS_RIGHT_SHIFT:
+ case LS_RSHIFT:
if (c == '>') {
const size_t length = self->cursor - self->tok_offset;
- const struct token token = {TT_RIGHT_SHIFT, self->tok_offset, length};
+ const struct token token = {TT_RSHIFT, self->tok_offset, length};
lex_yield_token(self, &token);
self->state = LS_FREE;
} else {
@@ -931,10 +938,28 @@ static int lex_handle_next(struct lex *const self, const int c)
return lex_handle_next(self, c);
}
break;
- case LS_IDENTIFIER:
+ case LS_DOT:
+ if (is_alphanum(c) || c == '_') {
+ self->state = LS_DOT_ID;
+ } else {
+ lex_yield_token(self, &(struct token){TT_DOT, self->tok_offset, 1});
+ self->state = LS_FREE;
+ return lex_handle_next(self, c);
+ }
+ break;
+ case LS_DOT_ID:
if (!is_alphanum(c) && c != '_') {
const size_t length = self->cursor - self->tok_offset;
- const struct token token = {TT_IDENTIFIER, self->tok_offset, length};
+ const struct token token = {TT_DOT_ID, self->tok_offset, length};
+ lex_yield_token(self, &token);
+ self->state = LS_FREE;
+ return lex_handle_next(self, c);
+ }
+ break;
+ case LS_ID:
+ if (!is_alphanum(c) && c != '_') {
+ const size_t length = self->cursor - self->tok_offset;
+ const struct token token = {TT_ID, self->tok_offset, length};
lex_yield_token(self, &token);
self->state = LS_FREE;
return lex_handle_next(self, c);
@@ -1223,17 +1248,17 @@ static void fprint_arg(
case ARG_ADDR_WORD:
case ARG_ADDR_LONG:
case ARG_ADDR_UNSPEC:
- fprintf(s, " addr %d", arg->arg_contents.addr);
+ fprintf(s, " addr [see raw]");
break;
case ARG_PC_ADDR_16:
- fprintf(s, " d16 %d", arg->arg_contents.arg_16.d);
+ fprintf(s, " d16 [see raw]");
break;
case ARG_PC_ADDR_8_XN:
- fprintf(s, " d8 %d", arg->arg_contents.arg_8.d);
+ fprintf(s, " d8 [see raw]");
fprintf(s, " xn %d", arg->arg_contents.arg_8.xi);
break;
case ARG_IMMEDIATE:
- fprintf(s, " value %d", arg->arg_contents.imm);
+ fprintf(s, " value [see raw]");
break;
case ARG_SR:
case ARG_CCR:
@@ -1247,7 +1272,10 @@ static void fprint_arg(
if (token.type == TT_NEWLINE) {
break;
}
- fprintf(s, "%.*s ", (int)token.length, lex->input + token.offset);
+ if (i > 0) {
+ fputc(' ', s);
+ }
+ fprintf(s, "%.*s", (int)token.length, lex->input + token.offset);
}
fprintf(s, "\")");
}
@@ -1288,7 +1316,10 @@ static int fprint_stmt(
if (token.type == TT_NEWLINE) {
break;
}
- fprintf(s, "%.*s ", (int)token.length, lex->input + token.offset);
+ if (i > 0) {
+ fputc(' ', s);
+ }
+ fprintf(s, "%.*s", (int)token.length, lex->input + token.offset);
}
fprintf(s, "\"))\n");
return 0;
@@ -1394,15 +1425,15 @@ static bool is_expression_token(const enum token_type type)
case TT_ASTERISK: return true;
case TT_SLASH: return true;
case TT_PERCENT: return true;
- case TT_LEFT_SHIFT: return true;
- case TT_RIGHT_SHIFT: return true;
+ case TT_LSHIFT: return true;
+ case TT_RSHIFT: return true;
case TT_HASH: return true;
case TT_BANG: return true;
case TT_TILDE: return true;
case TT_AMPERSAND: return true;
case TT_PIPE: return true;
case TT_CAP: return true;
- case TT_IDENTIFIER: return true;
+ case TT_ID: return true;
case TT_NUMDEC: return true;
case TT_NUMOCT: return true;
case TT_NUMHEX: return true;
@@ -1622,7 +1653,7 @@ static int pars_parse_instruction(
}
const size_t size_spec_id = self->cur_tok_id++;
const struct token size_spec = self->lex->tokbuf[size_spec_id];
- if (size_spec.type != TT_IDENTIFIER) {
+ if (size_spec.type != TT_ID) {
return pars_yield_error(self, size_spec_id);
}
// Size specifier
@@ -1679,7 +1710,7 @@ static int pars_parse_labeled_statement(
return pars_yield_label_comment(self, label_id, token1_id);
} else if (token1.type == TT_NEWLINE) {
return pars_yield_label_comment(self, label_id, 0);
- } else if (token1.type == TT_IDENTIFIER) {
+ } else if (token1.type == TT_ID) {
if (pars_is_eof_reached(self)) {
return pars_yield_error_eof(self);
}
@@ -1694,7 +1725,7 @@ static int pars_parse_labeled_statement(
} else {
return pars_parse_instruction(self, label_id, token1_id);
}
- } else if (token1.type == TT_DOT) {
+ } else if (token1.type == TT_DOT_ID) {
return pars_parse_direc(self, &token1);
}
return pars_yield_error(self, token1_id);