diff options
author | Oxore <oxore@protonmail.com> | 2023-06-25 13:27:05 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-06-25 13:27:05 +0300 |
commit | 5ccc7b6c240bd927305bbacf4fb0a102de8f1147 (patch) | |
tree | 621f3077e57ba1fa35328d941e69a1bdb339c6a0 | |
parent | f4666450e21bf4558ace3c93eb474f062a0fda4b (diff) |
Implement lexing of TT_DOT_ID to support parsing directives
-rw-r--r-- | main.c | 103 |
1 file changed, 67 insertions, 36 deletions
@@ -46,8 +46,8 @@ enum token_type { TT_EQ_DOUBLE, TT_COLON, TT_PERCENT, - TT_LEFT_SHIFT, - TT_RIGHT_SHIFT, + TT_LSHIFT, + TT_RSHIFT, TT_HASH, TT_BANG, TT_TILDE, @@ -55,7 +55,8 @@ enum token_type { TT_PIPE, TT_CAP, TT_STRING, - TT_IDENTIFIER, + TT_ID, + TT_DOT_ID, TT_NUMDEC, TT_NUMOCT, TT_NUMHEX, @@ -83,10 +84,12 @@ enum lex_error { enum lex_state { LS_FREE = 0, LS_CR, - LS_LEFT_SHIFT, - LS_RIGHT_SHIFT, + LS_LSHIFT, + LS_RSHIFT, LS_EQ, - LS_IDENTIFIER, + LS_DOT, + LS_DOT_ID, + LS_ID, LS_NUMOCTHEX, LS_NUMOCT, LS_NUMHEX, @@ -302,7 +305,7 @@ struct arg_8 { }; union arg_contents { - int32_t imm, addr, xn; + int8_t xn; // For Dn, An, (An), -(An), (An)+ struct arg_16 arg_16; // For (d16,An) and (d16,PC) struct arg_8 arg_8; // For (d8,An,Xi) and (d8,PC,Xn) }; @@ -598,8 +601,8 @@ static const char *token_type_to_string(const enum token_type type) case TT_EQ_DOUBLE: return "EQ_DOUBLE"; case TT_COLON: return "COLON"; case TT_PERCENT: return "PERCENT"; - case TT_LEFT_SHIFT: return "LEFT_SHIFT"; - case TT_RIGHT_SHIFT: return "RIGHT_SHIFT"; + case TT_LSHIFT: return "LSHIFT"; + case TT_RSHIFT: return "RSHIFT"; case TT_HASH: return "HASH"; case TT_BANG: return "BANG"; case TT_TILDE: return "TILDE"; @@ -607,7 +610,8 @@ static const char *token_type_to_string(const enum token_type type) case TT_PIPE: return "PIPE"; case TT_CAP: return "CAP"; case TT_STRING: return "STRING"; - case TT_IDENTIFIER: return "IDENTIFIER"; + case TT_ID: return "ID"; + case TT_DOT_ID: return "DOT_ID"; case TT_NUMDEC: return "NUMDEC"; case TT_NUMOCT: return "NUMOCT"; case TT_NUMHEX: return "NUMHEX"; @@ -703,13 +707,15 @@ static const char *lex_state_error_string( return "';', '[0-9]' , ',', '.', '(', ')', '+', " "'-', '=', ':', '%', '#', ' ', '\\t', '\\r', '\\n', '\\r\\n' " "or EOF"; - case LS_LEFT_SHIFT: + case LS_LSHIFT: return "'<'"; - case LS_RIGHT_SHIFT: + case LS_RSHIFT: return "'>'"; case LS_CR: case LS_EQ: - case LS_IDENTIFIER: + case LS_DOT: + case LS_DOT_ID: + case LS_ID: case LS_STRING: case 
LS_STRING_ESC: case LS_COMMENT_ASTERISK: @@ -795,7 +801,7 @@ static int lex_handle_next(struct lex *const self, const int c) case LS_FREE: if (is_alphabetic(c) || c == '_') { self->tok_offset = self->cursor; - self->state = LS_IDENTIFIER; + self->state = LS_ID; } else if (c == '0') { self->tok_offset = self->cursor; self->state = LS_NUMOCTHEX; @@ -816,14 +822,15 @@ static int lex_handle_next(struct lex *const self, const int c) self->state = LS_COMMENT_SEMICOLON; } else if (c == '<') { self->tok_offset = self->cursor; - self->state = LS_LEFT_SHIFT; + self->state = LS_LSHIFT; } else if (c == '>') { self->tok_offset = self->cursor; - self->state = LS_RIGHT_SHIFT; + self->state = LS_RSHIFT; + } else if (c == '.') { + self->tok_offset = self->cursor; + self->state = LS_DOT; } else if (c == ',') { lex_yield_token(self, &(struct token){TT_COMMA, self->cursor, 1}); - } else if (c == '.') { - lex_yield_token(self, &(struct token){TT_DOT, self->cursor, 1}); } else if (c == '(') { lex_yield_token(self, &(struct token){TT_LPAREN, self->cursor, 1}); } else if (c == ')') { @@ -898,20 +905,20 @@ static int lex_handle_next(struct lex *const self, const int c) } } break; - case LS_LEFT_SHIFT: + case LS_LSHIFT: if (c == '<') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_LEFT_SHIFT, self->tok_offset, length}; + const struct token token = {TT_LSHIFT, self->tok_offset, length}; lex_yield_token(self, &token); self->state = LS_FREE; } else { return lex_yield_error(self, c); } break; - case LS_RIGHT_SHIFT: + case LS_RSHIFT: if (c == '>') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_RIGHT_SHIFT, self->tok_offset, length}; + const struct token token = {TT_RSHIFT, self->tok_offset, length}; lex_yield_token(self, &token); self->state = LS_FREE; } else { @@ -931,10 +938,28 @@ static int lex_handle_next(struct lex *const self, const int c) return lex_handle_next(self, c); } break; - case LS_IDENTIFIER: + 
case LS_DOT: + if (is_alphanum(c) || c == '_') { + self->state = LS_DOT_ID; + } else { + lex_yield_token(self, &(struct token){TT_DOT, self->tok_offset, 1}); + self->state = LS_FREE; + return lex_handle_next(self, c); + } + break; + case LS_DOT_ID: if (!is_alphanum(c) && c != '_') { const size_t length = self->cursor - self->tok_offset; - const struct token token = {TT_IDENTIFIER, self->tok_offset, length}; + const struct token token = {TT_DOT_ID, self->tok_offset, length}; + lex_yield_token(self, &token); + self->state = LS_FREE; + return lex_handle_next(self, c); + } + break; + case LS_ID: + if (!is_alphanum(c) && c != '_') { + const size_t length = self->cursor - self->tok_offset; + const struct token token = {TT_ID, self->tok_offset, length}; lex_yield_token(self, &token); self->state = LS_FREE; return lex_handle_next(self, c); @@ -1223,17 +1248,17 @@ static void fprint_arg( case ARG_ADDR_WORD: case ARG_ADDR_LONG: case ARG_ADDR_UNSPEC: - fprintf(s, " addr %d", arg->arg_contents.addr); + fprintf(s, " addr [see raw]"); break; case ARG_PC_ADDR_16: - fprintf(s, " d16 %d", arg->arg_contents.arg_16.d); + fprintf(s, " d16 [see raw]"); break; case ARG_PC_ADDR_8_XN: - fprintf(s, " d8 %d", arg->arg_contents.arg_8.d); + fprintf(s, " d8 [see raw]"); fprintf(s, " xn %d", arg->arg_contents.arg_8.xi); break; case ARG_IMMEDIATE: - fprintf(s, " value %d", arg->arg_contents.imm); + fprintf(s, " value [see raw]"); break; case ARG_SR: case ARG_CCR: @@ -1247,7 +1272,10 @@ static void fprint_arg( if (token.type == TT_NEWLINE) { break; } - fprintf(s, "%.*s ", (int)token.length, lex->input + token.offset); + if (i > 0) { + fputc(' ', s); + } + fprintf(s, "%.*s", (int)token.length, lex->input + token.offset); } fprintf(s, "\")"); } @@ -1288,7 +1316,10 @@ static int fprint_stmt( if (token.type == TT_NEWLINE) { break; } - fprintf(s, "%.*s ", (int)token.length, lex->input + token.offset); + if (i > 0) { + fputc(' ', s); + } + fprintf(s, "%.*s", (int)token.length, lex->input + 
token.offset); } fprintf(s, "\"))\n"); return 0; @@ -1394,15 +1425,15 @@ static bool is_expression_token(const enum token_type type) case TT_ASTERISK: return true; case TT_SLASH: return true; case TT_PERCENT: return true; - case TT_LEFT_SHIFT: return true; - case TT_RIGHT_SHIFT: return true; + case TT_LSHIFT: return true; + case TT_RSHIFT: return true; case TT_HASH: return true; case TT_BANG: return true; case TT_TILDE: return true; case TT_AMPERSAND: return true; case TT_PIPE: return true; case TT_CAP: return true; - case TT_IDENTIFIER: return true; + case TT_ID: return true; case TT_NUMDEC: return true; case TT_NUMOCT: return true; case TT_NUMHEX: return true; @@ -1622,7 +1653,7 @@ static int pars_parse_instruction( } const size_t size_spec_id = self->cur_tok_id++; const struct token size_spec = self->lex->tokbuf[size_spec_id]; - if (size_spec.type != TT_IDENTIFIER) { + if (size_spec.type != TT_ID) { return pars_yield_error(self, size_spec_id); } // Size specifier @@ -1679,7 +1710,7 @@ static int pars_parse_labeled_statement( return pars_yield_label_comment(self, label_id, token1_id); } else if (token1.type == TT_NEWLINE) { return pars_yield_label_comment(self, label_id, 0); - } else if (token1.type == TT_IDENTIFIER) { + } else if (token1.type == TT_ID) { if (pars_is_eof_reached(self)) { return pars_yield_error_eof(self); } @@ -1694,7 +1725,7 @@ static int pars_parse_labeled_statement( } else { return pars_parse_instruction(self, label_id, token1_id); } - } else if (token1.type == TT_DOT) { + } else if (token1.type == TT_DOT_ID) { return pars_parse_direc(self, &token1); } return pars_yield_error(self, token1_id); |