diff options
author | Oxore <oxore@protonmail.com> | 2023-06-28 00:39:26 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-06-28 00:52:44 +0300 |
commit | f77ac6908cca9fb2be32d04fd6387d417cbc9c39 (patch) | |
tree | afbed08bd677bb186351f1b82a4d9d9f5be4fc0d | |
parent | 91bc07e04e9009d5eddaacf7002551046699d313 (diff) |
Impl .align and .file directives parsing
-rw-r--r-- | main.c | 265 |
1 files changed, 210 insertions, 55 deletions
@@ -28,7 +28,7 @@ #define UNREACHABLE() #endif -#define E_UNIMPL "unimplemented" +#define E_NIMPL "not implemented" #define E_UNREACH "unreachable code reached" #define E_EXPR "'(', ')', unary operator, binary operator, number or symbol" #define E_EXPR_NONREG "symbol that is not a register when parsing expression" @@ -52,10 +52,13 @@ #define E_ADDR_SIZE_SPEC "'.b', '.w' or '.l'" #define E_ARGS_COUNT "invalid arguments count" #define E_NL "new line, which is '\\n', '\\r\\n' or '\\r'" -#define E_INSTR_END "',', comment or " E_NL +#define E_COMMENT_NL "';' or " E_NL +#define E_INSTR_END "',', " E_COMMENT_NL #define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC #define E_DIRECTIVE "directive" #define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL +#define E_UNKNOWN_DRC "unknown directive" +#define E_STR "string" #define ERR 0 #define OK 1 @@ -283,6 +286,13 @@ enum mnemonic { MNEMONICS_COUNT, }; +enum directive_type { + DT_NONE = 0, + DT_ALIGN, + DT_FILE, + DIRECTIVES_COUNT, +}; + enum opsize { OPSIZE_NONE = 0, OPSIZE_S, @@ -340,21 +350,16 @@ struct instruction { struct arg arg1, arg2; }; -struct def_endef { - size_t sym_id; - size_t tag_sym_id; - int32_t size; - int32_t storage_class; - int32_t type; +struct directive { + enum directive_type type; + size_t first_token, num_tokens; /// Directive arguments tokens span }; struct stmt { enum stmt_type type; union { struct instruction instruction; - int32_t align; - size_t globl_sym_id; - size_t file_sym_id; + struct directive directive; }; size_t label_token; size_t first_token, num_tokens; // Statement tokens span, may be NULL @@ -458,7 +463,7 @@ const char *const g_escape_table[256] = { "\\xfd", "\\xfe", }; -struct mnemonic_meta { +const struct mnemonic_meta { const char *str; enum args_count args_count; } g_mnemmonics[MNEMONICS_COUNT] = { @@ -579,6 +584,18 @@ struct mnemonic_meta { { "unlk", ARGS_COUNT_1 }, }; +static int pars_directive_handler_align(struct pars *, size_t); +static int pars_directive_handler_file(struct pars *, size_t); + +const struct directive_description { + const char *str; + int (*handler)(struct pars *, size_t lable_id); +} g_directives[DIRECTIVES_COUNT] = { + { "", NULL, }, + { "align", pars_directive_handler_align, }, + { "file", pars_directive_handler_file, }, +}; + static bool should_be_escaped(const int c) { return c < ' ' || c == '"' || c == '\\' || c == '<' || c == '>' || c > '~'; @@ -1210,6 +1227,11 @@ static const char *mnemonic_to_string(const enum mnemonic m) return g_mnemmonics[m].str; } +static const char *directive_to_string(const enum directive_type t) { + assert(t < DIRECTIVES_COUNT); + return g_directives[t].str; +} + static const char *opsize_to_string(const enum opsize s) { switch (s) { @@ -1242,6 +1264,27 @@ static enum mnemonic get_mnemonic_from_identifier( return MN_NONE; } +static enum directive_type get_directive_from_identifier( + const char *const str, const size_t str_length) +{ + // The longest directive have 8 chars (without leading dot), e.g. + // "bsection" or "external". + if (str_length > 8) { + return DT_NONE; + } + char directive_str[9] = {0}; + for (size_t i = 0; i < str_length; i++) { + directive_str[i] = tolower(str[i]); + } + // Start from 1 since - is dummy NONE + for (size_t i = 1; i < DIRECTIVES_COUNT; i++) { + if (0 == strcmp(directive_str, g_directives[i].str)) { + return (enum directive_type)i; + } + } + return DT_NONE; +} + static const char *arg_type_to_string(const enum arg_type type) { switch (type) { @@ -1303,6 +1346,25 @@ static const char *stmt_type_to_string(const enum stmt_type type) return "_UNKNOWN"; } +static int fprint_tokens( + const struct lex *const lex, + const size_t first_token, + const size_t num_tokens, + FILE *const s) +{ + for (size_t i = 0; i < num_tokens; i++) { + const struct token token = lex->tokbuf[first_token + i]; + if (token.type == TT_NEWLINE) { + break; + } + if (i > 0) { + fputc(' ', s); + } + fprintf(s, "\"%.*s\"", (int)token.length, lex->input + token.offset); + } + return 0; +} + static void fprint_expr( const struct lex *const lex, const struct expr_tokens_span *const expr, @@ -1386,18 +1448,9 @@ static void fprint_arg( case ARG_EXPR: break; } - fprintf(s, " raw \""); - for (size_t i = 0; i < arg->num_tokens; i++) { - const struct token token = lex->tokbuf[arg->first_token + i]; - if (token.type == TT_NEWLINE) { - break; - } - if (i > 0) { - fputc(' ', s); - } - fprintf(s, "%.*s", (int)token.length, lex->input + token.offset); - } - fprintf(s, "\")"); + fprintf(s, " raw-tokens ["); + fprint_tokens(lex, arg->first_token, arg->num_tokens, s); + fprintf(s, "])"); } static int fprint_stmt( @@ -1409,10 +1462,17 @@ static int fprint_stmt( fprintf(s, "(%s", stmt_type_to_string(stmt->type)); if (stmt->label_token) { const struct token label = lex->tokbuf[stmt->label_token]; - fprintf(s, "\n\t(label \"%.*s\")", (int)label.length, lex->input + label.offset); + fprintf( + s, + "\n\t(label \"%.*s\")", + (int)label.length, + lex->input + label.offset); } if (stmt->type == ST_INSTRUCTION) { - fprintf(s, "\n\t(mnemonic \"%s\")", mnemonic_to_string(stmt->instruction.mnemonic)); + fprintf( + s, + "\n\t(mnemonic \"%s\")", + mnemonic_to_string(stmt->instruction.mnemonic)); fprintf(s, "\n\t(size %s)", opsize_to_string(stmt->instruction.opsize)); if (stmt->instruction.arg1.type != ARG_NONE) { fprintf(s, "\n\t(arg1 "); @@ -1425,23 +1485,30 @@ static int fprint_stmt( fprint_arg(lex, &stmt->instruction.arg2, s); fprintf(s, ")"); } + } else if (stmt->type == ST_DIRECTIVE) { + fprintf( + s, + "\n\t(name \"%s\")", + directive_to_string(stmt->directive.type)); + fprintf(s, "\n\t(arg (raw-tokens ["); + fprint_tokens( + lex, + stmt->directive.first_token, + stmt->directive.num_tokens, + s); + fprintf(s, "]))"); } if (stmt->comment_token) { const struct token comment = lex->tokbuf[stmt->comment_token]; - fprintf(s, "\n\t(comment \"%.*s\")", (int)comment.length, lex->input + comment.offset); - } - fprintf(s, "\n\t(raw \""); - for (size_t i = 0; i < stmt->num_tokens; i++) { - const struct token token = lex->tokbuf[stmt->first_token + i]; - if (token.type == TT_NEWLINE) { - break; - } - if (i > 0) { - fputc(' ', s); - } - fprintf(s, "%.*s", (int)token.length, lex->input + token.offset); - } - fprintf(s, "\"))\n"); + fprintf( + s, + "\n\t(comment \"%.*s\")", + (int)comment.length, + lex->input + comment.offset); + } + fprintf(s, "\n\t(raw-tokens ["); + fprint_tokens(lex, stmt->first_token, stmt->num_tokens, s); + fprintf(s, "]))\n"); return 0; } @@ -1537,14 +1604,6 @@ static int pars_yield_error_eof( self, l, "EOF", (sizeof "EOF") - 1, expected); } -static int pars_parse_direc( - struct pars *const self, const struct token *const dot) -{ - (void) self; - (void) dot; - return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL); -} - enum opsize get_opsize_from_specifier(const char size_specifier) { switch (tolower(size_specifier)) { @@ -1728,7 +1787,7 @@ static int pars_parse_expr( } nesting--; } else { - if (nesting == 0) { + if (nesting == 0 && expect_close_or_binary) { break; } return pars_yield_error( @@ -1746,6 +1805,101 @@ static int pars_parse_expr( return OK; } +static int pars_parse_comment_and_newline( + struct pars *const self, size_t *const output_comment_id) +{ + size_t comment_id = 0; + if (!pars_is_eof_reached(self)) { + // Try parse comment + const struct token token1 = pars_peek(self); + const bool is_comment = token1.type == TT_COMMENT_ASTERISK || + token1.type == TT_COMMENT_SEMICOLON; + if (is_comment) { + comment_id = pars_commit(self); + } + } + if (!pars_is_eof_reached(self)) { + // There must be a new line if not EOF + const size_t nl_id = pars_commit(self); + const struct token nl = self->lex->tokbuf[nl_id]; + if (nl.type != TT_NEWLINE) { + return pars_yield_error( + self, nl_id, comment_id ? E_NL : E_COMMENT_NL); + } + } + *output_comment_id = comment_id; + return OK; +} + +static int pars_finish_directive( + struct pars *const self, + const size_t label_id, + const struct directive directive) +{ + // Finish parsing instruction, expect comment or newline + size_t comment_id = 0; + const int ret = pars_parse_comment_and_newline(self, &comment_id); + if (ret != OK) { + return ret; + } + const struct stmt stmt = { + .type = ST_DIRECTIVE, + .directive = directive, + .label_token = label_id, + .comment_token = comment_id, + .first_token = label_id, + .num_tokens = self->cur_tok_id - label_id, + }; + fwrite_stmt(&stmt, self->stmttab_stream); + return OK; +} + +static int pars_directive_handler_align( + struct pars *const self, const size_t label_id) +{ + struct expr_tokens_span expr; + const int ret = pars_parse_expr(self, &expr); + if (ret != OK) { + return ret; + } + const struct directive directive = { + .type = DT_ALIGN, + .first_token = expr.first_token, + .num_tokens = expr.num_tokens, + }; + return pars_finish_directive(self, label_id, directive); +} + +static int pars_directive_handler_file( + struct pars *const self, const size_t label_id) +{ + const struct token filename = pars_peek(self); + if (filename.type != TT_STRING) { + return pars_yield_error(self, self->cur_tok_id, E_STR); + } + const size_t first_token = pars_commit(self); + const struct directive directive = { + .type = DT_FILE, + .first_token = first_token, + .num_tokens = 1 + }; + return pars_finish_directive(self, label_id, directive); +} + +static int pars_parse_direc(struct pars *const self, const size_t label_id) +{ + const struct token dotid = pars_peek(self); + // Get rid of leading dot in the string pointer and in the length as well by + // adding and subtracting 1 respectively + enum directive_type d = get_directive_from_identifier( + self->lex->input + dotid.offset + 1, dotid.length - 1); + if (d == DT_NONE) { + return pars_yield_error_msg(self, self->cur_tok_id, E_UNKNOWN_DRC); + } + pars_commit(self); + return g_directives[d].handler(self, label_id); +} + static int pars_parse_arg_after_prefix_expr( struct pars *const self, struct arg *const arg) { @@ -2328,7 +2482,7 @@ static int pars_parse_assignment( { (void) label_id; (void) symbol_id; - return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL); + return pars_yield_error_msg(self, self->cur_tok_id, E_NIMPL); } static int pars_yield_label_comment( @@ -2351,15 +2505,16 @@ static int pars_yield_label_comment( static int pars_parse_labeled_statement( struct pars *const self, const size_t label_id) { - const size_t token1_id = pars_commit(self); - const struct token token1 = self->lex->tokbuf[token1_id]; + const struct token token1 = pars_peek(self); const bool is_comment = token1.type == TT_COMMENT_ASTERISK || token1.type == TT_COMMENT_SEMICOLON; if (is_comment) { - return pars_yield_label_comment(self, label_id, token1_id); + return pars_yield_label_comment(self, label_id, pars_commit(self)); } else if (token1.type == TT_NEWLINE) { + pars_commit(self); return pars_yield_label_comment(self, label_id, 0); } else if (token1.type == TT_ID) { + const size_t token1_id = pars_commit(self); if (pars_is_eof_reached(self)) { return pars_yield_error_eof(self, E_LABELED_STMT); } @@ -2373,9 +2528,9 @@ static int pars_parse_labeled_statement( } return pars_parse_instruction(self, label_id, token1_id); } else if (token1.type == TT_DOT_ID) { - return pars_parse_direc(self, &token1); + return pars_parse_direc(self, label_id); } - return pars_yield_error(self, token1_id, E_STMT_BEGIN); + return pars_yield_error(self, self->cur_tok_id, E_STMT_BEGIN); } static int pars_parse_statement(struct pars *const self) |