summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Oxore <oxore@protonmail.com> 2023-06-28 00:39:26 +0300
committer Oxore <oxore@protonmail.com> 2023-06-28 00:52:44 +0300
commit f77ac6908cca9fb2be32d04fd6387d417cbc9c39 (patch)
tree afbed08bd677bb186351f1b82a4d9d9f5be4fc0d
parent 91bc07e04e9009d5eddaacf7002551046699d313 (diff)
Impl .align and .file directives parsing
-rw-r--r-- main.c 265
1 files changed, 210 insertions, 55 deletions
diff --git a/main.c b/main.c
index df5849c..15cdd87 100644
--- a/main.c
+++ b/main.c
@@ -28,7 +28,7 @@
#define UNREACHABLE()
#endif
-#define E_UNIMPL "unimplemented"
+#define E_NIMPL "not implemented"
#define E_UNREACH "unreachable code reached"
#define E_EXPR "'(', ')', unary operator, binary operator, number or symbol"
#define E_EXPR_NONREG "symbol that is not a register when parsing expression"
@@ -52,10 +52,13 @@
#define E_ADDR_SIZE_SPEC "'.b', '.w' or '.l'"
#define E_ARGS_COUNT "invalid arguments count"
#define E_NL "new line, which is '\\n', '\\r\\n' or '\\r'"
-#define E_INSTR_END "',', comment or " E_NL
+#define E_COMMENT_NL "';' or " E_NL
+#define E_INSTR_END "',', " E_COMMENT_NL
#define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC
#define E_DIRECTIVE "directive"
#define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL
+#define E_UNKNOWN_DRC "unknown directive"
+#define E_STR "string"
#define ERR 0
#define OK 1
@@ -283,6 +286,13 @@ enum mnemonic {
MNEMONICS_COUNT,
};
+// Kinds of assembler directives known to the parser. The values are used as
+// indices into the g_directives table, so both must be kept in the same order.
+enum directive_type {
+ // No directive; returned by get_directive_from_identifier() when nothing matches
+ DT_NONE = 0,
+ // The ".align" directive
+ DT_ALIGN,
+ // The ".file" directive
+ DT_FILE,
+ // Number of enumerators above; used as the length of g_directives
+ DIRECTIVES_COUNT,
+};
+
enum opsize {
OPSIZE_NONE = 0,
OPSIZE_S,
@@ -340,21 +350,16 @@ struct instruction {
struct arg arg1, arg2;
};
-struct def_endef {
- size_t sym_id;
- size_t tag_sym_id;
- int32_t size;
- int32_t storage_class;
- int32_t type;
+// A parsed directive: which directive it is plus the tokens of its arguments.
+struct directive {
+ enum directive_type type;
+ // The span is expressed as token indices: fprint_stmt() prints it by reading
+ // lex->tokbuf[first_token] .. lex->tokbuf[first_token + num_tokens - 1].
+ size_t first_token, num_tokens; /// Token span of the directive arguments
};
struct stmt {
enum stmt_type type;
union {
struct instruction instruction;
- int32_t align;
- size_t globl_sym_id;
- size_t file_sym_id;
+ struct directive directive;
};
size_t label_token;
size_t first_token, num_tokens; // Statement tokens span, may be NULL
@@ -458,7 +463,7 @@ const char *const g_escape_table[256] = {
"\\xfd", "\\xfe",
};
-struct mnemonic_meta {
+const struct mnemonic_meta {
const char *str;
enum args_count args_count;
} g_mnemmonics[MNEMONICS_COUNT] = {
@@ -579,6 +584,18 @@ struct mnemonic_meta {
{ "unlk", ARGS_COUNT_1 },
};
+static int pars_directive_handler_align(struct pars *, size_t);
+static int pars_directive_handler_file(struct pars *, size_t);
+
+const struct directive_description {
+ const char *str;
+ int (*handler)(struct pars *, size_t lable_id);
+} g_directives[DIRECTIVES_COUNT] = {
+ { "", NULL, },
+ { "align", pars_directive_handler_align, },
+ { "file", pars_directive_handler_file, },
+};
+
static bool should_be_escaped(const int c)
{
return c < ' ' || c == '"' || c == '\\' || c == '<' || c == '>' || c > '~';
@@ -1210,6 +1227,11 @@ static const char *mnemonic_to_string(const enum mnemonic m)
return g_mnemmonics[m].str;
}
+// Returns the name stored in g_directives for directive `t` (lower case, no
+// leading dot). `t` must be below DIRECTIVES_COUNT.
+static const char *directive_to_string(const enum directive_type t)
+{
+    assert(t < DIRECTIVES_COUNT);
+    const struct directive_description *const description = &g_directives[t];
+    return description->str;
+}
+
static const char *opsize_to_string(const enum opsize s)
{
switch (s) {
@@ -1242,6 +1264,27 @@ static enum mnemonic get_mnemonic_from_identifier(
return MN_NONE;
}
+// Looks up a directive by name, ignoring letter case.
+//
+// `str` points at the directive name without the leading dot and does not have
+// to be NUL-terminated; `str_length` is the number of chars to look at.
+// Returns the matching directive type or DT_NONE when the name is unknown.
+static enum directive_type get_directive_from_identifier(
+        const char *const str, const size_t str_length)
+{
+    // The longest directive has 8 chars (without the leading dot), e.g.
+    // "bsection" or "external". One more char is reserved for the terminating
+    // NUL, which the zero initialization below provides.
+    char directive_str[9] = {0};
+    if (str_length >= sizeof directive_str) {
+        return DT_NONE;
+    }
+    for (size_t i = 0; i < str_length; i++) {
+        // tolower() is only defined for EOF and values representable as
+        // unsigned char, so a plain char holding a non-ASCII byte must be
+        // converted before the call.
+        directive_str[i] = (char)tolower((unsigned char)str[i]);
+    }
+    // Start from 1 since entry 0 is the dummy DT_NONE
+    for (size_t i = 1; i < DIRECTIVES_COUNT; i++) {
+        if (0 == strcmp(directive_str, g_directives[i].str)) {
+            return (enum directive_type)i;
+        }
+    }
+    return DT_NONE;
+}
+
static const char *arg_type_to_string(const enum arg_type type)
{
switch (type) {
@@ -1303,6 +1346,25 @@ static const char *stmt_type_to_string(const enum stmt_type type)
return "_UNKNOWN";
}
+// Prints the raw text of up to `num_tokens` tokens, starting at index
+// `first_token` of lex->tokbuf, to stream `s`. Every token is wrapped in double
+// quotes and tokens are separated by a single space. Printing stops early at
+// the first TT_NEWLINE token. Always returns 0.
+static int fprint_tokens(
+        const struct lex *const lex,
+        const size_t first_token,
+        const size_t num_tokens,
+        FILE *const s)
+{
+    for (size_t n = 0; n != num_tokens; n++) {
+        const struct token current = lex->tokbuf[first_token + n];
+        if (current.type == TT_NEWLINE) {
+            // The line terminator ends the printable part of the span
+            return 0;
+        }
+        if (n != 0) {
+            fputc(' ', s);
+        }
+        const char *const text = lex->input + current.offset;
+        fprintf(s, "\"%.*s\"", (int)current.length, text);
+    }
+    return 0;
+}
+
static void fprint_expr(
const struct lex *const lex,
const struct expr_tokens_span *const expr,
@@ -1386,18 +1448,9 @@ static void fprint_arg(
case ARG_EXPR:
break;
}
- fprintf(s, " raw \"");
- for (size_t i = 0; i < arg->num_tokens; i++) {
- const struct token token = lex->tokbuf[arg->first_token + i];
- if (token.type == TT_NEWLINE) {
- break;
- }
- if (i > 0) {
- fputc(' ', s);
- }
- fprintf(s, "%.*s", (int)token.length, lex->input + token.offset);
- }
- fprintf(s, "\")");
+ fprintf(s, " raw-tokens [");
+ fprint_tokens(lex, arg->first_token, arg->num_tokens, s);
+ fprintf(s, "])");
}
static int fprint_stmt(
@@ -1409,10 +1462,17 @@ static int fprint_stmt(
fprintf(s, "(%s", stmt_type_to_string(stmt->type));
if (stmt->label_token) {
const struct token label = lex->tokbuf[stmt->label_token];
- fprintf(s, "\n\t(label \"%.*s\")", (int)label.length, lex->input + label.offset);
+ fprintf(
+ s,
+ "\n\t(label \"%.*s\")",
+ (int)label.length,
+ lex->input + label.offset);
}
if (stmt->type == ST_INSTRUCTION) {
- fprintf(s, "\n\t(mnemonic \"%s\")", mnemonic_to_string(stmt->instruction.mnemonic));
+ fprintf(
+ s,
+ "\n\t(mnemonic \"%s\")",
+ mnemonic_to_string(stmt->instruction.mnemonic));
fprintf(s, "\n\t(size %s)", opsize_to_string(stmt->instruction.opsize));
if (stmt->instruction.arg1.type != ARG_NONE) {
fprintf(s, "\n\t(arg1 ");
@@ -1425,23 +1485,30 @@ static int fprint_stmt(
fprint_arg(lex, &stmt->instruction.arg2, s);
fprintf(s, ")");
}
+ } else if (stmt->type == ST_DIRECTIVE) {
+ fprintf(
+ s,
+ "\n\t(name \"%s\")",
+ directive_to_string(stmt->directive.type));
+ fprintf(s, "\n\t(arg (raw-tokens [");
+ fprint_tokens(
+ lex,
+ stmt->directive.first_token,
+ stmt->directive.num_tokens,
+ s);
+ fprintf(s, "]))");
}
if (stmt->comment_token) {
const struct token comment = lex->tokbuf[stmt->comment_token];
- fprintf(s, "\n\t(comment \"%.*s\")", (int)comment.length, lex->input + comment.offset);
- }
- fprintf(s, "\n\t(raw \"");
- for (size_t i = 0; i < stmt->num_tokens; i++) {
- const struct token token = lex->tokbuf[stmt->first_token + i];
- if (token.type == TT_NEWLINE) {
- break;
- }
- if (i > 0) {
- fputc(' ', s);
- }
- fprintf(s, "%.*s", (int)token.length, lex->input + token.offset);
- }
- fprintf(s, "\"))\n");
+ fprintf(
+ s,
+ "\n\t(comment \"%.*s\")",
+ (int)comment.length,
+ lex->input + comment.offset);
+ }
+ fprintf(s, "\n\t(raw-tokens [");
+ fprint_tokens(lex, stmt->first_token, stmt->num_tokens, s);
+ fprintf(s, "]))\n");
return 0;
}
@@ -1537,14 +1604,6 @@ static int pars_yield_error_eof(
self, l, "EOF", (sizeof "EOF") - 1, expected);
}
-static int pars_parse_direc(
- struct pars *const self, const struct token *const dot)
-{
- (void) self;
- (void) dot;
- return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL);
-}
-
enum opsize get_opsize_from_specifier(const char size_specifier)
{
switch (tolower(size_specifier)) {
@@ -1728,7 +1787,7 @@ static int pars_parse_expr(
}
nesting--;
} else {
- if (nesting == 0) {
+ if (nesting == 0 && expect_close_or_binary) {
break;
}
return pars_yield_error(
@@ -1746,6 +1805,101 @@ static int pars_parse_expr(
return OK;
}
+// Consumes the tail of a statement line: an optional comment followed by a new
+// line token. Reaching EOF at either point is accepted as well.
+//
+// On success returns OK and stores the comment token id in
+// `*output_comment_id` (0 when there was no comment). When the token that
+// should be a new line is something else, an error is yielded and
+// `*output_comment_id` is left untouched.
+static int pars_parse_comment_and_newline(
+        struct pars *const self, size_t *const output_comment_id)
+{
+    size_t comment_id = 0;
+    if (!pars_is_eof_reached(self)) {
+        // A comment is optional, so only commit the token if it is one
+        const struct token candidate = pars_peek(self);
+        switch (candidate.type) {
+        case TT_COMMENT_ASTERISK:
+        case TT_COMMENT_SEMICOLON:
+            comment_id = pars_commit(self);
+            break;
+        default:
+            break;
+        }
+    }
+    if (pars_is_eof_reached(self)) {
+        // Input ended without a line terminator, nothing more to consume
+        *output_comment_id = comment_id;
+        return OK;
+    }
+    const size_t terminator_id = pars_commit(self);
+    if (self->lex->tokbuf[terminator_id].type != TT_NEWLINE) {
+        // A second comment cannot follow, so mention ';' only when no comment
+        // has been consumed yet
+        const char *const expected = comment_id ? E_NL : E_COMMENT_NL;
+        return pars_yield_error(self, terminator_id, expected);
+    }
+    *output_comment_id = comment_id;
+    return OK;
+}
+
+// Completes a directive statement whose arguments are already parsed: consumes
+// the optional trailing comment and the new line, then writes an ST_DIRECTIVE
+// statement to self->stmttab_stream.
+//
+// Returns OK on success, otherwise whatever pars_parse_comment_and_newline()
+// returned; in that case no statement is written.
+//
+// NOTE(review): the statement token span starts at `label_id`. Confirm that
+// callers without a label still pass the index of the first statement token,
+// since 0 would make the span start at the very first token of the input.
+static int pars_finish_directive(
+        struct pars *const self,
+        const size_t label_id,
+        const struct directive directive)
+{
+    size_t trailing_comment = 0;
+    const int status = pars_parse_comment_and_newline(self, &trailing_comment);
+    if (status != OK) {
+        return status;
+    }
+    // Everything from the label up to the current token belongs to the statement
+    const size_t span_length = self->cur_tok_id - label_id;
+    const struct stmt stmt = {
+        .type = ST_DIRECTIVE,
+        .directive = directive,
+        .label_token = label_id,
+        .first_token = label_id,
+        .num_tokens = span_length,
+        .comment_token = trailing_comment,
+    };
+    fwrite_stmt(&stmt, self->stmttab_stream);
+    return OK;
+}
+
+// Handler of the ".align" directive: parses a single expression as the
+// directive argument and finishes the statement. Returns the result of
+// pars_parse_expr() when it is not OK, otherwise the result of
+// pars_finish_directive().
+static int pars_directive_handler_align(
+        struct pars *const self, const size_t label_id)
+{
+    struct expr_tokens_span alignment;
+    const int status = pars_parse_expr(self, &alignment);
+    if (status == OK) {
+        // The directive keeps the token span of the expression as is
+        const struct directive align = {
+            .type = DT_ALIGN,
+            .first_token = alignment.first_token,
+            .num_tokens = alignment.num_tokens,
+        };
+        return pars_finish_directive(self, label_id, align);
+    }
+    return status;
+}
+
+// Handler of the ".file" directive: expects exactly one string token as the
+// argument and finishes the statement. Yields an error at the current token,
+// without consuming it, when that token is not a string.
+static int pars_directive_handler_file(
+        struct pars *const self, const size_t label_id)
+{
+    const struct token candidate = pars_peek(self);
+    if (candidate.type == TT_STRING) {
+        // The argument span is the single string token
+        const size_t string_id = pars_commit(self);
+        const struct directive file = {
+            .type = DT_FILE,
+            .first_token = string_id,
+            .num_tokens = 1,
+        };
+        return pars_finish_directive(self, label_id, file);
+    }
+    return pars_yield_error(self, self->cur_tok_id, E_STR);
+}
+
+// Parses a directive statement. The current token is expected to be the
+// dot-prefixed directive name; it is committed only when the name is known.
+// `label_id` is forwarded to the directive handler.
+//
+// Returns the handler's result, or yields the E_UNKNOWN_DRC error at the
+// current token when the name does not match any known directive.
+static int pars_parse_direc(struct pars *const self, const size_t label_id)
+{
+    const struct token dot_id = pars_peek(self);
+    // Skip the leading dot: advance the text pointer by one char and shorten
+    // the length by one accordingly
+    const char *const name = self->lex->input + dot_id.offset + 1;
+    const size_t name_length = dot_id.length - 1;
+    const enum directive_type type =
+        get_directive_from_identifier(name, name_length);
+    if (type != DT_NONE) {
+        pars_commit(self);
+        return g_directives[type].handler(self, label_id);
+    }
+    return pars_yield_error_msg(self, self->cur_tok_id, E_UNKNOWN_DRC);
+}
+
static int pars_parse_arg_after_prefix_expr(
struct pars *const self, struct arg *const arg)
{
@@ -2328,7 +2482,7 @@ static int pars_parse_assignment(
{
(void) label_id;
(void) symbol_id;
- return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL);
+ return pars_yield_error_msg(self, self->cur_tok_id, E_NIMPL);
}
static int pars_yield_label_comment(
@@ -2351,15 +2505,16 @@ static int pars_yield_label_comment(
static int pars_parse_labeled_statement(
struct pars *const self, const size_t label_id)
{
- const size_t token1_id = pars_commit(self);
- const struct token token1 = self->lex->tokbuf[token1_id];
+ const struct token token1 = pars_peek(self);
const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||
token1.type == TT_COMMENT_SEMICOLON;
if (is_comment) {
- return pars_yield_label_comment(self, label_id, token1_id);
+ return pars_yield_label_comment(self, label_id, pars_commit(self));
} else if (token1.type == TT_NEWLINE) {
+ pars_commit(self);
return pars_yield_label_comment(self, label_id, 0);
} else if (token1.type == TT_ID) {
+ const size_t token1_id = pars_commit(self);
if (pars_is_eof_reached(self)) {
return pars_yield_error_eof(self, E_LABELED_STMT);
}
@@ -2373,9 +2528,9 @@ static int pars_parse_labeled_statement(
}
return pars_parse_instruction(self, label_id, token1_id);
} else if (token1.type == TT_DOT_ID) {
- return pars_parse_direc(self, &token1);
+ return pars_parse_direc(self, label_id);
}
- return pars_yield_error(self, token1_id, E_STMT_BEGIN);
+ return pars_yield_error(self, self->cur_tok_id, E_STMT_BEGIN);
}
static int pars_parse_statement(struct pars *const self)