diff options
| -rw-r--r-- | main.c | 265 | 
1 files changed, 210 insertions, 55 deletions
| @@ -28,7 +28,7 @@  #define UNREACHABLE()  #endif -#define E_UNIMPL "unimplemented" +#define E_NIMPL "not implemented"  #define E_UNREACH "unreachable code reached"  #define E_EXPR "'(', ')', unary operator, binary operator, number or symbol"  #define E_EXPR_NONREG "symbol that is not a register when parsing expression" @@ -52,10 +52,13 @@  #define E_ADDR_SIZE_SPEC "'.b', '.w' or '.l'"  #define E_ARGS_COUNT "invalid arguments count"  #define E_NL "new line, which is '\\n', '\\r\\n' or '\\r'" -#define E_INSTR_END "',', comment or " E_NL +#define E_COMMENT_NL "';' or " E_NL +#define E_INSTR_END "',', " E_COMMENT_NL  #define E_LABELED_STMT "':', '=', '==' or " E_MNEMONIC  #define E_DIRECTIVE "directive"  #define E_STMT_BEGIN "label, " E_MNEMONIC ", " E_DIRECTIVE " or " E_NL +#define E_UNKNOWN_DRC "unknown directive" +#define E_STR "string"  #define ERR 0  #define OK 1 @@ -283,6 +286,13 @@ enum mnemonic {      MNEMONICS_COUNT,  }; +enum directive_type { +    DT_NONE = 0, +    DT_ALIGN, +    DT_FILE, +    DIRECTIVES_COUNT, +}; +  enum opsize {      OPSIZE_NONE = 0,      OPSIZE_S, @@ -340,21 +350,16 @@ struct instruction {      struct arg arg1, arg2;  }; -struct def_endef { -    size_t sym_id; -    size_t tag_sym_id; -    int32_t size; -    int32_t storage_class; -    int32_t type; +struct directive { +    enum directive_type type; +    size_t first_token, num_tokens; /// Directive arguments tokens span  };  struct stmt {      enum stmt_type type;      union {          struct instruction instruction; -        int32_t align; -        size_t globl_sym_id; -        size_t file_sym_id; +        struct directive directive;      };      size_t label_token;      size_t first_token, num_tokens; // Statement tokens span, may be NULL @@ -458,7 +463,7 @@ const char *const g_escape_table[256] = {      "\\xfd", "\\xfe",  }; -struct mnemonic_meta { +const struct mnemonic_meta {      const char *str;      enum args_count args_count;  } g_mnemmonics[MNEMONICS_COUNT] = { @@ -579,6 +584,18 @@ struct mnemonic_meta {      { "unlk",   ARGS_COUNT_1 },  }; +static int pars_directive_handler_align(struct pars *, size_t); +static int pars_directive_handler_file(struct pars *, size_t); + +const struct directive_description { +    const char *str; +    int (*handler)(struct pars *, size_t lable_id); +} g_directives[DIRECTIVES_COUNT] = { +    { "",           NULL, }, +    { "align",      pars_directive_handler_align, }, +    { "file",       pars_directive_handler_file, }, +}; +  static bool should_be_escaped(const int c)  {      return c < ' ' || c == '"' || c == '\\' || c == '<' || c == '>' || c > '~'; @@ -1210,6 +1227,11 @@ static const char *mnemonic_to_string(const enum mnemonic m)      return g_mnemmonics[m].str;  } +static const char *directive_to_string(const enum directive_type t) { +    assert(t < DIRECTIVES_COUNT); +    return g_directives[t].str; +} +  static const char *opsize_to_string(const enum opsize s)  {      switch (s) { @@ -1242,6 +1264,27 @@ static enum mnemonic get_mnemonic_from_identifier(      return MN_NONE;  } +static enum directive_type get_directive_from_identifier( +        const char *const str, const size_t str_length) +{ +    // The longest directive have 8 chars (without leading dot), e.g. +    // "bsection" or "external". +    if (str_length > 8) { +        return DT_NONE; +    } +    char directive_str[9] = {0}; +    for (size_t i = 0; i < str_length; i++) { +        directive_str[i] = tolower(str[i]); +    } +    // Start from 1 since - is dummy NONE +    for (size_t i = 1; i < DIRECTIVES_COUNT; i++) { +        if (0 == strcmp(directive_str, g_directives[i].str)) { +            return (enum directive_type)i; +        } +    } +    return DT_NONE; +} +  static const char *arg_type_to_string(const enum arg_type type)  {      switch (type) { @@ -1303,6 +1346,25 @@ static const char *stmt_type_to_string(const enum stmt_type type)      return "_UNKNOWN";  } +static int fprint_tokens( +        const struct lex *const lex, +        const size_t first_token, +        const size_t num_tokens, +        FILE *const s) +{ +    for (size_t i = 0; i < num_tokens; i++) { +        const struct token token = lex->tokbuf[first_token + i]; +        if (token.type == TT_NEWLINE) { +            break; +        } +        if (i > 0) { +            fputc(' ', s); +        } +        fprintf(s, "\"%.*s\"", (int)token.length, lex->input + token.offset); +    } +    return 0; +} +  static void fprint_expr(          const struct lex *const lex,          const struct expr_tokens_span *const expr, @@ -1386,18 +1448,9 @@ static void fprint_arg(      case ARG_EXPR:          break;      } -    fprintf(s, " raw \""); -    for (size_t i = 0; i < arg->num_tokens; i++) { -        const struct token token = lex->tokbuf[arg->first_token + i]; -        if (token.type == TT_NEWLINE) { -            break; -        } -        if (i > 0) { -            fputc(' ', s); -        } -        fprintf(s, "%.*s", (int)token.length, lex->input + token.offset); -    } -    fprintf(s, "\")"); +    fprintf(s, " raw-tokens ["); +    fprint_tokens(lex, arg->first_token, arg->num_tokens, s); +    fprintf(s, "])");  }  static int fprint_stmt( @@ -1409,10 +1462,17 @@ static int fprint_stmt(      fprintf(s, "(%s", stmt_type_to_string(stmt->type));      if (stmt->label_token) {          const struct token label = lex->tokbuf[stmt->label_token]; -        fprintf(s, "\n\t(label \"%.*s\")", (int)label.length, lex->input + label.offset); +        fprintf( +                s, +                "\n\t(label \"%.*s\")", +                (int)label.length, +                lex->input + label.offset);      }      if (stmt->type == ST_INSTRUCTION) { -        fprintf(s, "\n\t(mnemonic \"%s\")", mnemonic_to_string(stmt->instruction.mnemonic)); +        fprintf( +                s, +                "\n\t(mnemonic \"%s\")", +                mnemonic_to_string(stmt->instruction.mnemonic));          fprintf(s, "\n\t(size %s)", opsize_to_string(stmt->instruction.opsize));          if (stmt->instruction.arg1.type != ARG_NONE) {              fprintf(s, "\n\t(arg1 "); @@ -1425,23 +1485,30 @@ static int fprint_stmt(              fprint_arg(lex, &stmt->instruction.arg2, s);              fprintf(s, ")");          } +    } else if (stmt->type == ST_DIRECTIVE) { +        fprintf( +                s, +                "\n\t(name \"%s\")", +                directive_to_string(stmt->directive.type)); +        fprintf(s, "\n\t(arg (raw-tokens ["); +        fprint_tokens( +                lex, +                stmt->directive.first_token, +                stmt->directive.num_tokens, +                s); +        fprintf(s, "]))");      }      if (stmt->comment_token) {          const struct token comment = lex->tokbuf[stmt->comment_token]; -        fprintf(s, "\n\t(comment \"%.*s\")", (int)comment.length, lex->input + comment.offset); -    } -    fprintf(s, "\n\t(raw \""); -    for (size_t i = 0; i < stmt->num_tokens; i++) { -        const struct token token = lex->tokbuf[stmt->first_token + i]; -        if (token.type == TT_NEWLINE) { -            break; -        } -        if (i > 0) { -            fputc(' ', s); -        } -        fprintf(s, "%.*s", (int)token.length, lex->input + token.offset); -    } -    fprintf(s, "\"))\n"); +        fprintf( +                s, +                "\n\t(comment \"%.*s\")", +                (int)comment.length, +                lex->input + comment.offset); +    } +    fprintf(s, "\n\t(raw-tokens ["); +    fprint_tokens(lex, stmt->first_token, stmt->num_tokens, s); +    fprintf(s, "]))\n");      return 0;  } @@ -1537,14 +1604,6 @@ static int pars_yield_error_eof(              self, l, "EOF", (sizeof "EOF") - 1, expected);  } -static int pars_parse_direc( -        struct pars *const self, const struct token *const dot) -{ -    (void) self; -    (void) dot; -    return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL); -} -  enum opsize get_opsize_from_specifier(const char size_specifier)  {      switch (tolower(size_specifier)) { @@ -1728,7 +1787,7 @@ static int pars_parse_expr(              }              nesting--;          } else { -            if (nesting == 0) { +            if (nesting == 0 && expect_close_or_binary) {                  break;              }              return pars_yield_error( @@ -1746,6 +1805,101 @@ static int pars_parse_expr(      return OK;  } +static int pars_parse_comment_and_newline( +        struct pars *const self, size_t *const output_comment_id) +{ +    size_t comment_id = 0; +    if (!pars_is_eof_reached(self)) { +        // Try parse comment +        const struct token token1 = pars_peek(self); +        const bool is_comment = token1.type == TT_COMMENT_ASTERISK || +            token1.type == TT_COMMENT_SEMICOLON; +        if (is_comment) { +            comment_id = pars_commit(self); +        } +    } +    if (!pars_is_eof_reached(self)) { +        // There must be a new line if not EOF +        const size_t nl_id = pars_commit(self); +        const struct token nl = self->lex->tokbuf[nl_id]; +        if (nl.type != TT_NEWLINE) { +            return pars_yield_error( +                    self, nl_id, comment_id ? E_NL : E_COMMENT_NL); +        } +    } +    *output_comment_id = comment_id; +    return OK; +} + +static int pars_finish_directive( +        struct pars *const self, +        const size_t label_id, +        const struct directive directive) +{ +    // Finish parsing instruction, expect comment or newline +    size_t comment_id = 0; +    const int ret = pars_parse_comment_and_newline(self, &comment_id); +    if (ret != OK) { +        return ret; +    } +    const struct stmt stmt = { +        .type = ST_DIRECTIVE, +        .directive = directive, +        .label_token = label_id, +        .comment_token = comment_id, +        .first_token = label_id, +        .num_tokens = self->cur_tok_id - label_id, +    }; +    fwrite_stmt(&stmt, self->stmttab_stream); +    return OK; +} + +static int pars_directive_handler_align( +        struct pars *const self, const size_t label_id) +{ +    struct expr_tokens_span expr; +    const int ret = pars_parse_expr(self, &expr); +    if (ret != OK) { +        return ret; +    } +    const struct directive directive = { +        .type = DT_ALIGN, +        .first_token = expr.first_token, +        .num_tokens = expr.num_tokens, +    }; +    return pars_finish_directive(self, label_id, directive); +} + +static int pars_directive_handler_file( +        struct pars *const self, const size_t label_id) +{ +    const struct token filename = pars_peek(self); +    if (filename.type != TT_STRING) { +        return pars_yield_error(self, self->cur_tok_id, E_STR); +    } +    const size_t first_token = pars_commit(self); +    const struct directive directive = { +        .type = DT_FILE, +        .first_token = first_token, +        .num_tokens = 1 +    }; +    return pars_finish_directive(self, label_id, directive); +} + +static int pars_parse_direc(struct pars *const self, const size_t label_id) +{ +    const struct token dotid = pars_peek(self); +    // Get rid of leading dot in the string pointer and in the length as well by +    // adding and subtracting 1 respectively +    enum directive_type d = get_directive_from_identifier( +            self->lex->input + dotid.offset + 1, dotid.length - 1); +    if (d == DT_NONE) { +        return pars_yield_error_msg(self, self->cur_tok_id, E_UNKNOWN_DRC); +    } +    pars_commit(self); +    return g_directives[d].handler(self, label_id); +} +  static int pars_parse_arg_after_prefix_expr(          struct pars *const self, struct arg *const arg)  { @@ -2328,7 +2482,7 @@ static int pars_parse_assignment(  {      (void) label_id;      (void) symbol_id; -    return pars_yield_error_msg(self, self->cur_tok_id, E_UNIMPL); +    return pars_yield_error_msg(self, self->cur_tok_id, E_NIMPL);  }  static int pars_yield_label_comment( @@ -2351,15 +2505,16 @@ static int pars_yield_label_comment(  static int pars_parse_labeled_statement(          struct pars *const self, const size_t label_id)  { -    const size_t token1_id = pars_commit(self); -    const struct token token1 = self->lex->tokbuf[token1_id]; +    const struct token token1 = pars_peek(self);      const bool is_comment = token1.type == TT_COMMENT_ASTERISK ||          token1.type == TT_COMMENT_SEMICOLON;      if (is_comment) { -        return pars_yield_label_comment(self, label_id, token1_id); +        return pars_yield_label_comment(self, label_id, pars_commit(self));      } else if (token1.type == TT_NEWLINE) { +        pars_commit(self);          return pars_yield_label_comment(self, label_id, 0);      } else if (token1.type == TT_ID) { +        const size_t token1_id = pars_commit(self);          if (pars_is_eof_reached(self)) {              return pars_yield_error_eof(self, E_LABELED_STMT);          } @@ -2373,9 +2528,9 @@ static int pars_parse_labeled_statement(          }          return pars_parse_instruction(self, label_id, token1_id);      } else if (token1.type == TT_DOT_ID) { -        return pars_parse_direc(self, &token1); +        return pars_parse_direc(self, label_id);      } -    return pars_yield_error(self, token1_id, E_STMT_BEGIN); +    return pars_yield_error(self, self->cur_tok_id, E_STMT_BEGIN);  }  static int pars_parse_statement(struct pars *const self) | 
