diff options
author | Oxore <oxore@protonmail.com> | 2023-06-26 01:49:24 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-06-26 01:49:24 +0300 |
commit | 2dd7bd6ca24b6a28ff36c5c441442d5885c0b611 (patch) | |
tree | 27f14cd43512559ecb6d3bbe15be218937f6ea5b | |
parent | e46fb8f882b00fe9d24b2dc8810b033164c4b10c (diff) |
Fix reg parsing, refactor error messages, add more sanitizers
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | main.c | 60 |
2 files changed, 41 insertions, 21 deletions
@@ -2,7 +2,7 @@ WARNFLAGS = -Wall -Wextra -pedantic -Wlogical-op INCLUDES = lib -_FLAGS = -O2 -fsanitize=unreachable -fsanitize=address +_FLAGS = -O0 -fsanitize=bounds-strict,unreachable,address,undefined _CFLAGS = $(CFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(_FLAGS) -pipe -g _CXXFLAGS = $(CXXFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(_FLAGS) -pipe -g LDSCRIPTS = @@ -31,12 +31,12 @@ #define E_UNIMPL "unimplemented" #define E_UNREACH "unreachable code reached" #define E_EXPR "expression token" -#define E_EA_PART "An, Dn, PC or full expression" -#define E_EA_PART_NOT_AN "Dn, PC or full expression" -#define E_EA_PART_NOT_EXPR "An, Dn or PC" +#define E_EA_PART "D0, ...D7, A0, ...A7, SP, PC or full expression" +#define E_EA_PART_NOT_AN "D0, ...D7, PC or full expression" +#define E_EA_PART_NOT_EXPR "D0, ...D7 A0, ...A7, SP, or PC" #define E_EA_PART_DELIM "',' or ')'" #define E_EA_INVALID "invalid addressing mode" -#define E_DN_AN "Dn or An" +#define E_DN_AN "D0, ...D7, A0, ...A7 or SP" #define E_ARG "valid instruction argument" #define E_MNEMONIC "valid instruction mnemonic" #define E_INSN_SIZE_SPEC "'.s', '.b', '.w' or '.l'" @@ -317,7 +317,7 @@ struct expr_tokens_span { struct arg { enum arg_type type; int8_t xn; ///< For Dn, An, (An), -(An), (An)+, (d16,An) - int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it is negative if An + int8_t xi; ///< For (d8,An,Xi) and (d8,PC,Xi), it has 0x8 mask set if An struct expr_tokens_span expr; size_t first_token, num_tokens; ///< Argument tokens span }; @@ -1267,6 +1267,22 @@ static const char *stmt_type_to_string(const enum stmt_type type) return "_UNKNOWN"; } +static void fprint_expr( + const struct lex *const lex, + const struct expr_tokens_span *const expr, + FILE *const s) +{ + fputc('[', s); + for (size_t i = 0; i < expr->num_tokens; i++) { + const struct token token = lex->tokbuf[expr->first_token + i]; + if (token.type == TT_NEWLINE) { + break; + } + fprintf(s, "%.*s", (int)token.length, lex->input + token.offset); + } + fputc(']', s); +} + static void fprint_arg( const struct lex *const lex, const struct arg *const arg, @@ -1276,35 +1292,39 @@ static void fprint_arg( switch (arg->type) { case ARG_NONE: case ARG_DN: + fprintf(s, " reg [d%d]", arg->xn); + break; case ARG_AN: case ARG_AN_ADDR: case ARG_AN_ADDR_INCR: case ARG_AN_ADDR_DECR: - fprintf(s, " reg %d", arg->xn); + fprintf(s, " reg [a%d]", arg->xn); break; case ARG_AN_ADDR_16: - fprintf(s, " reg %d", arg->xn); - fprintf(s, " d16 [see raw]"); // TODO print expr tokens + fprintf(s, " reg [a%d]", arg->xn); + fprintf(s, " d16 "), fprint_expr(lex, &arg->expr, s); break; case ARG_AN_ADDR_8_XI: - fprintf(s, " reg %d", arg->xn); - fprintf(s, " d8 [see raw]"); // TODO print expr tokens - fprintf(s, " xi %d", arg->xi); + fprintf(s, " reg [a%d]", arg->xn); + fprintf(s, " d8 "), fprint_expr(lex, &arg->expr, s); + fprintf(s, " xi [%c%d]", arg->xi & 0x8 ? 'a' : 'd', arg->xi & 0x7); break; case ARG_ADDR_WORD: case ARG_ADDR_LONG: case ARG_ADDR_UNSPEC: - fprintf(s, " addr [see raw]"); // TODO print expr tokens + fprintf(s, " addr "), fprint_expr(lex, &arg->expr, s); break; case ARG_PC_ADDR_16: - fprintf(s, " d16 [see raw]"); // TODO print expr tokens + fprintf(s, " reg [pc]"); + fprintf(s, " d16 "), fprint_expr(lex, &arg->expr, s); break; case ARG_PC_ADDR_8_XI: - fprintf(s, " d8 [see raw]"); // TODO print expr tokens - fprintf(s, " xi %d", arg->xi); + fprintf(s, " reg [pc]"); + fprintf(s, " d8 "), fprint_expr(lex, &arg->expr, s); + fprintf(s, " xi [%c%d]", arg->xi & 0x8 ? 'a' : 'd', arg->xi & 0x7); break; case ARG_IMMEDIATE: - fprintf(s, " value [see raw]"); // TODO print expr tokens + fprintf(s, " value "), fprint_expr(lex, &arg->expr, s); break; case ARG_REGMASK: case ARG_SR: @@ -1586,13 +1606,13 @@ static struct token_recognition pars_recognize_token( const char *const str = self->lex->input + token.offset; if (token.type == TT_ID) { if (token.length == 2) { - if (tolower(str[0]) == 'a' && is_dec(str[1])) { + if (tolower(str[0]) == 'a' && is_oct(str[1])) { return (struct token_recognition){ .type = RTT_REG, .reg = REG_AN, .reg_num = str[1] - '0', }; - } else if (tolower(str[0]) == 'd' && is_dec(str[1])) { + } else if (tolower(str[0]) == 'd' && is_oct(str[1])) { return (struct token_recognition){ .type = RTT_REG, .reg = REG_DN, @@ -1869,14 +1889,14 @@ static int pars_parse_arg_inside_parens( // It is (d8,PC,Xn) assert((an1_found && !dn_found) || (!an1_found && dn_found)); arg->type = ARG_PC_ADDR_8_XI; - arg->xi = an1_found ? -an1 : dn; + arg->xi = an1_found ? (an1 | 0x8) : dn; arg->num_tokens = self->cur_tok_id - arg->first_token; return OK; } else if (parts == 3 && an1_found && arg->expr.first_token && (an2_found || dn_found)) { // It is (d8,An,Xn) assert((an1_found && !dn_found) || (!an1_found && dn_found)); arg->type = ARG_AN_ADDR_8_XI; - arg->xi = an2_found ? -an2 : dn; + arg->xi = an2_found ? (an2 | 0x8) : dn; arg->num_tokens = self->cur_tok_id - arg->first_token; return OK; } |