diff options
author | Oxore <oxore@protonmail.com> | 2023-06-21 22:26:40 +0300 |
---|---|---|
committer | Oxore <oxore@protonmail.com> | 2023-06-21 22:27:20 +0300 |
commit | 335d8f13db928a91d7d194d6d51d8eb814d14112 (patch) | |
tree | 818bf840577b14c7b2cfc635d0333705d4c9d5b5 |
Initial commit
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | Makefile | 26 | ||||
-rw-r--r-- | main.c | 279 |
3 files changed, 310 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1dabaad --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +main +m68k-trasm +cmake[-_]build*/ +build*/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5b1111e --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: Unlicense + +WARNFLAGS = -Wall -Wextra -pedantic -Wlogical-op +OPTFLAGS = -O2 +INCLUDES = lib +_CFLAGS = $(CFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(OPTFLAGS) -pipe -g +_CXXFLAGS = $(CXXFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(OPTFLAGS) -pipe -g +LDSCRIPTS = +_LDFLAGS = $(LDFLAGS) $(OPTFLAGS) $(addprefix -T,$(LDSCRIPTS)) + +OBJECTS=main.o + +.PHONY: all +all: m68k-trasm + +m68k-trasm: $(OBJECTS) $(LDSCRIPTS) Makefile + $(CC) -o $@ $(_LDFLAGS) $(OBJECTS) + +$(OBJECTS): Makefile + +%.o: %.c Makefile + $(CC) $(_CFLAGS) -c -o $@ $< + +clean: + rm -rfv m68k-trasm $(OBJECTS) + @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: Unlicense + * + * This program translates Sierra m68k assembly dialect to GNU AS m68k dialect. + * + * NOTE: Unicode is not supported, ASCII only. 
+ */ + +#include <assert.h> +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> + +#define OK 0 +#define ERR 2 + +enum tok_kind { + TK_NONE = 0, + TK_SPACE, + TK_TAB, + TK_CR, + TK_LF, + TK_DOT, + TK_COMMA, + TK_PLUS, + TK_MINUS, + TK_EQ, + TK_COLON, + TK_ASTERISK, + TK_STRING, + TK_ALNUM, + TK_NUMDEC, + TK_NUMOCT, + TK_NUMHEX, + TK_PARENL, + TK_PARENR, + TK_COMMENT_ASTERISK, + TK_COMMENT_SEMICOLON, +}; + +struct tok { + enum tok_kind kind; + size_t offset; + size_t length; +}; + +enum lex_error { + LE_NONE = 0, +}; + +struct lex { + // Input data buffer + FILE *parsed_input_stream; + char *parsed_input; + size_t parsed_input_size; + // Tokens table + FILE *tokbuf_stream; + struct tok *tokbuf; + size_t tokbuf_size; + // State + size_t offset; + enum lex_error error; +}; + +enum stmt_kind { + SK_NONE = 0, + SK_LABEL, + SK_INSTRUCTION, + SK_TEXT, + SK_DIR_FILE, + SK_DIR_TEXT, + SK_DIR_ALIGN, + SK_DIR_DEF_ENDEF, + SK_DIR_GLOBL, + SK_DIR_LINE, +}; + +enum opcode { + OPCODE_NONE, + OPCODE_NOP, +}; + +enum opsize { + OPSIZE_NONE = 0, + OPSIZE_S, + OPSIZE_B, + OPSIZE_W, + OPSIZE_L, +}; + +enum arg_kind { + ARG_NONE = 0, + ARG_DN, + ARG_AN, + ARG_AN_ADDR, + ARG_AN_ADDR_INCR, + ARG_AN_ADDR_DECR, + ARG_AN_ADDR_16, + ARG_AN_ADDR_8_XN, + ARG_ADDR_WORD, + ARG_ADDR_LONG, + ARG_ADDR_UNSPEC, + ARG_PC_ADDR_16, + ARG_PC_ADDR_8_XN, + ARG_IMMEDIATE, +}; + +struct arg_8_xn { + int8_t val; + int8_t an; + int8_t xi; +}; + +struct instruction { + enum opcode opcode; + enum opsize opsize; + enum arg_kind arg1_kind, arg2_kind; + union { + int32_t imm, addr; + struct arg_8_xn arg_8_xn; // For (d,An,Xi) and (d,PC,Xn) + } arg1, arg2; +}; + +struct def_endef { + size_t sym_id; + size_t tag_sym_id; + int32_t size; + int32_t storage_class; + int32_t type; +}; + +struct stmt { + enum stmt_kind kind; + union { + struct instruction instruction; + int32_t align; + size_t globl_sym_id; + size_t file_sym_id; + }; + size_t first_token, num_tokens; // 
Statement tokens span, may be NULL + size_t comment_token; +}; + +struct symbol { + size_t offset; // Byte offset in continuous null terminated symbol buffer + // Instead of strcmp every item in symtab we can compare hashes and get O(N) + // for search. + uint32_t hash; +}; + +struct pars { + struct lex *lex; + // Statement table + FILE *stmttab_stream; + struct stmt *stmttab; + size_t stmttab_size; + // Symbol table + FILE *symtab_stream; + struct sym *symtab; + size_t symtab_size; + // Symbol buffer for symbol table + FILE *symbuf_stream; + char *symbuf; + size_t symbuf_size; +}; + +struct assem { + struct pars *pars; +}; + +static int lex_init(struct lex *const self) +{ + (void) self; + return OK; +} + +static int lex_next(struct lex *const self) +{ + (void) self; + return OK; +} + +static void lex_destroy(struct lex *const self) +{ + (void) self; +} + +static int pars_init(struct pars *const self, struct lex *const lex) +{ + (void) self; + (void) lex; + return OK; +} + +static int pars_run(struct pars *const self, FILE *const stream) +{ + (void) self; + (void) stream; + return OK; +} + +static void pars_destroy(struct pars *const self) +{ + (void) self; +} + +static int assem_init(struct assem *const self, struct pars *const pars) +{ + (void) self; + (void) pars; + return OK; +} + +static int assem_resolve(struct assem *const self) +{ + (void) self; + return OK; +} + +static int assem_emit(struct assem *const self, FILE *const stream) +{ + (void) self; + (void) stream; + return OK; +} + +static void assem_destroy(struct assem *const self) +{ + (void) self; +} + +int main(const int argc, char *const argv[]) +{ + // No fucks given about arguments for now + (void)argc; + (void)argv; + struct lex lex; + struct pars pars; + if (OK != lex_init(&lex)) { + return EXIT_FAILURE; + } + // Chain lexer and parser + if (OK != pars_init(&pars, &lex)) { + lex_destroy(&lex); + return EXIT_FAILURE; + } + // Parse assembly program text (tokenize + parse) + if (OK != 
pars_run(&pars, stdin)) { + pars_destroy(&pars); + lex_destroy(&lex); + return EXIT_FAILURE; + } + struct assem assem; + // Allocate and populate code table and metadata table from parsed data + if (OK != assem_init(&assem, &pars)) { + pars_destroy(&pars); + lex_destroy(&lex); + return EXIT_FAILURE; + } + // Resolve all ambiguities + if (OK != assem_resolve(&assem)) { + assem_destroy(&assem); + pars_destroy(&pars); + lex_destroy(&lex); + return EXIT_FAILURE; + } + // Emit unambiguous assembly language program text for specified dialect + // (currently only m68k GNU AS is supported) + if (OK != assem_emit(&assem, stdout)) { + assem_destroy(&assem); + pars_destroy(&pars); + lex_destroy(&lex); + return EXIT_FAILURE; + } + assem_destroy(&assem); + pars_destroy(&pars); + lex_destroy(&lex); +} |