/* SPDX-License-Identifier: Unlicense
 *
 * This program translates Sierra m68k assembly dialect to GNU AS m68k dialect.
 *
 * NOTE: Unicode is not supported, ASCII only.
 */

// NOTE(review): the header names were lost in the copy this file was restored
// from. <stdint.h>, <stdio.h> and <stdlib.h> are required by the code below;
// the remaining ones are a best guess - confirm against the upstream file.
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Status codes returned by the init/run/resolve/emit functions below.
#define OK 0
#define ERR 2

// Kinds of tokens the lexer can produce.
enum tok_kind {
    TK_NONE = 0,
    TK_SPACE,
    TK_TAB,
    TK_CR,
    TK_LF,
    TK_DOT,
    TK_COMMA,
    TK_PLUS,
    TK_MINUS,
    TK_EQ,
    TK_COLON,
    TK_ASTERISK,
    TK_STRING,
    TK_ALNUM,
    TK_NUMDEC,
    TK_NUMOCT,
    TK_NUMHEX,
    TK_PARENL,
    TK_PARENR,
    TK_COMMENT_ASTERISK,
    TK_COMMENT_SEMICOLON,
};

// A single token: its kind plus an offset/length pair.
// NOTE(review): offset presumably indexes lex.parsed_input - confirm.
struct tok {
    enum tok_kind kind;
    size_t offset;
    size_t length;
};

// Lexer error codes (no actual errors are defined yet).
enum lex_error {
    LE_NONE = 0,
};

// Lexer state.
struct lex {
    // Input data buffer
    FILE *parsed_input_stream;
    char *parsed_input;
    size_t parsed_input_size;
    // Tokens table
    FILE *tokbuf_stream;
    struct tok *tokbuf;
    size_t tokbuf_size;
    // State
    size_t offset;
    enum lex_error error;
};

// Kinds of statements the parser can produce.
enum stmt_kind {
    SK_NONE = 0,
    SK_LABEL,
    SK_INSTRUCTION,
    SK_TEXT,
    SK_DIR_FILE,
    SK_DIR_TEXT,
    SK_DIR_ALIGN,
    SK_DIR_DEF_ENDEF,
    SK_DIR_GLOBL,
    SK_DIR_LINE,
};

// Known instruction opcodes.
enum opcode {
    OPCODE_NONE,
    OPCODE_NOP,
};

// Instruction operation size suffix.
enum opsize {
    OPSIZE_NONE = 0,
    OPSIZE_S,
    OPSIZE_B,
    OPSIZE_W,
    OPSIZE_L,
};

// Instruction argument (addressing mode) kinds.
enum arg_kind {
    ARG_NONE = 0,
    ARG_DN,
    ARG_AN,
    ARG_AN_ADDR,
    ARG_AN_ADDR_INCR,
    ARG_AN_ADDR_DECR,
    ARG_AN_ADDR_16,
    ARG_AN_ADDR_8_XN,
    ARG_ADDR_WORD,
    ARG_ADDR_LONG,
    ARG_ADDR_UNSPEC,
    ARG_PC_ADDR_16,
    ARG_PC_ADDR_8_XN,
    ARG_IMMEDIATE,
};

// Payload of an argument of the (d,An,Xi) or (d,PC,Xn) form.
struct arg_8_xn {
    int8_t val;
    int8_t an;
    int8_t xi;
};

// A parsed instruction with up to two arguments.
struct instruction {
    enum opcode opcode;
    enum opsize opsize;
    enum arg_kind arg1_kind, arg2_kind;
    union {
        int32_t imm, addr;
        struct arg_8_xn arg_8_xn; // For (d,An,Xi) and (d,PC,Xn)
    } arg1, arg2;
};

// Data carried by a SK_DIR_DEF_ENDEF statement.
struct def_endef {
    size_t sym_id;
    size_t tag_sym_id;
    int32_t size;
    int32_t storage_class;
    int32_t type;
};

// A parsed statement. Which union member is meaningful depends on `kind`.
struct stmt {
    enum stmt_kind kind;
    union {
        struct instruction instruction;
        int32_t align;
        size_t globl_sym_id;
        size_t file_sym_id;
    };
    // Statement tokens span. The original note says it "may be NULL";
    // NOTE(review): these are indices, so the "absent" sentinel still has to
    // be defined - confirm.
    size_t first_token, num_tokens;
    size_t comment_token;
};

// A symbol table entry.
struct symbol {
    // Byte offset in continuous null terminated symbol buffer
    size_t offset;
    // Comparing hashes instead of calling strcmp on every item in symtab keeps
    // a linear search down to O(N) integer comparisons.
    uint32_t hash;
};

// Parser state.
struct pars {
    struct lex *lex;
    // Statement table
    FILE *stmttab_stream;
    struct stmt *stmttab;
    size_t stmttab_size;
    // Symbol table (entries are `struct symbol`; the element type used to be
    // spelled `struct sym`, which is not declared anywhere in this file)
    FILE *symtab_stream;
    struct symbol *symtab;
    size_t symtab_size;
    // Symbol buffer for symbol table
    FILE *symbuf_stream;
    char *symbuf;
    size_t symbuf_size;
};

// Assembler (resolve + emit) state.
struct assem {
    struct pars *pars;
};

// Initialize the lexer. Currently a stub: ignores `self` and returns OK.
static int lex_init(struct lex *const self)
{
    (void) self;
    return OK;
}

// Produce the next token. Currently a stub: ignores `self` and returns OK.
static int lex_next(struct lex *const self)
{
    (void) self;
    return OK;
}

// Release lexer resources. Currently a stub that does nothing.
static void lex_destroy(struct lex *const self)
{
    (void) self;
}

// Initialize the parser on top of `lex`. Currently a stub: ignores both
// arguments and returns OK.
static int pars_init(struct pars *const self, struct lex *const lex)
{
    (void) self;
    (void) lex;
    return OK;
}

// Parse the program text read from `stream`. Currently a stub: ignores both
// arguments and returns OK.
static int pars_run(struct pars *const self, FILE *const stream)
{
    (void) self;
    (void) stream;
    return OK;
}

// Release parser resources. Currently a stub that does nothing.
static void pars_destroy(struct pars *const self)
{
    (void) self;
}

// Initialize the assembler from parsed data. Currently a stub: ignores both
// arguments and returns OK.
static int assem_init(struct assem *const self, struct pars *const pars)
{
    (void) self;
    (void) pars;
    return OK;
}

// Resolve ambiguities in the parsed program. Currently a stub: returns OK.
static int assem_resolve(struct assem *const self)
{
    (void) self;
    return OK;
}

// Emit the translated program to `stream`. Currently a stub: ignores both
// arguments and returns OK.
static int assem_emit(struct assem *const self, FILE *const stream)
{
    (void) self;
    (void) stream;
    return OK;
}

// Release assembler resources. Currently a stub that does nothing.
static void assem_destroy(struct assem *const self)
{
    (void) self;
}

// Program entry point: runs lexer/parser on stdin, then resolves and emits the
// result to stdout.
//
// Returns EXIT_SUCCESS when every stage reports OK, EXIT_FAILURE otherwise.
// Every stage that was successfully initialized is destroyed in reverse order
// on both the success and the failure paths.
int main(const int argc, char *const argv[])
{
    // Command-line arguments are ignored for now
    (void)argc;
    (void)argv;

    int rc = EXIT_FAILURE;
    // Zero-initialize so that nothing indeterminate is ever handed to the
    // init/destroy functions while they are still stubs.
    struct lex lex = {0};
    struct pars pars = {0};
    struct assem assem = {0};

    if (OK != lex_init(&lex)) {
        goto out;
    }
    // Chain lexer and parser
    if (OK != pars_init(&pars, &lex)) {
        goto out_lex;
    }
    // Parse assembly program text (tokenize + parse)
    if (OK != pars_run(&pars, stdin)) {
        goto out_pars;
    }
    // Allocate and populate code table and metadata table from parsed data
    if (OK != assem_init(&assem, &pars)) {
        goto out_pars;
    }
    // Resolve all ambiguities
    if (OK != assem_resolve(&assem)) {
        goto out_assem;
    }
    // Emit unambiguous assembly language program text for specified dialect
    // (currently m68k GNU AS only is supported)
    if (OK != assem_emit(&assem, stdout)) {
        goto out_assem;
    }
    rc = EXIT_SUCCESS;

out_assem:
    assem_destroy(&assem);
out_pars:
    pars_destroy(&pars);
out_lex:
    lex_destroy(&lex);
out:
    return rc;
}