summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOxore <oxore@protonmail.com>2023-06-21 22:26:40 +0300
committerOxore <oxore@protonmail.com>2023-06-21 22:27:20 +0300
commit335d8f13db928a91d7d194d6d51d8eb814d14112 (patch)
tree818bf840577b14c7b2cfc635d0333705d4c9d5b5
Initial commit
-rw-r--r--.gitignore5
-rw-r--r--Makefile26
-rw-r--r--main.c279
3 files changed, 310 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1dabaad
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.o
+main
+m68k-trasm
+cmake[-_]build*/
+build*/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5b1111e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: Unlicense
+#
+# Builds the m68k-trasm executable from $(OBJECTS).
+
+# NOTE(review): -Wlogical-op is documented by GCC; verify that any other
+# compiler you build with accepts it.
+WARNFLAGS = -Wall -Wextra -pedantic -Wlogical-op
+OPTFLAGS = -O2
+# Directories passed to the compiler as -I<dir>
+INCLUDES = lib
+# User supplied CFLAGS/CXXFLAGS/LDFLAGS come first, project flags are appended
+_CFLAGS = $(CFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(OPTFLAGS) -pipe -g
+_CXXFLAGS = $(CXXFLAGS) $(WARNFLAGS) $(addprefix -I,$(INCLUDES)) $(OPTFLAGS) -pipe -g
+# Linker scripts, each passed as -T<script>; empty for now
+LDSCRIPTS =
+_LDFLAGS = $(LDFLAGS) $(OPTFLAGS) $(addprefix -T,$(LDSCRIPTS))
+
+OBJECTS=main.o
+
+# Neither target produces a file of its own name, so both must be phony:
+# otherwise a stray file called "all" or "clean" would make the target look
+# up to date and its recipe would never run.
+.PHONY: all clean
+all: m68k-trasm
+
+# Link step; listing Makefile as a prerequisite forces a relink when the build
+# rules change.
+m68k-trasm: $(OBJECTS) $(LDSCRIPTS) Makefile
+	$(CC) -o $@ $(_LDFLAGS) $(OBJECTS)
+
+$(OBJECTS): Makefile
+
+# Compile each C source into an object file
+%.o: %.c Makefile
+	$(CC) $(_CFLAGS) -c -o $@ $<
+
+# Remove the executable and all object files
+clean:
+	rm -rfv m68k-trasm $(OBJECTS)
+
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..3020862
--- /dev/null
+++ b/main.c
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: Unlicense
+ *
+ * This program translates Sierra m68k assembly dialect to GNU AS m68k dialect.
+ *
+ * NOTE: Unicode is not supported, ASCII only.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#define OK 0 // Success status: returned by the *_init/_next/_run/_resolve/_emit functions and tested by main()
+#define ERR 2 // Failure status; NOTE(review): not referenced anywhere in the visible code
+
+/*
+ * Kind tag stored in struct tok.
+ *
+ * NOTE(review): the lexer in this file is still a stub, so the exact meaning
+ * of every kind is only implied by its name. The groupings below are a reading
+ * of those names -- confirm once lex_next() is implemented.
+ */
+enum tok_kind {
+    TK_NONE = 0,
+    // Whitespace and line ending kinds
+    TK_SPACE,
+    TK_TAB,
+    TK_CR,
+    TK_LF,
+    // Punctuation kinds
+    TK_DOT,
+    TK_COMMA,
+    TK_PLUS,
+    TK_MINUS,
+    TK_EQ,
+    TK_COLON,
+    TK_ASTERISK,
+    // String, identifier-like and numeric kinds
+    TK_STRING,
+    TK_ALNUM,
+    TK_NUMDEC,
+    TK_NUMOCT,
+    TK_NUMHEX,
+    // Parentheses
+    TK_PARENL,
+    TK_PARENR,
+    // Comment kinds, presumably named after the character that starts them
+    TK_COMMENT_ASTERISK,
+    TK_COMMENT_SEMICOLON,
+};
+
+/*
+ * One token record: a kind tag plus an offset/length pair.
+ *
+ * NOTE(review): offset and length presumably describe a byte span inside the
+ * lexer input buffer -- confirm once the lexer is implemented.
+ */
+struct tok {
+    enum tok_kind kind;
+    size_t offset;
+    size_t length;
+};
+
+/*
+ * Error code kept in struct lex. LE_NONE is the only value defined so far.
+ */
+enum lex_error {
+    LE_NONE = 0,
+};
+
+/*
+ * Lexer state: the input data buffer, the table of tokens and the current
+ * position/error.
+ *
+ * NOTE(review): each "stream + pointer + size" triple looks like it is meant
+ * to be driven by a growable in-memory stream; nothing in the visible code
+ * opens these streams yet, so confirm the intended mechanism.
+ */
+struct lex {
+    /* Input data buffer */
+    FILE *parsed_input_stream;
+    char *parsed_input;
+    size_t parsed_input_size;
+    /* Tokens table */
+    FILE *tokbuf_stream;
+    struct tok *tokbuf;
+    size_t tokbuf_size;
+    /* State */
+    size_t offset;
+    enum lex_error error;
+};
+
+/*
+ * Kind tag stored in struct stmt.
+ *
+ * NOTE(review): the parser is a stub in this file; the SK_DIR_* names
+ * presumably correspond to assembler directives -- confirm against the parser
+ * once it exists.
+ */
+enum stmt_kind {
+    SK_NONE = 0,
+    SK_LABEL,
+    SK_INSTRUCTION,
+    SK_TEXT,
+    // Directive kinds
+    SK_DIR_FILE,
+    SK_DIR_TEXT,
+    SK_DIR_ALIGN,
+    SK_DIR_DEF_ENDEF,
+    SK_DIR_GLOBL,
+    SK_DIR_LINE,
+};
+
+/*
+ * Instruction opcode stored in struct instruction. Only OPCODE_NOP is defined
+ * besides the "none" value.
+ */
+enum opcode {
+    OPCODE_NONE = 0, // Zero spelled out, matching the other enums in this file
+    OPCODE_NOP,
+};
+
+/*
+ * Operation size stored in struct instruction.
+ *
+ * NOTE(review): S/B/W/L presumably stand for the short/byte/word/long size
+ * suffixes of m68k mnemonics -- confirm.
+ */
+enum opsize {
+    OPSIZE_NONE = 0,
+    OPSIZE_S,
+    OPSIZE_B,
+    OPSIZE_W,
+    OPSIZE_L,
+};
+
+/*
+ * Kind of an instruction argument; selects how the matching arg1/arg2 union
+ * in struct instruction is meant to be read.
+ *
+ * NOTE(review): the names look like m68k addressing modes (Dn, An, (An),
+ * (An)+, -(An), ...); the groupings below are a reading of the names only --
+ * confirm against the parser/emitter once they exist.
+ */
+enum arg_kind {
+    ARG_NONE = 0,
+    // Register kinds
+    ARG_DN,
+    ARG_AN,
+    // Kinds based on an address register
+    ARG_AN_ADDR,
+    ARG_AN_ADDR_INCR,
+    ARG_AN_ADDR_DECR,
+    ARG_AN_ADDR_16,
+    ARG_AN_ADDR_8_XN,
+    // Plain address kinds
+    ARG_ADDR_WORD,
+    ARG_ADDR_LONG,
+    ARG_ADDR_UNSPEC,
+    // Kinds based on the program counter
+    ARG_PC_ADDR_16,
+    ARG_PC_ADDR_8_XN,
+    // Immediate value
+    ARG_IMMEDIATE,
+};
+
+/*
+ * Three signed 8-bit fields describing one indexed argument.
+ *
+ * NOTE(review): presumably val is the displacement, an the base register
+ * number and xi the index register number -- confirm.
+ */
+struct arg_8_xn {
+    int8_t val;
+    int8_t an;
+    int8_t xi;
+};
+
+/*
+ * One instruction: opcode, operation size and up to two arguments. Each
+ * argument is a kind tag (arg1_kind / arg2_kind) plus a union holding its
+ * value (arg1 / arg2).
+ */
+struct instruction {
+    enum opcode opcode;
+    enum opsize opsize;
+    enum arg_kind arg1_kind;
+    enum arg_kind arg2_kind;
+    union {
+        int32_t imm;
+        int32_t addr;
+        struct arg_8_xn arg_8_xn; // For (d,An,Xi) and (d,PC,Xn)
+    } arg1, arg2;
+};
+
+/*
+ * Two symbol ids plus size, storage class and type values.
+ *
+ * NOTE(review): presumably the payload for SK_DIR_DEF_ENDEF statements, but
+ * struct stmt does not reference this type in the visible code -- confirm.
+ */
+struct def_endef {
+    size_t sym_id;
+    size_t tag_sym_id;
+    int32_t size;
+    int32_t storage_class;
+    int32_t type;
+};
+
+/*
+ * One parsed statement: a kind tag, a kind-specific payload held in an
+ * anonymous union, and references into the token table.
+ */
+struct stmt {
+    enum stmt_kind kind;
+    union {
+        struct instruction instruction;
+        int32_t align;
+        size_t globl_sym_id;
+        size_t file_sym_id;
+    };
+    // Statement tokens span.
+    // NOTE(review): the original remark said the span "may be NULL", but both
+    // fields are size_t; presumably num_tokens == 0 marks "no span" -- confirm.
+    size_t first_token;
+    size_t num_tokens;
+    size_t comment_token;
+};
+
+/*
+ * One symbol table entry.
+ */
+struct symbol {
+    // Byte offset in continuous null terminated symbol buffer
+    size_t offset;
+    // Kept so that a symtab search can compare hashes instead of running
+    // strcmp against every item; the search itself remains O(N).
+    uint32_t hash;
+};
+
+/*
+ * Parser state: the lexer it is chained to, the statement table, the symbol
+ * table and the symbol buffer the symbol table points into.
+ *
+ * NOTE(review): each "stream + pointer + size" triple looks like it is meant
+ * to be driven by a growable in-memory stream; nothing in the visible code
+ * opens these streams yet, so confirm the intended mechanism.
+ */
+struct pars {
+    struct lex *lex;
+    // Statement table
+    FILE *stmttab_stream;
+    struct stmt *stmttab;
+    size_t stmttab_size;
+    // Symbol table. Entries are struct symbol, the symbol record type defined
+    // in this file; the previous "struct sym" tag was never defined, which
+    // left symtab pointing to an incomplete type that cannot be indexed or
+    // dereferenced.
+    FILE *symtab_stream;
+    struct symbol *symtab;
+    size_t symtab_size;
+    // Symbol buffer for symbol table
+    FILE *symbuf_stream;
+    char *symbuf;
+    size_t symbuf_size;
+};
+
+/*
+ * Assembler stage state. For now it only holds a pointer to the parser.
+ */
+struct assem {
+    struct pars *pars;
+};
+
+/*
+ * Lexer initialization. Stub: ignores self and always returns OK.
+ */
+static int lex_init(struct lex *const self)
+{
+    (void)self;
+    return OK;
+}
+
+/*
+ * Lexer step. Stub: ignores self and always returns OK.
+ */
+static int lex_next(struct lex *const self)
+{
+    (void)self;
+    return OK;
+}
+
+/*
+ * Lexer teardown. Stub: ignores self and does nothing.
+ */
+static void lex_destroy(struct lex *const self)
+{
+    (void)self;
+}
+
+/*
+ * Parser initialization; takes the lexer the parser is chained to.
+ * Stub: ignores both arguments and always returns OK.
+ */
+static int pars_init(struct pars *const self, struct lex *const lex)
+{
+    (void)lex;
+    (void)self;
+    return OK;
+}
+
+/*
+ * Parser run over an input stream.
+ * Stub: ignores both arguments and always returns OK.
+ */
+static int pars_run(struct pars *const self, FILE *const stream)
+{
+    (void)stream;
+    (void)self;
+    return OK;
+}
+
+/*
+ * Parser teardown. Stub: ignores self and does nothing.
+ */
+static void pars_destroy(struct pars *const self)
+{
+    (void)self;
+}
+
+/*
+ * Assembler stage initialization; takes the parser holding the parsed data.
+ * Stub: ignores both arguments and always returns OK.
+ */
+static int assem_init(struct assem *const self, struct pars *const pars)
+{
+    (void)pars;
+    (void)self;
+    return OK;
+}
+
+/*
+ * Ambiguity resolution step. Stub: ignores self and always returns OK.
+ */
+static int assem_resolve(struct assem *const self)
+{
+    (void)self;
+    return OK;
+}
+
+/*
+ * Output step writing to the given stream.
+ * Stub: ignores both arguments, writes nothing and always returns OK.
+ */
+static int assem_emit(struct assem *const self, FILE *const stream)
+{
+    (void)stream;
+    (void)self;
+    return OK;
+}
+
+/*
+ * Assembler stage teardown. Stub: ignores self and does nothing.
+ */
+static void assem_destroy(struct assem *const self)
+{
+    (void)self;
+}
+
+/*
+ * Program entry point: runs the translation pipeline from stdin to stdout.
+ *
+ * Stages run in this order: lex_init, pars_init, pars_run (given stdin),
+ * assem_init, assem_resolve, assem_emit (given stdout). The first stage that
+ * does not return OK aborts the run.
+ *
+ * Teardown uses a single cleanup ladder instead of a hand-copied sequence per
+ * failure path. The labels are ordered so that a failure destroys exactly the
+ * objects the original code destroyed at that point: a failed *_init does not
+ * destroy its own object, and a failed assem_init destroys only pars and lex.
+ *
+ * Command line arguments are accepted but ignored.
+ *
+ * Returns EXIT_SUCCESS when every stage returns OK, EXIT_FAILURE otherwise.
+ */
+int main(const int argc, char *const argv[])
+{
+    // Command line arguments are not interpreted for now
+    (void)argc;
+    (void)argv;
+    int status = EXIT_FAILURE;
+    struct lex lex;
+    struct pars pars;
+    struct assem assem;
+    if (OK != lex_init(&lex)) {
+        goto out;
+    }
+    // Chain lexer and parser
+    if (OK != pars_init(&pars, &lex)) {
+        goto out_lex;
+    }
+    // Parse assembly program text (tokenize + parse)
+    if (OK != pars_run(&pars, stdin)) {
+        goto out_pars;
+    }
+    // Allocate and populate code table and metadata table from parsed data
+    if (OK != assem_init(&assem, &pars)) {
+        goto out_pars;
+    }
+    // Resolve all ambiguities
+    if (OK != assem_resolve(&assem)) {
+        goto out_assem;
+    }
+    // Emit unambiguous assembly language program text for specified dialect
+    // (currently m68k GNU AS only is supported)
+    if (OK != assem_emit(&assem, stdout)) {
+        goto out_assem;
+    }
+    status = EXIT_SUCCESS;
+out_assem:
+    assem_destroy(&assem);
+out_pars:
+    pars_destroy(&pars);
+out_lex:
+    lex_destroy(&lex);
+out:
+    return status;
+}