diff --git a/asm/CMakeLists.txt b/asm/CMakeLists.txt
index 3e05032..ed35032 100644
--- a/asm/CMakeLists.txt
+++ b/asm/CMakeLists.txt
@@ -2,5 +2,5 @@ file(GLOB_RECURSE asm_sources *.c *.h include/ivy/asm/*.h)
 
 add_library(ivy-asm SHARED ${asm_sources})
 target_include_directories(ivy-asm PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/)
-target_link_libraries(ivy-asm ivy-common)
+target_link_libraries(ivy-asm ivy-common Bluelib::Core Bluelib::Object)
 target_compile_definitions(ivy-asm PRIVATE IVY_EXPORT=1)
\ No newline at end of file
diff --git a/asm/include/ivy/asm/lex.h b/asm/include/ivy/asm/lex.h
index 95e8954..6a950c9 100644
--- a/asm/include/ivy/asm/lex.h
+++ b/asm/include/ivy/asm/lex.h
@@ -1,8 +1,89 @@
 #ifndef IVY_ASM_LEX_H_
 #define IVY_ASM_LEX_H_
 
+#include
 #include
+#include
+#include
 
-IVY_API void placeholder12(void);
+enum ivy_asm_token_type {
+	IVY_ASM_TOK_NONE = 0,
+	IVY_ASM_TOK_KEYWORD,
+	IVY_ASM_TOK_SYMBOL,
+	IVY_ASM_TOK_INT,
+	IVY_ASM_TOK_DOUBLE,
+	IVY_ASM_TOK_LABEL,
+	IVY_ASM_TOK_IDENT,
+	IVY_ASM_TOK_STRING,
+	IVY_ASM_TOK_LINEFEED,
+};
 
-#endif
\ No newline at end of file
+enum ivy_asm_keyword {
+	IVY_ASM_KW_NONE = 0,
+	IVY_ASM_KW_USE,
+	IVY_ASM_KW_IDENT,
+	IVY_ASM_KW_SELECTOR,
+	IVY_ASM_KW_ATOM,
+	IVY_ASM_KW_LAMBDA,
+	IVY_ASM_KW_CONSTPOOL,
+	IVY_ASM_KW_CLASS,
+	IVY_ASM_KW_MSGH,
+	IVY_ASM_KW_END,
+};
+
+enum ivy_asm_symbol {
+	IVY_ASM_SYM_NONE = 0,
+	IVY_ASM_SYM_DOT,
+	IVY_ASM_SYM_COMMA,
+	IVY_ASM_SYM_LEFT_PAREN,
+	IVY_ASM_SYM_RIGHT_PAREN,
+	IVY_ASM_SYM_LEFT_BRACKET,
+	IVY_ASM_SYM_RIGHT_BRACKET,
+	IVY_ASM_SYM_COLON,
+	IVY_ASM_SYM_SEMICOLON,
+	IVY_ASM_SYM_DOLLAR,
+	IVY_ASM_SYM_HYPHEN,
+	IVY_ASM_SYM_SQUOTE,
+	IVY_ASM_SYM_DQUOTE,
+	IVY_ASM_SYM_FORWARD_SLASH_ASTERISK,
+	IVY_ASM_SYM_ASTERISK_FORWARD_SLASH,
+};
+
+struct ivy_asm_token {
+	enum ivy_asm_token_type t_type;
+	struct ivy_asm_token *t_next;
+
+	union {
+		enum ivy_asm_keyword t_keyword;
+		enum ivy_asm_symbol t_symbol;
+		struct {
+			long long v;
+			unsigned long long uv;
+			bool sign;
+		} t_int;
+		double t_double;
+		char *t_str;
+	};
+};
+
+struct ivy_asm_lexer_symbol_node;
+struct ivy_asm_lexer_state;
+struct ivy_asm_lexer;
+
+IVY_API enum ivy_status ivy_asm_lexer_create(struct ivy_asm_lexer **lex);
+IVY_API void ivy_asm_lexer_destroy(struct ivy_asm_lexer *lex);
+
+IVY_API void ivy_asm_lexer_set_source(
+	struct ivy_asm_lexer *lex, struct ivy_line_source *src);
+IVY_API enum ivy_status ivy_asm_lexer_get_status(struct ivy_asm_lexer *lex);
+
+IVY_API struct ivy_asm_token *ivy_asm_lexer_peek(struct ivy_asm_lexer *lex);
+IVY_API struct ivy_asm_token *ivy_asm_lexer_read(struct ivy_asm_lexer *lex);
+
+IVY_API void ivy_asm_token_destroy(struct ivy_asm_token *tok);
+
+IVY_API const char *ivy_asm_token_type_to_string(enum ivy_asm_token_type type);
+IVY_API const char *ivy_asm_keyword_to_string(enum ivy_asm_keyword keyword);
+IVY_API const char *ivy_asm_symbol_to_string(enum ivy_asm_symbol sym);
+
+#endif
diff --git a/asm/lex.c b/asm/lex.c
index c00bdbb..516e5a0 100644
--- a/asm/lex.c
+++ b/asm/lex.c
@@ -1,6 +1,1148 @@
+#include
+#include
+#include
+#include
+#include
+#include
 #include
+#include
+#include
+#include
+#include
+#include "lex.h"
 
-IVY_API void placeholder12(void)
+#define LINEBUF_DEFAULT_CAPACITY 1024
+
+#define LEX_TOKEN_DEF(i, n) \
+	{ \
+		.id = (i), .name = (n) \
+	}
+
+/* Keyword spellings; the leading '@' is part of the lookup key. */
+static struct lex_token_def keywords[] = {
+	LEX_TOKEN_DEF(IVY_ASM_KW_USE, "@use"),
+	LEX_TOKEN_DEF(IVY_ASM_KW_IDENT, "@ident"),
+	LEX_TOKEN_DEF(IVY_ASM_KW_SELECTOR, "@selector"),
+	LEX_TOKEN_DEF(IVY_ASM_KW_ATOM, "@atom"),
+	LEX_TOKEN_DEF(IVY_ASM_KW_LAMBDA, "@lambda"),
+	LEX_TOKEN_DEF(IVY_ASM_KW_CONSTPOOL, "@constpool"),
+	LEX_TOKEN_DEF(IVY_ASM_KW_CLASS, "@class"),
+	LEX_TOKEN_DEF(IVY_ASM_KW_MSGH, "@msgh"),
+	LEX_TOKEN_DEF(IVY_ASM_KW_END, "@end"),
+};
+static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
+
+/* Symbol spellings; build_symbol_tree() compiles these into a prefix tree. */
+static struct lex_token_def symbols[] = {
+	LEX_TOKEN_DEF(IVY_ASM_SYM_DOT, "."),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_COMMA, ","),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_LEFT_PAREN, "("),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_RIGHT_PAREN, ")"),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_LEFT_BRACKET, "["),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_RIGHT_BRACKET, "]"),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_COLON, ":"),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_SEMICOLON, ";"),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_DOLLAR, "$"),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_HYPHEN, "-"),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_SQUOTE, "'"),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_DQUOTE, "\""),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_FORWARD_SLASH_ASTERISK, "/*"),
+	LEX_TOKEN_DEF(IVY_ASM_SYM_ASTERISK_FORWARD_SLASH, "*/"),
+};
+static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
+
+/* Allocate a zeroed state of type `state_type` and push it onto the back of
+ * the lexer's state stack. Returns NULL if the allocation fails. */
+static struct lexer_state *push_lexer_state(
+	struct ivy_asm_lexer *lex, enum lexer_state_type state_type)
 {
+	struct lexer_state *state = malloc(sizeof *state);
+	if (!state) {
+		return NULL;
+	}
 
+	memset(state, 0x0, sizeof *state);
+
+	state->s_type = state_type;
+	b_queue_push_back(&lex->lex_state, &state->s_entry);
+
+	return state;
+}
+
+/* Pop and free the innermost lexer state, if any. */
+static void pop_lexer_state(struct ivy_asm_lexer *lex)
+{
+	b_queue_entry *entry = b_queue_pop_back(&lex->lex_state);
+	if (!entry) {
+		return;
+	}
+
+	struct lexer_state *state = b_unbox(struct lexer_state, entry, s_entry);
+	free(state);
+}
+
+/* Return the innermost lexer state, or NULL if the stack is empty. */
+static struct lexer_state *get_lexer_state(struct ivy_asm_lexer *lex)
+{
+	b_queue_entry *entry = b_queue_last(&lex->lex_state);
+	if (!entry) {
+		return NULL;
+	}
+
+	return b_unbox(struct lexer_state, entry, s_entry);
+}
+
+/* Erase and free every entry of a lexer state stack. */
+static void destroy_state_stack(b_queue *state)
+{
+	b_queue_iterator it;
+	b_queue_iterator_begin(state, &it);
+	while (b_queue_iterator_is_valid(&it)) {
+		struct lexer_state *node
+			= b_unbox(struct lexer_state, it.entry, s_entry);
+		b_queue_iterator_erase(&it);
+
+		free(node);
+	}
+}
+
+/* Find the direct child of `node` labelled with character `c`, or NULL. */
+static struct ivy_asm_lexer_symbol_node *get_symbol_node(
+	struct ivy_asm_lexer_symbol_node *node, char c)
+{
+	b_queue_iterator it;
+	b_queue_foreach (&it, &node->s_children) {
+		struct ivy_asm_lexer_symbol_node *child = b_unbox(
+			struct ivy_asm_lexer_symbol_node, it.entry, s_entry);
+		if (child->s_char == c) {
+			return child;
+		}
+	}
+
+	return NULL;
+}
+
+/* Return the lexer's scratch string, cleared; it is created on first use. */
+static b_string *get_temp_string(struct ivy_asm_lexer *lex)
+{
+	if (!lex->lex_temp) {
+		lex->lex_temp = b_string_create();
+	}
+
+	b_string_clear(lex->lex_temp);
+	return lex->lex_temp;
+}
+
+/* Insert `sym` into the prefix tree rooted at `tree`, creating one node per
+ * character as needed, and tag the final node with the symbol's id. */
+static enum ivy_status put_symbol(
+	struct ivy_asm_lexer_symbol_node *tree, struct lex_token_def *sym)
+{
+	for (size_t i = 0; sym->name[i]; i++) {
+		char c = sym->name[i];
+		struct ivy_asm_lexer_symbol_node *child = get_symbol_node(tree, c);
+		if (child) {
+			tree = child;
+			continue;
+		}
+
+		child = malloc(sizeof *child);
+		if (!child) {
+			return IVY_ERR_NO_MEMORY;
+		}
+
+		memset(child, 0x0, sizeof *child);
+
+		child->s_id = IVY_ASM_SYM_NONE;
+		child->s_char = c;
+
+		b_queue_push_back(&tree->s_children, &child->s_entry);
+		tree = child;
+	}
+
+	tree->s_id = sym->id;
+	return IVY_OK;
+}
+
+/* Recursively free `tree` and all of its descendants. */
+static void destroy_symbol_tree(struct ivy_asm_lexer_symbol_node *tree)
+{
+	b_queue_iterator it;
+	b_queue_iterator_begin(&tree->s_children, &it);
+	while (b_queue_iterator_is_valid(&it)) {
+		struct ivy_asm_lexer_symbol_node *node = b_unbox(
+			struct ivy_asm_lexer_symbol_node, it.entry, s_entry);
+		b_queue_iterator_erase(&it);
+
+		destroy_symbol_tree(node);
+	}
+
+	free(tree);
+}
+
+/* Build the prefix tree for every entry of symbols[]; NULL on failure. */
+static struct ivy_asm_lexer_symbol_node *build_symbol_tree(void)
+{
+	struct ivy_asm_lexer_symbol_node *root = malloc(sizeof *root);
+	if (!root) {
+		return NULL;
+	}
+
+	memset(root, 0x0, sizeof *root);
+	root->s_id = IVY_ASM_SYM_NONE;
+
+	enum ivy_status status = IVY_OK;
+	for (size_t i = 0; i < nr_symbols; i++) {
+		status = put_symbol(root, &symbols[i]);
+
+		if (status != IVY_OK) {
+			destroy_symbol_tree(root);
+			return NULL;
+		}
+	}
+
+	return root;
+}
+
+/* Register every entry of keywords[] in `keyword_dict`, keyed by spelling. */
+static void init_keywords(b_dict *keyword_dict)
+{
+	for (size_t i = 0; i < nr_keywords; i++) {
+		struct lex_token_def *keyword = &keywords[i];
+		b_dict_put(keyword_dict, keyword->name, B_RV_INT(keyword->id));
+	}
+}
+
+/* Look up `s` in the keyword dictionary; IVY_ASM_KW_NONE if it is absent. */
+static enum ivy_asm_keyword find_keyword_by_name(struct ivy_asm_lexer *lex, const char *s)
+{
+	b_number *id = B_NUMBER(b_dict_at(lex->lex_keywords, s));
+	if (!id) {
+		return IVY_ASM_KW_NONE;
+	}
+
+	return b_number_get_int(id);
+}
+
+/* Create a lexer and store it in `*lexp`. If the line buffer, symbol tree,
+ * initial state or keyword dictionary cannot be created, the partially built
+ * lexer is destroyed and IVY_ERR_NO_MEMORY is returned; `*lexp` is untouched. */
+enum ivy_status ivy_asm_lexer_create(struct ivy_asm_lexer **lexp)
+{
+	struct ivy_asm_lexer *lex = malloc(sizeof *lex);
+	if (!lex) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	memset(lex, 0x0, sizeof *lex);
+
+	lex->lex_status = IVY_OK;
+	lex->lex_prev_token = IVY_ASM_TOK_NONE;
+
+	lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY);
+	if (!lex->lex_linebuf) {
+		ivy_asm_lexer_destroy(lex);
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;
+
+	lex->lex_sym_tree = build_symbol_tree();
+	if (!lex->lex_sym_tree) {
+		ivy_asm_lexer_destroy(lex);
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	if (!push_lexer_state(lex, STATE_NORMAL)) {
+		ivy_asm_lexer_destroy(lex);
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	lex->lex_keywords = b_dict_create();
+	if (!lex->lex_keywords) {
+		ivy_asm_lexer_destroy(lex);
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	init_keywords(lex->lex_keywords);
+	*lexp = lex;
+
+	return IVY_OK;
+}
+
+/* Free the lexer, its queued tokens, buffers, lookup tables and state stack. */
+void ivy_asm_lexer_destroy(struct ivy_asm_lexer *lex)
+{
+	while (lex->lex_queue) {
+		struct ivy_asm_token *next = lex->lex_queue->t_next;
+		ivy_asm_token_destroy(lex->lex_queue);
+		lex->lex_queue = next;
+	}
+
+	if (lex->lex_linebuf) {
+		free(lex->lex_linebuf);
+	}
+
+	if (lex->lex_sym_tree) {
+		destroy_symbol_tree(lex->lex_sym_tree);
+	}
+
+	if (lex->lex_temp) {
+		b_string_release(lex->lex_temp);
+	}
+
+	if (lex->lex_keywords) {
+		b_dict_release(lex->lex_keywords);
+	}
+
+	destroy_state_stack(&lex->lex_state);
+
+	free(lex);
+}
+
+/* Set the line source that subsequent buffer refills read from. */
+void ivy_asm_lexer_set_source(struct ivy_asm_lexer *lex, struct ivy_line_source *src)
+{
+	lex->lex_source = src;
+}
+
+/* Return the status recorded by the most recent peek/read call. */
+enum ivy_status ivy_asm_lexer_get_status(struct ivy_asm_lexer *lex)
+{
+	return lex->lex_status;
+}
+
+/* Read the next line from the source into the line buffer and rewind the cursor. */
+static enum ivy_status refill_linebuf(struct ivy_asm_lexer *lex)
+{
+	if (!lex->lex_source) {
+		return IVY_ERR_EOF;
+	}
+
+	enum ivy_status status = ivy_line_source_readline(
+		lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap,
+		&lex->lex_linebuf_len, NULL);
+
+	if (status == IVY_OK) {
+		lex->lex_linebuf_ptr = 0;
+	}
+
+	return status;
+}
+
+/* Return the character under the cursor without consuming it, refilling
+ * the line buffer when it is exhausted. Failures are returned as an
+ * ivy_status value; callers in this file test for them with `c < 0`. */
+static int peek(struct ivy_asm_lexer *lex)
+{
+	enum ivy_status status = IVY_OK;
+
+	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
+		status = refill_linebuf(lex);
+	}
+
+	if (status != IVY_OK) {
+		return status;
+	}
+
+	if (lex->lex_linebuf_len == 0) {
+		return IVY_ERR_EOF;
+	}
+
+	int c = lex->lex_linebuf[lex->lex_linebuf_ptr];
+	return c;
+}
+
+/* Like peek(), but looks one character ahead within the current line only. */
+static int peek_next(struct ivy_asm_lexer *lex)
+{
+	enum ivy_status status = IVY_OK;
+
+	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
+		status = refill_linebuf(lex);
+	}
+
+	if (status != IVY_OK) {
+		return status;
+	}
+
+	if (lex->lex_linebuf_len == 0) {
+		return IVY_ERR_EOF;
+	}
+
+	if (lex->lex_linebuf_ptr + 1 >= lex->lex_linebuf_len) {
+		return IVY_ERR_EOF;
+	}
+
+	int c = lex->lex_linebuf[lex->lex_linebuf_ptr + 1];
+	return c;
+}
+
+/* Like peek(), but also consumes the character. */
+static int advance(struct ivy_asm_lexer *lex)
+{
+	enum ivy_status status = IVY_OK;
+
+	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
+		status = refill_linebuf(lex);
+	}
+
+	if (status != IVY_OK) {
+		return status;
+	}
+
+	if (lex->lex_linebuf_len == 0) {
+		return IVY_ERR_EOF;
+	}
+
+	int c = lex->lex_linebuf[lex->lex_linebuf_ptr++];
+	return c;
+}
+
+/* True while unread characters remain in the current line buffer. */
+static bool input_available(struct ivy_asm_lexer* lex)
+{
+	return lex->lex_linebuf_ptr < lex->lex_linebuf_len;
+}
+
+/* True if some symbol spelling starts with `c`. */
+static bool char_can_begin_symbol(char c)
+{
+	for (size_t i = 0; i < nr_symbols; i++) {
+		if (symbols[i].name[0] == c) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Allocate a zeroed token of the given type; NULL on allocation failure. */
+static struct ivy_asm_token *create_token(enum ivy_asm_token_type type)
+{
+	struct ivy_asm_token *tok = malloc(sizeof *tok);
+	if (!tok) {
+		return NULL;
+	}
+
+	memset(tok, 0x0, sizeof *tok);
+
+	tok->t_type = type;
+	return tok;
+}
+
+/* Append `tok` to the pending-token queue and remember its type. */
+static enum ivy_status push_token(struct ivy_asm_lexer *lex, struct ivy_asm_token *tok)
+{
+	struct ivy_asm_token **slot = &lex->lex_queue;
+
+	while (*slot) {
+		slot = &(*slot)->t_next;
+	}
+
+	*slot = tok;
+	lex->lex_prev_token = tok->t_type;
+	return IVY_OK;
+}
+
+/* Queue a LINEFEED token unless the previously queued token was one. */
+static enum ivy_status push_linefeed(struct ivy_asm_lexer *lex)
+{
+	if (lex->lex_prev_token == IVY_ASM_TOK_LINEFEED) {
+		return IVY_OK;
+	}
+
+	struct ivy_asm_token *tok = malloc(sizeof *tok);
+	if (!tok) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	memset(tok, 0x0, sizeof *tok);
+
+	tok->t_type = IVY_ASM_TOK_LINEFEED;
+	return push_token(lex, tok);
+}
+
+/* Queue a STRING token whose t_str is `s` (freed by ivy_asm_token_destroy()). */
+static enum ivy_status push_string_content(struct ivy_asm_lexer *lex, char *s)
+{
+	struct ivy_asm_token *tok = malloc(sizeof *tok);
+	if (!tok) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	memset(tok, 0x0, sizeof *tok);
+
+	tok->t_type = IVY_ASM_TOK_STRING;
+	tok->t_str = s;
+	return push_token(lex, tok);
+}
+
+/* Queue a SYMBOL token. */
+static enum ivy_status push_symbol(struct ivy_asm_lexer *lex, enum ivy_asm_symbol sym)
+{
+	struct ivy_asm_token *tok = malloc(sizeof *tok);
+	if (!tok) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	memset(tok, 0x0, sizeof *tok);
+
+	tok->t_type = IVY_ASM_TOK_SYMBOL;
+	tok->t_symbol = sym;
+	return push_token(lex, tok);
+}
+
+/* Queue a signed INT token. */
+static enum ivy_status push_int(struct ivy_asm_lexer *lex, long long v)
+{
+	struct ivy_asm_token *tok = malloc(sizeof *tok);
+	if (!tok) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	memset(tok, 0x0, sizeof *tok);
+
+	tok->t_type = IVY_ASM_TOK_INT;
+	tok->t_int.v = v;
+	tok->t_int.sign = true;
+	return push_token(lex, tok);
+}
+
+/* Queue an unsigned INT token. */
+static enum ivy_status push_uint(struct ivy_asm_lexer *lex, unsigned long long v)
+{
+	struct ivy_asm_token *tok = malloc(sizeof *tok);
+	if (!tok) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	memset(tok, 0x0, sizeof *tok);
+
+	tok->t_type = IVY_ASM_TOK_INT;
+	tok->t_int.uv = v;
+	tok->t_int.sign = false;
+	return push_token(lex, tok);
+}
+
+/* Queue a DOUBLE token. */
+static enum ivy_status push_double(struct ivy_asm_lexer *lex, double v)
+{
+	struct ivy_asm_token *tok = malloc(sizeof *tok);
+	if (!tok) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	memset(tok, 0x0, sizeof *tok);
+
+	tok->t_type = IVY_ASM_TOK_DOUBLE;
+	tok->t_double = v;
+	return push_token(lex, tok);
+}
+
+/* Queue a KEYWORD token. */
+static enum ivy_status push_keyword(struct ivy_asm_lexer *lex, enum ivy_asm_keyword keyword)
+{
+	struct ivy_asm_token *tok = malloc(sizeof *tok);
+	if (!tok) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	memset(tok, 0x0, sizeof *tok);
+
+	tok->t_type = IVY_ASM_TOK_KEYWORD;
+	tok->t_keyword = keyword;
+	return push_token(lex, tok);
+}
+
+/* Discard input up to and including the next '\n', or up to end of input. */
+static enum ivy_status read_line_comment(struct ivy_asm_lexer *lex)
+{
+	while (true) {
+		int c = advance(lex);
+
+		if (c == IVY_ERR_EOF || c == '\n') {
+			break;
+		}
+
+		if (c < 0) {
+			return c;
+		}
+	}
+
+	return IVY_OK;
+}
+
+/* Discard a block comment whose opening marker has already been consumed.
+ * Nested comment markers are tracked with a depth counter. */
+static enum ivy_status read_block_comment(struct ivy_asm_lexer *lex)
+{
+	int depth = 1;
+	char buf[2] = {0};
+
+	while (depth > 0) {
+		int c = peek(lex);
+		if (c < 0) {
+			return c;
+		}
+
+		if (!buf[0]) {
+			buf[0] = c;
+		} else if (!buf[1]) {
+			buf[1] = c;
+		} else {
+			buf[0] = buf[1];
+			buf[1] = c;
+		}
+
+		if (buf[0] == '/' && buf[1] == '*') {
+			depth++;
+		} else if (buf[0] == '*' && buf[1] == '/') {
+			depth--;
+		}
+
+		advance(lex);
+	}
+
+	return IVY_OK;
+}
+
+/* A single quote closes the current '...' string or opens a new one. */
+static enum ivy_status read_squote_marker(struct ivy_asm_lexer *lex)
+{
+	struct lexer_state *state = get_lexer_state(lex);
+
+	if (state->s_type == STATE_STRING) {
+		/* already within a string */
+		pop_lexer_state(lex);
+		return IVY_OK;
+	}
+
+	/* start of a new string */
+	if (!push_lexer_state(lex, STATE_STRING)) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	return IVY_OK;
+}
+
+/* A double quote closes the current double-quoted string or opens a new one. */
+static enum ivy_status read_dquote_marker(struct ivy_asm_lexer *lex)
+{
+	struct lexer_state *state = get_lexer_state(lex);
+
+	if (state->s_type == STATE_DSTRING) {
+		/* already within a string */
+		pop_lexer_state(lex);
+		return IVY_OK;
+	}
+
+	/* start of a new string */
+	if (!push_lexer_state(lex, STATE_DSTRING)) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	return IVY_OK;
+}
+
+/* Collect characters up to (not including) the closing quote of the current
+ * string state and queue them as a STRING token; an empty string queues
+ * nothing. A read failure or end of input before the closing quote is
+ * returned as an error rather than being appended to the string forever. */
+static enum ivy_status read_string_content(struct ivy_asm_lexer *lex)
+{
+	int c;
+	b_string *str = get_temp_string(lex);
+	struct lexer_state *state = get_lexer_state(lex);
+
+	if (!str) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	while (true) {
+		c = peek(lex);
+
+		if (c < 0) {
+			return c;
+		}
+
+		if (state->s_type == STATE_STRING && (c == '\'')) {
+			break;
+		}
+
+		if (state->s_type == STATE_DSTRING && c == '"') {
+			break;
+		}
+
+		char s[2] = {c, 0};
+		b_string_append_cstr(str, s);
+		advance(lex);
+	}
+
+	if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) {
+		return IVY_OK;
+	}
+
+	char *s = b_string_steal(str);
+
+	enum ivy_status status = push_string_content(lex, s);
+	if (status != IVY_OK) {
+		free(s);
+	}
+
+	return status;
+}
+
+/* Walk the symbol tree as far as the input allows, then either act on the
+ * matched symbol (quotes, comments) or queue it as a SYMBOL token. */
+static enum ivy_status read_symbol(struct ivy_asm_lexer *lex)
+{
+	struct ivy_asm_lexer_symbol_node *node = lex->lex_sym_tree;
+
+	while (true) {
+		int c = peek(lex);
+		if (c < 0) {
+			break;
+		}
+
+		struct ivy_asm_lexer_symbol_node *next = get_symbol_node(node, c);
+		if (!next) {
+			break;
+		}
+
+		node = next;
+		advance(lex);
+	}
+
+	if (!node || node->s_id == IVY_ASM_SYM_NONE) {
+		return IVY_ERR_BAD_SYNTAX;
+	}
+
+	switch (node->s_id) {
+	case IVY_ASM_SYM_SQUOTE:
+		return read_squote_marker(lex);
+	case IVY_ASM_SYM_DQUOTE:
+		return read_dquote_marker(lex);
+	case IVY_ASM_SYM_FORWARD_SLASH_ASTERISK:
+		return read_block_comment(lex);
+	case IVY_ASM_SYM_SEMICOLON:
+		return read_line_comment(lex);
+	default:
+		return push_symbol(lex, node->s_id);
+	}
+}
+
+/* Lex a numeric literal introduced by '#'. Accepts '_' separators, one '-'
+ * sign, a fractional part (base 10 only) and the prefixes 0x (hex), 0b
+ * (binary) and 0 (octal). Queues an INT or DOUBLE token. */
+static enum ivy_status read_number(struct ivy_asm_lexer *lex)
+{
+	/* skip the leading # symbol */
+	advance(lex);
+
+	int token_len = 0;
+	int base = 10;
+	int dots = 0;
+	bool neg = false;
+	b_string *str = get_temp_string(lex);
+
+	while (true) {
+		int c = peek(lex);
+		if (c == IVY_ERR_EOF) {
+			break;
+		}
+
+		if (c < 0) {
+			return c;
+		}
+
+		if (c == '_') {
+			token_len++;
+			advance(lex);
+			continue;
+		}
+
+		if (c == '-') {
+			if (neg) {
+				return IVY_ERR_BAD_SYNTAX;
+			}
+
+			neg = true;
+			token_len++;
+			advance(lex);
+			continue;
+		}
+
+		if (c == '.' && peek_next(lex) >= 0 && isdigit(peek_next(lex))) {
+			if (base != 10) {
+				return IVY_ERR_BAD_SYNTAX;
+			}
+
+			if (dots > 0) {
+				return IVY_ERR_BAD_SYNTAX;
+			}
+
+			token_len++;
+			dots++;
+			char s[] = {c, 0};
+			b_string_append_cstr(str, s);
+			advance(lex);
+			continue;
+		}
+
+		if (isspace(c) || ispunct(c)) {
+			break;
+		}
+
+		if (c == '0' && token_len == 0) {
+			base = 8;
+			token_len++;
+			advance(lex);
+			continue;
+		}
+
+		if (c == 'x' && token_len == 1 && base == 8) {
+			base = 16;
+			token_len++;
+			advance(lex);
+			continue;
+		}
+
+		if (c == 'b' && token_len == 1 && base == 8) {
+			base = 2;
+			token_len++;
+			advance(lex);
+			continue;
+		}
+
+		if (base == 2 && c != '0' && c != '1') {
+			return IVY_ERR_BAD_SYNTAX;
+		}
+
+		if (base == 10 && !isdigit(c)) {
+			return IVY_ERR_BAD_SYNTAX;
+		}
+
+		if (base == 16 && !isxdigit(c)) {
+			return IVY_ERR_BAD_SYNTAX;
+		}
+
+		char s[] = {c, 0};
+		b_string_append_cstr(str, s);
+		advance(lex);
+		/* Digits count towards token_len so that a later '0', 'x' or
+		 * 'b' is not mistaken for a base prefix (e.g. the 0 in #10). */
+		token_len++;
+	}
+
+	if (token_len == 1 && base == 8) {
+		return push_uint(lex, 0);
+	}
+
+	const char *s = b_string_ptr(str);
+	char *ep = NULL;
+
+	if (dots > 0) {
+		double v = strtod(s, &ep);
+
+		if (*ep != '\0') {
+			return IVY_ERR_BAD_SYNTAX;
+		}
+
+		if (neg) {
+			v *= -1;
+		}
+
+		return push_double(lex, v);
+
+	} else if (neg) {
+		long long v = strtoll(s, &ep, base);
+
+		if (*ep != '\0') {
+			return IVY_ERR_BAD_SYNTAX;
+		}
+
+		v *= -1;
+
+		return push_int(lex, v);
+	} else {
+		unsigned long long v = strtoull(s, &ep, base);
+
+		if (*ep != '\0') {
+			return IVY_ERR_BAD_SYNTAX;
+		}
+
+		return push_uint(lex, v);
+	}
+}
+
+/* Lex an '@'-prefixed keyword and queue it; unknown names are a syntax error. */
+static enum ivy_status read_keyword(struct ivy_asm_lexer *lex)
+{
+	advance(lex);
+
+	b_string *str = get_temp_string(lex);
+	b_string_append_cstr(str, "@");
+
+	while (true) {
+		int c = peek(lex);
+
+		if (c < 0) {
+			break;
+		}
+
+		if (!isalnum(c) && c != '_') {
+			break;
+		}
+
+		char s[2] = {c, 0};
+		b_string_append_cstr(str, s);
+		advance(lex);
+	}
+
+	const char *s = b_string_ptr(str);
+
+	enum ivy_asm_keyword keyword = find_keyword_by_name(lex, s);
+
+	if (keyword == IVY_ASM_KW_NONE) {
+		return IVY_ERR_BAD_SYNTAX;
+	}
+
+	return push_keyword(lex, keyword);
+}
+
+/* Lex an identifier. A trailing single ':' is consumed and turns the token
+ * into a LABEL; a '::' is left in the input. */
+static enum ivy_status read_ident(struct ivy_asm_lexer *lex)
+{
+	b_string *str = get_temp_string(lex);
+	bool label = false;
+
+	while (true) {
+		int c = peek(lex);
+
+		if (c < 0) {
+			break;
+		}
+
+		if (c == ':' && peek_next(lex) != ':') {
+			advance(lex);
+			label = true;
+			break;
+		}
+
+		if (!isalnum(c) && c != '_') {
+			break;
+		}
+
+		char s[2] = {c, 0};
+		b_string_append_cstr(str, s);
+		advance(lex);
+	}
+
+	struct ivy_asm_token *tok
+		= create_token(label ? IVY_ASM_TOK_LABEL : IVY_ASM_TOK_IDENT);
+	if (!tok) {
+		return IVY_ERR_NO_MEMORY;
+	}
+
+	tok->t_str = b_string_steal(str);
+
+	return push_token(lex, tok);
+}
+
+/* Lex one unit of input and queue at most one token for it. Comments,
+ * quote markers and empty strings queue nothing, so the public entry
+ * points call this in a loop until the queue is non-empty. */
+static enum ivy_status pump_tokens(struct ivy_asm_lexer *lex)
+{
+	struct lexer_state *state = get_lexer_state(lex);
+
+	int c = peek(lex);
+
+	if (c < 0) {
+		return c;
+	}
+
+	if (state->s_type == STATE_DSTRING && c != '"') {
+		return read_string_content(lex);
+	}
+
+	if (state->s_type == STATE_STRING && c != '\'') {
+		return read_string_content(lex);
+	}
+
+	/* `state` is invalid past this point, as the read_* functions
+	 * may perform state transitions. */
+	state = NULL;
+
+	/* Skip blanks but stop at a newline so that trailing whitespace
+	 * does not swallow the LINEFEED token; negative status values are
+	 * never passed to the <ctype.h> classifiers. */
+	while (c >= 0 && c != '\n' && isspace(c)) {
+		advance(lex);
+		c = peek(lex);
+	}
+
+	if (c < 0) {
+		return c;
+	}
+
+	if (c == '\n') {
+		while (c == '\n') {
+			advance(lex);
+
+			if (!input_available(lex)) {
+				break;
+			}
+
+			c = peek(lex);
+		}
+
+		if (c < 0) {
+			return c;
+		}
+
+		return push_linefeed(lex);
+	}
+
+	if (isalpha(c) || c == '_') {
+		return read_ident(lex);
+	}
+
+	if (char_can_begin_symbol(c)) {
+		return read_symbol(lex);
+	}
+
+	if (c == '@') {
+		return read_keyword(lex);
+	}
+
+	if (c == '#') {
+		return read_number(lex);
+	}
+
+	return IVY_ERR_BAD_SYNTAX;
+}
+
+/* Return the next token without dequeuing it; NULL on failure (lex_status set). */
+struct ivy_asm_token *ivy_asm_lexer_peek(struct ivy_asm_lexer *lex)
+{
+	enum ivy_status status = IVY_OK;
+
+	while (!lex->lex_queue) {
+		status = pump_tokens(lex);
+
+		if (status != IVY_OK) {
+			lex->lex_status = status;
+			return NULL;
+		}
+	}
+
+	lex->lex_status = status;
+	struct ivy_asm_token *tok = lex->lex_queue;
+	return tok;
+}
+
+/* Dequeue and return the next token, or NULL on failure. The caller
+ * releases the returned token with ivy_asm_token_destroy(). */
+struct ivy_asm_token *ivy_asm_lexer_read(struct ivy_asm_lexer *lex)
+{
+	enum ivy_status status = IVY_OK;
+
+	while (!lex->lex_queue) {
+		status = pump_tokens(lex);
+
+		if (status != IVY_OK) {
+			lex->lex_status = status;
+			return NULL;
+		}
+	}
+
+	lex->lex_status = status;
+	struct ivy_asm_token *tok = lex->lex_queue;
+	lex->lex_queue = lex->lex_queue->t_next;
+	return tok;
+}
+
+/* Free a token, including the string owned by STRING, IDENT and LABEL
+ * tokens. */
+void ivy_asm_token_destroy(struct ivy_asm_token *tok)
+{
+	switch (tok->t_type) {
+	case IVY_ASM_TOK_STRING:
+	case IVY_ASM_TOK_IDENT:
+	case IVY_ASM_TOK_LABEL:
+		free(tok->t_str);
+		break;
+	default:
+		break;
+	}
+
+	free(tok);
+}
+
+#define ENUM_STR(x) \
+	case x: \
+		return #x
+
+/* Return the enumerator name of `type`, or "" for an unknown value. */
+const char *ivy_asm_token_type_to_string(enum ivy_asm_token_type type)
+{
+	switch (type) {
+		ENUM_STR(IVY_ASM_TOK_NONE);
+		ENUM_STR(IVY_ASM_TOK_KEYWORD);
+		ENUM_STR(IVY_ASM_TOK_SYMBOL);
+		ENUM_STR(IVY_ASM_TOK_INT);
+		ENUM_STR(IVY_ASM_TOK_DOUBLE);
+		ENUM_STR(IVY_ASM_TOK_LABEL);
+		ENUM_STR(IVY_ASM_TOK_IDENT);
+		ENUM_STR(IVY_ASM_TOK_STRING);
+		ENUM_STR(IVY_ASM_TOK_LINEFEED);
+	default:
+		return "";
+	}
+}
+
+/* Return the enumerator name of `keyword`, or "" for an unknown value. */
+const char *ivy_asm_keyword_to_string(enum ivy_asm_keyword keyword)
+{
+	switch (keyword) {
+		ENUM_STR(IVY_ASM_KW_NONE);
+		ENUM_STR(IVY_ASM_KW_USE);
+		ENUM_STR(IVY_ASM_KW_IDENT);
+		ENUM_STR(IVY_ASM_KW_SELECTOR);
+		ENUM_STR(IVY_ASM_KW_ATOM);
+		ENUM_STR(IVY_ASM_KW_LAMBDA);
+		ENUM_STR(IVY_ASM_KW_CONSTPOOL);
+		ENUM_STR(IVY_ASM_KW_CLASS);
+		ENUM_STR(IVY_ASM_KW_MSGH);
+		ENUM_STR(IVY_ASM_KW_END);
+	default:
+		return "";
+	}
+}
+
+/* Return the enumerator name of `sym`, or "" for an unknown value. */
+const char *ivy_asm_symbol_to_string(enum ivy_asm_symbol sym)
+{
+	switch (sym) {
+		ENUM_STR(IVY_ASM_SYM_NONE);
+		ENUM_STR(IVY_ASM_SYM_DOT);
+		ENUM_STR(IVY_ASM_SYM_SQUOTE);
+		ENUM_STR(IVY_ASM_SYM_DQUOTE);
+		ENUM_STR(IVY_ASM_SYM_LEFT_BRACKET);
+		ENUM_STR(IVY_ASM_SYM_RIGHT_BRACKET);
+		ENUM_STR(IVY_ASM_SYM_LEFT_PAREN);
+		ENUM_STR(IVY_ASM_SYM_RIGHT_PAREN);
+		ENUM_STR(IVY_ASM_SYM_COLON);
+		ENUM_STR(IVY_ASM_SYM_HYPHEN);
+		ENUM_STR(IVY_ASM_SYM_COMMA);
+		ENUM_STR(IVY_ASM_SYM_SEMICOLON);
+		ENUM_STR(IVY_ASM_SYM_DOLLAR);
+		ENUM_STR(IVY_ASM_SYM_FORWARD_SLASH_ASTERISK);
+		ENUM_STR(IVY_ASM_SYM_ASTERISK_FORWARD_SLASH);
+	default:
+		return "";
+	}
 }
diff --git a/asm/lex.h b/asm/lex.h
new file mode 100644
index 0000000..aa8fafb
--- /dev/null
+++ b/asm/lex.h
@@ -0,0 +1,53 @@
+#ifndef IVY_ASM_LEX_INTERNAL_H_
+#define IVY_ASM_LEX_INTERNAL_H_
+
+#include
+#include
+#include
+#include
+
+enum lexer_state_type {
+	STATE_NORMAL,
+	STATE_STRING,
+	STATE_DSTRING,
+};
+
+struct lexer_state {
+	enum lexer_state_type s_type;
+	b_queue_entry s_entry;
+};
+
+struct ivy_asm_lexer_symbol_node {
+	char s_char;
+	enum ivy_asm_symbol s_id;
+
+	b_queue_entry s_entry;
+	b_queue s_children;
+};
+
+struct lex_token_def {
+	int id;
+	const char *name;
+	uint64_t name_hash;
+};
+
+struct ivy_asm_lexer {
+	struct ivy_asm_lexer_symbol_node *lex_sym_tree;
+	struct ivy_line_source *lex_source;
+	b_dict *lex_keywords;
+	enum ivy_status lex_status;
+
+	struct ivy_asm_token *lex_queue;
+	enum ivy_asm_token_type lex_prev_token;
+
+	b_string *lex_temp;
+	b_queue lex_state;
+	unsigned int lex_brace_depth;
+
+	char *lex_linebuf;
+	size_t lex_linebuf_len;
+	size_t lex_linebuf_cap;
+	size_t lex_linebuf_ptr;
+};
+
+#endif
\ No newline at end of file