#include "lex.h"
/* NOTE(review): the header names of the following #include directives are
 * missing from this copy of the file (the directives are bare). They are
 * reproduced as found; restore the original header names from version
 * control before building. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Size in bytes of the line buffer allocated by ivy_lexer_create(). */
#define LINEBUF_DEFAULT_CAPACITY 1024

/* Initializer for one `struct lex_token_def` table row: `i` is the token id
 * and `n` is its source spelling. */
#define LEX_TOKEN_DEF(i, n) \
	{ \
		.id = (i), .name = (n) \
	}

/* Keyword spellings and their ids. init_keywords() loads this table into the
 * lexer's keyword dictionary. */
static struct lex_token_def keywords[] = {
	LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
	LEX_TOKEN_DEF(IVY_KW_USE, "use"),
	LEX_TOKEN_DEF(IVY_KW_CLASS, "class"),
	LEX_TOKEN_DEF(IVY_KW_PROTOCOL, "protocol"),
	LEX_TOKEN_DEF(IVY_KW_TRY, "try"),
	LEX_TOKEN_DEF(IVY_KW_THROW, "throw"),
	LEX_TOKEN_DEF(IVY_KW_CATCH, "catch"),
	LEX_TOKEN_DEF(IVY_KW_UNDERSTANDS, "understands"),
	LEX_TOKEN_DEF(IVY_KW_IF, "if"),
	LEX_TOKEN_DEF(IVY_KW_THEN, "then"),
	LEX_TOKEN_DEF(IVY_KW_AND, "and"),
	LEX_TOKEN_DEF(IVY_KW_OR, "or"),
	LEX_TOKEN_DEF(IVY_KW_IS, "is"),
	LEX_TOKEN_DEF(IVY_KW_NOT, "not"),
	LEX_TOKEN_DEF(IVY_KW_ELSE, "else"),
	LEX_TOKEN_DEF(IVY_KW_WHILE, "while"),
	LEX_TOKEN_DEF(IVY_KW_FOR, "for"),
	LEX_TOKEN_DEF(IVY_KW_MATCH, "match"),
	LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"),
	LEX_TOKEN_DEF(IVY_KW_IN, "in"),
	LEX_TOKEN_DEF(IVY_KW_DO, "do"),
	LEX_TOKEN_DEF(IVY_KW_GET, "get"),
	LEX_TOKEN_DEF(IVY_KW_SET, "set"),
	LEX_TOKEN_DEF(IVY_KW_END, "end"),
};
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];

/* Symbol spellings and their ids. build_symbol_tree() inserts every row into
 * a per-character tree, and char_can_begin_symbol() scans the first character
 * of each spelling. */
static struct lex_token_def symbols[] = {
	LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
	LEX_TOKEN_DEF(IVY_SYM_SQUOTE, "'"),
	LEX_TOKEN_DEF(IVY_SYM_DQUOTE, "\""),
	LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
	LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
	LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
	LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
	LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
	LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
	LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"),
	LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"),
	LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
	LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
	LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"),
	LEX_TOKEN_DEF(IVY_SYM_HYPHEN, "-"),
	LEX_TOKEN_DEF(IVY_SYM_DOUBLE_HYPHEN, "--"),
	LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"),
	LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"),
	LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_ASTERISK, "/*"),
	LEX_TOKEN_DEF(IVY_SYM_ASTERISK_FORWARD_SLASH, "*/"),
	LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"),
	LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"),
	LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="),
	LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="),
	LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE, "<<"),
	LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE, ">>"),
	LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE_EQUAL, "<="),
	LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE_EQUAL, ">="),
	LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
	LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
	LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="),
	LEX_TOKEN_DEF(IVY_SYM_HYPHEN_EQUAL, "-="),
	LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
	LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="),
	LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="),
	LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="),
	LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="),
	LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="),
	LEX_TOKEN_DEF(IVY_SYM_BANG_EQUAL, "!="),
	LEX_TOKEN_DEF(IVY_SYM_HASH, "#"),
	LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
	LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
	LEX_TOKEN_DEF(IVY_SYM_CARET, "^"),
	LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"),
	LEX_TOKEN_DEF(IVY_SYM_COMMA, ","),
	LEX_TOKEN_DEF(IVY_SYM_SEMICOLON, ";"),
	LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"),
	LEX_TOKEN_DEF(IVY_SYM_HYPHEN_RIGHT_ANGLE, "->"),
	LEX_TOKEN_DEF(IVY_SYM_EQUAL_RIGHT_ANGLE, "=>"),
};
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];

/* Allocate a zeroed lexer state of type `state_type`, record the lexer's
 * current brace depth in it, and push it onto the back of the state stack.
 *
 * Returns the new state, or NULL if the allocation fails (in which case the
 * stack is left untouched). */
static struct lexer_state *push_lexer_state(
	struct ivy_lexer *lex, enum lexer_state_type state_type)
{
	struct lexer_state *state = malloc(sizeof *state);
	if (!state) {
		return NULL;
	}

	memset(state, 0x0, sizeof *state);

	state->s_type = state_type;
	/* remembered so a closing brace can be matched against the depth at
	 * which this state was entered (see read_symbol) */
	state->s_brace_depth = lex->lex_brace_depth;
	b_queue_push_back(&lex->lex_state, &state->s_entry);

	return state;
}

/* Remove the state at the back of the state stack and free it. Does nothing
 * when the stack is empty. */
static void pop_lexer_state(struct ivy_lexer *lex)
{
	b_queue_entry *entry = b_queue_pop_back(&lex->lex_state);
	if (!entry) {
		return;
	}

	struct lexer_state *state = b_unbox(struct lexer_state, entry, s_entry);
	free(state);
}
static struct lexer_state *get_lexer_state(struct ivy_lexer *lex) { b_queue_entry *entry = b_queue_last(&lex->lex_state); if (!entry) { return NULL; } return b_unbox(struct lexer_state, entry, s_entry); } static void destroy_state_stack(b_queue *state) { b_queue_iterator it; b_queue_iterator_begin(state, &it); while (b_queue_iterator_is_valid(&it)) { struct lexer_state *node = b_unbox(struct lexer_state, it.entry, s_entry); b_queue_iterator_erase(&it); free(node); } } static struct ivy_lexer_symbol_node *get_symbol_node( struct ivy_lexer_symbol_node *node, char c) { b_queue_iterator it; b_queue_foreach (&it, &node->s_children) { struct ivy_lexer_symbol_node *child = b_unbox( struct ivy_lexer_symbol_node, it.entry, s_entry); if (child->s_char == c) { return child; } } return NULL; } static b_string *get_temp_string(struct ivy_lexer *lex) { if (!lex->lex_temp) { lex->lex_temp = b_string_create(); } b_string_clear(lex->lex_temp); return lex->lex_temp; } static enum ivy_status put_symbol( struct ivy_lexer_symbol_node *tree, struct lex_token_def *sym) { for (size_t i = 0; sym->name[i]; i++) { char c = sym->name[i]; struct ivy_lexer_symbol_node *child = get_symbol_node(tree, c); if (child) { tree = child; continue; } child = malloc(sizeof *child); if (!child) { return IVY_ERR_NO_MEMORY; } memset(child, 0x0, sizeof *child); child->s_id = IVY_SYM_NONE; child->s_char = c; b_queue_push_back(&tree->s_children, &child->s_entry); tree = child; } tree->s_id = sym->id; return IVY_OK; } static void destroy_symbol_tree(struct ivy_lexer_symbol_node *tree) { b_queue_iterator it; b_queue_iterator_begin(&tree->s_children, &it); while (b_queue_iterator_is_valid(&it)) { struct ivy_lexer_symbol_node *node = b_unbox( struct ivy_lexer_symbol_node, it.entry, s_entry); b_queue_iterator_erase(&it); destroy_symbol_tree(node); } free(tree); } static struct ivy_lexer_symbol_node *build_symbol_tree(void) { struct ivy_lexer_symbol_node *root = malloc(sizeof *root); if (!root) { return NULL; } 
memset(root, 0x0, sizeof *root); root->s_id = IVY_SYM_NONE; enum ivy_status status = IVY_OK; for (size_t i = 0; i < nr_symbols; i++) { status = put_symbol(root, &symbols[i]); if (status != IVY_OK) { destroy_symbol_tree(root); return NULL; } } return root; } static void init_keywords(b_dict *keyword_dict) { for (size_t i = 0; i < nr_keywords; i++) { struct lex_token_def *keyword = &keywords[i]; b_dict_put(keyword_dict, keyword->name, B_RV_INT(keyword->id)); } } static enum ivy_keyword find_keyword_by_name(struct ivy_lexer *lex, const char *s) { b_number *id = B_NUMBER(b_dict_at(lex->lex_keywords, s)); if (!id) { return IVY_KW_NONE; } return b_number_get_int(id); } enum ivy_status ivy_lexer_create(struct ivy_lexer **lexp) { struct ivy_lexer *lex = malloc(sizeof *lex); if (!lex) { return IVY_ERR_NO_MEMORY; } memset(lex, 0x0, sizeof *lex); lex->lex_status = IVY_OK; lex->lex_prev_token = IVY_TOK_NONE; lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY); lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY; lex->lex_sym_tree = build_symbol_tree(); if (!lex->lex_sym_tree) { ivy_lexer_destroy(lex); return IVY_ERR_NO_MEMORY; } if (!push_lexer_state(lex, STATE_NORMAL)) { ivy_lexer_destroy(lex); return IVY_ERR_NO_MEMORY; } lex->lex_keywords = b_dict_create(); init_keywords(lex->lex_keywords); *lexp = lex; return IVY_OK; } void ivy_lexer_destroy(struct ivy_lexer *lex) { b_queue_iterator it = {0}; b_queue_iterator_begin(&lex->lex_queue, &it); while (b_queue_iterator_is_valid(&it)) { struct ivy_token *tok = b_unbox(struct ivy_token, it.entry, t_entry); b_queue_iterator_erase(&it); ivy_token_destroy(tok); } if (lex->lex_linebuf) { free(lex->lex_linebuf); } if (lex->lex_sym_tree) { destroy_symbol_tree(lex->lex_sym_tree); } if (lex->lex_temp) { b_string_release(lex->lex_temp); } if (lex->lex_keywords) { b_dict_release(lex->lex_keywords); } destroy_state_stack(&lex->lex_state); free(lex); } void ivy_lexer_set_source(struct ivy_lexer *lex, struct ivy_line_source *src) { lex->lex_source = 
src; } enum ivy_status ivy_lexer_get_status(struct ivy_lexer *lex) { return lex->lex_status; } static enum ivy_status refill_linebuf(struct ivy_lexer *lex) { if (!lex->lex_source) { return IVY_ERR_EOF; } enum ivy_status status = ivy_line_source_readline( lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap, &lex->lex_linebuf_len, NULL); if (status == IVY_OK) { lex->lex_linebuf_ptr = 0; } return status; } static int peek(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) { status = refill_linebuf(lex); } if (status != IVY_OK) { return status; } if (lex->lex_linebuf_len == 0) { return IVY_ERR_EOF; } int c = lex->lex_linebuf[lex->lex_linebuf_ptr]; return c; } static int peek_next(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) { status = refill_linebuf(lex); } if (status != IVY_OK) { return status; } if (lex->lex_linebuf_len == 0) { return IVY_ERR_EOF; } if (lex->lex_linebuf_ptr + 1 >= lex->lex_linebuf_len) { return IVY_ERR_EOF; } int c = lex->lex_linebuf[lex->lex_linebuf_ptr + 1]; return c; } static int advance(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) { status = refill_linebuf(lex); } if (status != IVY_OK) { return status; } if (lex->lex_linebuf_len == 0) { return IVY_ERR_EOF; } int c = lex->lex_linebuf[lex->lex_linebuf_ptr++]; return c; } static bool input_available(struct ivy_lexer *lex) { return lex->lex_linebuf_ptr < lex->lex_linebuf_len; } static bool char_can_begin_symbol(char c) { for (size_t i = 0; i < nr_symbols; i++) { if (symbols[i].name[0] == c) { return true; } } return false; } static struct ivy_token *create_token(enum ivy_token_type type) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return NULL; } memset(tok, 0x0, sizeof *tok); tok->t_type = type; return tok; } static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok) { 
b_queue_push_back(&lex->lex_queue, &tok->t_entry); lex->lex_prev_token = tok->t_type; return IVY_OK; } static enum ivy_status push_linefeed(struct ivy_lexer *lex) { if (lex->lex_prev_token == IVY_TOK_LINEFEED) { return IVY_OK; } struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_LINEFEED; return push_token(lex, tok); } static enum ivy_status push_string_start(struct ivy_lexer *lex) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_STR_START; return push_token(lex, tok); } static enum ivy_status push_string_end(struct ivy_lexer *lex) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_STR_END; return push_token(lex, tok); } static enum ivy_status push_string_content(struct ivy_lexer *lex, char *s) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_STRING; tok->t_str = s; return push_token(lex, tok); } static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_SYMBOL; tok->t_symbol = sym; return push_token(lex, tok); } static enum ivy_status push_atom(struct ivy_lexer *lex, char *s) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_ATOM; tok->t_str = s; return push_token(lex, tok); } static enum ivy_status push_int(struct ivy_lexer *lex, unsigned long long v) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_INT; tok->t_int = v; return push_token(lex, tok); } static enum ivy_status 
push_double(struct ivy_lexer *lex, double v) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_DOUBLE; tok->t_double = v; return push_token(lex, tok); } static enum ivy_status push_keyword(struct ivy_lexer *lex, enum ivy_keyword keyword) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_KEYWORD; tok->t_keyword = keyword; return push_token(lex, tok); } static enum ivy_status read_line_comment(struct ivy_lexer *lex) { while (true) { int c = advance(lex); if (c == IVY_ERR_EOF || c == '\n') { break; } if (c < 0) { return c; } } return IVY_OK; } static enum ivy_status read_block_comment(struct ivy_lexer *lex) { int depth = 1; char buf[2] = {0}; while (depth > 0) { int c = peek(lex); if (c < 0) { return c; } if (!buf[0]) { buf[0] = c; } else if (!buf[1]) { buf[1] = c; } else { buf[0] = buf[1]; buf[1] = c; } if (buf[0] == '/' && buf[1] == '*') { depth++; } else if (buf[0] == '*' && buf[1] == '/') { depth--; } advance(lex); } return IVY_OK; } static enum ivy_status read_squote_marker(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; struct lexer_state *state = get_lexer_state(lex); if (state->s_type == STATE_FSTRING) { /* already within an fstring */ pop_lexer_state(lex); return push_string_end(lex); } /* start of a new fstring */ status = push_string_start(lex); if (status != IVY_OK) { return status; } if (!push_lexer_state(lex, STATE_FSTRING)) { return IVY_ERR_NO_MEMORY; } return IVY_OK; } static enum ivy_status read_dquote_marker(struct ivy_lexer *lex) { struct lexer_state *state = get_lexer_state(lex); if (state->s_type == STATE_STRING) { /* already within a string */ pop_lexer_state(lex); return IVY_OK; } /* start of a new string */ if (!push_lexer_state(lex, STATE_STRING)) { return IVY_ERR_NO_MEMORY; } return IVY_OK; } static enum ivy_status read_atom(struct ivy_lexer *lex) { 
b_string *str = get_temp_string(lex); while (true) { int c = peek(lex); if (c == IVY_ERR_EOF) { break; } if (c < 0) { return c; } if (!isalnum(c) && c != ':' && c != '_') { break; } char s[] = {c, 0}; b_string_append_cstr(str, s); advance(lex); } char *s = b_string_steal(str); return push_atom(lex, s); } static enum ivy_status read_string_content(struct ivy_lexer *lex) { int c; b_string *str = get_temp_string(lex); struct lexer_state *state = get_lexer_state(lex); if (!str) { return IVY_ERR_NO_MEMORY; } while (true) { c = peek(lex); if (state->s_type == STATE_FSTRING && (c == '\'' || c == '{')) { break; } if (state->s_type == STATE_STRING && c == '"') { break; } char s[2] = {c, 0}; b_string_append_cstr(str, s); advance(lex); } if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) { return IVY_OK; } char *s = b_string_steal(str); enum ivy_status status = push_string_content(lex, s); if (status != IVY_OK) { free(s); } return status; } static enum ivy_status read_symbol(struct ivy_lexer *lex) { struct ivy_lexer_symbol_node *node = lex->lex_sym_tree; struct lexer_state *state = get_lexer_state(lex); while (true) { int c = peek(lex); struct ivy_lexer_symbol_node *next = get_symbol_node(node, c); if (!next) { break; } node = next; advance(lex); } if (!node || node->s_id == IVY_SYM_NONE) { return IVY_ERR_BAD_SYNTAX; } switch (node->s_id) { case IVY_SYM_SQUOTE: return read_squote_marker(lex); case IVY_SYM_DQUOTE: return read_dquote_marker(lex); case IVY_SYM_FORWARD_SLASH_ASTERISK: return read_block_comment(lex); case IVY_SYM_DOUBLE_HYPHEN: return read_line_comment(lex); case IVY_SYM_HASH: return read_atom(lex); case IVY_SYM_LEFT_BRACE: push_symbol(lex, node->s_id); lex->lex_brace_depth++; if (state->s_type == STATE_FSTRING) { push_lexer_state(lex, STATE_INTERPOLATION); } return IVY_OK; case IVY_SYM_RIGHT_BRACE: push_symbol(lex, node->s_id); lex->lex_brace_depth--; if (state->s_type == STATE_INTERPOLATION && lex->lex_brace_depth < state->s_brace_depth) { pop_lexer_state(lex); } 
return IVY_OK; default: push_symbol(lex, node->s_id); return IVY_OK; } } static enum ivy_status read_number(struct ivy_lexer *lex) { int token_len = 0; int base = 10; int dots = 0; b_string *str = get_temp_string(lex); while (true) { int c = peek(lex); if (c == IVY_ERR_EOF) { break; } if (c < 0) { return c; } if (c == '_') { token_len++; advance(lex); continue; } if (c == '.' && iswdigit(peek_next(lex))) { if (base != 10) { return IVY_ERR_BAD_SYNTAX; } if (dots > 0) { return IVY_ERR_BAD_SYNTAX; } token_len++; dots++; char s[] = {c, 0}; b_string_append_cstr(str, s); advance(lex); continue; } if (isspace(c) || ispunct(c)) { break; } if (c == '0' && token_len == 0) { base = 7; token_len++; advance(lex); continue; } if (c == 'x' && token_len == 1) { base = 16; token_len++; advance(lex); continue; } if (c == 'b' && token_len == 1) { base = 2; token_len++; advance(lex); continue; } if (base == 2 && c != '0' && c != '1') { return IVY_ERR_BAD_SYNTAX; } if (base == 10 && !isdigit(c)) { return IVY_ERR_BAD_SYNTAX; } if (base == 16 && !isxdigit(c)) { return IVY_ERR_BAD_SYNTAX; } char s[] = {c, 0}; b_string_append_cstr(str, s); advance(lex); token_len++; } if (token_len == 1 && base == 7) { return push_int(lex, 0); } const char *s = b_string_ptr(str); char *ep = NULL; /* negative numbers will be lexed as a hyphen followed by a positive * number. 
*/ if (dots > 0) { double v = strtod(s, &ep); if (*ep != '\0') { return IVY_ERR_BAD_SYNTAX; } return push_double(lex, v); } else { unsigned long long v = strtoull(s, &ep, base); if (*ep != '\0') { return IVY_ERR_BAD_SYNTAX; } return push_int(lex, v); } } static enum ivy_status read_ident(struct ivy_lexer *lex) { b_string *str = get_temp_string(lex); bool label = false; while (true) { int c = peek(lex); if (c < 0) { break; } if (c == ':' && peek_next(lex) != ':') { advance(lex); label = true; break; } if (!isalnum(c) && c != '_') { break; } char s[2] = {c, 0}; b_string_append_cstr(str, s); advance(lex); } const char *s = b_string_ptr(str); if (!label && !strcmp(s, "_")) { return push_symbol(lex, IVY_SYM_UNDERSCORE); } enum ivy_keyword keyword = IVY_KW_NONE; if (!label && (keyword = find_keyword_by_name(lex, s)) != IVY_KW_NONE) { return push_keyword(lex, keyword); } struct ivy_token *tok = create_token(label ? IVY_TOK_LABEL : IVY_TOK_IDENT); tok->t_str = b_string_steal(str); return push_token(lex, tok); } static enum ivy_status pump_tokens(struct ivy_lexer *lex) { struct lexer_state *state = get_lexer_state(lex); int c = peek(lex); if (c < 0) { return c; } if (state->s_type == STATE_STRING && c != '"') { return read_string_content(lex); } if (state->s_type == STATE_FSTRING && c != '\'' && c != '{') { return read_string_content(lex); } /* `state` is invalid past this point, as the read_* functions * may perform state transitions. 
*/ state = NULL; if (c == '\n') { while (c == '\n') { advance(lex); if (!input_available(lex)) { break; } c = peek(lex); } if (c < 0) { return c; } return push_linefeed(lex); } while (isspace(c)) { advance(lex); c = peek(lex); } if (isalpha(c) || c == '_') { return read_ident(lex); } if (char_can_begin_symbol(c)) { return read_symbol(lex); } if (isdigit(c)) { return read_number(lex); } return IVY_ERR_BAD_SYNTAX; } struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; while (b_queue_empty(&lex->lex_queue)) { status = pump_tokens(lex); if (status != IVY_OK) { lex->lex_status = status; return NULL; } } lex->lex_status = status; b_queue_entry *entry = b_queue_first(&lex->lex_queue); struct ivy_token *tok = b_unbox(struct ivy_token, entry, t_entry); return tok; } struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; while (b_queue_empty(&lex->lex_queue)) { status = pump_tokens(lex); if (status != IVY_OK) { lex->lex_status = status; return NULL; } } b_queue_entry *entry = b_queue_pop_front(&lex->lex_queue); struct ivy_token *tok = b_unbox(struct ivy_token, entry, t_entry); return tok; } bool ivy_lexer_tokens_available(struct ivy_lexer *lex) { if (!b_queue_empty(&lex->lex_queue)) { return true; } if (input_available(lex)) { return true; } return false; } void ivy_token_destroy(struct ivy_token *tok) { switch (tok->t_type) { case IVY_TOK_ATOM: case IVY_TOK_STRING: case IVY_TOK_IDENT: free(tok->t_str); break; default: break; } free(tok); } #define ENUM_STR(x) \ case x: \ return #x const char *ivy_lex_token_type_to_string(enum ivy_token_type type) { switch (type) { ENUM_STR(IVY_TOK_NONE); ENUM_STR(IVY_TOK_KEYWORD); ENUM_STR(IVY_TOK_SYMBOL); ENUM_STR(IVY_TOK_ATOM); ENUM_STR(IVY_TOK_INT); ENUM_STR(IVY_TOK_DOUBLE); ENUM_STR(IVY_TOK_LABEL); ENUM_STR(IVY_TOK_IDENT); ENUM_STR(IVY_TOK_STRING); ENUM_STR(IVY_TOK_STR_START); ENUM_STR(IVY_TOK_STR_END); ENUM_STR(IVY_TOK_LINEFEED); default: return ""; } } const 
char *ivy_keyword_to_string(enum ivy_keyword keyword) { switch (keyword) { ENUM_STR(IVY_KW_NONE); ENUM_STR(IVY_KW_PACKAGE); ENUM_STR(IVY_KW_USE); ENUM_STR(IVY_KW_CLASS); ENUM_STR(IVY_KW_PROTOCOL); ENUM_STR(IVY_KW_TRY); ENUM_STR(IVY_KW_THROW); ENUM_STR(IVY_KW_CATCH); ENUM_STR(IVY_KW_IF); ENUM_STR(IVY_KW_THEN); ENUM_STR(IVY_KW_AND); ENUM_STR(IVY_KW_OR); ENUM_STR(IVY_KW_IS); ENUM_STR(IVY_KW_NOT); ENUM_STR(IVY_KW_ELSE); ENUM_STR(IVY_KW_WHILE); ENUM_STR(IVY_KW_FOR); ENUM_STR(IVY_KW_MATCH); ENUM_STR(IVY_KW_UNLESS); ENUM_STR(IVY_KW_IN); ENUM_STR(IVY_KW_DO); ENUM_STR(IVY_KW_GET); ENUM_STR(IVY_KW_SET); ENUM_STR(IVY_KW_END); default: return ""; } } const char *ivy_symbol_to_string(enum ivy_symbol sym) { switch (sym) { ENUM_STR(IVY_SYM_NONE); ENUM_STR(IVY_SYM_DOT); ENUM_STR(IVY_SYM_SQUOTE); ENUM_STR(IVY_SYM_DQUOTE); ENUM_STR(IVY_SYM_LEFT_BRACE); ENUM_STR(IVY_SYM_RIGHT_BRACE); ENUM_STR(IVY_SYM_LEFT_BRACKET); ENUM_STR(IVY_SYM_RIGHT_BRACKET); ENUM_STR(IVY_SYM_LEFT_PAREN); ENUM_STR(IVY_SYM_RIGHT_PAREN); ENUM_STR(IVY_SYM_LEFT_ANGLE); ENUM_STR(IVY_SYM_RIGHT_ANGLE); ENUM_STR(IVY_SYM_COLON); ENUM_STR(IVY_SYM_DOUBLE_COLON); ENUM_STR(IVY_SYM_PLUS); ENUM_STR(IVY_SYM_HYPHEN); ENUM_STR(IVY_SYM_DOUBLE_HYPHEN); ENUM_STR(IVY_SYM_FORWARD_SLASH); ENUM_STR(IVY_SYM_ASTERISK); ENUM_STR(IVY_SYM_PERCENT); ENUM_STR(IVY_SYM_AMPERSAND); ENUM_STR(IVY_SYM_EQUAL); ENUM_STR(IVY_SYM_DOUBLE_EQUAL); ENUM_STR(IVY_SYM_DOUBLE_LEFT_ANGLE); ENUM_STR(IVY_SYM_DOUBLE_RIGHT_ANGLE); ENUM_STR(IVY_SYM_LEFT_ANGLE_EQUAL); ENUM_STR(IVY_SYM_RIGHT_ANGLE_EQUAL); ENUM_STR(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL); ENUM_STR(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL); ENUM_STR(IVY_SYM_PLUS_EQUAL); ENUM_STR(IVY_SYM_HYPHEN_EQUAL); ENUM_STR(IVY_SYM_FORWARD_SLASH_EQUAL); ENUM_STR(IVY_SYM_ASTERISK_EQUAL); ENUM_STR(IVY_SYM_AMPERSAND_EQUAL); ENUM_STR(IVY_SYM_PIPE_EQUAL); ENUM_STR(IVY_SYM_PERCENT_EQUAL); ENUM_STR(IVY_SYM_CARET_EQUAL); ENUM_STR(IVY_SYM_BANG); ENUM_STR(IVY_SYM_PIPE); ENUM_STR(IVY_SYM_CARET); ENUM_STR(IVY_SYM_HASH); 
ENUM_STR(IVY_SYM_UNDERSCORE); ENUM_STR(IVY_SYM_COMMA); ENUM_STR(IVY_SYM_SEMICOLON); ENUM_STR(IVY_SYM_DOLLAR); ENUM_STR(IVY_SYM_HYPHEN_RIGHT_ANGLE); ENUM_STR(IVY_SYM_EQUAL_RIGHT_ANGLE); ENUM_STR(IVY_SYM_FORWARD_SLASH_ASTERISK); ENUM_STR(IVY_SYM_ASTERISK_FORWARD_SLASH); default: return ""; } }