#include #include #include #include #include #include #define LINEBUF_DEFAULT_CAPACITY 1024 #define LEX_TOKEN_DEF(i, n) \ { \ .id = (i), .name = (n) \ } struct lex_token_def { int id; const char *name; }; static struct lex_token_def keywords[] = { LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"), LEX_TOKEN_DEF(IVY_KW_USE, "use"), LEX_TOKEN_DEF(IVY_KW_CLASS, "class"), LEX_TOKEN_DEF(IVY_KW_PROTOCOL, "protocol"), LEX_TOKEN_DEF(IVY_KW_TRY, "try"), LEX_TOKEN_DEF(IVY_KW_THROW, "throw"), LEX_TOKEN_DEF(IVY_KW_CATCH, "catch"), LEX_TOKEN_DEF(IVY_KW_IF, "if"), LEX_TOKEN_DEF(IVY_KW_AND, "and"), LEX_TOKEN_DEF(IVY_KW_OR, "or"), LEX_TOKEN_DEF(IVY_KW_IS, "is"), LEX_TOKEN_DEF(IVY_KW_NOT, "not"), LEX_TOKEN_DEF(IVY_KW_ELSE, "else"), LEX_TOKEN_DEF(IVY_KW_WHILE, "while"), LEX_TOKEN_DEF(IVY_KW_FOR, "for"), LEX_TOKEN_DEF(IVY_KW_MATCH, "match"), LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"), LEX_TOKEN_DEF(IVY_KW_IN, "in"), LEX_TOKEN_DEF(IVY_KW_DO, "do"), LEX_TOKEN_DEF(IVY_KW_END, "end"), }; static const size_t nr_keywords = sizeof keywords / sizeof keywords[0]; static struct lex_token_def symbols[] = { LEX_TOKEN_DEF(IVY_SYM_DOT, "."), LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"), LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"), LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["), LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"), LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("), LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"), LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"), LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"), LEX_TOKEN_DEF(IVY_SYM_COLON, ":"), LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"), LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"), LEX_TOKEN_DEF(IVY_SYM_MINUS, "-"), LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"), LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"), LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"), LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"), LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="), LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="), LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="), LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="), LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="), LEX_TOKEN_DEF(IVY_SYM_MINUS_EQUAL, "-="), LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="), LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="), LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="), LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="), LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="), LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="), LEX_TOKEN_DEF(IVY_SYM_BANG, "!"), LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"), LEX_TOKEN_DEF(IVY_SYM_CARET, "^"), LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"), LEX_TOKEN_DEF(IVY_SYM_COMMA, ","), LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"), LEX_TOKEN_DEF(IVY_SYM_RIGHT_ARROW, "->"), LEX_TOKEN_DEF(IVY_SYM_BIG_RIGHT_ARROW, "=>"), }; static const size_t nr_symbols = sizeof symbols / sizeof symbols[0]; enum ivy_status ivy_lexer_init(struct ivy_lexer *lex) { memset(lex, 0x0, sizeof *lex); lex->lex_status = IVY_OK; lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY); lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY; return IVY_OK; } void ivy_lexer_finish(struct ivy_lexer *lex) { while (lex->lex_queue) { struct ivy_token *next = lex->lex_queue->t_next; ivy_token_destroy(lex->lex_queue); lex->lex_queue = next; } if (lex->lex_linebuf) { free(lex->lex_linebuf); } memset(lex, 0x0, sizeof *lex); } static enum ivy_status refill_linebuf(struct ivy_lexer *lex) { if (!lex->lex_source) { return IVY_ERR_EOF; } return ivy_line_source_readline( lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap, &lex->lex_linebuf_len, NULL); } static int peek(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) { status = refill_linebuf(lex); } if (status != IVY_OK) { return status; } if (lex->lex_linebuf_len == 0) { return IVY_ERR_EOF; } int c = lex->lex_linebuf[lex->lex_linebuf_ptr]; return c; } static int advance(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) { status = refill_linebuf(lex); } if (status != IVY_OK) { return status; } if (lex->lex_linebuf_len == 0) { return IVY_ERR_EOF; } int c = lex->lex_linebuf[lex->lex_linebuf_ptr++]; return c; } static bool char_can_begin_symbol(char c) { for (size_t i = 0; i < nr_symbols; i++) { if (symbols[i].name[0] == c) { return true; } } return false; } static struct ivy_token *create_token(enum ivy_token_type type) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return NULL; } memset(tok, 0x0, sizeof *tok); tok->t_type = type; return tok; } static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok) { struct ivy_token **slot = &lex->lex_queue; while (*slot) { slot = &(*slot)->t_next; } *slot = tok; return IVY_OK; } static enum ivy_status push_linefeed(struct ivy_lexer *lex) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_LINEFEED; return push_token(lex, tok); } static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym) { struct ivy_token *tok = malloc(sizeof *tok); if (!tok) { return IVY_ERR_NO_MEMORY; } memset(tok, 0x0, sizeof *tok); tok->t_type = IVY_TOK_SYMBOL; tok->t_symbol = sym; return push_token(lex, tok); } static enum ivy_status read_ident(struct ivy_lexer *lex) { b_string *str = b_string_create(); int c = peek(lex); while (true) { if (c < 0) { break; } if (!isalnum(c) && c != '_') { break; } char s[2] = {c, 0}; b_string_append_cstr(str, s); } const char *s = b_string_ptr(str); if (!strcmp(s, "_")) { b_string_release(str); push_symbol(lex, IVY_SYM_UNDERSCORE); } struct ivy_token *tok = create_token(IVY_TOK_IDENT); tok->t_str = b_string_steal(str); b_string_release(str); push_token(lex, tok); return IVY_OK; } static enum ivy_status pump_tokens(struct ivy_lexer *lex) { enum ivy_status status; int c = peek(lex); if (c < 0) { return c; } if (c == '\n') { while (c == '\n') { advance(lex); c = peek(lex); } if (c < 0) { return c; } return push_linefeed(lex); } if (isalpha(c) || c == '_') { return read_ident(lex); } return IVY_ERR_BAD_SYNTAX; } struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; if (!lex->lex_queue) { status = pump_tokens(lex); } if (status != IVY_OK) { lex->lex_status = status; return NULL; } struct ivy_token *tok = lex->lex_queue; return tok; } struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex) { enum ivy_status status = IVY_OK; if (!lex->lex_queue) { status = pump_tokens(lex); } if (status != IVY_OK) { lex->lex_status = status; return NULL; } struct ivy_token *tok = lex->lex_queue; lex->lex_queue = lex->lex_queue->t_next; return tok; } void ivy_token_destroy(struct ivy_token *tok) { switch (tok->t_type) { case IVY_TOK_ATOM: case IVY_TOK_STRING: case IVY_TOK_IDENT: free(tok->t_str); break; default: break; } free(tok); }