336 lines
7.1 KiB
C
336 lines
7.1 KiB
C
|
|
#include <blue/object/string.h>
|
||
|
|
#include <ctype.h>
|
||
|
|
#include <ivy/lang/lex.h>
|
||
|
|
#include <stdbool.h>
|
||
|
|
#include <stdlib.h>
|
||
|
|
#include <string.h>
|
||
|
|
|
||
|
|
/* Size, in bytes, of the line buffer that ivy_lexer_init() allocates. */
#define LINEBUF_DEFAULT_CAPACITY 1024
|
||
|
|
|
||
|
|
/*
 * Expands to a brace-enclosed designated initialiser that sets the `id`
 * and `name` members of a table entry (see struct lex_token_def).
 */
#define LEX_TOKEN_DEF(i, n) { .id = (i), .name = (n) }
|
||
|
|
|
||
|
|
/* One entry of a lexer lookup table: a token id paired with its spelling. */
struct lex_token_def {
	/* Numeric identifier of the keyword or symbol. */
	int id;
	/* NUL-terminated source text that spells the token. */
	const char *name;
};
|
||
|
|
|
||
|
|
static struct lex_token_def keywords[] = {
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_USE, "use"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_CLASS, "class"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_PROTOCOL, "protocol"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_TRY, "try"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_THROW, "throw"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_CATCH, "catch"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_IF, "if"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_AND, "and"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_OR, "or"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_IS, "is"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_NOT, "not"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_ELSE, "else"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_WHILE, "while"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_FOR, "for"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_MATCH, "match"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_IN, "in"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_DO, "do"),
|
||
|
|
LEX_TOKEN_DEF(IVY_KW_END, "end"),
|
||
|
|
};
|
||
|
|
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
||
|
|
|
||
|
|
static struct lex_token_def symbols[] = {
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_MINUS, "-"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_MINUS_EQUAL, "-="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_CARET, "^"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_COMMA, ","),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ARROW, "->"),
|
||
|
|
LEX_TOKEN_DEF(IVY_SYM_BIG_RIGHT_ARROW, "=>"),
|
||
|
|
};
|
||
|
|
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||
|
|
|
||
|
|
enum ivy_status ivy_lexer_init(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
memset(lex, 0x0, sizeof *lex);
|
||
|
|
|
||
|
|
lex->lex_status = IVY_OK;
|
||
|
|
|
||
|
|
lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY);
|
||
|
|
lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;
|
||
|
|
|
||
|
|
return IVY_OK;
|
||
|
|
}
|
||
|
|
|
||
|
|
void ivy_lexer_finish(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
while (lex->lex_queue) {
|
||
|
|
struct ivy_token *next = lex->lex_queue->t_next;
|
||
|
|
ivy_token_destroy(lex->lex_queue);
|
||
|
|
lex->lex_queue = next;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (lex->lex_linebuf) {
|
||
|
|
free(lex->lex_linebuf);
|
||
|
|
}
|
||
|
|
|
||
|
|
memset(lex, 0x0, sizeof *lex);
|
||
|
|
}
|
||
|
|
|
||
|
|
static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
if (!lex->lex_source) {
|
||
|
|
return IVY_ERR_EOF;
|
||
|
|
}
|
||
|
|
|
||
|
|
return ivy_line_source_readline(
|
||
|
|
lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap,
|
||
|
|
&lex->lex_linebuf_len, NULL);
|
||
|
|
}
|
||
|
|
|
||
|
|
static int peek(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
enum ivy_status status = IVY_OK;
|
||
|
|
|
||
|
|
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
||
|
|
status = refill_linebuf(lex);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (status != IVY_OK) {
|
||
|
|
return status;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (lex->lex_linebuf_len == 0) {
|
||
|
|
return IVY_ERR_EOF;
|
||
|
|
}
|
||
|
|
|
||
|
|
int c = lex->lex_linebuf[lex->lex_linebuf_ptr];
|
||
|
|
return c;
|
||
|
|
}
|
||
|
|
|
||
|
|
static int advance(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
enum ivy_status status = IVY_OK;
|
||
|
|
|
||
|
|
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
||
|
|
status = refill_linebuf(lex);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (status != IVY_OK) {
|
||
|
|
return status;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (lex->lex_linebuf_len == 0) {
|
||
|
|
return IVY_ERR_EOF;
|
||
|
|
}
|
||
|
|
|
||
|
|
int c = lex->lex_linebuf[lex->lex_linebuf_ptr++];
|
||
|
|
return c;
|
||
|
|
}
|
||
|
|
|
||
|
|
static bool char_can_begin_symbol(char c)
|
||
|
|
{
|
||
|
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||
|
|
if (symbols[i].name[0] == c) {
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
static struct ivy_token *create_token(enum ivy_token_type type)
|
||
|
|
{
|
||
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
||
|
|
if (!tok) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
memset(tok, 0x0, sizeof *tok);
|
||
|
|
|
||
|
|
tok->t_type = type;
|
||
|
|
return tok;
|
||
|
|
}
|
||
|
|
|
||
|
|
static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok)
|
||
|
|
{
|
||
|
|
struct ivy_token **slot = &lex->lex_queue;
|
||
|
|
|
||
|
|
while (*slot) {
|
||
|
|
slot = &(*slot)->t_next;
|
||
|
|
}
|
||
|
|
|
||
|
|
*slot = tok;
|
||
|
|
return IVY_OK;
|
||
|
|
}
|
||
|
|
|
||
|
|
static enum ivy_status push_linefeed(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
||
|
|
if (!tok) {
|
||
|
|
return IVY_ERR_NO_MEMORY;
|
||
|
|
}
|
||
|
|
|
||
|
|
memset(tok, 0x0, sizeof *tok);
|
||
|
|
|
||
|
|
tok->t_type = IVY_TOK_LINEFEED;
|
||
|
|
return push_token(lex, tok);
|
||
|
|
}
|
||
|
|
|
||
|
|
static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym)
|
||
|
|
{
|
||
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
||
|
|
if (!tok) {
|
||
|
|
return IVY_ERR_NO_MEMORY;
|
||
|
|
}
|
||
|
|
|
||
|
|
memset(tok, 0x0, sizeof *tok);
|
||
|
|
|
||
|
|
tok->t_type = IVY_TOK_SYMBOL;
|
||
|
|
tok->t_symbol = sym;
|
||
|
|
return push_token(lex, tok);
|
||
|
|
}
|
||
|
|
|
||
|
|
static enum ivy_status read_ident(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
b_string *str = b_string_create();
|
||
|
|
int c = peek(lex);
|
||
|
|
|
||
|
|
while (true) {
|
||
|
|
if (c < 0) {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (!isalnum(c) && c != '_') {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
char s[2] = {c, 0};
|
||
|
|
b_string_append_cstr(str, s);
|
||
|
|
}
|
||
|
|
|
||
|
|
const char *s = b_string_ptr(str);
|
||
|
|
if (!strcmp(s, "_")) {
|
||
|
|
b_string_release(str);
|
||
|
|
push_symbol(lex, IVY_SYM_UNDERSCORE);
|
||
|
|
}
|
||
|
|
|
||
|
|
struct ivy_token *tok = create_token(IVY_TOK_IDENT);
|
||
|
|
tok->t_str = b_string_steal(str);
|
||
|
|
b_string_release(str);
|
||
|
|
|
||
|
|
push_token(lex, tok);
|
||
|
|
return IVY_OK;
|
||
|
|
}
|
||
|
|
|
||
|
|
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
enum ivy_status status;
|
||
|
|
int c = peek(lex);
|
||
|
|
|
||
|
|
if (c < 0) {
|
||
|
|
return c;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (c == '\n') {
|
||
|
|
while (c == '\n') {
|
||
|
|
advance(lex);
|
||
|
|
c = peek(lex);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (c < 0) {
|
||
|
|
return c;
|
||
|
|
}
|
||
|
|
|
||
|
|
return push_linefeed(lex);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (isalpha(c) || c == '_') {
|
||
|
|
return read_ident(lex);
|
||
|
|
}
|
||
|
|
|
||
|
|
return IVY_ERR_BAD_SYNTAX;
|
||
|
|
}
|
||
|
|
|
||
|
|
struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
enum ivy_status status = IVY_OK;
|
||
|
|
|
||
|
|
if (!lex->lex_queue) {
|
||
|
|
status = pump_tokens(lex);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (status != IVY_OK) {
|
||
|
|
lex->lex_status = status;
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
struct ivy_token *tok = lex->lex_queue;
|
||
|
|
return tok;
|
||
|
|
}
|
||
|
|
|
||
|
|
struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex)
|
||
|
|
{
|
||
|
|
enum ivy_status status = IVY_OK;
|
||
|
|
|
||
|
|
if (!lex->lex_queue) {
|
||
|
|
status = pump_tokens(lex);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (status != IVY_OK) {
|
||
|
|
lex->lex_status = status;
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
struct ivy_token *tok = lex->lex_queue;
|
||
|
|
lex->lex_queue = lex->lex_queue->t_next;
|
||
|
|
return tok;
|
||
|
|
}
|
||
|
|
|
||
|
|
void ivy_token_destroy(struct ivy_token *tok)
|
||
|
|
{
|
||
|
|
switch (tok->t_type) {
|
||
|
|
case IVY_TOK_ATOM:
|
||
|
|
case IVY_TOK_STRING:
|
||
|
|
case IVY_TOK_IDENT:
|
||
|
|
free(tok->t_str);
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
free(tok);
|
||
|
|
}
|