Files
ivy/lang/lex.c

336 lines
7.1 KiB
C
Raw Normal View History

#include <blue/object/string.h>
#include <ctype.h>
#include <ivy/lang/lex.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
/* Size, in bytes, of the line buffer allocated by ivy_lexer_init(). */
#define LINEBUF_DEFAULT_CAPACITY 1024
/*
 * Brace-enclosed designated initialiser for one struct lex_token_def table
 * entry: `i` becomes the entry's id and `n` its spelling.
 */
#define LEX_TOKEN_DEF(i, n) \
{ \
.id = (i), .name = (n) \
}
/* One entry of the keyword/symbol tables: pairs a token id with its spelling. */
struct lex_token_def {
/* Token identifier (an IVY_KW_* or IVY_SYM_* value in the tables below). */
int id;
/* NUL-terminated spelling of the token as written in source text. */
const char *name;
};
/*
 * Table of reserved words and the keyword id each one maps to.
 * The table is read-only, so it is declared const.
 */
static const struct lex_token_def keywords[] = {
    LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
    LEX_TOKEN_DEF(IVY_KW_USE, "use"),
    LEX_TOKEN_DEF(IVY_KW_CLASS, "class"),
    LEX_TOKEN_DEF(IVY_KW_PROTOCOL, "protocol"),
    LEX_TOKEN_DEF(IVY_KW_TRY, "try"),
    LEX_TOKEN_DEF(IVY_KW_THROW, "throw"),
    LEX_TOKEN_DEF(IVY_KW_CATCH, "catch"),
    LEX_TOKEN_DEF(IVY_KW_IF, "if"),
    LEX_TOKEN_DEF(IVY_KW_AND, "and"),
    LEX_TOKEN_DEF(IVY_KW_OR, "or"),
    LEX_TOKEN_DEF(IVY_KW_IS, "is"),
    LEX_TOKEN_DEF(IVY_KW_NOT, "not"),
    LEX_TOKEN_DEF(IVY_KW_ELSE, "else"),
    LEX_TOKEN_DEF(IVY_KW_WHILE, "while"),
    LEX_TOKEN_DEF(IVY_KW_FOR, "for"),
    LEX_TOKEN_DEF(IVY_KW_MATCH, "match"),
    LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"),
    LEX_TOKEN_DEF(IVY_KW_IN, "in"),
    LEX_TOKEN_DEF(IVY_KW_DO, "do"),
    LEX_TOKEN_DEF(IVY_KW_END, "end"),
};
/* Number of entries in keywords[]. */
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
/*
 * Table of punctuation/operator spellings and the symbol id each one maps
 * to. Several entries share a first character (e.g. "=", "==", "=>").
 * The table is read-only, so it is declared const.
 */
static const struct lex_token_def symbols[] = {
    LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
    LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
    LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
    LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
    LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
    LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
    LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
    LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"),
    LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"),
    LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
    LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
    LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"),
    LEX_TOKEN_DEF(IVY_SYM_MINUS, "-"),
    LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"),
    LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"),
    LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"),
    LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"),
    LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="),
    LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="),
    LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
    LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
    LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="),
    LEX_TOKEN_DEF(IVY_SYM_MINUS_EQUAL, "-="),
    LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
    LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="),
    LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="),
    LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="),
    LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="),
    LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="),
    LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
    LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
    LEX_TOKEN_DEF(IVY_SYM_CARET, "^"),
    LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"),
    LEX_TOKEN_DEF(IVY_SYM_COMMA, ","),
    LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"),
    LEX_TOKEN_DEF(IVY_SYM_RIGHT_ARROW, "->"),
    LEX_TOKEN_DEF(IVY_SYM_BIG_RIGHT_ARROW, "=>"),
};
/* Number of entries in symbols[]. */
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
enum ivy_status ivy_lexer_init(struct ivy_lexer *lex)
{
memset(lex, 0x0, sizeof *lex);
lex->lex_status = IVY_OK;
lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY);
lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;
return IVY_OK;
}
void ivy_lexer_finish(struct ivy_lexer *lex)
{
while (lex->lex_queue) {
struct ivy_token *next = lex->lex_queue->t_next;
ivy_token_destroy(lex->lex_queue);
lex->lex_queue = next;
}
if (lex->lex_linebuf) {
free(lex->lex_linebuf);
}
memset(lex, 0x0, sizeof *lex);
}
static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
{
if (!lex->lex_source) {
return IVY_ERR_EOF;
}
return ivy_line_source_readline(
lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap,
&lex->lex_linebuf_len, NULL);
}
/*
 * Return the byte at the current read position without consuming it.
 *
 * If the read position has reached the end of the buffered line, the
 * buffer is refilled first.
 *
 * On success the byte is returned as a non-negative value (converted via
 * unsigned char), so bytes >= 0x80 are never mistaken for a failure and
 * the result is safe to pass to the <ctype.h> classifiers. On failure the
 * status from refill_linebuf() is returned, or IVY_ERR_EOF when the buffer
 * is empty. Callers in this file detect failure with `< 0`, so error
 * statuses are presumably negative (confirm against the ivy_status enum).
 */
static int peek(struct ivy_lexer *lex)
{
    if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
        enum ivy_status status = refill_linebuf(lex);
        if (status != IVY_OK) {
            return status;
        }
    }

    if (lex->lex_linebuf_len == 0) {
        return IVY_ERR_EOF;
    }

    return (unsigned char)lex->lex_linebuf[lex->lex_linebuf_ptr];
}
/*
 * Return the byte at the current read position and move past it.
 *
 * If the read position has reached the end of the buffered line, the
 * buffer is refilled first.
 *
 * On success the byte is returned as a non-negative value (converted via
 * unsigned char), so bytes >= 0x80 are never mistaken for a failure. On
 * failure the status from refill_linebuf() is returned, or IVY_ERR_EOF
 * when the buffer is empty; the read position is not moved in that case.
 * Callers in this file detect failure with `< 0`, so error statuses are
 * presumably negative (confirm against the ivy_status enum).
 */
static int advance(struct ivy_lexer *lex)
{
    if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
        enum ivy_status status = refill_linebuf(lex);
        if (status != IVY_OK) {
            return status;
        }
    }

    if (lex->lex_linebuf_len == 0) {
        return IVY_ERR_EOF;
    }

    return (unsigned char)lex->lex_linebuf[lex->lex_linebuf_ptr++];
}
static bool char_can_begin_symbol(char c)
{
for (size_t i = 0; i < nr_symbols; i++) {
if (symbols[i].name[0] == c) {
return true;
}
}
return false;
}
/*
 * Allocate a token with every field zeroed and its type set to `type`.
 * Returns NULL when the allocation fails.
 */
static struct ivy_token *create_token(enum ivy_token_type type)
{
    /* calloc hands back zero-filled storage in a single call. */
    struct ivy_token *fresh = calloc(1, sizeof *fresh);

    if (fresh) {
        fresh->t_type = type;
    }

    return fresh;
}
/*
 * Append `tok` to the end of the lexer's pending-token list.
 * Always returns IVY_OK.
 */
static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok)
{
    /* Empty list: the new token becomes the head. */
    if (!lex->lex_queue) {
        lex->lex_queue = tok;
        return IVY_OK;
    }

    /* Otherwise walk to the last node and link the token after it. */
    struct ivy_token *tail = lex->lex_queue;
    while (tail->t_next) {
        tail = tail->t_next;
    }
    tail->t_next = tok;

    return IVY_OK;
}
/*
 * Queue a new IVY_TOK_LINEFEED token.
 * Returns IVY_ERR_NO_MEMORY if the token cannot be allocated, otherwise
 * the result of push_token().
 */
static enum ivy_status push_linefeed(struct ivy_lexer *lex)
{
    /* create_token() performs the zeroed allocation and sets the type. */
    struct ivy_token *linefeed = create_token(IVY_TOK_LINEFEED);

    return linefeed ? push_token(lex, linefeed) : IVY_ERR_NO_MEMORY;
}
/*
 * Queue a new IVY_TOK_SYMBOL token carrying the symbol id `sym`.
 * Returns IVY_ERR_NO_MEMORY if the token cannot be allocated, otherwise
 * the result of push_token().
 */
static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym)
{
    /* create_token() performs the zeroed allocation and sets the type. */
    struct ivy_token *symbol_tok = create_token(IVY_TOK_SYMBOL);

    if (symbol_tok == NULL) {
        return IVY_ERR_NO_MEMORY;
    }

    symbol_tok->t_symbol = sym;
    return push_token(lex, symbol_tok);
}
static enum ivy_status read_ident(struct ivy_lexer *lex)
{
b_string *str = b_string_create();
int c = peek(lex);
while (true) {
if (c < 0) {
break;
}
if (!isalnum(c) && c != '_') {
break;
}
char s[2] = {c, 0};
b_string_append_cstr(str, s);
}
const char *s = b_string_ptr(str);
if (!strcmp(s, "_")) {
b_string_release(str);
push_symbol(lex, IVY_SYM_UNDERSCORE);
}
struct ivy_token *tok = create_token(IVY_TOK_IDENT);
tok->t_str = b_string_steal(str);
b_string_release(str);
push_token(lex, tok);
return IVY_OK;
}
/*
 * Lex the next piece of input and queue the resulting token.
 *
 * - A run of one or more '\n' characters is consumed and queued as a
 *   single linefeed token. If peek() reports a failure right after the
 *   run, that status is returned and no linefeed token is queued.
 * - A letter or '_' starts an identifier, handled by read_ident().
 * - Any other character yields IVY_ERR_BAD_SYNTAX.
 *
 * A failure reported by peek() (a negative value) is returned unchanged.
 */
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
{
    int ch = peek(lex);

    if (ch < 0) {
        return ch;
    }

    if (ch == '\n') {
        /* Collapse consecutive newlines into one linefeed token. */
        do {
            advance(lex);
            ch = peek(lex);
        } while (ch == '\n');

        return (ch < 0) ? ch : push_linefeed(lex);
    }

    if (ch == '_' || isalpha(ch)) {
        return read_ident(lex);
    }

    return IVY_ERR_BAD_SYNTAX;
}
struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex)
{
enum ivy_status status = IVY_OK;
if (!lex->lex_queue) {
status = pump_tokens(lex);
}
if (status != IVY_OK) {
lex->lex_status = status;
return NULL;
}
struct ivy_token *tok = lex->lex_queue;
return tok;
}
struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex)
{
enum ivy_status status = IVY_OK;
if (!lex->lex_queue) {
status = pump_tokens(lex);
}
if (status != IVY_OK) {
lex->lex_status = status;
return NULL;
}
struct ivy_token *tok = lex->lex_queue;
lex->lex_queue = lex->lex_queue->t_next;
return tok;
}
/*
 * Release a token and any string storage it holds.
 *
 * Atom, string and identifier tokens have their t_str freed; other token
 * types are treated as having no extra storage. The token's t_next link is
 * not followed. Passing NULL is a no-op, mirroring free().
 */
void ivy_token_destroy(struct ivy_token *tok)
{
    if (!tok) {
        return;
    }

    switch (tok->t_type) {
    case IVY_TOK_ATOM:
    case IVY_TOK_STRING:
    case IVY_TOK_IDENT:
        free(tok->t_str);
        break;
    default:
        break;
    }

    free(tok);
}