lang: implement identifier support in the lexer
This commit is contained in:
335
lang/lex.c
Normal file
335
lang/lex.c
Normal file
@@ -0,0 +1,335 @@
|
||||
#include <blue/object/string.h>
|
||||
#include <ctype.h>
|
||||
#include <ivy/lang/lex.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Size, in bytes, of the line buffer that ivy_lexer_init() allocates. */
#define LINEBUF_DEFAULT_CAPACITY 1024
|
||||
|
||||
/* One row of a static token table: a numeric token id and its spelling. */
struct lex_token_def {
	int id;           /* id value stored for this entry */
	const char *name; /* NUL-terminated spelling of the entry */
};

/* Expands to a brace-enclosed designated initializer for one
 * struct lex_token_def: `i` becomes .id and `n` becomes .name. */
#define LEX_TOKEN_DEF(i, n) { .id = (i), .name = (n) }
|
||||
|
||||
static struct lex_token_def keywords[] = {
|
||||
LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
|
||||
LEX_TOKEN_DEF(IVY_KW_USE, "use"),
|
||||
LEX_TOKEN_DEF(IVY_KW_CLASS, "class"),
|
||||
LEX_TOKEN_DEF(IVY_KW_PROTOCOL, "protocol"),
|
||||
LEX_TOKEN_DEF(IVY_KW_TRY, "try"),
|
||||
LEX_TOKEN_DEF(IVY_KW_THROW, "throw"),
|
||||
LEX_TOKEN_DEF(IVY_KW_CATCH, "catch"),
|
||||
LEX_TOKEN_DEF(IVY_KW_IF, "if"),
|
||||
LEX_TOKEN_DEF(IVY_KW_AND, "and"),
|
||||
LEX_TOKEN_DEF(IVY_KW_OR, "or"),
|
||||
LEX_TOKEN_DEF(IVY_KW_IS, "is"),
|
||||
LEX_TOKEN_DEF(IVY_KW_NOT, "not"),
|
||||
LEX_TOKEN_DEF(IVY_KW_ELSE, "else"),
|
||||
LEX_TOKEN_DEF(IVY_KW_WHILE, "while"),
|
||||
LEX_TOKEN_DEF(IVY_KW_FOR, "for"),
|
||||
LEX_TOKEN_DEF(IVY_KW_MATCH, "match"),
|
||||
LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"),
|
||||
LEX_TOKEN_DEF(IVY_KW_IN, "in"),
|
||||
LEX_TOKEN_DEF(IVY_KW_DO, "do"),
|
||||
LEX_TOKEN_DEF(IVY_KW_END, "end"),
|
||||
};
|
||||
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
||||
|
||||
static struct lex_token_def symbols[] = {
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
|
||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_MINUS, "-"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_MINUS_EQUAL, "-="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_CARET, "^"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_COMMA, ","),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ARROW, "->"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_BIG_RIGHT_ARROW, "=>"),
|
||||
};
|
||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||
|
||||
enum ivy_status ivy_lexer_init(struct ivy_lexer *lex)
|
||||
{
|
||||
memset(lex, 0x0, sizeof *lex);
|
||||
|
||||
lex->lex_status = IVY_OK;
|
||||
|
||||
lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY);
|
||||
lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;
|
||||
|
||||
return IVY_OK;
|
||||
}
|
||||
|
||||
void ivy_lexer_finish(struct ivy_lexer *lex)
|
||||
{
|
||||
while (lex->lex_queue) {
|
||||
struct ivy_token *next = lex->lex_queue->t_next;
|
||||
ivy_token_destroy(lex->lex_queue);
|
||||
lex->lex_queue = next;
|
||||
}
|
||||
|
||||
if (lex->lex_linebuf) {
|
||||
free(lex->lex_linebuf);
|
||||
}
|
||||
|
||||
memset(lex, 0x0, sizeof *lex);
|
||||
}
|
||||
|
||||
static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
|
||||
{
|
||||
if (!lex->lex_source) {
|
||||
return IVY_ERR_EOF;
|
||||
}
|
||||
|
||||
return ivy_line_source_readline(
|
||||
lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap,
|
||||
&lex->lex_linebuf_len, NULL);
|
||||
}
|
||||
|
||||
static int peek(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status = IVY_OK;
|
||||
|
||||
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
||||
status = refill_linebuf(lex);
|
||||
}
|
||||
|
||||
if (status != IVY_OK) {
|
||||
return status;
|
||||
}
|
||||
|
||||
if (lex->lex_linebuf_len == 0) {
|
||||
return IVY_ERR_EOF;
|
||||
}
|
||||
|
||||
int c = lex->lex_linebuf[lex->lex_linebuf_ptr];
|
||||
return c;
|
||||
}
|
||||
|
||||
static int advance(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status = IVY_OK;
|
||||
|
||||
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
||||
status = refill_linebuf(lex);
|
||||
}
|
||||
|
||||
if (status != IVY_OK) {
|
||||
return status;
|
||||
}
|
||||
|
||||
if (lex->lex_linebuf_len == 0) {
|
||||
return IVY_ERR_EOF;
|
||||
}
|
||||
|
||||
int c = lex->lex_linebuf[lex->lex_linebuf_ptr++];
|
||||
return c;
|
||||
}
|
||||
|
||||
static bool char_can_begin_symbol(char c)
|
||||
{
|
||||
for (size_t i = 0; i < nr_symbols; i++) {
|
||||
if (symbols[i].name[0] == c) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct ivy_token *create_token(enum ivy_token_type type)
|
||||
{
|
||||
struct ivy_token *tok = malloc(sizeof *tok);
|
||||
if (!tok) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(tok, 0x0, sizeof *tok);
|
||||
|
||||
tok->t_type = type;
|
||||
return tok;
|
||||
}
|
||||
|
||||
static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok)
|
||||
{
|
||||
struct ivy_token **slot = &lex->lex_queue;
|
||||
|
||||
while (*slot) {
|
||||
slot = &(*slot)->t_next;
|
||||
}
|
||||
|
||||
*slot = tok;
|
||||
return IVY_OK;
|
||||
}
|
||||
|
||||
static enum ivy_status push_linefeed(struct ivy_lexer *lex)
|
||||
{
|
||||
struct ivy_token *tok = malloc(sizeof *tok);
|
||||
if (!tok) {
|
||||
return IVY_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
memset(tok, 0x0, sizeof *tok);
|
||||
|
||||
tok->t_type = IVY_TOK_LINEFEED;
|
||||
return push_token(lex, tok);
|
||||
}
|
||||
|
||||
static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym)
|
||||
{
|
||||
struct ivy_token *tok = malloc(sizeof *tok);
|
||||
if (!tok) {
|
||||
return IVY_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
memset(tok, 0x0, sizeof *tok);
|
||||
|
||||
tok->t_type = IVY_TOK_SYMBOL;
|
||||
tok->t_symbol = sym;
|
||||
return push_token(lex, tok);
|
||||
}
|
||||
|
||||
static enum ivy_status read_ident(struct ivy_lexer *lex)
|
||||
{
|
||||
b_string *str = b_string_create();
|
||||
int c = peek(lex);
|
||||
|
||||
while (true) {
|
||||
if (c < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!isalnum(c) && c != '_') {
|
||||
break;
|
||||
}
|
||||
|
||||
char s[2] = {c, 0};
|
||||
b_string_append_cstr(str, s);
|
||||
}
|
||||
|
||||
const char *s = b_string_ptr(str);
|
||||
if (!strcmp(s, "_")) {
|
||||
b_string_release(str);
|
||||
push_symbol(lex, IVY_SYM_UNDERSCORE);
|
||||
}
|
||||
|
||||
struct ivy_token *tok = create_token(IVY_TOK_IDENT);
|
||||
tok->t_str = b_string_steal(str);
|
||||
b_string_release(str);
|
||||
|
||||
push_token(lex, tok);
|
||||
return IVY_OK;
|
||||
}
|
||||
|
||||
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status;
|
||||
int c = peek(lex);
|
||||
|
||||
if (c < 0) {
|
||||
return c;
|
||||
}
|
||||
|
||||
if (c == '\n') {
|
||||
while (c == '\n') {
|
||||
advance(lex);
|
||||
c = peek(lex);
|
||||
}
|
||||
|
||||
if (c < 0) {
|
||||
return c;
|
||||
}
|
||||
|
||||
return push_linefeed(lex);
|
||||
}
|
||||
|
||||
if (isalpha(c) || c == '_') {
|
||||
return read_ident(lex);
|
||||
}
|
||||
|
||||
return IVY_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status = IVY_OK;
|
||||
|
||||
if (!lex->lex_queue) {
|
||||
status = pump_tokens(lex);
|
||||
}
|
||||
|
||||
if (status != IVY_OK) {
|
||||
lex->lex_status = status;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ivy_token *tok = lex->lex_queue;
|
||||
return tok;
|
||||
}
|
||||
|
||||
struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status = IVY_OK;
|
||||
|
||||
if (!lex->lex_queue) {
|
||||
status = pump_tokens(lex);
|
||||
}
|
||||
|
||||
if (status != IVY_OK) {
|
||||
lex->lex_status = status;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ivy_token *tok = lex->lex_queue;
|
||||
lex->lex_queue = lex->lex_queue->t_next;
|
||||
return tok;
|
||||
}
|
||||
|
||||
void ivy_token_destroy(struct ivy_token *tok)
|
||||
{
|
||||
switch (tok->t_type) {
|
||||
case IVY_TOK_ATOM:
|
||||
case IVY_TOK_STRING:
|
||||
case IVY_TOK_IDENT:
|
||||
free(tok->t_str);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
free(tok);
|
||||
}
|
||||
Reference in New Issue
Block a user