lang: implement identifier support in the lexer
This commit is contained in:
@@ -2,3 +2,4 @@ file(GLOB_RECURSE lang_sources *.c *.h include/ivy/lang/*.h)
|
||||
|
||||
# Shared library assembled from everything collected in lang_sources.
add_library(
	ivy-lang SHARED
	${lang_sources})

# The include/ directory is exported to every target that links ivy-lang.
target_include_directories(
	ivy-lang
	PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/)

# Libraries the lexer sources are linked against.
target_link_libraries(
	ivy-lang
	ivy-common
	Bluelib::Core
	Bluelib::Object)
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
#ifndef IVY_LANG_LEX_H_
|
||||
#define IVY_LANG_LEX_H_
|
||||
|
||||
#include <ivy/line-source.h>
|
||||
#include <ivy/status.h>
|
||||
|
||||
/*
 * Discriminator stored in ivy_token.t_type.
 *
 * Enumerator values are implicit and sequential starting at zero; keep the
 * order stable if anything depends on the numeric values.
 */
enum ivy_token_type {
	IVY_TOK_NONE = 0,

	IVY_TOK_KEYWORD,
	/* Payload is ivy_token.t_symbol. */
	IVY_TOK_SYMBOL,
	/* Payload is the heap string ivy_token.t_str. */
	IVY_TOK_ATOM,
	IVY_TOK_NUMBER,
	IVY_TOK_LABEL,
	/* Payload is the heap string ivy_token.t_str. */
	IVY_TOK_IDENT,
	/* Payload is the heap string ivy_token.t_str. */
	IVY_TOK_STRING,
	IVY_TOK_STR_START,
	IVY_TOK_STR_END,
	/* Emitted once for a run of one or more '\n' characters. */
	IVY_TOK_LINEFEED,
};
|
||||
|
||||
/*
 * Reserved words of the language. Each enumerator is named after the word it
 * stands for (IVY_KW_PACKAGE is "package", and so on). IVY_KW_NONE is the
 * zero value and names no keyword.
 */
enum ivy_keyword {
	IVY_KW_NONE = 0,

	/* Declarations. */
	IVY_KW_PACKAGE,
	IVY_KW_USE,
	IVY_KW_CLASS,
	IVY_KW_PROTOCOL,

	/* Error handling. */
	IVY_KW_TRY,
	IVY_KW_THROW,
	IVY_KW_CATCH,

	/* Conditions and word operators. */
	IVY_KW_IF,
	IVY_KW_AND,
	IVY_KW_OR,
	IVY_KW_IS,
	IVY_KW_NOT,
	IVY_KW_ELSE,

	/* Loops, matching and block delimiters. */
	IVY_KW_WHILE,
	IVY_KW_FOR,
	IVY_KW_MATCH,
	IVY_KW_UNLESS,
	IVY_KW_IN,
	IVY_KW_DO,
	IVY_KW_END,
};
|
||||
|
||||
/*
 * Punctuation and operator tokens. The trailing comment on each enumerator
 * gives the spelling suggested by its name. IVY_SYM_NONE is the zero value
 * and names no symbol.
 */
enum ivy_symbol {
	IVY_SYM_NONE = 0,

	IVY_SYM_DOT,           /* .  */
	IVY_SYM_LEFT_BRACE,    /* {  */
	IVY_SYM_RIGHT_BRACE,   /* }  */
	IVY_SYM_LEFT_BRACKET,  /* [  */
	IVY_SYM_RIGHT_BRACKET, /* ]  */
	IVY_SYM_LEFT_PAREN,    /* (  */
	IVY_SYM_RIGHT_PAREN,   /* )  */
	IVY_SYM_LEFT_ANGLE,    /* <  */
	IVY_SYM_RIGHT_ANGLE,   /* >  */
	IVY_SYM_COLON,         /* :  */
	IVY_SYM_DOUBLE_COLON,  /* :: */

	IVY_SYM_PLUS,          /* +  */
	IVY_SYM_MINUS,         /* -  */
	IVY_SYM_FORWARD_SLASH, /* /  */
	IVY_SYM_ASTERISK,      /* *  */
	IVY_SYM_PERCENT,       /* %  */
	IVY_SYM_AMPERSAND,     /* &  */
	IVY_SYM_EQUAL,         /* =  */
	IVY_SYM_DOUBLE_EQUAL,  /* == */

	IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL,  /* <<= */
	IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, /* >>= */
	IVY_SYM_PLUS_EQUAL,               /* +=  */
	IVY_SYM_MINUS_EQUAL,              /* -=  */
	IVY_SYM_FORWARD_SLASH_EQUAL,      /* /=  */
	IVY_SYM_ASTERISK_EQUAL,           /* *=  */
	IVY_SYM_AMPERSAND_EQUAL,          /* &=  */
	IVY_SYM_PIPE_EQUAL,               /* |=  */
	IVY_SYM_PERCENT_EQUAL,            /* %=  */
	IVY_SYM_CARET_EQUAL,              /* ^=  */

	IVY_SYM_BANG,       /* !  */
	IVY_SYM_PIPE,       /* |  */
	IVY_SYM_CARET,      /* ^  */
	IVY_SYM_UNDERSCORE, /* _  */
	IVY_SYM_COMMA,      /* ,  */
	IVY_SYM_DOLLAR,     /* $  */

	IVY_SYM_RIGHT_ARROW,     /* -> */
	IVY_SYM_BIG_RIGHT_ARROW, /* => */

	/* NOTE(review): presumably the block-comment delimiters; confirm how
	 * the lexer is meant to produce these. */
	IVY_SYM_FORWARD_SLASH_ASTERISK,
	IVY_SYM_ASTERISK_FORWARD_SLASH,
};
|
||||
|
||||
struct ivy_token {
|
||||
enum ivy_token_type t_type;
|
||||
struct ivy_token *t_next;
|
||||
|
||||
union {
|
||||
enum ivy_keyword t_keyword;
|
||||
enum ivy_symbol t_symbol;
|
||||
signed long long t_number;
|
||||
char *t_str;
|
||||
};
|
||||
};
|
||||
|
||||
struct ivy_lexer {
|
||||
struct ivy_line_source *lex_source;
|
||||
enum ivy_status lex_status;
|
||||
struct ivy_token *lex_queue;
|
||||
|
||||
char *lex_linebuf;
|
||||
size_t lex_linebuf_len;
|
||||
size_t lex_linebuf_cap;
|
||||
size_t lex_linebuf_ptr;
|
||||
};
|
||||
|
||||
/*
 * Zero `lex` and allocate its line buffer. The caller assigns lex_source
 * afterwards; until then the lexer reports end-of-file.
 */
extern enum ivy_status ivy_lexer_init(struct ivy_lexer *lex);

/*
 * Destroy every token still queued in `lex`, free the line buffer and zero
 * the structure. lex_source is not released here.
 */
extern void ivy_lexer_finish(struct ivy_lexer *lex);

/*
 * Return the next token without removing it from the lexer's queue, lexing
 * more input if the queue is empty. On failure NULL is returned and the
 * status is stored in lex->lex_status.
 */
extern struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex);

/*
 * Like ivy_lexer_peek(), but also removes the token from the lexer's queue.
 * The lexer no longer tracks a token returned here, so the caller releases it
 * with ivy_token_destroy().
 */
extern struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex);

/* Free `tok` together with any heap string it owns. */
extern void ivy_token_destroy(struct ivy_token *tok);
|
||||
|
||||
#endif
|
||||
|
||||
335
lang/lex.c
Normal file
335
lang/lex.c
Normal file
@@ -0,0 +1,335 @@
|
||||
#include <blue/object/string.h>
|
||||
#include <ctype.h>
|
||||
#include <ivy/lang/lex.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define LINEBUF_DEFAULT_CAPACITY 1024
|
||||
|
||||
/* One row of a spelling table: an enumerator value and its source text. */
struct lex_token_def {
	int id;
	const char *name;
};

/* Brace initializer for a single struct lex_token_def table row. */
#define LEX_TOKEN_DEF(i, n) {.id = (i), .name = (n)}
|
||||
|
||||
static struct lex_token_def keywords[] = {
|
||||
LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
|
||||
LEX_TOKEN_DEF(IVY_KW_USE, "use"),
|
||||
LEX_TOKEN_DEF(IVY_KW_CLASS, "class"),
|
||||
LEX_TOKEN_DEF(IVY_KW_PROTOCOL, "protocol"),
|
||||
LEX_TOKEN_DEF(IVY_KW_TRY, "try"),
|
||||
LEX_TOKEN_DEF(IVY_KW_THROW, "throw"),
|
||||
LEX_TOKEN_DEF(IVY_KW_CATCH, "catch"),
|
||||
LEX_TOKEN_DEF(IVY_KW_IF, "if"),
|
||||
LEX_TOKEN_DEF(IVY_KW_AND, "and"),
|
||||
LEX_TOKEN_DEF(IVY_KW_OR, "or"),
|
||||
LEX_TOKEN_DEF(IVY_KW_IS, "is"),
|
||||
LEX_TOKEN_DEF(IVY_KW_NOT, "not"),
|
||||
LEX_TOKEN_DEF(IVY_KW_ELSE, "else"),
|
||||
LEX_TOKEN_DEF(IVY_KW_WHILE, "while"),
|
||||
LEX_TOKEN_DEF(IVY_KW_FOR, "for"),
|
||||
LEX_TOKEN_DEF(IVY_KW_MATCH, "match"),
|
||||
LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"),
|
||||
LEX_TOKEN_DEF(IVY_KW_IN, "in"),
|
||||
LEX_TOKEN_DEF(IVY_KW_DO, "do"),
|
||||
LEX_TOKEN_DEF(IVY_KW_END, "end"),
|
||||
};
|
||||
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
||||
|
||||
static struct lex_token_def symbols[] = {
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
|
||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_MINUS, "-"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_MINUS_EQUAL, "-="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="),
|
||||
LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_CARET, "^"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_COMMA, ","),
|
||||
LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ARROW, "->"),
|
||||
LEX_TOKEN_DEF(IVY_SYM_BIG_RIGHT_ARROW, "=>"),
|
||||
};
|
||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||
|
||||
enum ivy_status ivy_lexer_init(struct ivy_lexer *lex)
|
||||
{
|
||||
memset(lex, 0x0, sizeof *lex);
|
||||
|
||||
lex->lex_status = IVY_OK;
|
||||
|
||||
lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY);
|
||||
lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;
|
||||
|
||||
return IVY_OK;
|
||||
}
|
||||
|
||||
void ivy_lexer_finish(struct ivy_lexer *lex)
|
||||
{
|
||||
while (lex->lex_queue) {
|
||||
struct ivy_token *next = lex->lex_queue->t_next;
|
||||
ivy_token_destroy(lex->lex_queue);
|
||||
lex->lex_queue = next;
|
||||
}
|
||||
|
||||
if (lex->lex_linebuf) {
|
||||
free(lex->lex_linebuf);
|
||||
}
|
||||
|
||||
memset(lex, 0x0, sizeof *lex);
|
||||
}
|
||||
|
||||
static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
|
||||
{
|
||||
if (!lex->lex_source) {
|
||||
return IVY_ERR_EOF;
|
||||
}
|
||||
|
||||
return ivy_line_source_readline(
|
||||
lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap,
|
||||
&lex->lex_linebuf_len, NULL);
|
||||
}
|
||||
|
||||
static int peek(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status = IVY_OK;
|
||||
|
||||
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
||||
status = refill_linebuf(lex);
|
||||
}
|
||||
|
||||
if (status != IVY_OK) {
|
||||
return status;
|
||||
}
|
||||
|
||||
if (lex->lex_linebuf_len == 0) {
|
||||
return IVY_ERR_EOF;
|
||||
}
|
||||
|
||||
int c = lex->lex_linebuf[lex->lex_linebuf_ptr];
|
||||
return c;
|
||||
}
|
||||
|
||||
static int advance(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status = IVY_OK;
|
||||
|
||||
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
||||
status = refill_linebuf(lex);
|
||||
}
|
||||
|
||||
if (status != IVY_OK) {
|
||||
return status;
|
||||
}
|
||||
|
||||
if (lex->lex_linebuf_len == 0) {
|
||||
return IVY_ERR_EOF;
|
||||
}
|
||||
|
||||
int c = lex->lex_linebuf[lex->lex_linebuf_ptr++];
|
||||
return c;
|
||||
}
|
||||
|
||||
static bool char_can_begin_symbol(char c)
|
||||
{
|
||||
for (size_t i = 0; i < nr_symbols; i++) {
|
||||
if (symbols[i].name[0] == c) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct ivy_token *create_token(enum ivy_token_type type)
|
||||
{
|
||||
struct ivy_token *tok = malloc(sizeof *tok);
|
||||
if (!tok) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(tok, 0x0, sizeof *tok);
|
||||
|
||||
tok->t_type = type;
|
||||
return tok;
|
||||
}
|
||||
|
||||
static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok)
|
||||
{
|
||||
struct ivy_token **slot = &lex->lex_queue;
|
||||
|
||||
while (*slot) {
|
||||
slot = &(*slot)->t_next;
|
||||
}
|
||||
|
||||
*slot = tok;
|
||||
return IVY_OK;
|
||||
}
|
||||
|
||||
static enum ivy_status push_linefeed(struct ivy_lexer *lex)
|
||||
{
|
||||
struct ivy_token *tok = malloc(sizeof *tok);
|
||||
if (!tok) {
|
||||
return IVY_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
memset(tok, 0x0, sizeof *tok);
|
||||
|
||||
tok->t_type = IVY_TOK_LINEFEED;
|
||||
return push_token(lex, tok);
|
||||
}
|
||||
|
||||
static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym)
|
||||
{
|
||||
struct ivy_token *tok = malloc(sizeof *tok);
|
||||
if (!tok) {
|
||||
return IVY_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
memset(tok, 0x0, sizeof *tok);
|
||||
|
||||
tok->t_type = IVY_TOK_SYMBOL;
|
||||
tok->t_symbol = sym;
|
||||
return push_token(lex, tok);
|
||||
}
|
||||
|
||||
static enum ivy_status read_ident(struct ivy_lexer *lex)
|
||||
{
|
||||
b_string *str = b_string_create();
|
||||
int c = peek(lex);
|
||||
|
||||
while (true) {
|
||||
if (c < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!isalnum(c) && c != '_') {
|
||||
break;
|
||||
}
|
||||
|
||||
char s[2] = {c, 0};
|
||||
b_string_append_cstr(str, s);
|
||||
}
|
||||
|
||||
const char *s = b_string_ptr(str);
|
||||
if (!strcmp(s, "_")) {
|
||||
b_string_release(str);
|
||||
push_symbol(lex, IVY_SYM_UNDERSCORE);
|
||||
}
|
||||
|
||||
struct ivy_token *tok = create_token(IVY_TOK_IDENT);
|
||||
tok->t_str = b_string_steal(str);
|
||||
b_string_release(str);
|
||||
|
||||
push_token(lex, tok);
|
||||
return IVY_OK;
|
||||
}
|
||||
|
||||
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status;
|
||||
int c = peek(lex);
|
||||
|
||||
if (c < 0) {
|
||||
return c;
|
||||
}
|
||||
|
||||
if (c == '\n') {
|
||||
while (c == '\n') {
|
||||
advance(lex);
|
||||
c = peek(lex);
|
||||
}
|
||||
|
||||
if (c < 0) {
|
||||
return c;
|
||||
}
|
||||
|
||||
return push_linefeed(lex);
|
||||
}
|
||||
|
||||
if (isalpha(c) || c == '_') {
|
||||
return read_ident(lex);
|
||||
}
|
||||
|
||||
return IVY_ERR_BAD_SYNTAX;
|
||||
}
|
||||
|
||||
struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status = IVY_OK;
|
||||
|
||||
if (!lex->lex_queue) {
|
||||
status = pump_tokens(lex);
|
||||
}
|
||||
|
||||
if (status != IVY_OK) {
|
||||
lex->lex_status = status;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ivy_token *tok = lex->lex_queue;
|
||||
return tok;
|
||||
}
|
||||
|
||||
struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex)
|
||||
{
|
||||
enum ivy_status status = IVY_OK;
|
||||
|
||||
if (!lex->lex_queue) {
|
||||
status = pump_tokens(lex);
|
||||
}
|
||||
|
||||
if (status != IVY_OK) {
|
||||
lex->lex_status = status;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ivy_token *tok = lex->lex_queue;
|
||||
lex->lex_queue = lex->lex_queue->t_next;
|
||||
return tok;
|
||||
}
|
||||
|
||||
void ivy_token_destroy(struct ivy_token *tok)
|
||||
{
|
||||
switch (tok->t_type) {
|
||||
case IVY_TOK_ATOM:
|
||||
case IVY_TOK_STRING:
|
||||
case IVY_TOK_IDENT:
|
||||
free(tok->t_str);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
free(tok);
|
||||
}
|
||||
Reference in New Issue
Block a user