Files
ivy/lang/lex.c
Max Wash b630aa009c lang: lex: add function to check if tokens are available
If this function returns false, any attempt to read tokens from the lexer will cause another line of input to be retrieved from the lexer's line source.
2024-11-23 19:27:29 +00:00

1191 lines
24 KiB
C

#include "lex.h"
#include <blue/core/hash.h>
#include <blue/core/queue.h>
#include <blue/object/dict.h>
#include <blue/object/number.h>
#include <blue/object/string.h>
#include <ctype.h>
#include <ivy/lang/lex.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
/* Size, in bytes, of the line buffer allocated by ivy_lexer_create(). */
#define LINEBUF_DEFAULT_CAPACITY 1024
/* Expands to a `struct lex_token_def` initialiser that pairs a token id (i)
 * with its textual spelling (n). */
#define LEX_TOKEN_DEF(i, n) \
{ \
.id = (i), .name = (n) \
}
/* Reserved words of the language. init_keywords() copies this table into the
 * lexer's keyword dictionary, which find_keyword_by_name() then queries. */
static struct lex_token_def keywords[] = {
LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
LEX_TOKEN_DEF(IVY_KW_USE, "use"),
LEX_TOKEN_DEF(IVY_KW_CLASS, "class"),
LEX_TOKEN_DEF(IVY_KW_PROTOCOL, "protocol"),
LEX_TOKEN_DEF(IVY_KW_TRY, "try"),
LEX_TOKEN_DEF(IVY_KW_THROW, "throw"),
LEX_TOKEN_DEF(IVY_KW_CATCH, "catch"),
LEX_TOKEN_DEF(IVY_KW_IF, "if"),
LEX_TOKEN_DEF(IVY_KW_AND, "and"),
LEX_TOKEN_DEF(IVY_KW_OR, "or"),
LEX_TOKEN_DEF(IVY_KW_IS, "is"),
LEX_TOKEN_DEF(IVY_KW_NOT, "not"),
LEX_TOKEN_DEF(IVY_KW_ELSE, "else"),
LEX_TOKEN_DEF(IVY_KW_WHILE, "while"),
LEX_TOKEN_DEF(IVY_KW_FOR, "for"),
LEX_TOKEN_DEF(IVY_KW_MATCH, "match"),
LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"),
LEX_TOKEN_DEF(IVY_KW_IN, "in"),
LEX_TOKEN_DEF(IVY_KW_DO, "do"),
LEX_TOKEN_DEF(IVY_KW_GET, "get"),
LEX_TOKEN_DEF(IVY_KW_SET, "set"),
LEX_TOKEN_DEF(IVY_KW_END, "end"),
};
/* Number of entries in `keywords`. */
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
/* Punctuation and operator spellings. build_symbol_tree() loads every entry
 * into a per-character tree so that read_symbol() can follow the input one
 * character at a time; char_can_begin_symbol() scans the first character of
 * each entry. Note that pump_tokens() tests for '_' before it tests for
 * symbols, so a lone underscore is produced by read_ident(), not by the tree
 * walk. */
static struct lex_token_def symbols[] = {
LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
LEX_TOKEN_DEF(IVY_SYM_SQUOTE, "'"),
LEX_TOKEN_DEF(IVY_SYM_DQUOTE, "\""),
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"),
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"),
LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"),
LEX_TOKEN_DEF(IVY_SYM_HYPHEN, "-"),
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_HYPHEN, "--"),
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"),
LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"),
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_ASTERISK, "/*"),
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_FORWARD_SLASH, "*/"),
LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"),
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"),
LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="),
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="),
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE, "<<"),
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE, ">>"),
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE_EQUAL, "<="),
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE_EQUAL, ">="),
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="),
LEX_TOKEN_DEF(IVY_SYM_HYPHEN_EQUAL, "-="),
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="),
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="),
LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="),
LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="),
LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="),
LEX_TOKEN_DEF(IVY_SYM_HASH, "#"),
LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
LEX_TOKEN_DEF(IVY_SYM_CARET, "^"),
LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"),
LEX_TOKEN_DEF(IVY_SYM_COMMA, ","),
LEX_TOKEN_DEF(IVY_SYM_SEMICOLON, ";"),
LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"),
LEX_TOKEN_DEF(IVY_SYM_HYPHEN_RIGHT_ANGLE, "->"),
LEX_TOKEN_DEF(IVY_SYM_EQUAL_RIGHT_ANGLE, "=>"),
};
/* Number of entries in `symbols`. */
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
/*
 * Allocate a zero-initialised lexer state of type `state_type` and append it
 * to the back of the lexer's state queue, making it the current state. The
 * lexer's brace depth at the time of the call is recorded in the new state.
 *
 * Returns the new state, or NULL if the allocation fails.
 */
static struct lexer_state *push_lexer_state(
	struct ivy_lexer *lex, enum lexer_state_type state_type)
{
	struct lexer_state *top = calloc(1, sizeof *top);
	if (top == NULL) {
		return NULL;
	}

	top->s_brace_depth = lex->lex_brace_depth;
	top->s_type = state_type;
	b_queue_push_back(&lex->lex_state, &top->s_entry);

	return top;
}
/*
 * Remove the most recently pushed lexer state from the state queue and free
 * it. Does nothing when the queue is empty.
 */
static void pop_lexer_state(struct ivy_lexer *lex)
{
	b_queue_entry *top = b_queue_pop_back(&lex->lex_state);
	if (top != NULL) {
		struct lexer_state *popped
			= b_unbox(struct lexer_state, top, s_entry);
		free(popped);
	}
}
/*
 * Return the current lexer state (the last entry of the state queue) without
 * removing it, or NULL when the queue is empty.
 */
static struct lexer_state *get_lexer_state(struct ivy_lexer *lex)
{
	b_queue_entry *top = b_queue_last(&lex->lex_state);
	if (top != NULL) {
		return b_unbox(struct lexer_state, top, s_entry);
	}

	return NULL;
}
/*
 * Free every lexer state stored in `state`, unlinking each entry from the
 * queue before it is released.
 */
static void destroy_state_stack(b_queue *state)
{
	b_queue_iterator cursor;

	b_queue_iterator_begin(state, &cursor);
	for (; b_queue_iterator_is_valid(&cursor);) {
		struct lexer_state *victim
			= b_unbox(struct lexer_state, cursor.entry, s_entry);

		/* unlink first: the entry lives inside the memory being freed */
		b_queue_iterator_erase(&cursor);
		free(victim);
	}
}
/*
 * Search the direct children of `node` for the one labelled with character
 * `c`. Returns that child, or NULL when no child matches.
 */
static struct ivy_lexer_symbol_node *get_symbol_node(
	struct ivy_lexer_symbol_node *node, char c)
{
	b_queue_iterator cursor;

	b_queue_foreach (&cursor, &node->s_children) {
		struct ivy_lexer_symbol_node *candidate = b_unbox(
			struct ivy_lexer_symbol_node, cursor.entry, s_entry);

		if (c == candidate->s_char) {
			return candidate;
		}
	}

	return NULL;
}
/*
 * Return the lexer's scratch string, emptied and ready for reuse. The string
 * is created on first use and cached in `lex->lex_temp`.
 *
 * Returns NULL if the string could not be created. (This assumes
 * b_string_create() reports failure by returning NULL, which is what the
 * NULL check in read_string_content() already expects.)
 */
static b_string *get_temp_string(struct ivy_lexer *lex)
{
	if (!lex->lex_temp) {
		lex->lex_temp = b_string_create();
		if (!lex->lex_temp) {
			/* do not hand a NULL string to b_string_clear() */
			return NULL;
		}
	}

	b_string_clear(lex->lex_temp);
	return lex->lex_temp;
}
/*
 * Insert the spelling of `sym` into the symbol tree rooted at `tree`, one
 * node per character, creating any nodes that do not exist yet. The node
 * reached by the final character is tagged with the symbol's id; nodes
 * created along the way start out as IVY_SYM_NONE.
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if a node cannot be allocated.
 */
static enum ivy_status put_symbol(
	struct ivy_lexer_symbol_node *tree, struct lex_token_def *sym)
{
	struct ivy_lexer_symbol_node *cursor = tree;

	for (size_t pos = 0; sym->name[pos] != '\0'; pos++) {
		char ch = sym->name[pos];
		struct ivy_lexer_symbol_node *next = get_symbol_node(cursor, ch);

		if (next == NULL) {
			next = calloc(1, sizeof *next);
			if (next == NULL) {
				return IVY_ERR_NO_MEMORY;
			}

			next->s_id = IVY_SYM_NONE;
			next->s_char = ch;
			b_queue_push_back(&cursor->s_children, &next->s_entry);
		}

		cursor = next;
	}

	cursor->s_id = sym->id;
	return IVY_OK;
}
/*
 * Recursively free `tree` and all of its descendants. Each child is unlinked
 * from its parent's child queue before the child's own subtree is destroyed.
 */
static void destroy_symbol_tree(struct ivy_lexer_symbol_node *tree)
{
	b_queue_iterator cursor;

	b_queue_iterator_begin(&tree->s_children, &cursor);
	for (; b_queue_iterator_is_valid(&cursor);) {
		struct ivy_lexer_symbol_node *child = b_unbox(
			struct ivy_lexer_symbol_node, cursor.entry, s_entry);

		b_queue_iterator_erase(&cursor);
		destroy_symbol_tree(child);
	}

	free(tree);
}
/*
 * Build the per-character lookup tree containing every entry of `symbols`.
 *
 * Returns the root node (tagged IVY_SYM_NONE), or NULL if any allocation
 * fails; a partially built tree is destroyed before returning.
 */
static struct ivy_lexer_symbol_node *build_symbol_tree(void)
{
	struct ivy_lexer_symbol_node *root = calloc(1, sizeof *root);
	if (root == NULL) {
		return NULL;
	}

	root->s_id = IVY_SYM_NONE;

	for (size_t idx = 0; idx < nr_symbols; idx++) {
		if (put_symbol(root, &symbols[idx]) == IVY_OK) {
			continue;
		}

		destroy_symbol_tree(root);
		return NULL;
	}

	return root;
}
/*
 * Populate `keyword_dict` with one entry per reserved word, keyed by the
 * keyword's spelling and holding the keyword id as an integer value.
 */
static void init_keywords(b_dict *keyword_dict)
{
	struct lex_token_def *kw = keywords;

	for (size_t remaining = nr_keywords; remaining > 0; remaining--, kw++) {
		b_dict_put(keyword_dict, kw->name, B_RV_INT(kw->id));
	}
}
/*
 * Look `s` up in the lexer's keyword dictionary.
 *
 * Returns the matching keyword id, or IVY_KW_NONE when `s` is not a
 * reserved word.
 */
static enum ivy_keyword find_keyword_by_name(struct ivy_lexer *lex, const char *s)
{
	b_number *match = B_NUMBER(b_dict_at(lex->lex_keywords, s));

	if (match) {
		return b_number_get_int(match);
	}

	return IVY_KW_NONE;
}
/*
 * Create a lexer and store it in `*lexp`.
 *
 * The new lexer owns a line buffer of LINEBUF_DEFAULT_CAPACITY bytes, the
 * symbol lookup tree, the keyword dictionary and an initial STATE_NORMAL
 * lexer state. No line source is attached; see ivy_lexer_set_source().
 *
 * Returns IVY_OK on success. If any allocation fails, everything created so
 * far is released, `*lexp` is left untouched and IVY_ERR_NO_MEMORY is
 * returned.
 */
enum ivy_status ivy_lexer_create(struct ivy_lexer **lexp)
{
	struct ivy_lexer *lex = malloc(sizeof *lex);
	if (!lex) {
		return IVY_ERR_NO_MEMORY;
	}

	/* zero everything so ivy_lexer_destroy() is safe on a partial lexer */
	memset(lex, 0x0, sizeof *lex);
	lex->lex_status = IVY_OK;
	lex->lex_prev_token = IVY_TOK_NONE;

	lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY);
	if (!lex->lex_linebuf) {
		goto no_memory;
	}
	lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;

	lex->lex_sym_tree = build_symbol_tree();
	if (!lex->lex_sym_tree) {
		goto no_memory;
	}

	if (!push_lexer_state(lex, STATE_NORMAL)) {
		goto no_memory;
	}

	/* assumes b_dict_create() returns NULL on failure - TODO confirm */
	lex->lex_keywords = b_dict_create();
	if (!lex->lex_keywords) {
		goto no_memory;
	}
	init_keywords(lex->lex_keywords);

	*lexp = lex;
	return IVY_OK;

no_memory:
	ivy_lexer_destroy(lex);
	return IVY_ERR_NO_MEMORY;
}
/*
 * Destroy a lexer created by ivy_lexer_create(), releasing every token still
 * waiting in its queue, the line buffer, the symbol tree, the scratch string,
 * the keyword dictionary and the state stack. The attached line source is
 * not touched.
 *
 * Passing NULL is allowed and does nothing, mirroring free().
 */
void ivy_lexer_destroy(struct ivy_lexer *lex)
{
	if (!lex) {
		return;
	}

	while (lex->lex_queue) {
		struct ivy_token *next = lex->lex_queue->t_next;
		ivy_token_destroy(lex->lex_queue);
		lex->lex_queue = next;
	}

	/* free(NULL) is a no-op, so no guard is needed here */
	free(lex->lex_linebuf);

	if (lex->lex_sym_tree) {
		destroy_symbol_tree(lex->lex_sym_tree);
	}

	if (lex->lex_temp) {
		b_string_release(lex->lex_temp);
	}

	if (lex->lex_keywords) {
		b_dict_release(lex->lex_keywords);
	}

	destroy_state_stack(&lex->lex_state);
	free(lex);
}
/*
 * Attach `src` as the line source the lexer reads from when its line buffer
 * runs out. Only the pointer is stored; the text already in the line buffer
 * is left as it is.
 */
void ivy_lexer_set_source(struct ivy_lexer *lex, struct ivy_line_source *src)
{
lex->lex_source = src;
}
/*
 * Return the status recorded by the most recent ivy_lexer_peek() or
 * ivy_lexer_read() call that updated it (IVY_OK right after creation).
 */
enum ivy_status ivy_lexer_get_status(struct ivy_lexer *lex)
{
return lex->lex_status;
}
static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
{
if (!lex->lex_source) {
return IVY_ERR_EOF;
}
enum ivy_status status = ivy_line_source_readline(
lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap,
&lex->lex_linebuf_len, NULL);
if (status == IVY_OK) {
lex->lex_linebuf_ptr = 0;
}
return status;
}
/*
 * Return the character at the current read position without consuming it.
 * If the line buffer is exhausted, another line is fetched from the line
 * source first.
 *
 * Characters are returned as `unsigned char` values, i.e. never negative, so
 * that bytes >= 0x80 cannot be confused with status codes and are safe to
 * pass to the <ctype.h> classifiers. On failure a status code is returned
 * in-band instead: whatever refill_linebuf() reported, or IVY_ERR_EOF when
 * the buffer is empty. Callers in this file treat negative results as
 * failures.
 */
static int peek(struct ivy_lexer *lex)
{
	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
		enum ivy_status status = refill_linebuf(lex);
		if (status != IVY_OK) {
			return status;
		}
	}

	if (lex->lex_linebuf_len == 0) {
		return IVY_ERR_EOF;
	}

	return (unsigned char)lex->lex_linebuf[lex->lex_linebuf_ptr];
}
/*
 * Return the character one position past the current read position without
 * consuming anything. If the line buffer is exhausted, another line is
 * fetched from the line source first.
 *
 * The look-ahead never crosses a line boundary: IVY_ERR_EOF is returned when
 * the current character is the last one in the buffer (or the buffer is
 * empty). Characters are returned as `unsigned char` values so they are
 * never negative and cannot be mistaken for a status code.
 */
static int peek_next(struct ivy_lexer *lex)
{
	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
		enum ivy_status status = refill_linebuf(lex);
		if (status != IVY_OK) {
			return status;
		}
	}

	if (lex->lex_linebuf_len == 0) {
		return IVY_ERR_EOF;
	}

	if (lex->lex_linebuf_ptr + 1 >= lex->lex_linebuf_len) {
		return IVY_ERR_EOF;
	}

	return (unsigned char)lex->lex_linebuf[lex->lex_linebuf_ptr + 1];
}
/*
 * Consume and return the character at the current read position. If the line
 * buffer is exhausted, another line is fetched from the line source first.
 *
 * Characters are returned as `unsigned char` values, i.e. never negative, so
 * that bytes >= 0x80 cannot be confused with status codes. On failure a
 * status code is returned in-band and nothing is consumed: whatever
 * refill_linebuf() reported, or IVY_ERR_EOF when the buffer is empty.
 */
static int advance(struct ivy_lexer *lex)
{
	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
		enum ivy_status status = refill_linebuf(lex);
		if (status != IVY_OK) {
			return status;
		}
	}

	if (lex->lex_linebuf_len == 0) {
		return IVY_ERR_EOF;
	}

	return (unsigned char)lex->lex_linebuf[lex->lex_linebuf_ptr++];
}
/*
 * Report whether the line buffer still holds unread characters, i.e. whether
 * the next peek()/advance() can be served without fetching another line.
 */
static bool input_available(struct ivy_lexer *lex)
{
	bool exhausted = lex->lex_linebuf_ptr >= lex->lex_linebuf_len;
	return !exhausted;
}
static bool char_can_begin_symbol(char c)
{
for (size_t i = 0; i < nr_symbols; i++) {
if (symbols[i].name[0] == c) {
return true;
}
}
return false;
}
/*
 * Allocate a zero-initialised token of the given type.
 * Returns NULL if the allocation fails.
 */
static struct ivy_token *create_token(enum ivy_token_type type)
{
	struct ivy_token *fresh = calloc(1, sizeof *fresh);

	if (fresh != NULL) {
		fresh->t_type = type;
	}

	return fresh;
}
/*
 * Append `tok` to the end of the lexer's pending-token list and remember its
 * type as the most recently produced token type. Always returns IVY_OK.
 */
static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok)
{
	if (lex->lex_queue == NULL) {
		lex->lex_queue = tok;
	} else {
		/* the list is singly linked with no tail pointer: walk to the end */
		struct ivy_token *tail = lex->lex_queue;
		while (tail->t_next != NULL) {
			tail = tail->t_next;
		}
		tail->t_next = tok;
	}

	lex->lex_prev_token = tok->t_type;
	return IVY_OK;
}
/*
 * Queue an IVY_TOK_LINEFEED token, unless the previously queued token was
 * already a line feed, in which case nothing is queued.
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_linefeed(struct ivy_lexer *lex)
{
	/* collapse runs of line feeds into a single token */
	if (lex->lex_prev_token == IVY_TOK_LINEFEED) {
		return IVY_OK;
	}

	struct ivy_token *linefeed = calloc(1, sizeof *linefeed);
	if (linefeed == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	linefeed->t_type = IVY_TOK_LINEFEED;
	return push_token(lex, linefeed);
}
/*
 * Queue an IVY_TOK_STR_START token.
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_string_start(struct ivy_lexer *lex)
{
	struct ivy_token *marker = calloc(1, sizeof *marker);
	if (marker == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	marker->t_type = IVY_TOK_STR_START;
	return push_token(lex, marker);
}
/*
 * Queue an IVY_TOK_STR_END token.
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_string_end(struct ivy_lexer *lex)
{
	struct ivy_token *marker = calloc(1, sizeof *marker);
	if (marker == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	marker->t_type = IVY_TOK_STR_END;
	return push_token(lex, marker);
}
/*
 * Queue an IVY_TOK_STRING token whose text is `s`. The pointer itself is
 * stored in the token (no copy is made).
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated; in
 * that case `s` is not stored anywhere and is not freed here.
 */
static enum ivy_status push_string_content(struct ivy_lexer *lex, char *s)
{
	struct ivy_token *content = calloc(1, sizeof *content);
	if (content == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	content->t_str = s;
	content->t_type = IVY_TOK_STRING;
	return push_token(lex, content);
}
/*
 * Queue an IVY_TOK_SYMBOL token carrying symbol id `sym`.
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym)
{
	struct ivy_token *symbol = calloc(1, sizeof *symbol);
	if (symbol == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	symbol->t_symbol = sym;
	symbol->t_type = IVY_TOK_SYMBOL;
	return push_token(lex, symbol);
}
/*
 * Queue an IVY_TOK_ATOM token whose text is `s`. The pointer itself is
 * stored in the token (no copy is made).
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated; in
 * that case `s` is not stored anywhere and is not freed here.
 */
static enum ivy_status push_atom(struct ivy_lexer *lex, char *s)
{
	struct ivy_token *atom = calloc(1, sizeof *atom);
	if (atom == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	atom->t_str = s;
	atom->t_type = IVY_TOK_ATOM;
	return push_token(lex, atom);
}
/*
 * Queue an IVY_TOK_INT token holding the value `v`.
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_int(struct ivy_lexer *lex, unsigned long long v)
{
	struct ivy_token *number = calloc(1, sizeof *number);
	if (number == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	number->t_int = v;
	number->t_type = IVY_TOK_INT;
	return push_token(lex, number);
}
/*
 * Queue an IVY_TOK_DOUBLE token holding the value `v`.
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_double(struct ivy_lexer *lex, double v)
{
	struct ivy_token *number = calloc(1, sizeof *number);
	if (number == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	number->t_double = v;
	number->t_type = IVY_TOK_DOUBLE;
	return push_token(lex, number);
}
/*
 * Queue an IVY_TOK_KEYWORD token carrying keyword id `keyword`.
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_keyword(struct ivy_lexer *lex, enum ivy_keyword keyword)
{
	struct ivy_token *kw = calloc(1, sizeof *kw);
	if (kw == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	kw->t_keyword = keyword;
	kw->t_type = IVY_TOK_KEYWORD;
	return push_token(lex, kw);
}
/*
 * Discard the remainder of a line comment. Characters are consumed up to and
 * including the terminating '\n'; reaching end of input also ends the
 * comment. No token is produced.
 *
 * Returns IVY_OK, or the (negative) status reported by advance() for any
 * failure other than IVY_ERR_EOF.
 */
static enum ivy_status read_line_comment(struct ivy_lexer *lex)
{
	for (;;) {
		int ch = advance(lex);

		if (ch == '\n' || ch == IVY_ERR_EOF) {
			return IVY_OK;
		}

		if (ch < 0) {
			return ch;
		}
	}
}
/*
 * Discard a block comment whose opening delimiter has already been consumed
 * by read_symbol(). Block comments nest: every further opening delimiter
 * must be matched by its own closing delimiter. No token is produced.
 *
 * Returns IVY_OK once the outermost comment is closed, or the status
 * reported by peek() if the input fails or ends first.
 */
static enum ivy_status read_block_comment(struct ivy_lexer *lex)
{
	int depth = 1;
	/* previous character, or 0 when it must not pair with the next one */
	int prev = 0;

	while (depth > 0) {
		int c = peek(lex);
		if (c < 0) {
			return c;
		}
		advance(lex);

		if (prev == '/' && c == '*') {
			depth++;
			/* a character belongs to at most one delimiter, so the
			 * '*' here must not also start a closing delimiter */
			prev = 0;
		} else if (prev == '*' && c == '/') {
			depth--;
			/* likewise, this '/' must not also open a new comment */
			prev = 0;
		} else {
			prev = c;
		}
	}

	return IVY_OK;
}
static enum ivy_status read_squote_marker(struct ivy_lexer *lex)
{
enum ivy_status status = IVY_OK;
struct lexer_state *state = get_lexer_state(lex);
if (state->s_type == STATE_FSTRING) {
/* already within an fstring */
pop_lexer_state(lex);
return push_string_end(lex);
}
/* start of a new fstring */
status = push_string_start(lex);
if (status != IVY_OK) {
return status;
}
if (!push_lexer_state(lex, STATE_FSTRING)) {
return IVY_ERR_NO_MEMORY;
}
return IVY_OK;
}
/*
 * Handle a double-quote character, which delimits a plain string.
 *
 * When the current state is STATE_STRING the quote closes the string and the
 * state is popped; otherwise a STATE_STRING state is pushed. Unlike the
 * single-quote handler, no start/end marker tokens are queued.
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the new state cannot be allocated.
 */
static enum ivy_status read_dquote_marker(struct ivy_lexer *lex)
{
	struct lexer_state *current = get_lexer_state(lex);

	if (current->s_type != STATE_STRING) {
		/* opening quote */
		if (push_lexer_state(lex, STATE_STRING) == NULL) {
			return IVY_ERR_NO_MEMORY;
		}
		return IVY_OK;
	}

	/* closing quote */
	pop_lexer_state(lex);
	return IVY_OK;
}
static enum ivy_status read_atom(struct ivy_lexer *lex)
{
b_string *str = get_temp_string(lex);
while (true) {
int c = peek(lex);
if (c == IVY_ERR_EOF) {
break;
}
if (c < 0) {
return c;
}
if (!isalnum(c) && c != ':' && c != '_') {
break;
}
char s[] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
}
char *s = b_string_steal(str);
return push_atom(lex, s);
}
/*
 * Read literal text inside a string and queue it as an IVY_TOK_STRING token.
 * Reading stops, without consuming the delimiter, at '"' in STATE_STRING or
 * at '\'' or '{' in STATE_FSTRING. Nothing is queued when no text was read.
 *
 * Returns IVY_OK on success, IVY_ERR_NO_MEMORY if the scratch string or the
 * token cannot be allocated, or the status reported by peek() if the input
 * fails or ends before the closing delimiter is seen.
 */
static enum ivy_status read_string_content(struct ivy_lexer *lex)
{
	b_string *str = get_temp_string(lex);
	struct lexer_state *state = get_lexer_state(lex);
	if (!str) {
		return IVY_ERR_NO_MEMORY;
	}

	while (true) {
		int c = peek(lex);
		if (c < 0) {
			/* input failed or ended inside the string: stop here
			 * instead of appending the status code forever */
			return c;
		}
		if (state->s_type == STATE_FSTRING && (c == '\'' || c == '{')) {
			break;
		}
		if (state->s_type == STATE_STRING && c == '"') {
			break;
		}
		char s[2] = {c, 0};
		b_string_append_cstr(str, s);
		advance(lex);
	}

	if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) {
		return IVY_OK;
	}

	char *s = b_string_steal(str);
	enum ivy_status status = push_string_content(lex, s);
	if (status != IVY_OK) {
		/* the token never took ownership of the text */
		free(s);
	}
	return status;
}
static enum ivy_status read_symbol(struct ivy_lexer *lex)
{
struct ivy_lexer_symbol_node *node = lex->lex_sym_tree;
struct lexer_state *state = get_lexer_state(lex);
while (true) {
int c = peek(lex);
struct ivy_lexer_symbol_node *next = get_symbol_node(node, c);
if (!next) {
break;
}
node = next;
advance(lex);
}
if (!node || node->s_id == IVY_SYM_NONE) {
return IVY_ERR_BAD_SYNTAX;
}
switch (node->s_id) {
case IVY_SYM_SQUOTE:
return read_squote_marker(lex);
case IVY_SYM_DQUOTE:
return read_dquote_marker(lex);
case IVY_SYM_FORWARD_SLASH_ASTERISK:
return read_block_comment(lex);
case IVY_SYM_DOUBLE_HYPHEN:
return read_line_comment(lex);
case IVY_SYM_HASH:
return read_atom(lex);
case IVY_SYM_LEFT_BRACE:
push_symbol(lex, node->s_id);
lex->lex_brace_depth++;
if (state->s_type == STATE_FSTRING) {
push_lexer_state(lex, STATE_INTERPOLATION);
}
return IVY_OK;
case IVY_SYM_RIGHT_BRACE:
push_symbol(lex, node->s_id);
lex->lex_brace_depth--;
if (state->s_type == STATE_INTERPOLATION
&& lex->lex_brace_depth < state->s_brace_depth) {
pop_lexer_state(lex);
}
return IVY_OK;
default:
push_symbol(lex, node->s_id);
return IVY_OK;
}
}
static enum ivy_status read_number(struct ivy_lexer *lex)
{
int token_len = 0;
int base = 10;
int dots = 0;
b_string *str = get_temp_string(lex);
while (true) {
int c = peek(lex);
if (c == IVY_ERR_EOF) {
break;
}
if (c < 0) {
return c;
}
if (c == '_') {
token_len++;
advance(lex);
continue;
}
if (c == '.' && iswdigit(peek_next(lex))) {
if (base != 10) {
return IVY_ERR_BAD_SYNTAX;
}
if (dots > 0) {
return IVY_ERR_BAD_SYNTAX;
}
token_len++;
dots++;
char s[] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
continue;
}
if (isspace(c) || ispunct(c)) {
break;
}
if (c == '0' && token_len == 0) {
base = 7;
token_len++;
advance(lex);
continue;
}
if (c == 'x' && token_len == 1) {
base = 16;
token_len++;
advance(lex);
continue;
}
if (c == 'b' && token_len == 1) {
base = 2;
token_len++;
advance(lex);
continue;
}
if (base == 2 && c != '0' && c != '1') {
return IVY_ERR_BAD_SYNTAX;
}
if (base == 10 && !isdigit(c)) {
return IVY_ERR_BAD_SYNTAX;
}
if (base == 16 && !isxdigit(c)) {
return IVY_ERR_BAD_SYNTAX;
}
char s[] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
token_len++;
}
if (token_len == 1 && base == 7) {
return push_int(lex, 0);
}
const char *s = b_string_ptr(str);
char *ep = NULL;
/* negative numbers will be lexed as a hyphen followed by a positive
* number. */
if (dots > 0) {
double v = strtod(s, &ep);
if (*ep != '\0') {
return IVY_ERR_BAD_SYNTAX;
}
return push_double(lex, v);
} else {
unsigned long long v = strtoull(s, &ep, base);
if (*ep != '\0') {
return IVY_ERR_BAD_SYNTAX;
}
return push_int(lex, v);
}
}
static enum ivy_status read_ident(struct ivy_lexer *lex)
{
b_string *str = get_temp_string(lex);
bool label = false;
while (true) {
int c = peek(lex);
if (c < 0) {
break;
}
if (c == ':' && peek_next(lex) != ':') {
advance(lex);
label = true;
break;
}
if (!isalnum(c) && c != '_') {
break;
}
char s[2] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
}
const char *s = b_string_ptr(str);
if (!label && !strcmp(s, "_")) {
return push_symbol(lex, IVY_SYM_UNDERSCORE);
}
enum ivy_keyword keyword = IVY_KW_NONE;
if (!label && (keyword = find_keyword_by_name(lex, s)) != IVY_KW_NONE) {
return push_keyword(lex, keyword);
}
struct ivy_token *tok
= create_token(label ? IVY_TOK_LABEL : IVY_TOK_IDENT);
tok->t_str = b_string_steal(str);
return push_token(lex, tok);
}
/*
 * Run one lexing step: inspect the next input character and dispatch to the
 * reader responsible for it. A step may queue zero, one or several tokens.
 *
 *  - Inside a string or fstring, anything other than the string's delimiter
 *    characters is read as string content.
 *  - Blanks (whitespace other than '\n') are skipped.
 *  - A run of '\n' characters is reduced to a single line-feed token. The
 *    run is only followed while the current line buffer has unread input, so
 *    that producing a line feed never forces another line to be fetched.
 *  - Letters and '_' start an identifier, symbol characters start a symbol,
 *    digits start a number.
 *
 * Returns IVY_OK on success, IVY_ERR_BAD_SYNTAX for a character that cannot
 * start any token, or the failure status reported by the input functions or
 * by the reader that was called.
 */
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
{
	struct lexer_state *state = get_lexer_state(lex);
	int c = peek(lex);
	if (c < 0) {
		return c;
	}

	if (state->s_type == STATE_STRING && c != '"') {
		return read_string_content(lex);
	}

	if (state->s_type == STATE_FSTRING && c != '\'' && c != '{') {
		return read_string_content(lex);
	}

	/* `state` is invalid past this point, as the read_* functions
	 * may perform state transitions. */
	state = NULL;

	/* Skip blanks, but stop at '\n': a line feed that follows trailing
	 * blanks is still significant and is handled below. */
	while (c != '\n' && isspace(c)) {
		advance(lex);
		c = peek(lex);
		if (c < 0) {
			/* report the input status itself rather than passing
			 * a status code to the character classifiers */
			return c;
		}
	}

	if (c == '\n') {
		while (c == '\n') {
			advance(lex);

			if (!input_available(lex)) {
				break;
			}

			c = peek(lex);
		}

		if (c < 0) {
			return c;
		}

		return push_linefeed(lex);
	}

	if (isalpha(c) || c == '_') {
		return read_ident(lex);
	}

	if (char_can_begin_symbol(c)) {
		return read_symbol(lex);
	}

	if (isdigit(c)) {
		return read_number(lex);
	}

	return IVY_ERR_BAD_SYNTAX;
}
struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex)
{
enum ivy_status status = IVY_OK;
while (!lex->lex_queue) {
status = pump_tokens(lex);
if (status != IVY_OK) {
lex->lex_status = status;
return NULL;
}
}
lex->lex_status = status;
struct ivy_token *tok = lex->lex_queue;
return tok;
}
/*
 * Remove and return the next token, lexing more input if no token is
 * queued. The returned token is no longer tracked by the lexer; it can be
 * released with ivy_token_destroy().
 *
 * On success the lexer status is set to IVY_OK, matching ivy_lexer_peek(),
 * so that an earlier failure is not reported as the outcome of this call.
 * On failure NULL is returned and the failing status is stored in the
 * lexer; it can be retrieved with ivy_lexer_get_status().
 */
struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex)
{
	enum ivy_status status = IVY_OK;

	while (!lex->lex_queue) {
		status = pump_tokens(lex);

		if (status != IVY_OK) {
			lex->lex_status = status;
			return NULL;
		}
	}

	lex->lex_status = status;

	struct ivy_token *tok = lex->lex_queue;
	lex->lex_queue = lex->lex_queue->t_next;
	return tok;
}
/*
 * Report whether the lexer has a token queued or unread characters left in
 * its current line buffer. When this returns false, the next attempt to read
 * a token has to fetch another line from the line source.
 */
bool ivy_lexer_tokens_available(struct ivy_lexer *lex)
{
	return lex->lex_queue != NULL || input_available(lex);
}
void ivy_token_destroy(struct ivy_token *tok)
{
switch (tok->t_type) {
case IVY_TOK_ATOM:
case IVY_TOK_STRING:
case IVY_TOK_IDENT:
free(tok->t_str);
break;
default:
break;
}
free(tok);
}
/* Expands to a `case` label that returns the enumerator's own name as a
 * string literal. Used by the *_to_string() functions below. */
#define ENUM_STR(x) \
case x: \
return #x
/*
 * Return the name of a token type as it is spelled in the source (e.g.
 * "IVY_TOK_IDENT"), or an empty string for a value that is not listed.
 * The returned string is a literal and must not be freed.
 */
const char *ivy_lex_token_type_to_string(enum ivy_token_type type)
{
switch (type) {
ENUM_STR(IVY_TOK_NONE);
ENUM_STR(IVY_TOK_KEYWORD);
ENUM_STR(IVY_TOK_SYMBOL);
ENUM_STR(IVY_TOK_ATOM);
ENUM_STR(IVY_TOK_INT);
ENUM_STR(IVY_TOK_DOUBLE);
ENUM_STR(IVY_TOK_LABEL);
ENUM_STR(IVY_TOK_IDENT);
ENUM_STR(IVY_TOK_STRING);
ENUM_STR(IVY_TOK_STR_START);
ENUM_STR(IVY_TOK_STR_END);
ENUM_STR(IVY_TOK_LINEFEED);
default:
return "";
}
}
/*
 * Return the name of a keyword enumerator as it is spelled in the source
 * (e.g. "IVY_KW_WHILE", not "while"), or an empty string for a value that is
 * not listed. The returned string is a literal and must not be freed.
 */
const char *ivy_keyword_to_string(enum ivy_keyword keyword)
{
switch (keyword) {
ENUM_STR(IVY_KW_NONE);
ENUM_STR(IVY_KW_PACKAGE);
ENUM_STR(IVY_KW_USE);
ENUM_STR(IVY_KW_CLASS);
ENUM_STR(IVY_KW_PROTOCOL);
ENUM_STR(IVY_KW_TRY);
ENUM_STR(IVY_KW_THROW);
ENUM_STR(IVY_KW_CATCH);
ENUM_STR(IVY_KW_IF);
ENUM_STR(IVY_KW_AND);
ENUM_STR(IVY_KW_OR);
ENUM_STR(IVY_KW_IS);
ENUM_STR(IVY_KW_NOT);
ENUM_STR(IVY_KW_ELSE);
ENUM_STR(IVY_KW_WHILE);
ENUM_STR(IVY_KW_FOR);
ENUM_STR(IVY_KW_MATCH);
ENUM_STR(IVY_KW_UNLESS);
ENUM_STR(IVY_KW_IN);
ENUM_STR(IVY_KW_DO);
ENUM_STR(IVY_KW_GET);
ENUM_STR(IVY_KW_SET);
ENUM_STR(IVY_KW_END);
default:
return "";
}
}
/*
 * Return the name of a symbol enumerator as it is spelled in the source
 * (e.g. "IVY_SYM_PLUS", not "+"), or an empty string for a value that is not
 * listed. The returned string is a literal and must not be freed.
 */
const char *ivy_symbol_to_string(enum ivy_symbol sym)
{
switch (sym) {
ENUM_STR(IVY_SYM_NONE);
ENUM_STR(IVY_SYM_DOT);
ENUM_STR(IVY_SYM_SQUOTE);
ENUM_STR(IVY_SYM_DQUOTE);
ENUM_STR(IVY_SYM_LEFT_BRACE);
ENUM_STR(IVY_SYM_RIGHT_BRACE);
ENUM_STR(IVY_SYM_LEFT_BRACKET);
ENUM_STR(IVY_SYM_RIGHT_BRACKET);
ENUM_STR(IVY_SYM_LEFT_PAREN);
ENUM_STR(IVY_SYM_RIGHT_PAREN);
ENUM_STR(IVY_SYM_LEFT_ANGLE);
ENUM_STR(IVY_SYM_RIGHT_ANGLE);
ENUM_STR(IVY_SYM_COLON);
ENUM_STR(IVY_SYM_DOUBLE_COLON);
ENUM_STR(IVY_SYM_PLUS);
ENUM_STR(IVY_SYM_HYPHEN);
ENUM_STR(IVY_SYM_DOUBLE_HYPHEN);
ENUM_STR(IVY_SYM_FORWARD_SLASH);
ENUM_STR(IVY_SYM_ASTERISK);
ENUM_STR(IVY_SYM_PERCENT);
ENUM_STR(IVY_SYM_AMPERSAND);
ENUM_STR(IVY_SYM_EQUAL);
ENUM_STR(IVY_SYM_DOUBLE_EQUAL);
ENUM_STR(IVY_SYM_DOUBLE_LEFT_ANGLE);
ENUM_STR(IVY_SYM_DOUBLE_RIGHT_ANGLE);
ENUM_STR(IVY_SYM_LEFT_ANGLE_EQUAL);
ENUM_STR(IVY_SYM_RIGHT_ANGLE_EQUAL);
ENUM_STR(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL);
ENUM_STR(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL);
ENUM_STR(IVY_SYM_PLUS_EQUAL);
ENUM_STR(IVY_SYM_HYPHEN_EQUAL);
ENUM_STR(IVY_SYM_FORWARD_SLASH_EQUAL);
ENUM_STR(IVY_SYM_ASTERISK_EQUAL);
ENUM_STR(IVY_SYM_AMPERSAND_EQUAL);
ENUM_STR(IVY_SYM_PIPE_EQUAL);
ENUM_STR(IVY_SYM_PERCENT_EQUAL);
ENUM_STR(IVY_SYM_CARET_EQUAL);
ENUM_STR(IVY_SYM_BANG);
ENUM_STR(IVY_SYM_PIPE);
ENUM_STR(IVY_SYM_CARET);
ENUM_STR(IVY_SYM_HASH);
ENUM_STR(IVY_SYM_UNDERSCORE);
ENUM_STR(IVY_SYM_COMMA);
ENUM_STR(IVY_SYM_SEMICOLON);
ENUM_STR(IVY_SYM_DOLLAR);
ENUM_STR(IVY_SYM_HYPHEN_RIGHT_ANGLE);
ENUM_STR(IVY_SYM_EQUAL_RIGHT_ANGLE);
ENUM_STR(IVY_SYM_FORWARD_SLASH_ASTERISK);
ENUM_STR(IVY_SYM_ASTERISK_FORWARD_SLASH);
default:
return "";
}
}