2024-11-16 17:05:29 +00:00
|
|
|
#include <blue/core/hash.h>
|
|
|
|
|
#include <blue/core/queue.h>
|
2024-11-13 21:37:00 +00:00
|
|
|
#include <blue/object/string.h>
|
2024-11-19 10:49:42 +00:00
|
|
|
#include <blue/object/dict.h>
|
|
|
|
|
#include <blue/object/number.h>
|
2024-11-13 21:37:00 +00:00
|
|
|
#include <ctype.h>
|
|
|
|
|
#include <ivy/lang/lex.h>
|
|
|
|
|
#include <stdbool.h>
|
2024-11-16 17:05:29 +00:00
|
|
|
#include <stdio.h>
|
2024-11-13 21:37:00 +00:00
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
2024-11-19 15:22:16 +00:00
|
|
|
#include "lex.h"
|
2024-11-13 21:37:00 +00:00
|
|
|
|
|
|
|
|
/* Size, in bytes, of the line buffer allocated for a freshly created lexer. */
#define LINEBUF_DEFAULT_CAPACITY 1024

/* Initialiser for one `struct lex_token_def` table entry: the token id
 * followed by the text that spells it in source code. */
#define LEX_TOKEN_DEF(i, n) { .id = (i), .name = (n) }
|
|
|
|
|
|
|
|
|
|
static struct lex_token_def keywords[] = {
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_USE, "use"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_CLASS, "class"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_PROTOCOL, "protocol"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_TRY, "try"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_THROW, "throw"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_CATCH, "catch"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_IF, "if"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_AND, "and"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_OR, "or"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_IS, "is"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_NOT, "not"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_ELSE, "else"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_WHILE, "while"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_FOR, "for"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_MATCH, "match"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_IN, "in"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_DO, "do"),
|
2024-11-17 09:31:58 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_KW_GET, "get"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_KW_SET, "set"),
|
2024-11-13 21:37:00 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_KW_END, "end"),
|
|
|
|
|
};
|
|
|
|
|
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
|
|
|
|
|
|
|
|
|
static struct lex_token_def symbols[] = {
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
|
2024-11-16 21:30:32 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_SQUOTE, "'"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DQUOTE, "\""),
|
2024-11-13 21:37:00 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"),
|
2024-11-16 17:05:29 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_HYPHEN, "-"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_HYPHEN, "--"),
|
2024-11-13 21:37:00 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"),
|
2024-11-16 17:05:29 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_ASTERISK, "/*"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_FORWARD_SLASH, "*/"),
|
2024-11-13 21:37:00 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="),
|
2024-11-16 17:05:29 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE, "<<"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE, ">>"),
|
2024-11-19 15:57:31 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE_EQUAL, "<="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE_EQUAL, ">="),
|
2024-11-13 21:37:00 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="),
|
2024-11-16 17:05:29 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_HYPHEN_EQUAL, "-="),
|
2024-11-13 21:37:00 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="),
|
2024-11-17 09:31:58 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_HASH, "#"),
|
2024-11-13 21:37:00 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_CARET, "^"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_COMMA, ","),
|
2024-11-17 09:31:58 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_SEMICOLON, ";"),
|
2024-11-13 21:37:00 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"),
|
2024-11-17 09:31:58 +00:00
|
|
|
LEX_TOKEN_DEF(IVY_SYM_HYPHEN_RIGHT_ANGLE, "->"),
|
|
|
|
|
LEX_TOKEN_DEF(IVY_SYM_EQUAL_RIGHT_ANGLE, "=>"),
|
2024-11-13 21:37:00 +00:00
|
|
|
};
|
|
|
|
|
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
static struct lexer_state *push_lexer_state(
|
|
|
|
|
struct ivy_lexer *lex, enum lexer_state_type state_type)
|
|
|
|
|
{
|
|
|
|
|
struct lexer_state *state = malloc(sizeof *state);
|
|
|
|
|
if (!state) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(state, 0x0, sizeof *state);
|
|
|
|
|
|
|
|
|
|
state->s_type = state_type;
|
|
|
|
|
state->s_brace_depth = lex->lex_brace_depth;
|
|
|
|
|
b_queue_push_back(&lex->lex_state, &state->s_entry);
|
|
|
|
|
|
|
|
|
|
return state;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void pop_lexer_state(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
b_queue_entry *entry = b_queue_pop_back(&lex->lex_state);
|
|
|
|
|
if (!entry) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct lexer_state *state = b_unbox(struct lexer_state, entry, s_entry);
|
|
|
|
|
free(state);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct lexer_state *get_lexer_state(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
b_queue_entry *entry = b_queue_last(&lex->lex_state);
|
|
|
|
|
if (!entry) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return b_unbox(struct lexer_state, entry, s_entry);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Unlink and free every lexer_state stored in the queue `state`. */
static void destroy_state_stack(b_queue *state)
{
	b_queue_iterator it;

	for (b_queue_iterator_begin(state, &it); b_queue_iterator_is_valid(&it);) {
		struct lexer_state *doomed
			= b_unbox(struct lexer_state, it.entry, s_entry);

		/* unlink through the iterator before releasing the memory */
		b_queue_iterator_erase(&it);
		free(doomed);
	}
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
static struct ivy_lexer_symbol_node *get_symbol_node(
|
|
|
|
|
struct ivy_lexer_symbol_node *node, char c)
|
|
|
|
|
{
|
|
|
|
|
b_queue_iterator it;
|
|
|
|
|
b_queue_foreach (&it, &node->s_children) {
|
|
|
|
|
struct ivy_lexer_symbol_node *child = b_unbox(
|
|
|
|
|
struct ivy_lexer_symbol_node, it.entry, s_entry);
|
|
|
|
|
if (child->s_char == c) {
|
|
|
|
|
return child;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-17 09:31:58 +00:00
|
|
|
static b_string *get_temp_string(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
if (!lex->lex_temp) {
|
|
|
|
|
lex->lex_temp = b_string_create();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
b_string_clear(lex->lex_temp);
|
|
|
|
|
return lex->lex_temp;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
static enum ivy_status put_symbol(
|
|
|
|
|
struct ivy_lexer_symbol_node *tree, struct lex_token_def *sym)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; sym->name[i]; i++) {
|
|
|
|
|
char c = sym->name[i];
|
|
|
|
|
struct ivy_lexer_symbol_node *child = get_symbol_node(tree, c);
|
|
|
|
|
if (child) {
|
|
|
|
|
tree = child;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
child = malloc(sizeof *child);
|
|
|
|
|
if (!child) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 21:30:32 +00:00
|
|
|
memset(child, 0x0, sizeof *child);
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
child->s_id = IVY_SYM_NONE;
|
|
|
|
|
child->s_char = c;
|
|
|
|
|
|
|
|
|
|
b_queue_push_back(&tree->s_children, &child->s_entry);
|
|
|
|
|
tree = child;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tree->s_id = sym->id;
|
|
|
|
|
return IVY_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void destroy_symbol_tree(struct ivy_lexer_symbol_node *tree)
|
|
|
|
|
{
|
|
|
|
|
b_queue_iterator it;
|
|
|
|
|
b_queue_iterator_begin(&tree->s_children, &it);
|
|
|
|
|
while (b_queue_iterator_is_valid(&it)) {
|
|
|
|
|
struct ivy_lexer_symbol_node *node = b_unbox(
|
|
|
|
|
struct ivy_lexer_symbol_node, it.entry, s_entry);
|
|
|
|
|
b_queue_iterator_erase(&it);
|
|
|
|
|
|
|
|
|
|
destroy_symbol_tree(node);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
free(tree);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct ivy_lexer_symbol_node *build_symbol_tree(void)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_lexer_symbol_node *root = malloc(sizeof *root);
|
|
|
|
|
if (!root) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(root, 0x0, sizeof *root);
|
|
|
|
|
root->s_id = IVY_SYM_NONE;
|
|
|
|
|
|
|
|
|
|
enum ivy_status status = IVY_OK;
|
|
|
|
|
for (size_t i = 0; i < nr_symbols; i++) {
|
|
|
|
|
status = put_symbol(root, &symbols[i]);
|
|
|
|
|
|
|
|
|
|
if (status != IVY_OK) {
|
|
|
|
|
destroy_symbol_tree(root);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return root;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-19 10:49:42 +00:00
|
|
|
/* Fill `keyword_dict` with one entry per row of the `keywords` table,
 * mapping the keyword's spelling to its id stored as an integer value. */
static void init_keywords(b_dict *keyword_dict)
{
	struct lex_token_def *end = keywords + nr_keywords;

	for (struct lex_token_def *kw = keywords; kw < end; kw++) {
		b_dict_put(keyword_dict, kw->name, B_RV_INT(kw->id));
	}
}
|
|
|
|
|
|
2024-11-19 10:49:42 +00:00
|
|
|
static enum ivy_keyword find_keyword_by_name(struct ivy_lexer *lex, const char *s)
|
2024-11-16 17:05:29 +00:00
|
|
|
{
|
2024-11-19 15:57:31 +00:00
|
|
|
b_number *id = B_NUMBER(b_dict_at(lex->lex_keywords, s));
|
2024-11-19 10:49:42 +00:00
|
|
|
if (!id) {
|
|
|
|
|
return IVY_KW_NONE;
|
2024-11-16 17:05:29 +00:00
|
|
|
}
|
|
|
|
|
|
2024-11-19 10:49:42 +00:00
|
|
|
return b_number_get_int(id);
|
2024-11-16 17:05:29 +00:00
|
|
|
}
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
enum ivy_status ivy_lexer_create(struct ivy_lexer **lexp)
|
2024-11-13 21:37:00 +00:00
|
|
|
{
|
2024-11-16 23:03:43 +00:00
|
|
|
struct ivy_lexer *lex = malloc(sizeof *lex);
|
|
|
|
|
if (!lex) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
memset(lex, 0x0, sizeof *lex);
|
|
|
|
|
|
|
|
|
|
lex->lex_status = IVY_OK;
|
2024-11-16 17:05:29 +00:00
|
|
|
lex->lex_prev_token = IVY_TOK_NONE;
|
2024-11-13 21:37:00 +00:00
|
|
|
|
|
|
|
|
lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY);
|
|
|
|
|
lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
lex->lex_sym_tree = build_symbol_tree();
|
2024-11-16 23:03:43 +00:00
|
|
|
if (!lex->lex_sym_tree) {
|
|
|
|
|
ivy_lexer_destroy(lex);
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!push_lexer_state(lex, STATE_NORMAL)) {
|
|
|
|
|
ivy_lexer_destroy(lex);
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-19 10:49:42 +00:00
|
|
|
lex->lex_keywords = b_dict_create();
|
|
|
|
|
init_keywords(lex->lex_keywords);
|
2024-11-16 23:03:43 +00:00
|
|
|
*lexp = lex;
|
2024-11-16 17:05:29 +00:00
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
return IVY_OK;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
void ivy_lexer_destroy(struct ivy_lexer *lex)
|
2024-11-13 21:37:00 +00:00
|
|
|
{
|
|
|
|
|
while (lex->lex_queue) {
|
|
|
|
|
struct ivy_token *next = lex->lex_queue->t_next;
|
|
|
|
|
ivy_token_destroy(lex->lex_queue);
|
|
|
|
|
lex->lex_queue = next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lex->lex_linebuf) {
|
|
|
|
|
free(lex->lex_linebuf);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
if (lex->lex_sym_tree) {
|
|
|
|
|
destroy_symbol_tree(lex->lex_sym_tree);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-17 09:31:58 +00:00
|
|
|
if (lex->lex_temp) {
|
|
|
|
|
b_string_release(lex->lex_temp);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-19 10:49:42 +00:00
|
|
|
if (lex->lex_keywords) {
|
|
|
|
|
b_dict_release(lex->lex_keywords);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
destroy_state_stack(&lex->lex_state);
|
|
|
|
|
|
|
|
|
|
free(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ivy_lexer_set_source(struct ivy_lexer *lex, struct ivy_line_source *src)
|
|
|
|
|
{
|
|
|
|
|
lex->lex_source = src;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
enum ivy_status ivy_lexer_get_status(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
return lex->lex_status;
|
2024-11-13 21:37:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
if (!lex->lex_source) {
|
|
|
|
|
return IVY_ERR_EOF;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
enum ivy_status status = ivy_line_source_readline(
|
2024-11-13 21:37:00 +00:00
|
|
|
lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap,
|
|
|
|
|
&lex->lex_linebuf_len, NULL);
|
2024-11-16 17:05:29 +00:00
|
|
|
|
|
|
|
|
if (status == IVY_OK) {
|
|
|
|
|
lex->lex_linebuf_ptr = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return status;
|
2024-11-13 21:37:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int peek(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
enum ivy_status status = IVY_OK;
|
|
|
|
|
|
|
|
|
|
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
|
|
|
|
status = refill_linebuf(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (status != IVY_OK) {
|
|
|
|
|
return status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lex->lex_linebuf_len == 0) {
|
|
|
|
|
return IVY_ERR_EOF;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int c = lex->lex_linebuf[lex->lex_linebuf_ptr];
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
static int peek_next(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
enum ivy_status status = IVY_OK;
|
|
|
|
|
|
|
|
|
|
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
|
|
|
|
status = refill_linebuf(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (status != IVY_OK) {
|
|
|
|
|
return status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lex->lex_linebuf_len == 0) {
|
|
|
|
|
return IVY_ERR_EOF;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lex->lex_linebuf_ptr + 1 >= lex->lex_linebuf_len) {
|
|
|
|
|
return IVY_ERR_EOF;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int c = lex->lex_linebuf[lex->lex_linebuf_ptr + 1];
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
static int advance(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
enum ivy_status status = IVY_OK;
|
|
|
|
|
|
|
|
|
|
if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
|
|
|
|
|
status = refill_linebuf(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (status != IVY_OK) {
|
|
|
|
|
return status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lex->lex_linebuf_len == 0) {
|
|
|
|
|
return IVY_ERR_EOF;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int c = lex->lex_linebuf[lex->lex_linebuf_ptr++];
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-18 15:19:26 +00:00
|
|
|
static bool input_available(struct ivy_lexer* lex)
|
|
|
|
|
{
|
|
|
|
|
return lex->lex_linebuf_ptr < lex->lex_linebuf_len;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
static bool char_can_begin_symbol(char c)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < nr_symbols; i++) {
|
|
|
|
|
if (symbols[i].name[0] == c) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct ivy_token *create_token(enum ivy_token_type type)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = type;
|
|
|
|
|
return tok;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token **slot = &lex->lex_queue;
|
|
|
|
|
|
|
|
|
|
while (*slot) {
|
|
|
|
|
slot = &(*slot)->t_next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*slot = tok;
|
2024-11-16 17:05:29 +00:00
|
|
|
lex->lex_prev_token = tok->t_type;
|
2024-11-13 21:37:00 +00:00
|
|
|
return IVY_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status push_linefeed(struct ivy_lexer *lex)
|
|
|
|
|
{
|
2024-11-16 17:05:29 +00:00
|
|
|
if (lex->lex_prev_token == IVY_TOK_LINEFEED) {
|
|
|
|
|
return IVY_OK;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = IVY_TOK_LINEFEED;
|
|
|
|
|
return push_token(lex, tok);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 21:30:32 +00:00
|
|
|
static enum ivy_status push_string_start(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = IVY_TOK_STR_START;
|
|
|
|
|
return push_token(lex, tok);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status push_string_end(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = IVY_TOK_STR_END;
|
|
|
|
|
return push_token(lex, tok);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status push_string_content(struct ivy_lexer *lex, char *s)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = IVY_TOK_STRING;
|
|
|
|
|
tok->t_str = s;
|
|
|
|
|
return push_token(lex, tok);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = IVY_TOK_SYMBOL;
|
|
|
|
|
tok->t_symbol = sym;
|
|
|
|
|
return push_token(lex, tok);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-17 09:31:58 +00:00
|
|
|
static enum ivy_status push_atom(struct ivy_lexer *lex, char *s)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = IVY_TOK_ATOM;
|
|
|
|
|
tok->t_str = s;
|
|
|
|
|
return push_token(lex, tok);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status push_number(struct ivy_lexer *lex, unsigned long long v)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = IVY_TOK_NUMBER;
|
|
|
|
|
tok->t_number = v;
|
|
|
|
|
return push_token(lex, tok);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
static enum ivy_status push_keyword(struct ivy_lexer *lex, enum ivy_keyword keyword)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_token *tok = malloc(sizeof *tok);
|
|
|
|
|
if (!tok) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
|
|
|
|
|
|
tok->t_type = IVY_TOK_KEYWORD;
|
|
|
|
|
tok->t_keyword = keyword;
|
|
|
|
|
return push_token(lex, tok);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status read_line_comment(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
while (true) {
|
|
|
|
|
int c = advance(lex);
|
|
|
|
|
|
|
|
|
|
if (c == IVY_ERR_EOF || c == '\n') {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (c < 0) {
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return IVY_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status read_block_comment(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
int depth = 1;
|
|
|
|
|
char buf[2] = {0};
|
|
|
|
|
|
|
|
|
|
while (depth > 0) {
|
|
|
|
|
int c = peek(lex);
|
|
|
|
|
if (c < 0) {
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!buf[0]) {
|
|
|
|
|
buf[0] = c;
|
|
|
|
|
} else if (!buf[1]) {
|
|
|
|
|
buf[1] = c;
|
|
|
|
|
} else {
|
|
|
|
|
buf[0] = buf[1];
|
|
|
|
|
buf[1] = c;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (buf[0] == '/' && buf[1] == '*') {
|
|
|
|
|
depth++;
|
|
|
|
|
} else if (buf[0] == '*' && buf[1] == '/') {
|
|
|
|
|
depth--;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
advance(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return IVY_OK;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 21:30:32 +00:00
|
|
|
static enum ivy_status read_squote_marker(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
enum ivy_status status = IVY_OK;
|
2024-11-16 23:03:43 +00:00
|
|
|
struct lexer_state *state = get_lexer_state(lex);
|
2024-11-16 21:30:32 +00:00
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
if (state->s_type == STATE_FSTRING) {
|
2024-11-16 21:30:32 +00:00
|
|
|
/* already within an fstring */
|
2024-11-16 23:03:43 +00:00
|
|
|
pop_lexer_state(lex);
|
|
|
|
|
return push_string_end(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* start of a new fstring */
|
|
|
|
|
status = push_string_start(lex);
|
|
|
|
|
|
|
|
|
|
if (status != IVY_OK) {
|
|
|
|
|
return status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!push_lexer_state(lex, STATE_FSTRING)) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
2024-11-16 21:30:32 +00:00
|
|
|
}
|
2024-11-16 23:03:43 +00:00
|
|
|
|
|
|
|
|
return IVY_OK;
|
2024-11-16 21:30:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status read_dquote_marker(struct ivy_lexer *lex)
|
|
|
|
|
{
|
2024-11-17 09:31:58 +00:00
|
|
|
struct lexer_state *state = get_lexer_state(lex);
|
|
|
|
|
|
|
|
|
|
if (state->s_type == STATE_STRING) {
|
|
|
|
|
/* already within a string */
|
|
|
|
|
pop_lexer_state(lex);
|
|
|
|
|
return IVY_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* start of a new string */
|
|
|
|
|
if (!push_lexer_state(lex, STATE_STRING)) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
return IVY_OK;
|
2024-11-16 21:30:32 +00:00
|
|
|
}
|
|
|
|
|
|
2024-11-17 09:31:58 +00:00
|
|
|
static enum ivy_status read_atom(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
b_string *str = get_temp_string(lex);
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
int c = peek(lex);
|
|
|
|
|
|
|
|
|
|
if (c == IVY_ERR_EOF) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (c < 0) {
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!isalnum(c) && c != ':' && c != '_') {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char s[] = {c, 0};
|
|
|
|
|
b_string_append_cstr(str, s);
|
|
|
|
|
advance(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char *s = b_string_steal(str);
|
|
|
|
|
return push_atom(lex, s);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 21:30:32 +00:00
|
|
|
static enum ivy_status read_string_content(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
int c;
|
2024-11-17 09:31:58 +00:00
|
|
|
b_string *str = get_temp_string(lex);
|
2024-11-16 23:03:43 +00:00
|
|
|
struct lexer_state *state = get_lexer_state(lex);
|
2024-11-16 21:30:32 +00:00
|
|
|
|
|
|
|
|
if (!str) {
|
|
|
|
|
return IVY_ERR_NO_MEMORY;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
c = peek(lex);
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
if (state->s_type == STATE_FSTRING && (c == '\'' || c == '{')) {
|
2024-11-16 21:30:32 +00:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
if (state->s_type == STATE_STRING && c == '"') {
|
2024-11-16 21:30:32 +00:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char s[2] = {c, 0};
|
|
|
|
|
b_string_append_cstr(str, s);
|
2024-11-16 23:03:43 +00:00
|
|
|
advance(lex);
|
2024-11-16 21:30:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) {
|
|
|
|
|
return IVY_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char *s = b_string_steal(str);
|
|
|
|
|
|
|
|
|
|
enum ivy_status status = push_string_content(lex, s);
|
|
|
|
|
if (status != IVY_OK) {
|
|
|
|
|
free(s);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return status;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
struct ivy_lexer_symbol_node *node = lex->lex_sym_tree;
|
2024-11-16 23:03:43 +00:00
|
|
|
struct lexer_state *state = get_lexer_state(lex);
|
2024-11-16 17:05:29 +00:00
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
int c = peek(lex);
|
|
|
|
|
|
|
|
|
|
struct ivy_lexer_symbol_node *next = get_symbol_node(node, c);
|
|
|
|
|
if (!next) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
node = next;
|
|
|
|
|
advance(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!node || node->s_id == IVY_SYM_NONE) {
|
|
|
|
|
return IVY_ERR_BAD_SYNTAX;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 21:30:32 +00:00
|
|
|
switch (node->s_id) {
|
|
|
|
|
case IVY_SYM_SQUOTE:
|
|
|
|
|
return read_squote_marker(lex);
|
|
|
|
|
case IVY_SYM_DQUOTE:
|
|
|
|
|
return read_dquote_marker(lex);
|
|
|
|
|
case IVY_SYM_FORWARD_SLASH_ASTERISK:
|
2024-11-16 17:05:29 +00:00
|
|
|
return read_block_comment(lex);
|
2024-11-16 21:30:32 +00:00
|
|
|
case IVY_SYM_DOUBLE_HYPHEN:
|
2024-11-16 17:05:29 +00:00
|
|
|
return read_line_comment(lex);
|
2024-11-17 09:31:58 +00:00
|
|
|
case IVY_SYM_HASH:
|
|
|
|
|
return read_atom(lex);
|
2024-11-16 23:03:43 +00:00
|
|
|
case IVY_SYM_LEFT_BRACE:
|
|
|
|
|
push_symbol(lex, node->s_id);
|
|
|
|
|
lex->lex_brace_depth++;
|
|
|
|
|
|
|
|
|
|
if (state->s_type == STATE_FSTRING) {
|
|
|
|
|
push_lexer_state(lex, STATE_INTERPOLATION);
|
|
|
|
|
}
|
|
|
|
|
return IVY_OK;
|
|
|
|
|
case IVY_SYM_RIGHT_BRACE:
|
|
|
|
|
push_symbol(lex, node->s_id);
|
|
|
|
|
lex->lex_brace_depth--;
|
|
|
|
|
|
|
|
|
|
if (state->s_type == STATE_INTERPOLATION
|
|
|
|
|
&& lex->lex_brace_depth < state->s_brace_depth) {
|
|
|
|
|
pop_lexer_state(lex);
|
|
|
|
|
}
|
|
|
|
|
return IVY_OK;
|
2024-11-16 21:30:32 +00:00
|
|
|
default:
|
|
|
|
|
push_symbol(lex, node->s_id);
|
|
|
|
|
return IVY_OK;
|
2024-11-16 17:05:29 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-17 09:31:58 +00:00
|
|
|
static enum ivy_status read_number(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
int token_len = 0;
|
|
|
|
|
int base = 10;
|
|
|
|
|
b_string *str = get_temp_string(lex);
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
int c = peek(lex);
|
|
|
|
|
if (c == IVY_ERR_EOF) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (c < 0) {
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (c == '_') {
|
|
|
|
|
token_len++;
|
|
|
|
|
advance(lex);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (isspace(c) || ispunct(c)) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (c == '0' && token_len == 0) {
|
|
|
|
|
base = 7;
|
|
|
|
|
token_len++;
|
|
|
|
|
advance(lex);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (c == 'x' && token_len == 1) {
|
|
|
|
|
base = 16;
|
|
|
|
|
token_len++;
|
|
|
|
|
advance(lex);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (c == 'b' && token_len == 1) {
|
|
|
|
|
base = 2;
|
|
|
|
|
token_len++;
|
|
|
|
|
advance(lex);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (base == 2 && c != '0' && c != '1') {
|
|
|
|
|
return IVY_ERR_BAD_SYNTAX;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (base == 10 && !isdigit(c)) {
|
|
|
|
|
return IVY_ERR_BAD_SYNTAX;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (base == 16 && !isxdigit(c)) {
|
|
|
|
|
return IVY_ERR_BAD_SYNTAX;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char s[] = {c, 0};
|
|
|
|
|
b_string_append_cstr(str, s);
|
|
|
|
|
advance(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (token_len == 1 && base == 7) {
|
|
|
|
|
return push_number(lex, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *s = b_string_ptr(str);
|
|
|
|
|
char *ep;
|
|
|
|
|
|
|
|
|
|
/* negative numbers will be lexed as a hyphen followed by a positive
|
|
|
|
|
* number. */
|
|
|
|
|
unsigned long long v = strtoull(s, &ep, base);
|
|
|
|
|
|
|
|
|
|
if (*ep != '\0') {
|
|
|
|
|
return IVY_ERR_BAD_SYNTAX;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return push_number(lex, v);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
static enum ivy_status read_ident(struct ivy_lexer *lex)
|
|
|
|
|
{
|
2024-11-17 09:31:58 +00:00
|
|
|
b_string *str = get_temp_string(lex);
|
2024-11-16 17:05:29 +00:00
|
|
|
bool label = false;
|
2024-11-13 21:37:00 +00:00
|
|
|
|
|
|
|
|
while (true) {
|
2024-11-16 17:05:29 +00:00
|
|
|
int c = peek(lex);
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
if (c < 0) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
if (c == ':' && peek_next(lex) != ':') {
|
|
|
|
|
advance(lex);
|
|
|
|
|
label = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
if (!isalnum(c) && c != '_') {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char s[2] = {c, 0};
|
|
|
|
|
b_string_append_cstr(str, s);
|
2024-11-16 17:05:29 +00:00
|
|
|
advance(lex);
|
2024-11-13 21:37:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *s = b_string_ptr(str);
|
|
|
|
|
if (!strcmp(s, "_")) {
|
|
|
|
|
push_symbol(lex, IVY_SYM_UNDERSCORE);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
enum ivy_keyword keyword = IVY_KW_NONE;
|
2024-11-19 10:49:42 +00:00
|
|
|
if (!label && (keyword = find_keyword_by_name(lex, s)) != IVY_KW_NONE) {
|
2024-11-16 17:05:29 +00:00
|
|
|
return push_keyword(lex, keyword);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ivy_token *tok
|
|
|
|
|
= create_token(label ? IVY_TOK_LABEL : IVY_TOK_IDENT);
|
2024-11-13 21:37:00 +00:00
|
|
|
tok->t_str = b_string_steal(str);
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
return push_token(lex, tok);
|
2024-11-13 21:37:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
|
|
|
|
{
|
2024-11-16 23:03:43 +00:00
|
|
|
struct lexer_state *state = get_lexer_state(lex);
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
int c = peek(lex);
|
|
|
|
|
|
|
|
|
|
if (c < 0) {
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
if (state->s_type == STATE_STRING && c != '"') {
|
2024-11-16 21:30:32 +00:00
|
|
|
return read_string_content(lex);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 23:03:43 +00:00
|
|
|
if (state->s_type == STATE_FSTRING && c != '\'' && c != '{') {
|
2024-11-16 21:30:32 +00:00
|
|
|
return read_string_content(lex);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-17 09:31:58 +00:00
|
|
|
/* `state` is invalid past this point, as the read_* functions
|
|
|
|
|
* may perform state transitions. */
|
2024-11-16 23:03:43 +00:00
|
|
|
state = NULL;
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
if (c == '\n') {
|
|
|
|
|
while (c == '\n') {
|
|
|
|
|
advance(lex);
|
2024-11-18 15:19:26 +00:00
|
|
|
|
|
|
|
|
if (!input_available(lex)) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
c = peek(lex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (c < 0) {
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return push_linefeed(lex);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
while (isspace(c)) {
|
|
|
|
|
advance(lex);
|
|
|
|
|
c = peek(lex);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
if (isalpha(c) || c == '_') {
|
|
|
|
|
return read_ident(lex);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-19 14:00:42 +00:00
|
|
|
if (char_can_begin_symbol(c)) {
|
|
|
|
|
return read_symbol(lex);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-17 10:58:31 +00:00
|
|
|
if (isdigit(c)) {
|
2024-11-17 09:31:58 +00:00
|
|
|
return read_number(lex);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-13 21:37:00 +00:00
|
|
|
return IVY_ERR_BAD_SYNTAX;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
enum ivy_status status = IVY_OK;
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
while (!lex->lex_queue) {
|
2024-11-13 21:37:00 +00:00
|
|
|
status = pump_tokens(lex);
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
if (status != IVY_OK) {
|
|
|
|
|
lex->lex_status = status;
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2024-11-13 21:37:00 +00:00
|
|
|
}
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
lex->lex_status = status;
|
2024-11-13 21:37:00 +00:00
|
|
|
struct ivy_token *tok = lex->lex_queue;
|
|
|
|
|
return tok;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex)
|
|
|
|
|
{
|
|
|
|
|
enum ivy_status status = IVY_OK;
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
while (!lex->lex_queue) {
|
2024-11-13 21:37:00 +00:00
|
|
|
status = pump_tokens(lex);
|
|
|
|
|
|
2024-11-16 17:05:29 +00:00
|
|
|
if (status != IVY_OK) {
|
|
|
|
|
lex->lex_status = status;
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2024-11-13 21:37:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ivy_token *tok = lex->lex_queue;
|
|
|
|
|
lex->lex_queue = lex->lex_queue->t_next;
|
|
|
|
|
return tok;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ivy_token_destroy(struct ivy_token *tok)
|
|
|
|
|
{
|
|
|
|
|
switch (tok->t_type) {
|
|
|
|
|
case IVY_TOK_ATOM:
|
|
|
|
|
case IVY_TOK_STRING:
|
|
|
|
|
case IVY_TOK_IDENT:
|
|
|
|
|
free(tok->t_str);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
free(tok);
|
|
|
|
|
}
|
2024-11-16 17:05:29 +00:00
|
|
|
|
|
|
|
|
/* Expands to a switch case that returns the spelling of its argument as a
 * string literal: ENUM_STR(FOO) becomes `case FOO: return "FOO"`. */
#define ENUM_STR(name) case name: return #name
|
|
|
|
|
|
|
|
|
|
const char *ivy_lex_token_type_to_string(enum ivy_token_type type)
|
|
|
|
|
{
|
|
|
|
|
switch (type) {
|
|
|
|
|
ENUM_STR(IVY_TOK_NONE);
|
|
|
|
|
ENUM_STR(IVY_TOK_KEYWORD);
|
|
|
|
|
ENUM_STR(IVY_TOK_SYMBOL);
|
|
|
|
|
ENUM_STR(IVY_TOK_ATOM);
|
|
|
|
|
ENUM_STR(IVY_TOK_NUMBER);
|
|
|
|
|
ENUM_STR(IVY_TOK_LABEL);
|
|
|
|
|
ENUM_STR(IVY_TOK_IDENT);
|
|
|
|
|
ENUM_STR(IVY_TOK_STRING);
|
|
|
|
|
ENUM_STR(IVY_TOK_STR_START);
|
|
|
|
|
ENUM_STR(IVY_TOK_STR_END);
|
|
|
|
|
ENUM_STR(IVY_TOK_LINEFEED);
|
|
|
|
|
default:
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *ivy_keyword_to_string(enum ivy_keyword keyword)
|
|
|
|
|
{
|
|
|
|
|
switch (keyword) {
|
|
|
|
|
ENUM_STR(IVY_KW_NONE);
|
|
|
|
|
ENUM_STR(IVY_KW_PACKAGE);
|
|
|
|
|
ENUM_STR(IVY_KW_USE);
|
|
|
|
|
ENUM_STR(IVY_KW_CLASS);
|
|
|
|
|
ENUM_STR(IVY_KW_PROTOCOL);
|
|
|
|
|
ENUM_STR(IVY_KW_TRY);
|
|
|
|
|
ENUM_STR(IVY_KW_THROW);
|
|
|
|
|
ENUM_STR(IVY_KW_CATCH);
|
|
|
|
|
ENUM_STR(IVY_KW_IF);
|
|
|
|
|
ENUM_STR(IVY_KW_AND);
|
|
|
|
|
ENUM_STR(IVY_KW_OR);
|
|
|
|
|
ENUM_STR(IVY_KW_IS);
|
|
|
|
|
ENUM_STR(IVY_KW_NOT);
|
|
|
|
|
ENUM_STR(IVY_KW_ELSE);
|
|
|
|
|
ENUM_STR(IVY_KW_WHILE);
|
|
|
|
|
ENUM_STR(IVY_KW_FOR);
|
|
|
|
|
ENUM_STR(IVY_KW_MATCH);
|
|
|
|
|
ENUM_STR(IVY_KW_UNLESS);
|
|
|
|
|
ENUM_STR(IVY_KW_IN);
|
|
|
|
|
ENUM_STR(IVY_KW_DO);
|
2024-11-19 15:57:31 +00:00
|
|
|
ENUM_STR(IVY_KW_GET);
|
|
|
|
|
ENUM_STR(IVY_KW_SET);
|
2024-11-16 17:05:29 +00:00
|
|
|
ENUM_STR(IVY_KW_END);
|
|
|
|
|
default:
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *ivy_symbol_to_string(enum ivy_symbol sym)
|
|
|
|
|
{
|
|
|
|
|
switch (sym) {
|
|
|
|
|
ENUM_STR(IVY_SYM_NONE);
|
|
|
|
|
ENUM_STR(IVY_SYM_DOT);
|
2024-11-19 15:57:31 +00:00
|
|
|
ENUM_STR(IVY_SYM_SQUOTE);
|
|
|
|
|
ENUM_STR(IVY_SYM_DQUOTE);
|
2024-11-16 17:05:29 +00:00
|
|
|
ENUM_STR(IVY_SYM_LEFT_BRACE);
|
|
|
|
|
ENUM_STR(IVY_SYM_RIGHT_BRACE);
|
|
|
|
|
ENUM_STR(IVY_SYM_LEFT_BRACKET);
|
|
|
|
|
ENUM_STR(IVY_SYM_RIGHT_BRACKET);
|
|
|
|
|
ENUM_STR(IVY_SYM_LEFT_PAREN);
|
|
|
|
|
ENUM_STR(IVY_SYM_RIGHT_PAREN);
|
|
|
|
|
ENUM_STR(IVY_SYM_LEFT_ANGLE);
|
|
|
|
|
ENUM_STR(IVY_SYM_RIGHT_ANGLE);
|
|
|
|
|
ENUM_STR(IVY_SYM_COLON);
|
|
|
|
|
ENUM_STR(IVY_SYM_DOUBLE_COLON);
|
|
|
|
|
ENUM_STR(IVY_SYM_PLUS);
|
|
|
|
|
ENUM_STR(IVY_SYM_HYPHEN);
|
|
|
|
|
ENUM_STR(IVY_SYM_DOUBLE_HYPHEN);
|
|
|
|
|
ENUM_STR(IVY_SYM_FORWARD_SLASH);
|
|
|
|
|
ENUM_STR(IVY_SYM_ASTERISK);
|
|
|
|
|
ENUM_STR(IVY_SYM_PERCENT);
|
|
|
|
|
ENUM_STR(IVY_SYM_AMPERSAND);
|
|
|
|
|
ENUM_STR(IVY_SYM_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_DOUBLE_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_DOUBLE_LEFT_ANGLE);
|
|
|
|
|
ENUM_STR(IVY_SYM_DOUBLE_RIGHT_ANGLE);
|
2024-11-19 15:57:31 +00:00
|
|
|
ENUM_STR(IVY_SYM_LEFT_ANGLE_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_RIGHT_ANGLE_EQUAL);
|
2024-11-16 17:05:29 +00:00
|
|
|
ENUM_STR(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_PLUS_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_HYPHEN_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_FORWARD_SLASH_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_ASTERISK_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_AMPERSAND_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_PIPE_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_PERCENT_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_CARET_EQUAL);
|
|
|
|
|
ENUM_STR(IVY_SYM_BANG);
|
|
|
|
|
ENUM_STR(IVY_SYM_PIPE);
|
|
|
|
|
ENUM_STR(IVY_SYM_CARET);
|
2024-11-19 15:57:31 +00:00
|
|
|
ENUM_STR(IVY_SYM_HASH);
|
2024-11-16 17:05:29 +00:00
|
|
|
ENUM_STR(IVY_SYM_UNDERSCORE);
|
|
|
|
|
ENUM_STR(IVY_SYM_COMMA);
|
2024-11-17 09:31:58 +00:00
|
|
|
ENUM_STR(IVY_SYM_SEMICOLON);
|
2024-11-16 17:05:29 +00:00
|
|
|
ENUM_STR(IVY_SYM_DOLLAR);
|
2024-11-17 09:31:58 +00:00
|
|
|
ENUM_STR(IVY_SYM_HYPHEN_RIGHT_ANGLE);
|
|
|
|
|
ENUM_STR(IVY_SYM_EQUAL_RIGHT_ANGLE);
|
2024-11-16 17:05:29 +00:00
|
|
|
ENUM_STR(IVY_SYM_FORWARD_SLASH_ASTERISK);
|
|
|
|
|
ENUM_STR(IVY_SYM_ASTERISK_FORWARD_SLASH);
|
|
|
|
|
default:
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
}
|