lang: lex: implement state stack; make lexer structure opaque
The state stack tracks whether a string is currently being read, what kind of string it is, and whether the lexer is inside an interpolation within that string.
This commit is contained in:
@@ -104,24 +104,15 @@ struct ivy_token {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct ivy_lexer_symbol_node;
|
struct ivy_lexer_symbol_node;
|
||||||
|
struct ivy_lexer_state;
|
||||||
|
struct ivy_lexer;
|
||||||
|
|
||||||
|
IVY_API enum ivy_status ivy_lexer_create(struct ivy_lexer **lex);
|
||||||
|
IVY_API void ivy_lexer_destroy(struct ivy_lexer *lex);
|
||||||
|
|
||||||
struct ivy_lexer {
|
IVY_API void ivy_lexer_set_source(
|
||||||
struct ivy_line_source *lex_source;
|
struct ivy_lexer *lex, struct ivy_line_source *src);
|
||||||
enum ivy_status lex_status;
|
IVY_API enum ivy_status ivy_lexer_get_status(struct ivy_lexer *lex);
|
||||||
struct ivy_token *lex_queue;
|
|
||||||
struct ivy_lexer_symbol_node *lex_sym_tree;
|
|
||||||
enum ivy_token_type lex_prev_token;
|
|
||||||
unsigned int lex_state;
|
|
||||||
|
|
||||||
char *lex_linebuf;
|
|
||||||
size_t lex_linebuf_len;
|
|
||||||
size_t lex_linebuf_cap;
|
|
||||||
size_t lex_linebuf_ptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
IVY_API enum ivy_status ivy_lexer_init(struct ivy_lexer *lex);
|
|
||||||
IVY_API void ivy_lexer_finish(struct ivy_lexer *lex);
|
|
||||||
|
|
||||||
IVY_API struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex);
|
IVY_API struct ivy_token *ivy_lexer_peek(struct ivy_lexer *lex);
|
||||||
IVY_API struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex);
|
IVY_API struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex);
|
||||||
|
|||||||
169
lang/lex.c
169
lang/lex.c
@@ -15,6 +15,24 @@
|
|||||||
.id = (i), .name = (n) \
|
.id = (i), .name = (n) \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ivy_lexer {
|
||||||
|
struct ivy_lexer_symbol_node *lex_sym_tree;
|
||||||
|
struct ivy_line_source *lex_source;
|
||||||
|
|
||||||
|
enum ivy_status lex_status;
|
||||||
|
|
||||||
|
struct ivy_token *lex_queue;
|
||||||
|
enum ivy_token_type lex_prev_token;
|
||||||
|
|
||||||
|
b_queue lex_state;
|
||||||
|
unsigned int lex_brace_depth;
|
||||||
|
|
||||||
|
char *lex_linebuf;
|
||||||
|
size_t lex_linebuf_len;
|
||||||
|
size_t lex_linebuf_cap;
|
||||||
|
size_t lex_linebuf_ptr;
|
||||||
|
};
|
||||||
|
|
||||||
enum lexer_state_type {
|
enum lexer_state_type {
|
||||||
STATE_NORMAL,
|
STATE_NORMAL,
|
||||||
STATE_STRING,
|
STATE_STRING,
|
||||||
@@ -24,6 +42,8 @@ enum lexer_state_type {
|
|||||||
|
|
||||||
struct lexer_state {
|
struct lexer_state {
|
||||||
enum lexer_state_type s_type;
|
enum lexer_state_type s_type;
|
||||||
|
unsigned int s_brace_depth;
|
||||||
|
b_queue_entry s_entry;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ivy_lexer_symbol_node {
|
struct ivy_lexer_symbol_node {
|
||||||
@@ -112,6 +132,57 @@ static struct lex_token_def symbols[] = {
|
|||||||
};
|
};
|
||||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||||
|
|
||||||
|
static struct lexer_state *push_lexer_state(
|
||||||
|
struct ivy_lexer *lex, enum lexer_state_type state_type)
|
||||||
|
{
|
||||||
|
struct lexer_state *state = malloc(sizeof *state);
|
||||||
|
if (!state) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(state, 0x0, sizeof *state);
|
||||||
|
|
||||||
|
state->s_type = state_type;
|
||||||
|
state->s_brace_depth = lex->lex_brace_depth;
|
||||||
|
b_queue_push_back(&lex->lex_state, &state->s_entry);
|
||||||
|
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pop_lexer_state(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
b_queue_entry *entry = b_queue_pop_back(&lex->lex_state);
|
||||||
|
if (!entry) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct lexer_state *state = b_unbox(struct lexer_state, entry, s_entry);
|
||||||
|
free(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct lexer_state *get_lexer_state(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
b_queue_entry *entry = b_queue_last(&lex->lex_state);
|
||||||
|
if (!entry) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return b_unbox(struct lexer_state, entry, s_entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Free every lexer state held in the queue `state`, erasing each entry from
 * the queue as it goes.
 */
static void destroy_state_stack(b_queue *state)
{
	b_queue_iterator cursor;

	b_queue_iterator_begin(state, &cursor);

	while (b_queue_iterator_is_valid(&cursor)) {
		/* resolve the owning state before its entry is erased */
		struct lexer_state *doomed
			= b_unbox(struct lexer_state, cursor.entry, s_entry);

		/* NOTE(review): presumably erase also moves the iterator on to
		 * the next entry; the loop relies on that to terminate --
		 * confirm against the b_queue API. */
		b_queue_iterator_erase(&cursor);
		free(doomed);
	}
}
|
||||||
|
|
||||||
static struct ivy_lexer_symbol_node *get_symbol_node(
|
static struct ivy_lexer_symbol_node *get_symbol_node(
|
||||||
struct ivy_lexer_symbol_node *node, char c)
|
struct ivy_lexer_symbol_node *node, char c)
|
||||||
{
|
{
|
||||||
@@ -245,8 +316,13 @@ static enum ivy_keyword find_keyword_by_name(const char *s)
|
|||||||
return IVY_KW_NONE;
|
return IVY_KW_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
enum ivy_status ivy_lexer_init(struct ivy_lexer *lex)
|
enum ivy_status ivy_lexer_create(struct ivy_lexer **lexp)
|
||||||
{
|
{
|
||||||
|
struct ivy_lexer *lex = malloc(sizeof *lex);
|
||||||
|
if (!lex) {
|
||||||
|
return IVY_ERR_NO_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
memset(lex, 0x0, sizeof *lex);
|
memset(lex, 0x0, sizeof *lex);
|
||||||
|
|
||||||
lex->lex_status = IVY_OK;
|
lex->lex_status = IVY_OK;
|
||||||
@@ -256,15 +332,26 @@ enum ivy_status ivy_lexer_init(struct ivy_lexer *lex)
|
|||||||
lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;
|
lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;
|
||||||
|
|
||||||
lex->lex_sym_tree = build_symbol_tree();
|
lex->lex_sym_tree = build_symbol_tree();
|
||||||
|
if (!lex->lex_sym_tree) {
|
||||||
|
ivy_lexer_destroy(lex);
|
||||||
|
return IVY_ERR_NO_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!push_lexer_state(lex, STATE_NORMAL)) {
|
||||||
|
ivy_lexer_destroy(lex);
|
||||||
|
return IVY_ERR_NO_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
print_symbol_node(lex->lex_sym_tree, 0);
|
print_symbol_node(lex->lex_sym_tree, 0);
|
||||||
|
|
||||||
/* TODO only do keyword initialisation once */
|
/* TODO only do keyword initialisation once */
|
||||||
init_keywords();
|
init_keywords();
|
||||||
|
*lexp = lex;
|
||||||
|
|
||||||
return IVY_OK;
|
return IVY_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ivy_lexer_finish(struct ivy_lexer *lex)
|
void ivy_lexer_destroy(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
while (lex->lex_queue) {
|
while (lex->lex_queue) {
|
||||||
struct ivy_token *next = lex->lex_queue->t_next;
|
struct ivy_token *next = lex->lex_queue->t_next;
|
||||||
@@ -280,7 +367,19 @@ void ivy_lexer_finish(struct ivy_lexer *lex)
|
|||||||
destroy_symbol_tree(lex->lex_sym_tree);
|
destroy_symbol_tree(lex->lex_sym_tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(lex, 0x0, sizeof *lex);
|
destroy_state_stack(&lex->lex_state);
|
||||||
|
|
||||||
|
free(lex);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ivy_lexer_set_source(struct ivy_lexer *lex, struct ivy_line_source *src)
|
||||||
|
{
|
||||||
|
lex->lex_source = src;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum ivy_status ivy_lexer_get_status(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
return lex->lex_status;
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
|
static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
|
||||||
@@ -538,27 +637,38 @@ static enum ivy_status read_block_comment(struct ivy_lexer *lex)
|
|||||||
static enum ivy_status read_squote_marker(struct ivy_lexer *lex)
|
static enum ivy_status read_squote_marker(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
enum ivy_status status = IVY_OK;
|
enum ivy_status status = IVY_OK;
|
||||||
|
struct lexer_state *state = get_lexer_state(lex);
|
||||||
|
|
||||||
if (lex->lex_state & STATE_FSTRING) {
|
if (state->s_type == STATE_FSTRING) {
|
||||||
/* already within an fstring */
|
/* already within an fstring */
|
||||||
lex->lex_state &= ~STATE_FSTRING;
|
pop_lexer_state(lex);
|
||||||
return push_string_end(lex);
|
return push_string_end(lex);
|
||||||
} else {
|
|
||||||
/* start of a new fstring */
|
|
||||||
status = push_string_start(lex);
|
|
||||||
lex->lex_state |= STATE_FSTRING;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* start of a new fstring */
|
||||||
|
status = push_string_start(lex);
|
||||||
|
|
||||||
|
if (status != IVY_OK) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!push_lexer_state(lex, STATE_FSTRING)) {
|
||||||
|
return IVY_ERR_NO_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return IVY_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum ivy_status read_dquote_marker(struct ivy_lexer *lex)
|
static enum ivy_status read_dquote_marker(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
|
return IVY_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum ivy_status read_string_content(struct ivy_lexer *lex)
|
static enum ivy_status read_string_content(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
b_string *str = b_string_create();
|
b_string *str = b_string_create();
|
||||||
|
struct lexer_state *state = get_lexer_state(lex);
|
||||||
|
|
||||||
if (!str) {
|
if (!str) {
|
||||||
return IVY_ERR_NO_MEMORY;
|
return IVY_ERR_NO_MEMORY;
|
||||||
@@ -567,20 +677,17 @@ static enum ivy_status read_string_content(struct ivy_lexer *lex)
|
|||||||
while (true) {
|
while (true) {
|
||||||
c = peek(lex);
|
c = peek(lex);
|
||||||
|
|
||||||
if (c == '{') {
|
if (state->s_type == STATE_FSTRING && (c == '\'' || c == '{')) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((lex->lex_state & STATE_FSTRING) && c == '\'') {
|
if (state->s_type == STATE_STRING && c == '"') {
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((lex->lex_state & STATE_STRING) && c == '"') {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
char s[2] = {c, 0};
|
char s[2] = {c, 0};
|
||||||
b_string_append_cstr(str, s);
|
b_string_append_cstr(str, s);
|
||||||
|
advance(lex);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) {
|
if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) {
|
||||||
@@ -602,6 +709,7 @@ static enum ivy_status read_string_content(struct ivy_lexer *lex)
|
|||||||
static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
struct ivy_lexer_symbol_node *node = lex->lex_sym_tree;
|
struct ivy_lexer_symbol_node *node = lex->lex_sym_tree;
|
||||||
|
struct lexer_state *state = get_lexer_state(lex);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
int c = peek(lex);
|
int c = peek(lex);
|
||||||
@@ -628,6 +736,23 @@ static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
|||||||
return read_block_comment(lex);
|
return read_block_comment(lex);
|
||||||
case IVY_SYM_DOUBLE_HYPHEN:
|
case IVY_SYM_DOUBLE_HYPHEN:
|
||||||
return read_line_comment(lex);
|
return read_line_comment(lex);
|
||||||
|
case IVY_SYM_LEFT_BRACE:
|
||||||
|
push_symbol(lex, node->s_id);
|
||||||
|
lex->lex_brace_depth++;
|
||||||
|
|
||||||
|
if (state->s_type == STATE_FSTRING) {
|
||||||
|
push_lexer_state(lex, STATE_INTERPOLATION);
|
||||||
|
}
|
||||||
|
return IVY_OK;
|
||||||
|
case IVY_SYM_RIGHT_BRACE:
|
||||||
|
push_symbol(lex, node->s_id);
|
||||||
|
lex->lex_brace_depth--;
|
||||||
|
|
||||||
|
if (state->s_type == STATE_INTERPOLATION
|
||||||
|
&& lex->lex_brace_depth < state->s_brace_depth) {
|
||||||
|
pop_lexer_state(lex);
|
||||||
|
}
|
||||||
|
return IVY_OK;
|
||||||
default:
|
default:
|
||||||
push_symbol(lex, node->s_id);
|
push_symbol(lex, node->s_id);
|
||||||
return IVY_OK;
|
return IVY_OK;
|
||||||
@@ -684,20 +809,26 @@ static enum ivy_status read_ident(struct ivy_lexer *lex)
|
|||||||
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
enum ivy_status status;
|
enum ivy_status status;
|
||||||
|
struct lexer_state *state = get_lexer_state(lex);
|
||||||
|
|
||||||
int c = peek(lex);
|
int c = peek(lex);
|
||||||
|
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lex->lex_state & STATE_STRING && c != '"') {
|
if (state->s_type == STATE_STRING && c != '"') {
|
||||||
return read_string_content(lex);
|
return read_string_content(lex);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((lex->lex_state & STATE_FSTRING) && !(lex->lex_state & STATE_INTERPOLATION)) {
|
if (state->s_type == STATE_FSTRING && c != '\'' && c != '{') {
|
||||||
return read_string_content(lex);
|
return read_string_content(lex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* `state` is invalid past this point, as the read_* functions may
|
||||||
|
* perform state transitions. */
|
||||||
|
state = NULL;
|
||||||
|
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
while (c == '\n') {
|
while (c == '\n') {
|
||||||
advance(lex);
|
advance(lex);
|
||||||
|
|||||||
Reference in New Issue
Block a user