lang: lex: start implementing strings and interpolation
This commit is contained in:
@@ -46,6 +46,8 @@ enum ivy_keyword {
|
|||||||
enum ivy_symbol {
|
enum ivy_symbol {
|
||||||
IVY_SYM_NONE = 0,
|
IVY_SYM_NONE = 0,
|
||||||
IVY_SYM_DOT,
|
IVY_SYM_DOT,
|
||||||
|
IVY_SYM_SQUOTE,
|
||||||
|
IVY_SYM_DQUOTE,
|
||||||
IVY_SYM_LEFT_BRACE,
|
IVY_SYM_LEFT_BRACE,
|
||||||
IVY_SYM_RIGHT_BRACE,
|
IVY_SYM_RIGHT_BRACE,
|
||||||
IVY_SYM_LEFT_BRACKET,
|
IVY_SYM_LEFT_BRACKET,
|
||||||
@@ -103,12 +105,14 @@ struct ivy_token {
|
|||||||
|
|
||||||
struct ivy_lexer_symbol_node;
|
struct ivy_lexer_symbol_node;
|
||||||
|
|
||||||
|
|
||||||
struct ivy_lexer {
|
struct ivy_lexer {
|
||||||
struct ivy_line_source *lex_source;
|
struct ivy_line_source *lex_source;
|
||||||
enum ivy_status lex_status;
|
enum ivy_status lex_status;
|
||||||
struct ivy_token *lex_queue;
|
struct ivy_token *lex_queue;
|
||||||
struct ivy_lexer_symbol_node *lex_sym_tree;
|
struct ivy_lexer_symbol_node *lex_sym_tree;
|
||||||
enum ivy_token_type lex_prev_token;
|
enum ivy_token_type lex_prev_token;
|
||||||
|
unsigned int lex_state;
|
||||||
|
|
||||||
char *lex_linebuf;
|
char *lex_linebuf;
|
||||||
size_t lex_linebuf_len;
|
size_t lex_linebuf_len;
|
||||||
@@ -124,8 +128,8 @@ IVY_API struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex);
|
|||||||
|
|
||||||
IVY_API void ivy_token_destroy(struct ivy_token *tok);
|
IVY_API void ivy_token_destroy(struct ivy_token *tok);
|
||||||
|
|
||||||
extern const char *ivy_lex_token_type_to_string(enum ivy_token_type type);
|
IVY_API const char *ivy_lex_token_type_to_string(enum ivy_token_type type);
|
||||||
extern const char *ivy_keyword_to_string(enum ivy_keyword keyword);
|
IVY_API const char *ivy_keyword_to_string(enum ivy_keyword keyword);
|
||||||
extern const char *ivy_symbol_to_string(enum ivy_symbol sym);
|
IVY_API const char *ivy_symbol_to_string(enum ivy_symbol sym);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
144
lang/lex.c
144
lang/lex.c
@@ -15,6 +15,17 @@
|
|||||||
.id = (i), .name = (n) \
|
.id = (i), .name = (n) \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Lexer mode flags kept in ivy_lexer.lex_state.
 *
 * The lexer sets, clears and tests these with bitwise operators
 * (|=, &= ~, &), so every non-zero member must occupy its own bit.
 * With implicit sequential values STATE_INTERPOLATION would be 3, i.e.
 * STATE_STRING | STATE_FSTRING, and any test for it would also succeed
 * whenever an fstring is open.
 */
enum lexer_state_type {
	/* no flag set */
	STATE_NORMAL = 0x00,
	/* tested together with the '"' delimiter */
	STATE_STRING = 0x01,
	/* tested together with the '\'' delimiter */
	STATE_FSTRING = 0x02,
	/* while set, fstring text is not handed to the string-content reader */
	STATE_INTERPOLATION = 0x04,
};
|
||||||
|
|
||||||
|
/*
 * Record for a single lexer state. It currently carries only the state's
 * kind. NOTE(review): nothing visible in this chunk reads or writes this
 * struct; the lexer tracks its mode in ivy_lexer.lex_state instead.
 */
struct lexer_state {
|
||||||
|
/* which lexer_state_type value this record stands for */
enum lexer_state_type s_type;
|
||||||
|
};
|
||||||
|
|
||||||
struct ivy_lexer_symbol_node {
|
struct ivy_lexer_symbol_node {
|
||||||
char s_char;
|
char s_char;
|
||||||
enum ivy_symbol s_id;
|
enum ivy_symbol s_id;
|
||||||
@@ -55,6 +66,8 @@ static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
|
|||||||
|
|
||||||
static struct lex_token_def symbols[] = {
|
static struct lex_token_def symbols[] = {
|
||||||
LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
|
LEX_TOKEN_DEF(IVY_SYM_DOT, "."),
|
||||||
|
LEX_TOKEN_DEF(IVY_SYM_SQUOTE, "'"),
|
||||||
|
LEX_TOKEN_DEF(IVY_SYM_DQUOTE, "\""),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["),
|
||||||
@@ -130,6 +143,8 @@ static enum ivy_status put_symbol(
|
|||||||
return IVY_ERR_NO_MEMORY;
|
return IVY_ERR_NO_MEMORY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memset(child, 0x0, sizeof *child);
|
||||||
|
|
||||||
child->s_id = IVY_SYM_NONE;
|
child->s_id = IVY_SYM_NONE;
|
||||||
child->s_char = c;
|
child->s_char = c;
|
||||||
|
|
||||||
@@ -403,6 +418,46 @@ static enum ivy_status push_linefeed(struct ivy_lexer *lex)
|
|||||||
return push_token(lex, tok);
|
return push_token(lex, tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static enum ivy_status push_string_start(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
struct ivy_token *tok = malloc(sizeof *tok);
|
||||||
|
if (!tok) {
|
||||||
|
return IVY_ERR_NO_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(tok, 0x0, sizeof *tok);
|
||||||
|
|
||||||
|
tok->t_type = IVY_TOK_STR_START;
|
||||||
|
return push_token(lex, tok);
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum ivy_status push_string_end(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
struct ivy_token *tok = malloc(sizeof *tok);
|
||||||
|
if (!tok) {
|
||||||
|
return IVY_ERR_NO_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(tok, 0x0, sizeof *tok);
|
||||||
|
|
||||||
|
tok->t_type = IVY_TOK_STR_END;
|
||||||
|
return push_token(lex, tok);
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum ivy_status push_string_content(struct ivy_lexer *lex, char *s)
|
||||||
|
{
|
||||||
|
struct ivy_token *tok = malloc(sizeof *tok);
|
||||||
|
if (!tok) {
|
||||||
|
return IVY_ERR_NO_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(tok, 0x0, sizeof *tok);
|
||||||
|
|
||||||
|
tok->t_type = IVY_TOK_STRING;
|
||||||
|
tok->t_str = s;
|
||||||
|
return push_token(lex, tok);
|
||||||
|
}
|
||||||
|
|
||||||
static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym)
|
static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym)
|
||||||
{
|
{
|
||||||
struct ivy_token *tok = malloc(sizeof *tok);
|
struct ivy_token *tok = malloc(sizeof *tok);
|
||||||
@@ -480,10 +535,72 @@ static enum ivy_status read_block_comment(struct ivy_lexer *lex)
|
|||||||
return IVY_OK;
|
return IVY_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static enum ivy_status read_squote_marker(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
enum ivy_status status = IVY_OK;
|
||||||
|
|
||||||
|
if (lex->lex_state & STATE_FSTRING) {
|
||||||
|
/* already within an fstring */
|
||||||
|
lex->lex_state &= ~STATE_FSTRING;
|
||||||
|
return push_string_end(lex);
|
||||||
|
} else {
|
||||||
|
/* start of a new fstring */
|
||||||
|
status = push_string_start(lex);
|
||||||
|
lex->lex_state |= STATE_FSTRING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum ivy_status read_dquote_marker(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum ivy_status read_string_content(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
b_string *str = b_string_create();
|
||||||
|
|
||||||
|
if (!str) {
|
||||||
|
return IVY_ERR_NO_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
c = peek(lex);
|
||||||
|
|
||||||
|
if (c == '{') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((lex->lex_state & STATE_FSTRING) && c == '\'') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((lex->lex_state & STATE_STRING) && c == '"') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
char s[2] = {c, 0};
|
||||||
|
b_string_append_cstr(str, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) {
|
||||||
|
b_string_release(str);
|
||||||
|
return IVY_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *s = b_string_steal(str);
|
||||||
|
b_string_release(str);
|
||||||
|
|
||||||
|
enum ivy_status status = push_string_content(lex, s);
|
||||||
|
if (status != IVY_OK) {
|
||||||
|
free(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
char sym_buf[32];
|
|
||||||
unsigned int sym_len = 0;
|
|
||||||
struct ivy_lexer_symbol_node *node = lex->lex_sym_tree;
|
struct ivy_lexer_symbol_node *node = lex->lex_sym_tree;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
@@ -502,14 +619,19 @@ static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
|||||||
return IVY_ERR_BAD_SYNTAX;
|
return IVY_ERR_BAD_SYNTAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node->s_id == IVY_SYM_FORWARD_SLASH_ASTERISK) {
|
switch (node->s_id) {
|
||||||
|
case IVY_SYM_SQUOTE:
|
||||||
|
return read_squote_marker(lex);
|
||||||
|
case IVY_SYM_DQUOTE:
|
||||||
|
return read_dquote_marker(lex);
|
||||||
|
case IVY_SYM_FORWARD_SLASH_ASTERISK:
|
||||||
return read_block_comment(lex);
|
return read_block_comment(lex);
|
||||||
} else if (node->s_id == IVY_SYM_DOUBLE_HYPHEN) {
|
case IVY_SYM_DOUBLE_HYPHEN:
|
||||||
return read_line_comment(lex);
|
return read_line_comment(lex);
|
||||||
|
default:
|
||||||
|
push_symbol(lex, node->s_id);
|
||||||
|
return IVY_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
push_symbol(lex, node->s_id);
|
|
||||||
return IVY_OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum ivy_status read_ident(struct ivy_lexer *lex)
|
static enum ivy_status read_ident(struct ivy_lexer *lex)
|
||||||
@@ -568,6 +690,14 @@ static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
|||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (lex->lex_state & STATE_STRING && c != '"') {
|
||||||
|
return read_string_content(lex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((lex->lex_state & STATE_FSTRING) && !(lex->lex_state & STATE_INTERPOLATION)) {
|
||||||
|
return read_string_content(lex);
|
||||||
|
}
|
||||||
|
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
while (c == '\n') {
|
while (c == '\n') {
|
||||||
advance(lex);
|
advance(lex);
|
||||||
|
|||||||
Reference in New Issue
Block a user