/*
 * Review commentary for the patch below. This text sits ahead of the first
 * file header of the patch, so it is outside every hunk.
 *
 * Purpose: teach the ivy lexer to recognise quote characters and to emit
 * string start / string content / string end tokens.
 *
 * lang/include/ivy/lang/lex.h
 *   . adds IVY_SYM_SQUOTE and IVY_SYM_DQUOTE to enum ivy_symbol, right after
 *     IVY_SYM_DOT;
 *   . adds an unsigned int lex_state member to struct ivy_lexer;
 *   . declares the three *_to_string helpers with IVY_API instead of extern.
 *
 * lang/lex.c
 *   . adds enum lexer_state_type (STATE_NORMAL, STATE_STRING, STATE_FSTRING,
 *     STATE_INTERPOLATION) and struct lexer_state; the struct is not
 *     referenced by any hunk visible here;
 *   . registers the two quote characters in the symbols[] table;
 *   . put_symbol: zero-fills the freshly allocated child node before its
 *     fields are assigned;
 *   . push_string_start / push_string_end / push_string_content: each
 *     allocates a token, zero-fills it, sets t_type to IVY_TOK_STR_START,
 *     IVY_TOK_STR_END or IVY_TOK_STRING respectively (the content variant
 *     also stores the caller's buffer in t_str) and passes it to push_token.
 *     All three return IVY_ERR_NO_MEMORY when malloc fails, otherwise the
 *     status of push_token;
 *   . read_squote_marker: toggles STATE_FSTRING in lex_state and pushes a
 *     string end token (flag was set) or a string start token (flag clear);
 *   . read_dquote_marker: added with an empty body;
 *   . read_string_content: collects characters into a b_string until it sees
 *     an opening brace, a single quote while STATE_FSTRING is set, or a
 *     double quote while STATE_STRING is set; pushes nothing and returns
 *     IVY_OK when nothing was collected, otherwise steals the buffer and
 *     pushes it as a string token, freeing the buffer itself if that push
 *     reports failure;
 *   . read_symbol: drops the unused sym_buf / sym_len locals and replaces the
 *     if / else-if chain with a switch on node->s_id that also dispatches the
 *     two quote symbols;
 *   . pump_tokens: two new early returns into read_string_content, taken
 *     before the existing linefeed handling.
 *
 * NOTE(review): issues visible in the added lines, to resolve before merge.
 *
 *  1. The state enumerators carry no explicit values, so they are 0, 1, 2, 3,
 *     yet lex_state is tested and updated with bitwise and / or / and-not as
 *     if each were an independent flag. STATE_NORMAL is 0 and
 *     STATE_INTERPOLATION (3) shares bits with STATE_STRING (1) and
 *     STATE_FSTRING (2). As a result the second new test in pump_tokens,
 *     (state & STATE_FSTRING) && !(state & STATE_INTERPOLATION), can never be
 *     true: whenever the FSTRING bit is set the INTERPOLATION mask is
 *     non-zero too.
 *  2. read_squote_marker stores the result of push_string_start in status but
 *     has no return statement on that path, so control reaches the end of a
 *     function declared to return enum ivy_status.
 *  3. read_dquote_marker returns nothing although read_symbol returns its
 *     result. No visible hunk ever sets STATE_STRING, so the STATE_STRING
 *     tests in pump_tokens and read_string_content have no producer here.
 *  4. The loop in read_string_content calls peek(lex) on every iteration and
 *     no call that consumes input is visible inside it. Presumably peek does
 *     not advance (pump_tokens calls advance separately) -- TODO confirm; if
 *     so the loop re-reads the same character and never terminates. The loop
 *     also has no visible check for an end-of-input or error value from peek,
 *     and stores the int result into a char array element.
 *  5. read_string_content stops at an opening brace in every state, not only
 *     inside an fstring -- TODO confirm this is intended for plain strings.
 *  6. The default case of the new switch in read_symbol ignores the status
 *     returned by push_symbol and always reports IVY_OK; the removed code did
 *     the same, so this is carried over rather than introduced.
 *  7. When push_string_content fails after t_str was assigned, the caller
 *     frees the buffer; whether push_token also releases the token on failure
 *     is not visible here -- TODO confirm there is no leak or double free.
 */
diff --git a/lang/include/ivy/lang/lex.h b/lang/include/ivy/lang/lex.h index e728f8a..bd50059 100644 --- a/lang/include/ivy/lang/lex.h +++ b/lang/include/ivy/lang/lex.h @@ -46,6 +46,8 @@ enum ivy_keyword { enum ivy_symbol { IVY_SYM_NONE = 0, IVY_SYM_DOT, + IVY_SYM_SQUOTE, + IVY_SYM_DQUOTE, IVY_SYM_LEFT_BRACE, IVY_SYM_RIGHT_BRACE, IVY_SYM_LEFT_BRACKET, @@ -103,12 +105,14 @@ struct ivy_token { struct ivy_lexer_symbol_node; + struct ivy_lexer { struct ivy_line_source *lex_source; enum ivy_status lex_status; struct ivy_token *lex_queue; struct ivy_lexer_symbol_node *lex_sym_tree; enum ivy_token_type lex_prev_token; + unsigned int lex_state; char *lex_linebuf; size_t lex_linebuf_len; @@ -124,8 +128,8 @@ IVY_API struct ivy_token *ivy_lexer_read(struct ivy_lexer *lex); IVY_API void ivy_token_destroy(struct ivy_token *tok); -extern const char *ivy_lex_token_type_to_string(enum ivy_token_type type); -extern const char *ivy_keyword_to_string(enum ivy_keyword keyword); -extern const char *ivy_symbol_to_string(enum ivy_symbol sym); +IVY_API const char *ivy_lex_token_type_to_string(enum ivy_token_type type); +IVY_API const char *ivy_keyword_to_string(enum ivy_keyword keyword); +IVY_API const char *ivy_symbol_to_string(enum ivy_symbol sym); #endif diff --git a/lang/lex.c b/lang/lex.c index ba16102..3f108f4 100644 --- a/lang/lex.c +++ b/lang/lex.c @@ -15,6 +15,17 @@ .id = (i), .name = (n) \ } +enum lexer_state_type { + STATE_NORMAL, + STATE_STRING, + STATE_FSTRING, + STATE_INTERPOLATION, +}; + +struct lexer_state { + enum lexer_state_type s_type; +}; + struct ivy_lexer_symbol_node { char s_char; enum ivy_symbol s_id; @@ -55,6 +66,8 @@ static const size_t nr_keywords = sizeof keywords / sizeof keywords[0]; static struct lex_token_def symbols[] = { LEX_TOKEN_DEF(IVY_SYM_DOT, "."), + LEX_TOKEN_DEF(IVY_SYM_SQUOTE, "'"), + LEX_TOKEN_DEF(IVY_SYM_DQUOTE, "\""), LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACE, "{"), LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACE, "}"), LEX_TOKEN_DEF(IVY_SYM_LEFT_BRACKET, "["), @@ 
-130,6 +143,8 @@ static enum ivy_status put_symbol( return IVY_ERR_NO_MEMORY; } + memset(child, 0x0, sizeof *child); + child->s_id = IVY_SYM_NONE; child->s_char = c; @@ -403,6 +418,46 @@ static enum ivy_status push_linefeed(struct ivy_lexer *lex) return push_token(lex, tok); } +static enum ivy_status push_string_start(struct ivy_lexer *lex) +{ + struct ivy_token *tok = malloc(sizeof *tok); + if (!tok) { + return IVY_ERR_NO_MEMORY; + } + + memset(tok, 0x0, sizeof *tok); + + tok->t_type = IVY_TOK_STR_START; + return push_token(lex, tok); +} + +static enum ivy_status push_string_end(struct ivy_lexer *lex) +{ + struct ivy_token *tok = malloc(sizeof *tok); + if (!tok) { + return IVY_ERR_NO_MEMORY; + } + + memset(tok, 0x0, sizeof *tok); + + tok->t_type = IVY_TOK_STR_END; + return push_token(lex, tok); +} + +static enum ivy_status push_string_content(struct ivy_lexer *lex, char *s) +{ + struct ivy_token *tok = malloc(sizeof *tok); + if (!tok) { + return IVY_ERR_NO_MEMORY; + } + + memset(tok, 0x0, sizeof *tok); + + tok->t_type = IVY_TOK_STRING; + tok->t_str = s; + return push_token(lex, tok); +} + static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym) { struct ivy_token *tok = malloc(sizeof *tok); @@ -480,10 +535,72 @@ static enum ivy_status read_block_comment(struct ivy_lexer *lex) return IVY_OK; } +static enum ivy_status read_squote_marker(struct ivy_lexer *lex) +{ + enum ivy_status status = IVY_OK; + + if (lex->lex_state & STATE_FSTRING) { + /* already within an fstring */ + lex->lex_state &= ~STATE_FSTRING; + return push_string_end(lex); + } else { + /* start of a new fstring */ + status = push_string_start(lex); + lex->lex_state |= STATE_FSTRING; + } +} + +static enum ivy_status read_dquote_marker(struct ivy_lexer *lex) +{ + +} + +static enum ivy_status read_string_content(struct ivy_lexer *lex) +{ + int c; + b_string *str = b_string_create(); + + if (!str) { + return IVY_ERR_NO_MEMORY; + } + + while (true) { + c = peek(lex); + + if (c == '{') 
{ + break; + } + + if ((lex->lex_state & STATE_FSTRING) && c == '\'') { + break; + } + + if ((lex->lex_state & STATE_STRING) && c == '"') { + break; + } + + char s[2] = {c, 0}; + b_string_append_cstr(str, s); + } + + if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) { + b_string_release(str); + return IVY_OK; + } + + char *s = b_string_steal(str); + b_string_release(str); + + enum ivy_status status = push_string_content(lex, s); + if (status != IVY_OK) { + free(s); + } + + return status; +} + static enum ivy_status read_symbol(struct ivy_lexer *lex) { - char sym_buf[32]; - unsigned int sym_len = 0; struct ivy_lexer_symbol_node *node = lex->lex_sym_tree; while (true) { @@ -502,14 +619,19 @@ static enum ivy_status read_symbol(struct ivy_lexer *lex) return IVY_ERR_BAD_SYNTAX; } - if (node->s_id == IVY_SYM_FORWARD_SLASH_ASTERISK) { + switch (node->s_id) { + case IVY_SYM_SQUOTE: + return read_squote_marker(lex); + case IVY_SYM_DQUOTE: + return read_dquote_marker(lex); + case IVY_SYM_FORWARD_SLASH_ASTERISK: return read_block_comment(lex); - } else if (node->s_id == IVY_SYM_DOUBLE_HYPHEN) { + case IVY_SYM_DOUBLE_HYPHEN: return read_line_comment(lex); + default: + push_symbol(lex, node->s_id); + return IVY_OK; } - - push_symbol(lex, node->s_id); - return IVY_OK; } static enum ivy_status read_ident(struct ivy_lexer *lex) @@ -568,6 +690,14 @@ static enum ivy_status pump_tokens(struct ivy_lexer *lex) return c; } + if (lex->lex_state & STATE_STRING && c != '"') { + return read_string_content(lex); + } + + if ((lex->lex_state & STATE_FSTRING) && !(lex->lex_state & STATE_INTERPOLATION)) { + return read_string_content(lex); + } + if (c == '\n') { while (c == '\n') { advance(lex);