diff --git a/lang/include/ivy/lang/lex.h b/lang/include/ivy/lang/lex.h index 774541a..77e9142 100644 --- a/lang/include/ivy/lang/lex.h +++ b/lang/include/ivy/lang/lex.h @@ -40,6 +40,8 @@ enum ivy_keyword { IVY_KW_UNLESS, IVY_KW_IN, IVY_KW_DO, + IVY_KW_GET, + IVY_KW_SET, IVY_KW_END, }; @@ -79,14 +81,16 @@ enum ivy_symbol { IVY_SYM_PIPE_EQUAL, IVY_SYM_PERCENT_EQUAL, IVY_SYM_CARET_EQUAL, + IVY_SYM_HASH, IVY_SYM_BANG, IVY_SYM_PIPE, IVY_SYM_CARET, IVY_SYM_UNDERSCORE, IVY_SYM_COMMA, + IVY_SYM_SEMICOLON, IVY_SYM_DOLLAR, - IVY_SYM_RIGHT_ARROW, - IVY_SYM_BIG_RIGHT_ARROW, + IVY_SYM_HYPHEN_RIGHT_ANGLE, + IVY_SYM_EQUAL_RIGHT_ANGLE, IVY_SYM_FORWARD_SLASH_ASTERISK, IVY_SYM_ASTERISK_FORWARD_SLASH, }; @@ -98,7 +102,7 @@ struct ivy_token { union { enum ivy_keyword t_keyword; enum ivy_symbol t_symbol; - signed long long t_number; + unsigned long long t_number; char *t_str; }; }; diff --git a/lang/lex.c b/lang/lex.c index 1909e27..697b455 100644 --- a/lang/lex.c +++ b/lang/lex.c @@ -24,6 +24,7 @@ struct ivy_lexer { struct ivy_token *lex_queue; enum ivy_token_type lex_prev_token; + b_string *lex_temp; b_queue lex_state; unsigned int lex_brace_depth; @@ -80,6 +81,8 @@ static struct lex_token_def keywords[] = { LEX_TOKEN_DEF(IVY_KW_UNLESS, "unless"), LEX_TOKEN_DEF(IVY_KW_IN, "in"), LEX_TOKEN_DEF(IVY_KW_DO, "do"), + LEX_TOKEN_DEF(IVY_KW_GET, "get"), + LEX_TOKEN_DEF(IVY_KW_SET, "set"), LEX_TOKEN_DEF(IVY_KW_END, "end"), }; static const size_t nr_keywords = sizeof keywords / sizeof keywords[0]; @@ -121,14 +124,16 @@ static struct lex_token_def symbols[] = { LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="), LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="), LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="), + LEX_TOKEN_DEF(IVY_SYM_HASH, "#"), LEX_TOKEN_DEF(IVY_SYM_BANG, "!"), LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"), LEX_TOKEN_DEF(IVY_SYM_CARET, "^"), LEX_TOKEN_DEF(IVY_SYM_UNDERSCORE, "_"), LEX_TOKEN_DEF(IVY_SYM_COMMA, ","), + LEX_TOKEN_DEF(IVY_SYM_SEMICOLON, ";"), LEX_TOKEN_DEF(IVY_SYM_DOLLAR, "$"), - LEX_TOKEN_DEF(IVY_SYM_RIGHT_ARROW, "->"), - LEX_TOKEN_DEF(IVY_SYM_BIG_RIGHT_ARROW, "=>"), + LEX_TOKEN_DEF(IVY_SYM_HYPHEN_RIGHT_ANGLE, "->"), + LEX_TOKEN_DEF(IVY_SYM_EQUAL_RIGHT_ANGLE, "=>"), }; static const size_t nr_symbols = sizeof symbols / sizeof symbols[0]; @@ -198,6 +203,16 @@ static struct ivy_lexer_symbol_node *get_symbol_node( return NULL; } +static b_string *get_temp_string(struct ivy_lexer *lex) +{ + if (!lex->lex_temp) { + lex->lex_temp = b_string_create(); + } + + b_string_clear(lex->lex_temp); + return lex->lex_temp; +} + static enum ivy_status put_symbol( struct ivy_lexer_symbol_node *tree, struct lex_token_def *sym) { @@ -367,6 +382,10 @@ void ivy_lexer_destroy(struct ivy_lexer *lex) destroy_symbol_tree(lex->lex_sym_tree); } + if (lex->lex_temp) { + b_string_release(lex->lex_temp); + } + destroy_state_stack(&lex->lex_state); free(lex); @@ -571,6 +590,34 @@ static enum ivy_status push_symbol(struct ivy_lexer *lex, enum ivy_symbol sym) return push_token(lex, tok); } +static enum ivy_status push_atom(struct ivy_lexer *lex, char *s) +{ + struct ivy_token *tok = malloc(sizeof *tok); + if (!tok) { + return IVY_ERR_NO_MEMORY; + } + + memset(tok, 0x0, sizeof *tok); + + tok->t_type = IVY_TOK_ATOM; + tok->t_str = s; + return push_token(lex, tok); +} + +static enum ivy_status push_number(struct ivy_lexer *lex, unsigned long long v) +{ + struct ivy_token *tok = malloc(sizeof *tok); + if (!tok) { + return IVY_ERR_NO_MEMORY; + } + + memset(tok, 0x0, sizeof *tok); + + tok->t_type = IVY_TOK_NUMBER; + tok->t_number = v; + return push_token(lex, tok); +} + static enum ivy_status push_keyword(struct ivy_lexer *lex, enum ivy_keyword keyword) { struct ivy_token *tok = malloc(sizeof *tok); @@ -661,13 +708,54 @@ static enum ivy_status read_squote_marker(struct ivy_lexer *lex) static enum ivy_status read_dquote_marker(struct ivy_lexer *lex) { + struct lexer_state *state = get_lexer_state(lex); + + if (state->s_type == STATE_STRING) { + /* already within a string */ + pop_lexer_state(lex); + return IVY_OK; + } + + /* start of a new string */ + if (!push_lexer_state(lex, STATE_STRING)) { + return IVY_ERR_NO_MEMORY; + } + return IVY_OK; } +static enum ivy_status read_atom(struct ivy_lexer *lex) +{ + b_string *str = get_temp_string(lex); + + while (true) { + int c = peek(lex); + + if (c == IVY_ERR_EOF) { + break; + } + + if (c < 0) { + return c; + } + + if (!isalnum(c) && c != ':' && c != '_') { + break; + } + + char s[] = {c, 0}; + b_string_append_cstr(str, s); + advance(lex); + } + + char *s = b_string_steal(str); + return push_atom(lex, s); +} + static enum ivy_status read_string_content(struct ivy_lexer *lex) { int c; - b_string *str = b_string_create(); + b_string *str = get_temp_string(lex); struct lexer_state *state = get_lexer_state(lex); if (!str) { @@ -691,12 +779,10 @@ static enum ivy_status read_string_content(struct ivy_lexer *lex) } if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) { - b_string_release(str); return IVY_OK; } char *s = b_string_steal(str); - b_string_release(str); enum ivy_status status = push_string_content(lex, s); if (status != IVY_OK) { @@ -736,6 +822,8 @@ static enum ivy_status read_symbol(struct ivy_lexer *lex) return read_block_comment(lex); case IVY_SYM_DOUBLE_HYPHEN: return read_line_comment(lex); + case IVY_SYM_HASH: + return read_atom(lex); case IVY_SYM_LEFT_BRACE: push_symbol(lex, node->s_id); lex->lex_brace_depth++; @@ -759,9 +847,91 @@ static enum ivy_status read_symbol(struct ivy_lexer *lex) } } +static enum ivy_status read_number(struct ivy_lexer *lex) +{ + int token_len = 0; + int base = 10; + b_string *str = get_temp_string(lex); + + while (true) { + int c = peek(lex); + if (c == IVY_ERR_EOF) { + break; + } + + if (c < 0) { + return c; + } + + if (c == '_') { + token_len++; + advance(lex); + continue; + } + + if (isspace(c) || ispunct(c)) { + break; + } + + if (c == '0' && token_len == 0) { + base = 7; + token_len++; + advance(lex); + continue; + } + + if (c == 'x' && token_len == 1) { + base = 16; + token_len++; + advance(lex); + continue; + } + + if (c == 'b' && token_len == 1) { + base = 2; + token_len++; + advance(lex); + continue; + } + + if (base == 2 && c != '0' && c != '1') { + return IVY_ERR_BAD_SYNTAX; + } + + if (base == 10 && !isdigit(c)) { + return IVY_ERR_BAD_SYNTAX; + } + + if (base == 16 && !isxdigit(c)) { + return IVY_ERR_BAD_SYNTAX; + } + + char s[] = {c, 0}; + b_string_append_cstr(str, s); + advance(lex); + } + + if (token_len == 1 && base == 7) { + return push_number(lex, 0); + } + + const char *s = b_string_ptr(str); + char *ep; + + /* negative numbers will be lexed as a hyphen followed by a positive + * number. */ + unsigned long long v = strtoull(s, &ep, base); + + if (*ep != '\0') { + return IVY_ERR_BAD_SYNTAX; + } + + return push_number(lex, v); +} + static enum ivy_status read_ident(struct ivy_lexer *lex) { - b_string *str = b_string_create(); + b_string *str = get_temp_string(lex); bool label = false; while (true) { @@ -788,20 +958,17 @@ static enum ivy_status read_ident(struct ivy_lexer *lex) const char *s = b_string_ptr(str); if (!strcmp(s, "_")) { - b_string_release(str); push_symbol(lex, IVY_SYM_UNDERSCORE); } enum ivy_keyword keyword = IVY_KW_NONE; if (!label && (keyword = find_keyword_by_name(s)) != IVY_KW_NONE) { - b_string_release(str); return push_keyword(lex, keyword); } struct ivy_token *tok = create_token(label ? IVY_TOK_LABEL : IVY_TOK_IDENT); tok->t_str = b_string_steal(str); - b_string_release(str); return push_token(lex, tok); } @@ -825,8 +992,8 @@ static enum ivy_status pump_tokens(struct ivy_lexer *lex) return read_string_content(lex); } - /* `state` is invalid past this point, as the read_* functions may - * perform state transitions. */ + /* `state` is invalid past this point, as the read_* functions + * may perform state transitions. */ state = NULL; if (c == '\n') { @@ -855,6 +1022,10 @@ static enum ivy_status pump_tokens(struct ivy_lexer *lex) return read_ident(lex); } + if (isnumber(c)) { + return read_number(lex); + } + return IVY_ERR_BAD_SYNTAX; } @@ -1002,9 +1173,10 @@ const char *ivy_symbol_to_string(enum ivy_symbol sym) ENUM_STR(IVY_SYM_CARET); ENUM_STR(IVY_SYM_UNDERSCORE); ENUM_STR(IVY_SYM_COMMA); + ENUM_STR(IVY_SYM_SEMICOLON); ENUM_STR(IVY_SYM_DOLLAR); - ENUM_STR(IVY_SYM_RIGHT_ARROW); - ENUM_STR(IVY_SYM_BIG_RIGHT_ARROW); + ENUM_STR(IVY_SYM_HYPHEN_RIGHT_ANGLE); + ENUM_STR(IVY_SYM_EQUAL_RIGHT_ANGLE); ENUM_STR(IVY_SYM_FORWARD_SLASH_ASTERISK); ENUM_STR(IVY_SYM_ASTERISK_FORWARD_SLASH); default: