lang: lex: add support for kebab-case identifiers and negative numbers
Identifiers can now contain hyphens, with the following restrictions:
* an identifier cannot start or end with a hyphen.
* an identifier cannot contain more than one hyphen in a row.

Kebab-case identifiers can be used for type and variable names, as well as message identifiers and labels. To avoid ambiguity, the lexer now enforces whitespace around most binary operators (with a few exceptions, such as semicolons). Trying to compile a "compact" arithmetic expression, such as y=1+2, will now result in a "missing whitespace" error.
This commit is contained in:
@@ -128,7 +128,7 @@ struct ivy_token {
|
|||||||
union {
|
union {
|
||||||
enum ivy_keyword t_keyword;
|
enum ivy_keyword t_keyword;
|
||||||
enum ivy_symbol t_symbol;
|
enum ivy_symbol t_symbol;
|
||||||
unsigned long long t_int;
|
long long t_int;
|
||||||
double t_double;
|
double t_double;
|
||||||
char *t_str;
|
char *t_str;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -14,8 +14,10 @@ static void print_symbol_node(struct ivy_lexer_symbol_node *node, int depth)
|
|||||||
|
|
||||||
b_printf("[cyan]%c[reset]", node->s_char);
|
b_printf("[cyan]%c[reset]", node->s_char);
|
||||||
|
|
||||||
if (node->s_id != IVY_SYM_NONE) {
|
if (node->s_def != NULL) {
|
||||||
b_printf(" ([magenta]%s[reset])", ivy_symbol_to_string(node->s_id));
|
b_printf(
|
||||||
|
" ([magenta]%s[reset])",
|
||||||
|
ivy_symbol_to_string(node->s_def->id));
|
||||||
}
|
}
|
||||||
|
|
||||||
b_printf("\n");
|
b_printf("\n");
|
||||||
|
|||||||
286
lang/lex.c
286
lang/lex.c
@@ -17,7 +17,9 @@
|
|||||||
|
|
||||||
#define LINEBUF_DEFAULT_CAPACITY 1024
|
#define LINEBUF_DEFAULT_CAPACITY 1024
|
||||||
|
|
||||||
#define LEX_TOKEN_DEF(i, n) {.id = (i), .name = (n)}
|
#define LEX_TOKEN_DEF2(i, n, f) {.id = (i), .name = (n), .flags = (f)}
|
||||||
|
#define LEX_TOKEN_DEF(i, n) LEX_TOKEN_DEF2(i, n, 0)
|
||||||
|
#define LEX_TOKEN_DEF_W(i, n) LEX_TOKEN_DEF2(i, n, LEX_TOK_REQUIRES_WHITESPACE)
|
||||||
|
|
||||||
static struct lex_token_def keywords[] = {
|
static struct lex_token_def keywords[] = {
|
||||||
LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
|
LEX_TOKEN_DEF(IVY_KW_PACKAGE, "package"),
|
||||||
@@ -65,36 +67,36 @@ static struct lex_token_def symbols[] = {
|
|||||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_BRACKET, "]"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
|
LEX_TOKEN_DEF(IVY_SYM_LEFT_PAREN, "("),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
|
LEX_TOKEN_DEF(IVY_SYM_RIGHT_PAREN, ")"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE, "<"),
|
LEX_TOKEN_DEF_W(IVY_SYM_LEFT_ANGLE, "<"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE, ">"),
|
LEX_TOKEN_DEF_W(IVY_SYM_RIGHT_ANGLE, ">"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
|
LEX_TOKEN_DEF(IVY_SYM_COLON, ":"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_COLON, "::"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_PLUS, "+"),
|
LEX_TOKEN_DEF_W(IVY_SYM_PLUS, "+"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_HYPHEN, "-"),
|
LEX_TOKEN_DEF_W(IVY_SYM_HYPHEN, "-"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_HYPHEN, "--"),
|
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_HYPHEN, "--"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH, "/"),
|
LEX_TOKEN_DEF_W(IVY_SYM_FORWARD_SLASH, "/"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_ASTERISK, "*"),
|
LEX_TOKEN_DEF_W(IVY_SYM_ASTERISK, "*"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_ASTERISK, "/*"),
|
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_ASTERISK, "/*"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_FORWARD_SLASH, "*/"),
|
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_FORWARD_SLASH, "*/"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_PERCENT, "%"),
|
LEX_TOKEN_DEF_W(IVY_SYM_PERCENT, "%"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND, "&"),
|
LEX_TOKEN_DEF_W(IVY_SYM_AMPERSAND, "&"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_EQUAL, "="),
|
LEX_TOKEN_DEF_W(IVY_SYM_EQUAL, "="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_EQUAL, "=="),
|
LEX_TOKEN_DEF_W(IVY_SYM_DOUBLE_EQUAL, "=="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE, "<<"),
|
LEX_TOKEN_DEF_W(IVY_SYM_DOUBLE_LEFT_ANGLE, "<<"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE, ">>"),
|
LEX_TOKEN_DEF_W(IVY_SYM_DOUBLE_RIGHT_ANGLE, ">>"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_LEFT_ANGLE_EQUAL, "<="),
|
LEX_TOKEN_DEF_W(IVY_SYM_LEFT_ANGLE_EQUAL, "<="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_RIGHT_ANGLE_EQUAL, ">="),
|
LEX_TOKEN_DEF_W(IVY_SYM_RIGHT_ANGLE_EQUAL, ">="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
|
LEX_TOKEN_DEF_W(IVY_SYM_DOUBLE_LEFT_ANGLE_EQUAL, "<<="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
|
LEX_TOKEN_DEF_W(IVY_SYM_DOUBLE_RIGHT_ANGLE_EQUAL, ">>="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_PLUS_EQUAL, "+="),
|
LEX_TOKEN_DEF_W(IVY_SYM_PLUS_EQUAL, "+="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_HYPHEN_EQUAL, "-="),
|
LEX_TOKEN_DEF_W(IVY_SYM_HYPHEN_EQUAL, "-="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
|
LEX_TOKEN_DEF_W(IVY_SYM_FORWARD_SLASH_EQUAL, "/="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_ASTERISK_EQUAL, "*="),
|
LEX_TOKEN_DEF_W(IVY_SYM_ASTERISK_EQUAL, "*="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_AMPERSAND_EQUAL, "&="),
|
LEX_TOKEN_DEF_W(IVY_SYM_AMPERSAND_EQUAL, "&="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_PIPE_EQUAL, "|="),
|
LEX_TOKEN_DEF_W(IVY_SYM_PIPE_EQUAL, "|="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_PERCENT_EQUAL, "%="),
|
LEX_TOKEN_DEF_W(IVY_SYM_PERCENT_EQUAL, "%="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_CARET_EQUAL, "^="),
|
LEX_TOKEN_DEF_W(IVY_SYM_CARET_EQUAL, "^="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_BANG_EQUAL, "!="),
|
LEX_TOKEN_DEF_W(IVY_SYM_BANG_EQUAL, "!="),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_HASH, "#"),
|
LEX_TOKEN_DEF(IVY_SYM_HASH, "#"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
|
LEX_TOKEN_DEF(IVY_SYM_BANG, "!"),
|
||||||
LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
|
LEX_TOKEN_DEF(IVY_SYM_PIPE, "|"),
|
||||||
@@ -111,6 +113,44 @@ static struct lex_token_def symbols[] = {
|
|||||||
};
|
};
|
||||||
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
||||||
|
|
||||||
|
static void report_unrecognised_char(struct ivy_lexer *lex, int c)
|
||||||
|
{
|
||||||
|
struct ivy_diag *diag = ivy_diag_ctx_create_diag(
|
||||||
|
lex->lex_diag_ctx, IVY_LANG_E_UNRECOGNISED_SYMBOL);
|
||||||
|
|
||||||
|
ivy_diag_set_location(diag, lex->lex_cursor_row, lex->lex_cursor_col);
|
||||||
|
ivy_diag_push_msg(diag, IVY_LANG_MSG_UNKNOWN_SYMBOL_ENCOUNTERED);
|
||||||
|
|
||||||
|
const struct ivy_diag_highlight hl[] = {
|
||||||
|
IVY_DIAG_HL(
|
||||||
|
ERROR, lex->lex_cursor_row, lex->lex_cursor_col,
|
||||||
|
lex->lex_cursor_row, lex->lex_cursor_col),
|
||||||
|
};
|
||||||
|
const size_t nr_hl = sizeof hl / sizeof hl[0];
|
||||||
|
|
||||||
|
ivy_diag_push_snippet(
|
||||||
|
diag, lex->lex_cursor_row, lex->lex_cursor_row, NULL, 0, hl, nr_hl);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void report_missing_whitespace(struct ivy_lexer *lex, int msg)
|
||||||
|
{
|
||||||
|
struct ivy_diag *diag = ivy_diag_ctx_create_diag(
|
||||||
|
lex->lex_diag_ctx, IVY_LANG_E_MISSING_WHITESPACE);
|
||||||
|
|
||||||
|
ivy_diag_set_location(diag, lex->lex_cursor_row, lex->lex_cursor_col);
|
||||||
|
ivy_diag_push_msg(diag, msg);
|
||||||
|
|
||||||
|
const struct ivy_diag_highlight hl[] = {
|
||||||
|
IVY_DIAG_HL(
|
||||||
|
ERROR, lex->lex_token_start_row, lex->lex_token_start_col,
|
||||||
|
lex->lex_token_end_row, lex->lex_token_end_col),
|
||||||
|
};
|
||||||
|
const size_t nr_hl = sizeof hl / sizeof hl[0];
|
||||||
|
|
||||||
|
ivy_diag_push_snippet(
|
||||||
|
diag, lex->lex_cursor_row, lex->lex_cursor_row, NULL, 0, hl, nr_hl);
|
||||||
|
}
|
||||||
|
|
||||||
static struct lexer_state *push_lexer_state(
|
static struct lexer_state *push_lexer_state(
|
||||||
struct ivy_lexer *lex, enum lexer_state_type state_type)
|
struct ivy_lexer *lex, enum lexer_state_type state_type)
|
||||||
{
|
{
|
||||||
@@ -209,14 +249,14 @@ static enum ivy_status put_symbol(
|
|||||||
|
|
||||||
memset(child, 0x0, sizeof *child);
|
memset(child, 0x0, sizeof *child);
|
||||||
|
|
||||||
child->s_id = IVY_SYM_NONE;
|
child->s_def = NULL;
|
||||||
child->s_char = c;
|
child->s_char = c;
|
||||||
|
|
||||||
b_queue_push_back(&tree->s_children, &child->s_entry);
|
b_queue_push_back(&tree->s_children, &child->s_entry);
|
||||||
tree = child;
|
tree = child;
|
||||||
}
|
}
|
||||||
|
|
||||||
tree->s_id = sym->id;
|
tree->s_def = sym;
|
||||||
return IVY_OK;
|
return IVY_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -245,7 +285,7 @@ static struct ivy_lexer_symbol_node *build_symbol_tree(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
memset(root, 0x0, sizeof *root);
|
memset(root, 0x0, sizeof *root);
|
||||||
root->s_id = IVY_SYM_NONE;
|
root->s_def = NULL;
|
||||||
|
|
||||||
enum ivy_status status = IVY_OK;
|
enum ivy_status status = IVY_OK;
|
||||||
for (size_t i = 0; i < nr_symbols; i++) {
|
for (size_t i = 0; i < nr_symbols; i++) {
|
||||||
@@ -380,6 +420,11 @@ static enum ivy_status refill_linebuf(struct ivy_lexer *lex)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int peek_prev(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
return lex->lex_prev_char;
|
||||||
|
}
|
||||||
|
|
||||||
static int peek(struct ivy_lexer *lex)
|
static int peek(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
enum ivy_status status = IVY_OK;
|
enum ivy_status status = IVY_OK;
|
||||||
@@ -441,9 +486,11 @@ static int advance(struct ivy_lexer *lex)
|
|||||||
}
|
}
|
||||||
|
|
||||||
int c = lex->lex_linebuf[lex->lex_linebuf_ptr++];
|
int c = lex->lex_linebuf[lex->lex_linebuf_ptr++];
|
||||||
|
lex->lex_prev_char = c;
|
||||||
|
lex->lex_cur_char = lex->lex_linebuf[lex->lex_linebuf_ptr];
|
||||||
|
|
||||||
lex->lex_cursor_col++;
|
lex->lex_cursor_col++;
|
||||||
if (c == '\n') {
|
if (lex->lex_cur_char == '\n') {
|
||||||
lex->lex_cursor_col = 1;
|
lex->lex_cursor_col = 1;
|
||||||
lex->lex_cursor_row++;
|
lex->lex_cursor_row++;
|
||||||
}
|
}
|
||||||
@@ -802,73 +849,16 @@ static enum ivy_status read_string_content(struct ivy_lexer *lex)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
static enum ivy_status read_number(struct ivy_lexer *lex, bool negate)
|
||||||
{
|
|
||||||
struct ivy_lexer_symbol_node *node = lex->lex_sym_tree;
|
|
||||||
struct lexer_state *state = get_lexer_state(lex);
|
|
||||||
set_token_start(lex);
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
int c = peek(lex);
|
|
||||||
if (c < 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ivy_lexer_symbol_node *next = get_symbol_node(node, c);
|
|
||||||
if (!next) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
node = next;
|
|
||||||
set_token_end(lex);
|
|
||||||
advance(lex);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!node || node->s_id == IVY_SYM_NONE) {
|
|
||||||
return IVY_ERR_BAD_SYNTAX;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (node->s_id) {
|
|
||||||
case IVY_SYM_SQUOTE:
|
|
||||||
return read_squote_marker(lex);
|
|
||||||
case IVY_SYM_DQUOTE:
|
|
||||||
return read_dquote_marker(lex);
|
|
||||||
case IVY_SYM_FORWARD_SLASH_ASTERISK:
|
|
||||||
return read_block_comment(lex);
|
|
||||||
case IVY_SYM_DOUBLE_HYPHEN:
|
|
||||||
return read_line_comment(lex);
|
|
||||||
case IVY_SYM_DOLLAR:
|
|
||||||
return read_atom(lex);
|
|
||||||
case IVY_SYM_LEFT_BRACE:
|
|
||||||
push_symbol(lex, node->s_id);
|
|
||||||
lex->lex_brace_depth++;
|
|
||||||
|
|
||||||
if (state->s_type == STATE_FSTRING) {
|
|
||||||
push_lexer_state(lex, STATE_INTERPOLATION);
|
|
||||||
}
|
|
||||||
return IVY_OK;
|
|
||||||
case IVY_SYM_RIGHT_BRACE:
|
|
||||||
push_symbol(lex, node->s_id);
|
|
||||||
lex->lex_brace_depth--;
|
|
||||||
|
|
||||||
if (state->s_type == STATE_INTERPOLATION
|
|
||||||
&& lex->lex_brace_depth < state->s_brace_depth) {
|
|
||||||
pop_lexer_state(lex);
|
|
||||||
}
|
|
||||||
return IVY_OK;
|
|
||||||
default:
|
|
||||||
push_symbol(lex, node->s_id);
|
|
||||||
return IVY_OK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static enum ivy_status read_number(struct ivy_lexer *lex)
|
|
||||||
{
|
{
|
||||||
int token_len = 0;
|
int token_len = 0;
|
||||||
int base = 10;
|
int base = 10;
|
||||||
int dots = 0;
|
int dots = 0;
|
||||||
b_string *str = get_temp_string(lex);
|
b_string *str = get_temp_string(lex);
|
||||||
set_token_start(lex);
|
|
||||||
|
if (!negate) {
|
||||||
|
set_token_start(lex);
|
||||||
|
}
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
int c = peek(lex);
|
int c = peek(lex);
|
||||||
@@ -969,6 +959,10 @@ static enum ivy_status read_number(struct ivy_lexer *lex)
|
|||||||
return IVY_ERR_BAD_SYNTAX;
|
return IVY_ERR_BAD_SYNTAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (negate) {
|
||||||
|
v *= -1;
|
||||||
|
}
|
||||||
|
|
||||||
return push_double(lex, v);
|
return push_double(lex, v);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@@ -978,15 +972,95 @@ static enum ivy_status read_number(struct ivy_lexer *lex)
|
|||||||
return IVY_ERR_BAD_SYNTAX;
|
return IVY_ERR_BAD_SYNTAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (negate) {
|
||||||
|
v *= -1;
|
||||||
|
}
|
||||||
|
|
||||||
return push_int(lex, v);
|
return push_int(lex, v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static enum ivy_status read_symbol(struct ivy_lexer *lex)
|
||||||
|
{
|
||||||
|
struct ivy_lexer_symbol_node *node = lex->lex_sym_tree;
|
||||||
|
struct lexer_state *state = get_lexer_state(lex);
|
||||||
|
set_token_start(lex);
|
||||||
|
char prefix = peek_prev(lex);
|
||||||
|
char prev = 0;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
int c = peek(lex);
|
||||||
|
if (c < 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ivy_lexer_symbol_node *next = get_symbol_node(node, c);
|
||||||
|
if (!next) {
|
||||||
|
prev = c;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
node = next;
|
||||||
|
set_token_end(lex);
|
||||||
|
advance(lex);
|
||||||
|
prev = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!node || node->s_def == NULL) {
|
||||||
|
return IVY_ERR_BAD_SYNTAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->s_def->id == IVY_SYM_HYPHEN && isdigit(prev)) {
|
||||||
|
return read_number(lex, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((node->s_def->flags & LEX_TOK_REQUIRES_WHITESPACE)
|
||||||
|
&& (!isspace(prev) || !isspace(prefix))) {
|
||||||
|
report_missing_whitespace(
|
||||||
|
lex, IVY_LANG_MSG_WHITESPACE_REQUIRED_AROUND_BINARY_OP);
|
||||||
|
return IVY_ERR_BAD_SYNTAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (node->s_def->id) {
|
||||||
|
case IVY_SYM_SQUOTE:
|
||||||
|
return read_squote_marker(lex);
|
||||||
|
case IVY_SYM_DQUOTE:
|
||||||
|
return read_dquote_marker(lex);
|
||||||
|
case IVY_SYM_FORWARD_SLASH_ASTERISK:
|
||||||
|
return read_block_comment(lex);
|
||||||
|
case IVY_SYM_DOUBLE_HYPHEN:
|
||||||
|
return read_line_comment(lex);
|
||||||
|
case IVY_SYM_DOLLAR:
|
||||||
|
return read_atom(lex);
|
||||||
|
case IVY_SYM_LEFT_BRACE:
|
||||||
|
push_symbol(lex, node->s_def->id);
|
||||||
|
lex->lex_brace_depth++;
|
||||||
|
|
||||||
|
if (state->s_type == STATE_FSTRING) {
|
||||||
|
push_lexer_state(lex, STATE_INTERPOLATION);
|
||||||
|
}
|
||||||
|
return IVY_OK;
|
||||||
|
case IVY_SYM_RIGHT_BRACE:
|
||||||
|
push_symbol(lex, node->s_def->id);
|
||||||
|
lex->lex_brace_depth--;
|
||||||
|
|
||||||
|
if (state->s_type == STATE_INTERPOLATION
|
||||||
|
&& lex->lex_brace_depth < state->s_brace_depth) {
|
||||||
|
pop_lexer_state(lex);
|
||||||
|
}
|
||||||
|
return IVY_OK;
|
||||||
|
default:
|
||||||
|
push_symbol(lex, node->s_def->id);
|
||||||
|
return IVY_OK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static enum ivy_status read_ident(struct ivy_lexer *lex)
|
static enum ivy_status read_ident(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
b_string *str = get_temp_string(lex);
|
b_string *str = get_temp_string(lex);
|
||||||
bool label = false;
|
bool label = false;
|
||||||
set_token_start(lex);
|
set_token_start(lex);
|
||||||
|
char prev = 0;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
int c = peek(lex);
|
int c = peek(lex);
|
||||||
@@ -1002,14 +1076,19 @@ static enum ivy_status read_ident(struct ivy_lexer *lex)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isalnum(c) && c != '_') {
|
if (!isalnum(c) && c != '_' && c != '-') {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (c == '-' && prev == '-') {
|
||||||
|
return IVY_ERR_BAD_SYNTAX;
|
||||||
|
}
|
||||||
|
|
||||||
char s[2] = {c, 0};
|
char s[2] = {c, 0};
|
||||||
b_string_append_cstr(str, s);
|
b_string_append_cstr(str, s);
|
||||||
set_token_end(lex);
|
set_token_end(lex);
|
||||||
advance(lex);
|
advance(lex);
|
||||||
|
prev = c;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *s = b_string_ptr(str);
|
const char *s = b_string_ptr(str);
|
||||||
@@ -1029,25 +1108,6 @@ static enum ivy_status read_ident(struct ivy_lexer *lex)
|
|||||||
return push_token(lex, tok);
|
return push_token(lex, tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void report_unrecognised_char(struct ivy_lexer *lex, int c)
|
|
||||||
{
|
|
||||||
struct ivy_diag *diag = ivy_diag_ctx_create_diag(
|
|
||||||
lex->lex_diag_ctx, IVY_LANG_E_UNRECOGNISED_SYMBOL);
|
|
||||||
|
|
||||||
ivy_diag_set_location(diag, lex->lex_cursor_row, lex->lex_cursor_col);
|
|
||||||
ivy_diag_push_msg(diag, IVY_LANG_MSG_UNKNOWN_SYMBOL_ENCOUNTERED);
|
|
||||||
|
|
||||||
const struct ivy_diag_highlight hl[] = {
|
|
||||||
IVY_DIAG_HL(
|
|
||||||
ERROR, lex->lex_cursor_row, lex->lex_cursor_col,
|
|
||||||
lex->lex_cursor_row, lex->lex_cursor_col),
|
|
||||||
};
|
|
||||||
const size_t nr_hl = sizeof hl / sizeof hl[0];
|
|
||||||
|
|
||||||
ivy_diag_push_snippet(
|
|
||||||
diag, lex->lex_cursor_row, lex->lex_cursor_row, NULL, 0, hl, nr_hl);
|
|
||||||
}
|
|
||||||
|
|
||||||
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
||||||
{
|
{
|
||||||
struct lexer_state *state = get_lexer_state(lex);
|
struct lexer_state *state = get_lexer_state(lex);
|
||||||
@@ -1106,7 +1166,7 @@ static enum ivy_status pump_tokens(struct ivy_lexer *lex)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isdigit(c)) {
|
if (isdigit(c)) {
|
||||||
return read_number(lex);
|
return read_number(lex, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
report_unrecognised_char(lex, c);
|
report_unrecognised_char(lex, c);
|
||||||
|
|||||||
@@ -8,12 +8,17 @@
|
|||||||
#include <ivy/status.h>
|
#include <ivy/status.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
enum lex_token_flags {
|
||||||
|
LEX_TOK_REQUIRES_WHITESPACE = 0x01u,
|
||||||
|
};
|
||||||
|
|
||||||
struct ivy_lexer {
|
struct ivy_lexer {
|
||||||
struct ivy_lexer_symbol_node *lex_sym_tree;
|
struct ivy_lexer_symbol_node *lex_sym_tree;
|
||||||
struct ivy_diag_ctx *lex_diag_ctx;
|
struct ivy_diag_ctx *lex_diag_ctx;
|
||||||
struct ivy_line_source *lex_source;
|
struct ivy_line_source *lex_source;
|
||||||
b_dict *lex_keywords;
|
b_dict *lex_keywords;
|
||||||
enum ivy_status lex_status;
|
enum ivy_status lex_status;
|
||||||
|
int lex_prev_char, lex_cur_char;
|
||||||
|
|
||||||
b_queue lex_queue;
|
b_queue lex_queue;
|
||||||
enum ivy_token_type lex_prev_token;
|
enum ivy_token_type lex_prev_token;
|
||||||
@@ -47,7 +52,7 @@ struct lexer_state {
|
|||||||
|
|
||||||
struct ivy_lexer_symbol_node {
|
struct ivy_lexer_symbol_node {
|
||||||
char s_char;
|
char s_char;
|
||||||
enum ivy_symbol s_id;
|
struct lex_token_def *s_def;
|
||||||
|
|
||||||
b_queue_entry s_entry;
|
b_queue_entry s_entry;
|
||||||
b_queue s_children;
|
b_queue s_children;
|
||||||
@@ -55,6 +60,7 @@ struct ivy_lexer_symbol_node {
|
|||||||
|
|
||||||
struct lex_token_def {
|
struct lex_token_def {
|
||||||
int id;
|
int id;
|
||||||
|
enum lex_token_flags flags;
|
||||||
const char *name;
|
const char *name;
|
||||||
uint64_t name_hash;
|
uint64_t name_hash;
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user