lang: lex: add support for kebab-case identifiers and negative numbers

identifiers can now contain hyphens, with the following restrictions:
 * an identifier cannot start or end with a hyphen.
 * an identifier cannot contain more than one hyphen in a row.

kebab-case identifiers can be used for type and variable names, as well
as message identifiers and labels.

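to avoid ambiguity, the lexer now enforces whitespace around most binary
operators (with a few exceptions, such as semicolons): without it, input
such as `a-b` could be read either as a single kebab-case identifier or
as a subtraction. trying to compile a "compact" arithmetic expression,
such as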

	y=1+2

will now result in a "missing whitespace" error.
This commit is contained in:
2025-11-07 09:49:29 +00:00
parent 1a544b6411
commit b0cbe42fc4
4 changed files with 185 additions and 117 deletions

View File

@@ -8,12 +8,17 @@
#include <ivy/status.h>
#include <stdint.h>
/*
 * Bit flags attached to a token definition (see lex_token_def.flags).
 * Each enumerator occupies its own bit so that values can be OR-ed
 * together.
 */
enum lex_token_flags {
	/*
	 * NOTE(review): presumably marks tokens that the lexer requires to
	 * be surrounded by whitespace -- confirm against the lexer code
	 * that tests this flag.
	 */
	LEX_TOK_REQUIRES_WHITESPACE = 1u << 0,
};
struct ivy_lexer {
struct ivy_lexer_symbol_node *lex_sym_tree;
struct ivy_diag_ctx *lex_diag_ctx;
struct ivy_line_source *lex_source;
b_dict *lex_keywords;
enum ivy_status lex_status;
int lex_prev_char, lex_cur_char;
b_queue lex_queue;
enum ivy_token_type lex_prev_token;
@@ -47,7 +52,7 @@ struct lexer_state {
struct ivy_lexer_symbol_node {
char s_char;
enum ivy_symbol s_id;
struct lex_token_def *s_def;
b_queue_entry s_entry;
b_queue s_children;
@@ -55,6 +60,7 @@ struct ivy_lexer_symbol_node {
/*
 * Definition record for a lexer token. Symbol-tree nodes refer to one of
 * these through ivy_lexer_symbol_node.s_def.
 */
struct lex_token_def {
/* numeric identifier of the token (NOTE(review): presumably an ivy keyword/symbol id -- confirm) */
int id;
/* lex_token_flags value(s) for this token, e.g. LEX_TOK_REQUIRES_WHITESPACE */
enum lex_token_flags flags;
/* token name string (NOTE(review): presumably its spelling in source text -- confirm) */
const char *name;
/* NOTE(review): presumably a hash of `name`, computed elsewhere -- confirm */
uint64_t name_hash;
};