Identifiers can now contain hyphens, with the following restrictions:
* An identifier cannot start or end with a hyphen.
* An identifier cannot contain more than one hyphen in a row.

Kebab-case identifiers can be used for type and variable names, as well as message identifiers and labels.

To avoid ambiguity, the lexer now enforces whitespace around most binary operators (with a few exceptions, such as semicolons). Trying to compile a "compact" arithmetic expression, such as `y=1+2`, will now result in a "missing whitespace" error.
69 lines
1.3 KiB
C
69 lines
1.3 KiB
C
/*
 * Include guard for this header.
 *
 * The guard macro deliberately does not begin with an underscore followed by
 * an uppercase letter: identifiers of that form are reserved for the C
 * implementation, so defining one in project code is undefined behavior.
 */
#ifndef IVY_LEX_INTERNAL_H_
#define IVY_LEX_INTERNAL_H_
|
|
|
|
#include <blue/core/queue.h>
|
|
#include <blue/ds/dict.h>
|
|
#include <blue/ds/string.h>
|
|
#include <ivy/lang/lex.h>
|
|
#include <ivy/status.h>
|
|
#include <stdint.h>
|
|
|
|
/*
 * Flag bits attached to a token definition (see the `flags` member of
 * struct lex_token_def). Each enumerator occupies a distinct bit.
 */
enum lex_token_flags {
	/*
	 * NOTE(review): presumably marks tokens that the lexer requires to be
	 * surrounded by whitespace in the input -- confirm against the lexer
	 * implementation.
	 */
	LEX_TOK_REQUIRES_WHITESPACE = 1u << 0,
};
|
|
|
|
struct ivy_lexer {
|
|
struct ivy_lexer_symbol_node *lex_sym_tree;
|
|
struct ivy_diag_ctx *lex_diag_ctx;
|
|
struct ivy_line_source *lex_source;
|
|
b_dict *lex_keywords;
|
|
enum ivy_status lex_status;
|
|
int lex_prev_char, lex_cur_char;
|
|
|
|
b_queue lex_queue;
|
|
enum ivy_token_type lex_prev_token;
|
|
|
|
b_string *lex_temp;
|
|
b_queue lex_state;
|
|
unsigned int lex_brace_depth;
|
|
|
|
unsigned long lex_token_start_row, lex_token_start_col;
|
|
unsigned long lex_token_end_row, lex_token_end_col;
|
|
unsigned long lex_cursor_row, lex_cursor_col;
|
|
|
|
char *lex_linebuf;
|
|
size_t lex_linebuf_len;
|
|
size_t lex_linebuf_cap;
|
|
size_t lex_linebuf_ptr;
|
|
};
|
|
|
|
/*
 * Kinds of lexer state, used as the `s_type` member of struct lexer_state.
 * The values are spelled out explicitly and match the implicit numbering
 * (0, 1, 2, 3).
 *
 * NOTE(review): judging by the names, these distinguish ordinary code, a
 * string literal, a formatted string and an interpolated expression --
 * confirm against the lexer implementation.
 */
enum lexer_state_type {
	STATE_NORMAL = 0,
	STATE_STRING = 1,
	STATE_FSTRING = 2,
	STATE_INTERPOLATION = 3,
};
|
|
|
|
struct lexer_state {
|
|
enum lexer_state_type s_type;
|
|
unsigned int s_brace_depth;
|
|
b_queue_entry s_entry;
|
|
};
|
|
|
|
struct ivy_lexer_symbol_node {
|
|
char s_char;
|
|
struct lex_token_def *s_def;
|
|
|
|
b_queue_entry s_entry;
|
|
b_queue s_children;
|
|
};
|
|
|
|
struct lex_token_def {
|
|
int id;
|
|
enum lex_token_flags flags;
|
|
const char *name;
|
|
uint64_t name_hash;
|
|
};
|
|
|
|
#endif
|