899 lines
17 KiB
C
899 lines
17 KiB
C
#include "lex.h"
|
|
|
|
#include <blue/core/hash.h>
|
|
#include <blue/core/misc.h>
|
|
#include <blue/core/queue.h>
|
|
#include <blue/ds/dict.h>
|
|
#include <blue/ds/number.h>
|
|
#include <blue/ds/string.h>
|
|
#include <ctype.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <wctype.h>
|
|
|
|
#define LINEBUF_DEFAULT_CAPACITY 1024

/* Builds one `symbols` table entry from a symbol id and its spelling. */
#define LEX_TOKEN_DEF(i, n) {.id = (i), .name = (n)}

/*
 * Character-class helpers used by the lexer.
 *
 * Each macro evaluates its argument more than once, so only pass
 * expressions without side effects. The argument is parenthesized so that
 * compound expressions expand correctly.
 */

/* True if `c` may appear inside an identifier. */
#define IS_VALID_IDENT_CHAR(c) \
	(b_wchar_is_alnum(c) || (c) == '.' || (c) == '-' || (c) == '_')

/* True if `c` may begin an identifier. */
#define IS_VALID_IDENT_START_CHAR(c) \
	(b_wchar_is_alpha(c) || (c) == '.' || (c) == '_')

/* True if `c` may begin a register-style name (digits are allowed here). */
#define IS_VALID_REG_START_CHAR(c) \
	(b_wchar_is_alnum(c) || (c) == '.' || (c) == '_')
|
|
|
|
static struct lex_token_def symbols[] = {
|
|
LEX_TOKEN_DEF(MIE_SYM_COLON, ":"),
|
|
LEX_TOKEN_DEF(MIE_SYM_EQUAL, "="),
|
|
LEX_TOKEN_DEF(MIE_SYM_COMMA, ","),
|
|
LEX_TOKEN_DEF(MIE_SYM_HYPHEN, "-"),
|
|
LEX_TOKEN_DEF(MIE_SYM_ASTERISK, "*"),
|
|
LEX_TOKEN_DEF(MIE_SYM_PLUS, "+"),
|
|
LEX_TOKEN_DEF(MIE_SYM_PERCENT, "%"),
|
|
LEX_TOKEN_DEF(MIE_SYM_DOLLAR, "$"),
|
|
LEX_TOKEN_DEF(MIE_SYM_CARET, "^"),
|
|
LEX_TOKEN_DEF(MIE_SYM_HASH, "#"),
|
|
LEX_TOKEN_DEF(MIE_SYM_ATSIGN, "@"),
|
|
LEX_TOKEN_DEF(MIE_SYM_TILDE, "~"),
|
|
LEX_TOKEN_DEF(MIE_SYM_LEFT_BRACE, "{"),
|
|
LEX_TOKEN_DEF(MIE_SYM_RIGHT_BRACE, "}"),
|
|
LEX_TOKEN_DEF(MIE_SYM_LEFT_BRACKET, "["),
|
|
LEX_TOKEN_DEF(MIE_SYM_RIGHT_BRACKET, "]"),
|
|
LEX_TOKEN_DEF(MIE_SYM_LEFT_PAREN, "("),
|
|
LEX_TOKEN_DEF(MIE_SYM_RIGHT_PAREN, ")"),
|
|
LEX_TOKEN_DEF(MIE_SYM_LEFT_ANGLE, "<"),
|
|
LEX_TOKEN_DEF(MIE_SYM_RIGHT_ANGLE, ">"),
|
|
LEX_TOKEN_DEF(MIE_SYM_HYPHEN_RIGHT_ANGLE, "->"),
|
|
};
|
|
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
|
|
|
|
static struct mie_lex_symbol_node *get_symbol_node(
|
|
struct mie_lex_symbol_node *node, char c)
|
|
{
|
|
b_queue_entry *entry = b_queue_first(&node->s_children);
|
|
while (entry) {
|
|
struct mie_lex_symbol_node *child
|
|
= b_unbox(struct mie_lex_symbol_node, entry, s_entry);
|
|
if (child->s_char == c) {
|
|
return child;
|
|
}
|
|
|
|
entry = b_queue_next(entry);
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static b_string *get_temp_string(struct mie_lex *lex)
|
|
{
|
|
if (!lex->lex_temp) {
|
|
lex->lex_temp = b_string_create();
|
|
}
|
|
|
|
b_string_clear(lex->lex_temp);
|
|
return lex->lex_temp;
|
|
}
|
|
|
|
static enum mie_status put_symbol(
|
|
struct mie_lex_symbol_node *tree, struct lex_token_def *sym)
|
|
{
|
|
for (size_t i = 0; sym->name[i]; i++) {
|
|
char c = sym->name[i];
|
|
struct mie_lex_symbol_node *child = get_symbol_node(tree, c);
|
|
if (child) {
|
|
tree = child;
|
|
continue;
|
|
}
|
|
|
|
child = malloc(sizeof *child);
|
|
if (!child) {
|
|
return MIE_ERR_NO_MEMORY;
|
|
}
|
|
|
|
memset(child, 0x0, sizeof *child);
|
|
|
|
child->s_def = NULL;
|
|
child->s_char = c;
|
|
|
|
b_queue_push_back(&tree->s_children, &child->s_entry);
|
|
tree = child;
|
|
}
|
|
|
|
tree->s_def = sym;
|
|
return MIE_SUCCESS;
|
|
}
|
|
|
|
static void destroy_symbol_tree(struct mie_lex_symbol_node *tree)
|
|
{
|
|
b_queue_entry *entry = b_queue_first(&tree->s_children);
|
|
while (entry) {
|
|
struct mie_lex_symbol_node *node
|
|
= b_unbox(struct mie_lex_symbol_node, entry, s_entry);
|
|
b_queue_entry *next = b_queue_next(entry);
|
|
b_queue_delete(&tree->s_children, entry);
|
|
|
|
destroy_symbol_tree(node);
|
|
|
|
entry = next;
|
|
}
|
|
|
|
free(tree);
|
|
}
|
|
|
|
static struct mie_lex_symbol_node *build_symbol_tree(void)
|
|
{
|
|
struct mie_lex_symbol_node *root = malloc(sizeof *root);
|
|
if (!root) {
|
|
return NULL;
|
|
}
|
|
|
|
memset(root, 0x0, sizeof *root);
|
|
root->s_def = NULL;
|
|
|
|
enum mie_status status = MIE_SUCCESS;
|
|
for (size_t i = 0; i < nr_symbols; i++) {
|
|
status = put_symbol(root, &symbols[i]);
|
|
|
|
if (status != MIE_SUCCESS) {
|
|
destroy_symbol_tree(root);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
return root;
|
|
}
|
|
|
|
/*
 * Creates a lexer that reads its input from `src`.
 *
 * The cursor starts at row 1, column 1 and the status at MIE_SUCCESS.
 * `src` is only stored; this function does not read from it.
 *
 * Returns the new lexer, or NULL if any allocation fails. Release the
 * result with mie_lex_destroy().
 */
struct mie_lex *mie_lex_create(b_stream *src)
{
	struct mie_lex *lex = malloc(sizeof *lex);
	if (!lex) {
		return NULL;
	}

	memset(lex, 0x0, sizeof *lex);

	lex->lex_cursor_row = lex->lex_cursor_col = 1;

	lex->lex_status = MIE_SUCCESS;
	lex->lex_source = src;

	/* NOTE(review): assumes b_string_create() reports failure by
	 * returning NULL -- confirm against the b_string API. */
	lex->lex_linebuf = b_string_create();
	if (!lex->lex_linebuf) {
		mie_lex_destroy(lex);
		return NULL;
	}

	lex->lex_sym_tree = build_symbol_tree();
	if (!lex->lex_sym_tree) {
		mie_lex_destroy(lex);
		return NULL;
	}

	return lex;
}
|
|
|
|
void mie_lex_destroy(struct mie_lex *lex)
|
|
{
|
|
b_queue_entry *entry = b_queue_first(&lex->lex_queue);
|
|
|
|
while (entry) {
|
|
struct mie_token *tok
|
|
= b_unbox(struct mie_token, entry, tok_entry);
|
|
b_queue_entry *next = b_queue_next(entry);
|
|
b_queue_delete(&lex->lex_queue, entry);
|
|
|
|
mie_token_destroy(tok);
|
|
|
|
entry = next;
|
|
}
|
|
|
|
if (lex->lex_linebuf) {
|
|
free(lex->lex_linebuf);
|
|
}
|
|
|
|
if (lex->lex_sym_tree) {
|
|
destroy_symbol_tree(lex->lex_sym_tree);
|
|
}
|
|
|
|
if (lex->lex_temp) {
|
|
b_string_unref(lex->lex_temp);
|
|
}
|
|
|
|
free(lex);
|
|
}
|
|
|
|
enum mie_status mie_lex_get_status(const struct mie_lex *lex)
|
|
{
|
|
return lex->lex_status;
|
|
}
|
|
|
|
static enum mie_status refill_linebuf(struct mie_lex *lex)
|
|
{
|
|
if (!lex->lex_source) {
|
|
return MIE_ERR_EOF;
|
|
}
|
|
|
|
if (lex->lex_linebuf_ptr) {
|
|
b_iterator_unref(lex->lex_linebuf_ptr);
|
|
lex->lex_linebuf_ptr = NULL;
|
|
}
|
|
|
|
b_stringstream *s = b_stringstream_create();
|
|
|
|
b_status status = b_stream_read_line_s(lex->lex_source, s);
|
|
|
|
if (status == B_ERR_NO_DATA) {
|
|
return MIE_ERR_EOF;
|
|
}
|
|
|
|
if (!B_OK(status)) {
|
|
return MIE_ERR_INTERNAL_FAILURE;
|
|
}
|
|
|
|
b_string_replace_all_with_stringstream(lex->lex_linebuf, s);
|
|
b_stringstream_unref(s);
|
|
|
|
lex->lex_linebuf_ptr = b_iterator_begin(lex->lex_linebuf);
|
|
|
|
return MIE_SUCCESS;
|
|
}
|
|
|
|
static int peek(struct mie_lex *lex)
|
|
{
|
|
enum mie_status status = MIE_SUCCESS;
|
|
|
|
if (!lex->lex_linebuf_ptr || !b_iterator_is_valid(lex->lex_linebuf_ptr)) {
|
|
status = refill_linebuf(lex);
|
|
}
|
|
|
|
if (status != MIE_SUCCESS) {
|
|
return -status;
|
|
}
|
|
|
|
if (b_string_get_size(lex->lex_linebuf, B_STRLEN_NORMAL) == 0) {
|
|
return -MIE_ERR_EOF;
|
|
}
|
|
|
|
b_wchar c = b_iterator_get_value(lex->lex_linebuf_ptr).v_int;
|
|
return c;
|
|
}
|
|
|
|
static int advance(struct mie_lex *lex)
|
|
{
|
|
enum mie_status status = MIE_SUCCESS;
|
|
|
|
if (!b_iterator_is_valid(lex->lex_linebuf_ptr)) {
|
|
status = refill_linebuf(lex);
|
|
}
|
|
|
|
if (status != MIE_SUCCESS) {
|
|
return -status;
|
|
}
|
|
|
|
if (b_string_get_size(lex->lex_linebuf, B_STRLEN_NORMAL) == 0) {
|
|
return -MIE_ERR_EOF;
|
|
}
|
|
|
|
b_wchar c = b_iterator_get_value(lex->lex_linebuf_ptr).v_int;
|
|
b_iterator_move_next(lex->lex_linebuf_ptr);
|
|
|
|
lex->lex_cursor_col++;
|
|
if (c == '\n') {
|
|
lex->lex_cursor_col = 1;
|
|
lex->lex_cursor_row++;
|
|
}
|
|
return c;
|
|
}
|
|
|
|
static bool input_available(struct mie_lex *lex)
|
|
{
|
|
return lex->lex_linebuf_ptr && b_iterator_is_valid(lex->lex_linebuf_ptr);
|
|
}
|
|
|
|
static bool char_can_begin_symbol(char c)
|
|
{
|
|
for (size_t i = 0; i < nr_symbols; i++) {
|
|
if (symbols[i].name[0] == c) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static struct mie_token *create_token(enum mie_token_type type)
|
|
{
|
|
struct mie_token *tok = malloc(sizeof *tok);
|
|
if (!tok) {
|
|
return NULL;
|
|
}
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
tok->tok_type = type;
|
|
return tok;
|
|
}
|
|
|
|
static void set_token_start(struct mie_lex *lex)
|
|
{
|
|
lex->lex_token_start_row = lex->lex_cursor_row;
|
|
lex->lex_token_start_col = lex->lex_cursor_col;
|
|
}
|
|
|
|
static void set_token_end(struct mie_lex *lex)
|
|
{
|
|
lex->lex_token_end_row = lex->lex_cursor_row;
|
|
lex->lex_token_end_col = lex->lex_cursor_col;
|
|
}
|
|
|
|
static enum mie_status push_token(struct mie_lex *lex, struct mie_token *tok)
|
|
{
|
|
tok->tok_location.s_start.c_row = lex->lex_token_start_row;
|
|
tok->tok_location.s_start.c_col = lex->lex_token_start_col;
|
|
tok->tok_location.s_end.c_row = lex->lex_token_end_row;
|
|
tok->tok_location.s_end.c_col = lex->lex_token_end_col;
|
|
|
|
b_queue_push_back(&lex->lex_queue, &tok->tok_entry);
|
|
return MIE_SUCCESS;
|
|
}
|
|
|
|
static enum mie_status push_linefeed(struct mie_lex *lex)
|
|
{
|
|
struct mie_token *tok = malloc(sizeof *tok);
|
|
if (!tok) {
|
|
return MIE_ERR_NO_MEMORY;
|
|
}
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
tok->tok_type = MIE_TOK_LINEFEED;
|
|
tok->tok_value_type = MIE_TOK_V_NONE;
|
|
return push_token(lex, tok);
|
|
}
|
|
|
|
static enum mie_status push_symbol(struct mie_lex *lex, enum mie_token_symbol sym)
|
|
{
|
|
struct mie_token *tok = malloc(sizeof *tok);
|
|
if (!tok) {
|
|
return MIE_ERR_NO_MEMORY;
|
|
}
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
tok->tok_type = MIE_TOK_SYMBOL;
|
|
tok->tok_value_type = MIE_TOK_V_SYMBOL;
|
|
tok->tok_sym = sym;
|
|
return push_token(lex, tok);
|
|
}
|
|
|
|
static enum mie_status push_string_token(
|
|
struct mie_lex *lex, enum mie_token_type type, char *s)
|
|
{
|
|
struct mie_token *tok = malloc(sizeof *tok);
|
|
if (!tok) {
|
|
return MIE_ERR_NO_MEMORY;
|
|
}
|
|
|
|
char *ep = NULL;
|
|
long long v = strtoll(s, &ep, 10);
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
tok->tok_type = type;
|
|
|
|
if (*ep == '\0') {
|
|
tok->tok_int = v;
|
|
tok->tok_value_type = MIE_TOK_V_INT;
|
|
free(s);
|
|
} else {
|
|
tok->tok_str = s;
|
|
tok->tok_value_type = MIE_TOK_V_STRING;
|
|
}
|
|
|
|
return push_token(lex, tok);
|
|
}
|
|
|
|
static enum mie_status push_int(struct mie_lex *lex, unsigned long long v)
|
|
{
|
|
struct mie_token *tok = malloc(sizeof *tok);
|
|
if (!tok) {
|
|
return MIE_ERR_NO_MEMORY;
|
|
}
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
tok->tok_type = MIE_TOK_INT;
|
|
tok->tok_value_type = MIE_TOK_V_INT;
|
|
tok->tok_int = v;
|
|
return push_token(lex, tok);
|
|
}
|
|
|
|
static enum mie_status push_double(struct mie_lex *lex, double v)
|
|
{
|
|
struct mie_token *tok = malloc(sizeof *tok);
|
|
if (!tok) {
|
|
return MIE_ERR_NO_MEMORY;
|
|
}
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
tok->tok_type = MIE_TOK_DOUBLE;
|
|
tok->tok_value_type = MIE_TOK_V_DOUBLE;
|
|
tok->tok_double = v;
|
|
return push_token(lex, tok);
|
|
}
|
|
|
|
static enum mie_status read_line_comment(struct mie_lex *lex)
|
|
{
|
|
while (true) {
|
|
b_wchar c = advance(lex);
|
|
|
|
if (c == -MIE_ERR_EOF || c == '\n') {
|
|
break;
|
|
}
|
|
|
|
if (c < 0) {
|
|
return -c;
|
|
}
|
|
}
|
|
|
|
return MIE_SUCCESS;
|
|
}
|
|
|
|
static enum mie_status read_number(struct mie_lex *lex, bool negate)
|
|
{
|
|
int token_len = 0;
|
|
int base = 10;
|
|
int dots = 0;
|
|
b_string *str = get_temp_string(lex);
|
|
|
|
if (!negate) {
|
|
set_token_start(lex);
|
|
}
|
|
|
|
while (true) {
|
|
b_wchar c = peek(lex);
|
|
if (c == -MIE_ERR_EOF) {
|
|
break;
|
|
}
|
|
|
|
if (c < 0) {
|
|
return -c;
|
|
}
|
|
|
|
if (c == '_') {
|
|
token_len++;
|
|
set_token_end(lex);
|
|
advance(lex);
|
|
continue;
|
|
}
|
|
|
|
if (c == '.') {
|
|
if (base != 10) {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
if (dots > 0) {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
token_len++;
|
|
dots++;
|
|
char s[] = {c, 0};
|
|
b_string_append_cstr(str, s);
|
|
set_token_end(lex);
|
|
advance(lex);
|
|
continue;
|
|
}
|
|
|
|
if (b_wchar_is_space(c) || b_wchar_is_punct(c)) {
|
|
break;
|
|
}
|
|
|
|
if (c == 'x' && token_len == 1) {
|
|
base = 16;
|
|
token_len++;
|
|
set_token_end(lex);
|
|
advance(lex);
|
|
continue;
|
|
}
|
|
|
|
if (c == 'b' && token_len == 1) {
|
|
base = 2;
|
|
token_len++;
|
|
set_token_end(lex);
|
|
advance(lex);
|
|
continue;
|
|
}
|
|
|
|
if (base == 2 && c != '0' && c != '1') {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
if (base == 10 && !isdigit(c)) {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
if (base == 16 && !isxdigit(c)) {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
b_string_append_wc(str, c);
|
|
set_token_end(lex);
|
|
advance(lex);
|
|
token_len++;
|
|
}
|
|
|
|
if (token_len == 1 && base == 7) {
|
|
return push_int(lex, 0);
|
|
}
|
|
|
|
const char *s = b_string_ptr(str);
|
|
char *ep = NULL;
|
|
|
|
/* negative numbers will be lexed as a hyphen followed by a positive
|
|
* number. */
|
|
|
|
if (dots > 0) {
|
|
double v = strtod(s, &ep);
|
|
|
|
if (*ep != '\0') {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
if (negate) {
|
|
v *= -1;
|
|
}
|
|
|
|
return push_double(lex, v);
|
|
} else {
|
|
unsigned long long v = strtoull(s, &ep, base);
|
|
|
|
if (*ep != '\0') {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
if (negate) {
|
|
v *= -1;
|
|
}
|
|
|
|
return push_int(lex, v);
|
|
}
|
|
}
|
|
|
|
static enum mie_status read_ident(struct mie_lex *lex, enum mie_token_type type)
|
|
{
|
|
int dots = 0;
|
|
b_string *str = get_temp_string(lex);
|
|
b_wchar prev = 0;
|
|
|
|
if (type == MIE_TOK_NONE) {
|
|
set_token_start(lex);
|
|
}
|
|
|
|
while (1) {
|
|
b_wchar c = peek(lex);
|
|
|
|
if ((c == '.' || c == '-') && prev == c) {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
if (c == '.') {
|
|
dots++;
|
|
}
|
|
|
|
if (!IS_VALID_IDENT_CHAR(c)) {
|
|
break;
|
|
}
|
|
|
|
prev = c;
|
|
b_string_append_wc(str, c);
|
|
set_token_end(lex);
|
|
advance(lex);
|
|
}
|
|
|
|
if (type == MIE_TOK_NONE) {
|
|
type = dots > 0 ? MIE_TOK_NAME : MIE_TOK_WORD;
|
|
}
|
|
|
|
char *s = b_string_steal(str);
|
|
|
|
switch (type) {
|
|
case MIE_TOK_INSTNAME:
|
|
if (dots > 0) {
|
|
return push_string_token(lex, type, s);
|
|
} else {
|
|
push_symbol(lex, MIE_SYM_ASTERISK);
|
|
return push_string_token(lex, MIE_TOK_WORD, s);
|
|
}
|
|
break;
|
|
default:
|
|
return push_string_token(lex, type, s);
|
|
}
|
|
}
|
|
|
|
static enum mie_status read_string(struct mie_lex *lex)
|
|
{
|
|
b_string *str = get_temp_string(lex);
|
|
|
|
b_wchar c = peek(lex);
|
|
bool esc = false;
|
|
|
|
if (c != '"') {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
advance(lex);
|
|
|
|
while (1) {
|
|
b_wchar c = peek(lex);
|
|
|
|
if (esc) {
|
|
switch (c) {
|
|
case '\\':
|
|
case '"':
|
|
b_string_append_wc(str, c);
|
|
break;
|
|
default:
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
esc = false;
|
|
advance(lex);
|
|
continue;
|
|
}
|
|
|
|
if (c == '\\') {
|
|
esc = true;
|
|
advance(lex);
|
|
continue;
|
|
}
|
|
|
|
if (c == '"') {
|
|
advance(lex);
|
|
break;
|
|
}
|
|
|
|
b_string_append_wc(str, c);
|
|
advance(lex);
|
|
}
|
|
|
|
char *s = b_string_steal(str);
|
|
return push_string_token(lex, MIE_TOK_STRING, s);
|
|
}
|
|
|
|
static enum mie_status read_symbol(struct mie_lex *lex)
|
|
{
|
|
struct mie_lex_symbol_node *node = lex->lex_sym_tree;
|
|
set_token_start(lex);
|
|
b_wchar prev = 0;
|
|
|
|
while (true) {
|
|
b_wchar c = peek(lex);
|
|
if (c < 0) {
|
|
break;
|
|
}
|
|
|
|
struct mie_lex_symbol_node *next = get_symbol_node(node, c);
|
|
if (!next) {
|
|
prev = c;
|
|
break;
|
|
}
|
|
|
|
node = next;
|
|
set_token_end(lex);
|
|
advance(lex);
|
|
prev = c;
|
|
}
|
|
|
|
if (!node || node->s_def == NULL) {
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
if (node->s_def->id == MIE_SYM_HYPHEN && isdigit(prev)) {
|
|
return read_number(lex, true);
|
|
}
|
|
|
|
if (IS_VALID_IDENT_START_CHAR(prev)) {
|
|
switch (node->s_def->id) {
|
|
case MIE_SYM_ASTERISK:
|
|
return read_ident(lex, MIE_TOK_INSTNAME);
|
|
case MIE_SYM_PLUS:
|
|
return read_ident(lex, MIE_TOK_GRAPHNAME);
|
|
case MIE_SYM_PERCENT:
|
|
return read_ident(lex, MIE_TOK_VREGNAME);
|
|
case MIE_SYM_DOLLAR:
|
|
return read_ident(lex, MIE_TOK_MREGNAME);
|
|
case MIE_SYM_CARET:
|
|
return read_ident(lex, MIE_TOK_BLOCKNAME);
|
|
case MIE_SYM_TILDE:
|
|
return read_ident(lex, MIE_TOK_OPNAME);
|
|
case MIE_SYM_HASH:
|
|
return read_ident(lex, MIE_TOK_TYPENAME);
|
|
case MIE_SYM_ATSIGN:
|
|
return read_ident(lex, MIE_TOK_SYMNAME);
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (IS_VALID_REG_START_CHAR(prev)) {
|
|
switch (node->s_def->id) {
|
|
case MIE_SYM_PERCENT:
|
|
return read_ident(lex, MIE_TOK_VREGNAME);
|
|
case MIE_SYM_DOLLAR:
|
|
return read_ident(lex, MIE_TOK_MREGNAME);
|
|
case MIE_SYM_ATSIGN:
|
|
return read_ident(lex, MIE_TOK_SYMNAME);
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return push_symbol(lex, node->s_def->id);
|
|
}
|
|
|
|
static void skip_whitespace(struct mie_lex *lex)
|
|
{
|
|
b_wchar c = peek(lex);
|
|
|
|
while (b_wchar_is_space(c)) {
|
|
advance(lex);
|
|
c = peek(lex);
|
|
}
|
|
}
|
|
|
|
/*
 * Decides whether `c` is whitespace that may be skipped.
 *
 * Non-whitespace is never skipped. A '\n' is skipped only when
 * `skip_linefeeds` is true; all other whitespace is always skipped.
 */
static bool should_skip(b_wchar c, bool skip_linefeeds)
{
	if (!b_wchar_is_space(c)) {
		return false;
	}

	return skip_linefeeds || c != '\n';
}
|
|
|
|
static void skip_ignored_chars(struct mie_lex *lex, bool include_linefeeds)
|
|
{
|
|
b_wchar c = peek(lex);
|
|
|
|
while (1) {
|
|
while (should_skip(c, include_linefeeds)) {
|
|
advance(lex);
|
|
c = peek(lex);
|
|
}
|
|
|
|
if (c != ';') {
|
|
break;
|
|
}
|
|
|
|
advance(lex);
|
|
c = peek(lex);
|
|
|
|
while (c != '\n') {
|
|
advance(lex);
|
|
c = peek(lex);
|
|
}
|
|
|
|
advance(lex);
|
|
c = peek(lex);
|
|
}
|
|
}
|
|
|
|
static enum mie_status pump_tokens(struct mie_lex *lex)
|
|
{
|
|
b_wchar c = peek(lex);
|
|
|
|
if (c < 0) {
|
|
return -c;
|
|
}
|
|
|
|
while (1) {
|
|
if (c == ';' || (b_wchar_is_space(c) && c != '\n')) {
|
|
skip_ignored_chars(lex, false);
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
c = peek(lex);
|
|
}
|
|
|
|
if (c == '\\') {
|
|
advance(lex);
|
|
skip_ignored_chars(lex, true);
|
|
c = peek(lex);
|
|
}
|
|
|
|
if (c == '\n') {
|
|
set_token_start(lex);
|
|
set_token_end(lex);
|
|
|
|
while (c == '\n') {
|
|
advance(lex);
|
|
|
|
if (!input_available(lex)) {
|
|
break;
|
|
}
|
|
|
|
c = peek(lex);
|
|
}
|
|
|
|
if (c < 0) {
|
|
return -c;
|
|
}
|
|
|
|
return push_linefeed(lex);
|
|
}
|
|
|
|
while (b_wchar_is_space(c) && c != '\n') {
|
|
advance(lex);
|
|
c = peek(lex);
|
|
}
|
|
|
|
if (IS_VALID_IDENT_START_CHAR(c)) {
|
|
return read_ident(lex, MIE_TOK_NONE);
|
|
}
|
|
|
|
if (char_can_begin_symbol(c)) {
|
|
return read_symbol(lex);
|
|
}
|
|
|
|
if (c == '"') {
|
|
return read_string(lex);
|
|
}
|
|
|
|
if (isdigit(c)) {
|
|
return read_number(lex, false);
|
|
}
|
|
|
|
return MIE_ERR_BAD_SYNTAX;
|
|
}
|
|
|
|
struct mie_token *mie_lex_peek(struct mie_lex *lex)
|
|
{
|
|
enum mie_status status = MIE_SUCCESS;
|
|
|
|
while (b_queue_empty(&lex->lex_queue)) {
|
|
status = pump_tokens(lex);
|
|
|
|
if (status != MIE_SUCCESS) {
|
|
lex->lex_status = status;
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
lex->lex_status = status;
|
|
|
|
b_queue_entry *entry = b_queue_first(&lex->lex_queue);
|
|
struct mie_token *tok = b_unbox(struct mie_token, entry, tok_entry);
|
|
return tok;
|
|
}
|
|
|
|
void mie_lex_advance(struct mie_lex *lex)
|
|
{
|
|
enum mie_status status = MIE_SUCCESS;
|
|
|
|
while (b_queue_empty(&lex->lex_queue)) {
|
|
status = pump_tokens(lex);
|
|
|
|
if (status != MIE_SUCCESS) {
|
|
lex->lex_status = status;
|
|
return;
|
|
}
|
|
}
|
|
|
|
b_queue_entry *entry = b_queue_pop_front(&lex->lex_queue);
|
|
struct mie_token *tok = b_unbox(struct mie_token, entry, tok_entry);
|
|
mie_token_destroy(tok);
|
|
}
|
|
|
|
bool mie_lex_tokens_available(struct mie_lex *lex)
|
|
{
|
|
if (!b_queue_empty(&lex->lex_queue)) {
|
|
return true;
|
|
}
|
|
|
|
if (input_available(lex)) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|