mie: start implementing new ir parser

This commit is contained in:
2025-12-21 13:50:41 +00:00
parent fabbe35483
commit 6573360656
8 changed files with 1178 additions and 1 deletions

View File

@@ -11,5 +11,5 @@ else ()
endif () endif ()
target_include_directories(mie PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/) target_include_directories(mie PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/)
target_link_libraries(mie Bluelib::Core Bluelib::Ds) target_link_libraries(mie Bluelib::Core Bluelib::Ds Bluelib::Io)
target_compile_definitions(mie PRIVATE MIE_EXPORT=1 MIE_STATIC=${MIE_STATIC}) target_compile_definitions(mie PRIVATE MIE_EXPORT=1 MIE_STATIC=${MIE_STATIC})

View File

@@ -0,0 +1,18 @@
#ifndef MIE_PARSE_LEX_H_
#define MIE_PARSE_LEX_H_
#include <blue/core/stream.h>
#include <mie/misc.h>
#include <mie/status.h>
/* opaque lexer state; the layout is private to the implementation. */
struct mie_lex;
struct mie_token;
/* create a lexer that reads its input from `src`.
 * returns NULL if the lexer could not be allocated. the stream is only
 * borrowed: mie_lex_destroy() does not release it. */
MIE_API struct mie_lex *mie_lex_create(b_stream *src);
/* destroy a lexer together with every token still waiting in its queue. */
MIE_API void mie_lex_destroy(struct mie_lex *lex);
/* return the status most recently recorded by mie_lex_peek() or
 * mie_lex_advance(). */
MIE_API enum mie_status mie_lex_get_status(const struct mie_lex *lex);
/* return the next token without consuming it, lexing more input if no token
 * is pending. returns NULL on failure; the reason is then available from
 * mie_lex_get_status(). the token stays owned by the lexer. */
MIE_API struct mie_token *mie_lex_peek(struct mie_lex *lex);
/* consume (and destroy) the next token, lexing more input first if no token
 * is pending. */
MIE_API void mie_lex_advance(struct mie_lex *lex);
#endif

View File

@@ -0,0 +1,62 @@
#ifndef MIE_PARSE_PARSE_H_
#define MIE_PARSE_PARSE_H_
#include <blue/ds/string.h>
#include <mie/misc.h>
#include <mie/parse/token.h>
#include <mie/status.h>
#include <stdbool.h>
#include <stddef.h> /* size_t */
struct mie_parse_ctx;
struct mie_lex;
struct mie_ctx;
/* these structs are temporary, and are just here for documentation purposes
 * atm. they are declared as incomplete types instead of with an empty member
 * list: a struct without members is not valid ISO C, and this header only
 * ever refers to them through pointers. */
struct mie_argument;
struct mie_unresolved_operand;
struct mie_region;
struct mie_type;

/* NOTE(review): no implementation of the functions below is visible yet, so
 * the grouping comments are inferred from the names and signatures only --
 * confirm once the parser exists. */

/* parse context lifetime and status. */
MIE_API struct mie_parse_ctx *mie_parse_ctx_create(
	struct mie_ctx *ctx, struct mie_lex *lex);
MIE_API void mie_parse_ctx_destroy(struct mie_parse_ctx *ctx);
MIE_API enum mie_status mie_parse_ctx_get_status(const struct mie_parse_ctx *ctx);

/* token-level lookahead. */
MIE_API enum mie_token_type mie_parse_ctx_peek(struct mie_parse_ctx *ctx);
MIE_API bool mie_parse_ctx_advance(struct mie_parse_ctx *ctx);

/* single-token parsers; presumably each one writes the token text to `out`
 * and returns whether the expected token was found. */
MIE_API bool mie_parse_ctx_parse_instname(struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_graphname(struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_vregname(struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_mregname(struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_blockname(struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_typename(struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_symname(struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_string(struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_keyword(struct mie_parse_ctx *ctx, const char *kw);
MIE_API bool mie_parse_ctx_parse_symbol(
	struct mie_parse_ctx *ctx, enum mie_token_symbol sym);

/* composite constructs. */
MIE_API bool mie_parse_ctx_parse_operand(
	struct mie_parse_ctx *ctx, struct mie_unresolved_operand *out);
MIE_API bool mie_parse_ctx_parse_region(
	struct mie_parse_ctx *ctx, struct mie_region *region);
MIE_API bool mie_parse_ctx_parse_type(
	struct mie_parse_ctx *ctx, struct mie_type **out);
MIE_API bool mie_parse_ctx_parse_assignment_list(
	struct mie_parse_ctx *ctx, struct mie_argument **out_lhs,
	struct mie_unresolved_operand **out_rhs, size_t *out_count);
MIE_API bool mie_parse_ctx_parse_unknown_keyword(
	struct mie_parse_ctx *ctx, b_string *out);
MIE_API bool mie_parse_ctx_parse_unknown_symbol(
	struct mie_parse_ctx *ctx, enum mie_token_symbol sym);
#endif

View File

@@ -0,0 +1,82 @@
#ifndef MIE_PARSE_TOKEN_H_
#define MIE_PARSE_TOKEN_H_
#include <blue/core/queue.h>
#include <mie/misc.h>
/* the lexical category of a token. the prefixed *NAME categories are produced
 * when a sigil character is immediately followed by an identifier. */
enum mie_token_type {
MIE_TOK_NONE = 0,
MIE_TOK_LINEFEED,
MIE_TOK_INT,
MIE_TOK_DOUBLE,
MIE_TOK_SYMBOL,
MIE_TOK_STRING,
MIE_TOK_WORD, /* single words, not dot-delimited */
MIE_TOK_NAME, /* set of words with at least one dot */
MIE_TOK_INSTNAME, /* word or name, prefixed with an * asterisk */
MIE_TOK_SYMNAME, /* word or name, prefixed with an @ at */
MIE_TOK_OPNAME, /* word or name, prefixed with a ~ tilde */
MIE_TOK_GRAPHNAME, /* word or name, prefixed with a + plus */
MIE_TOK_VREGNAME, /* word or name, prefixed with a % percent */
MIE_TOK_MREGNAME, /* word or name, prefixed with a $ dollar */
MIE_TOK_BLOCKNAME, /* word or name, prefixed with a ^ caret */
MIE_TOK_TYPENAME, /* word or name, prefixed with a # hash */
};
/* identifies which member of the value union inside struct mie_token is
 * meaningful for a given token. */
enum mie_token_value_type {
MIE_TOK_V_NONE = 0, /* the token carries no value */
MIE_TOK_V_INT, /* tok_int */
MIE_TOK_V_DOUBLE, /* tok_double */
MIE_TOK_V_STRING, /* tok_str */
MIE_TOK_V_SYMBOL, /* tok_sym */
};
enum mie_token_symbol {
MIE_SYM_NONE = 0,
MIE_SYM_COLON,
MIE_SYM_EQUAL,
MIE_SYM_COMMA,
MIE_SYM_HYPHEN,
MIE_SYM_ASTERISK,
MIE_SYM_PLUS,
MIE_SYM_PERCENT,
MIE_SYM_DOLLAR,
MIE_SYM_CARET,
MIE_SYM_HASH,
MIE_SYM_TILDE,
MIE_SYM_ATSIGN,
MIE_SYM_LEFT_BRACE,
MIE_SYM_RIGHT_BRACE,
MIE_SYM_LEFT_BRACKET,
MIE_SYM_RIGHT_BRACKET,
MIE_SYM_LEFT_PAREN,
MIE_SYM_RIGHT_PAREN,
MIE_SYM_LEFT_ANGLE,
MIE_SYM_RIGHT_ANGLE,
MIE_SYM_HYPHEN_RIGHT_ANGLE,
MIE_SYM_OTHER,
};
/* a position in the source text, as a row/column pair. the lexer starts
 * counting both at 1. */
struct mie_token_location {
unsigned int c_row, c_col;
};
/* a single lexed token. */
struct mie_token {
/* position of the first and the last character of the token */
struct mie_token_location tok_start, tok_end;
enum mie_token_type tok_type;
/* selects the active member of the union below */
enum mie_token_value_type tok_value_type;
/* links the token into the lexer's queue of pending tokens */
b_queue_entry tok_entry;
union {
char *tok_str; /* heap-allocated; released by mie_token_destroy() */
enum mie_token_symbol tok_sym;
long long tok_int;
double tok_double;
};
};
/* free a token, including its string value if it has one. */
MIE_API void mie_token_destroy(struct mie_token *tok);
/* return the enumerator name of `type` (e.g. "MIE_TOK_INT"), or "" for a
 * value that is not recognised. */
MIE_API const char *mie_token_type_to_string(enum mie_token_type type);
/* return the enumerator name of `sym` (e.g. "MIE_SYM_COLON"), or "" for a
 * value that is not recognised. */
MIE_API const char *mie_token_symbol_to_string(enum mie_token_symbol sym);
#endif

898
mie/parse/lex.c Normal file
View File

@@ -0,0 +1,898 @@
#include "lex.h"
#include <blue/core/hash.h>
#include <blue/core/misc.h>
#include <blue/core/queue.h>
#include <blue/ds/dict.h>
#include <blue/ds/number.h>
#include <blue/ds/string.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
/* capacity hint for the line buffer (not referenced by the code shown here) */
#define LINEBUF_DEFAULT_CAPACITY 1024
/* initialiser for one entry of the symbols[] table */
#define LEX_TOKEN_DEF(i, n) {.id = (i), .name = (n)}
/* character classes used by the lexer. every use of the argument is
 * parenthesized so that an expression argument (e.g. a conditional) is
 * classified as a whole. note that the argument is evaluated more than once,
 * so it must not have side effects. */
/* characters allowed inside an identifier */
#define IS_VALID_IDENT_CHAR(c) \
	(b_wchar_is_alnum(c) || (c) == '.' || (c) == '-' || (c) == '_')
/* characters allowed as the first character of an identifier */
#define IS_VALID_IDENT_START_CHAR(c) \
	(b_wchar_is_alpha(c) || (c) == '.' || (c) == '_')
/* like IS_VALID_IDENT_START_CHAR, but digits are accepted too */
#define IS_VALID_REG_START_CHAR(c) \
	(b_wchar_is_alnum(c) || (c) == '.' || (c) == '_')
/* every symbol the lexer recognises, together with its spelling.
 * build_symbol_tree() turns this table into a per-character tree, so a
 * spelling may be a prefix of another one (e.g. "-" and "->"). */
static struct lex_token_def symbols[] = {
LEX_TOKEN_DEF(MIE_SYM_COLON, ":"),
LEX_TOKEN_DEF(MIE_SYM_EQUAL, "="),
LEX_TOKEN_DEF(MIE_SYM_COMMA, ","),
LEX_TOKEN_DEF(MIE_SYM_HYPHEN, "-"),
LEX_TOKEN_DEF(MIE_SYM_ASTERISK, "*"),
LEX_TOKEN_DEF(MIE_SYM_PLUS, "+"),
LEX_TOKEN_DEF(MIE_SYM_PERCENT, "%"),
LEX_TOKEN_DEF(MIE_SYM_DOLLAR, "$"),
LEX_TOKEN_DEF(MIE_SYM_CARET, "^"),
LEX_TOKEN_DEF(MIE_SYM_HASH, "#"),
LEX_TOKEN_DEF(MIE_SYM_ATSIGN, "@"),
LEX_TOKEN_DEF(MIE_SYM_TILDE, "~"),
LEX_TOKEN_DEF(MIE_SYM_LEFT_BRACE, "{"),
LEX_TOKEN_DEF(MIE_SYM_RIGHT_BRACE, "}"),
LEX_TOKEN_DEF(MIE_SYM_LEFT_BRACKET, "["),
LEX_TOKEN_DEF(MIE_SYM_RIGHT_BRACKET, "]"),
LEX_TOKEN_DEF(MIE_SYM_LEFT_PAREN, "("),
LEX_TOKEN_DEF(MIE_SYM_RIGHT_PAREN, ")"),
LEX_TOKEN_DEF(MIE_SYM_LEFT_ANGLE, "<"),
LEX_TOKEN_DEF(MIE_SYM_RIGHT_ANGLE, ">"),
LEX_TOKEN_DEF(MIE_SYM_HYPHEN_RIGHT_ANGLE, "->"),
};
/* number of entries in symbols[] */
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
static struct mie_lex_symbol_node *get_symbol_node(
struct mie_lex_symbol_node *node, char c)
{
b_queue_entry *entry = b_queue_first(&node->s_children);
while (entry) {
struct mie_lex_symbol_node *child
= b_unbox(struct mie_lex_symbol_node, entry, s_entry);
if (child->s_char == c) {
return child;
}
entry = b_queue_next(entry);
}
return NULL;
}
/* return the lexer's scratch string, emptied and ready for reuse. the string
 * is created the first time it is requested. */
static b_string *get_temp_string(struct mie_lex *lex)
{
	b_string *scratch = lex->lex_temp;

	if (scratch == NULL) {
		scratch = b_string_create();
		lex->lex_temp = scratch;
	}

	b_string_clear(scratch);
	return scratch;
}
/* insert the spelling of `sym` into the symbol tree rooted at `tree`, creating
 * one node per character where no matching node exists yet, and attach `sym`
 * to the node reached by the last character.
 * returns MIE_ERR_NO_MEMORY if a node cannot be allocated. */
static enum mie_status put_symbol(
	struct mie_lex_symbol_node *tree, struct lex_token_def *sym)
{
	struct mie_lex_symbol_node *cursor = tree;

	for (const char *p = sym->name; *p != '\0'; p++) {
		struct mie_lex_symbol_node *next = get_symbol_node(cursor, *p);

		if (next == NULL) {
			/* zeroed allocation: no definition, no children yet */
			next = calloc(1, sizeof *next);
			if (next == NULL) {
				return MIE_ERR_NO_MEMORY;
			}

			next->s_char = *p;
			b_queue_push_back(&cursor->s_children, &next->s_entry);
		}

		cursor = next;
	}

	cursor->s_def = sym;
	return MIE_SUCCESS;
}
/* recursively free `tree` and every node below it. */
static void destroy_symbol_tree(struct mie_lex_symbol_node *tree)
{
	b_queue_entry *cur = b_queue_first(&tree->s_children);

	while (cur != NULL) {
		/* fetch the successor before the entry is unlinked */
		b_queue_entry *following = b_queue_next(cur);
		struct mie_lex_symbol_node *child
			= b_unbox(struct mie_lex_symbol_node, cur, s_entry);

		b_queue_delete(&tree->s_children, cur);
		destroy_symbol_tree(child);
		cur = following;
	}

	free(tree);
}
/* build the per-character lookup tree for every entry of symbols[].
 * returns the root node, or NULL if an allocation fails (in which case any
 * partially built tree has been freed). */
static struct mie_lex_symbol_node *build_symbol_tree(void)
{
	struct mie_lex_symbol_node *root = calloc(1, sizeof *root);
	if (root == NULL) {
		return NULL;
	}

	size_t i = 0;
	while (i < nr_symbols) {
		if (put_symbol(root, &symbols[i]) != MIE_SUCCESS) {
			destroy_symbol_tree(root);
			return NULL;
		}

		i++;
	}

	return root;
}
/* create a lexer that reads from `src`.
 * the cursor starts at row 1, column 1. the stream is borrowed, not owned.
 * returns NULL if the lexer, its line buffer or its symbol tree cannot be
 * allocated. */
struct mie_lex *mie_lex_create(b_stream *src)
{
	struct mie_lex *lex = calloc(1, sizeof *lex);
	if (!lex) {
		return NULL;
	}

	lex->lex_cursor_row = lex->lex_cursor_col = 1;
	lex->lex_status = MIE_SUCCESS;
	lex->lex_source = src;
	lex->lex_linebuf = b_string_create();
	lex->lex_sym_tree = build_symbol_tree();

	/* the line buffer is used unconditionally by peek()/advance(), so a
	 * failed creation (assumed to be reported as NULL -- TODO confirm)
	 * must fail the whole construction, just like a missing symbol tree.
	 * mie_lex_destroy() copes with either member being NULL. */
	if (!lex->lex_linebuf || !lex->lex_sym_tree) {
		mie_lex_destroy(lex);
		return NULL;
	}

	return lex;
}
/* destroy a lexer: every token still queued, the line iterator, the line
 * buffer, the symbol tree and the scratch string are released. the source
 * stream is not touched. passing NULL is a no-op. */
void mie_lex_destroy(struct mie_lex *lex)
{
	if (!lex) {
		return;
	}

	b_queue_entry *entry = b_queue_first(&lex->lex_queue);
	while (entry) {
		struct mie_token *tok
			= b_unbox(struct mie_token, entry, tok_entry);
		b_queue_entry *next = b_queue_next(entry);
		b_queue_delete(&lex->lex_queue, entry);
		mie_token_destroy(tok);
		entry = next;
	}

	/* the iterator walks the line buffer, so drop it first */
	if (lex->lex_linebuf_ptr) {
		b_iterator_unref(lex->lex_linebuf_ptr);
		lex->lex_linebuf_ptr = NULL;
	}

	/* the line buffer is a b_string object created by b_string_create();
	 * it has to be released the same way as lex_temp, not with free() */
	if (lex->lex_linebuf) {
		b_string_unref(lex->lex_linebuf);
	}

	if (lex->lex_sym_tree) {
		destroy_symbol_tree(lex->lex_sym_tree);
	}

	if (lex->lex_temp) {
		b_string_unref(lex->lex_temp);
	}

	free(lex);
}
enum mie_status mie_lex_get_status(const struct mie_lex *lex)
{
return lex->lex_status;
}
static enum mie_status refill_linebuf(struct mie_lex *lex)
{
if (!lex->lex_source) {
return MIE_ERR_EOF;
}
if (lex->lex_linebuf_ptr) {
b_iterator_unref(lex->lex_linebuf_ptr);
lex->lex_linebuf_ptr = NULL;
}
b_stringstream *s = b_stringstream_create();
b_status status = b_stream_read_line_s(lex->lex_source, s);
if (status == B_ERR_NO_DATA) {
return MIE_ERR_EOF;
}
if (!B_OK(status)) {
return MIE_ERR_INTERNAL_FAILURE;
}
b_string_replace_all_with_stringstream(lex->lex_linebuf, s);
b_stringstream_unref(s);
lex->lex_linebuf_ptr = b_iterator_begin(lex->lex_linebuf);
return MIE_SUCCESS;
}
static int peek(struct mie_lex *lex)
{
enum mie_status status = MIE_SUCCESS;
if (!lex->lex_linebuf_ptr || !b_iterator_is_valid(lex->lex_linebuf_ptr)) {
status = refill_linebuf(lex);
}
if (status != MIE_SUCCESS) {
return -status;
}
if (b_string_get_size(lex->lex_linebuf, B_STRLEN_NORMAL) == 0) {
return -MIE_ERR_EOF;
}
b_wchar c = b_iterator_get_value(lex->lex_linebuf_ptr).v_int;
return c;
}
/* consume the character under the cursor and return it, reading a new line
 * from the source when the current one is used up. the cursor column is
 * incremented, or the cursor is moved to column 1 of the next row when the
 * consumed character is a newline.
 * on failure the negated mie_status is returned (always a negative value). */
static int advance(struct mie_lex *lex)
{
	/* same guard as peek(): there is no iterator at all before the first
	 * line has been read, or after a failed refill */
	if (!lex->lex_linebuf_ptr || !b_iterator_is_valid(lex->lex_linebuf_ptr)) {
		enum mie_status status = refill_linebuf(lex);
		if (status != MIE_SUCCESS) {
			return -status;
		}
	}

	if (b_string_get_size(lex->lex_linebuf, B_STRLEN_NORMAL) == 0) {
		return -MIE_ERR_EOF;
	}

	b_wchar c = b_iterator_get_value(lex->lex_linebuf_ptr).v_int;
	b_iterator_move_next(lex->lex_linebuf_ptr);

	if (c == '\n') {
		lex->lex_cursor_col = 1;
		lex->lex_cursor_row++;
	} else {
		lex->lex_cursor_col++;
	}

	return c;
}
/* true when the current line still has unread characters. this does not try
 * to read another line from the source. */
static bool input_available(struct mie_lex *lex)
{
	if (!lex->lex_linebuf_ptr) {
		return false;
	}

	return b_iterator_is_valid(lex->lex_linebuf_ptr);
}
static bool char_can_begin_symbol(char c)
{
for (size_t i = 0; i < nr_symbols; i++) {
if (symbols[i].name[0] == c) {
return true;
}
}
return false;
}
/* allocate a zero-initialised token of the given type.
 * returns NULL if the allocation fails. */
static struct mie_token *create_token(enum mie_token_type type)
{
	struct mie_token *fresh = calloc(1, sizeof *fresh);

	if (fresh != NULL) {
		fresh->tok_type = type;
	}

	return fresh;
}
/* remember the current cursor position as the start of the token that is
 * being lexed. */
static void set_token_start(struct mie_lex *lex)
{
	lex->lex_token_start_col = lex->lex_cursor_col;
	lex->lex_token_start_row = lex->lex_cursor_row;
}
/* remember the current cursor position as the end of the token that is
 * being lexed. */
static void set_token_end(struct mie_lex *lex)
{
	lex->lex_token_end_col = lex->lex_cursor_col;
	lex->lex_token_end_row = lex->lex_cursor_row;
}
static enum mie_status push_token(struct mie_lex *lex, struct mie_token *tok)
{
tok->tok_start.c_row = lex->lex_token_start_row;
tok->tok_start.c_col = lex->lex_token_start_col;
tok->tok_end.c_row = lex->lex_token_end_row;
tok->tok_end.c_col = lex->lex_token_end_col;
b_queue_push_back(&lex->lex_queue, &tok->tok_entry);
return MIE_SUCCESS;
}
/* queue a MIE_TOK_LINEFEED token, which carries no value.
 * returns MIE_ERR_NO_MEMORY if the token cannot be allocated. */
static enum mie_status push_linefeed(struct mie_lex *lex)
{
	struct mie_token *linefeed = calloc(1, sizeof *linefeed);
	if (linefeed == NULL) {
		return MIE_ERR_NO_MEMORY;
	}

	linefeed->tok_value_type = MIE_TOK_V_NONE;
	linefeed->tok_type = MIE_TOK_LINEFEED;

	return push_token(lex, linefeed);
}
/* queue a MIE_TOK_SYMBOL token whose value is `sym`.
 * returns MIE_ERR_NO_MEMORY if the token cannot be allocated. */
static enum mie_status push_symbol(struct mie_lex *lex, enum mie_token_symbol sym)
{
	struct mie_token *symbol = calloc(1, sizeof *symbol);
	if (symbol == NULL) {
		return MIE_ERR_NO_MEMORY;
	}

	symbol->tok_sym = sym;
	symbol->tok_value_type = MIE_TOK_V_SYMBOL;
	symbol->tok_type = MIE_TOK_SYMBOL;

	return push_token(lex, symbol);
}
static enum mie_status push_string_token(
struct mie_lex *lex, enum mie_token_type type, char *s)
{
struct mie_token *tok = malloc(sizeof *tok);
if (!tok) {
return MIE_ERR_NO_MEMORY;
}
char *ep = NULL;
long long v = strtoll(s, &ep, 10);
memset(tok, 0x0, sizeof *tok);
tok->tok_type = type;
if (*ep == '\0') {
tok->tok_int = v;
tok->tok_value_type = MIE_TOK_V_INT;
free(s);
} else {
tok->tok_str = s;
tok->tok_value_type = MIE_TOK_V_STRING;
}
return push_token(lex, tok);
}
/* queue a MIE_TOK_INT token holding `v` (stored in the signed tok_int field).
 * returns MIE_ERR_NO_MEMORY if the token cannot be allocated. */
static enum mie_status push_int(struct mie_lex *lex, unsigned long long v)
{
	struct mie_token *number = calloc(1, sizeof *number);
	if (number == NULL) {
		return MIE_ERR_NO_MEMORY;
	}

	number->tok_int = v;
	number->tok_value_type = MIE_TOK_V_INT;
	number->tok_type = MIE_TOK_INT;

	return push_token(lex, number);
}
/* queue a MIE_TOK_DOUBLE token holding `v`.
 * returns MIE_ERR_NO_MEMORY if the token cannot be allocated. */
static enum mie_status push_double(struct mie_lex *lex, double v)
{
	struct mie_token *number = calloc(1, sizeof *number);
	if (number == NULL) {
		return MIE_ERR_NO_MEMORY;
	}

	number->tok_double = v;
	number->tok_value_type = MIE_TOK_V_DOUBLE;
	number->tok_type = MIE_TOK_DOUBLE;

	return push_token(lex, number);
}
/* consume characters up to and including the next newline, or up to the end
 * of the input. any other read failure is returned as its mie_status. */
static enum mie_status read_line_comment(struct mie_lex *lex)
{
	for (;;) {
		b_wchar ch = advance(lex);

		if (ch == '\n' || ch == -MIE_ERR_EOF) {
			return MIE_SUCCESS;
		}

		if (ch < 0) {
			return -ch;
		}
	}
}
/* lex a numeric literal starting at the cursor and queue it as a MIE_TOK_INT
 * or, if it contains a decimal point, a MIE_TOK_DOUBLE token.
 *
 * - `_` may be used as a digit separator and is ignored.
 * - a leading `0x` selects base 16 and a leading `0b` selects base 2; a
 *   decimal point is only allowed in base 10.
 * - the literal ends at whitespace, punctuation or the end of the input.
 *
 * `negate` is set by read_symbol() when a hyphen directly preceded the
 * digits; the value is then negated and the token keeps the start position of
 * the hyphen.
 *
 * returns MIE_ERR_BAD_SYNTAX for a malformed literal, or the status of a
 * failed read. */
static enum mie_status read_number(struct mie_lex *lex, bool negate)
{
	int token_len = 0;
	int base = 10;
	int dots = 0;
	/* whether the literal started with the digit 0; a base prefix is only
	 * valid directly after it */
	bool leading_zero = false;
	b_string *str = get_temp_string(lex);

	if (!negate) {
		set_token_start(lex);
	}

	while (true) {
		b_wchar c = peek(lex);
		if (c == -MIE_ERR_EOF) {
			break;
		}

		if (c < 0) {
			return -c;
		}

		if (c == '_') {
			token_len++;
			set_token_end(lex);
			advance(lex);
			continue;
		}

		if (c == '.') {
			if (base != 10 || dots > 0) {
				return MIE_ERR_BAD_SYNTAX;
			}

			token_len++;
			dots++;
			b_string_append_cstr(str, ".");
			set_token_end(lex);
			advance(lex);
			continue;
		}

		if (b_wchar_is_space(c) || b_wchar_is_punct(c)) {
			break;
		}

		/* `0x` / `0b` prefix. the leading 0 stays in the buffer, which
		 * is harmless for strtoull(). anything like `1x10` falls
		 * through to the digit check below and is rejected. */
		if ((c == 'x' || c == 'b') && token_len == 1 && leading_zero) {
			base = (c == 'x') ? 16 : 2;
			token_len++;
			set_token_end(lex);
			advance(lex);
			continue;
		}

		/* the <ctype.h> classifiers are only defined for values that
		 * fit an unsigned char, and c may be any wide character */
		const bool ascii = (c <= 0x7f);

		if (base == 2 && c != '0' && c != '1') {
			return MIE_ERR_BAD_SYNTAX;
		}

		if (base == 10 && !(ascii && isdigit(c))) {
			return MIE_ERR_BAD_SYNTAX;
		}

		if (base == 16 && !(ascii && isxdigit(c))) {
			return MIE_ERR_BAD_SYNTAX;
		}

		if (token_len == 0) {
			leading_zero = (c == '0');
		}

		b_string_append_wc(str, c);
		set_token_end(lex);
		advance(lex);
		token_len++;
	}

	const char *s = b_string_ptr(str);
	char *ep = NULL;

	/* the sign is not part of the buffered text; it is applied to the
	 * converted value below. */
	if (dots > 0) {
		double v = strtod(s, &ep);
		if (*ep != '\0') {
			return MIE_ERR_BAD_SYNTAX;
		}

		if (negate) {
			v *= -1;
		}

		return push_double(lex, v);
	}

	unsigned long long v = strtoull(s, &ep, base);
	if (*ep != '\0') {
		return MIE_ERR_BAD_SYNTAX;
	}

	if (negate) {
		/* unsigned negation wraps; push_int() stores the result in a
		 * signed field */
		v *= -1;
	}

	return push_int(lex, v);
}
static enum mie_status read_ident(struct mie_lex *lex, enum mie_token_type type)
{
int dots = 0;
b_string *str = get_temp_string(lex);
b_wchar prev = 0;
if (type == MIE_TOK_NONE) {
set_token_start(lex);
}
while (1) {
b_wchar c = peek(lex);
if ((c == '.' || c == '-') && prev == c) {
return MIE_ERR_BAD_SYNTAX;
}
if (c == '.') {
dots++;
}
if (!IS_VALID_IDENT_CHAR(c)) {
break;
}
prev = c;
b_string_append_wc(str, c);
set_token_end(lex);
advance(lex);
}
if (type == MIE_TOK_NONE) {
type = dots > 0 ? MIE_TOK_NAME : MIE_TOK_WORD;
}
char *s = b_string_steal(str);
switch (type) {
case MIE_TOK_INSTNAME:
if (dots > 0) {
return push_string_token(lex, type, s);
} else {
push_symbol(lex, MIE_SYM_ASTERISK);
return push_string_token(lex, MIE_TOK_WORD, s);
}
break;
default:
return push_string_token(lex, type, s);
}
}
static enum mie_status read_string(struct mie_lex *lex)
{
b_string *str = get_temp_string(lex);
b_wchar c = peek(lex);
bool esc = false;
if (c != '"') {
return MIE_ERR_BAD_SYNTAX;
}
advance(lex);
while (1) {
b_wchar c = peek(lex);
if (esc) {
switch (c) {
case '\\':
case '"':
b_string_append_wc(str, c);
break;
default:
return MIE_ERR_BAD_SYNTAX;
}
esc = false;
advance(lex);
continue;
}
if (c == '\\') {
esc = true;
advance(lex);
continue;
}
if (c == '"') {
advance(lex);
break;
}
b_string_append_wc(str, c);
advance(lex);
}
char *s = b_string_steal(str);
return push_string_token(lex, MIE_TOK_STRING, s);
}
/* lex the longest symbol that starts at the cursor by walking the symbol
 * tree, then decide what it introduces:
 *
 * - a hyphen directly followed by a digit starts a negative number;
 * - a sigil directly followed by an identifier start character starts the
 *   matching prefixed name;
 * - `%`, `$` and `@` may additionally be followed by a digit;
 * - anything else is queued as a plain MIE_TOK_SYMBOL token.
 *
 * returns MIE_ERR_BAD_SYNTAX if the characters at the cursor do not spell a
 * complete symbol. */
static enum mie_status read_symbol(struct mie_lex *lex)
{
	struct mie_lex_symbol_node *node = lex->lex_sym_tree;
	set_token_start(lex);

	/* the last character examined: the lookahead that ended the symbol, or
	 * the last character of the symbol if the input ended with it */
	b_wchar prev = 0;

	while (true) {
		b_wchar c = peek(lex);
		if (c < 0) {
			break;
		}

		/* every symbol is spelled in ASCII. the tree is keyed by char,
		 * so a wider character must not be narrowed into a false
		 * match. */
		struct mie_lex_symbol_node *next = NULL;
		if (c <= 0x7f) {
			next = get_symbol_node(node, (char)c);
		}

		prev = c;
		if (!next) {
			break;
		}

		node = next;
		set_token_end(lex);
		advance(lex);
	}

	if (!node || node->s_def == NULL) {
		return MIE_ERR_BAD_SYNTAX;
	}

	/* explicit range check: isdigit() is undefined for wide characters */
	const bool prev_is_digit = (prev >= '0' && prev <= '9');

	if (node->s_def->id == MIE_SYM_HYPHEN && prev_is_digit) {
		return read_number(lex, true);
	}

	if (IS_VALID_IDENT_START_CHAR(prev)) {
		switch (node->s_def->id) {
		case MIE_SYM_ASTERISK:
			return read_ident(lex, MIE_TOK_INSTNAME);
		case MIE_SYM_PLUS:
			return read_ident(lex, MIE_TOK_GRAPHNAME);
		case MIE_SYM_PERCENT:
			return read_ident(lex, MIE_TOK_VREGNAME);
		case MIE_SYM_DOLLAR:
			return read_ident(lex, MIE_TOK_MREGNAME);
		case MIE_SYM_CARET:
			return read_ident(lex, MIE_TOK_BLOCKNAME);
		case MIE_SYM_TILDE:
			return read_ident(lex, MIE_TOK_OPNAME);
		case MIE_SYM_HASH:
			return read_ident(lex, MIE_TOK_TYPENAME);
		case MIE_SYM_ATSIGN:
			return read_ident(lex, MIE_TOK_SYMNAME);
		default:
			break;
		}
	}

	/* only reached for these sigils when prev is a digit; every other
	 * register start character was handled above */
	if (IS_VALID_REG_START_CHAR(prev)) {
		switch (node->s_def->id) {
		case MIE_SYM_PERCENT:
			return read_ident(lex, MIE_TOK_VREGNAME);
		case MIE_SYM_DOLLAR:
			return read_ident(lex, MIE_TOK_MREGNAME);
		case MIE_SYM_ATSIGN:
			return read_ident(lex, MIE_TOK_SYMNAME);
		default:
			break;
		}
	}

	return push_symbol(lex, node->s_def->id);
}
static void skip_whitespace(struct mie_lex *lex)
{
b_wchar c = peek(lex);
while (b_wchar_is_space(c)) {
advance(lex);
c = peek(lex);
}
}
/* decide whether `c` is whitespace to be skipped. newlines only count when
 * `skip_linefeeds` is set. */
static bool should_skip(b_wchar c, bool skip_linefeeds)
{
	if (!b_wchar_is_space(c)) {
		return false;
	}

	return skip_linefeeds || c != '\n';
}
/* skip whitespace and `;` line comments starting at the cursor.
 * newlines count as whitespace only when `include_linefeeds` is set.
 * skipping stops at the first character that is neither, or when no more
 * input can be read. */
static void skip_ignored_chars(struct mie_lex *lex, bool include_linefeeds)
{
	b_wchar c = peek(lex);

	while (1) {
		while (should_skip(c, include_linefeeds)) {
			advance(lex);
			c = peek(lex);
		}

		if (c != ';') {
			break;
		}

		/* consume the comment text. a negative value means the input
		 * ended (or failed) inside the comment; stop there instead of
		 * waiting for a newline that will never come. */
		while (c >= 0 && c != '\n') {
			advance(lex);
			c = peek(lex);
		}

		if (c < 0) {
			break;
		}

		/* NOTE(review): the newline that ends a comment is consumed
		 * even when include_linefeeds is false, so no linefeed token is
		 * produced for a line that ends in a comment -- confirm that
		 * this is intended. */
		advance(lex);
		c = peek(lex);
	}
}
/* lex the next token(s) from the input and append them to the token queue.
 *
 * leading blanks and `;` comments are skipped, and a backslash makes the lexer
 * skip following whitespace including newlines. a run of newlines produces a
 * single linefeed token. everything else is dispatched on its first character
 * to the identifier, symbol, string or number reader.
 *
 * returns MIE_SUCCESS when at least one token was queued, the status of the
 * failed read (including MIE_ERR_EOF at the end of the input), or
 * MIE_ERR_BAD_SYNTAX for a character that cannot start any token. */
static enum mie_status pump_tokens(struct mie_lex *lex)
{
	b_wchar c = peek(lex);
	if (c < 0) {
		return -c;
	}

	while (c == ';' || (b_wchar_is_space(c) && c != '\n')) {
		skip_ignored_chars(lex, false);
		c = peek(lex);
	}

	if (c == '\\') {
		advance(lex);
		skip_ignored_chars(lex, true);
		c = peek(lex);
	}

	/* skipping may have run into the end of the input or a read error;
	 * report that as such instead of as a syntax error */
	if (c < 0) {
		return -c;
	}

	if (c == '\n') {
		set_token_start(lex);
		set_token_end(lex);

		while (c == '\n') {
			advance(lex);

			/* do not read ahead into the next line just to
			 * collapse further newlines */
			if (!input_available(lex)) {
				break;
			}

			c = peek(lex);
		}

		if (c < 0) {
			return -c;
		}

		return push_linefeed(lex);
	}

	while (b_wchar_is_space(c) && c != '\n') {
		advance(lex);
		c = peek(lex);
	}

	if (c < 0) {
		return -c;
	}

	if (IS_VALID_IDENT_START_CHAR(c)) {
		return read_ident(lex, MIE_TOK_NONE);
	}

	/* symbols are ASCII only; do not narrow a wide character to char */
	if (c <= 0x7f && char_can_begin_symbol((char)c)) {
		return read_symbol(lex);
	}

	if (c == '"') {
		return read_string(lex);
	}

	/* explicit range check: isdigit() is undefined for wide characters */
	if (c >= '0' && c <= '9') {
		return read_number(lex, false);
	}

	return MIE_ERR_BAD_SYNTAX;
}
struct mie_token *mie_lex_peek(struct mie_lex *lex)
{
enum mie_status status = MIE_SUCCESS;
while (b_queue_empty(&lex->lex_queue)) {
status = pump_tokens(lex);
if (status != MIE_SUCCESS) {
lex->lex_status = status;
return NULL;
}
}
lex->lex_status = status;
b_queue_entry *entry = b_queue_first(&lex->lex_queue);
struct mie_token *tok = b_unbox(struct mie_token, entry, tok_entry);
return tok;
}
/* remove the first pending token from the queue and destroy it. when the
 * queue is empty, input is lexed until it is not.
 * the outcome is stored in lex_status, in the same way as mie_lex_peek()
 * does; on failure no token is consumed. */
void mie_lex_advance(struct mie_lex *lex)
{
	enum mie_status status = MIE_SUCCESS;

	while (b_queue_empty(&lex->lex_queue)) {
		status = pump_tokens(lex);
		if (status != MIE_SUCCESS) {
			lex->lex_status = status;
			return;
		}
	}

	/* clear a status left behind by an earlier failed call, so that
	 * mie_lex_get_status() describes this call */
	lex->lex_status = status;

	b_queue_entry *entry = b_queue_pop_front(&lex->lex_queue);
	struct mie_token *tok = b_unbox(struct mie_token, entry, tok_entry);
	mie_token_destroy(tok);
}
/* true when a token is already queued, or when the current line still has
 * unread characters. no input is read from the source to find out. */
bool mie_lex_tokens_available(struct mie_lex *lex)
{
	return !b_queue_empty(&lex->lex_queue) || input_available(lex);
}

45
mie/parse/lex.h Normal file
View File

@@ -0,0 +1,45 @@
/* private lexer definitions, shared by the lexer implementation only.
 * the guard name avoids the leading-underscore-plus-capital form, which is
 * reserved for the C implementation. */
#ifndef MIE_PARSE_LEX_PRIV_H_
#define MIE_PARSE_LEX_PRIV_H_
#include <blue/core/queue.h>
#include <blue/ds/dict.h>
#include <blue/ds/string.h>
#include <mie/parse/lex.h>
#include <mie/parse/token.h>
#include <mie/status.h>
#include <stdint.h>
/* the complete state of one lexer instance. */
struct mie_lex {
	/* root of the per-character symbol lookup tree */
	struct mie_lex_symbol_node *lex_sym_tree;
	/* input stream; borrowed from the caller of mie_lex_create() */
	b_stream *lex_source;
	/* outcome of the most recent public operation that recorded one */
	enum mie_status lex_status;
	/* tokens that have been lexed but not consumed yet */
	b_queue lex_queue;
	/* scratch string used while a token is assembled; created lazily */
	b_string *lex_temp;
	/* not referenced by the lexer code yet */
	b_queue lex_state;
	/* not referenced by the lexer code yet */
	unsigned int lex_brace_depth;
	/* start and end position of the token that is being lexed */
	unsigned long lex_token_start_row, lex_token_start_col;
	unsigned long lex_token_end_row, lex_token_end_col;
	/* position of the next unread character; both start at 1 */
	unsigned long lex_cursor_row, lex_cursor_col;
	/* the line currently being lexed */
	b_string *lex_linebuf;
	/* read position inside lex_linebuf; NULL until a line has been read */
	b_iterator *lex_linebuf_ptr;
};
/* one node of the symbol lookup tree. the path from the root to a node spells
 * a prefix of one or more symbols. */
struct mie_lex_symbol_node {
	/* the character that leads from the parent to this node */
	char s_char;
	/* the symbol spelled by the path to this node, or NULL if that path is
	 * only a prefix */
	struct lex_token_def *s_def;
	/* links this node into its parent's s_children */
	b_queue_entry s_entry;
	b_queue s_children;
};
/* an entry of the static symbol table. */
struct lex_token_def {
	/* the enum mie_token_symbol value */
	int id;
	/* the spelling of the symbol */
	const char *name;
	/* left zero by LEX_TOKEN_DEF; not used by the lexer code yet */
	uint64_t name_hash;
};
#endif

0
mie/parse/parse.c Normal file
View File

72
mie/parse/token.c Normal file
View File

@@ -0,0 +1,72 @@
#include <mie/parse/token.h>
#include <stdlib.h>
/* free a token. a string value (MIE_TOK_V_STRING) is owned by the token and is
 * freed along with it. passing NULL is a no-op, like free(). */
void mie_token_destroy(struct mie_token *tok)
{
	if (!tok) {
		return;
	}

	switch (tok->tok_value_type) {
	case MIE_TOK_V_STRING:
		free(tok->tok_str);
		break;
	default:
		/* the other value types own no memory */
		break;
	}

	free(tok);
}
#define ENUM_STR(x) \
case x: \
return #x
const char *mie_token_type_to_string(enum mie_token_type type)
{
switch (type) {
ENUM_STR(MIE_TOK_NONE);
ENUM_STR(MIE_TOK_LINEFEED);
ENUM_STR(MIE_TOK_INT);
ENUM_STR(MIE_TOK_DOUBLE);
ENUM_STR(MIE_TOK_SYMBOL);
ENUM_STR(MIE_TOK_WORD);
ENUM_STR(MIE_TOK_NAME);
ENUM_STR(MIE_TOK_OPNAME);
ENUM_STR(MIE_TOK_INSTNAME);
ENUM_STR(MIE_TOK_GRAPHNAME);
ENUM_STR(MIE_TOK_VREGNAME);
ENUM_STR(MIE_TOK_MREGNAME);
ENUM_STR(MIE_TOK_BLOCKNAME);
ENUM_STR(MIE_TOK_TYPENAME);
ENUM_STR(MIE_TOK_SYMNAME);
ENUM_STR(MIE_TOK_STRING);
default:
return "";
}
}
const char *mie_token_symbol_to_string(enum mie_token_symbol sym)
{
switch (sym) {
ENUM_STR(MIE_SYM_NONE);
ENUM_STR(MIE_SYM_COLON);
ENUM_STR(MIE_SYM_EQUAL);
ENUM_STR(MIE_SYM_COMMA);
ENUM_STR(MIE_SYM_HYPHEN);
ENUM_STR(MIE_SYM_ASTERISK);
ENUM_STR(MIE_SYM_PLUS);
ENUM_STR(MIE_SYM_PERCENT);
ENUM_STR(MIE_SYM_DOLLAR);
ENUM_STR(MIE_SYM_CARET);
ENUM_STR(MIE_SYM_HASH);
ENUM_STR(MIE_SYM_ATSIGN);
ENUM_STR(MIE_SYM_LEFT_BRACE);
ENUM_STR(MIE_SYM_RIGHT_BRACE);
ENUM_STR(MIE_SYM_LEFT_BRACKET);
ENUM_STR(MIE_SYM_RIGHT_BRACKET);
ENUM_STR(MIE_SYM_LEFT_PAREN);
ENUM_STR(MIE_SYM_RIGHT_PAREN);
ENUM_STR(MIE_SYM_LEFT_ANGLE);
ENUM_STR(MIE_SYM_RIGHT_ANGLE);
ENUM_STR(MIE_SYM_HYPHEN_RIGHT_ANGLE);
ENUM_STR(MIE_SYM_OTHER);
default:
return "";
}
}