mie: lex: move file i/o handling to a separate struct

This commit is contained in:
2026-01-27 20:46:08 +00:00
parent b5fa40d4d8
commit bd5ba9e9fd
5 changed files with 246 additions and 146 deletions

View File

@@ -7,8 +7,9 @@
struct mie_lex;
struct mie_token;
struct mie_line_source;
MIE_API struct mie_lex *mie_lex_create(b_stream *src);
MIE_API struct mie_lex *mie_lex_create(struct mie_line_source *src);
MIE_API void mie_lex_destroy(struct mie_lex *lex);
MIE_API enum mie_status mie_lex_get_status(const struct mie_lex *lex);

View File

@@ -0,0 +1,35 @@
#ifndef MIE_PARSE_LINE_SOURCE_H_
#define MIE_PARSE_LINE_SOURCE_H_
#include <blue/core/stream.h>
#include <blue/ds/array.h>
#include <blue/ds/string.h>
#include <mie/misc.h>
#include <mie/parse/file-span.h>
#include <mie/status.h>
/*
 * Line-oriented character source.
 *
 * Reads an input stream one line at a time, keeps every line read so far,
 * and tracks the row/column of the next character to be consumed.
 */
struct mie_line_source {
/* stream that lines are read from; a NULL stream is treated as end of input */
b_stream *s_stream;
/* path given to mie_line_source_init(); the pointer is stored, not copied */
const char *s_path;
/* the line most recently read from s_stream (the same object is also appended to s_lines) */
b_string *s_linebuf;
/* iterator over s_linebuf positioned at the next unread character; NULL when there is no current line */
b_iterator *s_linebuf_ptr;
/* every line read so far, in the order read */
b_array *s_lines;
/* row/column of the next character to be consumed; both start at 1 */
struct mie_file_cell s_cursor;
};
/*
 * Initialise `src` to read from `stream`. `path` is stored as-is and handed
 * back by mie_line_source_get_path(). The cursor starts at row 1, column 1.
 * Returns MIE_ERR_NO_MEMORY if the line array cannot be created, otherwise
 * MIE_SUCCESS.
 */
MIE_API enum mie_status mie_line_source_init(
struct mie_line_source *src, const char *path, b_stream *stream);
/*
 * Release the current line iterator and the stored lines, then zero `src`.
 * The stream itself is not touched.
 */
MIE_API void mie_line_source_cleanup(struct mie_line_source *src);
/* Return the path pointer that was passed to mie_line_source_init(). */
MIE_API const char *mie_line_source_get_path(const struct mie_line_source *src);
/* Return a pointer to the cursor stored inside `src` (position of the next character). */
MIE_API const struct mie_file_cell *mie_line_source_get_cursor(
const struct mie_line_source *src);
/*
 * Return the next character without consuming it, reading a new line from
 * the stream when the current one is exhausted. On failure the result is a
 * negated mie_status value (e.g. -MIE_ERR_EOF).
 */
MIE_API b_wchar mie_line_source_peekc(struct mie_line_source *src);
/*
 * Like mie_line_source_peekc(), but also consumes the character and advances
 * the cursor: the column is incremented, and a '\n' resets the column to 1
 * and increments the row.
 */
MIE_API b_wchar mie_line_source_getc(struct mie_line_source *src);
/*
 * Look up a line that has already been read. `row` is 1-based.
 * Returns MIE_ERR_INVALID_ARGUMENT when `row` is 0, MIE_ERR_EOF when that row
 * has not been read (yet), and MIE_SUCCESS with the line stored in `*out`
 * otherwise.
 */
MIE_API enum mie_status mie_line_source_get_row(
struct mie_line_source *src, size_t row, const b_string **out);
/*
 * Report whether the current line still has unread characters. This only
 * inspects the current line iterator; it does not read from the stream.
 */
MIE_API bool mie_line_source_input_available(struct mie_line_source *src);
#endif

View File

@@ -145,7 +145,7 @@ static struct mie_lex_symbol_node *build_symbol_tree(void)
return root;
}
struct mie_lex *mie_lex_create(b_stream *src)
struct mie_lex *mie_lex_create(struct mie_line_source *src)
{
struct mie_lex *lex = malloc(sizeof *lex);
if (!lex) {
@@ -154,11 +154,9 @@ struct mie_lex *mie_lex_create(b_stream *src)
memset(lex, 0x0, sizeof *lex);
lex->lex_cursor_row = lex->lex_cursor_col = 1;
lex->lex_status = MIE_SUCCESS;
lex->lex_source = src;
lex->lex_linebuf = b_string_create();
lex->lex_sym_tree = build_symbol_tree();
if (!lex->lex_sym_tree) {
@@ -184,10 +182,6 @@ void mie_lex_destroy(struct mie_lex *lex)
entry = next;
}
if (lex->lex_linebuf) {
free(lex->lex_linebuf);
}
if (lex->lex_sym_tree) {
destroy_symbol_tree(lex->lex_sym_tree);
}
@@ -204,89 +198,6 @@ enum mie_status mie_lex_get_status(const struct mie_lex *lex)
return lex->lex_status;
}
static enum mie_status refill_linebuf(struct mie_lex *lex)
{
if (!lex->lex_source) {
return MIE_ERR_EOF;
}
if (lex->lex_linebuf_ptr) {
b_iterator_unref(lex->lex_linebuf_ptr);
lex->lex_linebuf_ptr = NULL;
}
b_stringstream *s = b_stringstream_create();
b_status status = b_stream_read_line_s(lex->lex_source, s);
if (status == B_ERR_NO_DATA) {
return MIE_ERR_EOF;
}
if (!B_OK(status)) {
return MIE_ERR_INTERNAL_FAILURE;
}
b_string_replace_all_with_stringstream(lex->lex_linebuf, s);
b_stringstream_unref(s);
lex->lex_linebuf_ptr = b_iterator_begin(lex->lex_linebuf);
return MIE_SUCCESS;
}
static int peek(struct mie_lex *lex)
{
enum mie_status status = MIE_SUCCESS;
if (!lex->lex_linebuf_ptr || !b_iterator_is_valid(lex->lex_linebuf_ptr)) {
status = refill_linebuf(lex);
}
if (status != MIE_SUCCESS) {
return -status;
}
if (b_string_get_size(lex->lex_linebuf, B_STRLEN_NORMAL) == 0) {
return -MIE_ERR_EOF;
}
b_wchar c = b_iterator_get_value(lex->lex_linebuf_ptr).v_int;
return c;
}
static int advance(struct mie_lex *lex)
{
enum mie_status status = MIE_SUCCESS;
if (!b_iterator_is_valid(lex->lex_linebuf_ptr)) {
status = refill_linebuf(lex);
}
if (status != MIE_SUCCESS) {
return -status;
}
if (b_string_get_size(lex->lex_linebuf, B_STRLEN_NORMAL) == 0) {
return -MIE_ERR_EOF;
}
b_wchar c = b_iterator_get_value(lex->lex_linebuf_ptr).v_int;
b_iterator_move_next(lex->lex_linebuf_ptr);
lex->lex_cursor_col++;
if (c == '\n') {
lex->lex_cursor_col = 1;
lex->lex_cursor_row++;
}
return c;
}
static bool input_available(struct mie_lex *lex)
{
return lex->lex_linebuf_ptr && b_iterator_is_valid(lex->lex_linebuf_ptr);
}
static bool char_can_begin_symbol(char c)
{
for (size_t i = 0; i < nr_symbols; i++) {
@@ -313,22 +224,18 @@ static struct mie_token *create_token(enum mie_token_type type)
static void set_token_start(struct mie_lex *lex)
{
lex->lex_token_start_row = lex->lex_cursor_row;
lex->lex_token_start_col = lex->lex_cursor_col;
lex->lex_token_start = *mie_line_source_get_cursor(lex->lex_source);
}
static void set_token_end(struct mie_lex *lex)
{
lex->lex_token_end_row = lex->lex_cursor_row;
lex->lex_token_end_col = lex->lex_cursor_col;
lex->lex_token_end = *mie_line_source_get_cursor(lex->lex_source);
}
static enum mie_status push_token(struct mie_lex *lex, struct mie_token *tok)
{
tok->tok_location.s_start.c_row = lex->lex_token_start_row;
tok->tok_location.s_start.c_col = lex->lex_token_start_col;
tok->tok_location.s_end.c_row = lex->lex_token_end_row;
tok->tok_location.s_end.c_col = lex->lex_token_end_col;
tok->tok_location.s_start = lex->lex_token_start;
tok->tok_location.s_end = lex->lex_token_end;
b_queue_push_back(&lex->lex_queue, &tok->tok_entry);
return MIE_SUCCESS;
@@ -423,7 +330,7 @@ static enum mie_status push_float(struct mie_lex *lex, double v)
static enum mie_status read_line_comment(struct mie_lex *lex)
{
while (true) {
b_wchar c = advance(lex);
b_wchar c = mie_line_source_getc(lex->lex_source);
if (c == -MIE_ERR_EOF || c == '\n') {
break;
@@ -449,7 +356,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate)
}
while (true) {
b_wchar c = peek(lex);
b_wchar c = mie_line_source_peekc(lex->lex_source);
if (c == -MIE_ERR_EOF) {
break;
}
@@ -461,7 +368,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate)
if (c == '_') {
token_len++;
set_token_end(lex);
advance(lex);
mie_line_source_getc(lex->lex_source);
continue;
}
@@ -479,7 +386,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate)
char s[] = {c, 0};
b_string_append_cstr(str, s);
set_token_end(lex);
advance(lex);
mie_line_source_getc(lex->lex_source);
continue;
}
@@ -491,7 +398,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate)
base = 16;
token_len++;
set_token_end(lex);
advance(lex);
mie_line_source_getc(lex->lex_source);
continue;
}
@@ -499,7 +406,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate)
base = 2;
token_len++;
set_token_end(lex);
advance(lex);
mie_line_source_getc(lex->lex_source);
continue;
}
@@ -517,7 +424,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate)
b_string_append_wc(str, c);
set_token_end(lex);
advance(lex);
mie_line_source_getc(lex->lex_source);
token_len++;
}
@@ -569,7 +476,7 @@ static enum mie_status read_ident(struct mie_lex *lex, enum mie_token_type type)
}
while (1) {
b_wchar c = peek(lex);
b_wchar c = mie_line_source_peekc(lex->lex_source);
if ((c == '.' || c == '-') && prev == c) {
return MIE_ERR_BAD_SYNTAX;
@@ -586,7 +493,7 @@ static enum mie_status read_ident(struct mie_lex *lex, enum mie_token_type type)
prev = c;
b_string_append_wc(str, c);
set_token_end(lex);
advance(lex);
mie_line_source_getc(lex->lex_source);
}
if (type == MIE_TOK_NONE) {
@@ -613,17 +520,17 @@ static enum mie_status read_string(struct mie_lex *lex)
{
b_string *str = get_temp_string(lex);
b_wchar c = peek(lex);
b_wchar c = mie_line_source_peekc(lex->lex_source);
bool esc = false;
if (c != '"') {
return MIE_ERR_BAD_SYNTAX;
}
advance(lex);
mie_line_source_getc(lex->lex_source);
while (1) {
b_wchar c = peek(lex);
b_wchar c = mie_line_source_peekc(lex->lex_source);
if (esc) {
switch (c) {
@@ -636,23 +543,23 @@ static enum mie_status read_string(struct mie_lex *lex)
}
esc = false;
advance(lex);
mie_line_source_getc(lex->lex_source);
continue;
}
if (c == '\\') {
esc = true;
advance(lex);
mie_line_source_getc(lex->lex_source);
continue;
}
if (c == '"') {
advance(lex);
mie_line_source_getc(lex->lex_source);
break;
}
b_string_append_wc(str, c);
advance(lex);
mie_line_source_getc(lex->lex_source);
}
char *s = b_string_steal(str);
@@ -666,7 +573,7 @@ static enum mie_status read_symbol(struct mie_lex *lex)
b_wchar prev = 0;
while (true) {
b_wchar c = peek(lex);
b_wchar c = mie_line_source_peekc(lex->lex_source);
if (c < 0) {
break;
}
@@ -679,7 +586,7 @@ static enum mie_status read_symbol(struct mie_lex *lex)
node = next;
set_token_end(lex);
advance(lex);
mie_line_source_getc(lex->lex_source);
prev = c;
}
@@ -734,11 +641,11 @@ static enum mie_status read_symbol(struct mie_lex *lex)
static void skip_whitespace(struct mie_lex *lex)
{
b_wchar c = peek(lex);
b_wchar c = mie_line_source_peekc(lex->lex_source);
while (b_wchar_is_space(c)) {
advance(lex);
c = peek(lex);
mie_line_source_getc(lex->lex_source);
c = mie_line_source_peekc(lex->lex_source);
}
}
@@ -755,34 +662,34 @@ static bool should_skip(b_wchar c, bool skip_linefeeds)
static void skip_ignored_chars(struct mie_lex *lex, bool include_linefeeds)
{
b_wchar c = peek(lex);
b_wchar c = mie_line_source_peekc(lex->lex_source);
while (1) {
while (should_skip(c, include_linefeeds)) {
advance(lex);
c = peek(lex);
mie_line_source_getc(lex->lex_source);
c = mie_line_source_peekc(lex->lex_source);
}
if (c != ';') {
break;
}
advance(lex);
c = peek(lex);
mie_line_source_getc(lex->lex_source);
c = mie_line_source_peekc(lex->lex_source);
while (c != '\n') {
advance(lex);
c = peek(lex);
mie_line_source_getc(lex->lex_source);
c = mie_line_source_peekc(lex->lex_source);
}
advance(lex);
c = peek(lex);
mie_line_source_getc(lex->lex_source);
c = mie_line_source_peekc(lex->lex_source);
}
}
static enum mie_status pump_tokens(struct mie_lex *lex)
{
b_wchar c = peek(lex);
b_wchar c = mie_line_source_peekc(lex->lex_source);
if (c < 0) {
return -c;
@@ -795,13 +702,13 @@ static enum mie_status pump_tokens(struct mie_lex *lex)
break;
}
c = peek(lex);
c = mie_line_source_peekc(lex->lex_source);
}
if (c == '\\') {
advance(lex);
mie_line_source_getc(lex->lex_source);
skip_ignored_chars(lex, true);
c = peek(lex);
c = mie_line_source_peekc(lex->lex_source);
}
if (c == '\n') {
@@ -809,13 +716,13 @@ static enum mie_status pump_tokens(struct mie_lex *lex)
set_token_end(lex);
while (c == '\n') {
advance(lex);
mie_line_source_getc(lex->lex_source);
if (!input_available(lex)) {
if (!mie_line_source_input_available(lex->lex_source)) {
break;
}
c = peek(lex);
c = mie_line_source_peekc(lex->lex_source);
}
if (c < 0) {
@@ -826,8 +733,8 @@ static enum mie_status pump_tokens(struct mie_lex *lex)
}
while (b_wchar_is_space(c) && c != '\n') {
advance(lex);
c = peek(lex);
mie_line_source_getc(lex->lex_source);
c = mie_line_source_peekc(lex->lex_source);
}
if (IS_VALID_IDENT_START_CHAR(c)) {
@@ -893,7 +800,7 @@ bool mie_lex_tokens_available(struct mie_lex *lex)
return true;
}
if (input_available(lex)) {
if (mie_line_source_input_available(lex->lex_source)) {
return true;
}

View File

@@ -5,13 +5,14 @@
#include <blue/ds/dict.h>
#include <blue/ds/string.h>
#include <mie/parse/lex.h>
#include <mie/parse/line-source.h>
#include <mie/parse/token.h>
#include <mie/status.h>
#include <stdint.h>
struct mie_lex {
struct mie_lex_symbol_node *lex_sym_tree;
b_stream *lex_source;
struct mie_line_source *lex_source;
enum mie_status lex_status;
b_queue lex_queue;
@@ -20,12 +21,7 @@ struct mie_lex {
b_queue lex_state;
unsigned int lex_brace_depth;
unsigned long lex_token_start_row, lex_token_start_col;
unsigned long lex_token_end_row, lex_token_end_col;
unsigned long lex_cursor_row, lex_cursor_col;
b_string *lex_linebuf;
b_iterator *lex_linebuf_ptr;
struct mie_file_cell lex_token_start, lex_token_end;
};
struct mie_lex_symbol_node {

161
mie/parse/line-source.c Normal file
View File

@@ -0,0 +1,161 @@
#include <mie/parse/line-source.h>
/*
 * Prepare `src` for reading lines from `stream`.
 *
 * The structure is zeroed first, so when the line array cannot be created
 * the caller is left with an all-zero source and MIE_ERR_NO_MEMORY. On
 * success the path and stream pointers are stored as given and the cursor
 * is placed at row 1, column 1.
 */
enum mie_status mie_line_source_init(
	struct mie_line_source *src, const char *path, b_stream *stream)
{
	memset(src, 0x0, sizeof *src);

	b_array *lines = b_array_create();
	src->s_lines = lines;
	if (!lines) {
		return MIE_ERR_NO_MEMORY;
	}

	src->s_path = path;
	src->s_stream = stream;

	/* rows and columns are 1-based */
	src->s_cursor.c_row = 1;
	src->s_cursor.c_col = 1;

	return MIE_SUCCESS;
}
void mie_line_source_cleanup(struct mie_line_source *src)
{
if (src->s_linebuf_ptr) {
b_iterator_unref(src->s_linebuf_ptr);
}
if (src->s_lines) {
b_array_unref(src->s_lines);
}
memset(src, 0x0, sizeof *src);
}
/* Accessor for the path pointer recorded by mie_line_source_init(). */
const char *mie_line_source_get_path(const struct mie_line_source *src)
{
	const char *path = src->s_path;
	return path;
}
/*
 * Accessor for the current cursor. The returned pointer refers to storage
 * inside `src`, so the value it points at changes as characters are consumed.
 */
const struct mie_file_cell *mie_line_source_get_cursor(
	const struct mie_line_source *src)
{
	const struct mie_file_cell *cursor = &src->s_cursor;
	return cursor;
}
static enum mie_status refill_linebuf(struct mie_line_source *src)
{
if (!src->s_stream) {
return MIE_ERR_EOF;
}
if (src->s_linebuf_ptr) {
b_iterator_unref(src->s_linebuf_ptr);
src->s_linebuf_ptr = NULL;
}
b_stringstream *s = b_stringstream_create();
b_status status = b_stream_read_line_s(src->s_stream, s);
if (status == B_ERR_NO_DATA) {
return MIE_ERR_EOF;
}
if (!B_OK(status)) {
return MIE_ERR_INTERNAL_FAILURE;
}
b_string *line = b_string_create();
b_string_replace_all_with_stringstream(line, s);
b_stringstream_unref(s);
b_array_append(src->s_lines, line);
b_string_unref(line);
src->s_linebuf = line;
src->s_linebuf_ptr = b_iterator_begin(src->s_linebuf);
return MIE_SUCCESS;
}
/*
 * Return the next character without consuming it.
 *
 * When there is no current line, or the current line has been fully
 * consumed, a new line is read first. Failures are reported as a negated
 * mie_status; an empty current line is reported as -MIE_ERR_EOF.
 */
static int peek(struct mie_line_source *src)
{
	bool need_line
		= !src->s_linebuf_ptr || !b_iterator_is_valid(src->s_linebuf_ptr);

	if (need_line) {
		enum mie_status status = refill_linebuf(src);
		if (status != MIE_SUCCESS) {
			return -status;
		}
	}

	if (b_string_get_size(src->s_linebuf, B_STRLEN_NORMAL) == 0) {
		return -MIE_ERR_EOF;
	}

	b_wchar next = b_iterator_get_value(src->s_linebuf_ptr).v_int;
	return next;
}
/*
 * Consume and return the next character, updating the cursor.
 *
 * When there is no current line, or the current line has been fully
 * consumed, a new line is read first. Failures are reported as a negated
 * mie_status; an empty current line is reported as -MIE_ERR_EOF. In both
 * cases nothing is consumed and the cursor is unchanged.
 */
static int advance(struct mie_line_source *src)
{
	enum mie_status status = MIE_SUCCESS;

	/*
	 * s_linebuf_ptr is NULL before the first line is read and after a
	 * failed refill, so it has to be checked before it is handed to
	 * b_iterator_is_valid() -- the same guard peek() uses.
	 */
	if (!src->s_linebuf_ptr || !b_iterator_is_valid(src->s_linebuf_ptr)) {
		status = refill_linebuf(src);
	}

	if (status != MIE_SUCCESS) {
		return -status;
	}

	if (b_string_get_size(src->s_linebuf, B_STRLEN_NORMAL) == 0) {
		return -MIE_ERR_EOF;
	}

	b_wchar c = b_iterator_get_value(src->s_linebuf_ptr).v_int;
	b_iterator_move_next(src->s_linebuf_ptr);

	/* a line feed moves the cursor to the start of the next row */
	src->s_cursor.c_col++;
	if (c == '\n') {
		src->s_cursor.c_col = 1;
		src->s_cursor.c_row++;
	}

	return c;
}
/*
 * Public wrapper around peek(): the next character without consuming it, or
 * a negated mie_status on failure.
 */
b_wchar mie_line_source_peekc(struct mie_line_source *src)
{
	b_wchar next = peek(src);
	return next;
}
/*
 * Public wrapper around advance(): consumes and returns the next character,
 * or returns a negated mie_status on failure.
 */
b_wchar mie_line_source_getc(struct mie_line_source *src)
{
	b_wchar consumed = advance(src);
	return consumed;
}
/*
 * Fetch a previously read line by its 1-based row number.
 *
 * Row 0 is rejected with MIE_ERR_INVALID_ARGUMENT. A row beyond the lines
 * stored so far yields MIE_ERR_EOF and leaves `*out` untouched. Otherwise
 * `*out` receives the stored line and MIE_SUCCESS is returned.
 */
enum mie_status mie_line_source_get_row(
	struct mie_line_source *src, size_t row, const b_string **out)
{
	if (row == 0) {
		return MIE_ERR_INVALID_ARGUMENT;
	}

	/* rows are 1-based, array slots are 0-based */
	size_t index = row - 1;

	if (index >= b_array_size(src->s_lines)) {
		return MIE_ERR_EOF;
	}

	b_string *stored = b_array_at(src->s_lines, index);
	*out = stored;

	return MIE_SUCCESS;
}
/*
 * Report whether the current line still has unread characters. Nothing is
 * read from the stream; with no current line iterator the answer is false.
 */
bool mie_line_source_input_available(struct mie_line_source *src)
{
	if (!src->s_linebuf_ptr) {
		return false;
	}

	return b_iterator_is_valid(src->s_linebuf_ptr);
}