From bd5ba9e9fd9d695d2daeb22d5851560196155158 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Tue, 27 Jan 2026 20:46:08 +0000 Subject: [PATCH] mie: lex: move file i/o handling to a separate struct ---
Patch summary:
  Line buffering, cursor tracking and character peek/get move out of
  struct mie_lex into the new struct mie_line_source
  (mie/include/mie/parse/line-source.h, mie/parse/line-source.c).
  mie_lex_create() now takes a struct mie_line_source * instead of a
  b_stream *, and the lexer reads through mie_line_source_peekc() /
  mie_line_source_getc(). Each line read by refill_linebuf() is appended to
  s_lines, and mie_line_source_get_row() returns a stored line by 1-based
  row number (row 0 is rejected with MIE_ERR_INVALID_ARGUMENT).

Review changes folded into this revision (mie/parse/line-source.c only):
  - refill_linebuf(): call b_stringstream_unref(s) before returning on the
    B_ERR_NO_DATA and !B_OK(status) paths. Only the success path released
    the temporary stringstream, so every failed read, including the one
    that reports end of input, leaked it.
  - advance(): check s_linebuf_ptr for NULL before b_iterator_is_valid(),
    as peek() already does. The pointer is NULL after
    mie_line_source_init() and again after a refill that hits end of input,
    so mie_line_source_getc() could pass NULL to the iterator API.
  - The line-source.c hunk header, diffstat row and insertion total are
    adjusted for the two added lines (161 -> 163, 246 -> 248). The blob id
    on the index line was NOT regenerated.

Not changed here (unchanged lex.c context, suggested follow-up):
  - skip_ignored_chars() loops on `while (c != '\n')` after a ';' without
    testing for a negative return from mie_line_source_peekc(); input that
    ends inside a comment appears never to leave that loop.
  - read_string() appears to have the same shape for an unterminated string
    literal (part of its body is outside the hunks shown, so confirm).

 mie/include/mie/parse/lex.h | 3 +- mie/include/mie/parse/line-source.h | 35 ++++++ mie/parse/lex.c | 183 +++++++--------------------- mie/parse/lex.h | 10 +- mie/parse/line-source.c | 163 ++++++++++++++++++++++++ 5 files changed, 248 insertions(+), 146 deletions(-) create mode 100644 mie/include/mie/parse/line-source.h create mode 100644 mie/parse/line-source.c diff --git a/mie/include/mie/parse/lex.h b/mie/include/mie/parse/lex.h index 300e5ce..3ba7e57 100644 --- a/mie/include/mie/parse/lex.h +++ b/mie/include/mie/parse/lex.h @@ -7,8 +7,9 @@ struct mie_lex; struct mie_token; +struct mie_line_source; -MIE_API struct mie_lex *mie_lex_create(b_stream *src); +MIE_API struct mie_lex *mie_lex_create(struct mie_line_source *src); MIE_API void mie_lex_destroy(struct mie_lex *lex); MIE_API enum mie_status mie_lex_get_status(const struct mie_lex *lex); diff --git a/mie/include/mie/parse/line-source.h b/mie/include/mie/parse/line-source.h new file mode 100644 index 0000000..abbb7e0 --- /dev/null +++ b/mie/include/mie/parse/line-source.h @@ -0,0 +1,35 @@ +#ifndef MIE_PARSE_LINE_SOURCE_H_ +#define MIE_PARSE_LINE_SOURCE_H_ + +#include +#include +#include +#include +#include +#include + +struct mie_line_source { + b_stream *s_stream; + const char *s_path; + b_string *s_linebuf; + b_iterator *s_linebuf_ptr; + b_array *s_lines; + + struct mie_file_cell s_cursor; +}; + +MIE_API enum mie_status mie_line_source_init( + struct mie_line_source *src, const char *path, b_stream *stream); +MIE_API void mie_line_source_cleanup(struct mie_line_source *src); + +MIE_API const char *mie_line_source_get_path(const struct mie_line_source *src); +MIE_API const struct mie_file_cell *mie_line_source_get_cursor( + const struct mie_line_source *src); +MIE_API b_wchar mie_line_source_peekc(struct 
mie_line_source *src); +MIE_API b_wchar mie_line_source_getc(struct mie_line_source *src); +MIE_API enum mie_status mie_line_source_get_row( + struct mie_line_source *src, size_t row, const b_string **out); + +MIE_API bool mie_line_source_input_available(struct mie_line_source *src); + +#endif diff --git a/mie/parse/lex.c b/mie/parse/lex.c index 2ce84df..82a86e0 100644 --- a/mie/parse/lex.c +++ b/mie/parse/lex.c @@ -145,7 +145,7 @@ static struct mie_lex_symbol_node *build_symbol_tree(void) return root; } -struct mie_lex *mie_lex_create(b_stream *src) +struct mie_lex *mie_lex_create(struct mie_line_source *src) { struct mie_lex *lex = malloc(sizeof *lex); if (!lex) { @@ -154,11 +154,9 @@ struct mie_lex *mie_lex_create(b_stream *src) memset(lex, 0x0, sizeof *lex); - lex->lex_cursor_row = lex->lex_cursor_col = 1; lex->lex_status = MIE_SUCCESS; lex->lex_source = src; - lex->lex_linebuf = b_string_create(); lex->lex_sym_tree = build_symbol_tree(); if (!lex->lex_sym_tree) { @@ -184,10 +182,6 @@ void mie_lex_destroy(struct mie_lex *lex) entry = next; } - if (lex->lex_linebuf) { - free(lex->lex_linebuf); - } - if (lex->lex_sym_tree) { destroy_symbol_tree(lex->lex_sym_tree); } @@ -204,89 +198,6 @@ enum mie_status mie_lex_get_status(const struct mie_lex *lex) return lex->lex_status; } -static enum mie_status refill_linebuf(struct mie_lex *lex) -{ - if (!lex->lex_source) { - return MIE_ERR_EOF; - } - - if (lex->lex_linebuf_ptr) { - b_iterator_unref(lex->lex_linebuf_ptr); - lex->lex_linebuf_ptr = NULL; - } - - b_stringstream *s = b_stringstream_create(); - - b_status status = b_stream_read_line_s(lex->lex_source, s); - - if (status == B_ERR_NO_DATA) { - return MIE_ERR_EOF; - } - - if (!B_OK(status)) { - return MIE_ERR_INTERNAL_FAILURE; - } - - b_string_replace_all_with_stringstream(lex->lex_linebuf, s); - b_stringstream_unref(s); - - lex->lex_linebuf_ptr = b_iterator_begin(lex->lex_linebuf); - - return MIE_SUCCESS; -} - -static int peek(struct mie_lex *lex) -{ - enum 
mie_status status = MIE_SUCCESS; - - if (!lex->lex_linebuf_ptr || !b_iterator_is_valid(lex->lex_linebuf_ptr)) { - status = refill_linebuf(lex); - } - - if (status != MIE_SUCCESS) { - return -status; - } - - if (b_string_get_size(lex->lex_linebuf, B_STRLEN_NORMAL) == 0) { - return -MIE_ERR_EOF; - } - - b_wchar c = b_iterator_get_value(lex->lex_linebuf_ptr).v_int; - return c; -} - -static int advance(struct mie_lex *lex) -{ - enum mie_status status = MIE_SUCCESS; - - if (!b_iterator_is_valid(lex->lex_linebuf_ptr)) { - status = refill_linebuf(lex); - } - - if (status != MIE_SUCCESS) { - return -status; - } - - if (b_string_get_size(lex->lex_linebuf, B_STRLEN_NORMAL) == 0) { - return -MIE_ERR_EOF; - } - - b_wchar c = b_iterator_get_value(lex->lex_linebuf_ptr).v_int; - b_iterator_move_next(lex->lex_linebuf_ptr); - - lex->lex_cursor_col++; - if (c == '\n') { - lex->lex_cursor_col = 1; - lex->lex_cursor_row++; - } - return c; -} - -static bool input_available(struct mie_lex *lex) -{ - return lex->lex_linebuf_ptr && b_iterator_is_valid(lex->lex_linebuf_ptr); -} - static bool char_can_begin_symbol(char c) { for (size_t i = 0; i < nr_symbols; i++) { @@ -313,22 +224,18 @@ static struct mie_token *create_token(enum mie_token_type type) static void set_token_start(struct mie_lex *lex) { - lex->lex_token_start_row = lex->lex_cursor_row; - lex->lex_token_start_col = lex->lex_cursor_col; + lex->lex_token_start = *mie_line_source_get_cursor(lex->lex_source); } static void set_token_end(struct mie_lex *lex) { - lex->lex_token_end_row = lex->lex_cursor_row; - lex->lex_token_end_col = lex->lex_cursor_col; + lex->lex_token_end = *mie_line_source_get_cursor(lex->lex_source); } static enum mie_status push_token(struct mie_lex *lex, struct mie_token *tok) { - tok->tok_location.s_start.c_row = lex->lex_token_start_row; - tok->tok_location.s_start.c_col = lex->lex_token_start_col; - tok->tok_location.s_end.c_row = lex->lex_token_end_row; - tok->tok_location.s_end.c_col = 
lex->lex_token_end_col; + tok->tok_location.s_start = lex->lex_token_start; + tok->tok_location.s_end = lex->lex_token_end; b_queue_push_back(&lex->lex_queue, &tok->tok_entry); return MIE_SUCCESS; @@ -423,7 +330,7 @@ static enum mie_status push_float(struct mie_lex *lex, double v) static enum mie_status read_line_comment(struct mie_lex *lex) { while (true) { - b_wchar c = advance(lex); + b_wchar c = mie_line_source_getc(lex->lex_source); if (c == -MIE_ERR_EOF || c == '\n') { break; @@ -449,7 +356,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate) } while (true) { - b_wchar c = peek(lex); + b_wchar c = mie_line_source_peekc(lex->lex_source); if (c == -MIE_ERR_EOF) { break; } @@ -461,7 +368,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate) if (c == '_') { token_len++; set_token_end(lex); - advance(lex); + mie_line_source_getc(lex->lex_source); continue; } @@ -479,7 +386,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate) char s[] = {c, 0}; b_string_append_cstr(str, s); set_token_end(lex); - advance(lex); + mie_line_source_getc(lex->lex_source); continue; } @@ -491,7 +398,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate) base = 16; token_len++; set_token_end(lex); - advance(lex); + mie_line_source_getc(lex->lex_source); continue; } @@ -499,7 +406,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate) base = 2; token_len++; set_token_end(lex); - advance(lex); + mie_line_source_getc(lex->lex_source); continue; } @@ -517,7 +424,7 @@ static enum mie_status read_number(struct mie_lex *lex, bool negate) b_string_append_wc(str, c); set_token_end(lex); - advance(lex); + mie_line_source_getc(lex->lex_source); token_len++; } @@ -569,7 +476,7 @@ static enum mie_status read_ident(struct mie_lex *lex, enum mie_token_type type) } while (1) { - b_wchar c = peek(lex); + b_wchar c = mie_line_source_peekc(lex->lex_source); if ((c == '.' 
|| c == '-') && prev == c) { return MIE_ERR_BAD_SYNTAX; @@ -586,7 +493,7 @@ static enum mie_status read_ident(struct mie_lex *lex, enum mie_token_type type) prev = c; b_string_append_wc(str, c); set_token_end(lex); - advance(lex); + mie_line_source_getc(lex->lex_source); } if (type == MIE_TOK_NONE) { @@ -613,17 +520,17 @@ static enum mie_status read_string(struct mie_lex *lex) { b_string *str = get_temp_string(lex); - b_wchar c = peek(lex); + b_wchar c = mie_line_source_peekc(lex->lex_source); bool esc = false; if (c != '"') { return MIE_ERR_BAD_SYNTAX; } - advance(lex); + mie_line_source_getc(lex->lex_source); while (1) { - b_wchar c = peek(lex); + b_wchar c = mie_line_source_peekc(lex->lex_source); if (esc) { switch (c) { @@ -636,23 +543,23 @@ static enum mie_status read_string(struct mie_lex *lex) } esc = false; - advance(lex); + mie_line_source_getc(lex->lex_source); continue; } if (c == '\\') { esc = true; - advance(lex); + mie_line_source_getc(lex->lex_source); continue; } if (c == '"') { - advance(lex); + mie_line_source_getc(lex->lex_source); break; } b_string_append_wc(str, c); - advance(lex); + mie_line_source_getc(lex->lex_source); } char *s = b_string_steal(str); @@ -666,7 +573,7 @@ static enum mie_status read_symbol(struct mie_lex *lex) b_wchar prev = 0; while (true) { - b_wchar c = peek(lex); + b_wchar c = mie_line_source_peekc(lex->lex_source); if (c < 0) { break; } @@ -679,7 +586,7 @@ static enum mie_status read_symbol(struct mie_lex *lex) node = next; set_token_end(lex); - advance(lex); + mie_line_source_getc(lex->lex_source); prev = c; } @@ -734,11 +641,11 @@ static enum mie_status read_symbol(struct mie_lex *lex) static void skip_whitespace(struct mie_lex *lex) { - b_wchar c = peek(lex); + b_wchar c = mie_line_source_peekc(lex->lex_source); while (b_wchar_is_space(c)) { - advance(lex); - c = peek(lex); + mie_line_source_getc(lex->lex_source); + c = mie_line_source_peekc(lex->lex_source); } } @@ -755,34 +662,34 @@ static bool should_skip(b_wchar 
c, bool skip_linefeeds) static void skip_ignored_chars(struct mie_lex *lex, bool include_linefeeds) { - b_wchar c = peek(lex); + b_wchar c = mie_line_source_peekc(lex->lex_source); while (1) { while (should_skip(c, include_linefeeds)) { - advance(lex); - c = peek(lex); + mie_line_source_getc(lex->lex_source); + c = mie_line_source_peekc(lex->lex_source); } if (c != ';') { break; } - advance(lex); - c = peek(lex); + mie_line_source_getc(lex->lex_source); + c = mie_line_source_peekc(lex->lex_source); while (c != '\n') { - advance(lex); - c = peek(lex); + mie_line_source_getc(lex->lex_source); + c = mie_line_source_peekc(lex->lex_source); } - advance(lex); - c = peek(lex); + mie_line_source_getc(lex->lex_source); + c = mie_line_source_peekc(lex->lex_source); } } static enum mie_status pump_tokens(struct mie_lex *lex) { - b_wchar c = peek(lex); + b_wchar c = mie_line_source_peekc(lex->lex_source); if (c < 0) { return -c; @@ -795,13 +702,13 @@ static enum mie_status pump_tokens(struct mie_lex *lex) break; } - c = peek(lex); + c = mie_line_source_peekc(lex->lex_source); } if (c == '\\') { - advance(lex); + mie_line_source_getc(lex->lex_source); skip_ignored_chars(lex, true); - c = peek(lex); + c = mie_line_source_peekc(lex->lex_source); } if (c == '\n') { @@ -809,13 +716,13 @@ static enum mie_status pump_tokens(struct mie_lex *lex) set_token_end(lex); while (c == '\n') { - advance(lex); + mie_line_source_getc(lex->lex_source); - if (!input_available(lex)) { + if (!mie_line_source_input_available(lex->lex_source)) { break; } - c = peek(lex); + c = mie_line_source_peekc(lex->lex_source); } if (c < 0) { @@ -826,8 +733,8 @@ static enum mie_status pump_tokens(struct mie_lex *lex) } while (b_wchar_is_space(c) && c != '\n') { - advance(lex); - c = peek(lex); + mie_line_source_getc(lex->lex_source); + c = mie_line_source_peekc(lex->lex_source); } if (IS_VALID_IDENT_START_CHAR(c)) { @@ -893,7 +800,7 @@ bool mie_lex_tokens_available(struct mie_lex *lex) return true; } - if 
(input_available(lex)) { + if (mie_line_source_input_available(lex->lex_source)) { return true; } diff --git a/mie/parse/lex.h b/mie/parse/lex.h index 30a2f3f..ad8ca64 100644 --- a/mie/parse/lex.h +++ b/mie/parse/lex.h @@ -5,13 +5,14 @@ #include #include #include +#include #include #include #include struct mie_lex { struct mie_lex_symbol_node *lex_sym_tree; - b_stream *lex_source; + struct mie_line_source *lex_source; enum mie_status lex_status; b_queue lex_queue; @@ -20,12 +21,7 @@ struct mie_lex { b_queue lex_state; unsigned int lex_brace_depth; - unsigned long lex_token_start_row, lex_token_start_col; - unsigned long lex_token_end_row, lex_token_end_col; - unsigned long lex_cursor_row, lex_cursor_col; - - b_string *lex_linebuf; - b_iterator *lex_linebuf_ptr; + struct mie_file_cell lex_token_start, lex_token_end; }; struct mie_lex_symbol_node { diff --git a/mie/parse/line-source.c b/mie/parse/line-source.c new file mode 100644 index 0000000..f8e2e8d --- /dev/null +++ b/mie/parse/line-source.c @@ -0,0 +1,163 @@ +#include + +enum mie_status mie_line_source_init( + struct mie_line_source *src, const char *path, b_stream *stream) +{ + memset(src, 0x0, sizeof *src); + + src->s_lines = b_array_create(); + + if (!src->s_lines) { + return MIE_ERR_NO_MEMORY; + } + + src->s_stream = stream; + src->s_path = path; + src->s_cursor.c_col = 1; + src->s_cursor.c_row = 1; + + return MIE_SUCCESS; +} + +void mie_line_source_cleanup(struct mie_line_source *src) +{ + if (src->s_linebuf_ptr) { + b_iterator_unref(src->s_linebuf_ptr); + } + + if (src->s_lines) { + b_array_unref(src->s_lines); + } + + memset(src, 0x0, sizeof *src); +} + +const char *mie_line_source_get_path(const struct mie_line_source *src) +{ + return src->s_path; +} + +const struct mie_file_cell *mie_line_source_get_cursor( + const struct mie_line_source *src) +{ + return &src->s_cursor; +} + +static enum mie_status refill_linebuf(struct mie_line_source *src) +{ + if (!src->s_stream) { + return MIE_ERR_EOF; + } + + if 
(src->s_linebuf_ptr) { + b_iterator_unref(src->s_linebuf_ptr); + src->s_linebuf_ptr = NULL; + } + + b_stringstream *s = b_stringstream_create(); + + b_status status = b_stream_read_line_s(src->s_stream, s); + + if (status == B_ERR_NO_DATA) { + b_stringstream_unref(s); + return MIE_ERR_EOF; + } + + if (!B_OK(status)) { + b_stringstream_unref(s); + return MIE_ERR_INTERNAL_FAILURE; + } + + b_string *line = b_string_create(); + b_string_replace_all_with_stringstream(line, s); + b_stringstream_unref(s); + + b_array_append(src->s_lines, line); + b_string_unref(line); + + src->s_linebuf = line; + src->s_linebuf_ptr = b_iterator_begin(src->s_linebuf); + + return MIE_SUCCESS; +} + +static int peek(struct mie_line_source *src) +{ + enum mie_status status = MIE_SUCCESS; + + if (!src->s_linebuf_ptr || !b_iterator_is_valid(src->s_linebuf_ptr)) { + status = refill_linebuf(src); + } + + if (status != MIE_SUCCESS) { + return -status; + } + + if (b_string_get_size(src->s_linebuf, B_STRLEN_NORMAL) == 0) { + return -MIE_ERR_EOF; + } + + b_wchar c = b_iterator_get_value(src->s_linebuf_ptr).v_int; + return c; +} + +static int advance(struct mie_line_source *src) +{ + enum mie_status status = MIE_SUCCESS; + + if (!src->s_linebuf_ptr || !b_iterator_is_valid(src->s_linebuf_ptr)) { + status = refill_linebuf(src); + } + + if (status != MIE_SUCCESS) { + return -status; + } + + if (b_string_get_size(src->s_linebuf, B_STRLEN_NORMAL) == 0) { + return -MIE_ERR_EOF; + } + + b_wchar c = b_iterator_get_value(src->s_linebuf_ptr).v_int; + b_iterator_move_next(src->s_linebuf_ptr); + + src->s_cursor.c_col++; + if (c == '\n') { + src->s_cursor.c_col = 1; + src->s_cursor.c_row++; + } + return c; +} + +b_wchar mie_line_source_peekc(struct mie_line_source *src) +{ + return peek(src); +} + +b_wchar mie_line_source_getc(struct mie_line_source *src) +{ + return advance(src); +} + +enum mie_status mie_line_source_get_row( + struct mie_line_source *src, size_t row, const b_string **out) +{ + if (row == 
0) { + return MIE_ERR_INVALID_ARGUMENT; + } + + row--; + + if (row >= 
b_array_size(src->s_lines)) { + return MIE_ERR_EOF; + } + + b_string *line = b_array_at(src->s_lines, row); + *out = line; + + return MIE_SUCCESS; +} + +bool mie_line_source_input_available(struct mie_line_source *src) +{ + return src->s_linebuf_ptr && b_iterator_is_valid(src->s_linebuf_ptr); +}