From 1c3aff9ad39d47818253accd37c29fe88d399c7a Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 28 Apr 2025 22:53:21 +0100 Subject: [PATCH] lang: add start/end coordinates to lexer tokens --- lang/include/ivy/lang/lex.h | 9 ++++++++ lang/lex.c | 45 +++++++++++++++++++++++++++++++++++++ lang/lex.h | 4 ++++ 3 files changed, 58 insertions(+) diff --git a/lang/include/ivy/lang/lex.h b/lang/include/ivy/lang/lex.h index 07dbe4a..f12f095 100644 --- a/lang/include/ivy/lang/lex.h +++ b/lang/include/ivy/lang/lex.h @@ -113,6 +113,15 @@ enum ivy_symbol { struct ivy_token { enum ivy_token_type t_type; + + struct { + unsigned long p_row, p_col; + } t_start; + + struct { + unsigned long p_row, p_col; + } t_end; + b_queue_entry t_entry; union { diff --git a/lang/lex.c b/lang/lex.c index 176587d..925929d 100644 --- a/lang/lex.c +++ b/lang/lex.c @@ -274,6 +274,8 @@ enum ivy_status ivy_lexer_create(struct ivy_lexer **lexp) memset(lex, 0x0, sizeof *lex); + lex->lex_cursor_row = lex->lex_cursor_col = 1; + lex->lex_status = IVY_OK; lex->lex_prev_token = IVY_TOK_NONE; @@ -419,6 +421,13 @@ static int advance(struct ivy_lexer *lex) } int c = lex->lex_linebuf[lex->lex_linebuf_ptr++]; + + lex->lex_cursor_col++; + if (c == '\n') { + lex->lex_cursor_col = 1; + lex->lex_cursor_row++; + } + return c; } @@ -451,8 +460,25 @@ static struct ivy_token *create_token(enum ivy_token_type type) return tok; } +static void set_token_start(struct ivy_lexer *lex) +{ + lex->lex_token_start_row = lex->lex_cursor_row; + lex->lex_token_start_col = lex->lex_cursor_col; +} + +static void set_token_end(struct ivy_lexer *lex) +{ + lex->lex_token_end_row = lex->lex_cursor_row; + lex->lex_token_end_col = lex->lex_cursor_col; +} + static enum ivy_status push_token(struct ivy_lexer *lex, struct ivy_token *tok) { + tok->t_start.p_row = lex->lex_token_start_row; + tok->t_start.p_col = lex->lex_token_start_col; + tok->t_end.p_row = lex->lex_token_end_row; + tok->t_end.p_col = lex->lex_token_end_col; + 
b_queue_push_back(&lex->lex_queue, &tok->t_entry); lex->lex_prev_token = tok->t_type; return IVY_OK; @@ -698,6 +724,7 @@ static enum ivy_status read_atom(struct ivy_lexer *lex) char s[] = {c, 0}; b_string_append_cstr(str, s); + set_token_end(lex); advance(lex); } @@ -723,11 +750,13 @@ static enum ivy_status read_string_content(struct ivy_lexer *lex) } if (state->s_type == STATE_STRING && c == '"') { + set_token_end(lex); break; } char s[2] = {c, 0}; b_string_append_cstr(str, s); + set_token_end(lex); advance(lex); } @@ -749,6 +778,7 @@ static enum ivy_status read_symbol(struct ivy_lexer *lex) { struct ivy_lexer_symbol_node *node = lex->lex_sym_tree; struct lexer_state *state = get_lexer_state(lex); + set_token_start(lex); while (true) { int c = peek(lex); @@ -759,6 +789,7 @@ static enum ivy_status read_symbol(struct ivy_lexer *lex) } node = next; + set_token_end(lex); advance(lex); } @@ -806,6 +837,7 @@ static enum ivy_status read_number(struct ivy_lexer *lex) int base = 10; int dots = 0; b_string *str = get_temp_string(lex); + set_token_start(lex); while (true) { int c = peek(lex); @@ -819,6 +851,7 @@ static enum ivy_status read_number(struct ivy_lexer *lex) if (c == '_') { token_len++; + set_token_end(lex); advance(lex); continue; } @@ -836,6 +869,7 @@ static enum ivy_status read_number(struct ivy_lexer *lex) dots++; char s[] = {c, 0}; b_string_append_cstr(str, s); + set_token_end(lex); advance(lex); continue; } @@ -847,6 +881,7 @@ static enum ivy_status read_number(struct ivy_lexer *lex) if (c == '0' && token_len == 0) { base = 8; token_len++; + set_token_end(lex); advance(lex); continue; } @@ -854,6 +889,7 @@ static enum ivy_status read_number(struct ivy_lexer *lex) if (c == 'x' && token_len == 1) { base = 16; token_len++; + set_token_end(lex); advance(lex); continue; } @@ -861,6 +897,7 @@ static enum ivy_status read_number(struct ivy_lexer *lex) if (c == 'b' && token_len == 1) { base = 2; token_len++; + set_token_end(lex); advance(lex); continue; } @@ -879,6 
+916,7 @@ static enum ivy_status read_number(struct ivy_lexer *lex) char s[] = {c, 0}; b_string_append_cstr(str, s); + set_token_end(lex); advance(lex); token_len++; } @@ -917,6 +955,7 @@ static enum ivy_status read_ident(struct ivy_lexer *lex) { b_string *str = get_temp_string(lex); bool label = false; + set_token_start(lex); while (true) { int c = peek(lex); @@ -926,6 +965,7 @@ static enum ivy_status read_ident(struct ivy_lexer *lex) } if (c == ':' && peek_next(lex) != ':') { + set_token_end(lex); advance(lex); label = true; break; @@ -937,6 +977,7 @@ static enum ivy_status read_ident(struct ivy_lexer *lex) char s[2] = {c, 0}; b_string_append_cstr(str, s); + set_token_end(lex); advance(lex); } @@ -972,6 +1013,7 @@ static enum ivy_status pump_tokens(struct ivy_lexer *lex) } if (state->s_type == STATE_FSTRING && c != '\'' && c != '{') { + set_token_start(lex); return read_string_content(lex); } @@ -980,6 +1022,9 @@ static enum ivy_status pump_tokens(struct ivy_lexer *lex) state = NULL; if (c == '\n') { + set_token_start(lex); + set_token_end(lex); + while (c == '\n') { advance(lex); diff --git a/lang/lex.h b/lang/lex.h index 5a02db9..0a91498 100644 --- a/lang/lex.h +++ b/lang/lex.h @@ -21,6 +21,10 @@ struct ivy_lexer { b_queue lex_state; unsigned int lex_brace_depth; + unsigned long lex_token_start_row, lex_token_start_col; + unsigned long lex_token_end_row, lex_token_end_col; + unsigned long lex_cursor_row, lex_cursor_col; + char *lex_linebuf; size_t lex_linebuf_len; size_t lex_linebuf_cap;