From e430b7b2f1c2cfc500b3ace78edd215788f52104 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Tue, 15 Apr 2025 11:02:47 +0100 Subject: [PATCH] lang: add var keyword for variable declarations The var keyword allows greater control over which scope a particular variable exists in. It clarifies whether a new variable is being defined or an existing variable is being assigned to. It will also facilitate the implementation of global variables. --- lang/ast/block.c | 8 +++ lang/ast/expr/arith.c | 21 ++++++ lang/ast/node.c | 3 + lang/ast/unit.c | 9 +++ lang/ast/var.c | 133 ++++++++++++++++++++++++++++++++++++ lang/codegen/expr.c | 9 +-- lang/include/ivy/lang/ast.h | 8 +++ lang/include/ivy/lang/lex.h | 1 + lang/lex.c | 2 + 9 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 lang/ast/var.c diff --git a/lang/ast/block.c b/lang/ast/block.c index 58c45a7..6e918c7 100644 --- a/lang/ast/block.c +++ b/lang/ast/block.c @@ -101,6 +101,13 @@ static struct token_parse_result parse_symbol( return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); } +static struct token_parse_result parse_var( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + parser_push_state(ctx, IVY_AST_VAR, 0); + return PARSE_RESULT(IVY_OK, 0); +} + static struct token_parse_result parse_expr_begin( struct ivy_parser *ctx, struct ivy_token *tok) { @@ -150,6 +157,7 @@ struct ast_node_type block_node_ops = { .n_state_size = sizeof(struct block_parser_state), .n_node_size = sizeof(struct ivy_ast_block_node), .n_keyword_parsers = { + KW_PARSER(VAR, parse_var), KW_PARSER(END, parse_end), KW_PARSER(ELSE, parse_else), KW_PARSER_FALLBACK(parse_keyword), diff --git a/lang/ast/expr/arith.c b/lang/ast/expr/arith.c index fb36d2a..f65e248 100644 --- a/lang/ast/expr/arith.c +++ b/lang/ast/expr/arith.c @@ -472,18 +472,31 @@ struct token_parse_result arith_parse_operator( return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); } + unsigned short tok_id = 0; + switch (tok->t_type) { case IVY_TOK_SYMBOL: + tok_id = tok->t_symbol; 
state->s_prev_token = tok->t_symbol; break; case IVY_TOK_KEYWORD: + tok_id = tok->t_keyword; state->s_prev_token = tok->t_keyword; break; default: + tok_id = tok->t_type; state->s_prev_token = tok->t_type; break; } + if (expr_terminates_at_token(state, tok_id)) { + /* treat this as a statement terminator. */ + struct token_parse_result result + = expr_finalise_and_return(ctx, state); + result.r_flags |= PARSE_REPEAT_TOKEN; + return result; + } + struct ivy_ast_node *op = create_operator_node_from_token(tok); if (!op) { return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); @@ -1136,6 +1149,14 @@ struct token_parse_result arith_parse_dot( return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); } + if (expr_terminates_at_token(state, IVY_SYM_DOT)) { + /* treat this as a statement terminator. */ + struct token_parse_result result + = expr_finalise_and_return(ctx, state); + result.r_flags |= PARSE_REPEAT_TOKEN; + return result; + } + state->s_prev_token = IVY_SYM_DOT; return expr_finalise_and_return(ctx, state); } diff --git a/lang/ast/node.c b/lang/ast/node.c index b93545f..ea273a1 100644 --- a/lang/ast/node.c +++ b/lang/ast/node.c @@ -16,6 +16,7 @@ extern struct ast_node_type expr_node_ops; extern struct ast_node_type block_node_ops; extern struct ast_node_type msg_node_ops; extern struct ast_node_type op_node_ops; +extern struct ast_node_type var_node_ops; extern struct ast_node_type ident_node_ops; extern struct ast_node_type int_node_ops; extern struct ast_node_type double_node_ops; @@ -51,6 +52,7 @@ static const struct ast_node_type *node_ops[] = { [IVY_AST_BLOCK] = &block_node_ops, [IVY_AST_MSG] = &msg_node_ops, [IVY_AST_OP] = &op_node_ops, + [IVY_AST_VAR] = &var_node_ops, [IVY_AST_IDENT] = &ident_node_ops, [IVY_AST_INT] = &int_node_ops, [IVY_AST_DOUBLE] = &double_node_ops, @@ -268,6 +270,7 @@ const char *ivy_ast_node_type_to_string(enum ivy_ast_node_type v) ENUM_STR(IVY_AST_UNIT_IMPORT); ENUM_STR(IVY_AST_DISCARD); ENUM_STR(IVY_AST_INT); + ENUM_STR(IVY_AST_VAR); 
ENUM_STR(IVY_AST_DOUBLE); ENUM_STR(IVY_AST_STRING); ENUM_STR(IVY_AST_FSTRING); diff --git a/lang/ast/unit.c b/lang/ast/unit.c index fc30e8d..8335767 100644 --- a/lang/ast/unit.c +++ b/lang/ast/unit.c @@ -1,4 +1,5 @@ #include "ctx.h" +#include "expr/expr.h" #include "iterate.h" #include "node.h" @@ -18,6 +19,13 @@ static struct token_parse_result parse_use_keyword( return PARSE_RESULT(IVY_OK, 0); } +static struct token_parse_result parse_var_keyword( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + parser_push_state(ctx, IVY_AST_VAR, 0); + return PARSE_RESULT(IVY_OK, 0); +} + static struct token_parse_result parse_class_keyword( struct ivy_parser *ctx, struct ivy_token *tok) { @@ -67,6 +75,7 @@ struct ast_node_type unit_node_ops = { KW_PARSER(PACKAGE, parse_package_keyword), KW_PARSER(CLASS, parse_class_keyword), KW_PARSER(USE, parse_use_keyword), + KW_PARSER(VAR, parse_var_keyword), }, .n_symbol_parsers = { SYM_PARSER(DOT, parse_dot), diff --git a/lang/ast/var.c b/lang/ast/var.c new file mode 100644 index 0000000..fe172e3 --- /dev/null +++ b/lang/ast/var.c @@ -0,0 +1,133 @@ +#include "ctx.h" +#include "expr/expr.h" +#include "iterate.h" +#include "node.h" + +#include + +struct var_parser_state { + struct parser_state s_base; + unsigned int s_prev_token; +}; + +static void init_state(struct ivy_parser *ctx, struct parser_state *sp, uintptr_t arg) +{ + struct var_parser_state *state = (struct var_parser_state *)sp; + state->s_prev_token = IVY_KW_VAR; +} + +struct token_parse_result parse_ident(struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct var_parser_state *state + = parser_get_state(ctx, struct var_parser_state); + if (state->s_prev_token != IVY_KW_VAR) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + struct expr_parser_state *ident_parse + = (struct expr_parser_state *)parser_push_state( + ctx, IVY_AST_EXPR, 0); + expr_add_terminator(ident_parse, IVY_SYM_EQUAL); + expr_add_terminator(ident_parse, IVY_SYM_DOT); + return PARSE_RESULT(IVY_OK, 
PARSE_REPEAT_TOKEN); +} + +struct token_parse_result parse_left_paren( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct var_parser_state *state + = parser_get_state(ctx, struct var_parser_state); + + if (state->s_prev_token != IVY_KW_VAR) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + struct expr_parser_state *tuple_ident_parse + = (struct expr_parser_state *)parser_push_state( + ctx, IVY_AST_EXPR, 0); + expr_add_terminator(tuple_ident_parse, IVY_SYM_EQUAL); + return PARSE_RESULT(IVY_OK, PARSE_REPEAT_TOKEN); +} + +struct token_parse_result parse_equal(struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct var_parser_state *state + = parser_get_state(ctx, struct var_parser_state); + struct ivy_ast_var_node *var + = (struct ivy_ast_var_node *)state->s_base.s_node; + if (!var->n_left) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + state->s_prev_token = IVY_SYM_EQUAL; + + struct expr_parser_state *val_parse + = (struct expr_parser_state *)parser_push_state( + ctx, IVY_AST_EXPR, 0); + expr_add_terminator(val_parse, IVY_SYM_DOT); + return PARSE_RESULT(IVY_OK, 0); +} + +struct token_parse_result parse_dot(struct ivy_parser *ctx, struct ivy_token *tok) +{ + parser_pop_state(ctx, STATE_ADD_NODE_TO_PARENT); + return PARSE_RESULT(IVY_OK, 0); +} + +static enum ivy_status add_child( + struct parser_state *parent, struct ivy_ast_node *child) +{ + struct var_parser_state *state = (struct var_parser_state *)parent; + struct ivy_ast_var_node *var = (struct ivy_ast_var_node *)parent->s_node; + + switch (state->s_prev_token) { + case IVY_KW_VAR: + if (var->n_left) { + return IVY_ERR_BAD_SYNTAX; + } + + var->n_left = child; + break; + case IVY_SYM_EQUAL: + if (var->n_val) { + return IVY_ERR_BAD_SYNTAX; + } + + var->n_val = child; + break; + default: + return IVY_ERR_BAD_SYNTAX; + } + + return IVY_OK; +} + +static void collect_children( + struct ivy_ast_node *node, struct ivy_ast_node_iterator *iterator) +{ + struct ivy_ast_var_node *var = (struct 
ivy_ast_var_node *)node; + + if (var->n_left) { + ast_node_iterator_enqueue_node(iterator, node, var->n_left); + } + + if (var->n_val) { + ast_node_iterator_enqueue_node(iterator, node, var->n_val); + } +} + +struct ast_node_type var_node_ops = { + .n_add_child = add_child, + .n_collect_children = collect_children, + .n_state_size = sizeof(struct var_parser_state), + .n_node_size = sizeof(struct ivy_ast_var_node), + .n_init_state = init_state, + .n_token_parsers = { + TOK_PARSER(IDENT, parse_ident), + }, + .n_symbol_parsers = { + SYM_PARSER(LEFT_PAREN, parse_left_paren), + SYM_PARSER(EQUAL, parse_equal), + SYM_PARSER(DOT, parse_dot), + }, +}; diff --git a/lang/codegen/expr.c b/lang/codegen/expr.c index 370f3d6..a0e3c8d 100644 --- a/lang/codegen/expr.c +++ b/lang/codegen/expr.c @@ -1,3 +1,4 @@ +#include "../debug.h" #include "codegen.h" #include @@ -25,7 +26,7 @@ static struct code_generator_result gen_int( { struct expr_codegen_state *expr = (struct expr_codegen_state *)state; - printf("codegen: got int\n"); + debug_printf("codegen: got int\n"); struct ivy_ast_int_node *int_node = (struct ivy_ast_int_node *)node; struct mie_value *value @@ -46,7 +47,7 @@ static struct code_generator_result gen_op( { struct expr_codegen_state *expr = (struct expr_codegen_state *)state; - printf("codegen: got operator\n"); + debug_printf("codegen: got operator\n"); struct mie_value *left = codegen_pop_value(gen); struct mie_value *right = codegen_pop_value(gen); @@ -100,14 +101,14 @@ static struct code_generator_result gen_op( static enum ivy_status state_init( struct ivy_codegen *gen, struct code_generator_state *state) { - printf("codegen: start of expression\n"); + debug_printf("codegen: start of expression\n"); return IVY_OK; } static enum ivy_status state_fini( struct ivy_codegen *gen, struct code_generator_state *state) { - printf("codegen: end of expression\n"); + debug_printf("codegen: end of expression\n"); return IVY_OK; } diff --git a/lang/include/ivy/lang/ast.h 
b/lang/include/ivy/lang/ast.h index de68c2b..8236276 100644 --- a/lang/include/ivy/lang/ast.h +++ b/lang/include/ivy/lang/ast.h @@ -25,6 +25,7 @@ enum ivy_ast_node_type { IVY_AST_LAMBDA, IVY_AST_UNIT_PACKAGE, IVY_AST_UNIT_IMPORT, + IVY_AST_VAR, IVY_AST_DISCARD, IVY_AST_INT, IVY_AST_DOUBLE, @@ -87,6 +88,13 @@ struct ivy_ast_op_node { struct ivy_ast_node *n_right; }; +struct ivy_ast_var_node { + struct ivy_ast_node n_base; + /* could be a simple ident, a tuple, etc */ + struct ivy_ast_node *n_left; + struct ivy_ast_node *n_val; +}; + struct ivy_ast_return_node { struct ivy_ast_node n_base; struct ivy_ast_node *n_val; diff --git a/lang/include/ivy/lang/lex.h b/lang/include/ivy/lang/lex.h index ec0e339..b246df1 100644 --- a/lang/include/ivy/lang/lex.h +++ b/lang/include/ivy/lang/lex.h @@ -51,6 +51,7 @@ enum ivy_keyword { IVY_KW_GET, IVY_KW_SET, IVY_KW_END, + IVY_KW_VAR, __IVY_KW_INDEX_LIMIT, }; diff --git a/lang/lex.c b/lang/lex.c index ebbb103..bbb30d3 100644 --- a/lang/lex.c +++ b/lang/lex.c @@ -46,6 +46,7 @@ static struct lex_token_def keywords[] = { LEX_TOKEN_DEF(IVY_KW_GET, "get"), LEX_TOKEN_DEF(IVY_KW_SET, "set"), LEX_TOKEN_DEF(IVY_KW_END, "end"), + LEX_TOKEN_DEF(IVY_KW_VAR, "var"), }; static const size_t nr_keywords = sizeof keywords / sizeof keywords[0]; @@ -1154,6 +1155,7 @@ const char *ivy_keyword_to_string(enum ivy_keyword keyword) ENUM_STR(IVY_KW_GET); ENUM_STR(IVY_KW_SET); ENUM_STR(IVY_KW_END); + ENUM_STR(IVY_KW_VAR); default: return ""; }