From 7acf80f930fd438b9d0997d51feba3d6904c4c55 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Wed, 4 Dec 2024 22:22:25 +0000 Subject: [PATCH] lang: ast: implement parsing of match statements --- lang/ast/cond.c | 11 +- lang/ast/discard.c | 9 ++ lang/ast/expr/arith.c | 67 +++++++++- lang/ast/expr/expr.c | 8 ++ lang/ast/expr/expr.h | 10 +- lang/ast/expr/stmt.c | 33 +++++ lang/ast/match.c | 246 ++++++++++++++++++++++++++++++++++++ lang/ast/node.c | 6 + lang/ast/unit.c | 1 - lang/include/ivy/lang/ast.h | 18 ++- 10 files changed, 396 insertions(+), 13 deletions(-) create mode 100644 lang/ast/discard.c create mode 100644 lang/ast/match.c diff --git a/lang/ast/cond.c b/lang/ast/cond.c index 89439f1..f83c3c1 100644 --- a/lang/ast/cond.c +++ b/lang/ast/cond.c @@ -1,4 +1,5 @@ #include "block.h" +#include "iterate.h" #include "expr/expr.h" struct cond_group_parser_state { @@ -72,7 +73,7 @@ static enum ivy_status flush_current_branch(struct cond_group_parser_state *stat return IVY_OK; } -struct token_parse_result parse_then(struct ivy_parser *ctx, struct ivy_token *tok) +static struct token_parse_result parse_then(struct ivy_parser *ctx, struct ivy_token *tok) { struct cond_group_parser_state *state = parser_get_state(ctx, struct cond_group_parser_state); @@ -108,7 +109,7 @@ struct token_parse_result parse_then(struct ivy_parser *ctx, struct ivy_token *t return PARSE_RESULT(IVY_OK, 0); } -struct token_parse_result parse_else(struct ivy_parser *ctx, struct ivy_token *tok) +static struct token_parse_result parse_else(struct ivy_parser *ctx, struct ivy_token *tok) { enum ivy_status status; struct cond_group_parser_state *state @@ -167,7 +168,7 @@ struct token_parse_result parse_else(struct ivy_parser *ctx, struct ivy_token *t return PARSE_RESULT(IVY_OK, 0); } -struct token_parse_result parse_expr_begin(struct ivy_parser* ctx, struct ivy_token* tok) +static struct token_parse_result parse_expr_begin(struct ivy_parser* ctx, struct ivy_token* tok) { struct cond_group_parser_state
*state = parser_get_state(ctx, struct cond_group_parser_state); @@ -235,7 +236,7 @@ static enum ivy_status finalise_cond_group(struct cond_group_parser_state* state } } -struct token_parse_result parse_punct_terminator( +static struct token_parse_result parse_punct_terminator( struct ivy_parser *ctx, struct ivy_token *tok) { struct cond_group_parser_state *state @@ -255,7 +256,7 @@ struct token_parse_result parse_punct_terminator( return PARSE_RESULT(IVY_OK, PARSE_REPEAT_TOKEN); } -struct token_parse_result parse_end(struct ivy_parser *ctx, struct ivy_token *tok) +static struct token_parse_result parse_end(struct ivy_parser *ctx, struct ivy_token *tok) { struct cond_group_parser_state *state = parser_get_state(ctx, struct cond_group_parser_state); diff --git a/lang/ast/discard.c b/lang/ast/discard.c new file mode 100644 index 0000000..1b954ce --- /dev/null +++ b/lang/ast/discard.c @@ -0,0 +1,9 @@ +#include "ctx.h" +#include "node.h" + +#include + +struct ast_node_type discard_node_ops = { /* ops for IVY_AST_DISCARD nodes: only the state and node sizes are set, no token parsers or callbacks */ + .n_state_size = sizeof(struct parser_state), + .n_node_size = sizeof(struct ivy_ast_discard_node), +}; diff --git a/lang/ast/expr/arith.c b/lang/ast/expr/arith.c index c75b755..4bfaa62 100644 --- a/lang/ast/expr/arith.c +++ b/lang/ast/expr/arith.c @@ -50,7 +50,7 @@ static void print_operand(struct ivy_ast_node *node) } } -void arith_push_operand(struct expr_parser_state *state, struct ivy_token *tok) +enum ivy_status arith_push_operand(struct expr_parser_state *state, struct ivy_token *tok) { switch (tok->t_type) { case IVY_TOK_IDENT: { @@ -84,9 +84,20 @@ void arith_push_operand(struct expr_parser_state *state, struct ivy_token *tok) b_queue_push_back(&state->s_output_queue, &v->n_base.n_entry); break; } - default: + case IVY_TOK_SYMBOL: { /* `_` is the only symbol accepted as an operand; it becomes a discard node */ + if (tok->t_symbol != IVY_SYM_UNDERSCORE) { + return IVY_ERR_BAD_SYNTAX; + } + + struct ivy_ast_node *v = ast_node_create(IVY_AST_DISCARD); /* NOTE(review): the result is used without a NULL check, whereas match.c checks ast_node_create() for NULL -- confirm */ + b_queue_push_back(&state->s_output_queue, &v->n_entry); break; } + default: + return
IVY_ERR_BAD_SYNTAX; + } + + return IVY_OK; } static const struct ivy_operator *get_operator_from_token(struct ivy_token *tok) @@ -448,6 +459,20 @@ struct token_parse_result arith_parse_operator( return PARSE_RESULT(IVY_OK, 0); } +struct token_parse_result arith_parse_in( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct expr_parser_state *state + = parser_get_state(ctx, struct expr_parser_state); + + if (state->s_terminator == IVY_KW_IN) { /* `in` only ends an expression whose terminator was set to IVY_KW_IN (parse_match in match.c does this) */ + state->s_prev_token = IVY_KW_IN; + return expr_finalise_and_return(ctx, state); + } + + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); +} + struct token_parse_result arith_parse_ident( struct ivy_parser *ctx, struct ivy_token *tok) { @@ -790,21 +815,21 @@ struct token_parse_result expr_finalise( struct ivy_ast_cascade_node *cascade = expr_finalise_cascade(state); - *result = cascade; + *result = (struct ivy_ast_node *)cascade; return PARSE_RESULT(IVY_OK, flags); } if (state->s_sub_type == EXPR_SUBTYPE_COMPLEX_MSG) { /* this is the end of a keyword-message */ struct ivy_ast_msg_node *msg = expr_finalise_complex_msg(state); - *result = msg; + *result = (struct ivy_ast_node *)msg; return PARSE_RESULT(IVY_OK, 0); } if (state->s_sub_type == EXPR_SUBTYPE_KEYWORD_MSG) { /* this is the end of a keyword-message */ struct ivy_ast_msg_node *msg = expr_finalise_keyword_msg(state); - *result = msg; + *result = (struct ivy_ast_node *)msg; return PARSE_RESULT(IVY_OK, flags); } @@ -854,6 +879,38 @@ struct token_parse_result arith_parse_dot( return expr_finalise_and_return(ctx, state); } +struct token_parse_result arith_parse_comma( + struct ivy_parser *ctx, struct ivy_token *tok) +{ /* ends the current arithmetic expression at a `,`; returns IVY_ERR_BAD_SYNTAX for non-arithmetic expression states */ + struct expr_parser_state *state + = parser_get_state(ctx, struct expr_parser_state); + + if (state->s_type != EXPR_TYPE_ARITH) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + if (state->s_paren_depth > 0 && (state->s_prev_component == EXPR_CMP_OPERAND || state->s_prev_component == EXPR_CMP_MSG)) { + /* a comma inside parentheses after an operand or message would form a tuple, which is reported as unsupported.
*/ + return PARSE_RESULT(IVY_ERR_NOT_SUPPORTED, 0); + } + + state->s_prev_token = IVY_SYM_COMMA; /* the terminating token is a comma, not a dot */ + return expr_finalise_and_return(ctx, state); +} +/* ends the current expression at `=>` and sets PARSE_REPEAT_TOKEN on the result, presumably so the enclosing state (e.g. parse_arrow in match.c) receives the `=>` token as well. */ +struct token_parse_result arith_parse_equal_right_angle( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct expr_parser_state *state + = parser_get_state(ctx, struct expr_parser_state); + + state->s_prev_token = IVY_SYM_EQUAL_RIGHT_ANGLE; + struct token_parse_result result = expr_finalise_and_return(ctx, state); + + result.r_flags |= PARSE_REPEAT_TOKEN; + return result; +} + struct token_parse_result arith_parse_label( struct ivy_parser *ctx, struct ivy_token *tok) { diff --git a/lang/ast/expr/expr.c b/lang/ast/expr/expr.c index 2e05820..4eb10d1 100644 --- a/lang/ast/expr/expr.c +++ b/lang/ast/expr/expr.c @@ -32,12 +32,20 @@ struct ast_node_type expr_node_ops = { SYM_PARSER(LEFT_PAREN, arith_parse_left_paren), SYM_PARSER(RIGHT_PAREN, arith_parse_right_paren), SYM_PARSER(SEMICOLON, arith_parse_semicolon), + SYM_PARSER(UNDERSCORE, arith_parse_operand), + SYM_PARSER(COMMA, arith_parse_comma), SYM_PARSER(DOT, arith_parse_dot), + SYM_PARSER(EQUAL_RIGHT_ANGLE, arith_parse_equal_right_angle), }, .n_keyword_parsers = { + /* statement keywords */ + KW_PARSER(MATCH, stmt_parse_match), KW_PARSER(IF, stmt_parse_if), KW_PARSER(THEN, stmt_parse_then), KW_PARSER(ELSE, stmt_parse_else), KW_PARSER(END, stmt_parse_end), + + /* operator keywords */ + KW_PARSER(IN, arith_parse_in), } }; diff --git a/lang/ast/expr/expr.h b/lang/ast/expr/expr.h index c59b2bc..b15f505 100644 --- a/lang/ast/expr/expr.h +++ b/lang/ast/expr/expr.h @@ -95,7 +95,7 @@ extern struct token_parse_result expr_finalise_and_return( extern void arith_push_operator( struct expr_parser_state *state, struct ivy_ast_node *node); -extern void arith_push_operand( +extern enum ivy_status arith_push_operand( struct expr_parser_state *state, struct ivy_token *tok); extern enum ivy_status arith_add_child( @@ -117,9 +117,17 @@ extern struct token_parse_result
arith_parse_semicolon( struct ivy_parser *ctx, struct ivy_token *tok); extern struct token_parse_result arith_parse_dot( struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_comma( + struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_equal_right_angle( + struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_in( + struct ivy_parser *ctx, struct ivy_token *tok); /* statement parser callbacks */ +extern struct token_parse_result stmt_parse_match( + struct ivy_parser *ctx, struct ivy_token *tok); extern struct token_parse_result stmt_parse_if( struct ivy_parser *ctx, struct ivy_token *tok); extern struct token_parse_result stmt_parse_then( diff --git a/lang/ast/expr/stmt.c b/lang/ast/expr/stmt.c index e696003..6e402ea 100644 --- a/lang/ast/expr/stmt.c +++ b/lang/ast/expr/stmt.c @@ -6,6 +6,39 @@ #include #include +struct token_parse_result stmt_parse_match( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct expr_parser_state *state + = parser_get_state(ctx, struct expr_parser_state); + + if (state->s_prev_component == EXPR_CMP_OPERAND || state->s_prev_component == EXPR_CMP_MSG) { + /* match statements are operands, so one cannot directly follow another operand or a message. */ + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + struct ivy_ast_node *expr = NULL; + struct token_parse_result result + = expr_finalise(ctx, state, IVY_PRECEDENCE_IF_ELSE, &expr); + if (result.r_status != IVY_OK) { + return result; + } + + state->s_prev_token = IVY_KW_MATCH; + + if (b_queue_empty(&state->s_operator_stack) && b_queue_empty(&state->s_output_queue)) { + parser_pop_state(ctx, 0); + } + + /* push a match parser state. the `match` token is returned with PARSE_REPEAT_TOKEN, + * presumably so that parse_match() in match.c receives it next. NOTE(review): `expr` is not used after expr_finalise().
*/ + struct cond_group_parser_state *cond + = (struct cond_group_parser_state *)parser_push_state( + ctx, IVY_AST_MATCH, 0); /* NOTE(review): `cond` is never used, and match_node_ops sizes the IVY_AST_MATCH state as struct match_parser_state, not cond_group_parser_state */ + + return PARSE_RESULT(IVY_OK, PARSE_REPEAT_TOKEN); +} + struct token_parse_result stmt_parse_if( struct ivy_parser *ctx, struct ivy_token *tok) { diff --git a/lang/ast/match.c b/lang/ast/match.c new file mode 100644 index 0000000..dfd6aad --- /dev/null +++ b/lang/ast/match.c @@ -0,0 +1,246 @@ +#include "block.h" +#include "iterate.h" +#include "expr/expr.h" + +struct match_parser_state { + struct parser_state s_base; + unsigned int s_prev_token; /* last keyword/symbol handled by this state (IVY_KW_* or IVY_SYM_* value) */ + + struct ivy_ast_node *s_cond; /* the match condition, taken from s_prev_node in parse_in */ + + struct ivy_ast_cond_node *s_cur_branch; /* branch currently being filled in */ + b_queue s_branches; /* completed branches; handed to the match node in finalise_match */ + + struct ivy_ast_node *s_prev_node; /* most recent child received through add_child and not yet consumed */ +}; + +static void init_state(struct ivy_parser *ctx, struct parser_state *sp, uintptr_t arg) +{ + struct match_parser_state *state + = (struct match_parser_state *)sp; + state->s_prev_node = (struct ivy_ast_node *)arg; /* arg is stored as the pending node; parse_match rejects the keyword if it is non-NULL */ +} +/* handles the `match` keyword: creates the first branch node and pushes an expression state, terminated by `in`, for the match condition. */ +struct token_parse_result parse_match( + struct ivy_parser *ctx, + struct ivy_token *tok) +{ + struct match_parser_state *state + = parser_get_state(ctx, struct match_parser_state); + + if (state->s_cur_branch || state->s_prev_node) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + state->s_cur_branch = (struct ivy_ast_cond_node *)ast_node_create(IVY_AST_COND); + if (!state->s_cur_branch) { + return PARSE_RESULT(IVY_ERR_NO_MEMORY, 0); + } + + struct expr_parser_state *expr + = (struct expr_parser_state *)parser_push_state( + ctx, IVY_AST_EXPR, 0); + + state->s_prev_token = IVY_KW_MATCH; + expr->s_subexpr_depth = 1; + expr->s_terminator = IVY_KW_IN; + + return PARSE_RESULT(IVY_OK, 0); +} +/* appends the current branch to s_branches and allocates a new, empty branch as the current one. */ +static enum ivy_status flush_current_branch(struct match_parser_state *state) +{ + if (!state->s_cur_branch) { + return IVY_ERR_INTERNAL_FAILURE; + } + + b_queue_push_back(&state->s_branches, &state->s_cur_branch->n_base.n_entry); + + state->s_cur_branch + = (struct ivy_ast_cond_node *)ast_node_create(IVY_AST_COND); + + if (!state->s_cur_branch) { +
return IVY_ERR_NO_MEMORY; + } + + return IVY_OK; +} +/* handles the `in` keyword: takes the pending node as the match condition and pushes an expression state for the first branch condition. */ +struct token_parse_result parse_in(struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct match_parser_state *state + = parser_get_state(ctx, struct match_parser_state); + + if (!state->s_cur_branch) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + if (state->s_prev_token != IVY_KW_MATCH) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + /* previous component was the match-condition. */ + if (!state->s_prev_node) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + state->s_cond = state->s_prev_node; + state->s_prev_node = NULL; + + /* next component will be a branch condition. */ + struct block_parser_state *block + = (struct block_parser_state *)parser_push_state( + ctx, IVY_AST_EXPR, 0); /* NOTE(review): `block` is unused; the other handlers in this file cast the IVY_AST_EXPR state to expr_parser_state and set s_subexpr_depth, which is not done here -- confirm intended */ + + state->s_prev_token = IVY_KW_IN; + return PARSE_RESULT(IVY_OK, 0); +} +/* copies the match condition and the collected branches into the match AST node, leaving s_branches empty. */ +static enum ivy_status finalise_match(struct match_parser_state* state) +{ + struct ivy_ast_match_node *match + = (struct ivy_ast_match_node *)state->s_base.s_node; + + /* we have just reached the 'end' keyword. */ + if (!state->s_cur_branch) { + /* not currently parsing a conditional branch. */ + return IVY_ERR_BAD_SYNTAX; + } + + match->n_cond = state->s_cond; + match->n_branches = state->s_branches; + state->s_branches = B_QUEUE_INIT; + return IVY_OK; +} +/* handles `=>`: the pending node becomes the current branch's condition and an expression state is pushed for the branch body. */ +static struct token_parse_result parse_arrow( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct match_parser_state *state + = parser_get_state(ctx, struct match_parser_state); + + if (state->s_prev_token != IVY_KW_IN && state->s_prev_token != IVY_SYM_COMMA) { + /* this token can only appear after a branch condition, i.e. an expression that follows `in` or `,`.
*/ + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + if (!state->s_prev_node) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + state->s_cur_branch->n_cond = state->s_prev_node; + state->s_prev_node = NULL; + state->s_prev_token = IVY_SYM_EQUAL_RIGHT_ANGLE; + + /* the next component is the branch body */ + struct expr_parser_state *expr + = (struct expr_parser_state *)parser_push_state( + ctx, IVY_AST_EXPR, 0); + + expr->s_terminator = IVY_KW_END; + expr->s_subexpr_depth = 1; + return PARSE_RESULT(IVY_OK, 0); +} +/* handles `,`: the pending node becomes the current branch's body, the branch is queued, and an expression state is pushed for the next branch condition. */ +static struct token_parse_result parse_comma( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct match_parser_state *state + = parser_get_state(ctx, struct match_parser_state); + + if (state->s_prev_token != IVY_SYM_EQUAL_RIGHT_ANGLE) { + /* this token can only appear after the `=>` symbol and an expression. */ + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + if (!state->s_prev_node) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + state->s_cur_branch->n_body = state->s_prev_node; + flush_current_branch(state); /* NOTE(review): return value ignored; if the allocation inside fails, s_cur_branch is NULL and parse_arrow/parse_end dereference it */ + state->s_prev_node = NULL; + state->s_prev_token = IVY_SYM_COMMA; + + /* the next component is a branch condition */ + struct expr_parser_state *expr + = (struct expr_parser_state *)parser_push_state( + ctx, IVY_AST_EXPR, 0); + + expr->s_subexpr_depth = 1; + return PARSE_RESULT(IVY_OK, 0); +} +/* handles `end`: the pending node becomes the last branch's body, the branch is queued, and the match node is finalised and handed to the parent state. */ +static struct token_parse_result parse_end(struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct match_parser_state *state + = parser_get_state(ctx, struct match_parser_state); + + /* end can only be used after the '=>' symbol and an expression */ + if (state->s_prev_token != IVY_SYM_EQUAL_RIGHT_ANGLE) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + if (!state->s_prev_node) { + return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); + } + + state->s_cur_branch->n_body = state->s_prev_node; + flush_current_branch(state); /* NOTE(review): return value ignored; this also allocates a new empty branch that finalise_match() does not add to the node -- confirm it is released elsewhere */ + state->s_prev_node = NULL; + state->s_prev_token = IVY_KW_END; + + enum ivy_status status =
finalise_match(state); + if (status != IVY_OK) { + return PARSE_RESULT(status, 0); + } + + parser_pop_state(ctx, STATE_ADD_NODE_TO_PARENT); + return PARSE_RESULT(IVY_OK, 0); +} +/* stores a finished child node in s_prev_node; returns IVY_ERR_BAD_SYNTAX if the previous one has not been consumed yet. */ +static enum ivy_status add_child( + struct parser_state *parent, struct ivy_ast_node *child) +{ + struct match_parser_state *state + = (struct match_parser_state *)parent; + + if (state->s_prev_node) { + return IVY_ERR_BAD_SYNTAX; + } + + state->s_prev_node = child; + return IVY_OK; +} +/* enqueues the match condition, then every branch in n_branches, on the AST iterator. */ +static void match_collect_children( + struct ivy_ast_node *node, struct ivy_ast_node_iterator *iterator) +{ + struct ivy_ast_match_node *match + = (struct ivy_ast_match_node *)node; + + ast_node_iterator_enqueue_node(iterator, node, match->n_cond); + + b_queue_iterator it = {0}; + b_queue_foreach (&it, &match->n_branches) { + struct ivy_ast_node *branch = b_unbox(struct ivy_ast_node, it.entry, n_entry); + ast_node_iterator_enqueue_node(iterator, node, branch); + } +} +/* parser callbacks and sizes for IVY_AST_MATCH nodes. */ +struct ast_node_type match_node_ops = { + .n_init_state = init_state, + .n_add_child = add_child, + .n_collect_children = match_collect_children, + .n_state_size = sizeof(struct match_parser_state), + .n_node_size = sizeof(struct ivy_ast_match_node), /* finalise_match() and match_collect_children() treat the node as struct ivy_ast_match_node, so it must be sized as one */ + .n_keyword_parsers = { + KW_PARSER(MATCH, parse_match), + KW_PARSER(IN, parse_in), + KW_PARSER(END, parse_end), + }, + .n_symbol_parsers = { + SYM_PARSER(EQUAL_RIGHT_ANGLE, parse_arrow), + SYM_PARSER(COMMA, parse_comma), + }, +}; diff --git a/lang/ast/node.c b/lang/ast/node.c index 9fd182f..e853248 100644 --- a/lang/ast/node.c +++ b/lang/ast/node.c @@ -22,6 +22,8 @@ extern struct ast_node_type string_node_ops; extern struct ast_node_type cascade_node_ops; extern struct ast_node_type cond_group_node_ops; extern struct ast_node_type cond_node_ops; +extern struct ast_node_type match_node_ops; +extern struct ast_node_type discard_node_ops; static const struct ast_node_type *node_ops[] = { [IVY_AST_UNIT] = &unit_node_ops, @@ -41,6 +43,8 @@ static const struct ast_node_type *node_ops[] = {
[IVY_AST_CASCADE] = &cascade_node_ops, [IVY_AST_COND_GROUP] = &cond_group_node_ops, [IVY_AST_COND] = &cond_node_ops, + [IVY_AST_MATCH] = &match_node_ops, + [IVY_AST_DISCARD] = &discard_node_ops, }; static const size_t nr_node_ops = sizeof node_ops / sizeof node_ops[0]; @@ -222,6 +226,7 @@ const char *ivy_ast_node_type_to_string(enum ivy_ast_node_type v) ENUM_STR(IVY_AST_LAMBDA); ENUM_STR(IVY_AST_UNIT_PACKAGE); ENUM_STR(IVY_AST_UNIT_IMPORT); + ENUM_STR(IVY_AST_DISCARD); ENUM_STR(IVY_AST_INT); ENUM_STR(IVY_AST_DOUBLE); ENUM_STR(IVY_AST_STRING); @@ -232,6 +237,7 @@ const char *ivy_ast_node_type_to_string(enum ivy_ast_node_type v) ENUM_STR(IVY_AST_WHILE_LOOP); ENUM_STR(IVY_AST_CASCADE); ENUM_STR(IVY_AST_COND_GROUP); + ENUM_STR(IVY_AST_MATCH); ENUM_STR(IVY_AST_COND); ENUM_STR(IVY_AST_TUPLE); ENUM_STR(IVY_AST_BLOCK); diff --git a/lang/ast/unit.c b/lang/ast/unit.c index ae10887..fc30e8d 100644 --- a/lang/ast/unit.c +++ b/lang/ast/unit.c @@ -35,7 +35,6 @@ static struct token_parse_result parse_expr_begin( static struct token_parse_result parse_dot( struct ivy_parser *ctx, struct ivy_token *tok) { - printf("unneeded dot\n"); return PARSE_RESULT(IVY_OK, 0); } diff --git a/lang/include/ivy/lang/ast.h b/lang/include/ivy/lang/ast.h index 41ffd0e..3cbb664 100644 --- a/lang/include/ivy/lang/ast.h +++ b/lang/include/ivy/lang/ast.h @@ -24,6 +24,7 @@ enum ivy_ast_node_type { IVY_AST_UNIT_PACKAGE, IVY_AST_UNIT_IMPORT, IVY_AST_EXPR, + IVY_AST_DISCARD, IVY_AST_INT, IVY_AST_DOUBLE, IVY_AST_STRING, @@ -33,6 +34,7 @@ enum ivy_ast_node_type { IVY_AST_FOR_LOOP, IVY_AST_WHILE_LOOP, IVY_AST_CASCADE, + IVY_AST_MATCH, IVY_AST_COND_GROUP, IVY_AST_COND, IVY_AST_TUPLE, @@ -148,6 +150,10 @@ struct ivy_ast_expr_node { struct ivy_ast_node *n_child; }; +struct ivy_ast_discard_node { /* node for the `_` placeholder operand (IVY_AST_DISCARD); it carries only the base node */ + struct ivy_ast_node n_base; +}; + struct ivy_ast_int_node { struct ivy_ast_node n_base; /* lex token of type IVY_TOK_INT.
*/ @@ -213,9 +219,19 @@ struct ivy_ast_cond_group_node { b_queue n_branches; }; +struct ivy_ast_match_node { + struct ivy_ast_node n_base; + /* the value being matched. the branch whose condition matches it will be evaluated. */ + struct ivy_ast_node *n_cond; + /* queue of struct ivy_ast_cond_node, in the order the branches were parsed. */ + b_queue n_branches; +}; + struct ivy_ast_cond_node { struct ivy_ast_node n_base; - /* expression. must evaluate to true for the condition body to be evaluated. NULL for unconditional (else). */ + /* expression. NULL for unconditional (else). + * for if-else expressions, this condition (if present) must evaluate to true for n_body to be evaluated. + * for match expressions, this condition must be equal to the match condition for n_body to be evaluated. */ struct ivy_ast_node *n_cond; /* expression/block to evaluate if the condition is true. */ struct ivy_ast_node *n_body;