From e8c30b65b58468cd56c2719747a42ca853cf98df Mon Sep 17 00:00:00 2001
From: Max Wash
Date: Sat, 7 Dec 2024 21:28:25 +0000
Subject: [PATCH] lang: ast: implement support for multiple expression terminator tokens

---
Review notes:
- expr_terminates_at_token() now scans only the first s_nr_terminators
  entries rather than all EXPR_TERMINATOR_MAX slots, matching
  expr_add_terminator() and expr_copy_terminators().
- line breaks, indentation and blank context lines were reconstructed from
  a flattened copy of this patch; verify context against the tree (or apply
  with --ignore-whitespace).
- the header name on the `#include` context line in expr.h is missing in
  this copy and needs restoring before the hunk will match.

 lang/ast/block.c      |  2 +-
 lang/ast/expr/arith.c | 24 ++++++++++++++----------
 lang/ast/expr/expr.c  | 34 ++++++++++++++++++++++++++++++++++
 lang/ast/expr/expr.h  | 15 +++++++++++----
 lang/ast/expr/stmt.c  |  4 ++--
 lang/ast/for.c        |  4 ++--
 lang/ast/match.c      |  4 ++--
 lang/ast/package.c    | 12 ++++++------
 lang/ast/property.c   |  2 +-
 lang/ast/string.c     |  2 +-
 lang/ast/tuple.c      |  0
 lang/ast/while.c      |  2 +-
 12 files changed, 75 insertions(+), 30 deletions(-)
 create mode 100644 lang/ast/tuple.c

diff --git a/lang/ast/block.c b/lang/ast/block.c
index e355840..35e9097 100644
--- a/lang/ast/block.c
+++ b/lang/ast/block.c
@@ -68,7 +68,7 @@ static struct token_parse_result parse_expr_begin(
 	struct expr_parser_state *expr
 		= (struct expr_parser_state *)parser_push_state(
 			ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = state->s_terminator;
+
 	return PARSE_RESULT(IVY_OK, PARSE_REPEAT_TOKEN);
 }
 
diff --git a/lang/ast/expr/arith.c b/lang/ast/expr/arith.c
index 7a94903..082feb1 100644
--- a/lang/ast/expr/arith.c
+++ b/lang/ast/expr/arith.c
@@ -504,7 +504,7 @@ struct token_parse_result arith_parse_in(
 	struct expr_parser_state *state
 		= parser_get_state(ctx, struct expr_parser_state);
 
-	if (state->s_terminator == IVY_KW_IN) {
+	if (expr_terminates_at_token(state, IVY_KW_IN)) {
 		/* treat this as a statement terminator. */
 		struct token_parse_result result = expr_finalise_and_return(ctx, state);
 		result.r_flags |= PARSE_REPEAT_TOKEN;
@@ -521,7 +521,7 @@ struct token_parse_result arith_parse_do(
 		= parser_get_state(ctx, struct expr_parser_state);
 
 	bool terminator = false;
-	if (state->s_terminator == IVY_KW_DO) {
+	if (expr_terminates_at_token(state, IVY_KW_DO)) {
 		terminator = true;
 	}
 
@@ -1132,7 +1132,7 @@ struct token_parse_result arith_parse_comma(
 	struct expr_parser_state *state
 		= parser_get_state(ctx, struct expr_parser_state);
 
-	if (state->s_terminator == IVY_SYM_COMMA) {
+	if (expr_terminates_at_token(state, IVY_SYM_COMMA)) {
 		struct token_parse_result result = expr_finalise_and_return(ctx, state);
 		result.r_flags = PARSE_REPEAT_TOKEN;
 
@@ -1177,21 +1177,23 @@ struct token_parse_result arith_parse_label(
 		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
 	}
 
-	if (state->s_terminator == IVY_TOK_LABEL) {
+	if (expr_terminates_at_token(state, IVY_TOK_LABEL)) {
 		/* we are currently parsing a keyword or complex message
 		 * argument, and have just encountered the label denoting the
 		 * next argument. terminate here and propagate this label to the
 		 * parent keyword-message parser context. */
 		struct ivy_ast_node *expr = NULL;
-		enum ivy_status status = expr_finalise_arith(
-			state, &expr, IVY_PRECEDENCE_ASSIGN);
-		if (status != IVY_OK) {
-			return PARSE_RESULT(status, 0);
+		struct token_parse_result result = expr_finalise(ctx,
+			state, IVY_PRECEDENCE_ASSIGN, &expr);
+		if (result.r_status != IVY_OK) {
+			return result;
 		}
 
+		result.r_flags |= PARSE_REPEAT_TOKEN;
+
 		parser_replace_current_node(ctx, expr);
 		parser_pop_state(ctx, STATE_ADD_NODE_TO_PARENT);
-		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, PARSE_REPEAT_TOKEN);
+		return result;
 	}
 
 	if (state->s_sub_type != EXPR_SUBTYPE_KEYWORD_MSG
@@ -1208,6 +1210,7 @@ struct token_parse_result arith_parse_label(
 	if (new_parser) {
 		msg_expr = (struct expr_parser_state *)parser_push_state(
 			ctx, IVY_AST_EXPR, 0);
+		expr_copy_terminators(state, msg_expr);
 	} else {
 		msg_expr = state;
 	}
@@ -1248,7 +1251,8 @@ struct token_parse_result arith_parse_label(
 		struct expr_parser_state *arg_expr
 			= (struct expr_parser_state *)parser_push_state(
 				ctx, IVY_AST_EXPR, 0);
-		arg_expr->s_terminator = IVY_TOK_LABEL;
+		expr_copy_terminators(state, arg_expr);
+		expr_add_terminator(arg_expr, IVY_TOK_LABEL);
 		arg_expr->s_sub_type = EXPR_SUBTYPE_KEYWORD_ARG;
 		arg_expr->s_subexpr_depth = state->s_subexpr_depth + 1;
 	}
diff --git a/lang/ast/expr/expr.c b/lang/ast/expr/expr.c
index 960aed1..70d6581 100644
--- a/lang/ast/expr/expr.c
+++ b/lang/ast/expr/expr.c
@@ -2,6 +2,40 @@
 
 #include "../node.h"
 
+/* register `tok` as a token that terminates the expression being parsed by
+ * `state`. the request is silently ignored once all EXPR_TERMINATOR_MAX
+ * slots are in use. */
+void expr_add_terminator(struct expr_parser_state *state, unsigned short tok)
+{
+	if (state->s_nr_terminators < EXPR_TERMINATOR_MAX) {
+		state->s_terminators[state->s_nr_terminators++] = tok;
+	}
+}
+
+/* replace the terminator list of `dest` with a copy of the list in `src`. */
+void expr_copy_terminators(const struct expr_parser_state *src, struct expr_parser_state *dest)
+{
+	dest->s_nr_terminators = src->s_nr_terminators;
+
+	for (unsigned int i = 0; i < src->s_nr_terminators; i++) {
+		dest->s_terminators[i] = src->s_terminators[i];
+	}
+}
+
+/* returns true if `tok` is one of the terminators registered on `state`.
+ * only the first s_nr_terminators slots are examined: slots beyond that
+ * count hold no registered terminator and must not produce a match. */
+bool expr_terminates_at_token(struct expr_parser_state *state, unsigned short tok)
+{
+	for (unsigned int i = 0; i < state->s_nr_terminators; i++) {
+		if (state->s_terminators[i] == tok) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
 static enum ivy_status add_child(
 	struct parser_state *parent, struct ivy_ast_node *child)
 {
diff --git a/lang/ast/expr/expr.h b/lang/ast/expr/expr.h
index 34d6afc..6608100 100644
--- a/lang/ast/expr/expr.h
+++ b/lang/ast/expr/expr.h
@@ -6,6 +6,8 @@
 
 #include 
 
+#define EXPR_TERMINATOR_MAX 8
+
 struct ivy_ast_node;
 struct ivy_ast_msg_node;
 
@@ -65,10 +67,11 @@ struct expr_parser_state {
 	 * the depth of the expression is recorded here. */
 	unsigned int s_subexpr_depth;
 
-	/* when this is set, the expression will be terminated when the
-	 * specified token is encountered. the token that terminated the
-	 * expression will not be consumed. */
-	unsigned int s_terminator;
+	/* the expression will be terminated when any token in this list
+	 * is encountered. the token that terminated the expression will
+	 * not be consumed. */
+	unsigned short s_terminators[EXPR_TERMINATOR_MAX];
+	unsigned short s_nr_terminators;
 
 	b_queue s_output_queue;
 	b_queue s_operator_stack;
@@ -88,6 +91,10 @@ struct expr_parser_state {
 
 /* general functions */
 
+extern void expr_add_terminator(struct expr_parser_state *state, unsigned short tok);
+extern void expr_copy_terminators(const struct expr_parser_state *src, struct expr_parser_state *dest);
+extern bool expr_terminates_at_token(struct expr_parser_state *state, unsigned short tok);
+
 extern struct token_parse_result expr_finalise(
 	struct ivy_parser *ctx, struct expr_parser_state *state,
 	enum ivy_operator_precedence min_precedence, struct ivy_ast_node **expr);
diff --git a/lang/ast/expr/stmt.c b/lang/ast/expr/stmt.c
index 0f24e26..5a00649 100644
--- a/lang/ast/expr/stmt.c
+++ b/lang/ast/expr/stmt.c
@@ -12,7 +12,7 @@ struct token_parse_result stmt_parse_for(
 	struct expr_parser_state *state
 		= parser_get_state(ctx, struct expr_parser_state);
 
-	if (state->s_terminator == IVY_KW_FOR) {
+	if (expr_terminates_at_token(state, IVY_KW_FOR)) {
 		/* treat this as a statement terminator. */
 		struct token_parse_result result = expr_finalise_and_return(ctx, state);
 		result.r_flags |= PARSE_REPEAT_TOKEN;
@@ -126,7 +126,7 @@ struct token_parse_result stmt_parse_if(
 	struct expr_parser_state *state
 		= parser_get_state(ctx, struct expr_parser_state);
 
-	if (state->s_terminator == IVY_KW_IF) {
+	if (expr_terminates_at_token(state, IVY_KW_IF)) {
 		/* treat this as a statement terminator. */
 		struct token_parse_result result = expr_finalise_and_return(ctx, state);
 		result.r_flags |= PARSE_REPEAT_TOKEN;
diff --git a/lang/ast/for.c b/lang/ast/for.c
index c646c2d..f13fe76 100644
--- a/lang/ast/for.c
+++ b/lang/ast/for.c
@@ -37,7 +37,7 @@ struct token_parse_result parse_for(struct ivy_parser *ctx, struct ivy_token *to
 			ctx, IVY_AST_EXPR, 0);
 
 	state->s_prev_token = IVY_KW_FOR;
-	expr->s_terminator = IVY_KW_IN;
+	expr_add_terminator(expr, IVY_KW_IN);
 	expr->s_subexpr_depth = 1;
 
 	return PARSE_RESULT(IVY_OK, 0);
@@ -68,7 +68,7 @@ static struct token_parse_result parse_in(
 	/* set the sub-expression depth to be non-zero so the expression parser
 	   doesn't consume the expression separator. */
 	expr->s_subexpr_depth = 1;
-	expr->s_terminator = IVY_KW_DO;
+	expr_add_terminator(expr, IVY_KW_DO);
 	state->s_prev_token = IVY_KW_IN;
 
 	return PARSE_RESULT(IVY_OK, 0);
diff --git a/lang/ast/match.c b/lang/ast/match.c
index b317d75..bbb47d7 100644
--- a/lang/ast/match.c
+++ b/lang/ast/match.c
@@ -41,7 +41,7 @@ struct token_parse_result parse_match(struct ivy_parser *ctx, struct ivy_token *
 
 	state->s_prev_token = IVY_KW_MATCH;
 	expr->s_subexpr_depth = 1;
-	expr->s_terminator = IVY_KW_IN;
+	expr_add_terminator(expr, IVY_KW_IN);
 
 	return PARSE_RESULT(IVY_OK, 0);
 }
@@ -136,7 +136,7 @@ static struct token_parse_result parse_arrow(
 		= (struct expr_parser_state *)parser_push_state(
 			ctx, IVY_AST_EXPR, 0);
 
-	expr->s_terminator = IVY_KW_END;
+	expr_add_terminator(expr, IVY_KW_END);
 	expr->s_subexpr_depth = 1;
 	return PARSE_RESULT(IVY_OK, 0);
 }
diff --git a/lang/ast/package.c b/lang/ast/package.c
index 1b08774..fc24354 100644
--- a/lang/ast/package.c
+++ b/lang/ast/package.c
@@ -124,7 +124,7 @@ static struct token_parse_result parse_equal_right_angle(
 	state->s_prev_node = NULL;
 
 	struct expr_parser_state *expr = (struct expr_parser_state *)parser_push_state(ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = IVY_SYM_COMMA;
+	expr_add_terminator(expr, IVY_SYM_COMMA);
 	expr->s_subexpr_depth = 1;
 
 	return PARSE_RESULT(IVY_OK, 0);
@@ -162,7 +162,7 @@ static struct token_parse_result parse_comma(
 	state->s_prev = IVY_SYM_COMMA;
 
 	struct expr_parser_state *expr = (struct expr_parser_state *)parser_push_state(ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = IVY_SYM_EQUAL_RIGHT_ANGLE;
+	expr_add_terminator(expr, IVY_SYM_EQUAL_RIGHT_ANGLE);
 	expr->s_subexpr_depth = 1;
 
 	return PARSE_RESULT(IVY_OK, 0);
@@ -261,7 +261,7 @@ static struct token_parse_result parse_for(
 	state->s_prev_node = NULL;
 
 	struct expr_parser_state *expr = (struct expr_parser_state *)parser_push_state(ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = IVY_KW_IN;
+	expr_add_terminator(expr, IVY_KW_IN);
 	expr->s_subexpr_depth = 1;
 
 	return PARSE_RESULT(IVY_OK, 0);
@@ -292,7 +292,7 @@ static struct token_parse_result parse_in(
 	state->s_prev_node = NULL;
 
 	struct expr_parser_state *expr = (struct expr_parser_state *)parser_push_state(ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = IVY_KW_IF;
+	expr_add_terminator(expr, IVY_KW_IF);
 	expr->s_subexpr_depth = 1;
 
 	return PARSE_RESULT(IVY_OK, 0);
@@ -323,7 +323,7 @@ static struct token_parse_result parse_if(
 	state->s_prev_node = NULL;
 
 	struct expr_parser_state *expr = (struct expr_parser_state *)parser_push_state(ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = IVY_SYM_RIGHT_BRACE;
+	expr_add_terminator(expr, IVY_SYM_RIGHT_BRACE);
 	expr->s_subexpr_depth = 1;
 
 	return PARSE_RESULT(IVY_OK, 0);
@@ -411,7 +411,7 @@ static void init_state(struct ivy_parser *ctx, struct parser_state *sp, uintptr_
 	state->s_next_implicit_index = 0;
 
 	struct expr_parser_state *expr = (struct expr_parser_state *)parser_push_state(ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = IVY_KW_FOR;
+	expr_add_terminator(expr, IVY_KW_FOR);
 	expr->s_subexpr_depth = 1;
 }
 
diff --git a/lang/ast/property.c b/lang/ast/property.c
index 5c74312..f75adcc 100644
--- a/lang/ast/property.c
+++ b/lang/ast/property.c
@@ -175,7 +175,7 @@ static struct token_parse_result parse_equal_right_arrow(
 	struct expr_parser_state *expr
 		= (struct expr_parser_state *)parser_push_state(
 			ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = IVY_SYM_COMMA;
+	expr_add_terminator(expr, IVY_SYM_COMMA);
 	expr->s_subexpr_depth = 1;
 
 	return PARSE_RESULT(IVY_OK, 0);
diff --git a/lang/ast/string.c b/lang/ast/string.c
index 6a6fea6..d7e9e82 100644
--- a/lang/ast/string.c
+++ b/lang/ast/string.c
@@ -36,7 +36,7 @@ struct token_parse_result parse_left_brace(
 	struct expr_parser_state *expr
 		= (struct expr_parser_state *)parser_push_state(
 			ctx, IVY_AST_EXPR, 0);
-	expr->s_terminator = IVY_SYM_RIGHT_BRACE;
+	expr_add_terminator(expr, IVY_SYM_RIGHT_BRACE);
 
 	return PARSE_RESULT(IVY_OK, 0);
 }
diff --git a/lang/ast/tuple.c b/lang/ast/tuple.c
new file mode 100644
index 0000000..e69de29
diff --git a/lang/ast/while.c b/lang/ast/while.c
index 5111a28..490b3d1 100644
--- a/lang/ast/while.c
+++ b/lang/ast/while.c
@@ -38,7 +38,7 @@ struct token_parse_result parse_while(struct ivy_parser *ctx, struct ivy_token *
 
 	state->s_prev_token = IVY_KW_WHILE;
 	expr->s_subexpr_depth = 1;
-	expr->s_terminator = IVY_KW_DO;
+	expr_add_terminator(expr, IVY_KW_DO);
 
 	return PARSE_RESULT(IVY_OK, 0);
 }