From 16ab13d7388c7c0ed23763bd51cf75b247b99ae8 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Tue, 3 Dec 2024 13:26:55 +0000 Subject: [PATCH] lang: ast: re-factor expression parser into multiple files --- lang/ast/{expr.c => expr/arith.c} | 120 +++++------------------------- lang/ast/expr/expr.c | 36 +++++++++ lang/ast/expr/expr.h | 97 ++++++++++++++++++++++++ lang/ast/expr/stmt.c | 0 lang/include/ivy/lang/ast.h | 8 ++ lang/include/ivy/lang/operator.h | 2 +- 6 files changed, 160 insertions(+), 103 deletions(-) rename lang/ast/{expr.c => expr/arith.c} (86%) create mode 100644 lang/ast/expr/expr.c create mode 100644 lang/ast/expr/expr.h create mode 100644 lang/ast/expr/stmt.c diff --git a/lang/ast/expr.c b/lang/ast/expr/arith.c similarity index 86% rename from lang/ast/expr.c rename to lang/ast/expr/arith.c index 63f6f8a..858e979 100644 --- a/lang/ast/expr.c +++ b/lang/ast/expr/arith.c @@ -1,73 +1,11 @@ -#include "ctx.h" -#include "node.h" +#include "../node.h" +#include "expr.h" #include #include #include #include -enum expr_type { - EXPR_TYPE_NONE = 0, - /* if-else, while/for, match, etc (any expression that ends with an - * 'end' keyword */ - EXPR_TYPE_STMT, - /* arithmetic and message-sending expressions (any expression that ends - * implicitly or with an expression separator */ - EXPR_TYPE_ARITH, -}; - -enum expr_subtype { - EXPR_SUBTYPE_NONE = 0, - /* generic parenthesis-enclosed arithmetic expressions */ - EXPR_SUBTYPE_PAREN, - /* keyword messages */ - EXPR_SUBTYPE_KEYWORD_MSG, - /* expression delimited by labels */ - EXPR_SUBTYPE_KEYWORD_ARG, - /* complex messages */ - EXPR_SUBTYPE_COMPLEX_MSG, -}; - -enum expr_component { - EXPR_CMP_NONE = 0, - EXPR_CMP_OPERATOR, - EXPR_CMP_OPERAND, - EXPR_CMP_MSG, -}; - -struct expr_parser_state { - struct parser_state s_base; - enum expr_type s_type; - /* for arithmetic expressions, this records whether the previous - * component (either a token or parenthesised group of tokens) is an - * operator, operand, or message */ - 
enum expr_component s_prev_component; - /* the token type/keyword type/symbol type of the last token that was - * encountered */ - unsigned int s_prev_token; - /* if this is an arithmetic expression, this variable is the depth - * of parentheses that this sub-expression is at */ - unsigned int s_paren_depth; - - /* when this is set, the expression will be terminated when the - * specified token is encountered. the token that terminated the - * expression will not be consumed. */ - unsigned int s_terminator; - - /* all sub-expressions (i.e. those conatained within brackets, - * keyword-messages and keyword-message args, etc) will have this set/ */ - enum expr_subtype s_subexpr; - - b_queue s_output_queue; - b_queue s_operator_stack; - - /* these variables are for keyword-message expressions */ - struct ivy_ast_node *s_recipient; - struct ivy_ast_msg_node *s_msg; - b_queue s_labels; - b_queue s_args; -}; - static void print_operand(struct ivy_ast_node *node) { switch (node->n_type) { @@ -112,7 +50,7 @@ static void print_operand(struct ivy_ast_node *node) } } -static void push_operand(struct expr_parser_state *state, struct ivy_token *tok) +void arith_push_operand(struct expr_parser_state *state, struct ivy_token *tok) { switch (tok->t_type) { case IVY_TOK_IDENT: { @@ -228,7 +166,7 @@ static void print_expr_queues(struct expr_parser_state *state) printf("\n"); } -static void push_operator(struct expr_parser_state *state, struct ivy_ast_node *node) +void arith_push_operator(struct expr_parser_state *state, struct ivy_ast_node *node) { const struct ivy_operator *op = get_operator_from_node(node); if (!op) { @@ -452,7 +390,7 @@ static enum ivy_status finalise_expr( return IVY_OK; } -static struct token_parse_result parse_operand( +struct token_parse_result arith_parse_operand( struct ivy_parser *ctx, struct ivy_token *tok) { struct expr_parser_state *state @@ -468,7 +406,7 @@ static struct token_parse_result parse_operand( state->s_type = EXPR_TYPE_ARITH; - 
push_operand(state, tok); + arith_push_operand(state, tok); // print_expr_queues(state); state->s_prev_component = EXPR_CMP_OPERAND; state->s_prev_token = tok->t_type; @@ -476,7 +414,7 @@ static struct token_parse_result parse_operand( return PARSE_RESULT(IVY_OK, 0); } -static struct token_parse_result parse_operator( +struct token_parse_result arith_parse_operator( struct ivy_parser *ctx, struct ivy_token *tok) { struct expr_parser_state *state @@ -503,14 +441,14 @@ static struct token_parse_result parse_operator( return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); } - push_operator(state, op); + arith_push_operator(state, op); state->s_prev_component = EXPR_CMP_OPERATOR; print_expr_queues(state); return PARSE_RESULT(IVY_OK, 0); } -static struct token_parse_result parse_ident( +struct token_parse_result arith_parse_ident( struct ivy_parser *ctx, struct ivy_token *tok) { struct expr_parser_state *state @@ -520,16 +458,16 @@ static struct token_parse_result parse_ident( if (state->s_prev_component == EXPR_CMP_OPERAND || state->s_prev_component == EXPR_CMP_MSG) { - result = parse_operator(ctx, tok); + result = arith_parse_operator(ctx, tok); state->s_prev_component = EXPR_CMP_MSG; } else { - result = parse_operand(ctx, tok); + result = arith_parse_operand(ctx, tok); } return result; } -static struct token_parse_result parse_left_paren( +struct token_parse_result arith_parse_left_paren( struct ivy_parser *ctx, struct ivy_token *tok) { struct expr_parser_state *state @@ -644,7 +582,7 @@ static struct ivy_ast_msg_node *finalise_complex_msg(struct expr_parser_state *s return msg; } -static struct token_parse_result parse_right_paren( +struct token_parse_result arith_parse_right_paren( struct ivy_parser *ctx, struct ivy_token *tok) { struct expr_parser_state *state @@ -693,7 +631,7 @@ static struct token_parse_result parse_right_paren( return PARSE_RESULT(IVY_OK, flags); } -static struct token_parse_result parse_dot( +struct token_parse_result arith_parse_dot( struct ivy_parser 
*ctx, struct ivy_token *tok) { struct expr_parser_state *state @@ -708,7 +646,7 @@ static struct token_parse_result parse_dot( return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0); } - if (state->s_recipient) { + if (state->s_subexpr == EXPR_SUBTYPE_KEYWORD_MSG) { /* this is the end of a keyword-message */ struct ivy_ast_msg_node *msg = finalise_keyword_msg(state); parser_replace_current_node(ctx, (struct ivy_ast_node *)msg); @@ -739,7 +677,7 @@ static struct token_parse_result parse_dot( return PARSE_RESULT(IVY_OK, flags); } -static struct token_parse_result parse_label( +struct token_parse_result arith_parse_label( struct ivy_parser *ctx, struct ivy_token *tok) { struct expr_parser_state *state @@ -807,13 +745,10 @@ static struct token_parse_result parse_label( return PARSE_RESULT(IVY_OK, 0); } -static enum ivy_status add_child( +enum ivy_status arith_add_child( struct parser_state *parent, struct ivy_ast_node *child) { struct expr_parser_state *state = (struct expr_parser_state *)parent; - if (state->s_type == EXPR_TYPE_STMT) { - return IVY_ERR_NOT_SUPPORTED; - } if (state->s_subexpr == EXPR_SUBTYPE_KEYWORD_MSG || state->s_subexpr == EXPR_SUBTYPE_COMPLEX_MSG) { @@ -825,7 +760,7 @@ static enum ivy_status add_child( b_queue_push_back(&state->s_output_queue, &child->n_entry); state->s_prev_component = EXPR_CMP_OPERAND; } else if (child->n_type == IVY_AST_MSG) { - push_operator(state, child); + arith_push_operator(state, child); state->s_prev_component = EXPR_CMP_MSG; } else { /* treat the child node as a sub-expression enclosed in @@ -836,22 +771,3 @@ static enum ivy_status add_child( return IVY_OK; } - -struct ast_node_type expr_node_ops = { - .n_add_child = add_child, - .n_state_size = sizeof(struct expr_parser_state), - .n_node_size = sizeof(struct ivy_ast_expr_node), - .n_token_parsers = { - TOK_PARSER(IDENT, parse_ident), - TOK_PARSER(INT, parse_operand), - TOK_PARSER(DOUBLE, parse_operand), - TOK_PARSER(STRING, parse_operand), - TOK_PARSER(SYMBOL, parse_operator), - 
TOK_PARSER(LABEL, parse_label), - }, - .n_symbol_parsers = { - SYM_PARSER(LEFT_PAREN, parse_left_paren), - SYM_PARSER(RIGHT_PAREN, parse_right_paren), - SYM_PARSER(DOT, parse_dot), - }, -}; diff --git a/lang/ast/expr/expr.c b/lang/ast/expr/expr.c new file mode 100644 index 0000000..0b18ff0 --- /dev/null +++ b/lang/ast/expr/expr.c @@ -0,0 +1,36 @@ +#include "expr.h" + +#include "../node.h" + +static enum ivy_status add_child( + struct parser_state *parent, struct ivy_ast_node *child) +{ + struct expr_parser_state *state = (struct expr_parser_state *)parent; + switch (state->s_type) { + case EXPR_TYPE_STMT: + return IVY_ERR_NOT_SUPPORTED; + case EXPR_TYPE_ARITH: + return arith_add_child(parent, child); + default: + return IVY_ERR_NOT_SUPPORTED; + } +} + +struct ast_node_type expr_node_ops = { + .n_add_child = add_child, + .n_state_size = sizeof(struct expr_parser_state), + .n_node_size = sizeof(struct ivy_ast_expr_node), + .n_token_parsers = { + TOK_PARSER(IDENT, arith_parse_ident), + TOK_PARSER(INT, arith_parse_operand), + TOK_PARSER(DOUBLE, arith_parse_operand), + TOK_PARSER(STRING, arith_parse_operand), + TOK_PARSER(SYMBOL, arith_parse_operator), + TOK_PARSER(LABEL, arith_parse_label), + }, + .n_symbol_parsers = { + SYM_PARSER(LEFT_PAREN, arith_parse_left_paren), + SYM_PARSER(RIGHT_PAREN, arith_parse_right_paren), + SYM_PARSER(DOT, arith_parse_dot), + }, +}; diff --git a/lang/ast/expr/expr.h b/lang/ast/expr/expr.h new file mode 100644 index 0000000..e98f3dd --- /dev/null +++ b/lang/ast/expr/expr.h @@ -0,0 +1,97 @@ +#ifndef _AST_EXPR_EXPR_H_ +#define _AST_EXPR_EXPR_H_ + +#include "../ctx.h" +#include "../node.h" + +#include + +struct ivy_ast_node; +struct ivy_ast_msg_node; + +enum expr_type { + EXPR_TYPE_NONE = 0, + /* if-else, while/for, match, etc (any expression that ends with an + * 'end' keyword */ + EXPR_TYPE_STMT, + /* arithmetic and message-sending expressions (any expression that ends + * implicitly or with an expression separator */ + EXPR_TYPE_ARITH, +}; 
+ +enum expr_subtype { + EXPR_SUBTYPE_NONE = 0, + /* generic parenthesis-enclosed arithmetic expressions */ + EXPR_SUBTYPE_PAREN, + /* keyword messages */ + EXPR_SUBTYPE_KEYWORD_MSG, + /* expression delimited by labels */ + EXPR_SUBTYPE_KEYWORD_ARG, + /* complex messages */ + EXPR_SUBTYPE_COMPLEX_MSG, +}; + +enum expr_component { + EXPR_CMP_NONE = 0, + EXPR_CMP_OPERATOR, + EXPR_CMP_OPERAND, + EXPR_CMP_MSG, +}; + +struct expr_parser_state { + struct parser_state s_base; + enum expr_type s_type; + /* for arithmetic expressions, this records whether the previous + * component (either a token or parenthesised group of tokens) is an + * operator, operand, or message */ + enum expr_component s_prev_component; + /* the token type/keyword type/symbol type of the last token that was + * encountered */ + unsigned int s_prev_token; + /* if this is an arithmetic expression, this variable is the depth + * of parentheses that this sub-expression is at */ + unsigned int s_paren_depth; + + /* when this is set, the expression will be terminated when the + * specified token is encountered. the token that terminated the + * expression will not be consumed. */ + unsigned int s_terminator; + + /* all sub-expressions (i.e. 
those contained within brackets, + * keyword-messages and keyword-message args, etc) will have this set. */ + enum expr_subtype s_subexpr; + + b_queue s_output_queue; + b_queue s_operator_stack; + + /* these variables are for keyword-message expressions */ + struct ivy_ast_node *s_recipient; + struct ivy_ast_msg_node *s_msg; + b_queue s_labels; + b_queue s_args; +}; + +extern void arith_push_operator( + struct expr_parser_state *state, struct ivy_ast_node *node); +extern void arith_push_operand( + struct expr_parser_state *state, struct ivy_token *tok); + +extern enum ivy_status arith_add_child( + struct parser_state *parent, struct ivy_ast_node *child); + +extern struct token_parse_result arith_parse_ident( + struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_operand( + struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_operator( + struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_label( + struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_left_paren( + struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_right_paren( + struct ivy_parser *ctx, struct ivy_token *tok); +extern struct token_parse_result arith_parse_dot( + struct ivy_parser *ctx, struct ivy_token *tok); + +#endif diff --git a/lang/ast/expr/stmt.c b/lang/ast/expr/stmt.c new file mode 100644 index 0000000..e69de29 diff --git a/lang/include/ivy/lang/ast.h b/lang/include/ivy/lang/ast.h index 7b3dccd..ff111d5 100644 --- a/lang/include/ivy/lang/ast.h +++ b/lang/include/ivy/lang/ast.h @@ -32,6 +32,7 @@ enum ivy_ast_node_type { IVY_AST_IDENT, IVY_AST_FOR_LOOP, IVY_AST_WHILE_LOOP, + IVY_AST_CASCADE, IVY_AST_COND_GROUP, IVY_AST_COND, IVY_AST_TUPLE, @@ -199,6 +200,13 @@ struct ivy_ast_while_loop_node { b_queue n_body; }; +struct ivy_ast_cascade_node { + struct ivy_ast_node n_base; + struct
ivy_ast_node *n_recipient; + /* queue of struct ivy_ast_msg_node, all with NULL recipients. */ + b_queue n_msg; +}; + struct ivy_ast_cond_group_node { struct ivy_ast_node n_base; /* queue of struct ivy_ast_cond_node. */ diff --git a/lang/include/ivy/lang/operator.h b/lang/include/ivy/lang/operator.h index b580135..5b31154 100644 --- a/lang/include/ivy/lang/operator.h +++ b/lang/include/ivy/lang/operator.h @@ -5,8 +5,8 @@ enum ivy_operator_precedence { IVY_PRECEDENCE_ASSIGN, - IVY_PRECEDENCE_KEYWORD_MSG, IVY_PRECEDENCE_IF_ELSE, + IVY_PRECEDENCE_KEYWORD_MSG, IVY_PRECEDENCE_CASCADE, IVY_PRECEDENCE_LOGICAL_OR, IVY_PRECEDENCE_LOGICAL_AND,