From b227b27c063f0ae8f74a523b361db1a2f0e51364 Mon Sep 17 00:00:00 2001
From: Max Wash
Date: Thu, 28 Nov 2024 17:00:37 +0000
Subject: [PATCH] lang: ast: implement more of the expression parser

Route symbol, keyword and identifier operators through a new
push_operator() helper. It rejects infix/postfix operators that do not
follow an operand and prefix operators that do, then moves operators of
higher precedence (or equal precedence, when the incoming operator is
left-associative) from the operator stack to the operand queue before
pushing the new operator.

finalise_expr() now drains the remaining operator stack onto the operand
queue and reports a left parenthesis still on the stack as a syntax
error.

The ident, atom, string, int, double and symbol parsers now set
s_prev_part themselves; set_previous() no longer touches it. Integer and
double literals that directly follow an operand are rejected,
parse_not() is implemented, and a caret parser records IVY_SYM_CARET as
the expression type.

Also add the IVY_AST_PKG_INIT and IVY_AST_PKG_COMPREHENSION node types
and their node structs.

---
 lang/ast/expr.c             | 206 +++++++++++++++++++++++++-----------
 lang/include/ivy/lang/ast.h |  15 +++
 2 files changed, 159 insertions(+), 62 deletions(-)

diff --git a/lang/ast/expr.c b/lang/ast/expr.c
index 3029046..c3e6eb9 100644
--- a/lang/ast/expr.c
+++ b/lang/ast/expr.c
@@ -23,7 +23,10 @@ enum expr_part {
 struct expr_parser_state {
 	struct parser_state s_base;
 	enum expr_end s_end;
-	enum ivy_keyword s_type;
+
+	/* for a keyword-based expression (loops, conditionals, etc) this is the id of the keyword that started the expression.
+	   if this is a return expression (i.e. prefixed with a caret), this will be IVY_SYM_CARET. */
+	unsigned int s_type;
 	unsigned int s_prev_tok;
 	enum expr_part s_prev_part;
 	b_queue s_operand_queue;
@@ -48,15 +51,12 @@ static void set_previous(struct expr_parser_state *state, struct ivy_token *tok)
 	switch (tok->t_type) {
 	case IVY_TOK_SYMBOL:
 		state->s_prev_tok = tok->t_symbol;
-		state->s_prev_part = EXPR_OPERATOR;
 		break;
 	case IVY_TOK_KEYWORD:
 		state->s_prev_tok = tok->t_keyword;
-		state->s_prev_part = EXPR_OPERATOR;
 		break;
 	default:
 		state->s_prev_tok = tok->t_type;
-		state->s_prev_part = EXPR_OPERAND;
 		break;
 	}
 }
@@ -91,6 +91,25 @@ static void print_token(struct ivy_token *tok)
 static enum ivy_status finalise_expr(struct expr_parser_state *state)
 {
 	b_queue_iterator it = {0};
+	while (true) {
+		b_queue_entry *entry = b_queue_pop_back(&state->s_operator_stack);
+		if (!entry) {
+			break;
+		}
+
+		struct ivy_token *tok = b_unbox(struct ivy_token, entry, t_entry);
+		if (!tok) {
+			/* this should never happen */
+			return IVY_ERR_INTERNAL_FAILURE;
+		}
+
+		if (ivy_token_is_symbol(tok, IVY_SYM_LEFT_PAREN)) {
+			/* mismatched parentheses */
+			return IVY_ERR_BAD_SYNTAX;
+		}
+
+		b_queue_push_back(&state->s_operand_queue, entry);
+	}
 
 	int i = 0;
 	b_queue_foreach (&it, &state->s_operand_queue) {
@@ -105,20 +124,73 @@ static enum ivy_status finalise_expr(struct expr_parser_state *state)
 		i++;
 	}
 
-	b_queue_foreach (&it, &state->s_operator_stack) {
-		struct ivy_token *operator= b_unbox(
-			struct ivy_token, it.entry, t_entry);
+	printf("\n");
+	return IVY_OK;
+}
 
-		if (i > 0) {
-			printf(" ");
-		}
+static const struct ivy_operator *get_operator(struct ivy_token *tok)
+{
+	switch (tok->t_type) {
+	case IVY_TOK_IDENT:
+		return ivy_operator_get(tok->t_type);
+	case IVY_TOK_KEYWORD:
+		return ivy_operator_get(tok->t_keyword);
+	case IVY_TOK_SYMBOL:
+		return ivy_operator_get(tok->t_symbol);
+	default:
+		return NULL;
+	}
+}
 
-		print_token(operator);
-		i++;
+static enum ivy_status push_operator(
+	struct expr_parser_state *state, struct ivy_token *tok, const struct ivy_operator *op)
+{
+	if (!op) {
+		op = get_operator(tok);
 	}
 
-	printf("\n");
-	printf("%zu\n", sizeof(struct ast_node_type));
+	if (!op) {
+		return IVY_ERR_BAD_SYNTAX;
+	}
+
+	if ((op->op_location == IVY_OP_INFIX || op->op_location == IVY_OP_POSTFIX)
+	    && state->s_prev_part != EXPR_OPERAND) {
+		return IVY_ERR_BAD_SYNTAX;
+	}
+
+	if (op->op_location == IVY_OP_PREFIX
+	    && state->s_prev_part == EXPR_OPERAND) {
+		return IVY_ERR_BAD_SYNTAX;
+	}
+
+	while (true) {
+		b_queue_entry *top_entry = b_queue_last(&state->s_operator_stack);
+
+		if (!top_entry) {
+			break;
+		}
+
+		struct ivy_token *top
+			= b_unbox(struct ivy_token, top_entry, t_entry);
+		if (ivy_token_is_symbol(top, IVY_SYM_LEFT_PAREN)) {
+			break;
+		}
+
+		const struct ivy_operator *top_op = get_operator(top);
+		if (top_op->op_precedence < op->op_precedence) {
+			break;
+		}
+
+		if (top_op->op_precedence == op->op_precedence
+		    && op->op_associativity != IVY_ASSOCIATIVITY_LEFT) {
+			break;
+		}
+
+		b_queue_delete(&state->s_operator_stack, top_entry);
+		b_queue_push_back(&state->s_operand_queue, top_entry);
+	}
+
+	b_queue_push_back(&state->s_operator_stack, &tok->t_entry);
 	return IVY_OK;
 }
 
@@ -132,8 +204,14 @@ static struct token_parse_result parse_ident(
 		state->s_end = EXPR_END_DOT;
 	}
 
-	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
+	if (state->s_prev_part == EXPR_OPERAND) {
+		push_operator(state, tok, NULL);
+	} else {
+		b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
+	}
+
 	set_previous(state, tok);
+	state->s_prev_part = EXPR_OPERAND;
 
 	return PARSE_RESULT(IVY_OK, 0);
 }
@@ -150,6 +228,7 @@ static struct token_parse_result parse_atom(
 
 	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
 	set_previous(state, tok);
+	state->s_prev_part = EXPR_OPERAND;
 
 	return PARSE_RESULT(IVY_OK, 0);
 }
@@ -166,6 +245,8 @@ static struct token_parse_result parse_string(
 
 	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
 	set_previous(state, tok);
+	state->s_prev_part = EXPR_OPERAND;
+
 	return PARSE_RESULT(IVY_OK, 0);
 }
 
@@ -197,8 +278,13 @@ static struct token_parse_result parse_int(
 		state->s_end = EXPR_END_DOT;
 	}
 
+	if (state->s_prev_part == EXPR_OPERAND) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
 	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
 	set_previous(state, tok);
+	state->s_prev_part = EXPR_OPERAND;
 
 	return PARSE_RESULT(IVY_OK, 0);
 }
@@ -213,24 +299,17 @@ static struct token_parse_result parse_double(
 		state->s_end = EXPR_END_DOT;
 	}
 
+	if (state->s_prev_part == EXPR_OPERAND) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
 	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
 	set_previous(state, tok);
+	state->s_prev_part = EXPR_OPERAND;
 
 	return PARSE_RESULT(IVY_OK, 0);
 }
 
-static const struct ivy_operator *get_operator(struct ivy_token *tok)
-{
-	switch (tok->t_type) {
-	case IVY_TOK_KEYWORD:
-		return ivy_operator_get(tok->t_keyword);
-	case IVY_TOK_SYMBOL:
-		return ivy_operator_get(tok->t_symbol);
-	default:
-		return NULL;
-	}
-}
-
 static struct token_parse_result parse_symbol(
 	struct ivy_parser *ctx, struct ivy_token *tok)
 {
@@ -241,42 +320,11 @@ static struct token_parse_result parse_symbol(
 		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
 	}
 
-	const struct ivy_operator *op = ivy_operator_get(tok->t_symbol);
-	if (!op) {
-		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
-	}
-
-	while (true) {
-		b_queue_entry *top_entry = b_queue_last(&state->s_operator_stack);
-
-		if (!top_entry) {
-			break;
-		}
-
-		struct ivy_token *top
-			= b_unbox(struct ivy_token, top_entry, t_entry);
-		if (ivy_token_is_symbol(top, IVY_SYM_LEFT_PAREN)) {
-			break;
-		}
-
-		const struct ivy_operator *top_op = get_operator(top);
-		if (top_op->op_precedence < op->op_precedence) {
-			break;
-		}
-
-		if (top_op->op_precedence == op->op_precedence
-		    && op->op_associativity != IVY_ASSOCIATIVITY_LEFT) {
-			break;
-		}
-
-		b_queue_delete(&state->s_operator_stack, top_entry);
-		b_queue_push_back(&state->s_operand_queue, top_entry);
-	}
-
-	b_queue_push_back(&state->s_operator_stack, &tok->t_entry);
+	enum ivy_status status = push_operator(state, tok, NULL);
 	set_previous(state, tok);
+	state->s_prev_part = EXPR_OPERATOR;
 
-	return PARSE_RESULT(IVY_OK, 0);
+	return PARSE_RESULT(status, 0);
 }
 
 static struct token_parse_result parse_left_paren(
@@ -289,6 +337,14 @@ static struct token_parse_result parse_left_paren(
 		state->s_end = EXPR_END_DOT;
 	}
 
+	if (state->s_prev_tok == IVY_TOK_IDENT) {
+		/* this is the opening parenthesis of a complex message. */
+	}
+
+	if (state->s_prev_part == EXPR_OPERAND) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
 	b_queue_push_back(&state->s_operator_stack, &tok->t_entry);
 	set_previous(state, tok);
 
@@ -401,7 +457,17 @@ static struct token_parse_result parse_or(
 static struct token_parse_result parse_not(
 	struct ivy_parser *ctx, struct ivy_token *tok)
 {
-	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end != EXPR_END_DOT) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
+	enum ivy_status status = push_operator(state, tok, NULL);
+	set_previous(state, tok);
+
+	return PARSE_RESULT(status, 0);
 }
 
 static struct token_parse_result parse_bang(
@@ -436,6 +502,21 @@ static struct token_parse_result parse_dot(
 	return PARSE_RESULT(status, 0);
 }
 
+static struct token_parse_result parse_caret(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end != EXPR_END_NONE || state->s_type != IVY_KW_NONE) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
+	state->s_type = IVY_SYM_CARET;
+
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
 struct ast_node_type expr_node_ops = {
 	.n_add_child = add_child,
 	.n_state_size = sizeof(struct expr_parser_state),
@@ -454,6 +535,7 @@ struct ast_node_type expr_node_ops = {
 	.n_symbol_parsers = {
 		SYM_PARSER(BANG, parse_bang),
 		SYM_PARSER(DOT, parse_dot),
+		SYM_PARSER(CARET, parse_caret),
 		SYM_PARSER(LEFT_PAREN, parse_left_paren),
 		SYM_PARSER(RIGHT_PAREN, parse_right_paren),
 	},
diff --git a/lang/include/ivy/lang/ast.h b/lang/include/ivy/lang/ast.h
index 10539d1..69639a2 100644
--- a/lang/include/ivy/lang/ast.h
+++ b/lang/include/ivy/lang/ast.h
@@ -35,6 +35,8 @@ enum ivy_ast_node_type {
 	IVY_AST_COND_GROUP,
 	IVY_AST_COND,
 	IVY_AST_TUPLE,
+	IVY_AST_PKG_INIT,
+	IVY_AST_PKG_COMPREHENSION,
 	IVY_AST_TYPE_COUNT,
 };
 
@@ -217,6 +219,19 @@ struct ivy_ast_tuple_node {
 	b_queue n_members;
 };
 
+struct ivy_ast_pkg_init_node {
+	struct ivy_ast_node n_base;
+	b_queue n_items;
+};
+
+struct ivy_ast_pkg_comprehension_node {
+	struct ivy_ast_node n_base;
+	struct ivy_ast_node *n_transform;
+	struct ivy_ast_node *n_item;
+	struct ivy_ast_node *n_source;
+	struct ivy_ast_node *n_cond;
+};
+
 struct ivy_ast_block_node {
 	struct ivy_ast_node n_base;
 	/* queue of struct ivy_ast_node. expressions to evaluate when the do node itself is evaluated. */