From 101d87e09def80108183deee8652ec661e0cd893 Mon Sep 17 00:00:00 2001
From: Max Wash
Date: Wed, 27 Nov 2024 22:29:29 +0000
Subject: [PATCH] lang: ast: implement simple identifier and operator expression parsing

expr.c: add an expression parser state. Operand tokens (identifiers,
atoms, strings, integers and doubles) are appended to an operand queue.
Symbol operators go onto an operator stack; before an operator is pushed,
stacked operators of higher precedence, or of equal precedence when the
incoming operator is left-associative, are moved to the operand queue.
A left parenthesis on the stack stops that unwinding. A dot or a bang
calls finalise_expr(), which for now only prints the queued tokens, and
pops the parser state (the bang handler also returns PARSE_REPEAT_TOKEN).
The string-fragment, label and most keyword handlers are placeholders
that return IVY_ERR_IO_FAILURE; the right-parenthesis handler only
returns IVY_OK.

msgh.c: replace the identifier handler with an expr_begin callback that
pushes an IVY_AST_EXPR state and repeats the token once a selector is
set; without a selector it reports IVY_ERR_BAD_SYNTAX.

node.c, node.h: register expr_node_ops, use the IVY_TOK_NONE /
IVY_KW_NONE / IVY_SYM_NONE slots as generic parsers, turn n_expr_parser
into a group of expr_begin / expr_other / expr_all callbacks selected by
get_tok_expr_type(), export get_tok_expr_type() and enum tok_expr_type,
and remove ivy_ast_op_to_string().
---
NOTE(review): in this copy of the patch the header names on the bare
#include lines of the expr.c hunk and the text of the default
print_token() format string are missing, and the author address appears
to be missing too; restore them from the original commit before applying.
Leading whitespace and blank context lines were re-created from the hunk
line counts, so the patch may need `git apply --ignore-whitespace`.

NOTE(review): finalise_expr() prints the operator stack in
b_queue_foreach order. parse_symbol() treats b_queue_last() as the top of
the stack, so if b_queue_foreach walks front to back the leftover
operators are printed bottom-first rather than top-first - confirm.

 lang/ast/expr.c | 451 +++++++++++++++++++++++++++++++++++++++++++++++-
 lang/ast/msgh.c |  23 +--
 lang/ast/node.c |  65 +++----
 lang/ast/node.h |  14 +-
 4 files changed, 503 insertions(+), 50 deletions(-)

diff --git a/lang/ast/expr.c b/lang/ast/expr.c
index bb20292..7f598c6 100644
--- a/lang/ast/expr.c
+++ b/lang/ast/expr.c
@@ -2,27 +2,472 @@
 #include "node.h"
 
 #include
+#include
 #include
+#include
+
+enum expr_end {
+	EXPR_END_NONE = 0,
+	/* arithmetic expressions, terminated with a dot (.) */
+	EXPR_END_DOT,
+	/* keyword expressions (if-else, while/for loops, match, etc), terminated with the end keyword. */
+	EXPR_END_KEYWORD,
+};
+
+enum expr_part {
+	EXPR_NONE = 0,
+	EXPR_OPERATOR,
+	EXPR_OPERAND,
+};
 
 struct expr_parser_state {
 	struct parser_state s_base;
+	enum expr_end s_end;
+	enum ivy_keyword s_type;
+	unsigned int s_prev_tok;
+	enum expr_part s_prev_part;
+	b_queue s_operand_queue;
+	b_queue s_operator_stack;
 };
 
 static enum ivy_status add_child(
 	struct ivy_ast_node *parent, struct ivy_ast_node *child)
 {
-	struct ivy_ast_expr_node *c = (struct ivy_ast_expr_node *)parent;
+	struct ivy_ast_expr_node *expr = (struct ivy_ast_expr_node *)parent;
 
-	if (!c->n_child) {
-		c->n_child = child;
+	if (!expr->n_child) {
+		expr->n_child = child;
 		return IVY_OK;
 	}
 
 	return IVY_ERR_NOT_SUPPORTED;
 }
 
+static void set_previous(struct expr_parser_state *state, struct ivy_token *tok)
+{
+	switch (tok->t_type) {
+	case IVY_TOK_SYMBOL:
+		state->s_prev_tok = tok->t_symbol;
+		state->s_prev_part = EXPR_OPERATOR;
+		break;
+	case IVY_TOK_KEYWORD:
+		state->s_prev_tok = tok->t_keyword;
+		state->s_prev_part = EXPR_OPERATOR;
+		break;
+	default:
+		state->s_prev_tok = tok->t_type;
+		state->s_prev_part = EXPR_OPERAND;
+		break;
+	}
+}
+
+static void print_token(struct ivy_token *tok)
+{
+	switch (tok->t_type) {
+	case IVY_TOK_IDENT:
+		printf("%s", tok->t_str);
+		break;
+	case IVY_TOK_INT:
+		printf("%llu", tok->t_int);
+		break;
+	case IVY_TOK_DOUBLE:
+		printf("%.2lf", tok->t_double);
+		break;
+	case IVY_TOK_SYMBOL:
+		printf("%s", ivy_symbol_to_string(tok->t_symbol));
+		break;
+	case IVY_TOK_KEYWORD:
+		printf("%s", ivy_keyword_to_string(tok->t_keyword));
+		break;
+	case IVY_TOK_STRING:
+		printf("\"%s\"", tok->t_str);
+		break;
+	default:
+		printf("");
+		break;
+	}
+}
+
+static enum ivy_status finalise_expr(struct expr_parser_state *state)
+{
+	b_queue_iterator it = {0};
+
+	int i = 0;
+	b_queue_foreach (&it, &state->s_operand_queue) {
+		struct ivy_token *operand = b_unbox(struct ivy_token, it.entry, t_entry);
+
+		if (i > 0) {
+			printf(" ");
+		}
+
+		print_token(operand);
+		i++;
+	}
+
+	b_queue_foreach (&it, &state->s_operator_stack) {
+		struct ivy_token *operator = b_unbox(struct ivy_token, it.entry, t_entry);
+
+		if (i > 0) {
+			printf(" ");
+		}
+
+		print_token(operator);
+		i++;
+	}
+
+	printf("\n");
+	return IVY_OK;
+}
+
+static struct token_parse_result parse_ident(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end == EXPR_END_NONE) {
+		state->s_end = EXPR_END_DOT;
+	}
+
+	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
+	set_previous(state, tok);
+
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct token_parse_result parse_atom(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end == EXPR_END_NONE) {
+		state->s_end = EXPR_END_DOT;
+	}
+
+	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
+	set_previous(state, tok);
+
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct token_parse_result parse_string(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end == EXPR_END_NONE) {
+		state->s_end = EXPR_END_DOT;
+	}
+
+	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
+	set_previous(state, tok);
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct token_parse_result parse_str_start(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_str_end(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_label(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_int(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end == EXPR_END_NONE) {
+		state->s_end = EXPR_END_DOT;
+	}
+
+	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
+	set_previous(state, tok);
+
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct token_parse_result parse_double(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end == EXPR_END_NONE) {
+		state->s_end = EXPR_END_DOT;
+	}
+
+	b_queue_push_back(&state->s_operand_queue, &tok->t_entry);
+	set_previous(state, tok);
+
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct ivy_operator *get_operator(struct ivy_token *tok)
+{
+	switch (tok->t_type) {
+	case IVY_TOK_KEYWORD:
+		return ivy_operator_get(tok->t_keyword);
+	case IVY_TOK_SYMBOL:
+		return ivy_operator_get(tok->t_symbol);
+	default:
+		return NULL;
+	}
+}
+
+static struct token_parse_result parse_symbol(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end != EXPR_END_DOT) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
+	struct ivy_operator *op = ivy_operator_get(tok->t_symbol);
+	if (!op) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
+	while (true) {
+		b_queue_entry *top_entry = b_queue_last(&state->s_operator_stack);
+
+		if (!top_entry) {
+			break;
+		}
+
+		struct ivy_token *top = b_unbox(struct ivy_token, top_entry, t_entry);
+		if (ivy_token_is_symbol(top, IVY_SYM_LEFT_PAREN)) {
+			break;
+		}
+
+		struct ivy_operator *top_op = get_operator(top);
+		if (!top_op || top_op->op_precedence < op->op_precedence) {
+			break;
+		}
+
+		if (top_op->op_precedence == op->op_precedence && op->op_associativity != IVY_ASSOCIATIVITY_LEFT) {
+			break;
+		}
+
+		b_queue_delete(&state->s_operator_stack, top_entry);
+		b_queue_push_back(&state->s_operand_queue, top_entry);
+	}
+
+	b_queue_push_back(&state->s_operator_stack, &tok->t_entry);
+	set_previous(state, tok);
+
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct token_parse_result parse_left_paren(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end == EXPR_END_NONE) {
+		state->s_end = EXPR_END_DOT;
+	}
+
+	b_queue_push_back(&state->s_operator_stack, &tok->t_entry);
+	set_previous(state, tok);
+
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct token_parse_result parse_right_paren(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct token_parse_result parse_if(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end == EXPR_END_NONE) {
+		state->s_end = EXPR_END_KEYWORD;
+	}
+
+	return PARSE_RESULT(IVY_OK, 0);
+}
+
+static struct token_parse_result parse_else(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_end(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_while(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_for(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_match(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_try(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_catch(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_throw(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_understands(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_in(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_do(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_is(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_and(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_or(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_not(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
+}
+
+static struct token_parse_result parse_bang(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end != EXPR_END_DOT) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
+	enum ivy_status status = finalise_expr(state);
+
+	parser_pop_state(ctx, STATE_ADD_NODE_TO_PARENT);
+	return PARSE_RESULT(status, PARSE_REPEAT_TOKEN);
+}
+
+static struct token_parse_result parse_dot(
+	struct ivy_parser *ctx, struct ivy_token *tok)
+{
+	struct expr_parser_state *state
+		= parser_get_state(ctx, struct expr_parser_state);
+
+	if (state->s_end != EXPR_END_DOT) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
+	}
+
+	enum ivy_status status = finalise_expr(state);
+
+	parser_pop_state(ctx, STATE_ADD_NODE_TO_PARENT);
+	return PARSE_RESULT(status, 0);
+}
+
 struct ast_node_type expr_node_ops = {
 	.n_add_child = add_child,
 	.n_state_size = sizeof(struct expr_parser_state),
 	.n_node_size = sizeof(struct ivy_ast_expr_node),
+	.n_token_parsers = {
+		[IVY_TOK_IDENT] = parse_ident,
+		[IVY_TOK_ATOM] = parse_atom,
+		[IVY_TOK_STRING] = parse_string,
+		[IVY_TOK_STR_START] = parse_str_start,
+		[IVY_TOK_STR_END] = parse_str_end,
+		[IVY_TOK_LABEL] = parse_label,
+		[IVY_TOK_INT] = parse_int,
+		[IVY_TOK_DOUBLE] = parse_double,
+		[IVY_TOK_SYMBOL] = parse_symbol,
+	},
+	.n_symbol_parsers = {
+		[IVY_SYM_BANG] = parse_bang,
+		[IVY_SYM_DOT] = parse_dot,
+		[IVY_SYM_LEFT_PAREN] = parse_left_paren,
+		[IVY_SYM_RIGHT_PAREN] = parse_right_paren,
+	},
+	.n_keyword_parsers = {
+		[IVY_KW_IF] = parse_if,
+		[IVY_KW_ELSE] = parse_else,
+		[IVY_KW_END] = parse_end,
+		[IVY_KW_WHILE] = parse_while,
+		[IVY_KW_FOR] = parse_for,
+		[IVY_KW_MATCH] = parse_match,
+		[IVY_KW_TRY] = parse_try,
+		[IVY_KW_CATCH] = parse_catch,
+		[IVY_KW_THROW] = parse_throw,
+		[IVY_KW_UNDERSTANDS] = parse_understands,
+		[IVY_KW_IN] = parse_in,
+		[IVY_KW_DO] = parse_do,
+		[IVY_KW_IS] = parse_is,
+		[IVY_KW_AND] = parse_and,
+		[IVY_KW_OR] = parse_or,
+		[IVY_KW_NOT] = parse_not,
+	},
 };
diff --git a/lang/ast/msgh.c b/lang/ast/msgh.c
index afc7228..e3da6c9 100644
--- a/lang/ast/msgh.c
+++ b/lang/ast/msgh.c
@@ -13,8 +13,8 @@ struct msgh_parser_state {
 	unsigned int s_prev;
 };
 
-static struct token_parse_result parse_ident(
-	struct ivy_parser *ctx, struct ivy_token *tok)
+static struct token_parse_result parse_expr(
+	struct ivy_parser *ctx, struct ivy_token *tok)
 {
 	struct msgh_parser_state *state
 		= parser_get_state(ctx, struct msgh_parser_state);
@@ -22,17 +22,12 @@ static struct token_parse_result parse_ident(
 	struct ivy_ast_msgh_node *msgh
 		= (struct ivy_ast_msgh_node *)state->s_base.s_node;
 
-	if (msgh->n_sel) {
-		/* TODO expression parsing */
-		return PARSE_RESULT(IVY_ERR_NOT_SUPPORTED, 0);
+	if (!msgh->n_sel) {
+		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
 	}
 
-	if (state->s_prev == IVY_SYM_HYPHEN) {
-		/* message name */
-		return PARSE_RESULT(IVY_OK, 0);
-	}
-
-	return PARSE_RESULT(IVY_OK, 0);
+	parser_push_state(ctx, IVY_AST_EXPR);
+	return PARSE_RESULT(IVY_OK, PARSE_REPEAT_TOKEN);
 }
 
 static enum ivy_status add_child(
@@ -64,7 +59,7 @@ struct ast_node_type msgh_node_ops = {
 	.n_init_state = init_state,
 	.n_state_size = sizeof(struct msgh_parser_state),
 	.n_node_size = sizeof(struct ivy_ast_msgh_node),
-	.n_token_parsers = {
-		[IVY_TOK_IDENT] = parse_ident,
-	},
+	.n_expr_parser = {
+		.expr_begin = parse_expr,
+	}
 };
diff --git a/lang/ast/node.c b/lang/ast/node.c
index 9c0f442..4760d0b 100644
--- a/lang/ast/node.c
+++ b/lang/ast/node.c
@@ -11,6 +11,7 @@ extern struct ast_node_type unit_import_node_ops;
 extern struct ast_node_type class_node_ops;
 extern struct ast_node_type msgh_node_ops;
 extern struct ast_node_type selector_node_ops;
+extern struct ast_node_type expr_node_ops;
 
 static const struct ast_node_type *node_ops[] = {
 	[IVY_AST_UNIT] = &unit_node_ops,
@@ -19,15 +20,10 @@ static const struct ast_node_type *node_ops[] = {
 	[IVY_AST_CLASS] = &class_node_ops,
 	[IVY_AST_MSGH] = &msgh_node_ops,
 	[IVY_AST_SELECTOR] = &selector_node_ops,
+	[IVY_AST_EXPR] = &expr_node_ops,
 };
 static const size_t nr_node_ops = sizeof node_ops / sizeof node_ops[0];
 
-enum tok_expr_type {
-	TOK_EXPR_NONE = 0,
-	TOK_EXPR_BEGIN,
-	TOK_EXPR_ANY,
-};
-
 const struct ast_node_type *get_ast_node_type(enum ivy_ast_node_type type)
 {
 	if (type >= nr_node_ops) {
@@ -37,7 +33,7 @@ const struct ast_node_type *get_ast_node_type(enum ivy_ast_node_type type)
 	return node_ops[type];
 }
 
-static enum tok_expr_type get_tok_expr_type(struct ivy_token *tok)
+enum tok_expr_type get_tok_expr_type(struct ivy_token *tok)
 {
 	switch (tok->t_type) {
 	case IVY_TOK_IDENT:
@@ -112,14 +108,45 @@ token_parse_function get_token_parser(
 		return NULL;
 	}
 	token_parse_function generic_parser = type->n_token_parsers[tok->t_type];
+
+	if (!generic_parser) {
+		generic_parser = type->n_token_parsers[IVY_TOK_NONE];
+	}
+
 	token_parse_function better_parser = NULL;
 
 	switch (tok->t_type) {
 	case IVY_TOK_KEYWORD:
 		better_parser = type->n_keyword_parsers[tok->t_keyword];
+		if (type->n_keyword_parsers[IVY_KW_NONE]) {
+			generic_parser = type->n_keyword_parsers[IVY_KW_NONE];
+		}
 		break;
 	case IVY_TOK_SYMBOL:
 		better_parser = type->n_symbol_parsers[tok->t_symbol];
+		if (type->n_symbol_parsers[IVY_SYM_NONE]) {
+			generic_parser = type->n_symbol_parsers[IVY_SYM_NONE];
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (better_parser) {
+		return better_parser;
+	}
+
+	enum tok_expr_type expr_type = get_tok_expr_type(tok);
+	switch (expr_type) {
+	case TOK_EXPR_BEGIN:
+		better_parser = type->n_expr_parser.expr_begin
+			? type->n_expr_parser.expr_begin
+			: type->n_expr_parser.expr_all;
+		break;
+	case TOK_EXPR_ANY:
+		better_parser = type->n_expr_parser.expr_other
+			? type->n_expr_parser.expr_other
+			: type->n_expr_parser.expr_all;
 		break;
 	default:
 		break;
@@ -223,30 +250,6 @@ const char *ivy_ast_node_type_to_string(enum ivy_ast_node_type v)
 	}
 }
 
-const char *ivy_ast_op_to_string(enum ivy_ast_op v)
-{
-	switch (v) {
-		ENUM_STR(IVY_OP_NONE);
-		ENUM_STR(IVY_OP_ASSIGN);
-		ENUM_STR(IVY_OP_ADD);
-		ENUM_STR(IVY_OP_SUBTRACT);
-		ENUM_STR(IVY_OP_MULTIPLY);
-		ENUM_STR(IVY_OP_DIVIDE);
-		ENUM_STR(IVY_OP_LESS_THAN);
-		ENUM_STR(IVY_OP_GREATER_THAN);
-		ENUM_STR(IVY_OP_EQUAL);
-		ENUM_STR(IVY_OP_NOT_EQUAL);
-		ENUM_STR(IVY_OP_LESS_EQUAL);
-		ENUM_STR(IVY_OP_GREATER_EQUAL);
-		ENUM_STR(IVY_OP_AND);
-		ENUM_STR(IVY_OP_OR);
-		ENUM_STR(IVY_OP_IS);
-		ENUM_STR(IVY_OP_NOT);
-	default:
-		return "";
-	}
-}
-
 const char *ivy_ast_msgh_recipient_type_to_string(enum ivy_ast_msgh_recipient_type v)
 {
 	switch (v) {
diff --git a/lang/ast/node.h b/lang/ast/node.h
index 8a9ed2a..7d29fda 100644
--- a/lang/ast/node.h
+++ b/lang/ast/node.h
@@ -16,6 +16,12 @@ enum token_parse_flags {
 	PARSE_REPEAT_TOKEN = 0x01u,
 };
 
+enum tok_expr_type {
+	TOK_EXPR_NONE = 0,
+	TOK_EXPR_BEGIN,
+	TOK_EXPR_ANY,
+};
+
 struct token_parse_result {
 	enum ivy_status r_status;
 	enum token_parse_flags r_flags;
@@ -38,13 +44,17 @@ struct ast_node_type {
 	token_parse_function n_token_parsers[IVY_TOK_TYPE_COUNT];
 	token_parse_function n_keyword_parsers[IVY_KW_TYPE_COUNT];
 	token_parse_function n_symbol_parsers[IVY_SYM_TYPE_COUNT];
-	token_parse_function n_expr_parser;
+	struct {
+		token_parse_function expr_begin;
+		token_parse_function expr_other;
+		token_parse_function expr_all;
+	} n_expr_parser;
 };
 
 extern const struct ast_node_type *get_ast_node_type(enum ivy_ast_node_type type);
 extern token_parse_function get_token_parser(
 	struct ivy_ast_node *context, struct ivy_token *tok);
-
+extern enum tok_expr_type get_tok_expr_type(struct ivy_token *tok);
 extern struct ivy_ast_node *ast_node_create_with_size(
 	enum ivy_ast_node_type type, size_t size);
 extern enum ivy_status ast_node_add_child(