lang: ast: re-factor expression parser into multiple files

This commit is contained in:
2024-12-03 13:26:55 +00:00
parent 1204bffb4d
commit 16ab13d738
6 changed files with 160 additions and 103 deletions

View File

@@ -1,73 +1,11 @@
#include "ctx.h"
#include "node.h"
#include "../node.h"
#include "expr.h"
#include <blue/object/string.h>
#include <ivy/lang/lex.h>
#include <ivy/lang/operator.h>
#include <stdio.h>
/* Broad category of the expression an expr_parser_state is parsing; stored
 * in expr_parser_state.s_type. */
enum expr_type {
/* no category has been assigned (zero value) */
EXPR_TYPE_NONE = 0,
/* if-else, while/for, match, etc. (any expression that ends with an
 * 'end' keyword) */
EXPR_TYPE_STMT,
/* arithmetic and message-sending expressions (any expression that ends
 * implicitly or with an expression separator) */
EXPR_TYPE_ARITH,
};
/* Kind of sub-expression a parser state represents; stored in
 * expr_parser_state.s_subexpr. */
enum expr_subtype {
/* no sub-expression kind has been set (zero value) */
EXPR_SUBTYPE_NONE = 0,
/* generic parenthesis-enclosed arithmetic expressions */
EXPR_SUBTYPE_PAREN,
/* keyword messages */
EXPR_SUBTYPE_KEYWORD_MSG,
/* expression delimited by labels */
EXPR_SUBTYPE_KEYWORD_ARG,
/* complex messages */
EXPR_SUBTYPE_COMPLEX_MSG,
};
/* Classification of the previously parsed component of an arithmetic
 * expression; stored in expr_parser_state.s_prev_component. */
enum expr_component {
/* no component has been recorded (zero value) */
EXPR_CMP_NONE = 0,
/* the previous component was an operator */
EXPR_CMP_OPERATOR,
/* the previous component was an operand */
EXPR_CMP_OPERAND,
/* the previous component was a message */
EXPR_CMP_MSG,
};
/* Parser state for an expression node. */
struct expr_parser_state {
/* generic parser state. add_child() receives a struct parser_state * and
 * casts it to struct expr_parser_state *, which relies on this being the
 * first member. */
struct parser_state s_base;
/* broad category of this expression (see enum expr_type) */
enum expr_type s_type;
/* for arithmetic expressions, this records whether the previous
 * component (either a token or parenthesised group of tokens) is an
 * operator, operand, or message */
enum expr_component s_prev_component;
/* the token type/keyword type/symbol type of the last token that was
 * encountered */
unsigned int s_prev_token;
/* if this is an arithmetic expression, this variable is the depth
 * of parentheses that this sub-expression is at */
unsigned int s_paren_depth;
/* when this is set, the expression will be terminated when the
 * specified token is encountered. the token that terminated the
 * expression will not be consumed. */
unsigned int s_terminator;
/* all sub-expressions (i.e. those contained within brackets,
 * keyword-messages and keyword-message args, etc.) will have this set. */
enum expr_subtype s_subexpr;
/* operand child nodes are appended here through their n_entry link */
b_queue s_output_queue;
/* NOTE(review): presumably the operators pushed by the push-operator
 * helper that have not yet been moved to the output queue -- confirm */
b_queue s_operator_stack;
/* these variables are for keyword-message expressions */
struct ivy_ast_node *s_recipient;
struct ivy_ast_msg_node *s_msg;
b_queue s_labels;
b_queue s_args;
};
static void print_operand(struct ivy_ast_node *node)
{
switch (node->n_type) {
@@ -112,7 +50,7 @@ static void print_operand(struct ivy_ast_node *node)
}
}
static void push_operand(struct expr_parser_state *state, struct ivy_token *tok)
void arith_push_operand(struct expr_parser_state *state, struct ivy_token *tok)
{
switch (tok->t_type) {
case IVY_TOK_IDENT: {
@@ -228,7 +166,7 @@ static void print_expr_queues(struct expr_parser_state *state)
printf("\n");
}
static void push_operator(struct expr_parser_state *state, struct ivy_ast_node *node)
void arith_push_operator(struct expr_parser_state *state, struct ivy_ast_node *node)
{
const struct ivy_operator *op = get_operator_from_node(node);
if (!op) {
@@ -452,7 +390,7 @@ static enum ivy_status finalise_expr(
return IVY_OK;
}
static struct token_parse_result parse_operand(
struct token_parse_result arith_parse_operand(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
@@ -468,7 +406,7 @@ static struct token_parse_result parse_operand(
state->s_type = EXPR_TYPE_ARITH;
push_operand(state, tok);
arith_push_operand(state, tok);
// print_expr_queues(state);
state->s_prev_component = EXPR_CMP_OPERAND;
state->s_prev_token = tok->t_type;
@@ -476,7 +414,7 @@ static struct token_parse_result parse_operand(
return PARSE_RESULT(IVY_OK, 0);
}
static struct token_parse_result parse_operator(
struct token_parse_result arith_parse_operator(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
@@ -503,14 +441,14 @@ static struct token_parse_result parse_operator(
return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
}
push_operator(state, op);
arith_push_operator(state, op);
state->s_prev_component = EXPR_CMP_OPERATOR;
print_expr_queues(state);
return PARSE_RESULT(IVY_OK, 0);
}
static struct token_parse_result parse_ident(
struct token_parse_result arith_parse_ident(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
@@ -520,16 +458,16 @@ static struct token_parse_result parse_ident(
if (state->s_prev_component == EXPR_CMP_OPERAND
|| state->s_prev_component == EXPR_CMP_MSG) {
result = parse_operator(ctx, tok);
result = arith_parse_operator(ctx, tok);
state->s_prev_component = EXPR_CMP_MSG;
} else {
result = parse_operand(ctx, tok);
result = arith_parse_operand(ctx, tok);
}
return result;
}
static struct token_parse_result parse_left_paren(
struct token_parse_result arith_parse_left_paren(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
@@ -644,7 +582,7 @@ static struct ivy_ast_msg_node *finalise_complex_msg(struct expr_parser_state *s
return msg;
}
static struct token_parse_result parse_right_paren(
struct token_parse_result arith_parse_right_paren(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
@@ -693,7 +631,7 @@ static struct token_parse_result parse_right_paren(
return PARSE_RESULT(IVY_OK, flags);
}
static struct token_parse_result parse_dot(
struct token_parse_result arith_parse_dot(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
@@ -708,7 +646,7 @@ static struct token_parse_result parse_dot(
return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
}
if (state->s_recipient) {
if (state->s_subexpr == EXPR_SUBTYPE_KEYWORD_MSG) {
/* this is the end of a keyword-message */
struct ivy_ast_msg_node *msg = finalise_keyword_msg(state);
parser_replace_current_node(ctx, (struct ivy_ast_node *)msg);
@@ -739,7 +677,7 @@ static struct token_parse_result parse_dot(
return PARSE_RESULT(IVY_OK, flags);
}
static struct token_parse_result parse_label(
struct token_parse_result arith_parse_label(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
@@ -807,13 +745,10 @@ static struct token_parse_result parse_label(
return PARSE_RESULT(IVY_OK, 0);
}
static enum ivy_status add_child(
enum ivy_status arith_add_child(
struct parser_state *parent, struct ivy_ast_node *child)
{
struct expr_parser_state *state = (struct expr_parser_state *)parent;
if (state->s_type == EXPR_TYPE_STMT) {
return IVY_ERR_NOT_SUPPORTED;
}
if (state->s_subexpr == EXPR_SUBTYPE_KEYWORD_MSG
|| state->s_subexpr == EXPR_SUBTYPE_COMPLEX_MSG) {
@@ -825,7 +760,7 @@ static enum ivy_status add_child(
b_queue_push_back(&state->s_output_queue, &child->n_entry);
state->s_prev_component = EXPR_CMP_OPERAND;
} else if (child->n_type == IVY_AST_MSG) {
push_operator(state, child);
arith_push_operator(state, child);
state->s_prev_component = EXPR_CMP_MSG;
} else {
/* treat the child node as a sub-expression enclosed in
@@ -836,22 +771,3 @@ static enum ivy_status add_child(
return IVY_OK;
}
/* Parser operations table for expression nodes: the add-child hook, the
 * sizes of the parser state and of the AST node, and the handlers invoked
 * for individual token and symbol types. */
struct ast_node_type expr_node_ops = {
.n_add_child = add_child,
.n_state_size = sizeof(struct expr_parser_state),
.n_node_size = sizeof(struct ivy_ast_expr_node),
/* handlers selected by token type; INT, DOUBLE and STRING all share the
 * operand handler */
.n_token_parsers = {
TOK_PARSER(IDENT, parse_ident),
TOK_PARSER(INT, parse_operand),
TOK_PARSER(DOUBLE, parse_operand),
TOK_PARSER(STRING, parse_operand),
TOK_PARSER(SYMBOL, parse_operator),
TOK_PARSER(LABEL, parse_label),
},
/* handlers selected by symbol type */
.n_symbol_parsers = {
SYM_PARSER(LEFT_PAREN, parse_left_paren),
SYM_PARSER(RIGHT_PAREN, parse_right_paren),
SYM_PARSER(DOT, parse_dot),
},
};

36
lang/ast/expr/expr.c Normal file
View File

@@ -0,0 +1,36 @@
#include "expr.h"
#include "../node.h"
static enum ivy_status add_child(
struct parser_state *parent, struct ivy_ast_node *child)
{
struct expr_parser_state *state = (struct expr_parser_state *)parent;
switch (state->s_type) {
case EXPR_TYPE_STMT:
return IVY_ERR_NOT_SUPPORTED;
case EXPR_TYPE_ARITH:
return arith_add_child(parent, child);
default:
return IVY_ERR_NOT_SUPPORTED;
}
}
/* Parser operations table for expression nodes: the add-child hook, the
 * sizes of the parser state and of the AST node, and the handlers invoked
 * for individual token and symbol types. Every token/symbol handler listed
 * here is one of the arith_* functions declared in expr.h. */
struct ast_node_type expr_node_ops = {
/* dispatches on expr_parser_state.s_type */
.n_add_child = add_child,
.n_state_size = sizeof(struct expr_parser_state),
.n_node_size = sizeof(struct ivy_ast_expr_node),
/* handlers selected by token type; INT, DOUBLE and STRING all share the
 * operand handler */
.n_token_parsers = {
TOK_PARSER(IDENT, arith_parse_ident),
TOK_PARSER(INT, arith_parse_operand),
TOK_PARSER(DOUBLE, arith_parse_operand),
TOK_PARSER(STRING, arith_parse_operand),
TOK_PARSER(SYMBOL, arith_parse_operator),
TOK_PARSER(LABEL, arith_parse_label),
},
/* handlers selected by symbol type */
.n_symbol_parsers = {
SYM_PARSER(LEFT_PAREN, arith_parse_left_paren),
SYM_PARSER(RIGHT_PAREN, arith_parse_right_paren),
SYM_PARSER(DOT, arith_parse_dot),
},
};

97
lang/ast/expr/expr.h Normal file
View File

@@ -0,0 +1,97 @@
#ifndef _AST_EXPR_EXPR_H_
#define _AST_EXPR_EXPR_H_
#include "../ctx.h"
#include "../node.h"
#include <blue/core/queue.h>
struct ivy_ast_node;
struct ivy_ast_msg_node;
/* Broad category of the expression an expr_parser_state is parsing; stored
 * in expr_parser_state.s_type. */
enum expr_type {
/* no category has been assigned (zero value) */
EXPR_TYPE_NONE = 0,
/* if-else, while/for, match, etc. (any expression that ends with an
 * 'end' keyword) */
EXPR_TYPE_STMT,
/* arithmetic and message-sending expressions (any expression that ends
 * implicitly or with an expression separator) */
EXPR_TYPE_ARITH,
};
/* Kind of sub-expression a parser state represents; stored in
 * expr_parser_state.s_subexpr. */
enum expr_subtype {
/* no sub-expression kind has been set (zero value) */
EXPR_SUBTYPE_NONE = 0,
/* generic parenthesis-enclosed arithmetic expressions */
EXPR_SUBTYPE_PAREN,
/* keyword messages */
EXPR_SUBTYPE_KEYWORD_MSG,
/* expression delimited by labels */
EXPR_SUBTYPE_KEYWORD_ARG,
/* complex messages */
EXPR_SUBTYPE_COMPLEX_MSG,
};
/* Classification of the previously parsed component of an arithmetic
 * expression; stored in expr_parser_state.s_prev_component. */
enum expr_component {
/* no component has been recorded (zero value) */
EXPR_CMP_NONE = 0,
/* the previous component was an operator */
EXPR_CMP_OPERATOR,
/* the previous component was an operand */
EXPR_CMP_OPERAND,
/* the previous component was a message */
EXPR_CMP_MSG,
};
/* Parser state for an expression node. */
struct expr_parser_state {
/* generic parser state. the add-child hooks receive a
 * struct parser_state * and cast it to struct expr_parser_state *, which
 * relies on this being the first member. */
struct parser_state s_base;
/* broad category of this expression (see enum expr_type) */
enum expr_type s_type;
/* for arithmetic expressions, this records whether the previous
 * component (either a token or parenthesised group of tokens) is an
 * operator, operand, or message */
enum expr_component s_prev_component;
/* the token type/keyword type/symbol type of the last token that was
 * encountered */
unsigned int s_prev_token;
/* if this is an arithmetic expression, this variable is the depth
 * of parentheses that this sub-expression is at */
unsigned int s_paren_depth;
/* when this is set, the expression will be terminated when the
 * specified token is encountered. the token that terminated the
 * expression will not be consumed. */
unsigned int s_terminator;
/* all sub-expressions (i.e. those contained within brackets,
 * keyword-messages and keyword-message args, etc.) will have this set. */
enum expr_subtype s_subexpr;
/* operand child nodes are appended here through their n_entry link */
b_queue s_output_queue;
/* NOTE(review): presumably the operators pushed by arith_push_operator()
 * that have not yet been moved to the output queue -- confirm */
b_queue s_operator_stack;
/* these variables are for keyword-message expressions */
struct ivy_ast_node *s_recipient;
struct ivy_ast_msg_node *s_msg;
b_queue s_labels;
b_queue s_args;
};
/*
 * Arithmetic-expression parser entry points shared between the files of the
 * expression parser.
 */

/* helpers that record an operand token / operator node in `state` */
void arith_push_operand(struct expr_parser_state *state, struct ivy_token *tok);
void arith_push_operator(
	struct expr_parser_state *state, struct ivy_ast_node *node);

/* add-child handler for arithmetic expressions; `parent` is cast to
 * struct expr_parser_state by the implementation */
enum ivy_status arith_add_child(
	struct parser_state *parent, struct ivy_ast_node *child);

/* token handlers: each takes the parser context and the token being parsed
 * and reports a status plus flags through struct token_parse_result */
struct token_parse_result arith_parse_ident(
	struct ivy_parser *ctx, struct ivy_token *tok);
struct token_parse_result arith_parse_operand(
	struct ivy_parser *ctx, struct ivy_token *tok);
struct token_parse_result arith_parse_operator(
	struct ivy_parser *ctx, struct ivy_token *tok);
struct token_parse_result arith_parse_label(
	struct ivy_parser *ctx, struct ivy_token *tok);

/* symbol handlers, same calling convention as the token handlers */
struct token_parse_result arith_parse_left_paren(
	struct ivy_parser *ctx, struct ivy_token *tok);
struct token_parse_result arith_parse_right_paren(
	struct ivy_parser *ctx, struct ivy_token *tok);
struct token_parse_result arith_parse_dot(
	struct ivy_parser *ctx, struct ivy_token *tok);
#endif

0
lang/ast/expr/stmt.c Normal file
View File

View File

@@ -32,6 +32,7 @@ enum ivy_ast_node_type {
IVY_AST_IDENT,
IVY_AST_FOR_LOOP,
IVY_AST_WHILE_LOOP,
IVY_AST_CASCADE,
IVY_AST_COND_GROUP,
IVY_AST_COND,
IVY_AST_TUPLE,
@@ -199,6 +200,13 @@ struct ivy_ast_while_loop_node {
b_queue n_body;
};
/* AST node for a cascade: one recipient plus a queue of messages.
 * NOTE(review): presumably tagged IVY_AST_CASCADE in n_base -- confirm. */
struct ivy_ast_cascade_node {
struct ivy_ast_node n_base;
/* NOTE(review): presumably the recipient shared by every message in
 * n_msg, since those messages carry NULL recipients -- confirm */
struct ivy_ast_node *n_recipient;
/* queue of struct ivy_ast_msg_node, all with NULL recipients. */
b_queue n_msg;
};
struct ivy_ast_cond_group_node {
struct ivy_ast_node n_base;
/* queue of struct ivy_ast_cond_node. */

View File

@@ -5,8 +5,8 @@
enum ivy_operator_precedence {
IVY_PRECEDENCE_ASSIGN,
IVY_PRECEDENCE_KEYWORD_MSG,
IVY_PRECEDENCE_IF_ELSE,
IVY_PRECEDENCE_KEYWORD_MSG,
IVY_PRECEDENCE_CASCADE,
IVY_PRECEDENCE_LOGICAL_OR,
IVY_PRECEDENCE_LOGICAL_AND,