lang: ast: implement simple identifier and operator expression parsing

This commit is contained in:
2024-11-27 22:29:29 +00:00
parent 31de937a21
commit 101d87e09d
4 changed files with 503 additions and 50 deletions

View File

@@ -2,27 +2,472 @@
#include "node.h"
#include <blue/object/string.h>
#include <ivy/lang/operator.h>
#include <ivy/lang/lex.h>
#include <stdio.h>
/*
 * How an expression is expected to be terminated. The token handlers pick
 * one of these the first time they see a token while the value is still
 * EXPR_END_NONE.
 */
enum expr_end {
	/* no terminator chosen yet */
	EXPR_END_NONE = 0,
	/* arithmetic expressions: closed by a dot (.) */
	EXPR_END_DOT = 1,
	/* keyword expressions (if-else, while/for loops, match, etc):
	 * closed by the end keyword */
	EXPR_END_KEYWORD = 2,
};
/*
 * Coarse classification of the most recently handled token, as recorded
 * by set_previous().
 */
enum expr_part {
	/* nothing has been recorded yet */
	EXPR_NONE = 0,
	/* the last token was a symbol or keyword */
	EXPR_OPERATOR = 1,
	/* the last token was anything else (identifier, literal, ...) */
	EXPR_OPERAND = 2,
};
/*
 * Parser state for one expression. It is fetched by every expression
 * token handler via parser_get_state(ctx, struct expr_parser_state).
 */
struct expr_parser_state {
/* common parser state.
 * NOTE(review): presumably has to stay the first member so generic code
 * can treat this struct as a struct parser_state -- confirm. */
struct parser_state s_base;
/* how the expression terminates; handlers move it from EXPR_END_NONE to
 * EXPR_END_DOT or EXPR_END_KEYWORD on the first token they accept */
enum expr_end s_end;
/* NOTE(review): not read or written by any handler visible in this file
 * section; presumably the keyword that opened a keyword expression --
 * confirm. */
enum ivy_keyword s_type;
/* value of the last handled token: its symbol id, keyword id, or token
 * type (written by set_previous()) */
unsigned int s_prev_tok;
/* whether the last handled token counted as an operator or an operand
 * (written by set_previous()) */
enum expr_part s_prev_part;
/* output queue: operand tokens are appended here, and operators popped
 * off s_operator_stack are appended after them */
b_queue s_operand_queue;
/* pending operators and left parentheses; pushed at the back and
 * inspected/removed from the back */
b_queue s_operator_stack;
};
/*
 * Attach `child` as the single child of an expression node.
 *
 * parent: node to attach to; it is accessed as a struct ivy_ast_expr_node.
 * child:  node stored in the parent's n_child slot.
 *
 * Returns IVY_OK when the slot was empty and has now been filled, or
 * IVY_ERR_NOT_SUPPORTED when the expression already has a child.
 */
static enum ivy_status add_child(
	struct ivy_ast_node *parent, struct ivy_ast_node *child)
{
	struct ivy_ast_expr_node *expr = (struct ivy_ast_expr_node *)parent;

	if (!expr->n_child) {
		expr->n_child = child;
		return IVY_OK;
	}

	return IVY_ERR_NOT_SUPPORTED;
}
/*
 * Record in `state` what the most recently handled token was.
 *
 * Symbols and keywords are stored by their symbol/keyword id and count as
 * operators; every other token is stored by its token type and counts as
 * an operand.
 */
static void set_previous(struct expr_parser_state *state, struct ivy_token *tok)
{
	if (tok->t_type == IVY_TOK_SYMBOL) {
		state->s_prev_tok = tok->t_symbol;
		state->s_prev_part = EXPR_OPERATOR;
		return;
	}

	if (tok->t_type == IVY_TOK_KEYWORD) {
		state->s_prev_tok = tok->t_keyword;
		state->s_prev_part = EXPR_OPERATOR;
		return;
	}

	state->s_prev_tok = tok->t_type;
	state->s_prev_part = EXPR_OPERAND;
}
/*
 * Write a short textual form of `tok` to stdout (no trailing newline).
 *
 * Identifiers print their text, integers and doubles their value, symbols
 * and keywords their string name, strings their text in double quotes.
 * Any other token type prints the literal "<token>".
 */
static void print_token(struct ivy_token *tok)
{
	switch (tok->t_type) {
	case IVY_TOK_IDENT:
		printf("%s", tok->t_str);
		return;
	case IVY_TOK_INT:
		printf("%llu", tok->t_int);
		return;
	case IVY_TOK_DOUBLE:
		printf("%.2lf", tok->t_double);
		return;
	case IVY_TOK_SYMBOL:
		printf("%s", ivy_symbol_to_string(tok->t_symbol));
		return;
	case IVY_TOK_KEYWORD:
		printf("%s", ivy_keyword_to_string(tok->t_keyword));
		return;
	case IVY_TOK_STRING:
		printf("\"%s\"", tok->t_str);
		return;
	default:
		printf("<token>");
		return;
	}
}
/*
 * Finish an expression. For now this only dumps the collected tokens to
 * stdout on one line, separated by single spaces: first everything in the
 * operand queue, then everything still on the operator stack.
 *
 * Always returns IVY_OK.
 *
 * NOTE(review): the operator stack is walked with the same
 * b_queue_foreach as the operand queue; if that iterates front-to-back,
 * the remaining operators are printed bottom-first rather than in pop
 * order -- confirm this is the intended output.
 */
static enum ivy_status finalise_expr(struct expr_parser_state *state)
{
	b_queue_iterator it = {0};
	int printed = 0;

	b_queue_foreach (&it, &state->s_operand_queue) {
		struct ivy_token *queued
			= b_unbox(struct ivy_token, it.entry, t_entry);

		/* separator goes before every token except the first */
		if (printed++ > 0) {
			printf(" ");
		}

		print_token(queued);
	}

	b_queue_foreach (&it, &state->s_operator_stack) {
		struct ivy_token *pending
			= b_unbox(struct ivy_token, it.entry, t_entry);

		if (printed++ > 0) {
			printf(" ");
		}

		print_token(pending);
	}

	printf("\n");

	return IVY_OK;
}
/*
 * Handle an identifier token: queue it as an operand.
 *
 * If no terminator has been chosen for the expression yet, it becomes a
 * dot-terminated expression. Returns IVY_OK with no flags.
 */
static struct token_parse_result parse_ident(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	struct expr_parser_state *expr
		= parser_get_state(ctx, struct expr_parser_state);

	expr->s_end = (expr->s_end == EXPR_END_NONE) ? EXPR_END_DOT
						     : expr->s_end;

	b_queue_push_back(&expr->s_operand_queue, &tok->t_entry);
	set_previous(expr, tok);

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Handle an atom token: queue it as an operand.
 *
 * If no terminator has been chosen for the expression yet, it becomes a
 * dot-terminated expression. Returns IVY_OK with no flags.
 */
static struct token_parse_result parse_atom(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	struct expr_parser_state *expr
		= parser_get_state(ctx, struct expr_parser_state);

	expr->s_end = (expr->s_end == EXPR_END_NONE) ? EXPR_END_DOT
						     : expr->s_end;

	b_queue_push_back(&expr->s_operand_queue, &tok->t_entry);
	set_previous(expr, tok);

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Handle a string token: queue it as an operand.
 *
 * If no terminator has been chosen for the expression yet, it becomes a
 * dot-terminated expression. Returns IVY_OK with no flags.
 */
static struct token_parse_result parse_string(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	struct expr_parser_state *expr
		= parser_get_state(ctx, struct expr_parser_state);

	expr->s_end = (expr->s_end == EXPR_END_NONE) ? EXPR_END_DOT
						     : expr->s_end;

	b_queue_push_back(&expr->s_operand_queue, &tok->t_entry);
	set_previous(expr, tok);

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Placeholder handler for IVY_TOK_STR_START inside an expression: it does
 * not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_str_start(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for IVY_TOK_STR_END inside an expression: it does
 * not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_str_end(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for IVY_TOK_LABEL inside an expression: it does not
 * touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_label(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Handle an integer literal token: queue it as an operand.
 *
 * If no terminator has been chosen for the expression yet, it becomes a
 * dot-terminated expression. Returns IVY_OK with no flags.
 */
static struct token_parse_result parse_int(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	struct expr_parser_state *expr
		= parser_get_state(ctx, struct expr_parser_state);

	expr->s_end = (expr->s_end == EXPR_END_NONE) ? EXPR_END_DOT
						     : expr->s_end;

	b_queue_push_back(&expr->s_operand_queue, &tok->t_entry);
	set_previous(expr, tok);

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Handle a floating-point literal token: queue it as an operand.
 *
 * If no terminator has been chosen for the expression yet, it becomes a
 * dot-terminated expression. Returns IVY_OK with no flags.
 */
static struct token_parse_result parse_double(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	struct expr_parser_state *expr
		= parser_get_state(ctx, struct expr_parser_state);

	expr->s_end = (expr->s_end == EXPR_END_NONE) ? EXPR_END_DOT
						     : expr->s_end;

	b_queue_push_back(&expr->s_operand_queue, &tok->t_entry);
	set_previous(expr, tok);

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Look up the operator description for a token.
 *
 * Keyword tokens are looked up by keyword id and symbol tokens by symbol
 * id, both through ivy_operator_get(). Any other token type yields NULL.
 */
static struct ivy_operator *get_operator(struct ivy_token *tok)
{
	if (tok->t_type == IVY_TOK_KEYWORD) {
		return ivy_operator_get(tok->t_keyword);
	}

	if (tok->t_type == IVY_TOK_SYMBOL) {
		return ivy_operator_get(tok->t_symbol);
	}

	return NULL;
}
/*
 * Handle a symbol token that has no dedicated symbol handler: treat it as
 * an operator and push it onto the operator stack, shunting-yard style.
 *
 * Fails with IVY_ERR_BAD_SYNTAX when the expression is not (yet) a
 * dot-terminated expression, or when ivy_operator_get() does not know the
 * symbol.
 *
 * Before the push, operators already on the stack are moved to the back
 * of the operand queue for as long as the top of the stack
 *   - is not a left parenthesis, and
 *   - has a higher precedence than the incoming operator, or has the same
 *     precedence while the incoming operator is left-associative.
 *
 * NOTE(review): the result of get_operator() on the stacked token is
 * dereferenced unchecked. Only validated symbols and left parentheses are
 * pushed by the handlers visible here, so this holds today -- revisit
 * once keyword operators are pushed as well.
 */
static struct token_parse_result parse_symbol(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	struct expr_parser_state *state
		= parser_get_state(ctx, struct expr_parser_state);

	if (state->s_end != EXPR_END_DOT) {
		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
	}

	struct ivy_operator *incoming = ivy_operator_get(tok->t_symbol);
	if (!incoming) {
		return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
	}

	b_queue_entry *entry;
	while ((entry = b_queue_last(&state->s_operator_stack)) != NULL) {
		struct ivy_token *stacked
			= b_unbox(struct ivy_token, entry, t_entry);

		/* a left parenthesis fences off everything below it */
		if (ivy_token_is_symbol(stacked, IVY_SYM_LEFT_PAREN)) {
			break;
		}

		struct ivy_operator *stacked_op = get_operator(stacked);

		int binds_tighter
			= stacked_op->op_precedence > incoming->op_precedence;
		int ties_and_left
			= stacked_op->op_precedence == incoming->op_precedence
			  && incoming->op_associativity
				     == IVY_ASSOCIATIVITY_LEFT;

		if (!binds_tighter && !ties_and_left) {
			break;
		}

		/* stacked operator applies first: move it to the output */
		b_queue_delete(&state->s_operator_stack, entry);
		b_queue_push_back(&state->s_operand_queue, entry);
	}

	b_queue_push_back(&state->s_operator_stack, &tok->t_entry);
	set_previous(state, tok);

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Handle `(`: push it onto the operator stack, where parse_symbol() stops
 * unwinding when it reaches it.
 *
 * If no terminator has been chosen for the expression yet, it becomes a
 * dot-terminated expression. Returns IVY_OK with no flags.
 */
static struct token_parse_result parse_left_paren(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	struct expr_parser_state *expr
		= parser_get_state(ctx, struct expr_parser_state);

	expr->s_end = (expr->s_end == EXPR_END_NONE) ? EXPR_END_DOT
						     : expr->s_end;

	b_queue_push_back(&expr->s_operator_stack, &tok->t_entry);
	set_previous(expr, tok);

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Handle `)`: currently accepted without doing anything; returns IVY_OK
 * with no flags and leaves the parser state untouched.
 *
 * NOTE(review): nothing is unwound from the operator stack here, so a
 * left parenthesis pushed by parse_left_paren() stays on the stack --
 * presumably the matching logic is still to be written; confirm.
 */
static struct token_parse_result parse_right_paren(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Handle the `if` keyword: when no terminator has been chosen for the
 * expression yet, mark it as a keyword-terminated expression. Nothing
 * else is recorded. Returns IVY_OK with no flags.
 */
static struct token_parse_result parse_if(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	struct expr_parser_state *expr
		= parser_get_state(ctx, struct expr_parser_state);

	(void)tok;

	expr->s_end = (expr->s_end == EXPR_END_NONE) ? EXPR_END_KEYWORD
						     : expr->s_end;

	return PARSE_RESULT(IVY_OK, 0);
}
/*
 * Placeholder handler for the `else` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_else(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `end` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_end(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `while` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_while(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `for` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_for(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `match` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_match(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `try` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_try(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `catch` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_catch(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `throw` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_throw(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `understands` keyword inside an expression:
 * it does not touch the parser state and always reports
 * IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_understands(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `in` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_in(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `do` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_do(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `is` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_is(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `and` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_and(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `or` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_or(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
/*
 * Placeholder handler for the `not` keyword inside an expression: it
 * does not touch the parser state and always reports IVY_ERR_IO_FAILURE.
 *
 * NOTE(review): IVY_ERR_IO_FAILURE looks like a stand-in for "not
 * implemented yet" -- confirm the intended status code.
 */
static struct token_parse_result parse_not(
	struct ivy_parser *ctx, struct ivy_token *tok)
{
	(void)ctx;
	(void)tok;

	return PARSE_RESULT(IVY_ERR_IO_FAILURE, 0);
}
static struct token_parse_result parse_bang(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
= parser_get_state(ctx, struct expr_parser_state);
if (state->s_end != EXPR_END_DOT) {
return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
}
enum ivy_status status = finalise_expr(state);
parser_pop_state(ctx, STATE_ADD_NODE_TO_PARENT);
return PARSE_RESULT(status, PARSE_REPEAT_TOKEN);
}
static struct token_parse_result parse_dot(
struct ivy_parser *ctx, struct ivy_token *tok)
{
struct expr_parser_state *state
= parser_get_state(ctx, struct expr_parser_state);
if (state->s_end != EXPR_END_DOT) {
return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
}
enum ivy_status status = finalise_expr(state);
parser_pop_state(ctx, STATE_ADD_NODE_TO_PARENT);
return PARSE_RESULT(status, 0);
}
struct ast_node_type expr_node_ops = {
.n_add_child = add_child,
.n_state_size = sizeof(struct expr_parser_state),
.n_node_size = sizeof(struct ivy_ast_expr_node),
.n_token_parsers = {
[IVY_TOK_IDENT] = parse_ident,
[IVY_TOK_ATOM] = parse_atom,
[IVY_TOK_STRING] = parse_string,
[IVY_TOK_STR_START] = parse_str_start,
[IVY_TOK_STR_END] = parse_str_end,
[IVY_TOK_LABEL] = parse_label,
[IVY_TOK_INT] = parse_int,
[IVY_TOK_DOUBLE] = parse_double,
[IVY_TOK_SYMBOL] = parse_symbol,
},
.n_symbol_parsers = {
[IVY_SYM_BANG] = parse_bang,
[IVY_SYM_DOT] = parse_dot,
[IVY_SYM_LEFT_PAREN] = parse_left_paren,
[IVY_SYM_RIGHT_PAREN] = parse_right_paren,
},
.n_keyword_parsers = {
[IVY_KW_IF] = parse_if,
[IVY_KW_ELSE] = parse_else,
[IVY_KW_END] = parse_end,
[IVY_KW_WHILE] = parse_while,
[IVY_KW_FOR] = parse_for,
[IVY_KW_MATCH] = parse_match,
[IVY_KW_TRY] = parse_try,
[IVY_KW_CATCH] = parse_catch,
[IVY_KW_THROW] = parse_throw,
[IVY_KW_UNDERSTANDS] = parse_understands,
[IVY_KW_IN] = parse_in,
[IVY_KW_DO] = parse_do,
[IVY_KW_IS] = parse_is,
[IVY_KW_AND] = parse_and,
[IVY_KW_OR] = parse_or,
[IVY_KW_NOT] = parse_not,
},
};

View File

@@ -13,8 +13,8 @@ struct msgh_parser_state {
unsigned int s_prev;
};
static struct token_parse_result parse_ident(
struct ivy_parser *ctx, struct ivy_token *tok)
static struct token_parse_result parse_expr(
struct ivy_parser* ctx, struct ivy_token* tok)
{
struct msgh_parser_state *state
= parser_get_state(ctx, struct msgh_parser_state);
@@ -22,17 +22,12 @@ static struct token_parse_result parse_ident(
struct ivy_ast_msgh_node *msgh
= (struct ivy_ast_msgh_node *)state->s_base.s_node;
if (msgh->n_sel) {
/* TODO expression parsing */
return PARSE_RESULT(IVY_ERR_NOT_SUPPORTED, 0);
if (!msgh->n_sel) {
return PARSE_RESULT(IVY_ERR_BAD_SYNTAX, 0);
}
if (state->s_prev == IVY_SYM_HYPHEN) {
/* message name */
return PARSE_RESULT(IVY_OK, 0);
}
return PARSE_RESULT(IVY_OK, 0);
parser_push_state(ctx, IVY_AST_EXPR);
return PARSE_RESULT(IVY_OK, PARSE_REPEAT_TOKEN);
}
static enum ivy_status add_child(
@@ -64,7 +59,7 @@ struct ast_node_type msgh_node_ops = {
.n_init_state = init_state,
.n_state_size = sizeof(struct msgh_parser_state),
.n_node_size = sizeof(struct ivy_ast_msgh_node),
.n_token_parsers = {
[IVY_TOK_IDENT] = parse_ident,
},
.n_expr_parser = {
.expr_begin = parse_expr,
}
};

View File

@@ -11,6 +11,7 @@ extern struct ast_node_type unit_import_node_ops;
extern struct ast_node_type class_node_ops;
extern struct ast_node_type msgh_node_ops;
extern struct ast_node_type selector_node_ops;
extern struct ast_node_type expr_node_ops;
static const struct ast_node_type *node_ops[] = {
[IVY_AST_UNIT] = &unit_node_ops,
@@ -19,15 +20,10 @@ static const struct ast_node_type *node_ops[] = {
[IVY_AST_CLASS] = &class_node_ops,
[IVY_AST_MSGH] = &msgh_node_ops,
[IVY_AST_SELECTOR] = &selector_node_ops,
[IVY_AST_EXPR] = &expr_node_ops,
};
static const size_t nr_node_ops = sizeof node_ops / sizeof node_ops[0];
/*
 * Classification of a token with respect to expressions, as produced by
 * get_tok_expr_type().
 */
enum tok_expr_type {
	/* no expression classification */
	TOK_EXPR_NONE = 0,
	/* NOTE(review): presumably a token that can start an expression --
	 * confirm against get_tok_expr_type() */
	TOK_EXPR_BEGIN = 1,
	/* NOTE(review): presumably a token that can only appear inside an
	 * expression -- confirm against get_tok_expr_type() */
	TOK_EXPR_ANY = 2,
};
const struct ast_node_type *get_ast_node_type(enum ivy_ast_node_type type)
{
if (type >= nr_node_ops) {
@@ -37,7 +33,7 @@ const struct ast_node_type *get_ast_node_type(enum ivy_ast_node_type type)
return node_ops[type];
}
static enum tok_expr_type get_tok_expr_type(struct ivy_token *tok)
enum tok_expr_type get_tok_expr_type(struct ivy_token *tok)
{
switch (tok->t_type) {
case IVY_TOK_IDENT:
@@ -112,14 +108,45 @@ token_parse_function get_token_parser(
return NULL;
}
token_parse_function generic_parser = type->n_token_parsers[tok->t_type];
if (!generic_parser) {
generic_parser = type->n_token_parsers[IVY_TOK_NONE];
}
token_parse_function better_parser = NULL;
switch (tok->t_type) {
case IVY_TOK_KEYWORD:
better_parser = type->n_keyword_parsers[tok->t_keyword];
if (type->n_keyword_parsers[IVY_KW_NONE]) {
generic_parser = type->n_keyword_parsers[IVY_KW_NONE];
}
break;
case IVY_TOK_SYMBOL:
better_parser = type->n_symbol_parsers[tok->t_symbol];
if (type->n_symbol_parsers[IVY_SYM_NONE]) {
generic_parser = type->n_symbol_parsers[IVY_SYM_NONE];
}
break;
default:
break;
}
if (better_parser) {
return better_parser;
}
enum token_expr_type expr_type = get_tok_expr_type(tok);
switch (expr_type) {
case TOK_EXPR_BEGIN:
better_parser = type->n_expr_parser.expr_begin
? type->n_expr_parser.expr_begin
: type->n_expr_parser.expr_all;
break;
case TOK_EXPR_ANY:
better_parser = type->n_expr_parser.expr_other
? type->n_expr_parser.expr_other
: type->n_expr_parser.expr_all;
break;
default:
break;
@@ -223,30 +250,6 @@ const char *ivy_ast_node_type_to_string(enum ivy_ast_node_type v)
}
}
/*
 * Map an ivy_ast_op value to a string.
 *
 * Values not listed below yield the empty string "".
 *
 * NOTE(review): ENUM_STR is defined outside this view; presumably it
 * expands to a `case` that returns the enumerator's name as a string --
 * confirm.
 */
const char *ivy_ast_op_to_string(enum ivy_ast_op v)
{
switch (v) {
ENUM_STR(IVY_OP_NONE);
ENUM_STR(IVY_OP_ASSIGN);
ENUM_STR(IVY_OP_ADD);
ENUM_STR(IVY_OP_SUBTRACT);
ENUM_STR(IVY_OP_MULTIPLY);
ENUM_STR(IVY_OP_DIVIDE);
ENUM_STR(IVY_OP_LESS_THAN);
ENUM_STR(IVY_OP_GREATER_THAN);
ENUM_STR(IVY_OP_EQUAL);
ENUM_STR(IVY_OP_NOT_EQUAL);
ENUM_STR(IVY_OP_LESS_EQUAL);
ENUM_STR(IVY_OP_GREATER_EQUAL);
ENUM_STR(IVY_OP_AND);
ENUM_STR(IVY_OP_OR);
ENUM_STR(IVY_OP_IS);
ENUM_STR(IVY_OP_NOT);
default:
/* unknown operator value: no name available */
return "";
}
}
const char *ivy_ast_msgh_recipient_type_to_string(enum ivy_ast_msgh_recipient_type v)
{
switch (v) {

View File

@@ -16,6 +16,12 @@ enum token_parse_flags {
PARSE_REPEAT_TOKEN = 0x01u,
};
/*
 * Classification of a token with respect to expressions, as returned by
 * get_tok_expr_type().
 */
enum tok_expr_type {
	/* no expression classification */
	TOK_EXPR_NONE = 0,
	/* NOTE(review): presumably a token that can start an expression --
	 * confirm against get_tok_expr_type() */
	TOK_EXPR_BEGIN = 1,
	/* NOTE(review): presumably a token that can only appear inside an
	 * expression -- confirm against get_tok_expr_type() */
	TOK_EXPR_ANY = 2,
};
struct token_parse_result {
enum ivy_status r_status;
enum token_parse_flags r_flags;
@@ -38,13 +44,17 @@ struct ast_node_type {
token_parse_function n_token_parsers[IVY_TOK_TYPE_COUNT];
token_parse_function n_keyword_parsers[IVY_KW_TYPE_COUNT];
token_parse_function n_symbol_parsers[IVY_SYM_TYPE_COUNT];
token_parse_function n_expr_parser;
struct {
token_parse_function expr_begin;
token_parse_function expr_other;
token_parse_function expr_all;
} n_expr_parser;
};
extern const struct ast_node_type *get_ast_node_type(enum ivy_ast_node_type type);
extern token_parse_function get_token_parser(
struct ivy_ast_node *context, struct ivy_token *tok);
extern enum tok_expr_type get_tok_expr_type(struct ivy_token *tok);
extern struct ivy_ast_node *ast_node_create_with_size(
enum ivy_ast_node_type type, size_t size);
extern enum ivy_status ast_node_add_child(