From f1ea916155cc4e5380efd451be7890fc9829e98d Mon Sep 17 00:00:00 2001 From: Max Wash Date: Sun, 24 Nov 2024 11:10:42 +0000 Subject: [PATCH] lang: ast: re-factor parser into multiple files --- lang/ast.c | 244 ------------------------------------ lang/ast.h | 29 ----- lang/ast/ctx.c | 112 +++++++++++++++++ lang/ast/ctx.h | 37 ++++++ lang/ast/node.c | 80 ++++++++++++ lang/ast/node.h | 35 ++++++ lang/ast/parse.c | 22 ++++ lang/ast/parse.h | 16 +++ lang/ast/unit-package.c | 73 +++++++++++ lang/ast/unit-package.h | 13 ++ lang/ast/unit.c | 30 +++++ lang/include/ivy/lang/ast.h | 8 +- 12 files changed, 423 insertions(+), 276 deletions(-) delete mode 100644 lang/ast.c delete mode 100644 lang/ast.h create mode 100644 lang/ast/ctx.c create mode 100644 lang/ast/ctx.h create mode 100644 lang/ast/node.c create mode 100644 lang/ast/node.h create mode 100644 lang/ast/parse.c create mode 100644 lang/ast/parse.h create mode 100644 lang/ast/unit-package.c create mode 100644 lang/ast/unit-package.h create mode 100644 lang/ast/unit.c diff --git a/lang/ast.c b/lang/ast.c deleted file mode 100644 index 9cff7ce..0000000 --- a/lang/ast.c +++ /dev/null @@ -1,244 +0,0 @@ -#include "ast.h" - -#include -#include -#include -#include - -#define ast_node_create(id, type) _ast_node_create(id, sizeof(type)) -#define push_state(parser, node_type_id, node_type_struct, state_struct) _push_state(parser, node_type_id, sizeof(node_type_struct), sizeof(state_struct)) - -enum pop_state_flags { - ADD_NODE_TO_PARENT = 0x01u, -}; - -static enum ivy_status unit_add_child(struct ivy_ast_node *parent, struct ivy_ast_node *child) -{ - struct ivy_ast_unit_node *unit = (struct ivy_ast_unit_node *)parent; - b_queue_push_back(&unit->n_children, &child->n_entry); - - return IVY_OK; -} - -typedef enum ivy_status(*node_add_child_function)(struct ivy_ast_node *, struct ivy_ast_node *); - -static node_add_child_function node_add_child[] = { - [IVY_AST_UNIT] = unit_add_child, -}; -static const size_t nr_node_add_child = sizeof node_add_child / sizeof *node_add_child; - -static struct ivy_ast_node *_ast_node_create(enum ivy_ast_node_type type, size_t size) -{ - struct ivy_ast_node *node = malloc(size); - if (!node) { - return NULL; - } - - memset(node, 0x0, size); - - node->n_type = type; - - return node; -} - -static enum ivy_status ast_node_add_child(struct ivy_ast_node *parent, struct ivy_ast_node *child) -{ - if (parent->n_type >= nr_node_add_child) { - return IVY_ERR_NOT_SUPPORTED; - } - - node_add_child_function add_child = node_add_child[parent->n_type]; - - if (!add_child) { - return IVY_ERR_NOT_SUPPORTED; - } - - return add_child(parent, child); -} - -static struct parser_state *get_state(struct ivy_parser *parser) -{ - b_queue_entry *entry = b_queue_last(&parser->p_state); - if (!entry) { - return NULL; - } - - struct parser_state *state = b_unbox(struct parser_state, entry, s_entry); - return state; -} - -static struct parser_state *_push_state(struct ivy_parser *parser, enum ivy_ast_node_type type, size_t node_size, size_t state_size) -{ - struct parser_state *state = malloc(sizeof *state); - if (!state) { - return NULL; - } - - memset(state, 0x0, sizeof *state); - - b_queue_entry *current_state_entry = b_queue_last(&parser->p_state); - if (current_state_entry) { - struct parser_state *current_state = b_unbox(struct parser_state, current_state_entry, s_entry); - state->s_parent = current_state->s_node; - } - - state->s_node = ast_node_create(type, node_size); - - b_queue_push_back(&parser->p_state, &state->s_entry); - return state; -} - -static void pop_state(struct ivy_parser *parser, enum pop_state_flags flags) -{ - if (parser->p_state.q_first == parser->p_state.q_last) { - return; - } - - b_queue_entry *entry = b_queue_last(&parser->p_state); - struct parser_state *state = b_unbox(struct parser_state, entry, s_entry); - b_queue_pop_back(&parser->p_state); - - if (flags & ADD_NODE_TO_PARENT) { - ast_node_add_child(state->s_parent, state->s_node); - } - - free(state); -} - -enum ivy_status ivy_parser_create(struct ivy_parser **parser) -{ - struct ivy_parser *out = malloc(sizeof *out); - if (!out) { - return IVY_ERR_NO_MEMORY; - } - - memset(out, 0x0, sizeof *out); - - push_state(out, IVY_AST_UNIT, struct ivy_ast_unit_node, struct unit_parser_state); - - *parser = out; - return IVY_OK; -} - -void ivy_parser_destroy(struct ivy_parser *parser) -{ - free(parser); -} - -enum ivy_status ivy_parser_get_status(struct ivy_parser *parser) -{ - return parser->p_status; -} - -static enum ivy_status parse_unit_package(struct ivy_parser *parser, struct ivy_token *tok) -{ - return IVY_ERR_NOT_SUPPORTED; -} - -static enum ivy_status parse_token_in_unit(struct ivy_parser *parser, struct ivy_token *tok) -{ - if (tok->t_type == IVY_TOK_KEYWORD) { - switch (tok->t_keyword) { - case IVY_KW_PACKAGE: - return parse_unit_package(parser, tok); - default: - return IVY_ERR_BAD_SYNTAX; - } - } -} - -enum ivy_status ivy_parser_push_token(struct ivy_parser *parser, struct ivy_token *tok) -{ - struct parser_state *state = get_state(parser); - if (!state) { - parser->p_status = IVY_ERR_INTERNAL_FAILURE; - return parser->p_status; - } - - switch (state->s_node->n_type) { - case IVY_AST_UNIT: - parser->p_status = parse_token_in_unit(parser, tok); - break; - case IVY_AST_UNIT_PACKAGE: - parser->p_status = parse_unit_package(parser, tok); - break; - default: - parser->p_status = IVY_ERR_NOT_SUPPORTED; - break; - } - - return parser->p_status; -} - -void ivy_ast_node_destroy(struct ivy_ast_node *node) -{ -} - -#define ENUM_STR(x) \ - case x: \ - return #x - -const char *ivy_ast_node_type_to_string(enum ivy_ast_node_type v) -{ - switch (v) { - ENUM_STR(IVY_AST_NONE); - ENUM_STR(IVY_AST_UNIT); - ENUM_STR(IVY_AST_OP); - ENUM_STR(IVY_AST_MSG); - ENUM_STR(IVY_AST_CLASS); - ENUM_STR(IVY_AST_MSGH); - ENUM_STR(IVY_AST_PROPERTY); - ENUM_STR(IVY_AST_LAMBDA); - ENUM_STR(IVY_AST_UNIT_PACKAGE); - ENUM_STR(IVY_AST_UNIT_IMPORT); - ENUM_STR(IVY_AST_INT); - ENUM_STR(IVY_AST_DOUBLE); - ENUM_STR(IVY_AST_STRING); - ENUM_STR(IVY_AST_FSTRING); - ENUM_STR(IVY_AST_ATOM); - ENUM_STR(IVY_AST_IDENT); - ENUM_STR(IVY_AST_FOR_LOOP); - ENUM_STR(IVY_AST_WHILE_LOOP); - ENUM_STR(IVY_AST_COND_GROUP); - ENUM_STR(IVY_AST_COND); - ENUM_STR(IVY_AST_TUPLE); - ENUM_STR(IVY_AST_DO); - default: - return ""; - } -} - -const char *ivy_ast_op_to_string(enum ivy_ast_op v) -{ - switch (v) { - ENUM_STR(IVY_OP_NONE); - ENUM_STR(IVY_OP_ASSIGN); - ENUM_STR(IVY_OP_ADD); - ENUM_STR(IVY_OP_SUBTRACT); - ENUM_STR(IVY_OP_MULTIPLY); - ENUM_STR(IVY_OP_DIVIDE); - ENUM_STR(IVY_OP_LESS_THAN); - ENUM_STR(IVY_OP_GREATER_THAN); - ENUM_STR(IVY_OP_EQUAL); - ENUM_STR(IVY_OP_NOT_EQUAL); - ENUM_STR(IVY_OP_LESS_EQUAL); - ENUM_STR(IVY_OP_GREATER_EQUAL); - ENUM_STR(IVY_OP_AND); - ENUM_STR(IVY_OP_OR); - ENUM_STR(IVY_OP_IS); - ENUM_STR(IVY_OP_NOT); - default: - return ""; - } -} - -const char *ivy_ast_msgh_recipient_type_to_string(enum ivy_ast_msgh_recipient_type v) -{ - switch (v) { - ENUM_STR(IVY_AST_MSGH_NONE); - ENUM_STR(IVY_AST_MSGH_OBJECT); - ENUM_STR(IVY_AST_MSGH_CLASS); - default: - return ""; - } -} diff --git a/lang/ast.h b/lang/ast.h deleted file mode 100644 index 520bc97..0000000 --- a/lang/ast.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _AST_H_ -#define _AST_H_ - -#include -#include - -struct parser_state { - b_queue_entry s_entry; - struct ivy_ast_node *s_parent; - struct ivy_ast_node *s_node; -}; - -struct unit_parser_state { - struct parser_state s_base; -}; - -struct ivy_parser { - enum ivy_status p_status; - b_queue p_state; - b_queue p_token_queue; - b_queue p_node_queue; -}; - -struct ast_node_ops { - enum ivy_status(*n_add_child)(struct ivy_ast_node *, struct ivy_ast_node *); - void(*n_print)(struct ivy_ast_node *); -}; - -#endif diff --git a/lang/ast/ctx.c b/lang/ast/ctx.c new file mode 100644 index 0000000..df9d1d8 --- /dev/null +++ b/lang/ast/ctx.c @@ -0,0 +1,112 @@ +#include "ctx.h" + +#include "node.h" +#include "parse.h" + +#include +#include +#include +#include +#include + +enum ivy_status ivy_parser_create(struct ivy_parser **parser) +{ + struct ivy_parser *out = malloc(sizeof *out); + if (!out) { + return IVY_ERR_NO_MEMORY; + } + + memset(out, 0x0, sizeof *out); + + parser_push_state( + out, IVY_AST_UNIT, struct ivy_ast_unit_node, struct parser_state); + + *parser = out; + return IVY_OK; +} + +void ivy_parser_destroy(struct ivy_parser *parser) +{ + free(parser); +} + +enum ivy_status ivy_parser_get_status(struct ivy_parser *parser) +{ + return parser->p_status; +} + +enum ivy_status ivy_parser_push_token( + struct ivy_parser *parser, struct ivy_token *tok) +{ + struct parser_state *state = parser_get_state_generic(parser); + if (!state) { + parser->p_status = IVY_ERR_INTERNAL_FAILURE; + return IVY_ERR_INTERNAL_FAILURE; + } + + token_parse_function func = get_token_parser(state->s_node, tok); + if (!func) { + parser->p_status = IVY_ERR_BAD_SYNTAX; + return IVY_ERR_BAD_SYNTAX; + } + + parser->p_status = func(parser, tok); + return parser->p_status; +} + +struct parser_state *parser_get_state_generic(struct ivy_parser *parser) +{ + b_queue_entry *entry = b_queue_last(&parser->p_state); + if (!entry) { + return NULL; + } + + struct parser_state *state = b_unbox(struct parser_state, entry, s_entry); + return state; +} + +struct parser_state *parser_push_state_generic( + struct ivy_parser *parser, enum ivy_ast_node_type type, + size_t node_size, size_t state_size) +{ + struct parser_state *state = malloc(state_size); + if (!state) { + return NULL; + } + + memset(state, 0x0, state_size); + + b_queue_entry *current_state_entry = b_queue_last(&parser->p_state); + if (current_state_entry) { + struct parser_state *current_state = b_unbox( + struct parser_state, current_state_entry, s_entry); + state->s_parent = current_state->s_node; + } + + state->s_node = ast_node_create_with_size(type, node_size); + + const struct ast_node_type *node_type = get_ast_node_type(type); + if (node_type && node_type->n_init_state) { + node_type->n_init_state(state); + } + + b_queue_push_back(&parser->p_state, &state->s_entry); + return state; +} + +void parser_pop_state(struct ivy_parser *parser, enum pop_state_flags flags) +{ + if (parser->p_state.q_first == parser->p_state.q_last) { + return; + } + + b_queue_entry *entry = b_queue_last(&parser->p_state); + struct parser_state *state = b_unbox(struct parser_state, entry, s_entry); + b_queue_pop_back(&parser->p_state); + + if (flags & STATE_ADD_NODE_TO_PARENT) { + ast_node_add_child(state->s_parent, state->s_node); + } + + free(state); +} diff --git a/lang/ast/ctx.h b/lang/ast/ctx.h new file mode 100644 index 0000000..ca099b7 --- /dev/null +++ b/lang/ast/ctx.h @@ -0,0 +1,37 @@ +#ifndef _AST_CTX_H_ +#define _AST_CTX_H_ + +#include +#include +#include + +#define parser_push_state(parser, node_id, node_type, state_type) \ + ((state_type *)parser_push_state_generic( \ + parser, node_id, sizeof(node_type), sizeof(state_type))) +#define parser_get_state(parser, state_type) \ + ((state_type *)parser_get_state_generic(parser)) + +struct parser_state { + b_queue_entry s_entry; + struct ivy_ast_node *s_parent; + struct ivy_ast_node *s_node; +}; + +struct ivy_parser { + enum ivy_status p_status; + b_queue p_state; + b_queue p_token_queue; + b_queue p_node_queue; +}; + +enum pop_state_flags { + STATE_ADD_NODE_TO_PARENT = 0x01u, +}; + +extern struct parser_state *parser_push_state_generic( + struct ivy_parser *parser, enum ivy_ast_node_type node_type, + size_t node_size, size_t state_size); +extern void parser_pop_state(struct ivy_parser *parser, enum pop_state_flags flags); +extern struct parser_state *parser_get_state_generic(struct ivy_parser *parser); + +#endif diff --git a/lang/ast/node.c b/lang/ast/node.c new file mode 100644 index 0000000..bf0d5a1 --- /dev/null +++ b/lang/ast/node.c @@ -0,0 +1,80 @@ +#include "node.h" + +#include +#include +#include + +extern struct ast_node_type unit_node_ops; +extern struct ast_node_type unit_package_node_ops; + +static const struct ast_node_type *node_ops[] = { + [IVY_AST_UNIT] = &unit_node_ops, + [IVY_AST_UNIT_PACKAGE] = &unit_package_node_ops, +}; +static const size_t nr_node_ops = sizeof node_ops / sizeof node_ops[0]; + +const struct ast_node_type *get_ast_node_type(enum ivy_ast_node_type type) +{ + if (type >= nr_node_ops) { + return NULL; + } + + return node_ops[type]; +} + +token_parse_function get_token_parser( + struct ivy_ast_node *context, struct ivy_token *tok) +{ + const struct ast_node_type *type = get_ast_node_type(context->n_type); + if (!type) { + return NULL; + } + token_parse_function generic_parser = type->n_token_parsers[tok->t_type]; + token_parse_function better_parser = NULL; + + switch (tok->t_type) { + case IVY_TOK_KEYWORD: + better_parser = type->n_keyword_parsers[tok->t_keyword]; + break; + case IVY_TOK_SYMBOL: + better_parser = type->n_symbol_parsers[tok->t_symbol]; + break; + default: + break; + } + + return better_parser ? better_parser : generic_parser; +} + +struct ivy_ast_node *ast_node_create_with_size( + enum ivy_ast_node_type type, size_t size) +{ + struct ivy_ast_node *node = malloc(size); + if (!node) { + return NULL; + } + + memset(node, 0x0, size); + + node->n_type = type; + + return node; +} + +enum ivy_status ast_node_add_child( + struct ivy_ast_node *parent, struct ivy_ast_node *child) +{ + const struct ast_node_type *ops = get_ast_node_type(parent->n_type); + if (!ops) { + return IVY_ERR_NOT_SUPPORTED; + } + + enum ivy_status (*add_child)(struct ivy_ast_node *, struct ivy_ast_node *) + = ops->n_add_child; + + if (!add_child) { + return IVY_ERR_NOT_SUPPORTED; + } + + return add_child(parent, child); +} diff --git a/lang/ast/node.h b/lang/ast/node.h new file mode 100644 index 0000000..fcc4dcf --- /dev/null +++ b/lang/ast/node.h @@ -0,0 +1,35 @@ +#ifndef _AST_NODE_H_ +#define _AST_NODE_H_ + +#include +#include + +#define ast_node_create(type_id, type_struct) \ + ((type_struct *)ast_node_create_with_size(type_id, sizeof(type_struct))) + +struct parser_state; + +typedef enum ivy_status (*token_parse_function)( + struct ivy_parser *, struct ivy_token *); + +struct ast_node_type { + enum ivy_status (*n_add_child)( + struct ivy_ast_node *, struct ivy_ast_node *); + void (*n_print)(struct ivy_ast_node *); + void (*n_init_state)(struct parser_state *); + + token_parse_function n_token_parsers[IVY_TOK_TYPE_COUNT]; + token_parse_function n_keyword_parsers[IVY_KW_TYPE_COUNT]; + token_parse_function n_symbol_parsers[IVY_SYM_TYPE_COUNT]; +}; + +extern const struct ast_node_type *get_ast_node_type(enum ivy_ast_node_type type); +extern token_parse_function get_token_parser( + struct ivy_ast_node *context, struct ivy_token *tok); + +extern struct ivy_ast_node *ast_node_create_with_size( + enum ivy_ast_node_type type, size_t size); +extern enum ivy_status ast_node_add_child( + struct ivy_ast_node *parent, struct ivy_ast_node *child); + +#endif diff --git a/lang/ast/parse.c b/lang/ast/parse.c new file mode 100644 index 0000000..8be3f4d --- /dev/null +++ b/lang/ast/parse.c @@ -0,0 +1,22 @@ +#include "parse.h" + +#include +#include + +static token_parse_function token_parsers[IVY_AST_TYPE_COUNT][IVY_TOK_TYPE_COUNT] = { + [IVY_AST_UNIT] = { + [IVY_TOK_KEYWORD] = NULL, + }, +}; + +static token_parse_function keyword_parsers[IVY_AST_TYPE_COUNT][IVY_KW_TYPE_COUNT] = { + [IVY_AST_UNIT] = { + [IVY_KW_PACKAGE] = NULL, + }, +}; + +static token_parse_function symbol_parsers[IVY_AST_TYPE_COUNT][IVY_SYM_TYPE_COUNT] = { + [IVY_AST_UNIT] = { + [IVY_SYM_NONE] = NULL, + }, +}; diff --git a/lang/ast/parse.h b/lang/ast/parse.h new file mode 100644 index 0000000..faf53a5 --- /dev/null +++ b/lang/ast/parse.h @@ -0,0 +1,16 @@ +#ifndef _AST_PARSE_H_ +#define _AST_PARSE_H_ + +#include + +struct ivy_parser; +struct ivy_ast_node; +struct ivy_token; + +typedef enum ivy_status (*token_parse_function)( + struct ivy_parser *, struct ivy_token *); + +extern token_parse_function get_token_parser( + struct ivy_ast_node *context, struct ivy_token *tok); + +#endif diff --git a/lang/ast/unit-package.c b/lang/ast/unit-package.c new file mode 100644 index 0000000..000b9c2 --- /dev/null +++ b/lang/ast/unit-package.c @@ -0,0 +1,73 @@ +#include "unit-package.h" + +#include "ctx.h" +#include "node.h" + +static enum ivy_status parse_dot(struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct unit_package_parser_state *state + = parser_get_state(ctx, struct unit_package_parser_state); + + if (state->s_prev_token != IVY_TOK_IDENT) { + return IVY_ERR_BAD_SYNTAX; + } + + state->s_prev_token = IVY_SYM_DOT; + return IVY_OK; +} + +static enum ivy_status parse_ident(struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct unit_package_parser_state *state + = parser_get_state(ctx, struct unit_package_parser_state); + + if (state->s_prev_token == IVY_TOK_IDENT) { + return IVY_ERR_BAD_SYNTAX; + } + + state->s_prev_token = IVY_TOK_IDENT; + return IVY_OK; +} + +static enum ivy_status parse_linefeed(struct ivy_parser *ctx, struct ivy_token *tok) +{ + struct unit_package_parser_state *state + = parser_get_state(ctx, struct unit_package_parser_state); + + if (state->s_prev_token != IVY_TOK_IDENT) { + return IVY_ERR_BAD_SYNTAX; + } + + return IVY_ERR_IO_FAILURE; +} + +static enum ivy_status add_child( + struct ivy_ast_node *parent, struct ivy_ast_node *child) +{ + return IVY_OK; +} + +static void print(struct ivy_ast_node *node) +{ +} + +static void init_state(struct parser_state *sp) +{ + struct unit_package_parser_state *state + = (struct unit_package_parser_state *)sp; + state->s_prev_token = IVY_KW_PACKAGE; +} + +struct ast_node_type unit_package_node_ops = { + .n_add_child = add_child, + .n_print = print, + .n_init_state = init_state, + .n_symbol_parsers = { + [IVY_SYM_DOT] = parse_dot, + }, + .n_token_parsers = { + [IVY_TOK_IDENT] = parse_ident, + [IVY_TOK_LINEFEED] = parse_linefeed, + } + +}; diff --git a/lang/ast/unit-package.h b/lang/ast/unit-package.h new file mode 100644 index 0000000..1477bfd --- /dev/null +++ b/lang/ast/unit-package.h @@ -0,0 +1,13 @@ +#ifndef _AST_UNIT_PACKAGE_H_ +#define _AST_UNIT_PACKAGE_H_ + +#include "ctx.h" + +#include + +struct unit_package_parser_state { + struct parser_state s_base; + int s_prev_token; +}; + +#endif diff --git a/lang/ast/unit.c b/lang/ast/unit.c new file mode 100644 index 0000000..5e75b41 --- /dev/null +++ b/lang/ast/unit.c @@ -0,0 +1,30 @@ +#include "ctx.h" +#include "node.h" +#include "unit-package.h" + +static enum ivy_status parse_package_keyword( + struct ivy_parser *ctx, struct ivy_token *tok) +{ + parser_push_state( + ctx, IVY_AST_UNIT_PACKAGE, struct ivy_ast_unit_package_node, + struct unit_package_parser_state); + return IVY_OK; +} + +static enum ivy_status add_child( + struct ivy_ast_node *parent, struct ivy_ast_node *child) +{ + return IVY_OK; +} + +static void print(struct ivy_ast_node *node) +{ +} + +struct ast_node_type unit_node_ops = { + .n_add_child = add_child, + .n_print = print, + .n_keyword_parsers = { + [IVY_KW_PACKAGE] = parse_package_keyword, + }, +}; diff --git a/lang/include/ivy/lang/ast.h b/lang/include/ivy/lang/ast.h index 54fdc64..3f86df7 100644 --- a/lang/include/ivy/lang/ast.h +++ b/lang/include/ivy/lang/ast.h @@ -31,6 +31,7 @@ enum ivy_ast_node_type { IVY_AST_COND, IVY_AST_TUPLE, IVY_AST_DO, + IVY_AST_TYPE_COUNT, }; enum ivy_ast_op { @@ -121,8 +122,8 @@ struct ivy_ast_property_node { struct ivy_ast_node *n_get; /* one of either: * a) a lambda. the lambda is executed with the provided value as a - * parameter to set the property value; or, b) NULL. the property is - * read-only. + * parameter to set the property value; or, + * b) NULL. the property is read-only. */ struct ivy_ast_node *n_set; }; @@ -230,7 +231,8 @@ IVY_API void ivy_parser_destroy(struct ivy_parser *parser); IVY_API enum ivy_status ivy_parser_get_status(struct ivy_parser *parser); -IVY_API enum ivy_status ivy_parser_push_token(struct ivy_parser *parser, struct ivy_token *tok); +IVY_API enum ivy_status ivy_parser_push_token( + struct ivy_parser *parser, struct ivy_token *tok); IVY_API void ivy_ast_node_destroy(struct ivy_ast_node *node);