mie: parse: implement type reference parsing

This commit is contained in:
2026-01-08 19:19:45 +00:00
parent 0a45e3b571
commit 8ad3f36288
2 changed files with 268 additions and 85 deletions

View File

@@ -2,18 +2,13 @@
#define MIE_PARSE_PARSE_H_
#include <blue/ds/string.h>
#include <mie/ir/register.h>
#include <mie/misc.h>
#include <mie/parse/token.h>
#include <mie/status.h>
#include <mie/vector.h>
#include <stdbool.h>
enum mie_unresolved_operand_type {
MIE_UNRESOLVED_OPERAND_NONE = 0,
MIE_UNRESOLVED_OPERAND_VIRTUAL_REGISTER,
MIE_UNRESOLVED_OPERAND_MACHINE_REGISTER,
};
struct mie_parser;
struct mie_name_map;
struct mie_lex;
@@ -23,20 +18,11 @@ struct mie_type;
struct mie_module;
struct mie_region;
struct mie_op;
struct mie_op_arg;
struct mie_op_attribute;
struct mie_op_successor;
struct mie_register;
/* These structs are temporary placeholders; for now they exist only for documentation purposes. */
struct mie_argument {
};
struct mie_unresolved_operand {
enum mie_unresolved_operand_type op_type;
b_string *op_name;
};
MIE_API struct mie_parser *mie_parser_create(
struct mie_ctx *ctx, struct mie_lex *lex);
MIE_API void mie_parser_destroy(struct mie_parser *ctx);
@@ -75,19 +61,18 @@ MIE_API bool mie_parser_parse_string(
MIE_API bool mie_parser_parse_keyword(struct mie_parser *ctx, const char *kw);
MIE_API bool mie_parser_parse_symbol(
struct mie_parser *ctx, enum mie_token_symbol sym);
MIE_API bool mie_parser_parse_assignment_list(
struct mie_parser *ctx, struct mie_argument **out_lhs,
struct mie_unresolved_operand **out_rhs, size_t *out_count);
MIE_API bool mie_parser_parse_type(struct mie_parser *ctx, struct mie_type **out);
MIE_API bool mie_parser_parse_type(
struct mie_parser *ctx, const struct mie_type **out);
MIE_API bool mie_parser_parse_type_list(
struct mie_parser *ctx, MIE_VECTOR_REF_PARAM(const struct mie_type *, out));
MIE_API bool mie_parser_parse_function_type(
struct mie_parser *ctx, struct mie_type **out);
MIE_API bool mie_parser_parse_operand(
struct mie_parser *ctx, struct mie_unresolved_operand *out);
struct mie_parser *ctx, struct mie_op_arg *out);
MIE_API bool mie_parser_parse_operand_list(
struct mie_parser *ctx,
MIE_VECTOR_REF_PARAM(struct mie_unresolved_operand, out));
struct mie_parser *ctx, MIE_VECTOR_REF_PARAM(struct mie_op_arg, out));
MIE_API bool mie_parser_parse_unknown_keyword(struct mie_parser *ctx, b_string *out);
MIE_API bool mie_parser_parse_unknown_symbol(

View File

@@ -1,3 +1,7 @@
#include <mie/ctx.h>
#include <mie/dialect/arith.h>
#include <mie/dialect/dialect.h>
#include <mie/dialect/type.h>
#include <mie/ir/module.h>
#include <mie/ir/op.h>
#include <mie/ir/region.h>
@@ -5,6 +9,7 @@
#include <mie/parse/lex.h>
#include <mie/parse/parse.h>
#include <mie/parse/token.h>
#include <mie/type/function.h>
struct mie_parser {
struct mie_ctx *p_ctx;
@@ -80,41 +85,32 @@ bool mie_parser_advance(struct mie_parser *ctx)
return mie_lex_get_status(ctx->p_lex) == MIE_SUCCESS;
}
bool mie_parser_parse_vregname(struct mie_parser *ctx, b_string *out)
{
if (!mie_parser_check_type(ctx, MIE_TOK_VREGNAME)) {
return false;
/*
 * TOKEN_PARSER(name, id) defines `mie_parser_parse_<name>()`.
 *
 * The generated function succeeds only when mie_parser_check_type() accepts
 * `id` for the current token. In that case it appends the token's text to
 * `out`, fills in `loc` from the token when `loc` is non-NULL, advances the
 * lexer past the token and returns true. Otherwise it returns false without
 * touching `out`, `loc` or the lexer position.
 */
#define TOKEN_PARSER(name, id)                                                 \
	bool mie_parser_parse_##name(                                          \
		struct mie_parser *ctx, b_string *out,                         \
		struct mie_file_span *loc)                                     \
	{                                                                      \
		if (mie_parser_check_type(ctx, id)) {                          \
			struct mie_token *cur = mie_lex_peek(ctx->p_lex);      \
			b_string_append_cstr(out, cur->tok_str);               \
			if (loc) {                                             \
				mie_file_span_init(loc, cur);                  \
			}                                                      \
			mie_lex_advance(ctx->p_lex);                           \
			return true;                                           \
		}                                                              \
		return false;                                                  \
	}
struct mie_token *tok = mie_lex_peek(ctx->p_lex);
b_string_append_cstr(out, tok->tok_str);
mie_lex_advance(ctx->p_lex);
return true;
}
/*
 * Consume a MIE_TOK_MREGNAME token and append its text to `out`.
 *
 * Returns true when the current token had that type (the lexer is then
 * advanced past it); returns false and leaves `out` and the lexer untouched
 * otherwise.
 */
bool mie_parser_parse_mregname(struct mie_parser *ctx, b_string *out)
{
	if (mie_parser_check_type(ctx, MIE_TOK_MREGNAME)) {
		struct mie_token *cur = mie_lex_peek(ctx->p_lex);
		b_string_append_cstr(out, cur->tok_str);
		mie_lex_advance(ctx->p_lex);
		return true;
	}

	return false;
}
/*
 * Consume a MIE_TOK_OPNAME token and append its text to `out`.
 *
 * Returns true when the current token had that type (the lexer is then
 * advanced past it); returns false and leaves `out` and the lexer untouched
 * otherwise.
 */
bool mie_parser_parse_opname(struct mie_parser *ctx, b_string *out)
{
	if (mie_parser_check_type(ctx, MIE_TOK_OPNAME)) {
		struct mie_token *cur = mie_lex_peek(ctx->p_lex);
		b_string_append_cstr(out, cur->tok_str);
		mie_lex_advance(ctx->p_lex);
		return true;
	}

	return false;
}
TOKEN_PARSER(word, MIE_TOK_WORD);
TOKEN_PARSER(instname, MIE_TOK_INSTNAME);
TOKEN_PARSER(graphname, MIE_TOK_GRAPHNAME);
TOKEN_PARSER(opname, MIE_TOK_OPNAME);
TOKEN_PARSER(vregname, MIE_TOK_VREGNAME);
TOKEN_PARSER(mregname, MIE_TOK_MREGNAME);
TOKEN_PARSER(blockname, MIE_TOK_BLOCKNAME);
TOKEN_PARSER(typename, MIE_TOK_TYPENAME);
TOKEN_PARSER(symname, MIE_TOK_SYMNAME);
TOKEN_PARSER(string, MIE_TOK_STRING);
bool mie_parser_parse_symbol(struct mie_parser *ctx, enum mie_token_symbol sym)
{
@@ -127,16 +123,155 @@ bool mie_parser_parse_symbol(struct mie_parser *ctx, enum mie_token_symbol sym)
return true;
}
bool mie_parser_parse_type(struct mie_parser *ctx, struct mie_type **out)
/*
 * Parse a built-in type that is spelled as a plain word token.
 *
 * Spellings recognised here:
 *   memref -> dialect "memref",  type "memref"
 *   index  -> dialect "index",   type "index"
 *   str    -> dialect "builtin", type "string"
 *   i<N>   -> arith integer type of width N
 *   f<N>   -> arith float type of width N
 *
 * On success the resolved type is written to *out and true is returned.
 * Returns false when the current token is not a word, when the word is not
 * one of the spellings above (or its dialect type is not known to the
 * context), or when the final type lookup yields NULL. Apart from the first
 * case the word token has already been consumed when false is returned.
 */
static bool parse_builtin_type_name(
	struct mie_parser *ctx, const struct mie_type **out)
{
	b_string *name = b_string_create();
	struct mie_file_span loc;

	if (!mie_parser_parse_word(ctx, name, &loc)) {
		b_string_unref(name);
		return false;
	}

	const struct mie_dialect_type *type_info = NULL;
	const struct mie_type *type = NULL;
	size_t width = 0;
	char tmp = 0;
	enum {
		NONE = 0,
		INT,
		FLOAT
	} base_type = NONE;

	const char *name_cstr = b_string_ptr(name);

	if (!strcmp(name_cstr, "memref")) {
		type_info = mie_ctx_get_dialect_type(
			ctx->p_ctx, "memref", "memref");
	} else if (!strcmp(name_cstr, "index")) {
		type_info
			= mie_ctx_get_dialect_type(ctx->p_ctx, "index", "index");
	} else if (!strcmp(name_cstr, "str")) {
		type_info = mie_ctx_get_dialect_type(
			ctx->p_ctx, "builtin", "string");
	} else if (sscanf(name_cstr, "i%zu%c", &width, &tmp) == 1) {
		/* exactly one conversion: a width with no trailing characters */
		type_info = mie_ctx_get_dialect_type(ctx->p_ctx, "arith", "int");
		base_type = INT;
	} else if (sscanf(name_cstr, "f%zu%c", &width, &tmp) == 1) {
		type_info
			= mie_ctx_get_dialect_type(ctx->p_ctx, "arith", "float");
		base_type = FLOAT;
	}

	/*
	 * The spelling has been fully classified; nothing below reads
	 * name_cstr, so release the string here to cover every exit path.
	 */
	b_string_unref(name);

	if (!type_info) {
		return false;
	}

	switch (base_type) {
	case INT:
		type = mie_arith_int_get_type(ctx->p_ctx, width);
		break;
	case FLOAT:
		type = mie_arith_float_get_type(ctx->p_ctx, width);
		break;
	default:
		type = mie_ctx_get_type(
			ctx->p_ctx, type_info->ty_parent->d_name,
			type_info->ty_name);
		break;
	}

	*out = type;
	return type != NULL;
}
/*
 * Parse a type referenced through a MIE_TOK_TYPENAME token.
 *
 * Resolution of the name is not implemented: when a typename token is
 * present it is consumed, *out is set to NULL and false is returned. When
 * the current token is not a typename, false is returned and *out is left
 * untouched.
 */
static bool parse_type_name(struct mie_parser *ctx, const struct mie_type **out)
{
	b_string *name = b_string_create();
	struct mie_file_span loc;

	bool have_name = mie_parser_parse_typename(ctx, name, &loc);

	/* the name is not used yet; release it on both outcomes */
	b_string_unref(name);

	if (!have_name) {
		return false;
	}

	*out = NULL;
	return false;
}
/*
 * Parse a type that begins with a parenthesised type list.
 *
 * Two forms are handled:
 *   (T...)            -> storage type built from the list
 *   (T...) -> (U...)  -> function type; the result side may also be a single
 *   (T...) -> U          type without parentheses
 *
 * On success the resulting type is stored in *out and true is returned.
 * Returns false when a list or the result type fails to parse, or when the
 * context returns NULL for the requested type.
 *
 * Both temporary vectors are destroyed on every exit path.
 * NOTE(review): this assumes mie_ctx_get_storage_type() does not retain the
 * items array it is handed, matching how the function-type path already
 * destroys its vectors after mie_ctx_get_function_type() - confirm.
 */
static bool parse_composite_type(struct mie_parser *ctx, const struct mie_type **out)
{
	const struct mie_type *result = NULL;
	const struct mie_type *single = NULL;
	bool ok = false;
	MIE_VECTOR_DEFINE(const struct mie_type *, type_list_1);
	MIE_VECTOR_DEFINE(const struct mie_type *, type_list_2);

	if (!mie_parser_parse_type_list(ctx, MIE_VECTOR_REF(type_list_1))) {
		/* the list may have been partially filled before the failure */
		goto cleanup;
	}

	if (!mie_parser_parse_symbol(ctx, MIE_SYM_HYPHEN_RIGHT_ANGLE)) {
		/* no arrow follows the list: this is a storage type */
		result = mie_ctx_get_storage_type(
			ctx->p_ctx, type_list_1.items, type_list_1.count);
		*out = result;
		ok = result != NULL;
		goto cleanup;
	}

	if (mie_parser_peek_symbol(ctx) == MIE_SYM_LEFT_PAREN) {
		ok = mie_parser_parse_type_list(ctx, MIE_VECTOR_REF(type_list_2));
	} else {
		ok = mie_parser_parse_type(ctx, &single);
		if (single) {
			mie_vector_push_back(type_list_2, &single);
		}
	}

	if (!ok) {
		goto cleanup;
	}

	result = mie_ctx_get_function_type(
		ctx->p_ctx, type_list_1.items, type_list_1.count,
		type_list_2.items, type_list_2.count);
	*out = result;
	ok = result != NULL;

cleanup:
	mie_vector_destroy(type_list_1, NULL);
	mie_vector_destroy(type_list_2, NULL);
	return ok;
}
bool mie_parser_parse_type(struct mie_parser *ctx, const struct mie_type **out)
{
if (mie_parser_peek_symbol(ctx) == MIE_SYM_LEFT_PAREN) {
return parse_composite_type(ctx, out);
}
switch (mie_parser_peek_type(ctx)) {
case MIE_TOK_WORD:
return parse_builtin_type_name(ctx, out);
case MIE_TOK_TYPENAME:
return parse_type_name(ctx, out);
default:
return false;
}
}
bool mie_parser_parse_type_list(
struct mie_parser *ctx, MIE_VECTOR_PARAM(struct mie_type *, out))
struct mie_parser *ctx, MIE_VECTOR_REF_PARAM(const struct mie_type *, out))
{
bool ok = false;
struct mie_type **type_slot = NULL;
const struct mie_type **type_slot = NULL;
if (!mie_parser_parse_symbol(ctx, MIE_SYM_LEFT_PAREN)) {
return false;
}
if (mie_parser_parse_symbol(ctx, MIE_SYM_RIGHT_PAREN)) {
/* empty type list */
return true;
}
type_slot = mie_vector_ref_emplace_back(out);
if (!type_slot) {
@@ -164,32 +299,83 @@ bool mie_parser_parse_type_list(
}
}
return true;
}
bool mie_parser_parse_operand(
struct mie_parser *ctx, struct mie_unresolved_operand *out)
{
memset(out, 0x0, sizeof *out);
out->op_name = b_string_create();
if (mie_parser_parse_vregname(ctx, out->op_name)) {
out->op_type = MIE_UNRESOLVED_OPERAND_VIRTUAL_REGISTER;
} else if (mie_parser_parse_mregname(ctx, out->op_name)) {
out->op_type = MIE_UNRESOLVED_OPERAND_MACHINE_REGISTER;
} else {
b_string_unref(out->op_name);
if (!mie_parser_parse_symbol(ctx, MIE_SYM_RIGHT_PAREN)) {
return false;
}
return true;
}
/*
 * Parse a function type of the form `(T...) -> (U...)` or `(T...) -> U`.
 *
 * On success the function type obtained from the context is stored in *out
 * and true is returned. Returns false, leaving *out untouched, when the
 * input list, the arrow symbol or the result side fails to parse. If the
 * context returns NULL for the function type, *out is set to NULL and false
 * is returned.
 *
 * Both temporary vectors are destroyed on every exit path, including a
 * failed parse of the input list, which may already have added entries.
 */
MIE_API bool mie_parser_parse_function_type(
	struct mie_parser *ctx, struct mie_type **out)
{
	const struct mie_type *type = NULL;
	bool ok = false;
	MIE_VECTOR_DEFINE(const struct mie_type *, in_parts);
	MIE_VECTOR_DEFINE(const struct mie_type *, out_parts);

	if (!mie_parser_parse_type_list(ctx, MIE_VECTOR_REF(in_parts))) {
		goto cleanup;
	}

	if (!mie_parser_parse_symbol(ctx, MIE_SYM_HYPHEN_RIGHT_ANGLE)) {
		goto cleanup;
	}

	if (mie_parser_peek_symbol(ctx) == MIE_SYM_LEFT_PAREN) {
		ok = mie_parser_parse_type_list(ctx, MIE_VECTOR_REF(out_parts));
	} else {
		/* a single, unparenthesised result type */
		ok = mie_parser_parse_type(ctx, &type);
		if (type) {
			mie_vector_push_back(out_parts, &type);
		}
	}

	if (!ok) {
		goto cleanup;
	}

	type = mie_ctx_get_function_type(
		ctx->p_ctx, in_parts.items, in_parts.count, out_parts.items,
		out_parts.count);

	/* the public signature takes a non-const out pointer */
	*out = (struct mie_type *)type;
	ok = type != NULL;

cleanup:
	mie_vector_destroy(in_parts, NULL);
	mie_vector_destroy(out_parts, NULL);
	return ok;
}
bool mie_parser_parse_operand(struct mie_parser *ctx, struct mie_op_arg *out)
{
memset(out, 0x0, sizeof *out);
b_string *str = b_string_create();
bool result = false;
struct mie_file_span loc;
if (mie_parser_parse_vregname(ctx, str, &loc)) {
out->arg_unresolved.reg_name = b_string_steal(str);
out->arg_unresolved.reg_flags = MIE_REGISTER_F_VIRTUAL;
result = true;
} else if (mie_parser_parse_mregname(ctx, str, &loc)) {
out->arg_unresolved.reg_name = b_string_steal(str);
out->arg_unresolved.reg_flags = MIE_REGISTER_F_MACHINE;
result = true;
}
b_string_unref(str);
return result;
}
bool mie_parser_parse_operand_list(
struct mie_parser *ctx,
MIE_VECTOR_REF_PARAM(struct mie_unresolved_operand, out))
struct mie_parser *ctx, MIE_VECTOR_REF_PARAM(struct mie_op_arg, out))
{
bool ok = false;
struct mie_unresolved_operand *operand = NULL;
struct mie_op_arg *operand = NULL;
struct mie_token *tok = mie_parser_peek(ctx);
enum mie_token_type type = MIE_TOKEN_TYPE(tok);
@@ -253,7 +439,7 @@ bool mie_parser_parse_register(
}
if (!mie_name_map_put(
names, &out->reg_name, tok->tok_str, MIE_NAME_MAP_STRICT)) {
names, &out->reg_name, tok->tok_str, MIE_NAME_MAP_F_STRICT)) {
return false;
}
@@ -422,9 +608,7 @@ static bool parse_generic_op(
return false;
}
MIE_VECTOR_DEFINE(struct mie_unresolved_operand, operands);
if (mie_parser_parse_operand_list(ctx, MIE_VECTOR_REF(operands))) {
if (!mie_parser_parse_operand_list(ctx, MIE_VECTOR_REF(dest->op_args))) {
return false;
}
@@ -469,15 +653,29 @@ static bool parse_generic_op(
return false;
}
/* parse input type list */
if (!mie_parser_parse_symbol(ctx, MIE_SYM_HYPHEN_RIGHT_ANGLE)) {
struct mie_function_type *func_type = NULL;
if (!mie_parser_parse_function_type(ctx, (struct mie_type **)&func_type)) {
return false;
}
/* parse output type list */
if (MIE_VECTOR_COUNT(func_type->func_in)
!= MIE_VECTOR_COUNT(dest->op_args)) {
return false;
}
mie_vector_destroy(operands, NULL);
if (MIE_VECTOR_COUNT(func_type->func_out)
!= MIE_VECTOR_COUNT(dest->op_result)) {
return false;
}
for (size_t i = 0; i < MIE_VECTOR_COUNT(func_type->func_in); i++) {
dest->op_args.items[i].arg_unresolved.reg_type
= func_type->func_in.items[i];
}
for (size_t i = 0; i < MIE_VECTOR_COUNT(func_type->func_out); i++) {
dest->op_result.items[i].reg_type = func_type->func_out.items[i];
}
return true;
}