asm: implement import and instruction parsing

This commit is contained in:
2024-12-14 20:26:04 +00:00
parent 724f40f19f
commit 462f67c6aa
11 changed files with 805 additions and 17 deletions

View File

@@ -0,0 +1,381 @@
#include "parse.h"
#include <ctype.h>
#include <stdlib.h>
#include <ivy/asm/assembler.h>
#include <ivy/asm/bin.h>
#include <ivy/ident.h>
#include <ivy/selector.h>
#include <blue/core/hash.h>
/*
 * Hash values matched against b_hash_string() in get_index_base().
 * Each case there is followed by a strcmp() against "self", "pool", "sp"
 * and "bp" respectively, so these are presumably the precomputed hashes of
 * those strings -- TODO confirm against b_hash_string().
 */
#define HASH_SELF 0x2d19e518d40792b7
#define HASH_POOL 0x8c22f10da88b1083
#define HASH_SP 0x08d93e07b5793c56
#define HASH_BP 0x08a64407b54decef
/* Sentinel returned by get_register_index() when a token is not a register. */
#define REG_INDEX_INVALID ((unsigned long long)-1)
/*
 * Which part of an instruction line the previously accepted token belonged
 * to. The token callbacks switch on this value to decide whether the next
 * token is legal.
 */
enum instr_component {
/* no instruction in progress on the current line */
INSTR_NONE = 0,
/* the mnemonic identifier has been read */
INSTR_OPCODE,
/* a complete operand (constant, register or closed index) has been read */
INSTR_OPERAND,
/* a comma following a complete operand has been read */
INSTR_OPERAND_SEPARATOR,
/* the opening bracket of an index operand has been read */
INSTR_OPERAND_INDEX_LEFT,
/* the base identifier inside the brackets has been read */
INSTR_OPERAND_INDEX_BASE,
/* the comma between index base and index offset has been read */
INSTR_OPERAND_INDEX_SEPARATOR,
/* the offset inside the brackets has been read */
INSTR_OPERAND_INDEX_OFFSET,
/*
 * Accepted by parse_linefeed() and parse_comma(), but never assigned in
 * the code visible here (parse_right_bracket() sets INSTR_OPERAND).
 */
INSTR_OPERAND_INDEX_RIGHT,
};
enum index_base {
INDEX_NONE = 0,
INDEX_SELF,
INDEX_POOL,
INDEX_SP,
INDEX_BP,
};
/* Discriminator selecting the active member of the union in struct arg. */
enum arg_type {
/* zero value of a freshly cleared struct arg */
ARG_NONE = 0,
/* constant operand; set by push_const_arg() */
ARG_CONST,
/* register operand; set by push_reg_arg() */
ARG_REG,
/* index operand; set by parse_ident() once the base has been read */
ARG_INDEX_REG,
/* index operand with an integer offset; set by parse_int() */
ARG_INDEX_CONST,
};
/* One parsed instruction operand, queued on block_parser_state.s_args. */
struct arg {
/* selects which union member below is meaningful */
enum arg_type arg_type;
/* queue link passed to b_queue_push_back() */
b_queue_entry arg_entry;
union {
/* ARG_CONST: the token holding the constant */
struct ivy_asm_token *arg_const;
/* ARG_REG */
struct {
/* the identifier token naming the register */
struct ivy_asm_token *reg_token;
/* value returned by get_register_index() for reg_token */
unsigned long long reg_index;
} arg_reg;
/* ARG_INDEX_REG */
struct {
/* value returned by get_index_base() for index_base_token */
enum index_base index_base;
/* the identifier token naming the base */
struct ivy_asm_token *index_base_token;
/* not assigned through this member in the code visible here */
struct ivy_asm_token *index_offset;
/* not assigned in the code visible here */
unsigned long long index_offset_reg;
} arg_index_reg;
/*
 * ARG_INDEX_CONST. The first three members mirror arg_index_reg:
 * parse_ident() writes the base through arg_index_reg and parse_int()
 * then writes the offset through this member.
 */
struct {
enum index_base index_base;
struct ivy_asm_token *index_base_token;
/* the integer token giving the offset */
struct ivy_asm_token *index_offset;
} arg_index_const;
};
};
/* Parser state used while reading the instructions of a block. */
struct block_parser_state {
/*
 * Generic parser state. The callbacks cast the pointer returned by
 * asm_parser_get_state() to struct block_parser_state, which relies on
 * this member being first.
 */
struct parser_state s_base;
/* not referenced in the code visible here */
unsigned int s_prev_token;
/* role of the previously accepted token on the current line */
enum instr_component s_prev_component;
/* identifier token taken as the mnemonic of the current instruction */
struct ivy_asm_token *s_mnemonic;
/* queue of struct arg (linked through arg_entry) parsed so far */
b_queue s_args;
/*
 * Index operand being built between '[' and ']'. Allocated by
 * parse_left_bracket(); queued and reset to NULL by parse_right_bracket().
 */
struct arg *s_current_arg;
};
/*
 * Decode a register name of the form `x<N>` (the `x` is matched
 * case-insensitively) into its numeric index.
 *
 * @param tok  token to inspect
 * @return the index N when tok is an identifier spelling `x` followed only
 *         by decimal digits with N < 256; REG_INDEX_INVALID otherwise.
 */
static unsigned long long get_register_index(struct ivy_asm_token *tok)
{
	if (tok->t_type != IVY_ASM_TOK_IDENT) {
		return REG_INDEX_INVALID;
	}

	const char *s = tok->t_str;
	/* <ctype.h> functions require a value representable as unsigned char. */
	if (tolower((unsigned char)s[0]) != 'x') {
		return REG_INDEX_INVALID;
	}

	/*
	 * Parse the digits in place, starting after the `x`. Copying the name
	 * into a small scratch buffer at the wrong offset made every register
	 * decode as 0, and truncating could turn an over-long name into a
	 * valid-looking index.
	 */
	const char *digits = s + 1;

	/*
	 * strtoul() skips leading whitespace, accepts a sign and reports 0 for
	 * empty input, so insist on a leading digit to reject `x`, `x+1`, etc.
	 */
	if (!isdigit((unsigned char)digits[0])) {
		return REG_INDEX_INVALID;
	}

	char *ep = NULL;
	unsigned long index = strtoul(digits, &ep, 10);

	/* Trailing junk, or out of range (overflow yields ULONG_MAX). */
	if (*ep != '\0' || index >= 256) {
		return REG_INDEX_INVALID;
	}

	return index;
}
static enum index_base get_index_base(struct ivy_asm_token *tok)
{
if (tok->t_type != IVY_ASM_TOK_IDENT) {
return REG_INDEX_INVALID;
}
const char *s = tok->t_str;
uint64_t hash = b_hash_string(s);
switch (hash) {
case HASH_SELF:
if (!strcmp(s, "self")) {
return INDEX_SELF;
}
return INDEX_NONE;
case HASH_POOL:
if (!strcmp(s, "pool")) {
return INDEX_POOL;
}
return INDEX_NONE;
case HASH_SP:
if (!strcmp(s, "sp")) {
return INDEX_SP;
}
return INDEX_NONE;
case HASH_BP:
if (!strcmp(s, "bp")) {
return INDEX_BP;
}
return INDEX_NONE;
default:
return INDEX_NONE;
}
}
/*
 * Called by parse_linefeed() once a complete instruction line has been read.
 * Currently a placeholder: it ignores state and always returns IVY_OK.
 * NOTE(review): presumably this will encode s_mnemonic and s_args; nothing
 * in the code visible here consumes or releases the queued args -- confirm.
 */
static enum ivy_status write_instruction(struct block_parser_state *state)
{
return IVY_OK;
}
/*
 * Append a constant operand to the argument queue of the current
 * instruction.
 *
 * @param state  block parser state owning the queue
 * @param tok    token holding the constant; stored by pointer, not copied
 * @return IVY_OK on success, IVY_ERR_NO_MEMORY if allocation fails.
 */
static enum ivy_status push_const_arg(struct block_parser_state *state, struct ivy_asm_token *tok)
{
	/* calloc() hands back zeroed storage, so unset fields start at 0. */
	struct arg *operand = calloc(1, sizeof *operand);
	if (operand == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	operand->arg_const = tok;
	operand->arg_type = ARG_CONST;

	b_queue_push_back(&state->s_args, &operand->arg_entry);
	return IVY_OK;
}
/*
 * Append a register operand to the argument queue of the current
 * instruction.
 *
 * @param state      block parser state owning the queue
 * @param tok        identifier token naming the register; stored by pointer
 * @param reg_index  index already decoded from tok by the caller
 * @return IVY_OK on success, IVY_ERR_NO_MEMORY if allocation fails.
 */
static enum ivy_status push_reg_arg(
	struct block_parser_state *state, struct ivy_asm_token *tok, unsigned long long reg_index)
{
	/* calloc() hands back zeroed storage, so unset fields start at 0. */
	struct arg *operand = calloc(1, sizeof *operand);
	if (operand == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	operand->arg_reg.reg_index = reg_index;
	operand->arg_reg.reg_token = tok;
	operand->arg_type = ARG_REG;

	b_queue_push_back(&state->s_args, &operand->arg_entry);
	return IVY_OK;
}
static enum ivy_status parse_linefeed(
struct ivy_asm_parser *ctx, struct ivy_asm_token *tok)
{
struct block_parser_state *state
= (struct block_parser_state *)asm_parser_get_state(ctx);
switch (state->s_prev_component) {
case INSTR_NONE:
return IVY_OK;
case INSTR_OPCODE:
case INSTR_OPERAND:
case INSTR_OPERAND_INDEX_RIGHT:
state->s_prev_component = INSTR_NONE;
return write_instruction(state);
default:
return IVY_ERR_BAD_SYNTAX;
}
}
static enum ivy_status parse_int(
struct ivy_asm_parser *ctx, struct ivy_asm_token *tok)
{
struct block_parser_state *state
= (struct block_parser_state *)asm_parser_get_state(ctx);
switch (state->s_prev_component) {
case INSTR_OPCODE:
case INSTR_OPERAND_SEPARATOR:
push_const_arg(state, tok);
state->s_prev_component = INSTR_OPERAND;
return IVY_OK;
case INSTR_OPERAND_INDEX_SEPARATOR:
state->s_current_arg->arg_type = ARG_INDEX_CONST;
state->s_current_arg->arg_index_const.index_offset = tok;
state->s_prev_component = INSTR_OPERAND_INDEX_OFFSET;
return IVY_OK;
default:
return IVY_ERR_BAD_SYNTAX;
}
}
static enum ivy_status parse_ident(
struct ivy_asm_parser *ctx, struct ivy_asm_token *tok)
{
struct block_parser_state *state
= (struct block_parser_state *)asm_parser_get_state(ctx);
unsigned long long x = 0;
switch (state->s_prev_component) {
case INSTR_NONE:
state->s_mnemonic = tok;
state->s_prev_component = INSTR_OPCODE;
return IVY_OK;
case INSTR_OPCODE:
case INSTR_OPERAND_SEPARATOR:
x = get_register_index(tok);
if (x == REG_INDEX_INVALID) {
return IVY_ERR_BAD_SYNTAX;
}
state->s_prev_component = INSTR_OPERAND;
return push_reg_arg(state, tok, x);
case INSTR_OPERAND_INDEX_LEFT:
x = get_index_base(tok);
if (x == INDEX_NONE) {
return IVY_ERR_BAD_SYNTAX;
}
state->s_current_arg->arg_type = ARG_INDEX_REG;
state->s_current_arg->arg_index_reg.index_base_token = tok;
state->s_current_arg->arg_index_reg.index_base = x;
state->s_prev_component = INSTR_OPERAND_INDEX_BASE;
return IVY_OK;
default:
return IVY_ERR_BAD_SYNTAX;
}
/* not sure what this is but we aren't expecting it. */
return IVY_ERR_BAD_SYNTAX;
}
/*
 * Handle a label token. Labels are not accepted here: every label is
 * rejected as a syntax error regardless of the current parser state.
 *
 * @param ctx  parser whose current state is a block_parser_state
 * @param tok  the label token (unused)
 * @return always IVY_ERR_BAD_SYNTAX.
 */
static enum ivy_status parse_label(
	struct ivy_asm_parser *ctx, struct ivy_asm_token *tok)
{
	/* The state is looked up but not consulted. */
	struct block_parser_state *bs
		= (struct block_parser_state *)asm_parser_get_state(ctx);
	(void)bs;
	(void)tok;

	return IVY_ERR_BAD_SYNTAX;
}
static enum ivy_status parse_comma(
struct ivy_asm_parser *ctx, struct ivy_asm_token *tok)
{
struct block_parser_state *state
= (struct block_parser_state *)asm_parser_get_state(ctx);
switch (state->s_prev_component) {
case INSTR_OPERAND:
case INSTR_OPERAND_INDEX_RIGHT:
state->s_prev_component = INSTR_OPERAND_SEPARATOR;
return IVY_OK;
case INSTR_OPERAND_INDEX_BASE:
state->s_prev_component = INSTR_OPERAND_INDEX_SEPARATOR;
return IVY_OK;
default:
return IVY_ERR_BAD_SYNTAX;
}
}
static enum ivy_status parse_left_bracket(
struct ivy_asm_parser *ctx, struct ivy_asm_token *tok)
{
struct block_parser_state *state
= (struct block_parser_state *)asm_parser_get_state(ctx);
switch (state->s_prev_component) {
case INSTR_OPCODE:
case INSTR_OPERAND_SEPARATOR:
break;
default:
return IVY_ERR_BAD_SYNTAX;
}
struct arg *arg = malloc(sizeof *arg);
if (!arg) {
return IVY_ERR_NO_MEMORY;
}
memset(arg, 0x0, sizeof *arg);
state->s_current_arg = arg;
state->s_prev_component = INSTR_OPERAND_INDEX_LEFT;
return IVY_OK;
}
static enum ivy_status parse_right_bracket(
struct ivy_asm_parser *ctx, struct ivy_asm_token *tok)
{
struct block_parser_state *state
= (struct block_parser_state *)asm_parser_get_state(ctx);
if (state->s_prev_component != INSTR_OPERAND_INDEX_OFFSET) {
return IVY_ERR_BAD_SYNTAX;
}
b_queue_push_back(&state->s_args, &state->s_current_arg->arg_entry);
state->s_current_arg = NULL;
state->s_prev_component = INSTR_OPERAND;
return IVY_OK;
}
/*
 * Handle the END keyword, which closes the block.
 *
 * END is only accepted when no instruction is in progress; the block
 * parser state is then popped from the parser.
 *
 * @param ctx  parser whose current state is a block_parser_state
 * @param tok  the keyword token (unused)
 * @return IVY_OK or IVY_ERR_BAD_SYNTAX.
 */
static enum ivy_status parse_end(
	struct ivy_asm_parser *ctx, struct ivy_asm_token *tok)
{
	struct block_parser_state *bs
		= (struct block_parser_state *)asm_parser_get_state(ctx);

	if (bs->s_prev_component == INSTR_NONE) {
		asm_parser_pop_state(ctx, NULL);
		return IVY_OK;
	}

	return IVY_ERR_BAD_SYNTAX;
}
/*
 * State-initialisation hook registered as n_init_state. Intentionally
 * empty: no field of block_parser_state is set here.
 * NOTE(review): the token callbacks read s_prev_component and
 * s_current_arg before writing them, so the state memory is presumably
 * zeroed by whoever allocates it -- confirm.
 */
static void init_state(struct ivy_asm_parser *ctx, struct parser_state *state)
{
}
/*
 * State-teardown hook registered as n_finish_state. Currently empty.
 * NOTE(review): struct arg objects allocated by push_const_arg(),
 * push_reg_arg() and parse_left_bracket() are not freed anywhere in the
 * code visible here; this hook looks like the place to release s_args and
 * s_current_arg -- confirm ownership before adding that.
 */
static void finish_state(struct ivy_asm_parser *ctx, struct parser_state *state)
{
}
/*
 * Descriptor for the block parser state: its size, its init/finish hooks
 * and the callbacks invoked for each token, symbol and keyword kind.
 * Kinds without an entry here have no callback registered by this table.
 */
struct parser_state_type block_parser_state_type = {
.n_init_state = init_state,
.n_finish_state = finish_state,
.n_state_size = sizeof(struct block_parser_state),
.n_token_parsers = {
TOK_PARSER(IDENT, parse_ident),
TOK_PARSER(LABEL, parse_label),
TOK_PARSER(INT, parse_int),
TOK_PARSER(LINEFEED, parse_linefeed),
},
.n_symbol_parsers = {
SYM_PARSER(LEFT_BRACKET, parse_left_bracket),
SYM_PARSER(RIGHT_BRACKET, parse_right_bracket),
SYM_PARSER(COMMA, parse_comma),
},
.n_keyword_parsers = {
KW_PARSER(END, parse_end),
},
};