Files
ivy/asm/lex.c

1113 lines
21 KiB
C
Raw Normal View History

2024-11-22 22:30:15 +00:00
#include "lex.h"
2024-11-19 22:08:58 +00:00
#include <blue/core/hash.h>
#include <blue/core/queue.h>
#include <blue/object/dict.h>
#include <blue/object/number.h>
2024-11-22 22:30:15 +00:00
#include <blue/object/string.h>
2024-11-19 22:08:58 +00:00
#include <ctype.h>
#include <ivy/asm/lex.h>
2024-11-19 22:08:58 +00:00
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2024-11-22 22:30:15 +00:00
#include <wctype.h>
2024-11-19 22:08:58 +00:00
/* Size, in bytes, of the line buffer allocated when a lexer is created. */
#define LINEBUF_DEFAULT_CAPACITY 1024

/*
 * Expands to a brace-enclosed designated initialiser that sets the `id` and
 * `name` members; used to populate the keyword and symbol tables.
 */
#define LEX_TOKEN_DEF(i, n) { .id = (i), .name = (n) }
static struct lex_token_def keywords[] = {
LEX_TOKEN_DEF(IVY_ASM_KW_IMPORT, "@import"),
2024-11-19 22:08:58 +00:00
LEX_TOKEN_DEF(IVY_ASM_KW_IDENT, "@ident"),
2024-11-22 22:30:15 +00:00
LEX_TOKEN_DEF(IVY_ASM_KW_SELECTOR, "@selector"),
2024-11-19 22:08:58 +00:00
LEX_TOKEN_DEF(IVY_ASM_KW_ATOM, "@atom"),
LEX_TOKEN_DEF(IVY_ASM_KW_CONSTPOOL, "@constpool"),
2024-11-22 22:30:15 +00:00
LEX_TOKEN_DEF(IVY_ASM_KW_CLASS, "@class"),
LEX_TOKEN_DEF(IVY_ASM_KW_BLOCK, "@block"),
LEX_TOKEN_DEF(IVY_ASM_KW_PACKAGE, "@package"),
LEX_TOKEN_DEF(IVY_ASM_KW_PROPERTY, "@property"),
LEX_TOKEN_DEF(IVY_ASM_KW_VAR, "@var"),
LEX_TOKEN_DEF(IVY_ASM_KW_MSGH, "@msgh"),
2024-11-19 22:08:58 +00:00
LEX_TOKEN_DEF(IVY_ASM_KW_END, "@end"),
};
static const size_t nr_keywords = sizeof keywords / sizeof keywords[0];
static struct lex_token_def symbols[] = {
LEX_TOKEN_DEF(IVY_ASM_SYM_DOT, "."),
LEX_TOKEN_DEF(IVY_ASM_SYM_COMMA, ","),
LEX_TOKEN_DEF(IVY_ASM_SYM_LEFT_PAREN, "("),
LEX_TOKEN_DEF(IVY_ASM_SYM_RIGHT_PAREN, ")"),
LEX_TOKEN_DEF(IVY_ASM_SYM_LEFT_BRACKET, "["),
LEX_TOKEN_DEF(IVY_ASM_SYM_RIGHT_BRACKET, "]"),
LEX_TOKEN_DEF(IVY_ASM_SYM_LEFT_BRACE, "{"),
LEX_TOKEN_DEF(IVY_ASM_SYM_RIGHT_BRACE, "}"),
2024-11-19 22:08:58 +00:00
LEX_TOKEN_DEF(IVY_ASM_SYM_COLON, ":"),
LEX_TOKEN_DEF(IVY_ASM_SYM_SEMICOLON, ";"),
LEX_TOKEN_DEF(IVY_ASM_SYM_HYPHEN, "-"),
LEX_TOKEN_DEF(IVY_ASM_SYM_SQUOTE, "'"),
LEX_TOKEN_DEF(IVY_ASM_SYM_DQUOTE, "\""),
LEX_TOKEN_DEF(IVY_ASM_SYM_FORWARD_SLASH_ASTERISK, "/*"),
LEX_TOKEN_DEF(IVY_ASM_SYM_ASTERISK_FORWARD_SLASH, "*/"),
};
static const size_t nr_symbols = sizeof symbols / sizeof symbols[0];
/*
 * Allocate a zero-filled lexer state of type `state_type` and append it to
 * the back of lex->lex_state.
 *
 * Returns the new state, or NULL if the allocation fails (in which case the
 * state queue is left untouched).
 */
static struct lexer_state *push_lexer_state(
	struct ivy_asm_lexer *lex, enum lexer_state_type state_type)
{
	struct lexer_state *fresh = calloc(1, sizeof *fresh);
	if (fresh == NULL) {
		return NULL;
	}

	fresh->s_type = state_type;
	b_queue_push_back(&lex->lex_state, &fresh->s_entry);

	return fresh;
}
/*
 * Remove the entry at the back of lex->lex_state and free the lexer state
 * that contains it. Does nothing when the queue yields no entry.
 */
static void pop_lexer_state(struct ivy_asm_lexer *lex)
{
	b_queue_entry *top = b_queue_pop_back(&lex->lex_state);

	if (top != NULL) {
		struct lexer_state *old = b_unbox(struct lexer_state, top, s_entry);
		free(old);
	}
}
/*
 * Return the lexer state at the back of lex->lex_state without removing it,
 * or NULL when the queue has no last entry.
 */
static struct lexer_state *get_lexer_state(struct ivy_asm_lexer *lex)
{
	b_queue_entry *top = b_queue_last(&lex->lex_state);

	if (top != NULL) {
		return b_unbox(struct lexer_state, top, s_entry);
	}

	return NULL;
}
/*
 * Free every lexer state linked into `state`, erasing each entry from the
 * queue before its memory is released.
 */
static void destroy_state_stack(b_queue *state)
{
	b_queue_iterator cursor;

	b_queue_iterator_begin(state, &cursor);
	for (; b_queue_iterator_is_valid(&cursor);) {
		struct lexer_state *victim
			= b_unbox(struct lexer_state, cursor.entry, s_entry);

		/* Unlink before freeing so the iterator never touches freed
		 * memory. NOTE(review): erase presumably moves the iterator on
		 * to the next entry -- confirm against the b_queue API. */
		b_queue_iterator_erase(&cursor);
		free(victim);
	}
}
/*
 * Search the direct children of `node` for the one labelled with character
 * `c`. Returns that child, or NULL if none of the children matches.
 */
static struct ivy_asm_lexer_symbol_node *get_symbol_node(
	struct ivy_asm_lexer_symbol_node *node, char c)
{
	b_queue_iterator cursor;

	b_queue_foreach (&cursor, &node->s_children) {
		struct ivy_asm_lexer_symbol_node *candidate = b_unbox(
			struct ivy_asm_lexer_symbol_node, cursor.entry, s_entry);

		if (c == candidate->s_char) {
			return candidate;
		}
	}

	return NULL;
}
/*
 * Return the lexer's reusable scratch string, cleared and ready for use.
 *
 * The string is created on first use and cached in lex->lex_temp. Returns
 * NULL if the string could not be created; previously a failed creation
 * was passed straight to b_string_clear().
 * NOTE(review): assumes b_string_create() signals failure by returning
 * NULL -- confirm against the b_string API.
 */
static b_string *get_temp_string(struct ivy_asm_lexer *lex)
{
	if (!lex->lex_temp) {
		lex->lex_temp = b_string_create();
		if (!lex->lex_temp) {
			return NULL;
		}
	}

	b_string_clear(lex->lex_temp);
	return lex->lex_temp;
}
/*
 * Insert the spelling of `sym` into the symbol tree rooted at `tree`.
 *
 * Walks the tree one character at a time, reusing existing child nodes and
 * creating the missing ones with id IVY_ASM_SYM_NONE. The node reached by
 * the last character is tagged with sym->id.
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if a node cannot be allocated
 * (nodes created before the failure stay linked into the tree).
 */
static enum ivy_status put_symbol(
	struct ivy_asm_lexer_symbol_node *tree, struct lex_token_def *sym)
{
	struct ivy_asm_lexer_symbol_node *cursor = tree;

	for (const char *p = sym->name; *p != '\0'; p++) {
		struct ivy_asm_lexer_symbol_node *next
			= get_symbol_node(cursor, *p);

		if (next == NULL) {
			next = calloc(1, sizeof *next);
			if (next == NULL) {
				return IVY_ERR_NO_MEMORY;
			}

			next->s_id = IVY_ASM_SYM_NONE;
			next->s_char = *p;
			b_queue_push_back(&cursor->s_children, &next->s_entry);
		}

		cursor = next;
	}

	cursor->s_id = sym->id;
	return IVY_OK;
}
/*
 * Recursively free the symbol tree rooted at `tree`: each child is erased
 * from the child queue and destroyed, then `tree` itself is freed.
 */
static void destroy_symbol_tree(struct ivy_asm_lexer_symbol_node *tree)
{
	b_queue_iterator cursor;

	b_queue_iterator_begin(&tree->s_children, &cursor);
	for (; b_queue_iterator_is_valid(&cursor);) {
		struct ivy_asm_lexer_symbol_node *child = b_unbox(
			struct ivy_asm_lexer_symbol_node, cursor.entry, s_entry);

		/* detach the child before tearing down its subtree */
		b_queue_iterator_erase(&cursor);
		destroy_symbol_tree(child);
	}

	free(tree);
}
/*
 * Build the symbol lookup tree from the `symbols` table.
 *
 * Returns the root node (id IVY_ASM_SYM_NONE), or NULL if any allocation
 * fails; on failure everything built so far is destroyed.
 */
static struct ivy_asm_lexer_symbol_node *build_symbol_tree(void)
{
	struct ivy_asm_lexer_symbol_node *root = calloc(1, sizeof *root);
	if (root == NULL) {
		return NULL;
	}

	root->s_id = IVY_ASM_SYM_NONE;

	for (size_t idx = 0; idx < nr_symbols; idx++) {
		if (put_symbol(root, &symbols[idx]) != IVY_OK) {
			destroy_symbol_tree(root);
			return NULL;
		}
	}

	return root;
}
/*
 * Load every entry of the `keywords` table into `keyword_dict`, keyed by
 * the keyword's spelling with its id stored as an integer value.
 */
static void init_keywords(b_dict *keyword_dict)
{
	struct lex_token_def *const end = keywords + nr_keywords;

	for (struct lex_token_def *kw = keywords; kw < end; kw++) {
		b_dict_put(keyword_dict, kw->name, B_RV_INT(kw->id));
	}
}
2024-11-22 22:30:15 +00:00
static enum ivy_asm_keyword find_keyword_by_name(
struct ivy_asm_lexer *lex, const char *s)
2024-11-19 22:08:58 +00:00
{
b_number *id = B_NUMBER(b_dict_at(lex->lex_keywords, s));
if (!id) {
return IVY_ASM_KW_NONE;
}
return b_number_get_int(id);
}
/*
 * Create a new assembler lexer and store it in *lexp.
 *
 * The lexer starts with an allocated line buffer of
 * LINEBUF_DEFAULT_CAPACITY bytes, a symbol tree, an initial STATE_NORMAL
 * state and a populated keyword dictionary. No source is attached; use
 * ivy_asm_lexer_set_source() for that.
 *
 * Returns IVY_OK on success. On any allocation failure the partially built
 * lexer is destroyed, *lexp is left untouched and IVY_ERR_NO_MEMORY is
 * returned. The line buffer and keyword dictionary allocations are now
 * checked as well.
 * NOTE(review): assumes b_dict_create() reports failure by returning NULL.
 */
enum ivy_status ivy_asm_lexer_create(struct ivy_asm_lexer **lexp)
{
	struct ivy_asm_lexer *lex = malloc(sizeof *lex);
	if (!lex) {
		return IVY_ERR_NO_MEMORY;
	}

	memset(lex, 0x0, sizeof *lex);
	lex->lex_status = IVY_OK;
	lex->lex_prev_token = IVY_ASM_TOK_NONE;

	lex->lex_linebuf = malloc(LINEBUF_DEFAULT_CAPACITY);
	if (!lex->lex_linebuf) {
		goto fail;
	}
	lex->lex_linebuf_cap = LINEBUF_DEFAULT_CAPACITY;

	lex->lex_sym_tree = build_symbol_tree();
	if (!lex->lex_sym_tree) {
		goto fail;
	}

	if (!push_lexer_state(lex, STATE_NORMAL)) {
		goto fail;
	}

	lex->lex_keywords = b_dict_create();
	if (!lex->lex_keywords) {
		goto fail;
	}
	init_keywords(lex->lex_keywords);

	*lexp = lex;
	return IVY_OK;

fail:
	/* the destructor copes with members that are still NULL */
	ivy_asm_lexer_destroy(lex);
	return IVY_ERR_NO_MEMORY;
}
/*
 * Destroy a lexer and everything it owns: queued tokens, the line buffer,
 * the symbol tree, the scratch string, the keyword dictionary and the
 * state stack. Members that were never set up are skipped, so this is safe
 * on a partially constructed lexer. Passing NULL is a no-op.
 *
 * The attached line source (lex_source) is not released here.
 */
void ivy_asm_lexer_destroy(struct ivy_asm_lexer *lex)
{
	if (!lex) {
		return;
	}

	b_queue_iterator it = {0};
	b_queue_iterator_begin(&lex->lex_queue, &it);
	while (b_queue_iterator_is_valid(&it)) {
		struct ivy_asm_token *tok
			= b_unbox(struct ivy_asm_token, it.entry, t_entry);
		/* unlink first, then release the token */
		b_queue_iterator_erase(&it);
		ivy_asm_token_destroy(tok);
	}

	/* free(NULL) is a no-op, so no guard is needed */
	free(lex->lex_linebuf);

	if (lex->lex_sym_tree) {
		destroy_symbol_tree(lex->lex_sym_tree);
	}

	if (lex->lex_temp) {
		b_string_release(lex->lex_temp);
	}

	if (lex->lex_keywords) {
		b_dict_release(lex->lex_keywords);
	}

	destroy_state_stack(&lex->lex_state);
	free(lex);
}
/*
 * Attach `src` as the line source the lexer reads from. Only the pointer
 * is stored; the previous source (if any) is simply replaced.
 */
void ivy_asm_lexer_set_source(
	struct ivy_asm_lexer *lex, struct ivy_line_source *src)
{
	lex->lex_source = src;
}
enum ivy_status ivy_asm_lexer_get_status(struct ivy_asm_lexer *lex)
{
return lex->lex_status;
}
static enum ivy_status refill_linebuf(struct ivy_asm_lexer *lex)
{
if (!lex->lex_source) {
return IVY_ERR_EOF;
}
enum ivy_status status = ivy_line_source_readline(
lex->lex_source, lex->lex_linebuf, lex->lex_linebuf_cap,
&lex->lex_linebuf_len, NULL);
if (status == IVY_OK) {
lex->lex_linebuf_ptr = 0;
}
return status;
}
/*
 * Return the character at the current read position without consuming it.
 *
 * The line buffer is refilled first when it has been fully consumed. On
 * failure the refill status is returned instead of a character, and
 * IVY_ERR_EOF is returned when the buffer is empty.
 *
 * The byte is returned as an unsigned char value so that bytes >= 0x80
 * are never negative: callers treat negative results as error statuses
 * and pass the value to <ctype.h> functions.
 * NOTE(review): relies on ivy_status error codes being negative, as the
 * callers' `c < 0` checks imply.
 */
static int peek(struct ivy_asm_lexer *lex)
{
	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
		enum ivy_status status = refill_linebuf(lex);
		if (status != IVY_OK) {
			return status;
		}
	}

	if (lex->lex_linebuf_len == 0) {
		return IVY_ERR_EOF;
	}

	return (unsigned char)lex->lex_linebuf[lex->lex_linebuf_ptr];
}
/*
 * Return the character one past the current read position without
 * consuming anything.
 *
 * The line buffer is refilled first when it has been fully consumed. On
 * failure the refill status is returned. IVY_ERR_EOF is returned when the
 * buffer is empty or when the current character is the last one in the
 * buffer (the lookahead never crosses into the next line).
 *
 * The byte is returned as an unsigned char value so that bytes >= 0x80
 * cannot be mistaken for a negative status code.
 */
static int peek_next(struct ivy_asm_lexer *lex)
{
	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
		enum ivy_status status = refill_linebuf(lex);
		if (status != IVY_OK) {
			return status;
		}
	}

	if (lex->lex_linebuf_len == 0) {
		return IVY_ERR_EOF;
	}

	if (lex->lex_linebuf_ptr + 1 >= lex->lex_linebuf_len) {
		return IVY_ERR_EOF;
	}

	return (unsigned char)lex->lex_linebuf[lex->lex_linebuf_ptr + 1];
}
/*
 * Consume and return the character at the current read position.
 *
 * The line buffer is refilled first when it has been fully consumed. On
 * failure the refill status is returned and nothing is consumed;
 * IVY_ERR_EOF is returned when the buffer is empty.
 *
 * The byte is returned as an unsigned char value so that bytes >= 0x80
 * cannot be mistaken for a negative status code.
 */
static int advance(struct ivy_asm_lexer *lex)
{
	if (lex->lex_linebuf_ptr >= lex->lex_linebuf_len) {
		enum ivy_status status = refill_linebuf(lex);
		if (status != IVY_OK) {
			return status;
		}
	}

	if (lex->lex_linebuf_len == 0) {
		return IVY_ERR_EOF;
	}

	return (unsigned char)lex->lex_linebuf[lex->lex_linebuf_ptr++];
}
2024-11-22 22:30:15 +00:00
static bool input_available(struct ivy_asm_lexer *lex)
2024-11-19 22:08:58 +00:00
{
return lex->lex_linebuf_ptr < lex->lex_linebuf_len;
}
static bool char_can_begin_symbol(char c)
{
for (size_t i = 0; i < nr_symbols; i++) {
if (symbols[i].name[0] == c) {
return true;
}
}
return false;
}
/*
 * Allocate a zero-filled token whose type is `type`.
 * Returns NULL if the allocation fails.
 */
static struct ivy_asm_token *create_token(enum ivy_asm_token_type type)
{
	struct ivy_asm_token *fresh = calloc(1, sizeof *fresh);

	if (fresh != NULL) {
		fresh->t_type = type;
	}

	return fresh;
}
2024-11-22 22:30:15 +00:00
static enum ivy_status push_token(
struct ivy_asm_lexer *lex, struct ivy_asm_token *tok)
2024-11-19 22:08:58 +00:00
{
b_queue_push_back(&lex->lex_queue, &tok->t_entry);
2024-11-19 22:08:58 +00:00
lex->lex_prev_token = tok->t_type;
return IVY_OK;
}
/*
 * Queue a LINEFEED token, unless the previously produced token was already
 * a LINEFEED (consecutive linefeeds collapse into one).
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_linefeed(struct ivy_asm_lexer *lex)
{
	if (lex->lex_prev_token == IVY_ASM_TOK_LINEFEED) {
		return IVY_OK;
	}

	struct ivy_asm_token *lf = create_token(IVY_ASM_TOK_LINEFEED);
	if (lf == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	return push_token(lex, lf);
}
/*
 * Queue a STRING token whose text pointer is `s`. The pointer is stored
 * as-is (not copied).
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated,
 * in which case `s` is not stored anywhere.
 */
static enum ivy_status push_string_content(struct ivy_asm_lexer *lex, char *s)
{
	struct ivy_asm_token *text = create_token(IVY_ASM_TOK_STRING);
	if (text == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	text->t_str = s;
	return push_token(lex, text);
}
/*
 * Queue a SYMBOL token carrying `sym`.
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_symbol(struct ivy_asm_lexer *lex, enum ivy_asm_symbol sym)
{
	struct ivy_asm_token *punct = create_token(IVY_ASM_TOK_SYMBOL);
	if (punct == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	punct->t_symbol = sym;
	return push_token(lex, punct);
}
/*
 * Queue an INT token holding the signed value `v` (stored in t_int.v with
 * t_int.sign set to true).
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_int(struct ivy_asm_lexer *lex, long long v)
{
	struct ivy_asm_token *num = create_token(IVY_ASM_TOK_INT);
	if (num == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	num->t_int.sign = true;
	num->t_int.v = v;
	return push_token(lex, num);
}
/*
 * Queue an INT token holding the unsigned value `v` (stored in t_int.uv
 * with t_int.sign set to false).
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the token cannot be allocated.
 */
static enum ivy_status push_uint(struct ivy_asm_lexer *lex, unsigned long long v)
{
	struct ivy_asm_token *num = create_token(IVY_ASM_TOK_INT);
	if (num == NULL) {
		return IVY_ERR_NO_MEMORY;
	}

	num->t_int.sign = false;
	num->t_int.uv = v;
	return push_token(lex, num);
}
static enum ivy_status push_double(struct ivy_asm_lexer *lex, double v)
{
2024-11-19 22:08:58 +00:00
struct ivy_asm_token *tok = malloc(sizeof *tok);
if (!tok) {
return IVY_ERR_NO_MEMORY;
}
2024-11-19 22:08:58 +00:00
memset(tok, 0x0, sizeof *tok);
tok->t_type = IVY_ASM_TOK_DOUBLE;
tok->t_double = v;
return push_token(lex, tok);
}
2024-11-22 22:30:15 +00:00
static enum ivy_status push_keyword(
struct ivy_asm_lexer *lex, enum ivy_asm_keyword keyword)
2024-11-19 22:08:58 +00:00
{
struct ivy_asm_token *tok = malloc(sizeof *tok);
if (!tok) {
return IVY_ERR_NO_MEMORY;
}
memset(tok, 0x0, sizeof *tok);
tok->t_type = IVY_ASM_TOK_KEYWORD;
tok->t_keyword = keyword;
return push_token(lex, tok);
}
static enum ivy_status read_line_comment(struct ivy_asm_lexer *lex)
{
while (true) {
int c = peek(lex);
2024-11-19 22:08:58 +00:00
if (c == IVY_ERR_EOF || c == '\n') {
break;
}
if (c < 0) {
return c;
}
advance(lex);
2024-11-19 22:08:58 +00:00
}
return IVY_OK;
}
static enum ivy_status read_block_comment(struct ivy_asm_lexer *lex)
{
int depth = 1;
char buf[2] = {0};
while (depth > 0) {
int c = peek(lex);
if (c < 0) {
return c;
}
if (!buf[0]) {
buf[0] = c;
} else if (!buf[1]) {
buf[1] = c;
} else {
buf[0] = buf[1];
buf[1] = c;
}
if (buf[0] == '/' && buf[1] == '*') {
depth++;
} else if (buf[0] == '*' && buf[1] == '/') {
depth--;
}
advance(lex);
}
return IVY_OK;
}
/*
 * Handle a single-quote marker: it closes the current single-quoted string
 * when the lexer is in STATE_STRING (the state is popped), and opens a new
 * one otherwise (STATE_STRING is pushed).
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the new state cannot be pushed.
 */
static enum ivy_status read_squote_marker(struct ivy_asm_lexer *lex)
{
	struct lexer_state *current = get_lexer_state(lex);

	if (current->s_type != STATE_STRING) {
		/* start of a new string */
		return push_lexer_state(lex, STATE_STRING)
			? IVY_OK
			: IVY_ERR_NO_MEMORY;
	}

	/* already within a string */
	pop_lexer_state(lex);
	return IVY_OK;
}
/*
 * Handle a double-quote marker: it closes the current double-quoted string
 * when the lexer is in STATE_DSTRING (the state is popped), and opens a
 * new one otherwise (STATE_DSTRING is pushed).
 *
 * Returns IVY_OK, or IVY_ERR_NO_MEMORY if the new state cannot be pushed.
 */
static enum ivy_status read_dquote_marker(struct ivy_asm_lexer *lex)
{
	struct lexer_state *current = get_lexer_state(lex);

	if (current->s_type != STATE_DSTRING) {
		/* start of a new string */
		return push_lexer_state(lex, STATE_DSTRING)
			? IVY_OK
			: IVY_ERR_NO_MEMORY;
	}

	/* already within a string */
	pop_lexer_state(lex);
	return IVY_OK;
}
static enum ivy_status read_string_content(struct ivy_asm_lexer *lex)
{
int c;
b_string *str = get_temp_string(lex);
struct lexer_state *state = get_lexer_state(lex);
if (!str) {
return IVY_ERR_NO_MEMORY;
}
while (true) {
c = peek(lex);
if (state->s_type == STATE_STRING && (c == '\'')) {
break;
}
if (state->s_type == STATE_DSTRING && c == '"') {
break;
}
char s[2] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
}
if (b_string_get_size(str, B_STRLEN_NORMAL) == 0) {
return IVY_OK;
}
char *s = b_string_steal(str);
enum ivy_status status = push_string_content(lex, s);
if (status != IVY_OK) {
free(s);
}
return status;
}
/*
 * Read the longest symbol that matches the upcoming input by walking the
 * symbol tree one character at a time, then act on it:
 *   - quote markers switch string state,
 *   - the block-comment opener and ';' skip the comment they introduce,
 *   - every other symbol is queued as a SYMBOL token.
 *
 * Returns IVY_ERR_BAD_SYNTAX if the consumed characters do not form a
 * complete symbol; otherwise the status of the action taken. The result of
 * push_symbol() is now propagated, so an allocation failure is no longer
 * reported as IVY_OK.
 */
static enum ivy_status read_symbol(struct ivy_asm_lexer *lex)
{
	struct ivy_asm_lexer_symbol_node *node = lex->lex_sym_tree;

	while (true) {
		int c = peek(lex);
		if (c < 0) {
			/* EOF or error: no further characters to match */
			break;
		}

		struct ivy_asm_lexer_symbol_node *next = get_symbol_node(node, c);
		if (!next) {
			break;
		}

		node = next;
		advance(lex);
	}

	if (!node || node->s_id == IVY_ASM_SYM_NONE) {
		return IVY_ERR_BAD_SYNTAX;
	}

	switch (node->s_id) {
	case IVY_ASM_SYM_SQUOTE:
		return read_squote_marker(lex);
	case IVY_ASM_SYM_DQUOTE:
		return read_dquote_marker(lex);
	case IVY_ASM_SYM_FORWARD_SLASH_ASTERISK:
		return read_block_comment(lex);
	case IVY_ASM_SYM_SEMICOLON:
		return read_line_comment(lex);
	default:
		return push_symbol(lex, node->s_id);
	}
}
static enum ivy_status read_number(struct ivy_asm_lexer *lex)
{
/* skip the leading # symbol */
advance(lex);
int token_len = 0;
int base = 10;
int dots = 0;
bool neg = false;
b_string *str = get_temp_string(lex);
while (true) {
int c = peek(lex);
if (c == IVY_ERR_EOF) {
break;
}
if (c < 0) {
return c;
}
if (c == '_') {
token_len++;
advance(lex);
continue;
}
if (c == '-') {
if (neg) {
return IVY_ERR_BAD_SYNTAX;
}
neg = true;
token_len++;
advance(lex);
continue;
}
if (c == '.' && iswdigit(peek_next(lex))) {
if (base != 10) {
return IVY_ERR_BAD_SYNTAX;
}
if (dots > 0) {
return IVY_ERR_BAD_SYNTAX;
}
token_len++;
dots++;
char s[] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
continue;
}
if (isspace(c) || ispunct(c)) {
break;
}
if (c == '0' && token_len == 0) {
base = 7;
token_len++;
advance(lex);
continue;
}
if (c == 'x' && token_len == 1) {
base = 16;
token_len++;
advance(lex);
continue;
}
if (c == 'b' && token_len == 1) {
base = 2;
token_len++;
advance(lex);
continue;
}
if (base == 2 && c != '0' && c != '1') {
return IVY_ERR_BAD_SYNTAX;
}
if (base == 10 && !isdigit(c)) {
return IVY_ERR_BAD_SYNTAX;
}
if (base == 16 && !isxdigit(c)) {
return IVY_ERR_BAD_SYNTAX;
}
char s[] = {c, 0};
b_string_append_cstr(str, s);
token_len++;
2024-11-19 22:08:58 +00:00
advance(lex);
}
if (token_len == 1 && base == 7) {
return push_uint(lex, 0);
}
const char *s = b_string_ptr(str);
char *ep = NULL;
if (dots > 0) {
double v = strtod(s, &ep);
if (*ep != '\0') {
return IVY_ERR_BAD_SYNTAX;
}
if (neg) {
v *= -1;
}
return push_double(lex, v);
} else if (neg) {
long long v = strtoll(s, &ep, base);
if (*ep != '\0') {
return IVY_ERR_BAD_SYNTAX;
}
v *= -1;
return push_int(lex, v);
} else {
unsigned long long v = strtoull(s, &ep, base);
if (*ep != '\0') {
return IVY_ERR_BAD_SYNTAX;
}
return push_uint(lex, v);
}
}
static enum ivy_status read_keyword(struct ivy_asm_lexer *lex)
{
advance(lex);
b_string *str = get_temp_string(lex);
b_string_append_cstr(str, "@");
bool label = false;
while (true) {
int c = peek(lex);
if (c < 0) {
break;
}
if (!isalnum(c) && c != '_') {
break;
}
char s[2] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
}
const char *s = b_string_ptr(str);
2024-11-22 22:30:15 +00:00
enum ivy_asm_keyword keyword = find_keyword_by_name(lex, s);
2024-11-19 22:08:58 +00:00
if (keyword == IVY_ASM_KW_NONE) {
return IVY_ERR_BAD_SYNTAX;
}
return push_keyword(lex, keyword);
}
static enum ivy_status read_label_ref(struct ivy_asm_lexer *lex)
{
advance(lex);
b_string *str = get_temp_string(lex);
bool label = false;
while (true) {
int c = peek(lex);
if (c < 0) {
break;
}
if (c == ':' && peek_next(lex) != ':') {
advance(lex);
label = true;
break;
}
if (!isalnum(c) && c != '_') {
break;
}
char s[2] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
}
const char *s = b_string_ptr(str);
struct ivy_asm_token *tok = create_token(IVY_ASM_TOK_LABEL_REF);
tok->t_str = b_string_steal(str);
return push_token(lex, tok);
}
2024-11-19 22:08:58 +00:00
static enum ivy_status read_ident(struct ivy_asm_lexer *lex)
{
b_string *str = get_temp_string(lex);
bool label = false;
while (true) {
int c = peek(lex);
if (c < 0) {
break;
}
if (c == ':' && peek_next(lex) != ':') {
advance(lex);
label = true;
break;
}
if (!isalnum(c) && c != '_') {
break;
}
char s[2] = {c, 0};
b_string_append_cstr(str, s);
advance(lex);
}
const char *s = b_string_ptr(str);
2024-11-22 22:30:15 +00:00
2024-11-19 22:08:58 +00:00
struct ivy_asm_token *tok
= create_token(label ? IVY_ASM_TOK_LABEL : IVY_ASM_TOK_IDENT);
tok->t_str = b_string_steal(str);
return push_token(lex, tok);
}
static enum ivy_status pump_tokens(struct ivy_asm_lexer *lex)
{
struct lexer_state *state = get_lexer_state(lex);
int c = peek(lex);
if (c < 0) {
return c;
}
if (state->s_type == STATE_DSTRING && c != '"') {
return read_string_content(lex);
}
if (state->s_type == STATE_STRING && c != '\'') {
return read_string_content(lex);
}
/* `state` is invalid past this point, as the read_* functions
* may perform state transitions. */
state = NULL;
if (c == '\n') {
while (c == '\n') {
advance(lex);
if (!input_available(lex)) {
break;
}
c = peek(lex);
}
if (c < 0) {
return c;
}
return push_linefeed(lex);
}
while (isspace(c)) {
advance(lex);
c = peek(lex);
}
if (isalpha(c) || c == '_') {
return read_ident(lex);
}
if (char_can_begin_symbol(c)) {
return read_symbol(lex);
}
if (c == '$') {
return read_label_ref(lex);
}
2024-11-19 22:08:58 +00:00
if (c == '@') {
return read_keyword(lex);
}
if (c == '#') {
return read_number(lex);
}
return IVY_ERR_BAD_SYNTAX;
}
struct ivy_asm_token *ivy_asm_lexer_peek(struct ivy_asm_lexer *lex)
{
enum ivy_status status = IVY_OK;
while (b_queue_empty(&lex->lex_queue)) {
2024-11-19 22:08:58 +00:00
status = pump_tokens(lex);
if (status != IVY_OK) {
lex->lex_status = status;
return NULL;
}
}
lex->lex_status = status;
b_queue_entry *entry = b_queue_first(&lex->lex_queue);
struct ivy_asm_token *tok = b_unbox(struct ivy_asm_token, entry, t_entry);
2024-11-19 22:08:58 +00:00
return tok;
}
/*
 * Remove and return the next token, lexing more input if the queue is
 * empty. The token is no longer held by the lexer after this call; release
 * it with ivy_asm_token_destroy().
 *
 * If lexing fails, the failing status is stored in lex_status and NULL is
 * returned. On success lex_status is now reset to IVY_OK, matching
 * ivy_asm_lexer_peek(); previously a stale error from an earlier call
 * stayed visible through ivy_asm_lexer_get_status().
 */
struct ivy_asm_token *ivy_asm_lexer_read(struct ivy_asm_lexer *lex)
{
	enum ivy_status status = IVY_OK;

	while (b_queue_empty(&lex->lex_queue)) {
		status = pump_tokens(lex);

		if (status != IVY_OK) {
			lex->lex_status = status;
			return NULL;
		}
	}

	lex->lex_status = status;

	b_queue_entry *entry = b_queue_pop_front(&lex->lex_queue);
	struct ivy_asm_token *tok = b_unbox(struct ivy_asm_token, entry, t_entry);
	return tok;
}
void ivy_asm_token_destroy(struct ivy_asm_token *tok)
{
switch (tok->t_type) {
case IVY_ASM_TOK_STRING:
case IVY_ASM_TOK_IDENT:
free(tok->t_str);
break;
default:
break;
}
free(tok);
}
/*
 * Expands to a switch case that returns the enumerator's own name as a
 * string literal. The expansion has no trailing semicolon; each use inside
 * a switch supplies it.
 */
#define ENUM_STR(x) \
case x: \
return #x
const char *ivy_asm_token_type_to_string(enum ivy_asm_token_type type)
{
switch (type) {
ENUM_STR(IVY_ASM_TOK_NONE);
ENUM_STR(IVY_ASM_TOK_KEYWORD);
ENUM_STR(IVY_ASM_TOK_SYMBOL);
ENUM_STR(IVY_ASM_TOK_INT);
ENUM_STR(IVY_ASM_TOK_DOUBLE);
ENUM_STR(IVY_ASM_TOK_LABEL);
ENUM_STR(IVY_ASM_TOK_LABEL_REF);
2024-11-19 22:08:58 +00:00
ENUM_STR(IVY_ASM_TOK_IDENT);
ENUM_STR(IVY_ASM_TOK_STRING);
ENUM_STR(IVY_ASM_TOK_LINEFEED);
default:
return "";
}
}
const char *ivy_asm_keyword_to_string(enum ivy_asm_keyword keyword)
{
switch (keyword) {
ENUM_STR(IVY_ASM_KW_NONE);
ENUM_STR(IVY_ASM_KW_IDENT);
ENUM_STR(IVY_ASM_KW_SELECTOR);
ENUM_STR(IVY_ASM_KW_ATOM);
ENUM_STR(IVY_ASM_KW_CONSTPOOL);
ENUM_STR(IVY_ASM_KW_CLASS);
ENUM_STR(IVY_ASM_KW_BLOCK);
ENUM_STR(IVY_ASM_KW_IMPORT);
ENUM_STR(IVY_ASM_KW_PACKAGE);
ENUM_STR(IVY_ASM_KW_PROPERTY);
ENUM_STR(IVY_ASM_KW_VAR);
ENUM_STR(IVY_ASM_KW_MSGH);
2024-11-19 22:08:58 +00:00
ENUM_STR(IVY_ASM_KW_END);
default:
return "";
}
}
const char *ivy_asm_symbol_to_string(enum ivy_asm_symbol sym)
{
switch (sym) {
ENUM_STR(IVY_ASM_SYM_NONE);
ENUM_STR(IVY_ASM_SYM_DOT);
ENUM_STR(IVY_ASM_SYM_SQUOTE);
ENUM_STR(IVY_ASM_SYM_DQUOTE);
ENUM_STR(IVY_ASM_SYM_LEFT_BRACKET);
ENUM_STR(IVY_ASM_SYM_RIGHT_BRACKET);
ENUM_STR(IVY_ASM_SYM_LEFT_PAREN);
ENUM_STR(IVY_ASM_SYM_RIGHT_PAREN);
ENUM_STR(IVY_ASM_SYM_LEFT_BRACE);
ENUM_STR(IVY_ASM_SYM_RIGHT_BRACE);
2024-11-19 22:08:58 +00:00
ENUM_STR(IVY_ASM_SYM_COLON);
ENUM_STR(IVY_ASM_SYM_HYPHEN);
ENUM_STR(IVY_ASM_SYM_COMMA);
ENUM_STR(IVY_ASM_SYM_SEMICOLON);
ENUM_STR(IVY_ASM_SYM_FORWARD_SLASH_ASTERISK);
default:
return "";
}
}