From bdf201dedcc28104a776d087c23b7957da0a4dbf Mon Sep 17 00:00:00 2001 From: Max Wash Date: Fri, 19 Sep 2025 15:47:59 +0100 Subject: [PATCH] TEMP: add toml parser --- serial/toml.c | 1809 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1809 insertions(+) create mode 100644 serial/toml.c diff --git a/serial/toml.c b/serial/toml.c new file mode 100644 index 0000000..d35699b --- /dev/null +++ b/serial/toml.c @@ -0,0 +1,1809 @@ +#include "blue/core/status.h" +#include "blue/object/datetime.h" +#include "serial.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IS_VALID_KEY_COMPONENT(tok) \ + ((tok) && ((tok)->tok_type == TOK_WORD || (tok)->tok_type == TOK_STRING)) + +enum token_type { + TOK_NONE = 0, + TOK_WORD, + TOK_STRING, + TOK_INT, + TOK_UINT, + TOK_FLOAT, + TOK_BOOL, + TOK_TIMESTAMP, + TOK_NEWLINE, + TOK_EQUAL, + TOK_DOT, + TOK_COMMA, + TOK_LEFT_BRACKET, + TOK_RIGHT_BRACKET, + TOK_DOUBLE_LEFT_BRACKET, + TOK_DOUBLE_RIGHT_BRACKET, + TOK_LEFT_BRACE, + TOK_RIGHT_BRACE, +}; + +struct timestamp { + unsigned int ts_year, ts_month, ts_day; + unsigned short ts_hour, ts_min, ts_sec; + unsigned int ts_msec; + + unsigned short ts_zone_offset_hour, ts_zone_offset_minute; + unsigned char ts_zone_offset_negative; +}; + +struct token { + enum token_type tok_type; + struct b_queue_entry tok_entry; + char *tok_str; + + union { + struct { + int64_t v; + bool nan; + } i; + + struct { + double v; + bool nan; + } f; + + bool b; + // struct timestamp time; + b_datetime *time; + } tok_value; +}; + +enum ctx_flags { + CTX_EOF = 0x01u, + CTX_ENABLE_NUMBERS = 0x02u, + CTX_ENABLE_TIMESTAMPS = 0x04u, + CTX_ENABLE_BOOLS = 0x08u, + CTX_ENABLE_LONG_SYMBOLS = 0x10u, + CTX_ENABLE_MULTILINE_STRING = 0x20u, +}; + +enum ctx_state { + CTX_STATE_NONE = 0, + CTX_STATE_IN_TABLE, + CTX_STATE_IN_ARRAY, +}; + +struct ctx { + enum ctx_flags ctx_flags; + b_stream *ctx_src; + b_string *ctx_wordbuf; + b_string *ctx_linebuf; + b_stream *ctx_linebuf_stream; + size_t ctx_linebuf_pos; + enum b_status ctx_status; + b_hashmap *ctx_static_objects; + + b_queue ctx_tokens; +}; + +static void ctx_add_static_object(struct ctx *ctx, struct b_object *obj) +{ + b_hashmap_key key = { + .key_data = obj, + .key_size = sizeof(struct b_object *), + .key_flags = B_HASHMAP_KEY_F_INTVALUE, + }; + + b_hashmap_value value = {}; + + b_hashmap_put(ctx->ctx_static_objects, &key, &value); +} + +static bool ctx_object_is_static(struct ctx *ctx, struct b_object *obj) +{ + b_hashmap_key key = { + .key_data = obj, + .key_size = sizeof(struct b_object *), + .key_flags = B_HASHMAP_KEY_F_INTVALUE, + }; + + return b_hashmap_has_key(ctx->ctx_static_objects, &key); +} + +static bool data_available(struct ctx *ctx) +{ + size_t len = b_string_get_size(ctx->ctx_linebuf, B_STRLEN_NORMAL); + return len != 0 && ctx->ctx_linebuf_pos < len; +} + +static enum b_status refill_linebuf(struct ctx *ctx) +{ + b_string_clear(ctx->ctx_linebuf); + ctx->ctx_linebuf_pos = 0; + b_stream_seek(ctx->ctx_linebuf_stream, 0, B_STREAM_SEEK_START); + + enum b_status status + = b_stream_read_line_s(ctx->ctx_src, ctx->ctx_linebuf_stream); + if (!B_OK(status)) { + return status; + } + + b_string_append_cstr(ctx->ctx_linebuf, "\n"); + + return B_SUCCESS; +} + +static struct b_string *get_wordbuf(struct ctx *ctx) +{ + b_string_clear(ctx->ctx_wordbuf); + return ctx->ctx_wordbuf; +} + +static bool is_valid_char(int c) +{ + if (c <= 0) { + return false; + } + + switch (c) { + case '\0': + case '\r': + case '\b': + case 0x0C: + case 0x1F: + case 0x7F: + case 0xFF: + case 0x10: + return false; + default: + return true; + } +} + +static int advance_char(struct ctx *ctx) +{ + enum b_status status = B_SUCCESS; + if (!data_available(ctx)) { + status = refill_linebuf(ctx); + } + + if (!B_OK(status)) { + ctx->ctx_status = status; + return -1; + } + + if (!data_available(ctx)) { + return -1; + } + + const char *s = b_string_ptr(ctx->ctx_linebuf); + int c = s[ctx->ctx_linebuf_pos++]; + + if (!is_valid_char(c)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return -1; + } + + return c; +} + +static int peek_char(struct ctx *ctx) +{ + enum b_status status = B_SUCCESS; + if (!data_available(ctx)) { + status = refill_linebuf(ctx); + } + + if (!B_OK(status)) { + ctx->ctx_status = status; + return -1; + } + + if (!data_available(ctx)) { + return -1; + } + + const char *s = b_string_ptr(ctx->ctx_linebuf); + int c = s[ctx->ctx_linebuf_pos]; + + if (!is_valid_char(c)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return -1; + } + + return c; +} + +#if 0 +static int peek_char(struct ctx *ctx) +{ + int c = __peek_char(ctx); + + if (c != '#') { + return c; + } + + c = __peek_char(ctx); + while (c != '\n' && c != -1) { + __advance_char(ctx); + c = __peek_char(ctx); + } + + return c; +} + +static int advance_char(struct ctx *ctx) +{ + int c = __advance_char(ctx); + + if (c != '#') { + return c; + } + + c = __peek_char(ctx); + while (c != '\n' && c != -1) { + __advance_char(ctx); + c = __peek_char(ctx); + } + + return c; +} +#endif + +static struct token *enqueue_token(struct ctx *ctx, enum token_type type) +{ + struct token *tok = malloc(sizeof *tok); + if (!tok) { + return NULL; + } + + memset(tok, 0x0, sizeof *tok); + + tok->tok_type = type; + + b_queue_push_back(&ctx->ctx_tokens, &tok->tok_entry); + + return tok; +} + +static void discard_token(struct ctx *ctx) +{ + struct b_queue_entry *entry = b_queue_pop_front(&ctx->ctx_tokens); + if (!entry) { + return; + } + + struct token *tok = b_unbox(struct token, entry, tok_entry); + + if (tok->tok_str) { + free(tok->tok_str); + } + + free(tok); +} + +static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *token_str) +{ + b_datetime *dt = b_datetime_parse( + B_DATETIME_FORMAT_RFC3339, b_string_ptr(token_str)); + if (!dt) { + return false; + } + + struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP); + tok->tok_str = b_string_steal(token_str); + tok->tok_value.time = dt; + + return true; +} + +#if 0 +static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *token_str) +{ + const char *s = b_string_ptr(token_str); + size_t len = b_string_get_size(token_str, B_STRLEN_NORMAL); + + size_t i = 0, c = 0; + struct timestamp ts = {0}; + + bool has_date = false, has_time = false; + + if (len >= 10 && s[4] == '-' && s[7] == '-') { + has_date = true; + } + + if (len >= 8 && s[2] == ':' && s[5] == ':') { + has_time = true; + } + + if (len >= 19 && s[4] == '-' && s[7] == '-' + && (s[10] == 'T' || s[10] == ' ') && s[13] == ':' && s[16] == ':') { + has_date = true; + has_time = true; + } + + if (!has_date && !has_time) { + return false; + } + + if (has_date) { + for (c = 0; c < 4; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_year *= 10; + ts.ts_year += (s[i] - '0'); + } + + if (s[i++] != '-') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_month *= 10; + ts.ts_month += (s[i] - '0'); + } + + if (s[i++] != '-') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_day *= 10; + ts.ts_day += (s[i] - '0'); + } + } + + if (has_date && has_time) { + if (s[i] != 'T' && s[i] != ' ') { + return false; + } + + i++; + } + + if (has_time) { + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_hour *= 10; + ts.ts_hour += (s[i] - '0'); + } + + if (s[i++] != ':') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_min *= 10; + ts.ts_min += (s[i] - '0'); + } + + if (s[i++] != ':') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_sec *= 10; + ts.ts_sec += (s[i] - '0'); + } + } + + if (s[i] == '.') { + i++; + for (c = 0; s[i]; c++, i++) { + if (!isdigit(s[i])) { + break; + } + + ts.ts_msec *= 10; + ts.ts_msec += (s[i] - '0'); + } + + if (c == 0) { + return false; + } + } + + if (s[i] == '+' || s[i] == '-') { + ts.ts_zone_offset_negative = s[i] == '-'; + i++; + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_zone_offset_hour *= 10; + ts.ts_zone_offset_hour += (s[i] - '0'); + } + + if (s[i++] != ':') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_zone_offset_minute *= 10; + ts.ts_zone_offset_minute += (s[i] - '0'); + } + } else if (s[i] == 'Z') { + i++; + } + + if (s[i] != 0) { + return false; + } + + struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP); + tok->tok_str = b_string_steal(token_str); + tok->tok_value.time = ts; + + return true; +} +#endif + +static bool try_convert_word_to_number(struct ctx *ctx, struct b_string *token_str) +{ + size_t len = b_string_get_size(token_str, B_STRLEN_NORMAL); + struct b_string *str = b_string_duplicate(token_str); + const char *s = b_string_ptr(str); + + if (len == 0) { + return false; + } + + size_t offset = 0; + bool has_sign = false; + + int mul = 1; + if (s[0] == '+') { + offset++; + has_sign = true; + } else if (s[0] == '-') { + offset++; + mul = -1; + has_sign = true; + } + + int base = 10; + if (*(s + offset) == '0') { + char b = *(s + offset + 1); + switch (b) { + case 'x': + offset += 2; + base = 16; + break; + case 'b': + offset += 2; + base = 2; + break; + case 'o': + offset += 2; + base = 8; + break; + default: + b_string_release(str); + return false; + } + } + + if (has_sign && base != 10) { + b_string_release(str); + return false; + } + + if (offset == len) { + b_string_release(str); + return false; + } + + bool is_valid = true; + bool is_double = false; + + char previous = 0; + for (size_t i = offset; i < len; i++) { + char c = s[i]; + + if (previous == '_' && !isnumber(c)) { + is_valid = false; + break; + } + + if (c == '_') { + if (!isnumber(previous)) { + is_valid = false; + break; + } + + b_string_remove(str, i, 1); + len--; + i--; + previous = c; + continue; + } + + if (c == 'e' || c == '.') { + if (!isnumber(c)) { + is_valid = false; + break; + } + is_double = true; + previous = c; + continue; + } + + if ((c == '-' || c == '+') && previous != 'e') { + is_valid = false; + break; + } + + previous = c; + } + + if (previous == '_' || previous == '.') { + is_valid = false; + } + + if (is_double && base != 10) { + is_valid = false; + } + + if (!is_valid) { + b_string_release(str); + return false; + } + + double d = 0; + long long i = 0; + + if (is_double) { + int r = 0; + int len = strlen(s + offset); + // d = strtold(s + offset, &ep) * mul; + int ret = sscanf(s + offset, "%lf%n", &d, &r); + d *= mul; + is_valid = (ret == 1) && r == len; + } else { + char *ep; + i = strtoll(s + offset, &ep, base) * mul; + is_valid = ((*ep) == 0); + } + + b_string_release(str); + + if (!is_valid) { + return false; + } + + struct token *tok = enqueue_token(ctx, is_double ? TOK_FLOAT : TOK_INT); + tok->tok_str = b_string_steal(token_str); + + if (is_double) { + tok->tok_value.f.v = d; + } else { + tok->tok_value.i.v = i; + } + + return true; +} + +static bool try_convert_word_to_bool(struct ctx *ctx, struct b_string *token_str) +{ + const char *s = b_string_ptr(token_str); + struct token *tok = NULL; + + if (!strcmp(s, "true")) { + tok = enqueue_token(ctx, TOK_BOOL); + tok->tok_str = b_string_steal(token_str); + tok->tok_value.b = true; + } else if (!strcmp(s, "false")) { + tok = enqueue_token(ctx, TOK_BOOL); + tok->tok_str = b_string_steal(token_str); + tok->tok_value.b = false; + } else { + return false; + } + + return true; +} + +static void split_word(struct ctx *ctx, struct b_string *wordbuf) +{ + long len = b_string_get_size(wordbuf, B_STRLEN_NORMAL); + if (!len) { + return; + } + + char *s = b_string_steal(wordbuf); + int trailing_dots = 0; + + char prev = 0; + + for (long i = 0; i < len; i++) { + if (prev == '.' && s[i] == '.') { + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } + + prev = s[i]; + } + + if (!B_OK(ctx->ctx_status)) { + free(s); + return; + } + + for (; len > 0; len--) { + if (s[len - 1] == '.') { + trailing_dots++; + } else { + break; + } + } + + char *ep; + char *tok = strtok_r(s, ".", &ep); + + unsigned int i = 0; + + while (tok) { + if (*tok == 0) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } + + if (i > 0) { + enqueue_token(ctx, TOK_DOT); + } + + struct token *word = enqueue_token(ctx, TOK_WORD); + word->tok_str = b_strdup(tok); + + i++; + tok = strtok_r(NULL, ".", &ep); + } + + for (long i = 0; i < trailing_dots; i++) { + enqueue_token(ctx, TOK_DOT); + } + + free(s); +} + +static void read_number(struct ctx *ctx) +{ + int c = 0; + struct b_string *wordbuf = get_wordbuf(ctx); + + while (1) { + c = peek_char(ctx); + + if (c == -1 || !B_OK(ctx->ctx_status)) { + break; + } + + bool ok = isalnum(c) || c == '_' || c == '-' || c == '.' + || c == '+'; + + if (!ok) { + break; + } + + char s[] = {c, 0}; + b_string_append_cstr(wordbuf, s); + advance_char(ctx); + } + + bool is_number = try_convert_word_to_number(ctx, wordbuf); + + if (!is_number) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + } +} + +static void read_word(struct ctx *ctx) +{ + int c = 0; + struct b_string *wordbuf = get_wordbuf(ctx); + + while (1) { + c = peek_char(ctx); + + if (c == -1 || !B_OK(ctx->ctx_status)) { + break; + } + + bool ok = isalnum(c) || c == '_' || c == '-' || c == '.'; + + if (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS) { + ok = ok || c == ':' || c == ' ' || c == '+'; + } + + if (ctx->ctx_flags & CTX_ENABLE_NUMBERS) { + ok = ok || c == '+'; + } + + if (!ok) { + break; + } + + char s[] = {c, 0}; + b_string_append_cstr(wordbuf, s); + advance_char(ctx); + } + + bool parsed = false; + b_string_trim(wordbuf); + + if (ctx->ctx_flags & CTX_ENABLE_BOOLS) { + parsed = try_convert_word_to_bool(ctx, wordbuf); + } + + if (!parsed && (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS)) { + parsed = try_convert_word_to_timestamp(ctx, wordbuf); + } + + if (!parsed && (ctx->ctx_flags & CTX_ENABLE_NUMBERS)) { + parsed = try_convert_word_to_number(ctx, wordbuf); + } + + if (parsed) { + return; + } + + const char *s = b_string_ptr(wordbuf); + for (size_t i = 0; s[i]; i++) { + c = s[i]; + bool ok = isalnum(c) || c == '_' || c == '-' || c == '.'; + if (!ok) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return; + } + } + + split_word(ctx, wordbuf); +} + +static void read_string(struct ctx *ctx, bool squote) +{ + advance_char(ctx); + + char term = '"'; + if (squote) { + term = '\''; + } + + bool multiline = false; + struct token *tok = enqueue_token(ctx, TOK_STRING); + struct b_string *str = get_wordbuf(ctx); + + int c = peek_char(ctx); + if (c == term) { + advance_char(ctx); + c = peek_char(ctx); + + if (c == term) { + advance_char(ctx); + c = peek_char(ctx); + multiline = true; + } else { + return; + } + + if (c == '\n') { + advance_char(ctx); + } + } + + if (multiline && !(ctx->ctx_flags & CTX_ENABLE_MULTILINE_STRING)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return; + } + + bool fail = false; + bool esc = false; + + tok->tok_type = TOK_STRING; + + while (!fail) { + c = peek_char(ctx); + if (c == -1) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + + char s[] = {c, 0}; + + if (esc) { + if (c == '\n') { + while (c != -1 && isspace(c)) { + advance_char(ctx); + c = peek_char(ctx); + } + + esc = false; + continue; + } + + switch (c) { + case '"': + case '\\': + b_string_append_cstr(str, s); + break; + case 'b': + s[0] = '\b'; + b_string_append_cstr(str, s); + break; + case 't': + s[0] = '\t'; + b_string_append_cstr(str, s); + break; + case 'n': + s[0] = '\n'; + b_string_append_cstr(str, s); + break; + case 'r': + s[0] = '\r'; + b_string_append_cstr(str, s); + break; + case 'f': + s[0] = '\f'; + b_string_append_cstr(str, s); + break; + default: + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + + esc = false; + advance_char(ctx); + continue; + } + + else if (c == '\\' && !squote) { + esc = true; + } + + else if (c == '\n') { + if (!multiline) { + fail = true; + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } + + b_string_append_cstr(str, s); + } + + else if (c == term) { + advance_char(ctx); + + if (!multiline) { + break; + } + + c = peek_char(ctx); + if (c != term) { + b_string_append_cstr(str, s); + continue; + } + + advance_char(ctx); + c = peek_char(ctx); + if (c != term) { + b_string_append_cstr(str, s); + b_string_append_cstr(str, s); + continue; + } + + advance_char(ctx); + c = peek_char(ctx); + if (c == term) { + b_string_append_cstr(str, s); + advance_char(ctx); + } + break; + } + + else { + b_string_append_cstr(str, s); + } + + advance_char(ctx); + } + + if (fail) { + discard_token(ctx); + return; + } + + tok->tok_str = b_string_steal(str); +} + +static void read_symbol(struct ctx *ctx) +{ + int c = peek_char(ctx); + advance_char(ctx); + + struct token *tok = enqueue_token(ctx, TOK_NONE); + + char s[] = {c, 0}; + + switch (c) { + case '=': + tok->tok_type = TOK_EQUAL; + break; + case '.': + tok->tok_type = TOK_DOT; + break; + case ',': + tok->tok_type = TOK_COMMA; + break; + case '[': + if (!(ctx->ctx_flags & CTX_ENABLE_LONG_SYMBOLS)) { + tok->tok_type = TOK_LEFT_BRACKET; + break; + } + + c = peek_char(ctx); + switch (c) { + case '[': + tok->tok_type = TOK_DOUBLE_LEFT_BRACKET; + advance_char(ctx); + break; + default: + tok->tok_type = TOK_LEFT_BRACKET; + break; + } + break; + case ']': + if (!(ctx->ctx_flags & CTX_ENABLE_LONG_SYMBOLS)) { + /* if we're parsing more complex values, don't generate double-symbol tokens */ + tok->tok_type = TOK_RIGHT_BRACKET; + break; + } + + c = peek_char(ctx); + switch (c) { + case ']': + tok->tok_type = TOK_DOUBLE_RIGHT_BRACKET; + advance_char(ctx); + break; + default: + tok->tok_type = TOK_RIGHT_BRACKET; + break; + } + break; + case '{': + tok->tok_type = TOK_LEFT_BRACE; + break; + case '}': + tok->tok_type = TOK_RIGHT_BRACE; + break; + default: + discard_token(ctx); + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } +} + +static void read_newline(struct ctx *ctx) +{ + int c = peek_char(ctx); + while (c == '\n') { + advance_char(ctx); + c = peek_char(ctx); + } + + enqueue_token(ctx, TOK_NEWLINE); +} + +static void read_comment(struct ctx *ctx) +{ + int c = peek_char(ctx); + while (c != '\n' && c != -1) { + advance_char(ctx); + c = peek_char(ctx); + } + + if (!B_OK(ctx->ctx_status)) { + return; + } + + advance_char(ctx); + enqueue_token(ctx, TOK_NEWLINE); +} + +static enum b_status advance_token(struct ctx *ctx) +{ + discard_token(ctx); + + if (!b_queue_empty(&ctx->ctx_tokens)) { + return B_SUCCESS; + } + + int c = peek_char(ctx); + while (isspace(c) && c != '\n') { + advance_char(ctx); + c = peek_char(ctx); + } + + if (c == -1) { + ctx->ctx_flags |= CTX_EOF; + return B_ERR_NO_DATA; + } + +#if 1 + while (c == '#') { + read_comment(ctx); + c = peek_char(ctx); + } +#endif + + if (!B_OK(ctx->ctx_status)) { + return ctx->ctx_status; + } + + if (c == '"') { + read_string(ctx, false); + } else if (c == '\'') { + read_string(ctx, true); + } else if ((c == '+' || c == '-') && ctx->ctx_flags & CTX_ENABLE_NUMBERS) { + read_number(ctx); + } else if (ispunct(c)) { + read_symbol(ctx); + } else if (c == '\n') { + read_newline(ctx); + } else { + read_word(ctx); + } + + return ctx->ctx_status; +} + +static struct token *peek_token(struct ctx *ctx) +{ + struct b_queue_entry *entry = b_queue_first(&ctx->ctx_tokens); + if (!entry) { + return NULL; + } + + return b_unbox(struct token, entry, tok_entry); +} + +static void ctx_cleanup(struct ctx *ctx) +{ + if (ctx->ctx_linebuf_stream) { + b_stream_close(ctx->ctx_linebuf_stream); + ctx->ctx_linebuf_stream = NULL; + } + + if (ctx->ctx_linebuf) { + b_string_release(ctx->ctx_linebuf); + ctx->ctx_linebuf = NULL; + } + + if (ctx->ctx_wordbuf) { + b_string_release(ctx->ctx_wordbuf); + ctx->ctx_wordbuf = NULL; + } + + if (ctx->ctx_static_objects) { + b_hashmap_release(ctx->ctx_static_objects); + ctx->ctx_static_objects = NULL; + } +} + +static enum b_status ctx_init(struct ctx *ctx) +{ + memset(ctx, 0x0, sizeof *ctx); + + ctx->ctx_linebuf = b_string_create(); + ctx->ctx_wordbuf = b_string_create(); + + b_string_open_stream(ctx->ctx_linebuf, &ctx->ctx_linebuf_stream); + + ctx->ctx_static_objects = b_hashmap_create(NULL, NULL); + + return B_SUCCESS; +} + +static enum b_status toml_serialise( + struct b_serial_ctx *serial, struct b_object *src, + struct b_stream *dest, enum b_serial_flags flags) +{ + return B_SUCCESS; +} + +static void print_token(struct token *tok) +{ + switch (tok->tok_type) { + case TOK_NONE: + printf("TOK_NONE\n"); + break; + case TOK_WORD: + printf("TOK_WORD %s\n", tok->tok_str); + break; + case TOK_STRING: + printf("TOK_STRING %s\n", tok->tok_str); + break; + case TOK_TIMESTAMP: + printf("TOK_TIMESTAMP %04ld-%02ld-%02ld " + "%02ld:%02ld:%02ld.%04ld %c" + "%02ld:%02ld\n", + b_datetime_year(tok->tok_value.time), + b_datetime_month(tok->tok_value.time), + b_datetime_day(tok->tok_value.time), + b_datetime_hour(tok->tok_value.time), + b_datetime_minute(tok->tok_value.time), + b_datetime_second(tok->tok_value.time), + b_datetime_subsecond(tok->tok_value.time), + b_datetime_zone_offset_is_negative(tok->tok_value.time) + ? '-' + : '+', + b_datetime_zone_offset_hour(tok->tok_value.time), + b_datetime_zone_offset_minute(tok->tok_value.time)); + break; + case TOK_INT: + printf("TOK_INT "); + if (tok->tok_value.i.nan) { + printf("NaN"); + } else { + printf("%lld", tok->tok_value.i.v); + } + + printf("\n"); + break; + case TOK_FLOAT: + printf("TOK_FLOAT "); + if (tok->tok_value.f.nan) { + printf("NaN"); + } else { + printf("%lf", tok->tok_value.f.v); + } + + printf("\n"); + break; + case TOK_BOOL: + printf("TOK_BOOL %s\n", tok->tok_value.b ? "true" : "false"); + break; + case TOK_NEWLINE: + printf("TOK_NEWLINE\n"); + break; + case TOK_EQUAL: + printf("TOK_EQUAL\n"); + break; + case TOK_DOT: + printf("TOK_DOT\n"); + break; + case TOK_COMMA: + printf("TOK_COMMA\n"); + break; + case TOK_LEFT_BRACKET: + printf("TOK_LEFT_BRACKET\n"); + break; + case TOK_RIGHT_BRACKET: + printf("TOK_RIGHT_BRACKET\n"); + break; + case TOK_DOUBLE_LEFT_BRACKET: + printf("TOK_DOUBLE_LEFT_BRACKET\n"); + break; + case TOK_DOUBLE_RIGHT_BRACKET: + printf("TOK_DOUBLE_RIGHT_BRACKET\n"); + break; + case TOK_LEFT_BRACE: + printf("TOK_LEFT_BRACE\n"); + break; + case TOK_RIGHT_BRACE: + printf("TOK_RIGHT_BRACE\n"); + break; + default: + break; + } +} + +static enum b_status parse_value(struct ctx *ctx, struct b_object **result); +static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *container); + +static enum b_status parse_timestamp(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_datetime *dt = tok->tok_value.time; + tok->tok_value.time = NULL; + + *result = B_OBJECT(dt); + return B_SUCCESS; +} + +static enum b_status parse_string(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_string *str = b_string_create_from_cstr(tok->tok_str); + if (!str) { + return B_ERR_NO_MEMORY; + } + + *result = B_OBJECT(str); + return B_SUCCESS; +} + +static enum b_status parse_int(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_number *val = B_INT64(tok->tok_value.i.v); + if (!val) { + return B_ERR_NO_MEMORY; + } + + *result = B_OBJECT(val); + return B_SUCCESS; +} + +static enum b_status parse_float(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_number *val = B_DOUBLE(tok->tok_value.f.v); + if (!val) { + return B_ERR_NO_MEMORY; + } + + *result = B_OBJECT(val); + return B_SUCCESS; +} + +static enum b_status parse_bool(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_number *val = B_INT8(tok->tok_value.b); + if (!val) { + return B_ERR_NO_MEMORY; + } + + *result = B_OBJECT(val); + return B_SUCCESS; +} + +static enum b_status parse_table_inline(struct ctx *ctx, struct b_object **result) +{ + advance_token(ctx); + + struct b_dict *table = b_dict_create(); + if (!table) { + return B_ERR_NO_MEMORY; + } + + bool done = false; + while (!done) { + struct b_object *value; + enum b_status status = parse_key_value_pair(ctx, table); + if (!B_OK(status)) { + b_dict_release(table); + return status; + } + + struct token *tok = peek_token(ctx); + + if (!tok) { + b_dict_release(table); + return status; + } + + switch (tok->tok_type) { + case TOK_RIGHT_BRACE: + done = true; + break; + case TOK_COMMA: + advance_token(ctx); + break; + default: + b_dict_release(table); + return B_ERR_BAD_FORMAT; + } + } + + *result = B_OBJECT(table); + return B_SUCCESS; +} + +static void skip_newlines(struct ctx *ctx) +{ + struct token *tok = peek_token(ctx); + + while (tok && tok->tok_type == TOK_NEWLINE) { + advance_token(ctx); + tok = peek_token(ctx); + } +} + +static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **result) +{ + bool done = false; + advance_token(ctx); + + struct b_array *array = b_array_create(); + if (!array) { + return B_ERR_NO_MEMORY; + } + + struct token *tok = peek_token(ctx); + if (!tok) { + b_array_release(array); + return B_ERR_BAD_FORMAT; + } + + if (tok->tok_type == TOK_RIGHT_BRACKET) { + advance_token(ctx); + done = true; + } + + while (!done) { + skip_newlines(ctx); + + tok = peek_token(ctx); + + if (!tok) { + b_array_release(array); + return B_ERR_BAD_FORMAT; + } + + if (tok->tok_type == TOK_RIGHT_BRACKET) { + done = true; + break; + } + + struct b_object *value; + enum b_status status = parse_value(ctx, &value); + if (!B_OK(status)) { + b_array_release(array); + return status; + } + + b_array_append(array, B_RV(value)); + + skip_newlines(ctx); + + tok = peek_token(ctx); + + if (tok && tok->tok_type == TOK_RIGHT_BRACKET) { + done = true; + break; + } + + if (!tok || tok->tok_type != TOK_COMMA) { + b_array_release(array); + return B_ERR_BAD_FORMAT; + } + + advance_token(ctx); + } + + *result = B_OBJECT(array); + return B_SUCCESS; +} + +static enum b_status parse_value(struct ctx *ctx, struct b_object **result) +{ + + struct token *tok = peek_token(ctx); + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + switch (tok->tok_type) { + case TOK_STRING: + return parse_string(ctx, result); + case TOK_INT: + return parse_int(ctx, result); + case TOK_FLOAT: + return parse_float(ctx, result); + case TOK_BOOL: + return parse_bool(ctx, result); + case TOK_TIMESTAMP: + return parse_timestamp(ctx, result); + case TOK_LEFT_BRACKET: + return parse_array_inline(ctx, result); + case TOK_LEFT_BRACE: + return parse_table_inline(ctx, result); + default: + return B_ERR_BAD_FORMAT; + } +} + +static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *container) +{ + struct token *tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + char *key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + while (tok && tok->tok_type == TOK_DOT) { + struct b_object *sub_dict = b_dict_at(container, key); + if (!sub_dict) { + sub_dict = B_OBJECT(b_dict_create()); + b_dict_put(container, key, B_RV(sub_dict)); + } else if (sub_dict && !B_OBJECT_IS(sub_dict, DICT)) { + free(key); + return B_ERR_BAD_FORMAT; + } + + if (ctx_object_is_static(ctx, sub_dict)) { + free(key); + return B_ERR_BAD_FORMAT; + } + + ctx_add_static_object(ctx, sub_dict); + + advance_token(ctx); + tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + free(key); + return B_ERR_BAD_FORMAT; + } + + container = B_DICT(sub_dict); + free(key); + key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + } + + if (b_dict_has_key(container, key)) { + return B_ERR_BAD_FORMAT; + } + + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + if (tok->tok_type != TOK_EQUAL) { + return B_ERR_BAD_FORMAT; + } + + ctx->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS; + ctx->ctx_flags |= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS + | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING; + advance_token(ctx); + + struct b_object *value = NULL; + enum b_status status = parse_value(ctx, &value); + + ctx->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS; + ctx->ctx_flags + &= ~(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS + | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING); + + if (!B_OK(status)) { + return status; + } + + advance_token(ctx); + + b_dict_put(container, key, B_RV(value)); + + if (B_OBJECT_IS(value, DICT) || B_OBJECT_IS(value, ARRAY)) { + ctx_add_static_object(ctx, value); + } + + return B_SUCCESS; +} + +static enum b_status parse_table_header( + struct ctx *ctx, struct b_dict *container, struct b_dict **new_container) +{ + advance_token(ctx); + struct token *tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + char *key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + while (tok && tok->tok_type == TOK_DOT) { + struct b_object *sub_dict = b_dict_at(container, key); + if (!sub_dict) { + sub_dict = B_OBJECT(b_dict_create()); + b_dict_put(container, key, B_RV(sub_dict)); + } else if (B_OBJECT_IS(sub_dict, ARRAY)) { + sub_dict = b_array_at( + B_ARRAY(sub_dict), + b_array_size(B_ARRAY(sub_dict)) - 1); + } else if (!B_OBJECT_IS(sub_dict, DICT)) { + return B_ERR_BAD_FORMAT; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + container = B_DICT(sub_dict); + free(key); + key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + } + + if (!tok || tok->tok_type != TOK_RIGHT_BRACKET) { + return B_ERR_BAD_FORMAT; + } + + struct b_dict *new_table = B_DICT(b_dict_at(container, key)); + + if (new_table) { + if (!B_OBJECT_IS(new_table, DICT) + || ctx_object_is_static(ctx, B_OBJECT(new_table))) { + return B_ERR_BAD_FORMAT; + } + + ctx_add_static_object(ctx, B_OBJECT(new_table)); + } else { + new_table = b_dict_create(); + + if (!new_table) { + free(key); + return B_ERR_NO_MEMORY; + } + + b_dict_put(container, key, B_RV(new_table)); + ctx_add_static_object(ctx, B_OBJECT(new_table)); + } + + free(key); + + advance_token(ctx); + *new_container = new_table; + return B_SUCCESS; +} + +static enum b_status parse_array_header( + struct ctx *ctx, struct b_dict *container, struct b_dict **new_container) +{ + advance_token(ctx); + struct token *tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + char *key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + while (tok && tok->tok_type == TOK_DOT) { + struct b_object *sub_dict = b_dict_at(container, key); + if (!sub_dict) { + sub_dict = B_OBJECT(b_dict_create()); + b_dict_put(container, key, B_RV(sub_dict)); + } else if (B_OBJECT_IS(sub_dict, ARRAY)) { + sub_dict = b_array_at( + B_ARRAY(sub_dict), + b_array_size(B_ARRAY(sub_dict)) - 1); + } else if (!B_OBJECT_IS(sub_dict, DICT)) { + return B_ERR_BAD_FORMAT; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + container = B_DICT(sub_dict); + free(key); + key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + } + + if (!tok || tok->tok_type != TOK_DOUBLE_RIGHT_BRACKET) { + return B_ERR_BAD_FORMAT; + } + + struct b_array *array = B_ARRAY(b_dict_get(container, key)); + if (!array) { + array = b_array_create(); + b_dict_put(container, key, B_RV(array)); + } else if ( + !B_OBJECT_IS(array, ARRAY) + || ctx_object_is_static(ctx, B_OBJECT(array))) { + return B_ERR_BAD_FORMAT; + } + + struct b_dict *new_table = b_dict_create(); + if (!new_table) { + free(key); + return B_ERR_NO_MEMORY; + } + + b_array_append(array, B_RV(new_table)); + free(key); + + advance_token(ctx); + *new_container = new_table; + return B_SUCCESS; +} + +static enum b_status parse_root(struct ctx *ctx, struct b_dict **result) +{ + enum b_status status = B_SUCCESS; + struct b_dict *root = b_dict_create(); + struct b_dict *current = root; + + while (!(ctx->ctx_flags & CTX_EOF) && B_OK(status)) { + struct token *tok = peek_token(ctx); + if (!tok) { + break; + } + + switch (tok->tok_type) { + case TOK_LEFT_BRACKET: + status = parse_table_header(ctx, root, ¤t); + if (!B_OK(status)) { + break; + } + + tok = peek_token(ctx); + if (tok && tok->tok_type != TOK_NEWLINE) { + status = B_ERR_BAD_FORMAT; + } + break; + case TOK_DOUBLE_LEFT_BRACKET: + status = parse_array_header(ctx, root, ¤t); + if (!B_OK(status)) { + break; + } + + tok = peek_token(ctx); + if (tok && tok->tok_type != TOK_NEWLINE) { + status = B_ERR_BAD_FORMAT; + } + break; + case TOK_WORD: + case TOK_STRING: + status = parse_key_value_pair(ctx, current); + if (!B_OK(status)) { + break; + } + + tok = peek_token(ctx); + if (tok && tok->tok_type != TOK_NEWLINE) { + status = B_ERR_BAD_FORMAT; + } + break; + case TOK_NEWLINE: + advance_token(ctx); + break; + default: + status = B_ERR_BAD_FORMAT; + break; + } + + if (!B_OK(ctx->ctx_status) && ctx->ctx_status != B_ERR_NO_DATA) { + status = ctx->ctx_status; + } + } + + if (!B_OK(status)) { + b_dict_release(root); + root = NULL; + } + + *result = root; + return status; +} + +static enum b_status toml_deserialise( + struct b_serial_ctx *serial, struct b_stream *src, + struct b_object **dest, enum b_serial_flags flags) +{ + struct ctx ctx = {0}; + enum b_status status = ctx_init(&ctx); + + if (!B_OK(status)) { + return status; + } + + ctx.ctx_src = src; + + status = advance_token(&ctx); + if (!B_OK(status)) { + return status; + } + + struct b_dict *result = NULL; + status = parse_root(&ctx, &result); + if (!B_OK(status)) { + return status; + } + + *dest = B_OBJECT(result); +#if 0 + ctx.ctx_flags + = CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS | CTX_ENABLE_BOOLS; + + while (!(ctx.ctx_flags & CTX_EOF) && B_OK(ctx.ctx_status)) { + struct token *tok = peek_token(&ctx); + print_token(tok); + status = advance_token(&ctx); + } +#endif + + return B_SUCCESS; +} + +const struct b_serial_format_ops z__b_toml_format_ops = { + .fmt_serialise = toml_serialise, + .fmt_deserialise = toml_deserialise, +};