Files
bluelib/serial/toml.c
2025-09-19 15:47:59 +01:00

1810 lines
33 KiB
C

#include "blue/core/status.h"
#include "blue/object/datetime.h"
#include "serial.h"
#include <blue/object/array.h>
#include <blue/object/dict.h>
#include <blue/object/hashmap.h>
#include <blue/object/number.h>
#include <blue/object/string.h>
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* True when `tok` is non-NULL and is either a bare word or a quoted string.
 * The key-parsing functions use it to test each component of a (possibly
 * dotted) key. */
#define IS_VALID_KEY_COMPONENT(tok) \
((tok) && ((tok)->tok_type == TOK_WORD || (tok)->tok_type == TOK_STRING))
/* Kinds of token placed on the context's token queue. */
enum token_type {
TOK_NONE = 0, /* placeholder; read_symbol() enqueues this before choosing a type */
TOK_WORD, /* bare word (one dot-separated component, see split_word()) */
TOK_STRING, /* quoted string; tok_str holds the decoded text */
TOK_INT, /* integer; value in tok_value.i */
TOK_UINT, /* not produced by any code visible in this file */
TOK_FLOAT, /* floating-point number; value in tok_value.f */
TOK_BOOL, /* `true` / `false`; value in tok_value.b */
TOK_TIMESTAMP, /* date/time; value in tok_value.time */
TOK_NEWLINE, /* one or more line breaks, or the end of a comment */
TOK_EQUAL, /* `=` */
TOK_DOT, /* `.` */
TOK_COMMA, /* `,` */
TOK_LEFT_BRACKET, /* `[` */
TOK_RIGHT_BRACKET, /* `]` */
TOK_DOUBLE_LEFT_BRACKET, /* `[[`, only when CTX_ENABLE_LONG_SYMBOLS is set */
TOK_DOUBLE_RIGHT_BRACKET, /* `]]`, only when CTX_ENABLE_LONG_SYMBOLS is set */
TOK_LEFT_BRACE, /* `{` */
TOK_RIGHT_BRACE, /* `}` */
};
/* Broken-down date/time with a zone offset. In the visible code this type is
 * referenced only by the disabled (#if 0) hand-written timestamp parser; the
 * live tokeniser stores a b_datetime instead. */
struct timestamp {
unsigned int ts_year, ts_month, ts_day;
unsigned short ts_hour, ts_min, ts_sec;
unsigned int ts_msec; /* fractional-second digits accumulated as an integer */
unsigned short ts_zone_offset_hour, ts_zone_offset_minute;
unsigned char ts_zone_offset_negative; /* non-zero when the offset sign is '-' */
};
/* One lexical token. Tokens are heap-allocated by enqueue_token() and freed
 * (together with tok_str) by discard_token(). */
struct token {
enum token_type tok_type;
struct b_queue_entry tok_entry; /* link in ctx->ctx_tokens */
char *tok_str; /* text of the token; released with free(); may be NULL */
union {
struct {
int64_t v;
bool nan; /* only read by print_token() in the visible code */
} i; /* TOK_INT */
struct {
double v;
bool nan; /* only read by print_token() in the visible code */
} f; /* TOK_FLOAT */
bool b; /* TOK_BOOL */
// struct timestamp time;
b_datetime *time; /* TOK_TIMESTAMP; parse_timestamp() takes it and sets NULL */
} tok_value;
};
/* Bit flags stored in ctx->ctx_flags. The ENABLE_* bits switch the tokeniser
 * between "key" mode and "value" mode (see parse_key_value_pair()). */
enum ctx_flags {
CTX_EOF = 0x01u, /* set by advance_token() when no more characters are available */
CTX_ENABLE_NUMBERS = 0x02u, /* words / signed tokens may become TOK_INT or TOK_FLOAT */
CTX_ENABLE_TIMESTAMPS = 0x04u, /* words may contain ':', ' ', '+' and become TOK_TIMESTAMP */
CTX_ENABLE_BOOLS = 0x08u, /* `true` / `false` become TOK_BOOL */
CTX_ENABLE_LONG_SYMBOLS = 0x10u, /* `[[` and `]]` are emitted as single tokens */
CTX_ENABLE_MULTILINE_STRING = 0x20u, /* triple-quoted strings are accepted */
};
/* Parser states. NOTE(review): nothing in the visible code references this
 * enum - confirm whether it is still needed. */
enum ctx_state {
CTX_STATE_NONE = 0,
CTX_STATE_IN_TABLE,
CTX_STATE_IN_ARRAY,
};
/* Tokeniser/parser state for one deserialisation run. */
struct ctx {
enum ctx_flags ctx_flags; /* CTX_* bits */
b_stream *ctx_src; /* input stream the document is read from */
b_string *ctx_wordbuf; /* scratch buffer for the word/string being read */
b_string *ctx_linebuf; /* current input line, with a '\n' appended */
b_stream *ctx_linebuf_stream; /* stream opened on ctx_linebuf; lines are read into it */
size_t ctx_linebuf_pos; /* index of the next unread character in ctx_linebuf */
enum b_status ctx_status; /* last tokeniser error, checked via B_OK() */
b_hashmap *ctx_static_objects; /* set of object pointers (see ctx_add_static_object()) */
b_queue ctx_tokens; /* pending tokens; the front entry is the current token */
};
/*
 * Record `obj` in the context's set of "static" objects.
 *
 * The object pointer itself is used as the hashmap key (flagged
 * B_HASHMAP_KEY_F_INTVALUE); the stored value is zero-initialised, so the
 * hashmap effectively acts as a set of pointers.
 */
static void ctx_add_static_object(struct ctx *ctx, struct b_object *obj)
{
	b_hashmap_value unused = {0};
	b_hashmap_key ident = {
		.key_data = obj,
		.key_size = sizeof obj,
		.key_flags = B_HASHMAP_KEY_F_INTVALUE,
	};

	b_hashmap_put(ctx->ctx_static_objects, &ident, &unused);
}
/*
 * Report whether `obj` was previously recorded with ctx_add_static_object().
 *
 * The lookup key is built the same way as on insertion: the pointer value
 * itself, flagged B_HASHMAP_KEY_F_INTVALUE.
 */
static bool ctx_object_is_static(struct ctx *ctx, struct b_object *obj)
{
	b_hashmap_key ident = {
		.key_data = obj,
		.key_size = sizeof obj,
		.key_flags = B_HASHMAP_KEY_F_INTVALUE,
	};

	return b_hashmap_has_key(ctx->ctx_static_objects, &ident);
}
/*
 * Report whether the line buffer still holds unread characters.
 */
static bool data_available(struct ctx *ctx)
{
	size_t filled = b_string_get_size(ctx->ctx_linebuf, B_STRLEN_NORMAL);

	/* Both operands are unsigned, so an empty buffer (filled == 0) can
	 * never satisfy this comparison and needs no separate check. */
	return ctx->ctx_linebuf_pos < filled;
}
static enum b_status refill_linebuf(struct ctx *ctx)
{
b_string_clear(ctx->ctx_linebuf);
ctx->ctx_linebuf_pos = 0;
b_stream_seek(ctx->ctx_linebuf_stream, 0, B_STREAM_SEEK_START);
enum b_status status
= b_stream_read_line_s(ctx->ctx_src, ctx->ctx_linebuf_stream);
if (!B_OK(status)) {
return status;
}
b_string_append_cstr(ctx->ctx_linebuf, "\n");
return B_SUCCESS;
}
/*
 * Hand out the context's shared scratch string, emptied and ready for reuse.
 * The same buffer is returned on every call.
 */
static struct b_string *get_wordbuf(struct ctx *ctx)
{
	struct b_string *scratch = ctx->ctx_wordbuf;

	b_string_clear(scratch);
	return scratch;
}
/*
 * Decide whether character code `c` is acceptable input.
 *
 * Zero and negative codes are rejected, as is a fixed list of individual
 * byte values; every other value is accepted.
 */
static bool is_valid_char(int c)
{
	/* Individual codes rejected in addition to anything <= 0. */
	static const int rejected[] = {
		'\r', '\b', 0x0C, 0x1F, 0x7F, 0xFF, 0x10,
	};

	if (c <= 0) {
		return false;
	}

	for (size_t k = 0; k < sizeof rejected / sizeof rejected[0]; k++) {
		if (c == rejected[k]) {
			return false;
		}
	}

	return true;
}
static int advance_char(struct ctx *ctx)
{
enum b_status status = B_SUCCESS;
if (!data_available(ctx)) {
status = refill_linebuf(ctx);
}
if (!B_OK(status)) {
ctx->ctx_status = status;
return -1;
}
if (!data_available(ctx)) {
return -1;
}
const char *s = b_string_ptr(ctx->ctx_linebuf);
int c = s[ctx->ctx_linebuf_pos++];
if (!is_valid_char(c)) {
ctx->ctx_status = B_ERR_BAD_FORMAT;
return -1;
}
return c;
}
static int peek_char(struct ctx *ctx)
{
enum b_status status = B_SUCCESS;
if (!data_available(ctx)) {
status = refill_linebuf(ctx);
}
if (!B_OK(status)) {
ctx->ctx_status = status;
return -1;
}
if (!data_available(ctx)) {
return -1;
}
const char *s = b_string_ptr(ctx->ctx_linebuf);
int c = s[ctx->ctx_linebuf_pos];
if (!is_valid_char(c)) {
ctx->ctx_status = B_ERR_BAD_FORMAT;
return -1;
}
return c;
}
/* Disabled code: earlier comment-skipping wrappers around the character
 * readers. They call __peek_char()/__advance_char(), which are not defined in
 * the visible code; comments are now handled by read_comment(), called from
 * advance_token(). */
#if 0
static int peek_char(struct ctx *ctx)
{
int c = __peek_char(ctx);
if (c != '#') {
return c;
}
c = __peek_char(ctx);
while (c != '\n' && c != -1) {
__advance_char(ctx);
c = __peek_char(ctx);
}
return c;
}
static int advance_char(struct ctx *ctx)
{
int c = __advance_char(ctx);
if (c != '#') {
return c;
}
c = __peek_char(ctx);
while (c != '\n' && c != -1) {
__advance_char(ctx);
c = __peek_char(ctx);
}
return c;
}
#endif
/*
 * Allocate a zero-filled token of the given type and append it to the back
 * of the context's token queue.
 *
 * Returns the new token, or NULL when the allocation fails (in which case
 * nothing is queued).
 */
static struct token *enqueue_token(struct ctx *ctx, enum token_type type)
{
	struct token *fresh = calloc(1, sizeof *fresh);

	if (fresh == NULL) {
		return NULL;
	}

	fresh->tok_type = type;
	b_queue_push_back(&ctx->ctx_tokens, &fresh->tok_entry);

	return fresh;
}
/*
 * Remove the token at the front of the queue and free it together with its
 * text. Does nothing when the queue is empty.
 *
 * NOTE(review): tok_value.time of a TOK_TIMESTAMP token is not released
 * here; parse_timestamp() clears it when it takes the value, but a timestamp
 * token discarded without being parsed appears to keep its b_datetime
 * alive - confirm.
 */
static void discard_token(struct ctx *ctx)
{
	struct b_queue_entry *front = b_queue_pop_front(&ctx->ctx_tokens);

	if (front == NULL) {
		return;
	}

	struct token *victim = b_unbox(struct token, front, tok_entry);

	/* free(NULL) is a no-op, so the text needs no guard. */
	free(victim->tok_str);
	free(victim);
}
/*
 * Try to interpret `token_str` as an RFC 3339 date/time.
 *
 * On success a TOK_TIMESTAMP token is queued that owns the parsed
 * b_datetime and the text taken from `token_str`, and true is returned.
 * Returns false, leaving `token_str` untouched, when the text does not
 * parse.
 *
 * NOTE(review): the result of enqueue_token() is used without a NULL check.
 */
static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *token_str)
{
	const char *text = b_string_ptr(token_str);
	b_datetime *parsed = b_datetime_parse(B_DATETIME_FORMAT_RFC3339, text);

	if (parsed == NULL) {
		return false;
	}

	struct token *stamp = enqueue_token(ctx, TOK_TIMESTAMP);

	stamp->tok_value.time = parsed;
	stamp->tok_str = b_string_steal(token_str);

	return true;
}
/* Disabled code: the earlier hand-written timestamp parser, superseded by the
 * b_datetime_parse()-based version. It fills a `struct timestamp` and assigns
 * it to tok_value.time, which is now a `b_datetime *`, so it would not
 * compile if re-enabled as is. */
#if 0
static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *token_str)
{
const char *s = b_string_ptr(token_str);
size_t len = b_string_get_size(token_str, B_STRLEN_NORMAL);
size_t i = 0, c = 0;
struct timestamp ts = {0};
bool has_date = false, has_time = false;
if (len >= 10 && s[4] == '-' && s[7] == '-') {
has_date = true;
}
if (len >= 8 && s[2] == ':' && s[5] == ':') {
has_time = true;
}
if (len >= 19 && s[4] == '-' && s[7] == '-'
&& (s[10] == 'T' || s[10] == ' ') && s[13] == ':' && s[16] == ':') {
has_date = true;
has_time = true;
}
if (!has_date && !has_time) {
return false;
}
if (has_date) {
for (c = 0; c < 4; c++, i++) {
if (!isdigit(s[i])) {
return false;
}
ts.ts_year *= 10;
ts.ts_year += (s[i] - '0');
}
if (s[i++] != '-') {
return false;
}
for (c = 0; c < 2; c++, i++) {
if (!isdigit(s[i])) {
return false;
}
ts.ts_month *= 10;
ts.ts_month += (s[i] - '0');
}
if (s[i++] != '-') {
return false;
}
for (c = 0; c < 2; c++, i++) {
if (!isdigit(s[i])) {
return false;
}
ts.ts_day *= 10;
ts.ts_day += (s[i] - '0');
}
}
if (has_date && has_time) {
if (s[i] != 'T' && s[i] != ' ') {
return false;
}
i++;
}
if (has_time) {
for (c = 0; c < 2; c++, i++) {
if (!isdigit(s[i])) {
return false;
}
ts.ts_hour *= 10;
ts.ts_hour += (s[i] - '0');
}
if (s[i++] != ':') {
return false;
}
for (c = 0; c < 2; c++, i++) {
if (!isdigit(s[i])) {
return false;
}
ts.ts_min *= 10;
ts.ts_min += (s[i] - '0');
}
if (s[i++] != ':') {
return false;
}
for (c = 0; c < 2; c++, i++) {
if (!isdigit(s[i])) {
return false;
}
ts.ts_sec *= 10;
ts.ts_sec += (s[i] - '0');
}
}
if (s[i] == '.') {
i++;
for (c = 0; s[i]; c++, i++) {
if (!isdigit(s[i])) {
break;
}
ts.ts_msec *= 10;
ts.ts_msec += (s[i] - '0');
}
if (c == 0) {
return false;
}
}
if (s[i] == '+' || s[i] == '-') {
ts.ts_zone_offset_negative = s[i] == '-';
i++;
for (c = 0; c < 2; c++, i++) {
if (!isdigit(s[i])) {
return false;
}
ts.ts_zone_offset_hour *= 10;
ts.ts_zone_offset_hour += (s[i] - '0');
}
if (s[i++] != ':') {
return false;
}
for (c = 0; c < 2; c++, i++) {
if (!isdigit(s[i])) {
return false;
}
ts.ts_zone_offset_minute *= 10;
ts.ts_zone_offset_minute += (s[i] - '0');
}
} else if (s[i] == 'Z') {
i++;
}
if (s[i] != 0) {
return false;
}
struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP);
tok->tok_str = b_string_steal(token_str);
tok->tok_value.time = ts;
return true;
}
#endif
static bool try_convert_word_to_number(struct ctx *ctx, struct b_string *token_str)
{
size_t len = b_string_get_size(token_str, B_STRLEN_NORMAL);
struct b_string *str = b_string_duplicate(token_str);
const char *s = b_string_ptr(str);
if (len == 0) {
return false;
}
size_t offset = 0;
bool has_sign = false;
int mul = 1;
if (s[0] == '+') {
offset++;
has_sign = true;
} else if (s[0] == '-') {
offset++;
mul = -1;
has_sign = true;
}
int base = 10;
if (*(s + offset) == '0') {
char b = *(s + offset + 1);
switch (b) {
case 'x':
offset += 2;
base = 16;
break;
case 'b':
offset += 2;
base = 2;
break;
case 'o':
offset += 2;
base = 8;
break;
default:
b_string_release(str);
return false;
}
}
if (has_sign && base != 10) {
b_string_release(str);
return false;
}
if (offset == len) {
b_string_release(str);
return false;
}
bool is_valid = true;
bool is_double = false;
char previous = 0;
for (size_t i = offset; i < len; i++) {
char c = s[i];
if (previous == '_' && !isnumber(c)) {
is_valid = false;
break;
}
if (c == '_') {
if (!isnumber(previous)) {
is_valid = false;
break;
}
b_string_remove(str, i, 1);
len--;
i--;
previous = c;
continue;
}
if (c == 'e' || c == '.') {
if (!isnumber(c)) {
is_valid = false;
break;
}
is_double = true;
previous = c;
continue;
}
if ((c == '-' || c == '+') && previous != 'e') {
is_valid = false;
break;
}
previous = c;
}
if (previous == '_' || previous == '.') {
is_valid = false;
}
if (is_double && base != 10) {
is_valid = false;
}
if (!is_valid) {
b_string_release(str);
return false;
}
double d = 0;
long long i = 0;
if (is_double) {
int r = 0;
int len = strlen(s + offset);
// d = strtold(s + offset, &ep) * mul;
int ret = sscanf(s + offset, "%lf%n", &d, &r);
d *= mul;
is_valid = (ret == 1) && r == len;
} else {
char *ep;
i = strtoll(s + offset, &ep, base) * mul;
is_valid = ((*ep) == 0);
}
b_string_release(str);
if (!is_valid) {
return false;
}
struct token *tok = enqueue_token(ctx, is_double ? TOK_FLOAT : TOK_INT);
tok->tok_str = b_string_steal(token_str);
if (is_double) {
tok->tok_value.f.v = d;
} else {
tok->tok_value.i.v = i;
}
return true;
}
static bool try_convert_word_to_bool(struct ctx *ctx, struct b_string *token_str)
{
const char *s = b_string_ptr(token_str);
struct token *tok = NULL;
if (!strcmp(s, "true")) {
tok = enqueue_token(ctx, TOK_BOOL);
tok->tok_str = b_string_steal(token_str);
tok->tok_value.b = true;
} else if (!strcmp(s, "false")) {
tok = enqueue_token(ctx, TOK_BOOL);
tok->tok_str = b_string_steal(token_str);
tok->tok_value.b = false;
} else {
return false;
}
return true;
}
static void split_word(struct ctx *ctx, struct b_string *wordbuf)
{
long len = b_string_get_size(wordbuf, B_STRLEN_NORMAL);
if (!len) {
return;
}
char *s = b_string_steal(wordbuf);
int trailing_dots = 0;
char prev = 0;
for (long i = 0; i < len; i++) {
if (prev == '.' && s[i] == '.') {
ctx->ctx_status = B_ERR_BAD_FORMAT;
break;
}
prev = s[i];
}
if (!B_OK(ctx->ctx_status)) {
free(s);
return;
}
for (; len > 0; len--) {
if (s[len - 1] == '.') {
trailing_dots++;
} else {
break;
}
}
char *ep;
char *tok = strtok_r(s, ".", &ep);
unsigned int i = 0;
while (tok) {
if (*tok == 0) {
ctx->ctx_status = B_ERR_BAD_FORMAT;
break;
}
if (i > 0) {
enqueue_token(ctx, TOK_DOT);
}
struct token *word = enqueue_token(ctx, TOK_WORD);
word->tok_str = b_strdup(tok);
i++;
tok = strtok_r(NULL, ".", &ep);
}
for (long i = 0; i < trailing_dots; i++) {
enqueue_token(ctx, TOK_DOT);
}
free(s);
}
/*
 * Read a run of number-like characters (alphanumerics, '_', '-', '.', '+')
 * into the word buffer and convert it to a numeric token.
 *
 * Sets ctx_status to B_ERR_BAD_FORMAT when the text is not a valid number.
 */
static void read_number(struct ctx *ctx)
{
	struct b_string *wordbuf = get_wordbuf(ctx);

	for (;;) {
		int c = peek_char(ctx);
		if (c == -1 || !B_OK(ctx->ctx_status)) {
			break;
		}

		if (!isalnum(c) && c != '_' && c != '-' && c != '.' && c != '+') {
			break;
		}

		char piece[2] = {(char)c, '\0'};
		b_string_append_cstr(wordbuf, piece);
		advance_char(ctx);
	}

	if (!try_convert_word_to_number(ctx, wordbuf)) {
		ctx->ctx_status = B_ERR_BAD_FORMAT;
	}
}
/*
 * Read a bare word and queue the token(s) it represents.
 *
 * Alphanumerics, '_', '-' and '.' are always collected. When timestamps are
 * enabled ':', ' ' and '+' are collected as well, and when numbers are
 * enabled so is '+'. The collected text is trimmed and then tried, in order
 * and subject to the enabled flags, as a boolean, a timestamp and a number.
 * If none matches, the text must consist of plain key characters only and is
 * split on dots by split_word(); otherwise ctx_status becomes
 * B_ERR_BAD_FORMAT.
 */
static void read_word(struct ctx *ctx)
{
	struct b_string *wordbuf = get_wordbuf(ctx);

	for (;;) {
		int c = peek_char(ctx);
		if (c == -1 || !B_OK(ctx->ctx_status)) {
			break;
		}

		bool accepted = isalnum(c) || c == '_' || c == '-' || c == '.';
		if (!accepted && (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS)) {
			accepted = (c == ':' || c == ' ' || c == '+');
		}

		if (!accepted && (ctx->ctx_flags & CTX_ENABLE_NUMBERS)) {
			accepted = (c == '+');
		}

		if (!accepted) {
			break;
		}

		char piece[2] = {(char)c, '\0'};
		b_string_append_cstr(wordbuf, piece);
		advance_char(ctx);
	}

	b_string_trim(wordbuf);

	/* Typed interpretations, most specific first. */
	if ((ctx->ctx_flags & CTX_ENABLE_BOOLS)
	    && try_convert_word_to_bool(ctx, wordbuf)) {
		return;
	}

	if ((ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS)
	    && try_convert_word_to_timestamp(ctx, wordbuf)) {
		return;
	}

	if ((ctx->ctx_flags & CTX_ENABLE_NUMBERS)
	    && try_convert_word_to_number(ctx, wordbuf)) {
		return;
	}

	/* Plain word: characters admitted only for timestamps or numbers
	 * are not allowed to remain. */
	const char *text = b_string_ptr(wordbuf);
	for (size_t k = 0; text[k] != '\0'; k++) {
		int ch = text[k];
		if (!isalnum(ch) && ch != '_' && ch != '-' && ch != '.') {
			ctx->ctx_status = B_ERR_BAD_FORMAT;
			return;
		}
	}

	split_word(ctx, wordbuf);
}
static void read_string(struct ctx *ctx, bool squote)
{
advance_char(ctx);
char term = '"';
if (squote) {
term = '\'';
}
bool multiline = false;
struct token *tok = enqueue_token(ctx, TOK_STRING);
struct b_string *str = get_wordbuf(ctx);
int c = peek_char(ctx);
if (c == term) {
advance_char(ctx);
c = peek_char(ctx);
if (c == term) {
advance_char(ctx);
c = peek_char(ctx);
multiline = true;
} else {
return;
}
if (c == '\n') {
advance_char(ctx);
}
}
if (multiline && !(ctx->ctx_flags & CTX_ENABLE_MULTILINE_STRING)) {
ctx->ctx_status = B_ERR_BAD_FORMAT;
return;
}
bool fail = false;
bool esc = false;
tok->tok_type = TOK_STRING;
while (!fail) {
c = peek_char(ctx);
if (c == -1) {
ctx->ctx_status = B_ERR_BAD_FORMAT;
fail = true;
break;
}
char s[] = {c, 0};
if (esc) {
if (c == '\n') {
while (c != -1 && isspace(c)) {
advance_char(ctx);
c = peek_char(ctx);
}
esc = false;
continue;
}
switch (c) {
case '"':
case '\\':
b_string_append_cstr(str, s);
break;
case 'b':
s[0] = '\b';
b_string_append_cstr(str, s);
break;
case 't':
s[0] = '\t';
b_string_append_cstr(str, s);
break;
case 'n':
s[0] = '\n';
b_string_append_cstr(str, s);
break;
case 'r':
s[0] = '\r';
b_string_append_cstr(str, s);
break;
case 'f':
s[0] = '\f';
b_string_append_cstr(str, s);
break;
default:
ctx->ctx_status = B_ERR_BAD_FORMAT;
fail = true;
break;
}
esc = false;
advance_char(ctx);
continue;
}
else if (c == '\\' && !squote) {
esc = true;
}
else if (c == '\n') {
if (!multiline) {
fail = true;
ctx->ctx_status = B_ERR_BAD_FORMAT;
break;
}
b_string_append_cstr(str, s);
}
else if (c == term) {
advance_char(ctx);
if (!multiline) {
break;
}
c = peek_char(ctx);
if (c != term) {
b_string_append_cstr(str, s);
continue;
}
advance_char(ctx);
c = peek_char(ctx);
if (c != term) {
b_string_append_cstr(str, s);
b_string_append_cstr(str, s);
continue;
}
advance_char(ctx);
c = peek_char(ctx);
if (c == term) {
b_string_append_cstr(str, s);
advance_char(ctx);
}
break;
}
else {
b_string_append_cstr(str, s);
}
advance_char(ctx);
}
if (fail) {
discard_token(ctx);
return;
}
tok->tok_str = b_string_steal(str);
}
/*
 * Consume one punctuation character and queue the matching token.
 *
 * While CTX_ENABLE_LONG_SYMBOLS is set, "[[" and "]]" are folded into the
 * double-bracket tokens; otherwise every bracket is emitted on its own. An
 * unrecognised character removes the placeholder token again and sets
 * ctx_status to B_ERR_BAD_FORMAT.
 *
 * NOTE(review): the result of enqueue_token() is used without a NULL check.
 */
static void read_symbol(struct ctx *ctx)
{
	int c = peek_char(ctx);
	advance_char(ctx);

	struct token *tok = enqueue_token(ctx, TOK_NONE);
	bool fold_pairs = (ctx->ctx_flags & CTX_ENABLE_LONG_SYMBOLS) != 0;

	if (c == '=') {
		tok->tok_type = TOK_EQUAL;
	} else if (c == '.') {
		tok->tok_type = TOK_DOT;
	} else if (c == ',') {
		tok->tok_type = TOK_COMMA;
	} else if (c == '[') {
		/* The look-ahead happens only when pairs may be folded. */
		if (fold_pairs && peek_char(ctx) == '[') {
			tok->tok_type = TOK_DOUBLE_LEFT_BRACKET;
			advance_char(ctx);
		} else {
			tok->tok_type = TOK_LEFT_BRACKET;
		}
	} else if (c == ']') {
		if (fold_pairs && peek_char(ctx) == ']') {
			tok->tok_type = TOK_DOUBLE_RIGHT_BRACKET;
			advance_char(ctx);
		} else {
			tok->tok_type = TOK_RIGHT_BRACKET;
		}
	} else if (c == '{') {
		tok->tok_type = TOK_LEFT_BRACE;
	} else if (c == '}') {
		tok->tok_type = TOK_RIGHT_BRACE;
	} else {
		discard_token(ctx);
		ctx->ctx_status = B_ERR_BAD_FORMAT;
	}
}
/*
 * Consume a run of consecutive newline characters and queue a single
 * TOK_NEWLINE for all of them.
 */
static void read_newline(struct ctx *ctx)
{
	while (peek_char(ctx) == '\n') {
		advance_char(ctx);
	}

	enqueue_token(ctx, TOK_NEWLINE);
}
static void read_comment(struct ctx *ctx)
{
int c = peek_char(ctx);
while (c != '\n' && c != -1) {
advance_char(ctx);
c = peek_char(ctx);
}
if (!B_OK(ctx->ctx_status)) {
return;
}
advance_char(ctx);
enqueue_token(ctx, TOK_NEWLINE);
}
/*
 * Drop the current token and make the next one current.
 *
 * If tokens are still queued after the drop, nothing is read. Otherwise
 * leading blanks (but not newlines) and comments are skipped and one reader
 * is dispatched on the first character:
 *   - quotes                      -> read_string()
 *   - '+' / '-' in number mode    -> read_number()
 *   - '_' or '-' otherwise        -> read_word()   (valid bare-key characters)
 *   - other punctuation           -> read_symbol()
 *   - newline                     -> read_newline()
 *   - anything else               -> read_word()
 *
 * Returns B_ERR_NO_DATA and sets CTX_EOF when no character is available,
 * otherwise the tokeniser status (ctx_status).
 */
static enum b_status advance_token(struct ctx *ctx)
{
	discard_token(ctx);

	if (!b_queue_empty(&ctx->ctx_tokens)) {
		return B_SUCCESS;
	}

	int c = peek_char(ctx);
	while (isspace(c) && c != '\n') {
		advance_char(ctx);
		c = peek_char(ctx);
	}

	if (c == -1) {
		ctx->ctx_flags |= CTX_EOF;
		return B_ERR_NO_DATA;
	}

	/* Each comment is replaced by a TOK_NEWLINE. */
	while (c == '#') {
		read_comment(ctx);
		c = peek_char(ctx);
	}

	if (!B_OK(ctx->ctx_status)) {
		return ctx->ctx_status;
	}

	bool numbers_enabled = (ctx->ctx_flags & CTX_ENABLE_NUMBERS) != 0;

	if (c == '"') {
		read_string(ctx, false);
	} else if (c == '\'') {
		read_string(ctx, true);
	} else if ((c == '+' || c == '-') && numbers_enabled) {
		read_number(ctx);
	} else if (c == '_' || c == '-') {
		/* ispunct() is true for both, but they may start a bare key
		 * and must not be treated as (invalid) symbols. */
		read_word(ctx);
	} else if (ispunct(c)) {
		read_symbol(ctx);
	} else if (c == '\n') {
		read_newline(ctx);
	} else {
		read_word(ctx);
	}

	return ctx->ctx_status;
}
/*
 * Return the current token (the front of the queue) without removing it, or
 * NULL when the queue is empty.
 */
static struct token *peek_token(struct ctx *ctx)
{
	struct b_queue_entry *front = b_queue_first(&ctx->ctx_tokens);

	return front ? b_unbox(struct token, front, tok_entry) : NULL;
}
/*
 * Release everything owned by the context: any tokens still queued, the
 * line-buffer stream, both string buffers and the static-object set.
 *
 * Every released member is reset to NULL, so calling this more than once,
 * or on a partially initialised context, is harmless.
 *
 * NOTE(review): a queued TOK_TIMESTAMP token that was never parsed still
 * holds a b_datetime that discard_token() does not release - confirm how it
 * should be freed.
 */
static void ctx_cleanup(struct ctx *ctx)
{
	/* Tokens left over after an error or an early exit would otherwise
	 * leak together with their text. */
	while (!b_queue_empty(&ctx->ctx_tokens)) {
		discard_token(ctx);
	}

	if (ctx->ctx_linebuf_stream) {
		b_stream_close(ctx->ctx_linebuf_stream);
		ctx->ctx_linebuf_stream = NULL;
	}

	if (ctx->ctx_linebuf) {
		b_string_release(ctx->ctx_linebuf);
		ctx->ctx_linebuf = NULL;
	}

	if (ctx->ctx_wordbuf) {
		b_string_release(ctx->ctx_wordbuf);
		ctx->ctx_wordbuf = NULL;
	}

	if (ctx->ctx_static_objects) {
		b_hashmap_release(ctx->ctx_static_objects);
		ctx->ctx_static_objects = NULL;
	}
}
static enum b_status ctx_init(struct ctx *ctx)
{
memset(ctx, 0x0, sizeof *ctx);
ctx->ctx_linebuf = b_string_create();
ctx->ctx_wordbuf = b_string_create();
b_string_open_stream(ctx->ctx_linebuf, &ctx->ctx_linebuf_stream);
ctx->ctx_static_objects = b_hashmap_create(NULL, NULL);
return B_SUCCESS;
}
/*
 * Serialisation entry point for the TOML format.
 *
 * Currently a stub: it writes nothing to `dest`, ignores all of its
 * arguments and reports B_SUCCESS.
 */
static enum b_status toml_serialise(
	struct b_serial_ctx *serial, struct b_object *src,
	struct b_stream *dest, enum b_serial_flags flags)
{
	(void)serial;
	(void)src;
	(void)dest;
	(void)flags;

	return B_SUCCESS;
}
/*
 * Debugging aid: print a one-line description of `tok` to stdout.
 *
 * A NULL token prints nothing. Numeric arguments are cast to the exact type
 * named by their printf conversion so the output does not depend on how
 * int64_t or the b_datetime accessors' return types are defined.
 */
static void print_token(struct token *tok)
{
	if (!tok) {
		return;
	}

	switch (tok->tok_type) {
	case TOK_NONE:
		printf("TOK_NONE\n");
		break;
	case TOK_WORD:
		printf("TOK_WORD %s\n", tok->tok_str ? tok->tok_str : "");
		break;
	case TOK_STRING:
		printf("TOK_STRING %s\n", tok->tok_str ? tok->tok_str : "");
		break;
	case TOK_TIMESTAMP:
		if (!tok->tok_value.time) {
			/* The value was already taken by parse_timestamp(). */
			printf("TOK_TIMESTAMP\n");
			break;
		}

		printf("TOK_TIMESTAMP %04ld-%02ld-%02ld "
		       "%02ld:%02ld:%02ld.%04ld %c"
		       "%02ld:%02ld\n",
		       (long)b_datetime_year(tok->tok_value.time),
		       (long)b_datetime_month(tok->tok_value.time),
		       (long)b_datetime_day(tok->tok_value.time),
		       (long)b_datetime_hour(tok->tok_value.time),
		       (long)b_datetime_minute(tok->tok_value.time),
		       (long)b_datetime_second(tok->tok_value.time),
		       (long)b_datetime_subsecond(tok->tok_value.time),
		       b_datetime_zone_offset_is_negative(tok->tok_value.time)
			       ? '-'
			       : '+',
		       (long)b_datetime_zone_offset_hour(tok->tok_value.time),
		       (long)b_datetime_zone_offset_minute(tok->tok_value.time));
		break;
	case TOK_INT:
		printf("TOK_INT ");
		if (tok->tok_value.i.nan) {
			printf("NaN");
		} else {
			/* int64_t is not necessarily long long. */
			printf("%lld", (long long)tok->tok_value.i.v);
		}

		printf("\n");
		break;
	case TOK_FLOAT:
		printf("TOK_FLOAT ");
		if (tok->tok_value.f.nan) {
			printf("NaN");
		} else {
			printf("%lf", tok->tok_value.f.v);
		}

		printf("\n");
		break;
	case TOK_BOOL:
		printf("TOK_BOOL %s\n", tok->tok_value.b ? "true" : "false");
		break;
	case TOK_NEWLINE:
		printf("TOK_NEWLINE\n");
		break;
	case TOK_EQUAL:
		printf("TOK_EQUAL\n");
		break;
	case TOK_DOT:
		printf("TOK_DOT\n");
		break;
	case TOK_COMMA:
		printf("TOK_COMMA\n");
		break;
	case TOK_LEFT_BRACKET:
		printf("TOK_LEFT_BRACKET\n");
		break;
	case TOK_RIGHT_BRACKET:
		printf("TOK_RIGHT_BRACKET\n");
		break;
	case TOK_DOUBLE_LEFT_BRACKET:
		printf("TOK_DOUBLE_LEFT_BRACKET\n");
		break;
	case TOK_DOUBLE_RIGHT_BRACKET:
		printf("TOK_DOUBLE_RIGHT_BRACKET\n");
		break;
	case TOK_LEFT_BRACE:
		printf("TOK_LEFT_BRACE\n");
		break;
	case TOK_RIGHT_BRACE:
		printf("TOK_RIGHT_BRACE\n");
		break;
	default:
		break;
	}
}
/* Forward declarations: values and key/value pairs are mutually recursive
 * (inline tables contain pairs, pairs contain values). */
static enum b_status parse_value(struct ctx *ctx, struct b_object **result);
static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *container);
/*
 * Produce the value of the current TOK_TIMESTAMP token.
 *
 * The token's b_datetime pointer is handed to `*result` and cleared in the
 * token. The token itself stays current. Always returns B_SUCCESS.
 */
static enum b_status parse_timestamp(struct ctx *ctx, struct b_object **result)
{
	struct token *current = peek_token(ctx);
	struct b_datetime *moved = current->tok_value.time;

	current->tok_value.time = NULL;
	*result = B_OBJECT(moved);

	return B_SUCCESS;
}
/*
 * Produce the value of the current TOK_STRING token as a new b_string built
 * from the token text. The token stays current.
 *
 * Returns B_ERR_NO_MEMORY when the string cannot be created.
 */
static enum b_status parse_string(struct ctx *ctx, struct b_object **result)
{
	struct token *current = peek_token(ctx);
	struct b_string *copy = b_string_create_from_cstr(current->tok_str);

	if (copy == NULL) {
		return B_ERR_NO_MEMORY;
	}

	*result = B_OBJECT(copy);
	return B_SUCCESS;
}
/*
 * Produce the value of the current TOK_INT token as a new 64-bit integer
 * number object. The token stays current.
 *
 * Returns B_ERR_NO_MEMORY when the number cannot be created.
 */
static enum b_status parse_int(struct ctx *ctx, struct b_object **result)
{
	struct token *current = peek_token(ctx);
	struct b_number *number = B_INT64(current->tok_value.i.v);

	if (number == NULL) {
		return B_ERR_NO_MEMORY;
	}

	*result = B_OBJECT(number);
	return B_SUCCESS;
}
/*
 * Produce the value of the current TOK_FLOAT token as a new double number
 * object. The token stays current.
 *
 * Returns B_ERR_NO_MEMORY when the number cannot be created.
 */
static enum b_status parse_float(struct ctx *ctx, struct b_object **result)
{
	struct token *current = peek_token(ctx);
	struct b_number *number = B_DOUBLE(current->tok_value.f.v);

	if (number == NULL) {
		return B_ERR_NO_MEMORY;
	}

	*result = B_OBJECT(number);
	return B_SUCCESS;
}
/*
 * Produce the value of the current TOK_BOOL token. The boolean is stored as
 * an 8-bit integer number object. The token stays current.
 *
 * Returns B_ERR_NO_MEMORY when the number cannot be created.
 */
static enum b_status parse_bool(struct ctx *ctx, struct b_object **result)
{
	struct token *current = peek_token(ctx);
	struct b_number *number = B_INT8(current->tok_value.b);

	if (number == NULL) {
		return B_ERR_NO_MEMORY;
	}

	*result = B_OBJECT(number);
	return B_SUCCESS;
}
static enum b_status parse_table_inline(struct ctx *ctx, struct b_object **result)
{
advance_token(ctx);
struct b_dict *table = b_dict_create();
if (!table) {
return B_ERR_NO_MEMORY;
}
bool done = false;
while (!done) {
struct b_object *value;
enum b_status status = parse_key_value_pair(ctx, table);
if (!B_OK(status)) {
b_dict_release(table);
return status;
}
struct token *tok = peek_token(ctx);
if (!tok) {
b_dict_release(table);
return status;
}
switch (tok->tok_type) {
case TOK_RIGHT_BRACE:
done = true;
break;
case TOK_COMMA:
advance_token(ctx);
break;
default:
b_dict_release(table);
return B_ERR_BAD_FORMAT;
}
}
*result = B_OBJECT(table);
return B_SUCCESS;
}
static void skip_newlines(struct ctx *ctx)
{
struct token *tok = peek_token(ctx);
while (tok && tok->tok_type == TOK_NEWLINE) {
advance_token(ctx);
tok = peek_token(ctx);
}
}
static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **result)
{
bool done = false;
advance_token(ctx);
struct b_array *array = b_array_create();
if (!array) {
return B_ERR_NO_MEMORY;
}
struct token *tok = peek_token(ctx);
if (!tok) {
b_array_release(array);
return B_ERR_BAD_FORMAT;
}
if (tok->tok_type == TOK_RIGHT_BRACKET) {
advance_token(ctx);
done = true;
}
while (!done) {
skip_newlines(ctx);
tok = peek_token(ctx);
if (!tok) {
b_array_release(array);
return B_ERR_BAD_FORMAT;
}
if (tok->tok_type == TOK_RIGHT_BRACKET) {
done = true;
break;
}
struct b_object *value;
enum b_status status = parse_value(ctx, &value);
if (!B_OK(status)) {
b_array_release(array);
return status;
}
b_array_append(array, B_RV(value));
skip_newlines(ctx);
tok = peek_token(ctx);
if (tok && tok->tok_type == TOK_RIGHT_BRACKET) {
done = true;
break;
}
if (!tok || tok->tok_type != TOK_COMMA) {
b_array_release(array);
return B_ERR_BAD_FORMAT;
}
advance_token(ctx);
}
*result = B_OBJECT(array);
return B_SUCCESS;
}
static enum b_status parse_value(struct ctx *ctx, struct b_object **result)
{
struct token *tok = peek_token(ctx);
if (!tok) {
return B_ERR_BAD_FORMAT;
}
switch (tok->tok_type) {
case TOK_STRING:
return parse_string(ctx, result);
case TOK_INT:
return parse_int(ctx, result);
case TOK_FLOAT:
return parse_float(ctx, result);
case TOK_BOOL:
return parse_bool(ctx, result);
case TOK_TIMESTAMP:
return parse_timestamp(ctx, result);
case TOK_LEFT_BRACKET:
return parse_array_inline(ctx, result);
case TOK_LEFT_BRACE:
return parse_table_inline(ctx, result);
default:
return B_ERR_BAD_FORMAT;
}
}
static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *container)
{
struct token *tok = peek_token(ctx);
if (!IS_VALID_KEY_COMPONENT(tok)) {
return B_ERR_BAD_FORMAT;
}
char *key = b_strdup(tok->tok_str);
if (!key) {
return B_ERR_NO_MEMORY;
}
advance_token(ctx);
tok = peek_token(ctx);
if (!tok) {
return B_ERR_BAD_FORMAT;
}
while (tok && tok->tok_type == TOK_DOT) {
struct b_object *sub_dict = b_dict_at(container, key);
if (!sub_dict) {
sub_dict = B_OBJECT(b_dict_create());
b_dict_put(container, key, B_RV(sub_dict));
} else if (sub_dict && !B_OBJECT_IS(sub_dict, DICT)) {
free(key);
return B_ERR_BAD_FORMAT;
}
if (ctx_object_is_static(ctx, sub_dict)) {
free(key);
return B_ERR_BAD_FORMAT;
}
ctx_add_static_object(ctx, sub_dict);
advance_token(ctx);
tok = peek_token(ctx);
if (!IS_VALID_KEY_COMPONENT(tok)) {
free(key);
return B_ERR_BAD_FORMAT;
}
container = B_DICT(sub_dict);
free(key);
key = b_strdup(tok->tok_str);
if (!key) {
return B_ERR_NO_MEMORY;
}
advance_token(ctx);
tok = peek_token(ctx);
}
if (b_dict_has_key(container, key)) {
return B_ERR_BAD_FORMAT;
}
if (!tok) {
return B_ERR_BAD_FORMAT;
}
if (tok->tok_type != TOK_EQUAL) {
return B_ERR_BAD_FORMAT;
}
ctx->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS;
ctx->ctx_flags |= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS
| CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING;
advance_token(ctx);
struct b_object *value = NULL;
enum b_status status = parse_value(ctx, &value);
ctx->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS;
ctx->ctx_flags
&= ~(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS
| CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING);
if (!B_OK(status)) {
return status;
}
advance_token(ctx);
b_dict_put(container, key, B_RV(value));
if (B_OBJECT_IS(value, DICT) || B_OBJECT_IS(value, ARRAY)) {
ctx_add_static_object(ctx, value);
}
return B_SUCCESS;
}
static enum b_status parse_table_header(
struct ctx *ctx, struct b_dict *container, struct b_dict **new_container)
{
advance_token(ctx);
struct token *tok = peek_token(ctx);
if (!IS_VALID_KEY_COMPONENT(tok)) {
return B_ERR_BAD_FORMAT;
}
char *key = b_strdup(tok->tok_str);
if (!key) {
return B_ERR_NO_MEMORY;
}
advance_token(ctx);
tok = peek_token(ctx);
if (!tok) {
return B_ERR_BAD_FORMAT;
}
while (tok && tok->tok_type == TOK_DOT) {
struct b_object *sub_dict = b_dict_at(container, key);
if (!sub_dict) {
sub_dict = B_OBJECT(b_dict_create());
b_dict_put(container, key, B_RV(sub_dict));
} else if (B_OBJECT_IS(sub_dict, ARRAY)) {
sub_dict = b_array_at(
B_ARRAY(sub_dict),
b_array_size(B_ARRAY(sub_dict)) - 1);
} else if (!B_OBJECT_IS(sub_dict, DICT)) {
return B_ERR_BAD_FORMAT;
}
advance_token(ctx);
tok = peek_token(ctx);
if (!IS_VALID_KEY_COMPONENT(tok)) {
return B_ERR_BAD_FORMAT;
}
container = B_DICT(sub_dict);
free(key);
key = b_strdup(tok->tok_str);
if (!key) {
return B_ERR_NO_MEMORY;
}
advance_token(ctx);
tok = peek_token(ctx);
}
if (!tok || tok->tok_type != TOK_RIGHT_BRACKET) {
return B_ERR_BAD_FORMAT;
}
struct b_dict *new_table = B_DICT(b_dict_at(container, key));
if (new_table) {
if (!B_OBJECT_IS(new_table, DICT)
|| ctx_object_is_static(ctx, B_OBJECT(new_table))) {
return B_ERR_BAD_FORMAT;
}
ctx_add_static_object(ctx, B_OBJECT(new_table));
} else {
new_table = b_dict_create();
if (!new_table) {
free(key);
return B_ERR_NO_MEMORY;
}
b_dict_put(container, key, B_RV(new_table));
ctx_add_static_object(ctx, B_OBJECT(new_table));
}
free(key);
advance_token(ctx);
*new_container = new_table;
return B_SUCCESS;
}
static enum b_status parse_array_header(
struct ctx *ctx, struct b_dict *container, struct b_dict **new_container)
{
advance_token(ctx);
struct token *tok = peek_token(ctx);
if (!IS_VALID_KEY_COMPONENT(tok)) {
return B_ERR_BAD_FORMAT;
}
char *key = b_strdup(tok->tok_str);
if (!key) {
return B_ERR_NO_MEMORY;
}
advance_token(ctx);
tok = peek_token(ctx);
if (!tok) {
return B_ERR_BAD_FORMAT;
}
while (tok && tok->tok_type == TOK_DOT) {
struct b_object *sub_dict = b_dict_at(container, key);
if (!sub_dict) {
sub_dict = B_OBJECT(b_dict_create());
b_dict_put(container, key, B_RV(sub_dict));
} else if (B_OBJECT_IS(sub_dict, ARRAY)) {
sub_dict = b_array_at(
B_ARRAY(sub_dict),
b_array_size(B_ARRAY(sub_dict)) - 1);
} else if (!B_OBJECT_IS(sub_dict, DICT)) {
return B_ERR_BAD_FORMAT;
}
advance_token(ctx);
tok = peek_token(ctx);
if (!IS_VALID_KEY_COMPONENT(tok)) {
return B_ERR_BAD_FORMAT;
}
container = B_DICT(sub_dict);
free(key);
key = b_strdup(tok->tok_str);
if (!key) {
return B_ERR_NO_MEMORY;
}
advance_token(ctx);
tok = peek_token(ctx);
}
if (!tok || tok->tok_type != TOK_DOUBLE_RIGHT_BRACKET) {
return B_ERR_BAD_FORMAT;
}
struct b_array *array = B_ARRAY(b_dict_get(container, key));
if (!array) {
array = b_array_create();
b_dict_put(container, key, B_RV(array));
} else if (
!B_OBJECT_IS(array, ARRAY)
|| ctx_object_is_static(ctx, B_OBJECT(array))) {
return B_ERR_BAD_FORMAT;
}
struct b_dict *new_table = b_dict_create();
if (!new_table) {
free(key);
return B_ERR_NO_MEMORY;
}
b_array_append(array, B_RV(new_table));
free(key);
advance_token(ctx);
*new_container = new_table;
return B_SUCCESS;
}
static enum b_status parse_root(struct ctx *ctx, struct b_dict **result)
{
enum b_status status = B_SUCCESS;
struct b_dict *root = b_dict_create();
struct b_dict *current = root;
while (!(ctx->ctx_flags & CTX_EOF) && B_OK(status)) {
struct token *tok = peek_token(ctx);
if (!tok) {
break;
}
switch (tok->tok_type) {
case TOK_LEFT_BRACKET:
status = parse_table_header(ctx, root, &current);
if (!B_OK(status)) {
break;
}
tok = peek_token(ctx);
if (tok && tok->tok_type != TOK_NEWLINE) {
status = B_ERR_BAD_FORMAT;
}
break;
case TOK_DOUBLE_LEFT_BRACKET:
status = parse_array_header(ctx, root, &current);
if (!B_OK(status)) {
break;
}
tok = peek_token(ctx);
if (tok && tok->tok_type != TOK_NEWLINE) {
status = B_ERR_BAD_FORMAT;
}
break;
case TOK_WORD:
case TOK_STRING:
status = parse_key_value_pair(ctx, current);
if (!B_OK(status)) {
break;
}
tok = peek_token(ctx);
if (tok && tok->tok_type != TOK_NEWLINE) {
status = B_ERR_BAD_FORMAT;
}
break;
case TOK_NEWLINE:
advance_token(ctx);
break;
default:
status = B_ERR_BAD_FORMAT;
break;
}
if (!B_OK(ctx->ctx_status) && ctx->ctx_status != B_ERR_NO_DATA) {
status = ctx->ctx_status;
}
}
if (!B_OK(status)) {
b_dict_release(root);
root = NULL;
}
*result = root;
return status;
}
/*
 * Deserialisation entry point for the TOML format: read a document from
 * `src` and store the resulting root dictionary in `*dest`.
 *
 * `serial` and `flags` are not used. `*dest` is written only on success.
 * The tokeniser context is released on every path.
 *
 * Returns B_SUCCESS, or the first error reported while initialising the
 * context, reading the first token or parsing the document.
 */
static enum b_status toml_deserialise(
	struct b_serial_ctx *serial, struct b_stream *src,
	struct b_object **dest, enum b_serial_flags flags)
{
	(void)serial;
	(void)flags;

	struct ctx ctx = {0};
	struct b_dict *result = NULL;

	enum b_status status = ctx_init(&ctx);
	if (!B_OK(status)) {
		ctx_cleanup(&ctx);
		return status;
	}

	ctx.ctx_src = src;

	/* Prime the token queue, then parse. */
	status = advance_token(&ctx);
	if (B_OK(status)) {
		status = parse_root(&ctx, &result);
	}

	ctx_cleanup(&ctx);

	if (!B_OK(status)) {
		return status;
	}

	*dest = B_OBJECT(result);
	return B_SUCCESS;
}
/* Operations table for the TOML format; it has external linkage so it can be
 * referenced from other translation units. Note that toml_serialise() is
 * currently a stub. */
const struct b_serial_format_ops z__b_toml_format_ops = {
.fmt_serialise = toml_serialise,
.fmt_deserialise = toml_deserialise,
};