/*
 * TOML tokenizer and recursive-descent parser used by the serial format ops
 * exported at the bottom of this file (z__b_toml_format_ops).
 */
#include "blue/core/status.h"
#include "blue/object/datetime.h"
#include "serial.h"

/*
 * NOTE(review): in the copy of this file that was reviewed, the nine
 * angle-bracket #include directives had lost their header names. The standard
 * headers below cover every libc name this file uses. The project headers that
 * declare b_string, b_stream, b_queue, b_hashmap, b_dict, b_array and b_number
 * must be restored from the original include list unless "serial.h" already
 * provides them.
 */
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* True when tok is non-NULL and can be used as one component of a key. */
#define IS_VALID_KEY_COMPONENT(tok) \
	((tok) && ((tok)->tok_type == TOK_WORD || (tok)->tok_type == TOK_STRING))

enum token_type {
	TOK_NONE = 0,
	TOK_WORD,
	TOK_STRING,
	TOK_INT,
	TOK_UINT,
	TOK_FLOAT,
	TOK_BOOL,
	TOK_TIMESTAMP,
	TOK_NEWLINE,
	TOK_EQUAL,
	TOK_DOT,
	TOK_COMMA,
	TOK_LEFT_BRACKET,
	TOK_RIGHT_BRACKET,
	TOK_DOUBLE_LEFT_BRACKET,
	TOK_DOUBLE_RIGHT_BRACKET,
	TOK_LEFT_BRACE,
	TOK_RIGHT_BRACE,
};

/* Broken-down timestamp fields. Not referenced by the code in this file. */
struct timestamp {
	unsigned int ts_year, ts_month, ts_day;
	unsigned short ts_hour, ts_min, ts_sec;
	unsigned int ts_msec;
	unsigned short ts_zone_offset_hour, ts_zone_offset_minute;
	unsigned char ts_zone_offset_negative;
};

/*
 * One lexical token. tok_str, when set, is heap memory owned by the token and
 * freed by discard_token(). tok_value is only meaningful for the token types
 * that carry a value (INT, FLOAT, BOOL, TIMESTAMP).
 */
struct token {
	enum token_type tok_type;
	struct b_queue_entry tok_entry;
	char *tok_str;

	union {
		struct {
			int64_t v;
			bool nan;
		} i;

		struct {
			double v;
			bool nan;
		} f;

		bool b;
		b_datetime *time;
	} tok_value;
};

enum ctx_flags {
	CTX_EOF = 0x01u,
	CTX_ENABLE_NUMBERS = 0x02u,
	CTX_ENABLE_TIMESTAMPS = 0x04u,
	CTX_ENABLE_BOOLS = 0x08u,
	CTX_ENABLE_LONG_SYMBOLS = 0x10u,
	CTX_ENABLE_MULTILINE_STRING = 0x20u,
};

/* Flags that are switched on together while a value is being tokenized. */
#define CTX_VALUE_MODE_FLAGS                                   \
	(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS             \
	 | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING)

enum ctx_state {
	CTX_STATE_NONE = 0,
	CTX_STATE_IN_TABLE,
	CTX_STATE_IN_ARRAY,
};

/* Tokenizer/parser state for one deserialise call. */
struct ctx {
	enum ctx_flags ctx_flags;
	b_stream *ctx_src;
	b_string *ctx_wordbuf;
	b_string *ctx_linebuf;
	b_stream *ctx_linebuf_stream;
	size_t ctx_linebuf_pos;
	enum b_status ctx_status;
	b_hashmap *ctx_static_objects;
	b_queue ctx_tokens;
};

/*
 * Switch the tokenizer to "value" mode: numbers, timestamps, booleans and
 * multi-line strings are recognised, and "[[" / "]]" are produced as two
 * single brackets so nested arrays tokenize correctly.
 */
static void enter_value_mode(struct ctx *ctx)
{
	ctx->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS;
	ctx->ctx_flags |= CTX_VALUE_MODE_FLAGS;
}

/*
 * Switch the tokenizer to "key" mode: words are never converted to typed
 * values, and "[[" / "]]" are produced as single double-bracket tokens.
 */
static void enter_key_mode(struct ctx *ctx)
{
	ctx->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS;
	ctx->ctx_flags &= ~CTX_VALUE_MODE_FLAGS;
}

/* Record obj (by pointer identity) in the set of objects that are "static". */
static void ctx_add_static_object(struct ctx *ctx, struct b_object *obj)
{
	b_hashmap_key key = {
		.key_data = obj,
		.key_size = sizeof(struct b_object *),
		.key_flags = B_HASHMAP_KEY_F_INTVALUE,
	};
	b_hashmap_value value = {0};

	b_hashmap_put(ctx->ctx_static_objects, &key, &value);
}

/* Return true if obj was previously passed to ctx_add_static_object(). */
static bool ctx_object_is_static(struct ctx *ctx, struct b_object *obj)
{
	b_hashmap_key key = {
		.key_data = obj,
		.key_size = sizeof(struct b_object *),
		.key_flags = B_HASHMAP_KEY_F_INTVALUE,
	};

	return b_hashmap_has_key(ctx->ctx_static_objects, &key);
}

/* True while the line buffer still holds characters that were not consumed. */
static bool data_available(struct ctx *ctx)
{
	size_t len = b_string_get_size(ctx->ctx_linebuf, B_STRLEN_NORMAL);

	return len != 0 && ctx->ctx_linebuf_pos < len;
}

/*
 * Replace the line buffer with the next line of the source stream. A '\n' is
 * appended after the line that was read, so a buffered line always ends with
 * one. Returns the status of the read.
 */
static enum b_status refill_linebuf(struct ctx *ctx)
{
	b_string_clear(ctx->ctx_linebuf);
	ctx->ctx_linebuf_pos = 0;
	b_stream_seek(ctx->ctx_linebuf_stream, 0, B_STREAM_SEEK_START);

	enum b_status status
		= b_stream_read_line_s(ctx->ctx_src, ctx->ctx_linebuf_stream);
	if (!B_OK(status)) {
		return status;
	}

	b_string_append_cstr(ctx->ctx_linebuf, "\n");
	return B_SUCCESS;
}

/* Return the shared scratch string, emptied. */
static struct b_string *get_wordbuf(struct ctx *ctx)
{
	b_string_clear(ctx->ctx_wordbuf);
	return ctx->ctx_wordbuf;
}

/* Return false for NUL, negative values and the control bytes listed below. */
static bool is_valid_char(int c)
{
	if (c <= 0) {
		return false;
	}

	switch (c) {
	case '\r':
	case '\b':
	case 0x0C:
	case 0x1F:
	case 0x7F:
	case 0xFF:
	case 0x10:
		return false;
	default:
		return true;
	}
}

/*
 * Fetch the next input character, refilling the line buffer when it is
 * exhausted. When consume is true the read position moves past the character
 * (even if the character turns out to be invalid).
 *
 * Returns the character as a non-negative value, or -1 when no character is
 * available. A failed refill stores its status in ctx_status; an invalid
 * character stores B_ERR_BAD_FORMAT.
 */
static int read_char(struct ctx *ctx, bool consume)
{
	if (!data_available(ctx)) {
		enum b_status status = refill_linebuf(ctx);
		if (!B_OK(status)) {
			ctx->ctx_status = status;
			return -1;
		}
	}

	if (!data_available(ctx)) {
		return -1;
	}

	const char *line = b_string_ptr(ctx->ctx_linebuf);
	/* Read as unsigned char so bytes >= 0x80 (UTF-8 text) are not turned
	 * into negative values, which would be rejected and are also not valid
	 * arguments for the <ctype.h> functions. */
	int c = (unsigned char)line[ctx->ctx_linebuf_pos];
	if (consume) {
		ctx->ctx_linebuf_pos++;
	}

	if (!is_valid_char(c)) {
		ctx->ctx_status = B_ERR_BAD_FORMAT;
		return -1;
	}

	return c;
}

/* Consume and return the next character, or -1 (see read_char()). */
static int advance_char(struct ctx *ctx)
{
	return read_char(ctx, true);
}

/* Return the next character without consuming it, or -1 (see read_char()). */
static int peek_char(struct ctx *ctx)
{
	return read_char(ctx, false);
}

/* Append the single character c to str. */
static void append_char(struct b_string *str, int c)
{
	char text[] = {(char)c, 0};

	b_string_append_cstr(str, text);
}

/*
 * Allocate a zeroed token of the given type and push it onto the back of the
 * token queue. Returns NULL, and records B_ERR_NO_MEMORY in ctx_status, when
 * the allocation fails.
 */
static struct token *enqueue_token(struct ctx *ctx, enum token_type type)
{
	struct token *tok = calloc(1, sizeof *tok);
	if (!tok) {
		ctx->ctx_status = B_ERR_NO_MEMORY;
		return NULL;
	}

	tok->tok_type = type;
	b_queue_push_back(&ctx->ctx_tokens, &tok->tok_entry);
	return tok;
}

/*
 * Pop the token at the front of the queue (if any) and free it together with
 * its text.
 *
 * NOTE(review): a TOK_TIMESTAMP token whose datetime was not taken over by
 * parse_timestamp() still holds tok_value.time here. No b_datetime release
 * call is visible in this file, so it is not released; confirm the right call.
 */
static void discard_token(struct ctx *ctx)
{
	struct b_queue_entry *entry = b_queue_pop_front(&ctx->ctx_tokens);
	if (!entry) {
		return;
	}

	struct token *tok = b_unbox(struct token, entry, tok_entry);
	free(tok->tok_str);
	free(tok);
}

/*
 * Try to interpret token_str as an RFC 3339 timestamp. On success a
 * TOK_TIMESTAMP token is queued, the text of token_str is moved into it, and
 * true is returned. Returns false when the text is not a timestamp or the
 * token could not be allocated (ctx_status is then B_ERR_NO_MEMORY).
 */
static bool try_convert_word_to_timestamp(
	struct ctx *ctx, struct b_string *token_str)
{
	b_datetime *dt = b_datetime_parse(
		B_DATETIME_FORMAT_RFC3339, b_string_ptr(token_str));
	if (!dt) {
		return false;
	}

	struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP);
	if (!tok) {
		/* NOTE(review): dt is not released on this out-of-memory path
		 * because no b_datetime release call is visible in this file. */
		return false;
	}

	tok->tok_str = b_string_steal(token_str);
	tok->tok_value.time = dt;
	return true;
}

/* True when c is a digit of the given base (2, 8, 16; anything else is 10). */
static bool is_digit_in_base(char c, int base)
{
	switch (base) {
	case 2:
		return c == '0' || c == '1';
	case 8:
		return c >= '0' && c <= '7';
	case 16:
		return isxdigit((unsigned char)c) != 0;
	default:
		return isdigit((unsigned char)c) != 0;
	}
}

/*
 * Try to interpret token_str as an integer or float literal.
 *
 * Accepted forms: optional sign followed by decimal digits, optionally with a
 * fraction and/or exponent (float); or an unsigned 0x / 0o / 0b literal.
 * A single '_' is allowed between two digits. Leading zeros are rejected for
 * decimal numbers, as are integers that do not fit in a long long.
 *
 * On success a TOK_INT or TOK_FLOAT token is queued, the text of token_str is
 * moved into it, and true is returned. Returns false otherwise; if the
 * failure was an allocation failure, ctx_status is B_ERR_NO_MEMORY.
 */
static bool try_convert_word_to_number(
	struct ctx *ctx, struct b_string *token_str)
{
	size_t len = b_string_get_size(token_str, B_STRLEN_NORMAL);
	if (len == 0) {
		return false;
	}

	/* Work on a copy: underscores are stripped from it before conversion,
	 * while the token keeps the original spelling. */
	struct b_string *str = b_string_duplicate(token_str);
	if (!str) {
		ctx->ctx_status = B_ERR_NO_MEMORY;
		return false;
	}

	const char *s = b_string_ptr(str);
	bool is_valid = false;
	bool is_double = false;
	bool has_sign = false;
	size_t offset = 0;
	int base = 10;
	char previous = 0;
	char *end = NULL;
	double d = 0;
	long long i = 0;

	if (s[0] == '+' || s[0] == '-') {
		offset = 1;
		has_sign = true;
	}

	if (s[offset] == '0') {
		char next = s[offset + 1];

		switch (next) {
		case 'x':
			offset += 2;
			base = 16;
			break;
		case 'b':
			offset += 2;
			base = 2;
			break;
		case 'o':
			offset += 2;
			base = 8;
			break;
		default:
			/* "0", "0.5" and "0e0" are fine; "007" and "0_1" are
			 * decimal numbers with a leading zero. */
			if (next == '_' || isdigit((unsigned char)next)) {
				goto done;
			}
			break;
		}
	}

	if (has_sign && base != 10) {
		goto done;
	}

	if (offset == len) {
		goto done;
	}

	for (size_t pos = offset; pos < len; pos++) {
		char c = s[pos];

		if (previous == '_' && !is_digit_in_base(c, base)) {
			goto done;
		}

		if (c == '_') {
			if (!is_digit_in_base(previous, base)) {
				goto done;
			}

			b_string_remove(str, pos, 1);
			/* Re-fetch in case the string moved its storage. */
			s = b_string_ptr(str);
			len--;
			pos--;
			previous = c;
			continue;
		}

		/* 'e' is an ordinary digit in a hex literal, so the float
		 * markers only apply to decimal numbers. */
		if (base == 10 && (c == 'e' || c == 'E' || c == '.')) {
			if (!isdigit((unsigned char)previous)) {
				goto done;
			}

			is_double = true;
			previous = c;
			continue;
		}

		/* A sign inside the literal is only valid right after the
		 * exponent marker. */
		if ((c == '-' || c == '+') && previous != 'e'
		    && previous != 'E') {
			goto done;
		}

		previous = c;
	}

	if (previous == '_' || previous == '.') {
		goto done;
	}

	if (is_double) {
		/* Floats are always decimal, so the sign (if any) is part of
		 * the text handed to strtod. */
		d = strtod(s, &end);
		is_valid = (end != s) && (*end == '\0');
	} else {
		/* A sign is only accepted in base 10; letting strtoll see it
		 * keeps the most negative value representable. */
		const char *digits = (base == 10) ? s : s + offset;

		errno = 0;
		i = strtoll(digits, &end, base);
		is_valid = (end != digits) && (*end == '\0')
			   && (errno != ERANGE);
	}

done:
	b_string_release(str);

	if (!is_valid) {
		return false;
	}

	struct token *tok = enqueue_token(ctx, is_double ? TOK_FLOAT : TOK_INT);
	if (!tok) {
		return false;
	}

	tok->tok_str = b_string_steal(token_str);

	if (is_double) {
		tok->tok_value.f.v = d;
	} else {
		tok->tok_value.i.v = i;
	}

	return true;
}

/*
 * Try to interpret token_str as "true" or "false". On success a TOK_BOOL
 * token is queued, the text of token_str is moved into it, and true is
 * returned.
 */
static bool try_convert_word_to_bool(struct ctx *ctx, struct b_string *token_str)
{
	const char *text = b_string_ptr(token_str);
	bool value;

	if (strcmp(text, "true") == 0) {
		value = true;
	} else if (strcmp(text, "false") == 0) {
		value = false;
	} else {
		return false;
	}

	struct token *tok = enqueue_token(ctx, TOK_BOOL);
	if (!tok) {
		return false;
	}

	tok->tok_str = b_string_steal(token_str);
	tok->tok_value.b = value;
	return true;
}

/*
 * Turn a dotted word such as "a.b.c" into WORD DOT WORD DOT WORD tokens.
 * Trailing dots produce trailing DOT tokens (the next key component is then
 * expected to come from a following token, e.g. a quoted string). Two
 * consecutive dots are a format error. The text of wordbuf is taken over and
 * freed here.
 */
static void split_word(struct ctx *ctx, struct b_string *wordbuf)
{
	long len = b_string_get_size(wordbuf, B_STRLEN_NORMAL);
	if (!len) {
		return;
	}

	char *text = b_string_steal(wordbuf);
	if (!text) {
		ctx->ctx_status = B_ERR_NO_MEMORY;
		return;
	}

	for (long pos = 1; pos < len; pos++) {
		if (text[pos - 1] == '.' && text[pos] == '.') {
			ctx->ctx_status = B_ERR_BAD_FORMAT;
			break;
		}
	}

	if (!B_OK(ctx->ctx_status)) {
		free(text);
		return;
	}

	/* Count the dots at the end before strtok_r overwrites them. */
	int trailing_dots = 0;
	for (long end = len; end > 0 && text[end - 1] == '.'; end--) {
		trailing_dots++;
	}

	char *save = NULL;
	unsigned int count = 0;
	bool failed = false;

	for (char *part = strtok_r(text, ".", &save); part;
	     part = strtok_r(NULL, ".", &save)) {
		if (count > 0 && !enqueue_token(ctx, TOK_DOT)) {
			failed = true;
			break;
		}

		struct token *word = enqueue_token(ctx, TOK_WORD);
		if (!word) {
			failed = true;
			break;
		}

		word->tok_str = b_strdup(part);
		if (!word->tok_str) {
			/* Leave a token no parser accepts instead of a WORD
			 * without text. */
			word->tok_type = TOK_NONE;
			ctx->ctx_status = B_ERR_NO_MEMORY;
			failed = true;
			break;
		}

		count++;
	}

	for (int n = 0; !failed && n < trailing_dots; n++) {
		if (!enqueue_token(ctx, TOK_DOT)) {
			break;
		}
	}

	free(text);
}

/*
 * Read a signed number starting at the current character and queue it as a
 * TOK_INT / TOK_FLOAT token. Anything that does not convert is a format
 * error.
 */
static void read_number(struct ctx *ctx)
{
	struct b_string *wordbuf = get_wordbuf(ctx);

	for (;;) {
		int c = peek_char(ctx);
		if (c == -1 || !B_OK(ctx->ctx_status)) {
			break;
		}

		bool accepted = isalnum(c) || c == '_' || c == '-' || c == '.'
				|| c == '+';
		if (!accepted) {
			break;
		}

		append_char(wordbuf, c);
		advance_char(ctx);
	}

	if (!try_convert_word_to_number(ctx, wordbuf)
	    && B_OK(ctx->ctx_status)) {
		ctx->ctx_status = B_ERR_BAD_FORMAT;
	}
}

/*
 * Read a bare word starting at the current character. Depending on the
 * enabled flags the word is queued as a BOOL, TIMESTAMP, INT or FLOAT token;
 * otherwise it is split on '.' into WORD / DOT tokens. A word that contains
 * characters outside [A-Za-z0-9_.-], or that is empty, is a format error.
 */
static void read_word(struct ctx *ctx)
{
	struct b_string *wordbuf = get_wordbuf(ctx);

	for (;;) {
		int c = peek_char(ctx);
		if (c == -1 || !B_OK(ctx->ctx_status)) {
			break;
		}

		bool accepted
			= isalnum(c) || c == '_' || c == '-' || c == '.';

		if (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS) {
			/* "1979-05-27 07:32:00+01:00" is a single word. */
			accepted = accepted || c == ':' || c == ' '
				   || c == '+';
		}

		if (ctx->ctx_flags & CTX_ENABLE_NUMBERS) {
			accepted = accepted || c == '+';
		}

		if (!accepted) {
			break;
		}

		append_char(wordbuf, c);
		advance_char(ctx);
	}

	b_string_trim(wordbuf);

	if (b_string_get_size(wordbuf, B_STRLEN_NORMAL) == 0) {
		/* The current character cannot start any token. Reporting it
		 * here keeps the parser from stopping silently with no token
		 * queued and no input consumed. */
		if (B_OK(ctx->ctx_status)) {
			ctx->ctx_status = B_ERR_BAD_FORMAT;
		}
		return;
	}

	bool parsed = false;

	if (ctx->ctx_flags & CTX_ENABLE_BOOLS) {
		parsed = try_convert_word_to_bool(ctx, wordbuf);
	}

	if (!parsed && (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS)) {
		parsed = try_convert_word_to_timestamp(ctx, wordbuf);
	}

	if (!parsed && (ctx->ctx_flags & CTX_ENABLE_NUMBERS)) {
		parsed = try_convert_word_to_number(ctx, wordbuf);
	}

	if (parsed || !B_OK(ctx->ctx_status)) {
		return;
	}

	const char *text = b_string_ptr(wordbuf);
	for (size_t pos = 0; text[pos]; pos++) {
		int c = (unsigned char)text[pos];
		bool accepted
			= isalnum(c) || c == '_' || c == '-' || c == '.';

		if (!accepted) {
			ctx->ctx_status = B_ERR_BAD_FORMAT;
			return;
		}
	}

	split_word(ctx, wordbuf);
}

/*
 * Read a quoted string starting at its opening quote and queue it as a
 * TOK_STRING token. squote selects a single-quoted string, in which
 * backslashes are not escape characters. Triple-quoted strings may span lines
 * and are only accepted when CTX_ENABLE_MULTILINE_STRING is set. On error
 * ctx_status is set and no token is left in the queue.
 */
static void read_string(struct ctx *ctx, bool squote)
{
	const char term = squote ? '\'' : '"';
	bool multiline = false;

	advance_char(ctx); /* opening quote */

	struct token *tok = enqueue_token(ctx, TOK_STRING);
	if (!tok) {
		return;
	}

	struct b_string *str = get_wordbuf(ctx);

	int c = peek_char(ctx);
	if (c == term) {
		advance_char(ctx);
		c = peek_char(ctx);

		if (c != term) {
			/* Two quotes in a row: the empty string. Give the token
			 * real (empty) text so later users never see NULL. */
			tok->tok_str = b_strdup("");
			if (!tok->tok_str) {
				discard_token(ctx);
				ctx->ctx_status = B_ERR_NO_MEMORY;
			}
			return;
		}

		advance_char(ctx);
		c = peek_char(ctx);
		multiline = true;

		/* A newline directly after the opening delimiter is dropped. */
		if (c == '\n') {
			advance_char(ctx);
		}
	}

	if (multiline && !(ctx->ctx_flags & CTX_ENABLE_MULTILINE_STRING)) {
		discard_token(ctx);
		ctx->ctx_status = B_ERR_BAD_FORMAT;
		return;
	}

	bool fail = false;
	bool esc = false;

	while (!fail) {
		c = peek_char(ctx);
		if (c == -1) {
			ctx->ctx_status = B_ERR_BAD_FORMAT;
			fail = true;
			break;
		}

		if (esc) {
			if (c == '\n') {
				/* Backslash at end of line: skip the newline
				 * and all following whitespace. */
				while (c != -1 && isspace(c)) {
					advance_char(ctx);
					c = peek_char(ctx);
				}

				esc = false;
				continue;
			}

			switch (c) {
			case '"':
			case '\\':
				append_char(str, c);
				break;
			case 'b':
				append_char(str, '\b');
				break;
			case 't':
				append_char(str, '\t');
				break;
			case 'n':
				append_char(str, '\n');
				break;
			case 'r':
				append_char(str, '\r');
				break;
			case 'f':
				append_char(str, '\f');
				break;
			default:
				ctx->ctx_status = B_ERR_BAD_FORMAT;
				fail = true;
				break;
			}

			esc = false;
			advance_char(ctx);
			continue;
		}

		if (c == '\\' && !squote) {
			esc = true;
		} else if (c == '\n') {
			if (!multiline) {
				fail = true;
				ctx->ctx_status = B_ERR_BAD_FORMAT;
				break;
			}

			append_char(str, c);
		} else if (c == term) {
			advance_char(ctx);
			if (!multiline) {
				break;
			}

			/* In a multi-line string one or two quotes are plain
			 * content; three in a row close the string. */
			c = peek_char(ctx);
			if (c != term) {
				append_char(str, term);
				continue;
			}

			advance_char(ctx);
			c = peek_char(ctx);
			if (c != term) {
				append_char(str, term);
				append_char(str, term);
				continue;
			}

			advance_char(ctx);
			c = peek_char(ctx);
			if (c == term) {
				/* A fourth quote belongs to the content. */
				append_char(str, term);
				advance_char(ctx);
			}

			break;
		} else {
			append_char(str, c);
		}

		advance_char(ctx);
	}

	if (fail) {
		discard_token(ctx);
		return;
	}

	char *text = b_string_steal(str);
	if (!text) {
		/* Guard against a NULL result for an empty buffer. */
		text = b_strdup("");
	}

	if (!text) {
		discard_token(ctx);
		ctx->ctx_status = B_ERR_NO_MEMORY;
		return;
	}

	tok->tok_str = text;
}

/*
 * Read a punctuation token. "[[" and "]]" are only combined into a single
 * token while CTX_ENABLE_LONG_SYMBOLS is set. Unknown punctuation is a format
 * error and queues nothing.
 */
static void read_symbol(struct ctx *ctx)
{
	int c = peek_char(ctx);
	advance_char(ctx);

	struct token *tok = enqueue_token(ctx, TOK_NONE);
	if (!tok) {
		return;
	}

	bool long_symbols = (ctx->ctx_flags & CTX_ENABLE_LONG_SYMBOLS) != 0;

	switch (c) {
	case '=':
		tok->tok_type = TOK_EQUAL;
		break;
	case '.':
		tok->tok_type = TOK_DOT;
		break;
	case ',':
		tok->tok_type = TOK_COMMA;
		break;
	case '[':
		if (long_symbols && peek_char(ctx) == '[') {
			tok->tok_type = TOK_DOUBLE_LEFT_BRACKET;
			advance_char(ctx);
		} else {
			tok->tok_type = TOK_LEFT_BRACKET;
		}
		break;
	case ']':
		/* While values are being parsed, double-symbol tokens are not
		 * generated. */
		if (long_symbols && peek_char(ctx) == ']') {
			tok->tok_type = TOK_DOUBLE_RIGHT_BRACKET;
			advance_char(ctx);
		} else {
			tok->tok_type = TOK_RIGHT_BRACKET;
		}
		break;
	case '{':
		tok->tok_type = TOK_LEFT_BRACE;
		break;
	case '}':
		tok->tok_type = TOK_RIGHT_BRACE;
		break;
	default:
		discard_token(ctx);
		ctx->ctx_status = B_ERR_BAD_FORMAT;
		break;
	}
}

/* Consume a run of newline characters and queue one TOK_NEWLINE token. */
static void read_newline(struct ctx *ctx)
{
	int c = peek_char(ctx);

	while (c == '\n') {
		advance_char(ctx);
		c = peek_char(ctx);
	}

	enqueue_token(ctx, TOK_NEWLINE);
}

/*
 * Consume a comment up to and including the end of its line and queue a
 * TOK_NEWLINE token in its place. Nothing is queued if the tokenizer status
 * is not OK after scanning the comment.
 */
static void read_comment(struct ctx *ctx)
{
	int c = peek_char(ctx);

	while (c != '\n' && c != -1) {
		advance_char(ctx);
		c = peek_char(ctx);
	}

	if (!B_OK(ctx->ctx_status)) {
		return;
	}

	advance_char(ctx);
	enqueue_token(ctx, TOK_NEWLINE);
}

/*
 * Drop the current token and make the next one current, reading more input
 * if the queue runs empty. Returns B_SUCCESS when a token was already queued,
 * B_ERR_NO_DATA (and sets CTX_EOF) when no more characters are available, or
 * the tokenizer status after reading.
 */
static enum b_status advance_token(struct ctx *ctx)
{
	discard_token(ctx);

	if (!b_queue_empty(&ctx->ctx_tokens)) {
		return B_SUCCESS;
	}

	int c = peek_char(ctx);

	while (isspace(c) && c != '\n') {
		advance_char(ctx);
		c = peek_char(ctx);
	}

	if (c == -1) {
		ctx->ctx_flags |= CTX_EOF;
		return B_ERR_NO_DATA;
	}

	if (c == '#') {
		/* The comment is replaced by a NEWLINE token; whatever follows
		 * it is read by the next call, after whitespace is skipped. */
		read_comment(ctx);
		return ctx->ctx_status;
	}

	if (!B_OK(ctx->ctx_status)) {
		return ctx->ctx_status;
	}

	if (c == '"') {
		read_string(ctx, false);
	} else if (c == '\'') {
		read_string(ctx, true);
	} else if ((c == '+' || c == '-')
		   && (ctx->ctx_flags & CTX_ENABLE_NUMBERS)) {
		read_number(ctx);
	} else if (c == '_' || c == '-') {
		/* Both are punctuation for ispunct() but may start a bare
		 * key. */
		read_word(ctx);
	} else if (ispunct(c)) {
		read_symbol(ctx);
	} else if (c == '\n') {
		read_newline(ctx);
	} else {
		read_word(ctx);
	}

	return ctx->ctx_status;
}

/* Return the current token without removing it, or NULL if there is none. */
static struct token *peek_token(struct ctx *ctx)
{
	struct b_queue_entry *entry = b_queue_first(&ctx->ctx_tokens);
	if (!entry) {
		return NULL;
	}

	return b_unbox(struct token, entry, tok_entry);
}

/* Free every token still queued and release the buffers owned by ctx. */
static void ctx_cleanup(struct ctx *ctx)
{
	while (!b_queue_empty(&ctx->ctx_tokens)) {
		discard_token(ctx);
	}

	if (ctx->ctx_linebuf_stream) {
		b_stream_close(ctx->ctx_linebuf_stream);
		ctx->ctx_linebuf_stream = NULL;
	}

	if (ctx->ctx_linebuf) {
		b_string_release(ctx->ctx_linebuf);
		ctx->ctx_linebuf = NULL;
	}

	if (ctx->ctx_wordbuf) {
		b_string_release(ctx->ctx_wordbuf);
		ctx->ctx_wordbuf = NULL;
	}

	if (ctx->ctx_static_objects) {
		b_hashmap_release(ctx->ctx_static_objects);
		ctx->ctx_static_objects = NULL;
	}
}

/*
 * Zero ctx and create its buffers. The tokenizer starts in key mode so that
 * a document may begin with a "[[table]]" header. Returns B_ERR_NO_MEMORY,
 * with nothing left allocated, if any buffer could not be created.
 */
static enum b_status ctx_init(struct ctx *ctx)
{
	memset(ctx, 0x0, sizeof *ctx);
	enter_key_mode(ctx);

	ctx->ctx_linebuf = b_string_create();
	ctx->ctx_wordbuf = b_string_create();

	if (ctx->ctx_linebuf) {
		b_string_open_stream(
			ctx->ctx_linebuf, &ctx->ctx_linebuf_stream);
	}

	ctx->ctx_static_objects = b_hashmap_create(NULL, NULL);

	if (!ctx->ctx_linebuf || !ctx->ctx_wordbuf || !ctx->ctx_linebuf_stream
	    || !ctx->ctx_static_objects) {
		ctx_cleanup(ctx);
		return B_ERR_NO_MEMORY;
	}

	return B_SUCCESS;
}

/* Serialisation is not implemented: nothing is written and B_SUCCESS is
 * returned. */
static enum b_status toml_serialise(
	struct b_serial_ctx *serial, struct b_object *src,
	struct b_stream *dest, enum b_serial_flags flags)
{
	(void)serial;
	(void)src;
	(void)dest;
	(void)flags;

	return B_SUCCESS;
}

/* Debugging aid: print one token to stdout. Not called by the code in this
 * file. */
static void print_token(struct token *tok)
{
	switch (tok->tok_type) {
	case TOK_NONE:
		printf("TOK_NONE\n");
		break;
	case TOK_WORD:
		printf("TOK_WORD %s\n", tok->tok_str);
		break;
	case TOK_STRING:
		printf("TOK_STRING %s\n", tok->tok_str);
		break;
	case TOK_TIMESTAMP:
		/* The accessors' return types are not visible here, so each
		 * value is cast to match its %ld conversion. */
		printf("TOK_TIMESTAMP %04ld-%02ld-%02ld "
		       "%02ld:%02ld:%02ld.%04ld %c"
		       "%02ld:%02ld\n",
		       (long)b_datetime_year(tok->tok_value.time),
		       (long)b_datetime_month(tok->tok_value.time),
		       (long)b_datetime_day(tok->tok_value.time),
		       (long)b_datetime_hour(tok->tok_value.time),
		       (long)b_datetime_minute(tok->tok_value.time),
		       (long)b_datetime_second(tok->tok_value.time),
		       (long)b_datetime_subsecond(tok->tok_value.time),
		       b_datetime_zone_offset_is_negative(tok->tok_value.time)
			       ? '-'
			       : '+',
		       (long)b_datetime_zone_offset_hour(tok->tok_value.time),
		       (long)b_datetime_zone_offset_minute(tok->tok_value.time));
		break;
	case TOK_INT:
		printf("TOK_INT ");
		if (tok->tok_value.i.nan) {
			printf("NaN");
		} else {
			printf("%lld", (long long)tok->tok_value.i.v);
		}
		printf("\n");
		break;
	case TOK_FLOAT:
		printf("TOK_FLOAT ");
		if (tok->tok_value.f.nan) {
			printf("NaN");
		} else {
			printf("%lf", tok->tok_value.f.v);
		}
		printf("\n");
		break;
	case TOK_BOOL:
		printf("TOK_BOOL %s\n", tok->tok_value.b ? "true" : "false");
		break;
	case TOK_NEWLINE:
		printf("TOK_NEWLINE\n");
		break;
	case TOK_EQUAL:
		printf("TOK_EQUAL\n");
		break;
	case TOK_DOT:
		printf("TOK_DOT\n");
		break;
	case TOK_COMMA:
		printf("TOK_COMMA\n");
		break;
	case TOK_LEFT_BRACKET:
		printf("TOK_LEFT_BRACKET\n");
		break;
	case TOK_RIGHT_BRACKET:
		printf("TOK_RIGHT_BRACKET\n");
		break;
	case TOK_DOUBLE_LEFT_BRACKET:
		printf("TOK_DOUBLE_LEFT_BRACKET\n");
		break;
	case TOK_DOUBLE_RIGHT_BRACKET:
		printf("TOK_DOUBLE_RIGHT_BRACKET\n");
		break;
	case TOK_LEFT_BRACE:
		printf("TOK_LEFT_BRACE\n");
		break;
	case TOK_RIGHT_BRACE:
		printf("TOK_RIGHT_BRACE\n");
		break;
	default:
		break;
	}
}

/*
 * Token protocol shared by the parse_* functions below:
 *  - parse_value() and the functions it dispatches to are entered with the
 *    first token of the value current and return with the LAST token of the
 *    value still current; the caller advances past it.
 *  - parse_key_value_pair() returns with the token AFTER the value current.
 */
static enum b_status parse_value(struct ctx *ctx, struct b_object **result);
static enum b_status parse_key_value_pair(
	struct ctx *ctx, struct b_dict *container);

/* Hand the datetime carried by the current token over to *result. */
static enum b_status parse_timestamp(struct ctx *ctx, struct b_object **result)
{
	struct token *tok = peek_token(ctx);
	struct b_datetime *dt = tok->tok_value.time;

	/* The token no longer refers to the datetime once it is handed out. */
	tok->tok_value.time = NULL;

	*result = B_OBJECT(dt);
	return B_SUCCESS;
}

/* Create a string object from the text of the current token. */
static enum b_status parse_string(struct ctx *ctx, struct b_object **result)
{
	struct token *tok = peek_token(ctx);
	struct b_string *str = b_string_create_from_cstr(tok->tok_str);

	if (!str) {
		return B_ERR_NO_MEMORY;
	}

	*result = B_OBJECT(str);
	return B_SUCCESS;
}

/* Create a 64-bit integer object from the current token. */
static enum b_status parse_int(struct ctx *ctx, struct b_object **result)
{
	struct token *tok = peek_token(ctx);
	struct b_number *val = B_INT64(tok->tok_value.i.v);

	if (!val) {
		return B_ERR_NO_MEMORY;
	}

	*result = B_OBJECT(val);
	return B_SUCCESS;
}

/* Create a double object from the current token. */
static enum b_status parse_float(struct ctx *ctx, struct b_object **result)
{
	struct token *tok = peek_token(ctx);
	struct b_number *val = B_DOUBLE(tok->tok_value.f.v);

	if (!val) {
		return B_ERR_NO_MEMORY;
	}

	*result = B_OBJECT(val);
	return B_SUCCESS;
}

/* Create an 8-bit integer object holding the boolean of the current token. */
static enum b_status parse_bool(struct ctx *ctx, struct b_object **result)
{
	struct token *tok = peek_token(ctx);
	struct b_number *val = B_INT8(tok->tok_value.b);

	if (!val) {
		return B_ERR_NO_MEMORY;
	}

	*result = B_OBJECT(val);
	return B_SUCCESS;
}

/*
 * Parse an inline table "{ key = value, ... }". Entered with '{' current;
 * returns with the closing '}' current and the new dict in *result.
 */
static enum b_status parse_table_inline(
	struct ctx *ctx, struct b_object **result)
{
	struct b_dict *table = b_dict_create();
	if (!table) {
		return B_ERR_NO_MEMORY;
	}

	/* Keys must be tokenized as plain words, not as typed values. */
	enter_key_mode(ctx);
	advance_token(ctx);

	struct token *tok = peek_token(ctx);
	bool done = tok && tok->tok_type == TOK_RIGHT_BRACE; /* "{}" */

	while (!done) {
		enum b_status status = parse_key_value_pair(ctx, table);
		if (!B_OK(status)) {
			b_dict_release(table);
			return status;
		}

		tok = peek_token(ctx);
		if (!tok) {
			b_dict_release(table);
			return B_ERR_BAD_FORMAT;
		}

		switch (tok->tok_type) {
		case TOK_RIGHT_BRACE:
			done = true;
			break;
		case TOK_COMMA:
			advance_token(ctx);
			break;
		default:
			b_dict_release(table);
			return B_ERR_BAD_FORMAT;
		}
	}

	*result = B_OBJECT(table);
	return B_SUCCESS;
}

/* Advance past any run of TOK_NEWLINE tokens. */
static void skip_newlines(struct ctx *ctx)
{
	struct token *tok = peek_token(ctx);

	while (tok && tok->tok_type == TOK_NEWLINE) {
		advance_token(ctx);
		tok = peek_token(ctx);
	}
}

/*
 * Parse an inline array "[ value, ... ]"; newlines between elements and a
 * trailing comma are accepted. Entered with '[' current; returns with the
 * closing ']' current and the new array in *result.
 */
static enum b_status parse_array_inline(
	struct ctx *ctx, struct b_object **result)
{
	struct b_array *array = b_array_create();
	if (!array) {
		return B_ERR_NO_MEMORY;
	}

	enter_value_mode(ctx);
	advance_token(ctx);

	for (;;) {
		skip_newlines(ctx);

		struct token *tok = peek_token(ctx);
		if (!tok) {
			b_array_release(array);
			return B_ERR_BAD_FORMAT;
		}

		if (tok->tok_type == TOK_RIGHT_BRACKET) {
			break;
		}

		struct b_object *value = NULL;
		enum b_status status = parse_value(ctx, &value);
		if (!B_OK(status)) {
			b_array_release(array);
			return status;
		}

		b_array_append(array, B_RV(value));

		/* An inline-table element leaves the tokenizer in key mode;
		 * the rest of the array must be read in value mode. Then step
		 * past the last token of the element. */
		enter_value_mode(ctx);
		advance_token(ctx);
		skip_newlines(ctx);

		tok = peek_token(ctx);
		if (!tok) {
			b_array_release(array);
			return B_ERR_BAD_FORMAT;
		}

		if (tok->tok_type == TOK_RIGHT_BRACKET) {
			break;
		}

		if (tok->tok_type != TOK_COMMA) {
			b_array_release(array);
			return B_ERR_BAD_FORMAT;
		}

		advance_token(ctx);
	}

	*result = B_OBJECT(array);
	return B_SUCCESS;
}

/*
 * Parse the value that starts at the current token into *result. Returns
 * B_ERR_BAD_FORMAT when there is no current token or it cannot start a value.
 */
static enum b_status parse_value(struct ctx *ctx, struct b_object **result)
{
	struct token *tok = peek_token(ctx);
	if (!tok) {
		return B_ERR_BAD_FORMAT;
	}

	switch (tok->tok_type) {
	case TOK_STRING:
		return parse_string(ctx, result);
	case TOK_INT:
		return parse_int(ctx, result);
	case TOK_FLOAT:
		return parse_float(ctx, result);
	case TOK_BOOL:
		return parse_bool(ctx, result);
	case TOK_TIMESTAMP:
		return parse_timestamp(ctx, result);
	case TOK_LEFT_BRACKET:
		return parse_array_inline(ctx, result);
	case TOK_LEFT_BRACE:
		return parse_table_inline(ctx, result);
	default:
		return B_ERR_BAD_FORMAT;
	}
}

/*
 * Parse "key = value" (the key may be dotted) and store the value in
 * container, creating intermediate dicts for the dotted components. Entered
 * with the first key component current; returns with the token after the
 * value current and the tokenizer in key mode.
 *
 * Returns B_ERR_BAD_FORMAT for a malformed pair, a duplicate key, or a dotted
 * path through a non-dict or a static object.
 */
static enum b_status parse_key_value_pair(
	struct ctx *ctx, struct b_dict *container)
{
	struct token *tok = peek_token(ctx);
	if (!IS_VALID_KEY_COMPONENT(tok)) {
		return B_ERR_BAD_FORMAT;
	}

	char *key = b_strdup(tok->tok_str);
	if (!key) {
		return B_ERR_NO_MEMORY;
	}

	enum b_status status = B_ERR_BAD_FORMAT;
	struct b_object *value = NULL;

	advance_token(ctx);
	tok = peek_token(ctx);

	while (tok && tok->tok_type == TOK_DOT) {
		struct b_object *sub_dict = b_dict_at(container, key);

		if (!sub_dict) {
			sub_dict = B_OBJECT(b_dict_create());
			if (!sub_dict) {
				status = B_ERR_NO_MEMORY;
				goto out;
			}

			b_dict_put(container, key, B_RV(sub_dict));
		} else if (!B_OBJECT_IS(sub_dict, DICT)) {
			goto out;
		}

		/* NOTE(review): the dict is marked static after its first
		 * traversal, so a second dotted key through it ("a.b = 1"
		 * followed by "a.c = 2") is rejected here. TOML permits that;
		 * confirm whether this restriction is intended. */
		if (ctx_object_is_static(ctx, sub_dict)) {
			goto out;
		}

		ctx_add_static_object(ctx, sub_dict);

		advance_token(ctx);
		tok = peek_token(ctx);
		if (!IS_VALID_KEY_COMPONENT(tok)) {
			goto out;
		}

		container = B_DICT(sub_dict);

		free(key);
		key = b_strdup(tok->tok_str);
		if (!key) {
			status = B_ERR_NO_MEMORY;
			goto out;
		}

		advance_token(ctx);
		tok = peek_token(ctx);
	}

	if (!tok || tok->tok_type != TOK_EQUAL
	    || b_dict_has_key(container, key)) {
		goto out;
	}

	/* The value token is read by this advance, so value mode has to be
	 * active before it. */
	enter_value_mode(ctx);
	advance_token(ctx);

	status = parse_value(ctx, &value);

	enter_key_mode(ctx);

	if (!B_OK(status)) {
		goto out;
	}

	advance_token(ctx);
	b_dict_put(container, key, B_RV(value));

	/* Inline tables and arrays must not be extended later. */
	if (B_OBJECT_IS(value, DICT) || B_OBJECT_IS(value, ARRAY)) {
		ctx_add_static_object(ctx, value);
	}

	status = B_SUCCESS;

out:
	free(key);
	return status;
}

/*
 * Shared front half of the "[a.b.c]" and "[[a.b.c]]" headers. Entered with
 * the opening bracket token current. Walks every component but the last,
 * creating missing dicts and stepping into the last element of any array on
 * the way.
 *
 * On success *container is the dict that should hold the final component,
 * *final_key is that component (heap memory the caller frees), and the token
 * after it is current. On failure nothing is left allocated.
 */
static enum b_status parse_header_path(
	struct ctx *ctx, struct b_dict **container, char **final_key)
{
	advance_token(ctx);

	struct token *tok = peek_token(ctx);
	if (!IS_VALID_KEY_COMPONENT(tok)) {
		return B_ERR_BAD_FORMAT;
	}

	char *key = b_strdup(tok->tok_str);
	if (!key) {
		return B_ERR_NO_MEMORY;
	}

	enum b_status status = B_ERR_BAD_FORMAT;
	struct b_dict *current = *container;

	advance_token(ctx);
	tok = peek_token(ctx);

	while (tok && tok->tok_type == TOK_DOT) {
		struct b_object *sub_dict = b_dict_at(current, key);

		if (!sub_dict) {
			sub_dict = B_OBJECT(b_dict_create());
			if (!sub_dict) {
				status = B_ERR_NO_MEMORY;
				goto fail;
			}

			b_dict_put(current, key, B_RV(sub_dict));
		} else if (B_OBJECT_IS(sub_dict, ARRAY)) {
			/* A path through an array of tables refers to the most
			 * recently added table. */
			size_t count = b_array_size(B_ARRAY(sub_dict));
			if (count == 0) {
				goto fail;
			}

			sub_dict = b_array_at(B_ARRAY(sub_dict), count - 1);
			if (!sub_dict || !B_OBJECT_IS(sub_dict, DICT)) {
				goto fail;
			}
		} else if (!B_OBJECT_IS(sub_dict, DICT)) {
			goto fail;
		}

		advance_token(ctx);
		tok = peek_token(ctx);
		if (!IS_VALID_KEY_COMPONENT(tok)) {
			goto fail;
		}

		current = B_DICT(sub_dict);

		free(key);
		key = b_strdup(tok->tok_str);
		if (!key) {
			status = B_ERR_NO_MEMORY;
			goto fail;
		}

		advance_token(ctx);
		tok = peek_token(ctx);
	}

	*container = current;
	*final_key = key;
	return B_SUCCESS;

fail:
	free(key);
	return status;
}

/*
 * Parse a "[table]" header relative to container and return the dict that
 * following key/value pairs belong to in *new_container. The table is created
 * if it does not exist yet; a table that is already static is rejected.
 * Returns with the token after ']' current.
 */
static enum b_status parse_table_header(
	struct ctx *ctx, struct b_dict *container,
	struct b_dict **new_container)
{
	char *key = NULL;
	enum b_status status = parse_header_path(ctx, &container, &key);
	if (!B_OK(status)) {
		return status;
	}

	struct token *tok = peek_token(ctx);
	if (!tok || tok->tok_type != TOK_RIGHT_BRACKET) {
		free(key);
		return B_ERR_BAD_FORMAT;
	}

	struct b_dict *new_table = B_DICT(b_dict_at(container, key));

	if (new_table) {
		if (!B_OBJECT_IS(new_table, DICT)
		    || ctx_object_is_static(ctx, B_OBJECT(new_table))) {
			free(key);
			return B_ERR_BAD_FORMAT;
		}
	} else {
		new_table = b_dict_create();
		if (!new_table) {
			free(key);
			return B_ERR_NO_MEMORY;
		}

		b_dict_put(container, key, B_RV(new_table));
	}

	/* A table may only be opened by a header once. */
	ctx_add_static_object(ctx, B_OBJECT(new_table));

	free(key);
	advance_token(ctx);

	*new_container = new_table;
	return B_SUCCESS;
}

/*
 * Parse a "[[array]]" header relative to container: append a new dict to the
 * named array (creating the array if needed) and return that dict in
 * *new_container. Returns with the token after ']]' current.
 */
static enum b_status parse_array_header(
	struct ctx *ctx, struct b_dict *container,
	struct b_dict **new_container)
{
	char *key = NULL;
	enum b_status status = parse_header_path(ctx, &container, &key);
	if (!B_OK(status)) {
		return status;
	}

	struct token *tok = peek_token(ctx);
	if (!tok || tok->tok_type != TOK_DOUBLE_RIGHT_BRACKET) {
		free(key);
		return B_ERR_BAD_FORMAT;
	}

	/* NOTE(review): this lookup uses b_dict_get() where the rest of the
	 * file uses b_dict_at(); kept as found - confirm the two are
	 * interchangeable here. */
	struct b_array *array = B_ARRAY(b_dict_get(container, key));

	if (!array) {
		array = b_array_create();
		if (!array) {
			free(key);
			return B_ERR_NO_MEMORY;
		}

		b_dict_put(container, key, B_RV(array));
	} else if (
		!B_OBJECT_IS(array, ARRAY)
		|| ctx_object_is_static(ctx, B_OBJECT(array))) {
		free(key);
		return B_ERR_BAD_FORMAT;
	}

	struct b_dict *new_table = b_dict_create();
	if (!new_table) {
		free(key);
		return B_ERR_NO_MEMORY;
	}

	b_array_append(array, B_RV(new_table));

	free(key);
	advance_token(ctx);

	*new_container = new_table;
	return B_SUCCESS;
}

/*
 * Parse a whole document into a new root dict. Every header and key/value
 * pair must be followed by a newline or the end of input. On failure *result
 * is NULL and the error status is returned.
 */
static enum b_status parse_root(struct ctx *ctx, struct b_dict **result)
{
	enum b_status status = B_SUCCESS;
	struct b_dict *root = b_dict_create();
	struct b_dict *current = root;

	if (!root) {
		*result = NULL;
		return B_ERR_NO_MEMORY;
	}

	while (!(ctx->ctx_flags & CTX_EOF) && B_OK(status)) {
		struct token *tok = peek_token(ctx);
		if (!tok) {
			break;
		}

		/* Set when the statement must be followed by end of line. */
		bool expect_newline = false;

		switch (tok->tok_type) {
		case TOK_LEFT_BRACKET:
			status = parse_table_header(ctx, root, &current);
			expect_newline = true;
			break;
		case TOK_DOUBLE_LEFT_BRACKET:
			status = parse_array_header(ctx, root, &current);
			expect_newline = true;
			break;
		case TOK_WORD:
		case TOK_STRING:
			status = parse_key_value_pair(ctx, current);
			expect_newline = true;
			break;
		case TOK_NEWLINE:
			advance_token(ctx);
			break;
		default:
			status = B_ERR_BAD_FORMAT;
			break;
		}

		if (expect_newline && B_OK(status)) {
			tok = peek_token(ctx);
			if (tok && tok->tok_type != TOK_NEWLINE) {
				status = B_ERR_BAD_FORMAT;
			}
		}

		/* Running out of input is how parsing normally ends; any other
		 * tokenizer error overrides the parser status. */
		if (!B_OK(ctx->ctx_status)
		    && ctx->ctx_status != B_ERR_NO_DATA) {
			status = ctx->ctx_status;
		}
	}

	if (!B_OK(status)) {
		b_dict_release(root);
		root = NULL;
	}

	*result = root;
	return status;
}

/*
 * Format op: read a TOML document from src and store the resulting dict in
 * *dest. *dest is only written on success. serial and flags are not used.
 */
static enum b_status toml_deserialise(
	struct b_serial_ctx *serial, struct b_stream *src,
	struct b_object **dest, enum b_serial_flags flags)
{
	(void)serial;
	(void)flags;

	struct ctx ctx = {0};
	struct b_dict *result = NULL;

	enum b_status status = ctx_init(&ctx);
	if (!B_OK(status)) {
		return status;
	}

	ctx.ctx_src = src;

	/* Prime the queue with the first token. */
	status = advance_token(&ctx);
	if (B_OK(status)) {
		status = parse_root(&ctx, &result);
	}

	ctx_cleanup(&ctx);

	if (!B_OK(status)) {
		return status;
	}

	*dest = B_OBJECT(result);
	return B_SUCCESS;
}

const struct b_serial_format_ops z__b_toml_format_ops = {
	.fmt_serialise = toml_serialise,
	.fmt_deserialise = toml_deserialise,
};