diff --git a/serial/include/blue/serial.h b/serial/include/blue/serial.h index 8db05db..3c90e64 100644 --- a/serial/include/blue/serial.h +++ b/serial/include/blue/serial.h @@ -11,6 +11,7 @@ typedef enum b_serial_format { B_SERIAL_FORMAT_NONE = 0, B_SERIAL_FORMAT_BITCODE, B_SERIAL_FORMAT_JSON, + B_SERIAL_FORMAT_TOML, } b_serial_format; typedef enum b_serial_flags { diff --git a/serial/serial.c b/serial/serial.c index 07df3b0..795ca43 100644 --- a/serial/serial.c +++ b/serial/serial.c @@ -6,11 +6,13 @@ extern const struct b_serial_format_ops z__b_bitcode_format_ops; extern const struct b_serial_format_ops z__b_json_format_ops; +extern const struct b_serial_format_ops z__b_toml_format_ops; static const struct b_serial_format_ops *format_ops[] = { [B_SERIAL_FORMAT_NONE] = NULL, [B_SERIAL_FORMAT_BITCODE] = &z__b_bitcode_format_ops, [B_SERIAL_FORMAT_JSON] = &z__b_json_format_ops, + [B_SERIAL_FORMAT_TOML] = &z__b_toml_format_ops, }; static const size_t nr_format_ops = sizeof format_ops / sizeof format_ops[0]; diff --git a/serial/toml.c b/serial/toml.c index d35699b..dbf6bf9 100644 --- a/serial/toml.c +++ b/serial/toml.c @@ -1,8 +1,8 @@ -#include "blue/core/status.h" -#include "blue/object/datetime.h" #include "serial.h" +#include #include +#include #include #include #include @@ -15,6 +15,28 @@ #define IS_VALID_KEY_COMPONENT(tok) \ ((tok) && ((tok)->tok_type == TOK_WORD || (tok)->tok_type == TOK_STRING)) +#define ENABLE_EXTENDED_LEXING(ctx) \ + do { \ + ctx->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS; \ + ctx->ctx_flags |= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS \ + | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING; \ + } while (0) + +#define DISABLE_EXTENDED_LEXING(ctx) \ + do { \ + ctx->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS; \ + ctx->ctx_flags \ + &= ~(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS \ + | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING); \ + } while (0) + +enum object_flags { + OBJECT_HEADER_MID_DEFINED = 0x01u, + OBJECT_HEADER_END_DEFINED = 0x02u, + 
OBJECT_KV_MID_DEFINED = 0x04u, + OBJECT_KV_END_DEFINED = 0x08u, +}; + enum token_type { TOK_NONE = 0, TOK_WORD, @@ -36,6 +58,22 @@ enum token_type { TOK_RIGHT_BRACE, }; +enum ctx_flags { + CTX_EOF = 0x01u, + CTX_ENABLE_NUMBERS = 0x02u, + CTX_ENABLE_TIMESTAMPS = 0x04u, + CTX_ENABLE_BOOLS = 0x08u, + CTX_ENABLE_EXTENDED_SYMBOLS = 0x10u, + CTX_ENABLE_LONG_SYMBOLS = 0x20u, + CTX_ENABLE_MULTILINE_STRING = 0x40u, +}; + +enum ctx_state { + CTX_STATE_NONE = 0, + CTX_STATE_IN_TABLE, + CTX_STATE_IN_ARRAY, +}; + struct timestamp { unsigned int ts_year, ts_month, ts_day; unsigned short ts_hour, ts_min, ts_sec; @@ -48,17 +86,17 @@ struct timestamp { struct token { enum token_type tok_type; struct b_queue_entry tok_entry; - char *tok_str; + b_string *tok_str; union { struct { int64_t v; - bool nan; + bool inf, nan; } i; struct { double v; - bool nan; + bool inf, nan; } f; bool b; @@ -67,68 +105,119 @@ struct token { } tok_value; }; -enum ctx_flags { - CTX_EOF = 0x01u, - CTX_ENABLE_NUMBERS = 0x02u, - CTX_ENABLE_TIMESTAMPS = 0x04u, - CTX_ENABLE_BOOLS = 0x08u, - CTX_ENABLE_LONG_SYMBOLS = 0x10u, - CTX_ENABLE_MULTILINE_STRING = 0x20u, -}; - -enum ctx_state { - CTX_STATE_NONE = 0, - CTX_STATE_IN_TABLE, - CTX_STATE_IN_ARRAY, -}; - struct ctx { enum ctx_flags ctx_flags; b_stream *ctx_src; b_string *ctx_wordbuf; b_string *ctx_linebuf; b_stream *ctx_linebuf_stream; - size_t ctx_linebuf_pos; + b_string_iterator ctx_linebuf_ptr; enum b_status ctx_status; - b_hashmap *ctx_static_objects; + b_hashmap *ctx_objects_flags; b_queue ctx_tokens; }; -static void ctx_add_static_object(struct ctx *ctx, struct b_object *obj) +static void ctx_set_object_flags( + struct ctx *ctx, struct b_object *obj, enum object_flags flags) { + if (!obj) { + return; + } + b_hashmap_key key = { .key_data = obj, .key_size = sizeof(struct b_object *), .key_flags = B_HASHMAP_KEY_F_INTVALUE, }; - b_hashmap_value value = {}; + const b_hashmap_value *old_value + = b_hashmap_get(ctx->ctx_objects_flags, &key); - 
b_hashmap_put(ctx->ctx_static_objects, &key, &value); + enum object_flags new_flags = 0; + if (old_value) { + new_flags = (enum object_flags)(uintptr_t)old_value->value_data; + } + + new_flags |= flags; + + b_hashmap_value value = { + .value_data = (void *)new_flags, + .value_size = sizeof new_flags, + }; + + b_hashmap_put(ctx->ctx_objects_flags, &key, &value); } -static bool ctx_object_is_static(struct ctx *ctx, struct b_object *obj) +static void ctx_clear_object_flags( + struct ctx *ctx, struct b_object *obj, enum object_flags mask) { + if (!obj) { + return; + } + b_hashmap_key key = { .key_data = obj, .key_size = sizeof(struct b_object *), .key_flags = B_HASHMAP_KEY_F_INTVALUE, }; - return b_hashmap_has_key(ctx->ctx_static_objects, &key); + const b_hashmap_value *old_value + = b_hashmap_get(ctx->ctx_objects_flags, &key); + + enum object_flags new_flags = 0; + if (old_value) { + new_flags = (enum object_flags)(uintptr_t)old_value->value_data; + } + + new_flags &= ~mask; + + b_hashmap_value value = { + .value_data = (void *)new_flags, + .value_size = sizeof new_flags, + }; + + b_hashmap_put(ctx->ctx_objects_flags, &key, &value); } -static bool data_available(struct ctx *ctx) +static enum object_flags ctx_get_object_flags(struct ctx *ctx, struct b_object *obj) +{ + if (!obj) { + return 0; + } + + b_hashmap_key key = { + .key_data = obj, + .key_size = sizeof(struct b_object *), + .key_flags = B_HASHMAP_KEY_F_INTVALUE, + }; + + const b_hashmap_value *value = b_hashmap_get(ctx->ctx_objects_flags, &key); + if (value) { + return (enum object_flags)(uintptr_t)value->value_data; + } + + return 0; +} + +static enum b_status data_available(struct ctx *ctx) { size_t len = b_string_get_size(ctx->ctx_linebuf, B_STRLEN_NORMAL); - return len != 0 && ctx->ctx_linebuf_pos < len; + if (len == 0) { + return B_ERR_NO_DATA; + } + + if (!B_OK(ctx->ctx_linebuf_ptr.status)) { + return ctx->ctx_linebuf_ptr.status; + } + + return b_string_iterator_is_valid(&ctx->ctx_linebuf_ptr) ? 
B_SUCCESS + : B_ERR_NO_DATA; } static enum b_status refill_linebuf(struct ctx *ctx) { b_string_clear(ctx->ctx_linebuf); - ctx->ctx_linebuf_pos = 0; b_stream_seek(ctx->ctx_linebuf_stream, 0, B_STREAM_SEEK_START); enum b_status status @@ -137,7 +226,7 @@ static enum b_status refill_linebuf(struct ctx *ctx) return status; } - b_string_append_cstr(ctx->ctx_linebuf, "\n"); + b_string_iterator_begin(ctx->ctx_linebuf, &ctx->ctx_linebuf_ptr); return B_SUCCESS; } @@ -148,7 +237,7 @@ static struct b_string *get_wordbuf(struct ctx *ctx) return ctx->ctx_wordbuf; } -static bool is_valid_char(int c) +static bool is_valid_char(b_wchar c) { if (c <= 0) { return false; @@ -156,12 +245,10 @@ static bool is_valid_char(int c) switch (c) { case '\0': - case '\r': case '\b': case 0x0C: case 0x1F: case 0x7F: - case 0xFF: case 0x10: return false; default: @@ -169,10 +256,10 @@ static bool is_valid_char(int c) } } -static int advance_char(struct ctx *ctx) +static b_wchar advance_char(struct ctx *ctx) { - enum b_status status = B_SUCCESS; - if (!data_available(ctx)) { + enum b_status status = data_available(ctx); + if (status == B_ERR_NO_DATA) { status = refill_linebuf(ctx); } @@ -181,25 +268,34 @@ static int advance_char(struct ctx *ctx) return -1; } - if (!data_available(ctx)) { + status = data_available(ctx); + if (!B_OK(status)) { + ctx->ctx_status = status; return -1; } const char *s = b_string_ptr(ctx->ctx_linebuf); - int c = s[ctx->ctx_linebuf_pos++]; + if (!B_OK(ctx->ctx_linebuf_ptr.status)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return -1; + } + + b_wchar c = ctx->ctx_linebuf_ptr.char_value; if (!is_valid_char(c)) { ctx->ctx_status = B_ERR_BAD_FORMAT; return -1; } + b_string_iterator_next(&ctx->ctx_linebuf_ptr); + return c; } -static int peek_char(struct ctx *ctx) +static b_wchar peek_char(struct ctx *ctx) { - enum b_status status = B_SUCCESS; - if (!data_available(ctx)) { + enum b_status status = data_available(ctx); + if (status == B_ERR_NO_DATA) { status = 
refill_linebuf(ctx); } @@ -208,12 +304,19 @@ static int peek_char(struct ctx *ctx) return -1; } - if (!data_available(ctx)) { + status = data_available(ctx); + if (!B_OK(status)) { + ctx->ctx_status = status; return -1; } const char *s = b_string_ptr(ctx->ctx_linebuf); - int c = s[ctx->ctx_linebuf_pos]; + if (!B_OK(ctx->ctx_linebuf_ptr.status)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return -1; + } + + b_wchar c = ctx->ctx_linebuf_ptr.char_value; if (!is_valid_char(c)) { ctx->ctx_status = B_ERR_BAD_FORMAT; @@ -226,7 +329,7 @@ static int peek_char(struct ctx *ctx) #if 0 static int peek_char(struct ctx *ctx) { - int c = __peek_char(ctx); + b_wchar c = __peek_char(ctx); if (c != '#') { return c; @@ -243,7 +346,7 @@ static int peek_char(struct ctx *ctx) static int advance_char(struct ctx *ctx) { - int c = __advance_char(ctx); + b_wchar c = __advance_char(ctx); if (c != '#') { return c; @@ -300,7 +403,7 @@ static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *toke } struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP); - tok->tok_str = b_string_steal(token_str); + tok->tok_str = b_string_duplicate(token_str); tok->tok_value.time = dt; return true; @@ -474,143 +577,238 @@ static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *toke } #endif +static bool is_valid_digit(b_wchar c, int base) +{ + switch (base) { + case 2: + return b_wchar_is_bin_digit(c); + case 8: + return b_wchar_is_oct_digit(c); + case 10: + return b_wchar_is_number(c); + case 16: + return b_wchar_is_hex_digit(c); + default: + return false; + } +} + +static bool has_trailing_zero(const char *s) +{ + int nr_zero = 0; + for (size_t i = 0; s[i]; i++) { + char c = s[i]; + + switch (c) { + case '0': + nr_zero++; + break; + case '.': + case 'e': + return false; + default: + return nr_zero > 0; + } + } + + return false; +} + static bool try_convert_word_to_number(struct ctx *ctx, struct b_string *token_str) { size_t len = b_string_get_size(token_str, 
B_STRLEN_NORMAL); struct b_string *str = b_string_duplicate(token_str); + struct token *tok = NULL; const char *s = b_string_ptr(str); if (len == 0) { return false; } - size_t offset = 0; + size_t validation_offset = 0; + + bool is_decimal = false; bool has_sign = false; - - int mul = 1; - if (s[0] == '+') { - offset++; - has_sign = true; - } else if (s[0] == '-') { - offset++; - mul = -1; - has_sign = true; - } - int base = 10; - if (*(s + offset) == '0') { - char b = *(s + offset + 1); - switch (b) { - case 'x': - offset += 2; - base = 16; - break; + switch (*s) { + case '+': + case '-': + validation_offset++; + has_sign = true; + break; + case '0': + switch (*(s + 1)) { case 'b': - offset += 2; base = 2; + s += 2; break; case 'o': - offset += 2; base = 8; + s += 2; break; + case 'x': + base = 16; + s += 2; + break; + case '.': + case 'e': + case 'E': + break; + case '\0': + tok = enqueue_token(ctx, is_decimal ? TOK_FLOAT : TOK_INT); + tok->tok_value.i.v = 0; + return true; default: - b_string_release(str); return false; } + break; + default: + break; } - if (has_sign && base != 10) { - b_string_release(str); - return false; + if (!strcmp(s + validation_offset, "inf")) { + tok = enqueue_token(ctx, TOK_FLOAT); + tok->tok_value.f.v = (*s == '-') ? -1 : 0; + tok->tok_value.f.inf = true; + return true; + } else if (!strcmp(s + validation_offset, "nan")) { + tok = enqueue_token(ctx, TOK_FLOAT); + tok->tok_value.f.v = (*s == '-') ? 
-1 : 0; + tok->tok_value.f.nan = true; + return true; } - if (offset == len) { - b_string_release(str); - return false; - } - - bool is_valid = true; - bool is_double = false; - char previous = 0; - for (size_t i = offset; i < len; i++) { + + for (size_t i = validation_offset; s[i]; i++) { char c = s[i]; - if (previous == '_' && !isnumber(c)) { - is_valid = false; - break; - } - if (c == '_') { - if (!isnumber(previous)) { - is_valid = false; - break; + if (!is_valid_digit(previous, base)) { + return false; } - b_string_remove(str, i, 1); - len--; + size_t to_remove = (s - b_string_ptr(str)) + i; + b_string_remove(str, to_remove, 1); i--; previous = c; continue; } - if (c == 'e' || c == '.') { - if (!isnumber(c)) { - is_valid = false; - break; + if (c == '.') { + if (base != 10) { + return false; } - is_double = true; + + if (is_decimal) { + return false; + } + + if (!is_valid_digit(previous, base)) { + return false; + } + + is_decimal = true; previous = c; continue; } - if ((c == '-' || c == '+') && previous != 'e') { - is_valid = false; + if (c == 'e' || c == 'E') { + if (base == 16) { + previous = c; + continue; + } + + if (base != 10) { + return false; + } + + if (!is_valid_digit(previous, base)) { + return false; + } + + is_decimal = true; + previous = c; + continue; + } + + if (c == '+' || c == '-') { + if (base != 10) { + return false; + } + + if (previous != 'e' && previous != 'E') { + return false; + } + + previous = c; + continue; + } + + if (!is_valid_digit(c, base)) { + return false; + } + + switch (previous) { + case 0: + case 'e': + case 'E': + case '_': + case '+': + case '-': + case '.': + break; + default: + if (!is_valid_digit(previous, base)) { + return false; + } break; } previous = c; } - if (previous == '_' || previous == '.') { - is_valid = false; + switch (previous) { + case 'e': + case 'E': + if (base == 16) { + break; + } + + case '.': + case '_': + case 0: + return false; + default: + break; } - if (is_double && base != 10) { - is_valid = 
false; - } - - if (!is_valid) { - b_string_release(str); + if (has_trailing_zero(s + validation_offset) && base == 10) { return false; } - double d = 0; - long long i = 0; + long long i; + double d; + bool is_valid; - if (is_double) { + if (is_decimal) { int r = 0; - int len = strlen(s + offset); + int len = strlen(s); // d = strtold(s + offset, &ep) * mul; - int ret = sscanf(s + offset, "%lf%n", &d, &r); - d *= mul; + int ret = sscanf(s, "%lf%n", &d, &r); is_valid = (ret == 1) && r == len; } else { char *ep; - i = strtoll(s + offset, &ep, base) * mul; + i = strtoll(s, &ep, base); is_valid = ((*ep) == 0); } - b_string_release(str); - if (!is_valid) { return false; } - struct token *tok = enqueue_token(ctx, is_double ? TOK_FLOAT : TOK_INT); - tok->tok_str = b_string_steal(token_str); - - if (is_double) { + tok = enqueue_token(ctx, is_decimal ? TOK_FLOAT : TOK_INT); + if (is_decimal) { tok->tok_value.f.v = d; } else { tok->tok_value.i.v = i; @@ -626,11 +824,11 @@ static bool try_convert_word_to_bool(struct ctx *ctx, struct b_string *token_str if (!strcmp(s, "true")) { tok = enqueue_token(ctx, TOK_BOOL); - tok->tok_str = b_string_steal(token_str); + tok->tok_str = b_string_duplicate(token_str); tok->tok_value.b = true; } else if (!strcmp(s, "false")) { tok = enqueue_token(ctx, TOK_BOOL); - tok->tok_str = b_string_steal(token_str); + tok->tok_str = b_string_duplicate(token_str); tok->tok_value.b = false; } else { return false; @@ -641,6 +839,7 @@ static bool try_convert_word_to_bool(struct ctx *ctx, struct b_string *token_str static void split_word(struct ctx *ctx, struct b_string *wordbuf) { +#if 0 long len = b_string_get_size(wordbuf, B_STRLEN_NORMAL); if (!len) { return; @@ -700,11 +899,31 @@ static void split_word(struct ctx *ctx, struct b_string *wordbuf) } free(s); +#endif + const char *delims[] = {"."}; + size_t nr_delims = sizeof delims / sizeof delims[0]; + b_string_iterator it; + b_string_tokenise( + wordbuf, delims, nr_delims, 
B_STRING_TOK_F_INCLUDE_EMPTY_TOKENS, + &it); + + while (b_string_iterator_is_valid(&it)) { + if (it.iteration_index > 0) { + enqueue_token(ctx, TOK_DOT); + } + + if (it.string_length > 0) { + struct token *word = enqueue_token(ctx, TOK_WORD); + word->tok_str = b_string_create_from_cstr(it.string_value); + } + + b_string_iterator_next(&it); + } } static void read_number(struct ctx *ctx) { - int c = 0; + b_wchar c = 0; struct b_string *wordbuf = get_wordbuf(ctx); while (1) { @@ -714,15 +933,14 @@ static void read_number(struct ctx *ctx) break; } - bool ok = isalnum(c) || c == '_' || c == '-' || c == '.' - || c == '+'; + bool ok = b_wchar_is_alnum(c) || c == '_' || c == '-' + || c == '.' || c == '+'; if (!ok) { break; } - char s[] = {c, 0}; - b_string_append_cstr(wordbuf, s); + b_string_append_wc(wordbuf, c); advance_char(ctx); } @@ -735,7 +953,7 @@ static void read_number(struct ctx *ctx) static void read_word(struct ctx *ctx) { - int c = 0; + b_wchar c = 0; struct b_string *wordbuf = get_wordbuf(ctx); while (1) { @@ -745,7 +963,7 @@ static void read_word(struct ctx *ctx) break; } - bool ok = isalnum(c) || c == '_' || c == '-' || c == '.'; + bool ok = b_wchar_is_alnum(c) || c == '_' || c == '-' || c == '.'; if (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS) { ok = ok || c == ':' || c == ' ' || c == '+'; @@ -759,14 +977,18 @@ static void read_word(struct ctx *ctx) break; } - char s[] = {c, 0}; - b_string_append_cstr(wordbuf, s); + b_string_append_wc(wordbuf, c); advance_char(ctx); } bool parsed = false; b_string_trim(wordbuf); + if (b_string_get_size(wordbuf, B_STRLEN_NORMAL) == 0) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return; + } + if (ctx->ctx_flags & CTX_ENABLE_BOOLS) { parsed = try_convert_word_to_bool(ctx, wordbuf); } @@ -783,10 +1005,12 @@ static void read_word(struct ctx *ctx) return; } - const char *s = b_string_ptr(wordbuf); - for (size_t i = 0; s[i]; i++) { - c = s[i]; - bool ok = isalnum(c) || c == '_' || c == '-' || c == '.'; + b_string_iterator it; + 
b_string_foreach(&it, wordbuf) + { + /* only allow ASCII numbers/letters here */ + bool ok = isalnum(it.char_value) || it.char_value == '_' + || it.char_value == '-' || it.char_value == '.'; if (!ok) { ctx->ctx_status = B_ERR_BAD_FORMAT; return; @@ -796,6 +1020,59 @@ static void read_word(struct ctx *ctx) split_word(ctx, wordbuf); } +static b_wchar read_unicode_sequence(struct ctx *ctx) +{ + b_wchar c = peek_char(ctx); + + int expected_len = 0; + switch (c) { + case 'u': + expected_len = 4; + break; + case 'U': + expected_len = 8; + break; + default: + return B_WCHAR_INVALID; + } + + advance_char(ctx); + + char s[9] = {0}; + int len = 0; + + while (1) { + if (len >= expected_len) { + break; + } + + b_wchar c = peek_char(ctx); + if (c == -1 || c == B_WCHAR_INVALID) { + break; + } + + if (!b_wchar_is_hex_digit(c)) { + break; + } + + s[len++] = (char)c; + s[len] = 0; + advance_char(ctx); + } + + if (len != expected_len) { + return B_WCHAR_INVALID; + } + + char *ep; + c = strtoul(s, &ep, 16); + if (*ep != 0) { + return B_WCHAR_INVALID; + } + + return c; +} + static void read_string(struct ctx *ctx, bool squote) { advance_char(ctx); @@ -809,7 +1086,7 @@ static void read_string(struct ctx *ctx, bool squote) struct token *tok = enqueue_token(ctx, TOK_STRING); struct b_string *str = get_wordbuf(ctx); - int c = peek_char(ctx); + b_wchar c = peek_char(ctx); if (c == term) { advance_char(ctx); c = peek_char(ctx); @@ -819,6 +1096,7 @@ static void read_string(struct ctx *ctx, bool squote) c = peek_char(ctx); multiline = true; } else { + tok->tok_str = b_string_duplicate(str); return; } @@ -834,6 +1112,7 @@ static void read_string(struct ctx *ctx, bool squote) bool fail = false; bool esc = false; + bool cr = false; tok->tok_type = TOK_STRING; @@ -845,7 +1124,17 @@ static void read_string(struct ctx *ctx, bool squote) break; } - char s[] = {c, 0}; + if (c == '\r') { + if (!cr) { + advance_char(ctx); + cr = true; + continue; + } else { + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail 
= true; + break; + } + } if (esc) { if (c == '\n') { @@ -854,6 +1143,29 @@ static void read_string(struct ctx *ctx, bool squote) c = peek_char(ctx); } + cr = false; + esc = false; + continue; + } + + if (isspace(c)) { + while (c != -1 && isspace(c) && c != '\n') { + advance_char(ctx); + c = peek_char(ctx); + } + + if (c != '\n') { + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + + while (c != -1 && isspace(c)) { + advance_char(ctx); + c = peek_char(ctx); + } + + cr = false; esc = false; continue; } @@ -861,27 +1173,42 @@ static void read_string(struct ctx *ctx, bool squote) switch (c) { case '"': case '\\': - b_string_append_cstr(str, s); + b_string_append_wc(str, c); + advance_char(ctx); break; case 'b': - s[0] = '\b'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\b'); + advance_char(ctx); break; case 't': - s[0] = '\t'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\t'); + advance_char(ctx); break; case 'n': - s[0] = '\n'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\n'); + advance_char(ctx); break; case 'r': - s[0] = '\r'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\r'); + advance_char(ctx); break; case 'f': - s[0] = '\f'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\f'); + advance_char(ctx); + break; + case 'u': + case 'U': + c = read_unicode_sequence(ctx); + if (c == B_WCHAR_INVALID) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + + ctx->ctx_status = B_OK(b_string_append_wc(str, c)) + ? 
B_SUCCESS + : B_ERR_BAD_FORMAT; + fail = !B_OK(ctx->ctx_status); break; default: ctx->ctx_status = B_ERR_BAD_FORMAT; @@ -890,7 +1217,6 @@ static void read_string(struct ctx *ctx, bool squote) } esc = false; - advance_char(ctx); continue; } @@ -905,7 +1231,12 @@ static void read_string(struct ctx *ctx, bool squote) break; } - b_string_append_cstr(str, s); + if (cr) { + b_string_append_wc(str, '\r'); + cr = false; + } + + b_string_append_wc(str, c); } else if (c == term) { @@ -917,45 +1248,62 @@ static void read_string(struct ctx *ctx, bool squote) c = peek_char(ctx); if (c != term) { - b_string_append_cstr(str, s); + b_string_append_wc(str, term); continue; } advance_char(ctx); c = peek_char(ctx); if (c != term) { - b_string_append_cstr(str, s); - b_string_append_cstr(str, s); + b_string_append_wc(str, term); + b_string_append_wc(str, term); continue; } advance_char(ctx); c = peek_char(ctx); - if (c == term) { - b_string_append_cstr(str, s); - advance_char(ctx); - } break; } else { - b_string_append_cstr(str, s); + b_string_append_wc(str, c); } advance_char(ctx); } + if (cr) { + fail = true; + } + if (fail) { discard_token(ctx); return; } - tok->tok_str = b_string_steal(str); + if (!multiline) { + goto done; + } + + c = peek_char(ctx); + if (c == term) { + b_string_append_wc(str, c); + advance_char(ctx); + } + + c = peek_char(ctx); + if (c == term) { + b_string_append_wc(str, c); + advance_char(ctx); + } + +done: + tok->tok_str = b_string_duplicate(str); } static void read_symbol(struct ctx *ctx) { - int c = peek_char(ctx); + b_wchar c = peek_char(ctx); advance_char(ctx); struct token *tok = enqueue_token(ctx, TOK_NONE); @@ -1022,23 +1370,48 @@ static void read_symbol(struct ctx *ctx) static void read_newline(struct ctx *ctx) { - int c = peek_char(ctx); + b_wchar c = peek_char(ctx); while (c == '\n') { advance_char(ctx); c = peek_char(ctx); } enqueue_token(ctx, TOK_NEWLINE); + ctx->ctx_status = B_SUCCESS; } static void read_comment(struct ctx *ctx) { - int c = 
peek_char(ctx); - while (c != '\n' && c != -1) { + b_wchar c = peek_char(ctx); + bool cr = false; + + while (1) { + if (c == '\n') { + cr = false; + break; + } + + if (c == -1) { + break; + } + + if (cr) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } + + if (c == '\r') { + cr = true; + } + advance_char(ctx); c = peek_char(ctx); } + if (cr) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + } + if (!B_OK(ctx->ctx_status)) { return; } @@ -1047,16 +1420,34 @@ static void read_comment(struct ctx *ctx) enqueue_token(ctx, TOK_NEWLINE); } +static bool is_symbol(b_wchar c) +{ + switch (c) { + case '=': + case '.': + case ',': + case '[': + case ']': + case '{': + case '}': + return true; + default: + return false; + } +} + static enum b_status advance_token(struct ctx *ctx) { + b_wchar c = B_WCHAR_INVALID; discard_token(ctx); if (!b_queue_empty(&ctx->ctx_tokens)) { return B_SUCCESS; } - int c = peek_char(ctx); - while (isspace(c) && c != '\n') { +start: + c = peek_char(ctx); + while (isspace(c) && c != '\n' && c != '\r') { advance_char(ctx); c = peek_char(ctx); } @@ -1067,9 +1458,9 @@ static enum b_status advance_token(struct ctx *ctx) } #if 1 - while (c == '#') { + if (c == '#') { read_comment(ctx); - c = peek_char(ctx); + goto start; } #endif @@ -1077,13 +1468,23 @@ static enum b_status advance_token(struct ctx *ctx) return ctx->ctx_status; } + if (c == '\r') { + advance_char(ctx); + c = peek_char(ctx); + + if (c != '\n') { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return ctx->ctx_status; + } + } + if (c == '"') { read_string(ctx, false); } else if (c == '\'') { read_string(ctx, true); } else if ((c == '+' || c == '-') && ctx->ctx_flags & CTX_ENABLE_NUMBERS) { read_number(ctx); - } else if (ispunct(c)) { + } else if (is_symbol(c)) { read_symbol(ctx); } else if (c == '\n') { read_newline(ctx); @@ -1121,9 +1522,9 @@ static void ctx_cleanup(struct ctx *ctx) ctx->ctx_wordbuf = NULL; } - if (ctx->ctx_static_objects) { - b_hashmap_release(ctx->ctx_static_objects); - 
ctx->ctx_static_objects = NULL; + if (ctx->ctx_objects_flags) { + b_hashmap_release(ctx->ctx_objects_flags); + ctx->ctx_objects_flags = NULL; } } @@ -1136,7 +1537,7 @@ static enum b_status ctx_init(struct ctx *ctx) b_string_open_stream(ctx->ctx_linebuf, &ctx->ctx_linebuf_stream); - ctx->ctx_static_objects = b_hashmap_create(NULL, NULL); + ctx->ctx_objects_flags = b_hashmap_create(NULL, NULL); return B_SUCCESS; } @@ -1155,10 +1556,10 @@ static void print_token(struct token *tok) printf("TOK_NONE\n"); break; case TOK_WORD: - printf("TOK_WORD %s\n", tok->tok_str); + printf("TOK_WORD %s\n", b_string_ptr(tok->tok_str)); break; case TOK_STRING: - printf("TOK_STRING %s\n", tok->tok_str); + printf("TOK_STRING %s\n", b_string_ptr(tok->tok_str)); break; case TOK_TIMESTAMP: printf("TOK_TIMESTAMP %04ld-%02ld-%02ld " @@ -1251,7 +1652,7 @@ static enum b_status parse_timestamp(struct ctx *ctx, struct b_object **result) static enum b_status parse_string(struct ctx *ctx, struct b_object **result) { struct token *tok = peek_token(ctx); - struct b_string *str = b_string_create_from_cstr(tok->tok_str); + struct b_string *str = b_string_duplicate(tok->tok_str); if (!str) { return B_ERR_NO_MEMORY; } @@ -1263,11 +1664,25 @@ static enum b_status parse_string(struct ctx *ctx, struct b_object **result) static enum b_status parse_int(struct ctx *ctx, struct b_object **result) { struct token *tok = peek_token(ctx); - struct b_number *val = B_INT64(tok->tok_value.i.v); + struct b_number *val = B_LONGLONG(tok->tok_value.i.v); if (!val) { return B_ERR_NO_MEMORY; } + if (tok->tok_value.i.inf) { + if (tok->tok_value.i.v >= 0) { + b_number_set_inf_positive(val, true); + } else { + b_number_set_inf_negative(val, true); + } + } else if (tok->tok_value.i.nan) { + if (tok->tok_value.i.v >= 0) { + b_number_set_nan_positive(val, true); + } else { + b_number_set_nan_negative(val, true); + } + } + *result = B_OBJECT(val); return B_SUCCESS; } @@ -1280,6 +1695,20 @@ static enum b_status parse_float(struct 
ctx *ctx, struct b_object **result) return B_ERR_NO_MEMORY; } + if (tok->tok_value.f.inf) { + if (tok->tok_value.f.v >= 0) { + b_number_set_inf_positive(val, true); + } else { + b_number_set_inf_negative(val, true); + } + } else if (tok->tok_value.f.nan) { + if (tok->tok_value.f.v >= 0) { + b_number_set_nan_positive(val, true); + } else { + b_number_set_nan_negative(val, true); + } + } + *result = B_OBJECT(val); return B_SUCCESS; } @@ -1298,6 +1727,8 @@ static enum b_status parse_bool(struct ctx *ctx, struct b_object **result) static enum b_status parse_table_inline(struct ctx *ctx, struct b_object **result) { + DISABLE_EXTENDED_LEXING(ctx); + advance_token(ctx); struct b_dict *table = b_dict_create(); @@ -1305,8 +1736,15 @@ static enum b_status parse_table_inline(struct ctx *ctx, struct b_object **resul return B_ERR_NO_MEMORY; } + struct token *tok = peek_token(ctx); + if (tok && tok->tok_type == TOK_RIGHT_BRACE) { + *result = B_OBJECT(table); + return B_SUCCESS; + } + bool done = false; while (!done) { + struct b_object *value; enum b_status status = parse_key_value_pair(ctx, table); if (!B_OK(status)) { @@ -1314,7 +1752,7 @@ static enum b_status parse_table_inline(struct ctx *ctx, struct b_object **resul return status; } - struct token *tok = peek_token(ctx); + tok = peek_token(ctx); if (!tok) { b_dict_release(table); @@ -1351,6 +1789,8 @@ static void skip_newlines(struct ctx *ctx) static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **result) { bool done = false; + ENABLE_EXTENDED_LEXING(ctx); + advance_token(ctx); struct b_array *array = b_array_create(); @@ -1365,7 +1805,6 @@ static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **resul } if (tok->tok_type == TOK_RIGHT_BRACKET) { - advance_token(ctx); done = true; } @@ -1392,7 +1831,9 @@ static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **resul } b_array_append(array, B_RV(value)); + ENABLE_EXTENDED_LEXING(ctx); + advance_token(ctx); 
skip_newlines(ctx); tok = peek_token(ctx); @@ -1407,9 +1848,11 @@ static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **resul return B_ERR_BAD_FORMAT; } + ENABLE_EXTENDED_LEXING(ctx); advance_token(ctx); } + DISABLE_EXTENDED_LEXING(ctx); *result = B_OBJECT(array); return B_SUCCESS; } @@ -1449,7 +1892,7 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai return B_ERR_BAD_FORMAT; } - char *key = b_strdup(tok->tok_str); + b_string *key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1461,21 +1904,24 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai } while (tok && tok->tok_type == TOK_DOT) { - struct b_object *sub_dict = b_dict_at(container, key); + struct b_object *sub_dict = b_dict_at_sk(container, key); if (!sub_dict) { sub_dict = B_OBJECT(b_dict_create()); - b_dict_put(container, key, B_RV(sub_dict)); + b_dict_put_sk(container, key, B_RV(sub_dict)); } else if (sub_dict && !B_OBJECT_IS(sub_dict, DICT)) { free(key); return B_ERR_BAD_FORMAT; } - if (ctx_object_is_static(ctx, sub_dict)) { +#if 1 + enum object_flags flags = ctx_get_object_flags(ctx, sub_dict); + if (flags & (OBJECT_KV_END_DEFINED | OBJECT_HEADER_END_DEFINED)) { free(key); return B_ERR_BAD_FORMAT; } +#endif - ctx_add_static_object(ctx, sub_dict); + ctx_set_object_flags(ctx, sub_dict, OBJECT_KV_MID_DEFINED); advance_token(ctx); tok = peek_token(ctx); @@ -1485,8 +1931,8 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai } container = B_DICT(sub_dict); - free(key); - key = b_strdup(tok->tok_str); + b_string_release(key); + key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1495,7 +1941,7 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai tok = peek_token(ctx); } - if (b_dict_has_key(container, key)) { + if (b_dict_has_skey(container, key)) { return B_ERR_BAD_FORMAT; } @@ -1507,29 +1953,27 @@ 
static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai return B_ERR_BAD_FORMAT; } - ctx->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS; - ctx->ctx_flags |= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS - | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING; + ENABLE_EXTENDED_LEXING(ctx); advance_token(ctx); struct b_object *value = NULL; enum b_status status = parse_value(ctx, &value); - ctx->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS; - ctx->ctx_flags - &= ~(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS - | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING); + DISABLE_EXTENDED_LEXING(ctx); if (!B_OK(status)) { return status; } - advance_token(ctx); + status = advance_token(ctx); + if (!B_OK(status) && status != B_ERR_NO_DATA) { + return status; + } - b_dict_put(container, key, B_RV(value)); + b_dict_put_sk(container, key, B_RV(value)); if (B_OBJECT_IS(value, DICT) || B_OBJECT_IS(value, ARRAY)) { - ctx_add_static_object(ctx, value); + ctx_set_object_flags(ctx, value, OBJECT_KV_END_DEFINED); } return B_SUCCESS; @@ -1544,7 +1988,7 @@ static enum b_status parse_table_header( return B_ERR_BAD_FORMAT; } - char *key = b_strdup(tok->tok_str); + b_string *key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1556,11 +2000,13 @@ static enum b_status parse_table_header( } while (tok && tok->tok_type == TOK_DOT) { - struct b_object *sub_dict = b_dict_at(container, key); + struct b_object *sub_dict = b_dict_at_sk(container, key); + enum object_flags flags = ctx_get_object_flags(ctx, sub_dict); if (!sub_dict) { sub_dict = B_OBJECT(b_dict_create()); - b_dict_put(container, key, B_RV(sub_dict)); + b_dict_put_sk(container, key, B_RV(sub_dict)); } else if (B_OBJECT_IS(sub_dict, ARRAY)) { + sub_dict = b_array_at( B_ARRAY(sub_dict), b_array_size(B_ARRAY(sub_dict)) - 1); @@ -1568,15 +2014,21 @@ static enum b_status parse_table_header( return B_ERR_BAD_FORMAT; } + if (flags & OBJECT_KV_END_DEFINED) { + return B_ERR_BAD_FORMAT; + } + advance_token(ctx); tok = 
peek_token(ctx); if (!IS_VALID_KEY_COMPONENT(tok)) { return B_ERR_BAD_FORMAT; } + ctx_set_object_flags(ctx, sub_dict, OBJECT_HEADER_MID_DEFINED); + container = B_DICT(sub_dict); - free(key); - key = b_strdup(tok->tok_str); + b_string_release(key); + key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1589,16 +2041,9 @@ static enum b_status parse_table_header( return B_ERR_BAD_FORMAT; } - struct b_dict *new_table = B_DICT(b_dict_at(container, key)); + struct b_dict *new_table = B_DICT(b_dict_at_sk(container, key)); - if (new_table) { - if (!B_OBJECT_IS(new_table, DICT) - || ctx_object_is_static(ctx, B_OBJECT(new_table))) { - return B_ERR_BAD_FORMAT; - } - - ctx_add_static_object(ctx, B_OBJECT(new_table)); - } else { + if (!new_table) { new_table = b_dict_create(); if (!new_table) { @@ -1606,11 +2051,22 @@ static enum b_status parse_table_header( return B_ERR_NO_MEMORY; } - b_dict_put(container, key, B_RV(new_table)); - ctx_add_static_object(ctx, B_OBJECT(new_table)); + b_dict_put_sk(container, key, B_RV(new_table)); } - free(key); + if (!B_OBJECT_IS(B_OBJECT(new_table), DICT)) { + return B_ERR_BAD_FORMAT; + } + + enum object_flags flags = ctx_get_object_flags(ctx, B_OBJECT(new_table)); + if (flags + & (OBJECT_HEADER_END_DEFINED | OBJECT_KV_MID_DEFINED + | OBJECT_KV_END_DEFINED)) { + return B_ERR_BAD_FORMAT; + } + + ctx_set_object_flags(ctx, B_OBJECT(new_table), OBJECT_HEADER_END_DEFINED); + b_string_release(key); advance_token(ctx); *new_container = new_table; @@ -1626,7 +2082,7 @@ static enum b_status parse_array_header( return B_ERR_BAD_FORMAT; } - char *key = b_strdup(tok->tok_str); + b_string *key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1638,10 +2094,10 @@ static enum b_status parse_array_header( } while (tok && tok->tok_type == TOK_DOT) { - struct b_object *sub_dict = b_dict_at(container, key); + struct b_object *sub_dict = b_dict_at_sk(container, key); if (!sub_dict) { sub_dict = 
B_OBJECT(b_dict_create()); - b_dict_put(container, key, B_RV(sub_dict)); + b_dict_put_sk(container, key, B_RV(sub_dict)); } else if (B_OBJECT_IS(sub_dict, ARRAY)) { sub_dict = b_array_at( B_ARRAY(sub_dict), @@ -1657,8 +2113,8 @@ static enum b_status parse_array_header( } container = B_DICT(sub_dict); - free(key); - key = b_strdup(tok->tok_str); + b_string_release(key); + key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1671,24 +2127,26 @@ static enum b_status parse_array_header( return B_ERR_BAD_FORMAT; } - struct b_array *array = B_ARRAY(b_dict_get(container, key)); + struct b_array *array = B_ARRAY(b_dict_get_sk(container, key)); if (!array) { array = b_array_create(); - b_dict_put(container, key, B_RV(array)); - } else if ( - !B_OBJECT_IS(array, ARRAY) - || ctx_object_is_static(ctx, B_OBJECT(array))) { + b_dict_put_sk(container, key, B_RV(array)); + } else if (!B_OBJECT_IS(array, ARRAY)) { return B_ERR_BAD_FORMAT; } + b_string_release(key); /* key is a b_string (from b_string_duplicate), not a malloc'd buffer */ + + enum object_flags flags = ctx_get_object_flags(ctx, B_OBJECT(array)); + if (flags & OBJECT_KV_END_DEFINED) { + return B_ERR_BAD_FORMAT; /* redefining a key/value-defined array is a format error, not an allocation failure */ + } struct b_dict *new_table = b_dict_create(); if (!new_table) { - free(key); return B_ERR_NO_MEMORY; } b_array_append(array, B_RV(new_table)); - free(key); advance_token(ctx); *new_container = new_table; @@ -1741,6 +2199,7 @@ static enum b_status parse_root(struct ctx *ctx, struct b_dict **result) if (tok && tok->tok_type != TOK_NEWLINE) { status = B_ERR_BAD_FORMAT; } + advance_token(ctx); break; case TOK_NEWLINE: advance_token(ctx); @@ -1776,10 +2235,17 @@ static enum b_status toml_deserialise( } ctx.ctx_src = src; + ctx.ctx_flags = CTX_ENABLE_LONG_SYMBOLS; status = advance_token(&ctx); - if (!B_OK(status)) { - return status; + + if (!B_OK(ctx.ctx_status) && ctx.ctx_status != B_ERR_NO_DATA) { + return ctx.ctx_status; + } + + if (ctx.ctx_flags & CTX_EOF) { + *dest = B_OBJECT(b_dict_create()); + return B_SUCCESS; } struct b_dict *result = NULL; diff --git
a/serial/toml.h b/serial/toml.h new file mode 100644 index 0000000..205237b --- /dev/null +++ b/serial/toml.h @@ -0,0 +1,4 @@ +#ifndef TOML_H_ +#define TOML_H_ + +#endif