2273 lines
41 KiB
C
2273 lines
41 KiB
C
#include "serial.h"
|
|
|
|
#include <blue/core/status.h>
|
|
#include <blue/ds/array.h>
|
|
#include <blue/ds/datetime.h>
|
|
#include <blue/ds/dict.h>
|
|
#include <blue/ds/hashmap.h>
|
|
#include <blue/ds/number.h>
|
|
#include <blue/ds/string.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
|
|
/*
 * Evaluates to non-zero when `tok` is a non-NULL token whose type is either
 * TOK_STRING or TOK_WORD. Note that `tok` is evaluated more than once.
 */
#define IS_VALID_KEY_COMPONENT(tok) \
	((tok) && ((tok)->tok_type == TOK_STRING || (tok)->tok_type == TOK_WORD))
|
|
|
|
/*
 * Turn on recognition of numbers, timestamps, booleans and multi-line strings
 * in `ctx`, and turn off the two-character "long" symbols ([[ and ]]).
 *
 * `ctx` is any expression of type `struct ctx *`; it is parenthesised so that
 * arguments such as `&local_ctx` expand correctly.
 */
#define ENABLE_EXTENDED_LEXING(ctx) \
	do { \
		(ctx)->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS; \
		(ctx)->ctx_flags |= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS \
				    | CTX_ENABLE_BOOLS \
				    | CTX_ENABLE_MULTILINE_STRING; \
	} while (0)
|
|
|
|
/*
 * Inverse of ENABLE_EXTENDED_LEXING: turn the two-character "long" symbols
 * ([[ and ]]) back on and stop recognising numbers, timestamps, booleans and
 * multi-line strings in `ctx`.
 *
 * `ctx` is any expression of type `struct ctx *`; it is parenthesised so that
 * arguments such as `&local_ctx` expand correctly.
 */
#define DISABLE_EXTENDED_LEXING(ctx) \
	do { \
		(ctx)->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS; \
		(ctx)->ctx_flags \
			&= ~(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS \
			     | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING); \
	} while (0)
|
|
|
|
/*
 * Per-object bit flags stored in ctx->ctx_objects_flags (see
 * ctx_set_object_flags / ctx_get_object_flags).
 * NOTE(review): the names suggest they record how a table was defined
 * (by header vs. by key/value, mid-path vs. end of path) -- confirm against
 * the parser code that sets them.
 */
enum object_flags {
	OBJECT_HEADER_MID_DEFINED = 1u << 0,
	OBJECT_HEADER_END_DEFINED = 1u << 1,
	OBJECT_KV_MID_DEFINED = 1u << 2,
	OBJECT_KV_END_DEFINED = 1u << 3,
};
|
|
|
|
/* Kinds of token produced by the lexer. */
enum token_type {
	TOK_NONE = 0,

	/* text-carrying tokens */
	TOK_WORD,   /* bare word (one dot-separated component) */
	TOK_STRING, /* quoted string, escapes already decoded */

	/* typed values */
	TOK_INT,
	TOK_UINT,
	TOK_FLOAT,
	TOK_BOOL,
	TOK_TIMESTAMP,

	/* structure */
	TOK_NEWLINE,
	TOK_EQUAL,                /* =  */
	TOK_DOT,                  /* .  */
	TOK_COMMA,                /* ,  */
	TOK_LEFT_BRACKET,         /* [  */
	TOK_RIGHT_BRACKET,        /* ]  */
	TOK_DOUBLE_LEFT_BRACKET,  /* [[ */
	TOK_DOUBLE_RIGHT_BRACKET, /* ]] */
	TOK_LEFT_BRACE,           /* {  */
	TOK_RIGHT_BRACE,          /* }  */
};
|
|
|
|
/* Lexer state/mode bits kept in struct ctx::ctx_flags. */
enum ctx_flags {
	/* set by advance_token() once no more characters are available */
	CTX_EOF = 1u << 0,
	/* words may be converted to numeric tokens */
	CTX_ENABLE_NUMBERS = 1u << 1,
	/* words may be converted to timestamp tokens */
	CTX_ENABLE_TIMESTAMPS = 1u << 2,
	/* "true"/"false" become TOK_BOOL */
	CTX_ENABLE_BOOLS = 1u << 3,
	CTX_ENABLE_EXTENDED_SYMBOLS = 1u << 4,
	/* "[[" and "]]" are lexed as single tokens */
	CTX_ENABLE_LONG_SYMBOLS = 1u << 5,
	/* triple-quoted strings are accepted */
	CTX_ENABLE_MULTILINE_STRING = 1u << 6,
};
|
|
|
|
/* Parser position: outside any container, inside a table, or inside an array. */
enum ctx_state {
	CTX_STATE_NONE = 0,
	CTX_STATE_IN_TABLE = 1,
	CTX_STATE_IN_ARRAY = 2,
};
|
|
|
|
/* Broken-down date/time with an optional zone offset. */
struct timestamp {
	/* calendar date */
	unsigned int ts_year;
	unsigned int ts_month;
	unsigned int ts_day;

	/* time of day */
	unsigned short ts_hour;
	unsigned short ts_min;
	unsigned short ts_sec;
	unsigned int ts_msec;

	/* zone offset magnitude, plus a flag that is non-zero for a '-' offset */
	unsigned short ts_zone_offset_hour;
	unsigned short ts_zone_offset_minute;
	unsigned char ts_zone_offset_negative;
};
|
|
|
|
struct token {
|
|
enum token_type tok_type;
|
|
struct b_queue_entry tok_entry;
|
|
b_string *tok_str;
|
|
|
|
union {
|
|
struct {
|
|
int64_t v;
|
|
bool inf, nan;
|
|
} i;
|
|
|
|
struct {
|
|
double v;
|
|
bool inf, nan;
|
|
} f;
|
|
|
|
bool b;
|
|
// struct timestamp time;
|
|
b_datetime *time;
|
|
} tok_value;
|
|
};
|
|
|
|
struct ctx {
|
|
enum ctx_flags ctx_flags;
|
|
b_stream *ctx_src;
|
|
b_string *ctx_wordbuf;
|
|
b_string *ctx_linebuf;
|
|
b_stream *ctx_linebuf_stream;
|
|
b_string_iterator ctx_linebuf_ptr;
|
|
enum b_status ctx_status;
|
|
b_hashmap *ctx_objects_flags;
|
|
|
|
b_queue ctx_tokens;
|
|
};
|
|
|
|
/*
 * OR `flags` into the flag bits recorded for `obj` in ctx->ctx_objects_flags.
 * An object with no entry yet is treated as having no flags set.
 * Does nothing when `obj` is NULL.
 */
static void ctx_set_object_flags(
	struct ctx *ctx, b_object *obj, enum object_flags flags)
{
	if (!obj) {
		return;
	}

	/* the object's address itself is the key */
	b_hashmap_key key = {
		.key_data = obj,
		.key_size = sizeof(b_object *),
		.key_flags = B_HASHMAP_KEY_F_INTVALUE,
	};

	enum object_flags new_flags = flags;
	const b_hashmap_value *old_value
		= b_hashmap_get(ctx->ctx_objects_flags, &key);
	if (old_value) {
		new_flags |= (enum object_flags)(uintptr_t)old_value->value_data;
	}

	/* the flag word is stored in the pointer value; go through uintptr_t
	 * so the integer->pointer conversion matches the reads above */
	b_hashmap_value value = {
		.value_data = (void *)(uintptr_t)new_flags,
		.value_size = sizeof new_flags,
	};

	b_hashmap_put(ctx->ctx_objects_flags, &key, &value);
}
|
|
|
|
/*
 * Clear the bits in `mask` from the flag bits recorded for `obj` in
 * ctx->ctx_objects_flags. An entry is written even if `obj` had none before.
 * Does nothing when `obj` is NULL.
 */
static void ctx_clear_object_flags(
	struct ctx *ctx, b_object *obj, enum object_flags mask)
{
	if (!obj) {
		return;
	}

	/* the object's address itself is the key */
	b_hashmap_key key = {
		.key_data = obj,
		.key_size = sizeof(b_object *),
		.key_flags = B_HASHMAP_KEY_F_INTVALUE,
	};

	enum object_flags new_flags = 0;
	const b_hashmap_value *old_value
		= b_hashmap_get(ctx->ctx_objects_flags, &key);
	if (old_value) {
		new_flags = (enum object_flags)(uintptr_t)old_value->value_data;
	}

	new_flags &= ~mask;

	/* the flag word is stored in the pointer value; go through uintptr_t
	 * so the integer->pointer conversion matches the read above */
	b_hashmap_value value = {
		.value_data = (void *)(uintptr_t)new_flags,
		.value_size = sizeof new_flags,
	};

	b_hashmap_put(ctx->ctx_objects_flags, &key, &value);
}
|
|
|
|
/*
 * Look up the flag bits recorded for `obj` in ctx->ctx_objects_flags.
 * Returns 0 when `obj` is NULL or has no entry.
 */
static enum object_flags ctx_get_object_flags(struct ctx *ctx, b_object *obj)
{
	if (!obj) {
		return 0;
	}

	b_hashmap_key lookup = {
		.key_data = obj,
		.key_size = sizeof(b_object *),
		.key_flags = B_HASHMAP_KEY_F_INTVALUE,
	};

	const b_hashmap_value *found
		= b_hashmap_get(ctx->ctx_objects_flags, &lookup);
	if (!found) {
		return 0;
	}

	/* the flag word is stored directly in the pointer value */
	return (enum object_flags)(uintptr_t)found->value_data;
}
|
|
|
|
static enum b_status data_available(struct ctx *ctx)
|
|
{
|
|
size_t len = b_string_get_size(ctx->ctx_linebuf, B_STRLEN_NORMAL);
|
|
if (len == 0) {
|
|
return B_ERR_NO_DATA;
|
|
}
|
|
|
|
if (!B_OK(ctx->ctx_linebuf_ptr.status)) {
|
|
return ctx->ctx_linebuf_ptr.status;
|
|
}
|
|
|
|
return b_string_iterator_is_valid(&ctx->ctx_linebuf_ptr) ? B_SUCCESS
|
|
: B_ERR_NO_DATA;
|
|
}
|
|
|
|
static enum b_status refill_linebuf(struct ctx *ctx)
|
|
{
|
|
b_string_clear(ctx->ctx_linebuf);
|
|
b_stream_seek(ctx->ctx_linebuf_stream, 0, B_STREAM_SEEK_START);
|
|
|
|
enum b_status status
|
|
= b_stream_read_line_s(ctx->ctx_src, ctx->ctx_linebuf_stream);
|
|
if (!B_OK(status)) {
|
|
return status;
|
|
}
|
|
|
|
b_string_iterator_begin(ctx->ctx_linebuf, &ctx->ctx_linebuf_ptr);
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static b_string *get_wordbuf(struct ctx *ctx)
|
|
{
|
|
b_string_clear(ctx->ctx_wordbuf);
|
|
return ctx->ctx_wordbuf;
|
|
}
|
|
|
|
/*
 * Return false for characters the lexer refuses to accept: anything <= 0 and
 * the control characters 0x08, 0x0C, 0x10, 0x1F and 0x7F. Every other
 * character is accepted.
 */
static bool is_valid_char(b_wchar c)
{
	if (c <= 0) {
		return false;
	}

	const bool rejected = c == '\b' || c == 0x0C || c == 0x10 || c == 0x1F
			      || c == 0x7F;

	return !rejected;
}
|
|
|
|
static b_wchar advance_char(struct ctx *ctx)
|
|
{
|
|
enum b_status status = data_available(ctx);
|
|
if (status == B_ERR_NO_DATA) {
|
|
status = refill_linebuf(ctx);
|
|
}
|
|
|
|
if (!B_OK(status)) {
|
|
ctx->ctx_status = status;
|
|
return -1;
|
|
}
|
|
|
|
status = data_available(ctx);
|
|
if (!B_OK(status)) {
|
|
ctx->ctx_status = status;
|
|
return -1;
|
|
}
|
|
|
|
const char *s = b_string_ptr(ctx->ctx_linebuf);
|
|
if (!B_OK(ctx->ctx_linebuf_ptr.status)) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
return -1;
|
|
}
|
|
|
|
b_wchar c = ctx->ctx_linebuf_ptr.char_value;
|
|
|
|
if (!is_valid_char(c)) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
return -1;
|
|
}
|
|
|
|
b_string_iterator_next(&ctx->ctx_linebuf_ptr);
|
|
|
|
return c;
|
|
}
|
|
|
|
static b_wchar peek_char(struct ctx *ctx)
|
|
{
|
|
enum b_status status = data_available(ctx);
|
|
if (status == B_ERR_NO_DATA) {
|
|
status = refill_linebuf(ctx);
|
|
}
|
|
|
|
if (!B_OK(status)) {
|
|
ctx->ctx_status = status;
|
|
return -1;
|
|
}
|
|
|
|
status = data_available(ctx);
|
|
if (!B_OK(status)) {
|
|
ctx->ctx_status = status;
|
|
return -1;
|
|
}
|
|
|
|
const char *s = b_string_ptr(ctx->ctx_linebuf);
|
|
if (!B_OK(ctx->ctx_linebuf_ptr.status)) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
return -1;
|
|
}
|
|
|
|
b_wchar c = ctx->ctx_linebuf_ptr.char_value;
|
|
|
|
if (!is_valid_char(c)) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
return -1;
|
|
}
|
|
|
|
return c;
|
|
}
|
|
|
|
/*
 * A disabled (#if 0) pair of comment-skipping peek_char()/advance_char()
 * wrappers was removed from here. It was never compiled, duplicated the names
 * of the live functions above, and was built on __peek_char()/__advance_char()
 * helpers. '#' comments are recognised in advance_token(), which hands them
 * to read_comment().
 */
|
|
|
|
static struct token *enqueue_token(struct ctx *ctx, enum token_type type)
|
|
{
|
|
struct token *tok = malloc(sizeof *tok);
|
|
if (!tok) {
|
|
return NULL;
|
|
}
|
|
|
|
memset(tok, 0x0, sizeof *tok);
|
|
|
|
tok->tok_type = type;
|
|
|
|
b_queue_push_back(&ctx->ctx_tokens, &tok->tok_entry);
|
|
|
|
return tok;
|
|
}
|
|
|
|
static void discard_token(struct ctx *ctx)
|
|
{
|
|
struct b_queue_entry *entry = b_queue_pop_front(&ctx->ctx_tokens);
|
|
if (!entry) {
|
|
return;
|
|
}
|
|
|
|
struct token *tok = b_unbox(struct token, entry, tok_entry);
|
|
|
|
if (tok->tok_str) {
|
|
free(tok->tok_str);
|
|
}
|
|
|
|
free(tok);
|
|
}
|
|
|
|
/*
 * Try to interpret `token_str` as an RFC 3339 date/time.
 *
 * On success a TOK_TIMESTAMP token is queued; it carries a copy of the text
 * in tok_str and owns the parsed b_datetime in tok_value.time. Returns true.
 *
 * Returns false when the text does not parse. Also returns false, with
 * ctx->ctx_status set to B_ERR_NO_MEMORY, when the token cannot be allocated.
 */
static bool try_convert_word_to_timestamp(struct ctx *ctx, b_string *token_str)
{
	b_datetime *dt = b_datetime_parse(
		B_DATETIME_FORMAT_RFC3339, b_string_ptr(token_str));
	if (!dt) {
		return false;
	}

	struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP);
	if (!tok) {
		/* NOTE(review): `dt` is leaked on this path -- release it with
		 * the b_datetime release function once confirmed. */
		ctx->ctx_status = B_ERR_NO_MEMORY;
		return false;
	}

	tok->tok_str = b_string_duplicate(token_str);
	tok->tok_value.time = dt;

	return true;
}
|
|
|
|
/*
 * A disabled (#if 0) hand-written timestamp parser was removed from here. It
 * was never compiled, duplicated the name of try_convert_word_to_timestamp()
 * above (which uses b_datetime_parse()), and stored a `struct timestamp` into
 * tok_value.time, which is now a `b_datetime *`.
 */
|
|
|
|
/*
 * Return true when `c` is a digit in the given base. Only bases 2, 8, 10 and
 * 16 are supported; any other base yields false.
 */
static bool is_valid_digit(b_wchar c, int base)
{
	if (base == 2) {
		return b_wchar_is_bin_digit(c);
	}

	if (base == 8) {
		return b_wchar_is_oct_digit(c);
	}

	if (base == 10) {
		return b_wchar_is_number(c);
	}

	if (base == 16) {
		return b_wchar_is_hex_digit(c);
	}

	return false;
}
|
|
|
|
/*
 * Despite its name, this reports whether the unsigned decimal literal `s`
 * starts with a superfluous LEADING zero.
 *
 * A single '0' is allowed when it is the whole integer part, i.e. when it is
 * followed by the end of the string, '.', 'e' or 'E' ("0", "0.5", "0e5",
 * "0E5"). Two or more zeros in that position ("00", "00.5"), or any zero
 * followed by something else ("01"), count as a leading zero.
 *
 * Returns true when such a leading zero is present, false otherwise
 * (including for the empty string).
 */
static bool has_trailing_zero(const char *s)
{
	size_t nr_zero = 0;

	while (s[nr_zero] == '0') {
		nr_zero++;
	}

	switch (s[nr_zero]) {
	case '\0':
	case '.':
	case 'e':
	case 'E':
		/* the zeros are the entire integer part: one is fine */
		return nr_zero > 1;
	default:
		/* zeros followed by further digits */
		return nr_zero > 0;
	}
}
|
|
|
|
/*
 * Try to interpret `token_str` as a number and, on success, queue a TOK_INT
 * or TOK_FLOAT token carrying its value.
 *
 * Accepted forms, as implemented below:
 *   - decimal integers and floats with an optional leading '+' or '-',
 *     an optional fraction and an optional exponent;
 *   - unsigned 0b / 0o / 0x prefixed integers;
 *   - "inf" and "nan" with an optional sign (decimal only). These are queued
 *     as TOK_FLOAT with the inf/nan flag set and f.v holding -1 for a '-'
 *     sign and 0 otherwise;
 *   - '_' between digits as a separator (it is stripped before conversion).
 * Decimal literals with a superfluous leading zero are rejected.
 *
 * `token_str` itself is not modified; a private copy is edited and released
 * before returning.
 *
 * Returns true if a token was queued. Returns false if the text is not a
 * valid number; if an allocation fails, ctx->ctx_status is additionally set
 * to B_ERR_NO_MEMORY.
 */
static bool try_convert_word_to_number(struct ctx *ctx, b_string *token_str)
{
	bool result = false;
	struct token *tok = NULL;
	size_t validation_offset = 0;
	bool is_decimal = false;
	int base = 10;
	char previous = 0;
	long long int_value = 0;
	double float_value = 0.0;
	bool is_valid = false;
	char *ep = NULL;

	if (b_string_get_size(token_str, B_STRLEN_NORMAL) == 0) {
		return false;
	}

	b_string *str = b_string_duplicate(token_str);
	if (!str) {
		ctx->ctx_status = B_ERR_NO_MEMORY;
		return false;
	}

	const char *s = b_string_ptr(str);

	switch (*s) {
	case '+':
	case '-':
		/* validate from the first character after the sign */
		validation_offset++;
		break;
	case '0':
		switch (*(s + 1)) {
		case 'b':
			base = 2;
			s += 2;
			break;
		case 'o':
			base = 8;
			s += 2;
			break;
		case 'x':
			base = 16;
			s += 2;
			break;
		case '.':
		case 'e':
		case 'E':
			break;
		case '\0':
			/* the literal is exactly "0" */
			tok = enqueue_token(ctx, TOK_INT);
			if (!tok) {
				goto no_memory;
			}

			tok->tok_value.i.v = 0;
			result = true;
			goto out;
		default:
			/* "0" followed by anything else is a leading zero */
			goto out;
		}
		break;
	default:
		break;
	}

	/* inf/nan are only meaningful without a radix prefix */
	if (base == 10) {
		bool is_inf = !strcmp(s + validation_offset, "inf");
		bool is_nan = !is_inf && !strcmp(s + validation_offset, "nan");

		if (is_inf || is_nan) {
			tok = enqueue_token(ctx, TOK_FLOAT);
			if (!tok) {
				goto no_memory;
			}

			tok->tok_value.f.v = (*s == '-') ? -1 : 0;
			tok->tok_value.f.inf = is_inf;
			tok->tok_value.f.nan = is_nan;
			result = true;
			goto out;
		}
	}

	/* validate character by character, stripping '_' separators */
	for (size_t i = validation_offset; s[i]; i++) {
		char c = s[i];

		if (c == '_') {
			if (!is_valid_digit(previous, base)) {
				goto out;
			}

			/* `s` may point past a radix prefix; keep that offset
			 * and re-derive `s` in case the edit moves the buffer */
			size_t prefix = (size_t)(s - b_string_ptr(str));
			b_string_remove(str, prefix + i, 1);
			s = b_string_ptr(str) + prefix;

			/* re-examine the character that slid into slot i */
			i--;
			previous = c;
			continue;
		}

		if (c == '.') {
			if (base != 10 || is_decimal) {
				goto out;
			}

			if (!is_valid_digit(previous, base)) {
				goto out;
			}

			is_decimal = true;
			previous = c;
			continue;
		}

		if (c == 'e' || c == 'E') {
			if (base == 16) {
				/* an ordinary hex digit */
				previous = c;
				continue;
			}

			if (base != 10) {
				goto out;
			}

			if (!is_valid_digit(previous, base)) {
				goto out;
			}

			is_decimal = true;
			previous = c;
			continue;
		}

		if (c == '+' || c == '-') {
			/* a sign is only allowed directly after the exponent
			 * marker */
			if (base != 10) {
				goto out;
			}

			if (previous != 'e' && previous != 'E') {
				goto out;
			}

			previous = c;
			continue;
		}

		if (!is_valid_digit(c, base)) {
			goto out;
		}

		switch (previous) {
		case 0:
		case 'e':
		case 'E':
		case '_':
		case '+':
		case '-':
		case '.':
			break;
		default:
			if (!is_valid_digit(previous, base)) {
				goto out;
			}
			break;
		}

		previous = c;
	}

	/* the literal must not end on a separator, point or exponent marker,
	 * and must contain at least one validated character */
	switch (previous) {
	case 'e':
	case 'E':
		if (base == 16) {
			break;
		}
		/* fallthrough */
	case '.':
	case '_':
	case 0:
		goto out;
	default:
		break;
	}

	if (base == 10 && has_trailing_zero(s + validation_offset)) {
		goto out;
	}

	if (is_decimal) {
		float_value = strtod(s, &ep);
		is_valid = (ep != s) && (*ep == 0);
	} else {
		int_value = strtoll(s, &ep, base);
		is_valid = (*ep == 0);
	}

	if (!is_valid) {
		goto out;
	}

	tok = enqueue_token(ctx, is_decimal ? TOK_FLOAT : TOK_INT);
	if (!tok) {
		goto no_memory;
	}

	if (is_decimal) {
		tok->tok_value.f.v = float_value;
	} else {
		tok->tok_value.i.v = int_value;
	}

	result = true;
	goto out;

no_memory:
	ctx->ctx_status = B_ERR_NO_MEMORY;
out:
	b_string_unref(str);
	return result;
}
|
|
|
|
/*
 * Queue a TOK_BOOL token if `token_str` is exactly "true" or "false".
 * The token keeps a copy of the text in tok_str and the value in tok_value.b.
 * Returns true if a token was queued, false for any other text.
 */
static bool try_convert_word_to_bool(struct ctx *ctx, b_string *token_str)
{
	const char *text = b_string_ptr(token_str);
	const bool is_true = strcmp(text, "true") == 0;

	if (!is_true && strcmp(text, "false") != 0) {
		return false;
	}

	struct token *tok = enqueue_token(ctx, TOK_BOOL);
	tok->tok_str = b_string_duplicate(token_str);
	tok->tok_value.b = is_true;

	return true;
}
|
|
|
|
/*
 * Split `wordbuf` on '.' and queue the pieces: a TOK_WORD for every non-empty
 * piece and a TOK_DOT before every piece except the first. Empty pieces
 * (leading, trailing or doubled dots) produce only their TOK_DOT, so
 * "a..b" yields WORD DOT DOT WORD.
 *
 * If a token cannot be allocated, ctx->ctx_status is set to B_ERR_NO_MEMORY
 * and splitting stops.
 */
static void split_word(struct ctx *ctx, b_string *wordbuf)
{
	const char *delims[] = {"."};
	size_t nr_delims = sizeof delims / sizeof delims[0];
	b_string_iterator it;

	b_string_tokenise(
		wordbuf, delims, nr_delims, B_STRING_TOK_F_INCLUDE_EMPTY_TOKENS,
		&it);

	while (b_string_iterator_is_valid(&it)) {
		if (it.iteration_index > 0 && !enqueue_token(ctx, TOK_DOT)) {
			ctx->ctx_status = B_ERR_NO_MEMORY;
			break;
		}

		if (it.string_length > 0) {
			struct token *word = enqueue_token(ctx, TOK_WORD);
			if (!word) {
				ctx->ctx_status = B_ERR_NO_MEMORY;
				break;
			}

			word->tok_str = b_string_create_from_cstr(it.string_value);
		}

		b_string_iterator_next(&it);
	}
}
|
|
|
|
static void read_number(struct ctx *ctx)
|
|
{
|
|
b_wchar c = 0;
|
|
b_string *wordbuf = get_wordbuf(ctx);
|
|
|
|
while (1) {
|
|
c = peek_char(ctx);
|
|
|
|
if (c == -1 || !B_OK(ctx->ctx_status)) {
|
|
break;
|
|
}
|
|
|
|
bool ok = b_wchar_is_alnum(c) || c == '_' || c == '-'
|
|
|| c == '.' || c == '+';
|
|
|
|
if (!ok) {
|
|
break;
|
|
}
|
|
|
|
b_string_append_wc(wordbuf, c);
|
|
advance_char(ctx);
|
|
}
|
|
|
|
bool is_number = try_convert_word_to_number(ctx, wordbuf);
|
|
|
|
if (!is_number) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
}
|
|
}
|
|
|
|
static void read_word(struct ctx *ctx)
|
|
{
|
|
b_wchar c = 0;
|
|
b_string *wordbuf = get_wordbuf(ctx);
|
|
|
|
while (1) {
|
|
c = peek_char(ctx);
|
|
|
|
if (c == -1 || !B_OK(ctx->ctx_status)) {
|
|
break;
|
|
}
|
|
|
|
bool ok = b_wchar_is_alnum(c) || c == '_' || c == '-' || c == '.';
|
|
|
|
if (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS) {
|
|
ok = ok || c == ':' || c == ' ' || c == '+';
|
|
}
|
|
|
|
if (ctx->ctx_flags & CTX_ENABLE_NUMBERS) {
|
|
ok = ok || c == '+';
|
|
}
|
|
|
|
if (!ok) {
|
|
break;
|
|
}
|
|
|
|
b_string_append_wc(wordbuf, c);
|
|
advance_char(ctx);
|
|
}
|
|
|
|
bool parsed = false;
|
|
b_string_trim(wordbuf);
|
|
|
|
if (b_string_get_size(wordbuf, B_STRLEN_NORMAL) == 0) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
return;
|
|
}
|
|
|
|
if (ctx->ctx_flags & CTX_ENABLE_BOOLS) {
|
|
parsed = try_convert_word_to_bool(ctx, wordbuf);
|
|
}
|
|
|
|
if (!parsed && (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS)) {
|
|
parsed = try_convert_word_to_timestamp(ctx, wordbuf);
|
|
}
|
|
|
|
if (!parsed && (ctx->ctx_flags & CTX_ENABLE_NUMBERS)) {
|
|
parsed = try_convert_word_to_number(ctx, wordbuf);
|
|
}
|
|
|
|
if (parsed) {
|
|
return;
|
|
}
|
|
|
|
b_string_iterator it;
|
|
b_string_foreach(&it, wordbuf)
|
|
{
|
|
/* only allow ASCII numbers/letters here */
|
|
bool ok = isalnum(it.char_value) || it.char_value == '_'
|
|
|| it.char_value == '-' || it.char_value == '.';
|
|
if (!ok) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
return;
|
|
}
|
|
}
|
|
|
|
split_word(ctx, wordbuf);
|
|
}
|
|
|
|
static b_wchar read_unicode_sequence(struct ctx *ctx)
|
|
{
|
|
b_wchar c = peek_char(ctx);
|
|
|
|
int expected_len = 0;
|
|
switch (c) {
|
|
case 'u':
|
|
expected_len = 4;
|
|
break;
|
|
case 'U':
|
|
expected_len = 8;
|
|
break;
|
|
default:
|
|
return B_WCHAR_INVALID;
|
|
}
|
|
|
|
advance_char(ctx);
|
|
|
|
char s[9] = {0};
|
|
int len = 0;
|
|
|
|
while (1) {
|
|
if (len >= expected_len) {
|
|
break;
|
|
}
|
|
|
|
b_wchar c = peek_char(ctx);
|
|
if (c == -1 || c == B_WCHAR_INVALID) {
|
|
break;
|
|
}
|
|
|
|
if (!b_wchar_is_hex_digit(c)) {
|
|
break;
|
|
}
|
|
|
|
s[len++] = (char)c;
|
|
s[len] = 0;
|
|
advance_char(ctx);
|
|
}
|
|
|
|
if (len != expected_len) {
|
|
return B_WCHAR_INVALID;
|
|
}
|
|
|
|
char *ep;
|
|
c = strtoul(s, &ep, 16);
|
|
if (*ep != 0) {
|
|
return B_WCHAR_INVALID;
|
|
}
|
|
|
|
return c;
|
|
}
|
|
|
|
static void read_string(struct ctx *ctx, bool squote)
|
|
{
|
|
advance_char(ctx);
|
|
|
|
char term = '"';
|
|
if (squote) {
|
|
term = '\'';
|
|
}
|
|
|
|
bool multiline = false;
|
|
struct token *tok = enqueue_token(ctx, TOK_STRING);
|
|
b_string *str = get_wordbuf(ctx);
|
|
|
|
b_wchar c = peek_char(ctx);
|
|
if (c == term) {
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
|
|
if (c == term) {
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
multiline = true;
|
|
} else {
|
|
tok->tok_str = b_string_duplicate(str);
|
|
return;
|
|
}
|
|
|
|
if (c == '\n') {
|
|
advance_char(ctx);
|
|
}
|
|
}
|
|
|
|
if (multiline && !(ctx->ctx_flags & CTX_ENABLE_MULTILINE_STRING)) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
return;
|
|
}
|
|
|
|
bool fail = false;
|
|
bool esc = false;
|
|
bool cr = false;
|
|
|
|
tok->tok_type = TOK_STRING;
|
|
|
|
while (!fail) {
|
|
c = peek_char(ctx);
|
|
if (c == -1) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
fail = true;
|
|
break;
|
|
}
|
|
|
|
if (c == '\r') {
|
|
if (!cr) {
|
|
advance_char(ctx);
|
|
cr = true;
|
|
continue;
|
|
} else {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
fail = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (esc) {
|
|
if (c == '\n') {
|
|
while (c != -1 && isspace(c)) {
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
}
|
|
|
|
cr = false;
|
|
esc = false;
|
|
continue;
|
|
}
|
|
|
|
if (isspace(c)) {
|
|
while (c != -1 && isspace(c) && c != '\n') {
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
}
|
|
|
|
if (c != '\n') {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
fail = true;
|
|
break;
|
|
}
|
|
|
|
while (c != -1 && isspace(c)) {
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
}
|
|
|
|
cr = false;
|
|
esc = false;
|
|
continue;
|
|
}
|
|
|
|
switch (c) {
|
|
case '"':
|
|
case '\\':
|
|
b_string_append_wc(str, c);
|
|
advance_char(ctx);
|
|
break;
|
|
case 'b':
|
|
b_string_append_c(str, '\b');
|
|
advance_char(ctx);
|
|
break;
|
|
case 't':
|
|
b_string_append_c(str, '\t');
|
|
advance_char(ctx);
|
|
break;
|
|
case 'n':
|
|
b_string_append_c(str, '\n');
|
|
advance_char(ctx);
|
|
break;
|
|
case 'r':
|
|
b_string_append_c(str, '\r');
|
|
advance_char(ctx);
|
|
break;
|
|
case 'f':
|
|
b_string_append_c(str, '\f');
|
|
advance_char(ctx);
|
|
break;
|
|
case 'u':
|
|
case 'U':
|
|
c = read_unicode_sequence(ctx);
|
|
if (c == B_WCHAR_INVALID) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
fail = true;
|
|
break;
|
|
}
|
|
|
|
ctx->ctx_status = B_OK(b_string_append_wc(str, c))
|
|
? B_SUCCESS
|
|
: B_ERR_BAD_FORMAT;
|
|
fail = !B_OK(ctx->ctx_status);
|
|
break;
|
|
default:
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
fail = true;
|
|
break;
|
|
}
|
|
|
|
esc = false;
|
|
continue;
|
|
}
|
|
|
|
else if (c == '\\' && !squote) {
|
|
esc = true;
|
|
}
|
|
|
|
else if (c == '\n') {
|
|
if (!multiline) {
|
|
fail = true;
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
break;
|
|
}
|
|
|
|
if (cr) {
|
|
b_string_append_wc(str, '\r');
|
|
cr = false;
|
|
}
|
|
|
|
b_string_append_wc(str, c);
|
|
}
|
|
|
|
else if (c == term) {
|
|
advance_char(ctx);
|
|
|
|
if (!multiline) {
|
|
break;
|
|
}
|
|
|
|
c = peek_char(ctx);
|
|
if (c != term) {
|
|
b_string_append_wc(str, term);
|
|
continue;
|
|
}
|
|
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
if (c != term) {
|
|
b_string_append_wc(str, term);
|
|
b_string_append_wc(str, term);
|
|
continue;
|
|
}
|
|
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
break;
|
|
}
|
|
|
|
else {
|
|
b_string_append_wc(str, c);
|
|
}
|
|
|
|
advance_char(ctx);
|
|
}
|
|
|
|
if (cr) {
|
|
fail = true;
|
|
}
|
|
|
|
if (fail) {
|
|
discard_token(ctx);
|
|
return;
|
|
}
|
|
|
|
if (!multiline) {
|
|
goto done;
|
|
}
|
|
|
|
c = peek_char(ctx);
|
|
if (c == term) {
|
|
b_string_append_wc(str, c);
|
|
advance_char(ctx);
|
|
}
|
|
|
|
c = peek_char(ctx);
|
|
if (c == term) {
|
|
b_string_append_wc(str, c);
|
|
advance_char(ctx);
|
|
}
|
|
|
|
done:
|
|
tok->tok_str = b_string_duplicate(str);
|
|
}
|
|
|
|
static void read_symbol(struct ctx *ctx)
|
|
{
|
|
b_wchar c = peek_char(ctx);
|
|
advance_char(ctx);
|
|
|
|
struct token *tok = enqueue_token(ctx, TOK_NONE);
|
|
|
|
char s[] = {c, 0};
|
|
|
|
switch (c) {
|
|
case '=':
|
|
tok->tok_type = TOK_EQUAL;
|
|
break;
|
|
case '.':
|
|
tok->tok_type = TOK_DOT;
|
|
break;
|
|
case ',':
|
|
tok->tok_type = TOK_COMMA;
|
|
break;
|
|
case '[':
|
|
if (!(ctx->ctx_flags & CTX_ENABLE_LONG_SYMBOLS)) {
|
|
tok->tok_type = TOK_LEFT_BRACKET;
|
|
break;
|
|
}
|
|
|
|
c = peek_char(ctx);
|
|
switch (c) {
|
|
case '[':
|
|
tok->tok_type = TOK_DOUBLE_LEFT_BRACKET;
|
|
advance_char(ctx);
|
|
break;
|
|
default:
|
|
tok->tok_type = TOK_LEFT_BRACKET;
|
|
break;
|
|
}
|
|
break;
|
|
case ']':
|
|
if (!(ctx->ctx_flags & CTX_ENABLE_LONG_SYMBOLS)) {
|
|
/* if we're parsing more complex values, don't generate double-symbol tokens */
|
|
tok->tok_type = TOK_RIGHT_BRACKET;
|
|
break;
|
|
}
|
|
|
|
c = peek_char(ctx);
|
|
switch (c) {
|
|
case ']':
|
|
tok->tok_type = TOK_DOUBLE_RIGHT_BRACKET;
|
|
advance_char(ctx);
|
|
break;
|
|
default:
|
|
tok->tok_type = TOK_RIGHT_BRACKET;
|
|
break;
|
|
}
|
|
break;
|
|
case '{':
|
|
tok->tok_type = TOK_LEFT_BRACE;
|
|
break;
|
|
case '}':
|
|
tok->tok_type = TOK_RIGHT_BRACE;
|
|
break;
|
|
default:
|
|
discard_token(ctx);
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void read_newline(struct ctx *ctx)
|
|
{
|
|
b_wchar c = peek_char(ctx);
|
|
while (c == '\n') {
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
}
|
|
|
|
enqueue_token(ctx, TOK_NEWLINE);
|
|
ctx->ctx_status = B_SUCCESS;
|
|
}
|
|
|
|
static void read_comment(struct ctx *ctx)
|
|
{
|
|
b_wchar c = peek_char(ctx);
|
|
bool cr = false;
|
|
|
|
while (1) {
|
|
if (c == '\n') {
|
|
cr = false;
|
|
break;
|
|
}
|
|
|
|
if (c == -1) {
|
|
break;
|
|
}
|
|
|
|
if (cr) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
break;
|
|
}
|
|
|
|
if (c == '\r') {
|
|
cr = true;
|
|
}
|
|
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
}
|
|
|
|
if (cr) {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
}
|
|
|
|
if (!B_OK(ctx->ctx_status)) {
|
|
return;
|
|
}
|
|
|
|
advance_char(ctx);
|
|
enqueue_token(ctx, TOK_NEWLINE);
|
|
}
|
|
|
|
/* Return true when `c` starts a punctuation token handled by read_symbol(). */
static bool is_symbol(b_wchar c)
{
	return c == '=' || c == '.' || c == ',' || c == '[' || c == ']'
	       || c == '{' || c == '}';
}
|
|
|
|
static enum b_status advance_token(struct ctx *ctx)
|
|
{
|
|
b_wchar c = B_WCHAR_INVALID;
|
|
discard_token(ctx);
|
|
|
|
if (!b_queue_empty(&ctx->ctx_tokens)) {
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
start:
|
|
c = peek_char(ctx);
|
|
while (isspace(c) && c != '\n' && c != '\r') {
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
}
|
|
|
|
if (c == -1) {
|
|
ctx->ctx_flags |= CTX_EOF;
|
|
return B_ERR_NO_DATA;
|
|
}
|
|
|
|
#if 1
|
|
if (c == '#') {
|
|
read_comment(ctx);
|
|
goto start;
|
|
}
|
|
#endif
|
|
|
|
if (!B_OK(ctx->ctx_status)) {
|
|
return ctx->ctx_status;
|
|
}
|
|
|
|
if (c == '\r') {
|
|
advance_char(ctx);
|
|
c = peek_char(ctx);
|
|
|
|
if (c != '\n') {
|
|
ctx->ctx_status = B_ERR_BAD_FORMAT;
|
|
return ctx->ctx_status;
|
|
}
|
|
}
|
|
|
|
if (c == '"') {
|
|
read_string(ctx, false);
|
|
} else if (c == '\'') {
|
|
read_string(ctx, true);
|
|
} else if ((c == '+' || c == '-') && ctx->ctx_flags & CTX_ENABLE_NUMBERS) {
|
|
read_number(ctx);
|
|
} else if (is_symbol(c)) {
|
|
read_symbol(ctx);
|
|
} else if (c == '\n') {
|
|
read_newline(ctx);
|
|
} else {
|
|
read_word(ctx);
|
|
}
|
|
|
|
return ctx->ctx_status;
|
|
}
|
|
|
|
static struct token *peek_token(struct ctx *ctx)
|
|
{
|
|
struct b_queue_entry *entry = b_queue_first(&ctx->ctx_tokens);
|
|
if (!entry) {
|
|
return NULL;
|
|
}
|
|
|
|
return b_unbox(struct token, entry, tok_entry);
|
|
}
|
|
|
|
static void ctx_cleanup(struct ctx *ctx)
|
|
{
|
|
if (ctx->ctx_linebuf_stream) {
|
|
b_stream_close(ctx->ctx_linebuf_stream);
|
|
ctx->ctx_linebuf_stream = NULL;
|
|
}
|
|
|
|
if (ctx->ctx_linebuf) {
|
|
b_string_unref(ctx->ctx_linebuf);
|
|
ctx->ctx_linebuf = NULL;
|
|
}
|
|
|
|
if (ctx->ctx_wordbuf) {
|
|
b_string_unref(ctx->ctx_wordbuf);
|
|
ctx->ctx_wordbuf = NULL;
|
|
}
|
|
|
|
if (ctx->ctx_objects_flags) {
|
|
b_hashmap_unref(ctx->ctx_objects_flags);
|
|
ctx->ctx_objects_flags = NULL;
|
|
}
|
|
}
|
|
|
|
static enum b_status ctx_init(struct ctx *ctx)
|
|
{
|
|
memset(ctx, 0x0, sizeof *ctx);
|
|
|
|
ctx->ctx_linebuf = b_string_create();
|
|
ctx->ctx_wordbuf = b_string_create();
|
|
|
|
b_string_open_stream(ctx->ctx_linebuf, &ctx->ctx_linebuf_stream);
|
|
|
|
ctx->ctx_objects_flags = b_hashmap_create(NULL, NULL);
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
/*
 * Serialisation entry point. TODO: not implemented -- nothing is written to
 * `dest` and B_SUCCESS is returned unconditionally.
 */
static enum b_status toml_serialise(
	struct b_serial_ctx *serial, b_object *src, struct b_stream *dest,
	enum b_serial_flags flags)
{
	(void)serial;
	(void)src;
	(void)dest;
	(void)flags;

	return B_SUCCESS;
}
|
|
|
|
static void print_token(struct token *tok)
|
|
{
|
|
switch (tok->tok_type) {
|
|
case TOK_NONE:
|
|
printf("TOK_NONE\n");
|
|
break;
|
|
case TOK_WORD:
|
|
printf("TOK_WORD %s\n", b_string_ptr(tok->tok_str));
|
|
break;
|
|
case TOK_STRING:
|
|
printf("TOK_STRING %s\n", b_string_ptr(tok->tok_str));
|
|
break;
|
|
case TOK_TIMESTAMP:
|
|
printf("TOK_TIMESTAMP %04ld-%02ld-%02ld "
|
|
"%02ld:%02ld:%02ld.%04ld %c"
|
|
"%02ld:%02ld\n",
|
|
b_datetime_year(tok->tok_value.time),
|
|
b_datetime_month(tok->tok_value.time),
|
|
b_datetime_day(tok->tok_value.time),
|
|
b_datetime_hour(tok->tok_value.time),
|
|
b_datetime_minute(tok->tok_value.time),
|
|
b_datetime_second(tok->tok_value.time),
|
|
b_datetime_subsecond(tok->tok_value.time),
|
|
b_datetime_zone_offset_is_negative(tok->tok_value.time)
|
|
? '-'
|
|
: '+',
|
|
b_datetime_zone_offset_hour(tok->tok_value.time),
|
|
b_datetime_zone_offset_minute(tok->tok_value.time));
|
|
break;
|
|
case TOK_INT:
|
|
printf("TOK_INT ");
|
|
if (tok->tok_value.i.nan) {
|
|
printf("NaN");
|
|
} else {
|
|
printf("%lld", tok->tok_value.i.v);
|
|
}
|
|
|
|
printf("\n");
|
|
break;
|
|
case TOK_FLOAT:
|
|
printf("TOK_FLOAT ");
|
|
if (tok->tok_value.f.nan) {
|
|
printf("NaN");
|
|
} else {
|
|
printf("%lf", tok->tok_value.f.v);
|
|
}
|
|
|
|
printf("\n");
|
|
break;
|
|
case TOK_BOOL:
|
|
printf("TOK_BOOL %s\n", tok->tok_value.b ? "true" : "false");
|
|
break;
|
|
case TOK_NEWLINE:
|
|
printf("TOK_NEWLINE\n");
|
|
break;
|
|
case TOK_EQUAL:
|
|
printf("TOK_EQUAL\n");
|
|
break;
|
|
case TOK_DOT:
|
|
printf("TOK_DOT\n");
|
|
break;
|
|
case TOK_COMMA:
|
|
printf("TOK_COMMA\n");
|
|
break;
|
|
case TOK_LEFT_BRACKET:
|
|
printf("TOK_LEFT_BRACKET\n");
|
|
break;
|
|
case TOK_RIGHT_BRACKET:
|
|
printf("TOK_RIGHT_BRACKET\n");
|
|
break;
|
|
case TOK_DOUBLE_LEFT_BRACKET:
|
|
printf("TOK_DOUBLE_LEFT_BRACKET\n");
|
|
break;
|
|
case TOK_DOUBLE_RIGHT_BRACKET:
|
|
printf("TOK_DOUBLE_RIGHT_BRACKET\n");
|
|
break;
|
|
case TOK_LEFT_BRACE:
|
|
printf("TOK_LEFT_BRACE\n");
|
|
break;
|
|
case TOK_RIGHT_BRACE:
|
|
printf("TOK_RIGHT_BRACE\n");
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static enum b_status parse_value(struct ctx *ctx, b_object **result);
|
|
static enum b_status parse_key_value_pair(struct ctx *ctx, b_dict *container);
|
|
|
|
/*
 * Produce the value for the current token, which parse_value() has already
 * identified as TOK_TIMESTAMP.
 *
 * The datetime pointer held by the token is handed to the caller through
 * *result and the token's own slot is reset to NULL, so the token no longer
 * refers to it afterwards. Always returns B_SUCCESS.
 */
static enum b_status parse_timestamp(struct ctx *ctx, b_object **result)
{
	struct token *current = peek_token(ctx);
	b_datetime *stamp = current->tok_value.time;

	*result = stamp;
	current->tok_value.time = NULL;

	return B_SUCCESS;
}
|
|
|
|
/*
 * Produce the value for the current string token.
 *
 * The token's text is duplicated and the copy is stored in *result.
 * Returns B_ERR_NO_MEMORY (leaving *result untouched) if the duplicate
 * cannot be created, B_SUCCESS otherwise.
 */
static enum b_status parse_string(struct ctx *ctx, b_object **result)
{
	struct token *current = peek_token(ctx);
	b_string *copy = b_string_duplicate(current->tok_str);

	if (copy) {
		*result = copy;
		return B_SUCCESS;
	}

	return B_ERR_NO_MEMORY;
}
|
|
|
|
/*
 * Produce the value for the current integer token.
 *
 * The token's integer is wrapped with B_LONGLONG(). If the token is marked
 * as infinity or NaN, the matching positive/negative marker is set on the
 * number; the sign is taken from the stored integer (>= 0 is positive).
 * The infinity marker takes precedence over the NaN marker.
 *
 * Returns B_ERR_NO_MEMORY if the number cannot be created.
 */
static enum b_status parse_int(struct ctx *ctx, b_object **result)
{
	struct token *current = peek_token(ctx);
	b_number *number = B_LONGLONG(current->tok_value.i.v);

	if (!number) {
		return B_ERR_NO_MEMORY;
	}

	const bool non_negative = current->tok_value.i.v >= 0;

	if (current->tok_value.i.inf) {
		if (non_negative) {
			b_number_set_inf_positive(number, true);
		} else {
			b_number_set_inf_negative(number, true);
		}
	} else if (current->tok_value.i.nan) {
		if (non_negative) {
			b_number_set_nan_positive(number, true);
		} else {
			b_number_set_nan_negative(number, true);
		}
	}

	*result = number;
	return B_SUCCESS;
}
|
|
|
|
/*
 * Produce the value for the current floating-point token.
 *
 * The token's value is wrapped with B_DOUBLE(). If the token is marked as
 * infinity or NaN, the matching positive/negative marker is set on the
 * number; a stored value that compares >= 0 selects the positive marker,
 * anything else the negative one. The infinity marker takes precedence
 * over the NaN marker.
 *
 * Returns B_ERR_NO_MEMORY if the number cannot be created.
 */
static enum b_status parse_float(struct ctx *ctx, b_object **result)
{
	struct token *current = peek_token(ctx);
	b_number *number = B_DOUBLE(current->tok_value.f.v);

	if (!number) {
		return B_ERR_NO_MEMORY;
	}

	/* Keep the ">= 0" form: it is not the same as "!(v < 0)" for NaN. */
	const bool non_negative = current->tok_value.f.v >= 0;

	if (current->tok_value.f.inf) {
		if (non_negative) {
			b_number_set_inf_positive(number, true);
		} else {
			b_number_set_inf_negative(number, true);
		}
	} else if (current->tok_value.f.nan) {
		if (non_negative) {
			b_number_set_nan_positive(number, true);
		} else {
			b_number_set_nan_negative(number, true);
		}
	}

	*result = number;
	return B_SUCCESS;
}
|
|
|
|
/*
 * Produce the value for the current boolean token.
 *
 * The token's boolean is wrapped with B_INT8() and stored in *result.
 * Returns B_ERR_NO_MEMORY (leaving *result untouched) if the number cannot
 * be created, B_SUCCESS otherwise.
 */
static enum b_status parse_bool(struct ctx *ctx, b_object **result)
{
	struct token *current = peek_token(ctx);
	b_number *flag = B_INT8(current->tok_value.b);

	if (flag) {
		*result = flag;
		return B_SUCCESS;
	}

	return B_ERR_NO_MEMORY;
}
|
|
|
|
/*
 * Parse an inline table: `{ key = value, key = value }`.
 *
 * On entry the current token is the opening brace. On success the new
 * dictionary is stored in *result and the current token is the closing
 * brace; it is left in place because the callers of parse_value() advance
 * past the last token of a value themselves.
 *
 * Returns:
 *   B_SUCCESS         - table parsed, *result set.
 *   B_ERR_NO_MEMORY   - the dictionary could not be allocated.
 *   B_ERR_BAD_FORMAT  - a pair is followed by something other than `,` or
 *                       `}`, or the input ends before the closing brace.
 *   any error reported by parse_key_value_pair().
 *
 * On every error path the partially built table is released and *result is
 * not written.
 */
static enum b_status parse_table_inline(struct ctx *ctx, b_object **result)
{
	/*
	 * Switch the lexer to its non-value configuration (long symbols on;
	 * numbers, timestamps, bools and multi-line strings off) before reading
	 * the first key.
	 */
	DISABLE_EXTENDED_LEXING(ctx);

	/* Step over the opening brace. */
	advance_token(ctx);

	b_dict *table = b_dict_create();
	if (!table) {
		return B_ERR_NO_MEMORY;
	}

	struct token *tok = peek_token(ctx);
	if (tok && tok->tok_type == TOK_RIGHT_BRACE) {
		/* Empty table. */
		*result = table;
		return B_SUCCESS;
	}

	for (;;) {
		enum b_status status = parse_key_value_pair(ctx, table);
		if (!B_OK(status)) {
			b_dict_unref(table);
			return status;
		}

		tok = peek_token(ctx);
		if (!tok) {
			/*
			 * Input ended inside the table. The pair above parsed
			 * successfully, so `status` is a success code here and must
			 * not be returned: the table is gone and *result is unset.
			 */
			b_dict_unref(table);
			return B_ERR_BAD_FORMAT;
		}

		if (tok->tok_type == TOK_RIGHT_BRACE) {
			break;
		}

		if (tok->tok_type != TOK_COMMA) {
			b_dict_unref(table);
			return B_ERR_BAD_FORMAT;
		}

		/* Consume the comma and go round for the next pair. */
		advance_token(ctx);
	}

	*result = table;
	return B_SUCCESS;
}
|
|
|
|
static void skip_newlines(struct ctx *ctx)
|
|
{
|
|
struct token *tok = peek_token(ctx);
|
|
|
|
while (tok && tok->tok_type == TOK_NEWLINE) {
|
|
advance_token(ctx);
|
|
tok = peek_token(ctx);
|
|
}
|
|
}
|
|
|
|
/*
 * Parse an inline array: `[ value, value, ... ]`.
 *
 * On entry the current token is the opening bracket. Newlines are skipped
 * before and after each element, and a comma may be followed directly by
 * the closing bracket. On success the array is stored in *result, the
 * current token is the closing bracket, and the lexer is switched back to
 * its non-value configuration.
 *
 * Returns B_ERR_NO_MEMORY if the array cannot be allocated,
 * B_ERR_BAD_FORMAT if the input ends early or an element is followed by
 * something other than `,` or `]`, or any error from parse_value(). The
 * partially built array is released on every error path after allocation.
 */
static enum b_status parse_array_inline(struct ctx *ctx, b_object **result)
{
	enum b_status status = B_SUCCESS;
	bool closed = false;

	/* Elements are values, so value lexing must be on before advancing. */
	ENABLE_EXTENDED_LEXING(ctx);
	advance_token(ctx);

	b_array *items = b_array_create();
	if (!items) {
		return B_ERR_NO_MEMORY;
	}

	struct token *cur = peek_token(ctx);
	if (!cur) {
		status = B_ERR_BAD_FORMAT;
		goto fail;
	}

	closed = (cur->tok_type == TOK_RIGHT_BRACKET);

	while (!closed) {
		skip_newlines(ctx);

		cur = peek_token(ctx);
		if (!cur) {
			status = B_ERR_BAD_FORMAT;
			goto fail;
		}

		if (cur->tok_type == TOK_RIGHT_BRACKET) {
			break;
		}

		b_object *element;
		status = parse_value(ctx, &element);
		if (!B_OK(status)) {
			goto fail;
		}

		b_array_append(items, B_RV(element));

		/* A nested value may have changed the lexer flags; restore them. */
		ENABLE_EXTENDED_LEXING(ctx);
		advance_token(ctx);
		skip_newlines(ctx);

		cur = peek_token(ctx);
		if (!cur) {
			status = B_ERR_BAD_FORMAT;
			goto fail;
		}

		switch (cur->tok_type) {
		case TOK_RIGHT_BRACKET:
			closed = true;
			break;
		case TOK_COMMA:
			ENABLE_EXTENDED_LEXING(ctx);
			advance_token(ctx);
			break;
		default:
			status = B_ERR_BAD_FORMAT;
			goto fail;
		}
	}

	DISABLE_EXTENDED_LEXING(ctx);
	*result = items;
	return B_SUCCESS;

fail:
	b_array_unref(items);
	return status;
}
|
|
|
|
/*
 * Parse the value that starts at the current token by dispatching on the
 * token type to the matching parse_*() helper.
 *
 * Returns the helper's status. Returns B_ERR_BAD_FORMAT when no token is
 * available or when the token type has no case below (this includes
 * TOK_WORD and TOK_UINT).
 */
static enum b_status parse_value(struct ctx *ctx, b_object **result)
{
	enum b_status status = B_ERR_BAD_FORMAT;
	struct token *cur = peek_token(ctx);

	if (cur) {
		switch (cur->tok_type) {
		case TOK_STRING:
			status = parse_string(ctx, result);
			break;
		case TOK_INT:
			status = parse_int(ctx, result);
			break;
		case TOK_FLOAT:
			status = parse_float(ctx, result);
			break;
		case TOK_BOOL:
			status = parse_bool(ctx, result);
			break;
		case TOK_TIMESTAMP:
			status = parse_timestamp(ctx, result);
			break;
		case TOK_LEFT_BRACKET:
			status = parse_array_inline(ctx, result);
			break;
		case TOK_LEFT_BRACE:
			status = parse_table_inline(ctx, result);
			break;
		default:
			break;
		}
	}

	return status;
}
|
|
|
|
/*
 * Parse one `key = value` pair, where the key may be dotted (`a.b.c`), and
 * store the value in `container`.
 *
 * For every dotted component except the last, the matching sub-dictionary
 * is looked up in the current container (and created if absent), marked
 * OBJECT_KV_MID_DEFINED, and becomes the container for the next component.
 * The value is stored under the last component. Dictionaries and arrays
 * stored this way are marked OBJECT_KV_END_DEFINED.
 *
 * On entry the current token is the first key component. On success the
 * token following the value has been made current.
 *
 * Returns:
 *   B_SUCCESS         - pair stored.
 *   B_ERR_NO_MEMORY   - a key copy or intermediate dictionary could not be
 *                       allocated.
 *   B_ERR_BAD_FORMAT  - invalid key component, an intermediate key that
 *                       names a non-dictionary, an intermediate dictionary
 *                       already marked OBJECT_KV_END_DEFINED or
 *                       OBJECT_HEADER_END_DEFINED, a duplicate final key,
 *                       or a missing `=`.
 *   any error from parse_value(), or from advance_token() other than
 *   B_ERR_NO_DATA.
 *
 * The working copy of the key is released with b_string_unref() on every
 * exit path.
 */
static enum b_status parse_key_value_pair(struct ctx *ctx, b_dict *container)
{
	/* Every `goto out` that does not set status first is a format error. */
	enum b_status status = B_ERR_BAD_FORMAT;
	b_object *value = NULL;

	struct token *tok = peek_token(ctx);
	if (!IS_VALID_KEY_COMPONENT(tok)) {
		return B_ERR_BAD_FORMAT;
	}

	b_string *key = b_string_duplicate(tok->tok_str);
	if (!key) {
		return B_ERR_NO_MEMORY;
	}

	advance_token(ctx);
	tok = peek_token(ctx);
	if (!tok) {
		goto out;
	}

	/* Walk the dotted prefix, descending one dictionary per component. */
	while (tok && tok->tok_type == TOK_DOT) {
		b_object *sub_dict = b_dict_at_sk(container, key);
		if (!sub_dict) {
			sub_dict = b_dict_create();
			if (!sub_dict) {
				status = B_ERR_NO_MEMORY;
				goto out;
			}
			b_dict_put_sk(container, key, B_RV(sub_dict));
		} else if (!b_object_is_type(sub_dict, B_TYPE_DICT)) {
			goto out;
		}

		/* A table whose definition is already complete cannot be extended. */
		enum object_flags flags = ctx_get_object_flags(ctx, sub_dict);
		if (flags & (OBJECT_KV_END_DEFINED | OBJECT_HEADER_END_DEFINED)) {
			goto out;
		}

		ctx_set_object_flags(ctx, sub_dict, OBJECT_KV_MID_DEFINED);

		advance_token(ctx);
		tok = peek_token(ctx);
		if (!IS_VALID_KEY_COMPONENT(tok)) {
			goto out;
		}

		container = sub_dict;
		b_string_unref(key);
		key = b_string_duplicate(tok->tok_str);
		if (!key) {
			/* The previous key is already released; nothing to clean up. */
			return B_ERR_NO_MEMORY;
		}

		advance_token(ctx);
		tok = peek_token(ctx);
	}

	/* The final key must not already exist in its container. */
	if (b_dict_has_skey(container, key)) {
		goto out;
	}

	if (!tok || tok->tok_type != TOK_EQUAL) {
		goto out;
	}

	/* Value lexing is enabled only while the value itself is read. */
	ENABLE_EXTENDED_LEXING(ctx);
	advance_token(ctx);

	status = parse_value(ctx, &value);

	DISABLE_EXTENDED_LEXING(ctx);

	if (!B_OK(status)) {
		goto out;
	}

	/* Running out of input right after the value is not an error. */
	status = advance_token(ctx);
	if (!B_OK(status) && status != B_ERR_NO_DATA) {
		/*
		 * NOTE(review): `value` is not released on this path, as in the
		 * original code; confirm the right release call for a b_object.
		 */
		goto out;
	}

	b_dict_put_sk(container, key, B_RV(value));

	if (b_object_is_type(value, B_TYPE_DICT)
	    || b_object_is_type(value, B_TYPE_ARRAY)) {
		ctx_set_object_flags(ctx, value, OBJECT_KV_END_DEFINED);
	}

	status = B_SUCCESS;

out:
	b_string_unref(key);
	return status;
}
|
|
|
|
/*
 * Parse a table header: `[a.b.c]`.
 *
 * On entry the current token is the opening bracket. Each dotted component
 * except the last is resolved inside the current container: a missing entry
 * is created as a new dictionary, an array entry resolves to its last
 * element, and any other non-dictionary entry is an error. Each resolved
 * intermediate is marked OBJECT_HEADER_MID_DEFINED. The last component names
 * the table itself, which is created if absent and marked
 * OBJECT_HEADER_END_DEFINED.
 *
 * On success *new_container is set to the table and the token after the
 * closing bracket has been made current.
 *
 * Returns:
 *   B_SUCCESS         - header parsed, *new_container set.
 *   B_ERR_NO_MEMORY   - a key copy or dictionary could not be allocated.
 *   B_ERR_BAD_FORMAT  - invalid key component, missing `]`, an intermediate
 *                       entry marked OBJECT_KV_END_DEFINED, a target that is
 *                       not a dictionary, or a target already marked
 *                       OBJECT_HEADER_END_DEFINED, OBJECT_KV_MID_DEFINED or
 *                       OBJECT_KV_END_DEFINED.
 *
 * The working copy of the key is released with b_string_unref() on every
 * exit path.
 */
static enum b_status parse_table_header(
	struct ctx *ctx, b_dict *container, b_dict **new_container)
{
	/* Every `goto out` that does not set status first is a format error. */
	enum b_status status = B_ERR_BAD_FORMAT;
	b_dict *new_table = NULL;

	/* Step over the opening bracket. */
	advance_token(ctx);
	struct token *tok = peek_token(ctx);
	if (!IS_VALID_KEY_COMPONENT(tok)) {
		return B_ERR_BAD_FORMAT;
	}

	b_string *key = b_string_duplicate(tok->tok_str);
	if (!key) {
		return B_ERR_NO_MEMORY;
	}

	advance_token(ctx);
	tok = peek_token(ctx);
	if (!tok) {
		goto out;
	}

	while (tok && tok->tok_type == TOK_DOT) {
		b_object *sub_dict = b_dict_at_sk(container, key);

		/*
		 * The flags are read from the entry as stored in the container,
		 * before an array entry is replaced by its last element.
		 */
		enum object_flags flags = ctx_get_object_flags(ctx, sub_dict);

		if (!sub_dict) {
			sub_dict = b_dict_create();
			if (!sub_dict) {
				status = B_ERR_NO_MEMORY;
				goto out;
			}
			b_dict_put_sk(container, key, B_RV(sub_dict));
		} else if (b_object_is_type(sub_dict, B_TYPE_ARRAY)) {
			sub_dict = b_array_at(sub_dict, b_array_size(sub_dict) - 1);
		} else if (!b_object_is_type(sub_dict, B_TYPE_DICT)) {
			goto out;
		}

		if (flags & OBJECT_KV_END_DEFINED) {
			goto out;
		}

		advance_token(ctx);
		tok = peek_token(ctx);
		if (!IS_VALID_KEY_COMPONENT(tok)) {
			goto out;
		}

		ctx_set_object_flags(ctx, sub_dict, OBJECT_HEADER_MID_DEFINED);

		container = sub_dict;
		b_string_unref(key);
		key = b_string_duplicate(tok->tok_str);
		if (!key) {
			/* The previous key is already released; nothing to clean up. */
			return B_ERR_NO_MEMORY;
		}

		advance_token(ctx);
		tok = peek_token(ctx);
	}

	if (!tok || tok->tok_type != TOK_RIGHT_BRACKET) {
		goto out;
	}

	new_table = b_dict_at_sk(container, key);
	if (!new_table) {
		new_table = b_dict_create();
		if (!new_table) {
			status = B_ERR_NO_MEMORY;
			goto out;
		}

		b_dict_put_sk(container, key, B_RV(new_table));
	}

	if (!b_object_is_type(new_table, B_TYPE_DICT)) {
		goto out;
	}

	/* A table may be opened by a header only once, and not after key/value
	 * pairs have defined or extended it. */
	if (ctx_get_object_flags(ctx, new_table)
	    & (OBJECT_HEADER_END_DEFINED | OBJECT_KV_MID_DEFINED
	       | OBJECT_KV_END_DEFINED)) {
		goto out;
	}

	ctx_set_object_flags(ctx, new_table, OBJECT_HEADER_END_DEFINED);

	/* Step over the closing bracket. */
	advance_token(ctx);
	*new_container = new_table;
	status = B_SUCCESS;

out:
	b_string_unref(key);
	return status;
}
|
|
|
|
/*
 * Parse an array-of-tables header: `[[a.b.c]]`.
 *
 * On entry the current token is the opening double bracket. Each dotted
 * component except the last is resolved inside the current container: a
 * missing entry is created as a new dictionary, an array entry resolves to
 * its last element, and any other non-dictionary entry is an error. The
 * last component names the array, which is created if absent. A new empty
 * dictionary is appended to that array.
 *
 * On success *new_container is set to the appended dictionary and the token
 * after the closing double bracket has been made current.
 *
 * Returns:
 *   B_SUCCESS         - header parsed, *new_container set.
 *   B_ERR_NO_MEMORY   - a key copy, dictionary or array could not be
 *                       allocated.
 *   B_ERR_BAD_FORMAT  - invalid key component, missing `]]`, a target that
 *                       is not an array, or a target array marked
 *                       OBJECT_KV_END_DEFINED.
 *
 * The working copy of the key is released with b_string_unref() on every
 * exit path.
 */
static enum b_status parse_array_header(
	struct ctx *ctx, b_dict *container, b_dict **new_container)
{
	/* Every `goto out` that does not set status first is a format error. */
	enum b_status status = B_ERR_BAD_FORMAT;
	b_array *array = NULL;
	b_dict *new_table = NULL;

	/* Step over the opening double bracket. */
	advance_token(ctx);
	struct token *tok = peek_token(ctx);
	if (!IS_VALID_KEY_COMPONENT(tok)) {
		return B_ERR_BAD_FORMAT;
	}

	b_string *key = b_string_duplicate(tok->tok_str);
	if (!key) {
		return B_ERR_NO_MEMORY;
	}

	advance_token(ctx);
	tok = peek_token(ctx);
	if (!tok) {
		goto out;
	}

	while (tok && tok->tok_type == TOK_DOT) {
		b_object *sub_dict = b_dict_at_sk(container, key);
		if (!sub_dict) {
			sub_dict = b_dict_create();
			if (!sub_dict) {
				status = B_ERR_NO_MEMORY;
				goto out;
			}
			b_dict_put_sk(container, key, B_RV(sub_dict));
		} else if (b_object_is_type(sub_dict, B_TYPE_ARRAY)) {
			sub_dict = b_array_at(sub_dict, b_array_size(sub_dict) - 1);
		} else if (!b_object_is_type(sub_dict, B_TYPE_DICT)) {
			goto out;
		}

		advance_token(ctx);
		tok = peek_token(ctx);
		if (!IS_VALID_KEY_COMPONENT(tok)) {
			goto out;
		}

		container = sub_dict;
		b_string_unref(key);
		key = b_string_duplicate(tok->tok_str);
		if (!key) {
			/* The previous key is already released; nothing to clean up. */
			return B_ERR_NO_MEMORY;
		}

		advance_token(ctx);
		tok = peek_token(ctx);
	}

	if (!tok || tok->tok_type != TOK_DOUBLE_RIGHT_BRACKET) {
		goto out;
	}

	array = b_dict_get_sk(container, key);
	if (!array) {
		array = b_array_create();
		if (!array) {
			status = B_ERR_NO_MEMORY;
			goto out;
		}
		b_dict_put_sk(container, key, B_RV(array));
	} else if (!b_object_is_type(array, B_TYPE_ARRAY)) {
		goto out;
	}

	/*
	 * An array already defined by a key/value pair cannot be extended by a
	 * header. This is a format error, matching parse_table_header().
	 */
	if (ctx_get_object_flags(ctx, array) & OBJECT_KV_END_DEFINED) {
		goto out;
	}

	new_table = b_dict_create();
	if (!new_table) {
		status = B_ERR_NO_MEMORY;
		goto out;
	}

	b_array_append(array, B_RV(new_table));

	/* Step over the closing double bracket. */
	advance_token(ctx);
	*new_container = new_table;
	status = B_SUCCESS;

out:
	b_string_unref(key);
	return status;
}
|
|
|
|
/*
 * Parse a whole document into a freshly created root dictionary.
 *
 * The loop handles one top-level construct per iteration:
 *   - `[` starts a table header; later pairs go into that table.
 *   - `[[` starts an array-of-tables header; later pairs go into the newly
 *     appended table.
 *   - a word or string starts a key/value pair, stored in the current table
 *     (the root until a header is seen).
 *   - a newline is skipped.
 * Headers are always resolved relative to the root. Each header or pair
 * must be followed by a newline or by the end of input.
 *
 * The loop stops at CTX_EOF, when no token is available, or on the first
 * error. A ctx->ctx_status error other than B_ERR_NO_DATA overrides the
 * parse status.
 *
 * On success *result receives the root dictionary. On failure the root is
 * released and *result is set to NULL.
 */
static enum b_status parse_root(struct ctx *ctx, b_dict **result)
{
	enum b_status status = B_SUCCESS;

	b_dict *root = b_dict_create();
	if (!root) {
		*result = NULL;
		return B_ERR_NO_MEMORY;
	}

	b_dict *current = root;

	while (!(ctx->ctx_flags & CTX_EOF) && B_OK(status)) {
		struct token *tok = peek_token(ctx);
		if (!tok) {
			break;
		}

		switch (tok->tok_type) {
		case TOK_LEFT_BRACKET:
			status = parse_table_header(ctx, root, &current);
			if (!B_OK(status)) {
				break;
			}

			tok = peek_token(ctx);
			if (tok && tok->tok_type != TOK_NEWLINE) {
				status = B_ERR_BAD_FORMAT;
			}
			break;
		case TOK_DOUBLE_LEFT_BRACKET:
			status = parse_array_header(ctx, root, &current);
			if (!B_OK(status)) {
				break;
			}

			tok = peek_token(ctx);
			if (tok && tok->tok_type != TOK_NEWLINE) {
				status = B_ERR_BAD_FORMAT;
			}
			break;
		case TOK_WORD:
		case TOK_STRING:
			status = parse_key_value_pair(ctx, current);
			if (!B_OK(status)) {
				break;
			}

			tok = peek_token(ctx);
			if (tok && tok->tok_type != TOK_NEWLINE) {
				status = B_ERR_BAD_FORMAT;
			}
			advance_token(ctx);
			break;
		case TOK_NEWLINE:
			advance_token(ctx);
			break;
		default:
			status = B_ERR_BAD_FORMAT;
			break;
		}

		/* A lexer error takes priority; running out of data does not count. */
		if (!B_OK(ctx->ctx_status) && ctx->ctx_status != B_ERR_NO_DATA) {
			status = ctx->ctx_status;
		}
	}

	if (!B_OK(status)) {
		b_dict_unref(root);
		root = NULL;
	}

	*result = root;
	return status;
}
|
|
|
|
/*
 * Deserialise a TOML document read from `src` into a dictionary.
 *
 * A parsing context is initialised, the first token is read, and the
 * document is parsed with parse_root(). Input that reaches end-of-file on
 * the first token yields an empty dictionary.
 *
 * `serial` and `flags` are not used by this function.
 *
 * Returns:
 *   B_SUCCESS        - *dest set to the resulting dictionary.
 *   B_ERR_NO_MEMORY  - the dictionary for an empty document could not be
 *                      allocated.
 *   any error from ctx_init(), any ctx_status error other than
 *   B_ERR_NO_DATA raised while reading the first token, or any error from
 *   parse_root(). *dest is not written on failure.
 *
 * NOTE(review): the context set up by ctx_init() is never torn down here;
 * confirm whether a matching cleanup routine exists and should be called.
 */
static enum b_status toml_deserialise(
	struct b_serial_ctx *serial, struct b_stream *src, b_object **dest,
	enum b_serial_flags flags)
{
	struct ctx ctx = {0};
	enum b_status status = ctx_init(&ctx);

	if (!B_OK(status)) {
		return status;
	}

	ctx.ctx_src = src;
	ctx.ctx_flags = CTX_ENABLE_LONG_SYMBOLS;

	/* Prime the lexer; the outcome is read from ctx.ctx_status below. */
	advance_token(&ctx);

	if (!B_OK(ctx.ctx_status) && ctx.ctx_status != B_ERR_NO_DATA) {
		return ctx.ctx_status;
	}

	if (ctx.ctx_flags & CTX_EOF) {
		/* No tokens at all: the document is an empty table. */
		b_dict *empty = b_dict_create();
		if (!empty) {
			return B_ERR_NO_MEMORY;
		}

		*dest = empty;
		return B_SUCCESS;
	}

	b_dict *result = NULL;
	status = parse_root(&ctx, &result);
	if (!B_OK(status)) {
		return status;
	}

	*dest = result;

#if 0
	/* Disabled debugging aid: dump the remaining token stream. */
	ctx.ctx_flags
		= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS | CTX_ENABLE_BOOLS;

	while (!(ctx.ctx_flags & CTX_EOF) && B_OK(ctx.ctx_status)) {
		struct token *tok = peek_token(&ctx);
		print_token(tok);
		status = advance_token(&ctx);
	}
#endif

	return B_SUCCESS;
}
|
|
|
|
const struct b_serial_format_ops z__b_toml_format_ops = {
|
|
.fmt_serialise = toml_serialise,
|
|
.fmt_deserialise = toml_deserialise,
|
|
};
|