From bdf201dedcc28104a776d087c23b7957da0a4dbf Mon Sep 17 00:00:00 2001 From: Max Wash Date: Fri, 19 Sep 2025 15:47:59 +0100 Subject: [PATCH 01/23] TEMP: add toml parser --- serial/toml.c | 1809 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1809 insertions(+) create mode 100644 serial/toml.c diff --git a/serial/toml.c b/serial/toml.c new file mode 100644 index 0000000..d35699b --- /dev/null +++ b/serial/toml.c @@ -0,0 +1,1809 @@ +#include "blue/core/status.h" +#include "blue/object/datetime.h" +#include "serial.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IS_VALID_KEY_COMPONENT(tok) \ + ((tok) && ((tok)->tok_type == TOK_WORD || (tok)->tok_type == TOK_STRING)) + +enum token_type { + TOK_NONE = 0, + TOK_WORD, + TOK_STRING, + TOK_INT, + TOK_UINT, + TOK_FLOAT, + TOK_BOOL, + TOK_TIMESTAMP, + TOK_NEWLINE, + TOK_EQUAL, + TOK_DOT, + TOK_COMMA, + TOK_LEFT_BRACKET, + TOK_RIGHT_BRACKET, + TOK_DOUBLE_LEFT_BRACKET, + TOK_DOUBLE_RIGHT_BRACKET, + TOK_LEFT_BRACE, + TOK_RIGHT_BRACE, +}; + +struct timestamp { + unsigned int ts_year, ts_month, ts_day; + unsigned short ts_hour, ts_min, ts_sec; + unsigned int ts_msec; + + unsigned short ts_zone_offset_hour, ts_zone_offset_minute; + unsigned char ts_zone_offset_negative; +}; + +struct token { + enum token_type tok_type; + struct b_queue_entry tok_entry; + char *tok_str; + + union { + struct { + int64_t v; + bool nan; + } i; + + struct { + double v; + bool nan; + } f; + + bool b; + // struct timestamp time; + b_datetime *time; + } tok_value; +}; + +enum ctx_flags { + CTX_EOF = 0x01u, + CTX_ENABLE_NUMBERS = 0x02u, + CTX_ENABLE_TIMESTAMPS = 0x04u, + CTX_ENABLE_BOOLS = 0x08u, + CTX_ENABLE_LONG_SYMBOLS = 0x10u, + CTX_ENABLE_MULTILINE_STRING = 0x20u, +}; + +enum ctx_state { + CTX_STATE_NONE = 0, + CTX_STATE_IN_TABLE, + CTX_STATE_IN_ARRAY, +}; + +struct ctx { + enum ctx_flags ctx_flags; + b_stream *ctx_src; + b_string *ctx_wordbuf; + b_string *ctx_linebuf; + 
/*
 * Decide whether input byte `c` is acceptable in the raw TOML text.
 *
 * Accepted: tab, line feed, printable ASCII (0x20-0x7E) and the values
 * 0x80-0xFE.
 * Rejected: non-positive values, every other C0 control character
 * (0x01-0x08 and 0x0B-0x1F, so '\r', '\b' and '\f' remain rejected as
 * before), DEL (0x7F) and 0xFF.
 *
 * The previous switch listed only a handful of control characters, so bytes
 * such as 0x01 or 0x1B were let through even though TOML forbids every
 * control character other than tab outside of line endings.
 */
static bool is_valid_char(int c)
{
	if (c <= 0) {
		return false;
	}

	/* The only control characters that may appear in a document. */
	if (c == '\t' || c == '\n') {
		return true;
	}

	if (c < 0x20 || c == 0x7F || c == 0xFF) {
		return false;
	}

	return true;
}
s[ctx->ctx_linebuf_pos++]; + + if (!is_valid_char(c)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return -1; + } + + return c; +} + +static int peek_char(struct ctx *ctx) +{ + enum b_status status = B_SUCCESS; + if (!data_available(ctx)) { + status = refill_linebuf(ctx); + } + + if (!B_OK(status)) { + ctx->ctx_status = status; + return -1; + } + + if (!data_available(ctx)) { + return -1; + } + + const char *s = b_string_ptr(ctx->ctx_linebuf); + int c = s[ctx->ctx_linebuf_pos]; + + if (!is_valid_char(c)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return -1; + } + + return c; +} + +#if 0 +static int peek_char(struct ctx *ctx) +{ + int c = __peek_char(ctx); + + if (c != '#') { + return c; + } + + c = __peek_char(ctx); + while (c != '\n' && c != -1) { + __advance_char(ctx); + c = __peek_char(ctx); + } + + return c; +} + +static int advance_char(struct ctx *ctx) +{ + int c = __advance_char(ctx); + + if (c != '#') { + return c; + } + + c = __peek_char(ctx); + while (c != '\n' && c != -1) { + __advance_char(ctx); + c = __peek_char(ctx); + } + + return c; +} +#endif + +static struct token *enqueue_token(struct ctx *ctx, enum token_type type) +{ + struct token *tok = malloc(sizeof *tok); + if (!tok) { + return NULL; + } + + memset(tok, 0x0, sizeof *tok); + + tok->tok_type = type; + + b_queue_push_back(&ctx->ctx_tokens, &tok->tok_entry); + + return tok; +} + +static void discard_token(struct ctx *ctx) +{ + struct b_queue_entry *entry = b_queue_pop_front(&ctx->ctx_tokens); + if (!entry) { + return; + } + + struct token *tok = b_unbox(struct token, entry, tok_entry); + + if (tok->tok_str) { + free(tok->tok_str); + } + + free(tok); +} + +static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *token_str) +{ + b_datetime *dt = b_datetime_parse( + B_DATETIME_FORMAT_RFC3339, b_string_ptr(token_str)); + if (!dt) { + return false; + } + + struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP); + tok->tok_str = b_string_steal(token_str); + tok->tok_value.time = dt; + + 
return true; +} + +#if 0 +static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *token_str) +{ + const char *s = b_string_ptr(token_str); + size_t len = b_string_get_size(token_str, B_STRLEN_NORMAL); + + size_t i = 0, c = 0; + struct timestamp ts = {0}; + + bool has_date = false, has_time = false; + + if (len >= 10 && s[4] == '-' && s[7] == '-') { + has_date = true; + } + + if (len >= 8 && s[2] == ':' && s[5] == ':') { + has_time = true; + } + + if (len >= 19 && s[4] == '-' && s[7] == '-' + && (s[10] == 'T' || s[10] == ' ') && s[13] == ':' && s[16] == ':') { + has_date = true; + has_time = true; + } + + if (!has_date && !has_time) { + return false; + } + + if (has_date) { + for (c = 0; c < 4; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_year *= 10; + ts.ts_year += (s[i] - '0'); + } + + if (s[i++] != '-') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_month *= 10; + ts.ts_month += (s[i] - '0'); + } + + if (s[i++] != '-') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_day *= 10; + ts.ts_day += (s[i] - '0'); + } + } + + if (has_date && has_time) { + if (s[i] != 'T' && s[i] != ' ') { + return false; + } + + i++; + } + + if (has_time) { + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_hour *= 10; + ts.ts_hour += (s[i] - '0'); + } + + if (s[i++] != ':') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_min *= 10; + ts.ts_min += (s[i] - '0'); + } + + if (s[i++] != ':') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_sec *= 10; + ts.ts_sec += (s[i] - '0'); + } + } + + if (s[i] == '.') { + i++; + for (c = 0; s[i]; c++, i++) { + if (!isdigit(s[i])) { + break; + } + + ts.ts_msec *= 10; + ts.ts_msec += (s[i] - '0'); + } + + if (c == 0) { + return 
false; + } + } + + if (s[i] == '+' || s[i] == '-') { + ts.ts_zone_offset_negative = s[i] == '-'; + i++; + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_zone_offset_hour *= 10; + ts.ts_zone_offset_hour += (s[i] - '0'); + } + + if (s[i++] != ':') { + return false; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + return false; + } + + ts.ts_zone_offset_minute *= 10; + ts.ts_zone_offset_minute += (s[i] - '0'); + } + } else if (s[i] == 'Z') { + i++; + } + + if (s[i] != 0) { + return false; + } + + struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP); + tok->tok_str = b_string_steal(token_str); + tok->tok_value.time = ts; + + return true; +} +#endif + +static bool try_convert_word_to_number(struct ctx *ctx, struct b_string *token_str) +{ + size_t len = b_string_get_size(token_str, B_STRLEN_NORMAL); + struct b_string *str = b_string_duplicate(token_str); + const char *s = b_string_ptr(str); + + if (len == 0) { + return false; + } + + size_t offset = 0; + bool has_sign = false; + + int mul = 1; + if (s[0] == '+') { + offset++; + has_sign = true; + } else if (s[0] == '-') { + offset++; + mul = -1; + has_sign = true; + } + + int base = 10; + if (*(s + offset) == '0') { + char b = *(s + offset + 1); + switch (b) { + case 'x': + offset += 2; + base = 16; + break; + case 'b': + offset += 2; + base = 2; + break; + case 'o': + offset += 2; + base = 8; + break; + default: + b_string_release(str); + return false; + } + } + + if (has_sign && base != 10) { + b_string_release(str); + return false; + } + + if (offset == len) { + b_string_release(str); + return false; + } + + bool is_valid = true; + bool is_double = false; + + char previous = 0; + for (size_t i = offset; i < len; i++) { + char c = s[i]; + + if (previous == '_' && !isnumber(c)) { + is_valid = false; + break; + } + + if (c == '_') { + if (!isnumber(previous)) { + is_valid = false; + break; + } + + b_string_remove(str, i, 1); + len--; + i--; + previous = c; + 
continue; + } + + if (c == 'e' || c == '.') { + if (!isnumber(c)) { + is_valid = false; + break; + } + is_double = true; + previous = c; + continue; + } + + if ((c == '-' || c == '+') && previous != 'e') { + is_valid = false; + break; + } + + previous = c; + } + + if (previous == '_' || previous == '.') { + is_valid = false; + } + + if (is_double && base != 10) { + is_valid = false; + } + + if (!is_valid) { + b_string_release(str); + return false; + } + + double d = 0; + long long i = 0; + + if (is_double) { + int r = 0; + int len = strlen(s + offset); + // d = strtold(s + offset, &ep) * mul; + int ret = sscanf(s + offset, "%lf%n", &d, &r); + d *= mul; + is_valid = (ret == 1) && r == len; + } else { + char *ep; + i = strtoll(s + offset, &ep, base) * mul; + is_valid = ((*ep) == 0); + } + + b_string_release(str); + + if (!is_valid) { + return false; + } + + struct token *tok = enqueue_token(ctx, is_double ? TOK_FLOAT : TOK_INT); + tok->tok_str = b_string_steal(token_str); + + if (is_double) { + tok->tok_value.f.v = d; + } else { + tok->tok_value.i.v = i; + } + + return true; +} + +static bool try_convert_word_to_bool(struct ctx *ctx, struct b_string *token_str) +{ + const char *s = b_string_ptr(token_str); + struct token *tok = NULL; + + if (!strcmp(s, "true")) { + tok = enqueue_token(ctx, TOK_BOOL); + tok->tok_str = b_string_steal(token_str); + tok->tok_value.b = true; + } else if (!strcmp(s, "false")) { + tok = enqueue_token(ctx, TOK_BOOL); + tok->tok_str = b_string_steal(token_str); + tok->tok_value.b = false; + } else { + return false; + } + + return true; +} + +static void split_word(struct ctx *ctx, struct b_string *wordbuf) +{ + long len = b_string_get_size(wordbuf, B_STRLEN_NORMAL); + if (!len) { + return; + } + + char *s = b_string_steal(wordbuf); + int trailing_dots = 0; + + char prev = 0; + + for (long i = 0; i < len; i++) { + if (prev == '.' 
&& s[i] == '.') { + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } + + prev = s[i]; + } + + if (!B_OK(ctx->ctx_status)) { + free(s); + return; + } + + for (; len > 0; len--) { + if (s[len - 1] == '.') { + trailing_dots++; + } else { + break; + } + } + + char *ep; + char *tok = strtok_r(s, ".", &ep); + + unsigned int i = 0; + + while (tok) { + if (*tok == 0) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } + + if (i > 0) { + enqueue_token(ctx, TOK_DOT); + } + + struct token *word = enqueue_token(ctx, TOK_WORD); + word->tok_str = b_strdup(tok); + + i++; + tok = strtok_r(NULL, ".", &ep); + } + + for (long i = 0; i < trailing_dots; i++) { + enqueue_token(ctx, TOK_DOT); + } + + free(s); +} + +static void read_number(struct ctx *ctx) +{ + int c = 0; + struct b_string *wordbuf = get_wordbuf(ctx); + + while (1) { + c = peek_char(ctx); + + if (c == -1 || !B_OK(ctx->ctx_status)) { + break; + } + + bool ok = isalnum(c) || c == '_' || c == '-' || c == '.' + || c == '+'; + + if (!ok) { + break; + } + + char s[] = {c, 0}; + b_string_append_cstr(wordbuf, s); + advance_char(ctx); + } + + bool is_number = try_convert_word_to_number(ctx, wordbuf); + + if (!is_number) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + } +} + +static void read_word(struct ctx *ctx) +{ + int c = 0; + struct b_string *wordbuf = get_wordbuf(ctx); + + while (1) { + c = peek_char(ctx); + + if (c == -1 || !B_OK(ctx->ctx_status)) { + break; + } + + bool ok = isalnum(c) || c == '_' || c == '-' || c == '.'; + + if (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS) { + ok = ok || c == ':' || c == ' ' || c == '+'; + } + + if (ctx->ctx_flags & CTX_ENABLE_NUMBERS) { + ok = ok || c == '+'; + } + + if (!ok) { + break; + } + + char s[] = {c, 0}; + b_string_append_cstr(wordbuf, s); + advance_char(ctx); + } + + bool parsed = false; + b_string_trim(wordbuf); + + if (ctx->ctx_flags & CTX_ENABLE_BOOLS) { + parsed = try_convert_word_to_bool(ctx, wordbuf); + } + + if (!parsed && (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS)) { + parsed = 
try_convert_word_to_timestamp(ctx, wordbuf); + } + + if (!parsed && (ctx->ctx_flags & CTX_ENABLE_NUMBERS)) { + parsed = try_convert_word_to_number(ctx, wordbuf); + } + + if (parsed) { + return; + } + + const char *s = b_string_ptr(wordbuf); + for (size_t i = 0; s[i]; i++) { + c = s[i]; + bool ok = isalnum(c) || c == '_' || c == '-' || c == '.'; + if (!ok) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return; + } + } + + split_word(ctx, wordbuf); +} + +static void read_string(struct ctx *ctx, bool squote) +{ + advance_char(ctx); + + char term = '"'; + if (squote) { + term = '\''; + } + + bool multiline = false; + struct token *tok = enqueue_token(ctx, TOK_STRING); + struct b_string *str = get_wordbuf(ctx); + + int c = peek_char(ctx); + if (c == term) { + advance_char(ctx); + c = peek_char(ctx); + + if (c == term) { + advance_char(ctx); + c = peek_char(ctx); + multiline = true; + } else { + return; + } + + if (c == '\n') { + advance_char(ctx); + } + } + + if (multiline && !(ctx->ctx_flags & CTX_ENABLE_MULTILINE_STRING)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return; + } + + bool fail = false; + bool esc = false; + + tok->tok_type = TOK_STRING; + + while (!fail) { + c = peek_char(ctx); + if (c == -1) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + + char s[] = {c, 0}; + + if (esc) { + if (c == '\n') { + while (c != -1 && isspace(c)) { + advance_char(ctx); + c = peek_char(ctx); + } + + esc = false; + continue; + } + + switch (c) { + case '"': + case '\\': + b_string_append_cstr(str, s); + break; + case 'b': + s[0] = '\b'; + b_string_append_cstr(str, s); + break; + case 't': + s[0] = '\t'; + b_string_append_cstr(str, s); + break; + case 'n': + s[0] = '\n'; + b_string_append_cstr(str, s); + break; + case 'r': + s[0] = '\r'; + b_string_append_cstr(str, s); + break; + case 'f': + s[0] = '\f'; + b_string_append_cstr(str, s); + break; + default: + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + + esc = false; + advance_char(ctx); + 
continue; + } + + else if (c == '\\' && !squote) { + esc = true; + } + + else if (c == '\n') { + if (!multiline) { + fail = true; + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } + + b_string_append_cstr(str, s); + } + + else if (c == term) { + advance_char(ctx); + + if (!multiline) { + break; + } + + c = peek_char(ctx); + if (c != term) { + b_string_append_cstr(str, s); + continue; + } + + advance_char(ctx); + c = peek_char(ctx); + if (c != term) { + b_string_append_cstr(str, s); + b_string_append_cstr(str, s); + continue; + } + + advance_char(ctx); + c = peek_char(ctx); + if (c == term) { + b_string_append_cstr(str, s); + advance_char(ctx); + } + break; + } + + else { + b_string_append_cstr(str, s); + } + + advance_char(ctx); + } + + if (fail) { + discard_token(ctx); + return; + } + + tok->tok_str = b_string_steal(str); +} + +static void read_symbol(struct ctx *ctx) +{ + int c = peek_char(ctx); + advance_char(ctx); + + struct token *tok = enqueue_token(ctx, TOK_NONE); + + char s[] = {c, 0}; + + switch (c) { + case '=': + tok->tok_type = TOK_EQUAL; + break; + case '.': + tok->tok_type = TOK_DOT; + break; + case ',': + tok->tok_type = TOK_COMMA; + break; + case '[': + if (!(ctx->ctx_flags & CTX_ENABLE_LONG_SYMBOLS)) { + tok->tok_type = TOK_LEFT_BRACKET; + break; + } + + c = peek_char(ctx); + switch (c) { + case '[': + tok->tok_type = TOK_DOUBLE_LEFT_BRACKET; + advance_char(ctx); + break; + default: + tok->tok_type = TOK_LEFT_BRACKET; + break; + } + break; + case ']': + if (!(ctx->ctx_flags & CTX_ENABLE_LONG_SYMBOLS)) { + /* if we're parsing more complex values, don't generate double-symbol tokens */ + tok->tok_type = TOK_RIGHT_BRACKET; + break; + } + + c = peek_char(ctx); + switch (c) { + case ']': + tok->tok_type = TOK_DOUBLE_RIGHT_BRACKET; + advance_char(ctx); + break; + default: + tok->tok_type = TOK_RIGHT_BRACKET; + break; + } + break; + case '{': + tok->tok_type = TOK_LEFT_BRACE; + break; + case '}': + tok->tok_type = TOK_RIGHT_BRACE; + break; + default: + 
discard_token(ctx); + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } +} + +static void read_newline(struct ctx *ctx) +{ + int c = peek_char(ctx); + while (c == '\n') { + advance_char(ctx); + c = peek_char(ctx); + } + + enqueue_token(ctx, TOK_NEWLINE); +} + +static void read_comment(struct ctx *ctx) +{ + int c = peek_char(ctx); + while (c != '\n' && c != -1) { + advance_char(ctx); + c = peek_char(ctx); + } + + if (!B_OK(ctx->ctx_status)) { + return; + } + + advance_char(ctx); + enqueue_token(ctx, TOK_NEWLINE); +} + +static enum b_status advance_token(struct ctx *ctx) +{ + discard_token(ctx); + + if (!b_queue_empty(&ctx->ctx_tokens)) { + return B_SUCCESS; + } + + int c = peek_char(ctx); + while (isspace(c) && c != '\n') { + advance_char(ctx); + c = peek_char(ctx); + } + + if (c == -1) { + ctx->ctx_flags |= CTX_EOF; + return B_ERR_NO_DATA; + } + +#if 1 + while (c == '#') { + read_comment(ctx); + c = peek_char(ctx); + } +#endif + + if (!B_OK(ctx->ctx_status)) { + return ctx->ctx_status; + } + + if (c == '"') { + read_string(ctx, false); + } else if (c == '\'') { + read_string(ctx, true); + } else if ((c == '+' || c == '-') && ctx->ctx_flags & CTX_ENABLE_NUMBERS) { + read_number(ctx); + } else if (ispunct(c)) { + read_symbol(ctx); + } else if (c == '\n') { + read_newline(ctx); + } else { + read_word(ctx); + } + + return ctx->ctx_status; +} + +static struct token *peek_token(struct ctx *ctx) +{ + struct b_queue_entry *entry = b_queue_first(&ctx->ctx_tokens); + if (!entry) { + return NULL; + } + + return b_unbox(struct token, entry, tok_entry); +} + +static void ctx_cleanup(struct ctx *ctx) +{ + if (ctx->ctx_linebuf_stream) { + b_stream_close(ctx->ctx_linebuf_stream); + ctx->ctx_linebuf_stream = NULL; + } + + if (ctx->ctx_linebuf) { + b_string_release(ctx->ctx_linebuf); + ctx->ctx_linebuf = NULL; + } + + if (ctx->ctx_wordbuf) { + b_string_release(ctx->ctx_wordbuf); + ctx->ctx_wordbuf = NULL; + } + + if (ctx->ctx_static_objects) { + 
b_hashmap_release(ctx->ctx_static_objects); + ctx->ctx_static_objects = NULL; + } +} + +static enum b_status ctx_init(struct ctx *ctx) +{ + memset(ctx, 0x0, sizeof *ctx); + + ctx->ctx_linebuf = b_string_create(); + ctx->ctx_wordbuf = b_string_create(); + + b_string_open_stream(ctx->ctx_linebuf, &ctx->ctx_linebuf_stream); + + ctx->ctx_static_objects = b_hashmap_create(NULL, NULL); + + return B_SUCCESS; +} + +static enum b_status toml_serialise( + struct b_serial_ctx *serial, struct b_object *src, + struct b_stream *dest, enum b_serial_flags flags) +{ + return B_SUCCESS; +} + +static void print_token(struct token *tok) +{ + switch (tok->tok_type) { + case TOK_NONE: + printf("TOK_NONE\n"); + break; + case TOK_WORD: + printf("TOK_WORD %s\n", tok->tok_str); + break; + case TOK_STRING: + printf("TOK_STRING %s\n", tok->tok_str); + break; + case TOK_TIMESTAMP: + printf("TOK_TIMESTAMP %04ld-%02ld-%02ld " + "%02ld:%02ld:%02ld.%04ld %c" + "%02ld:%02ld\n", + b_datetime_year(tok->tok_value.time), + b_datetime_month(tok->tok_value.time), + b_datetime_day(tok->tok_value.time), + b_datetime_hour(tok->tok_value.time), + b_datetime_minute(tok->tok_value.time), + b_datetime_second(tok->tok_value.time), + b_datetime_subsecond(tok->tok_value.time), + b_datetime_zone_offset_is_negative(tok->tok_value.time) + ? '-' + : '+', + b_datetime_zone_offset_hour(tok->tok_value.time), + b_datetime_zone_offset_minute(tok->tok_value.time)); + break; + case TOK_INT: + printf("TOK_INT "); + if (tok->tok_value.i.nan) { + printf("NaN"); + } else { + printf("%lld", tok->tok_value.i.v); + } + + printf("\n"); + break; + case TOK_FLOAT: + printf("TOK_FLOAT "); + if (tok->tok_value.f.nan) { + printf("NaN"); + } else { + printf("%lf", tok->tok_value.f.v); + } + + printf("\n"); + break; + case TOK_BOOL: + printf("TOK_BOOL %s\n", tok->tok_value.b ? 
"true" : "false"); + break; + case TOK_NEWLINE: + printf("TOK_NEWLINE\n"); + break; + case TOK_EQUAL: + printf("TOK_EQUAL\n"); + break; + case TOK_DOT: + printf("TOK_DOT\n"); + break; + case TOK_COMMA: + printf("TOK_COMMA\n"); + break; + case TOK_LEFT_BRACKET: + printf("TOK_LEFT_BRACKET\n"); + break; + case TOK_RIGHT_BRACKET: + printf("TOK_RIGHT_BRACKET\n"); + break; + case TOK_DOUBLE_LEFT_BRACKET: + printf("TOK_DOUBLE_LEFT_BRACKET\n"); + break; + case TOK_DOUBLE_RIGHT_BRACKET: + printf("TOK_DOUBLE_RIGHT_BRACKET\n"); + break; + case TOK_LEFT_BRACE: + printf("TOK_LEFT_BRACE\n"); + break; + case TOK_RIGHT_BRACE: + printf("TOK_RIGHT_BRACE\n"); + break; + default: + break; + } +} + +static enum b_status parse_value(struct ctx *ctx, struct b_object **result); +static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *container); + +static enum b_status parse_timestamp(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_datetime *dt = tok->tok_value.time; + tok->tok_value.time = NULL; + + *result = B_OBJECT(dt); + return B_SUCCESS; +} + +static enum b_status parse_string(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_string *str = b_string_create_from_cstr(tok->tok_str); + if (!str) { + return B_ERR_NO_MEMORY; + } + + *result = B_OBJECT(str); + return B_SUCCESS; +} + +static enum b_status parse_int(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_number *val = B_INT64(tok->tok_value.i.v); + if (!val) { + return B_ERR_NO_MEMORY; + } + + *result = B_OBJECT(val); + return B_SUCCESS; +} + +static enum b_status parse_float(struct ctx *ctx, struct b_object **result) +{ + struct token *tok = peek_token(ctx); + struct b_number *val = B_DOUBLE(tok->tok_value.f.v); + if (!val) { + return B_ERR_NO_MEMORY; + } + + *result = B_OBJECT(val); + return B_SUCCESS; +} + +static enum b_status parse_bool(struct ctx *ctx, struct b_object 
**result) +{ + struct token *tok = peek_token(ctx); + struct b_number *val = B_INT8(tok->tok_value.b); + if (!val) { + return B_ERR_NO_MEMORY; + } + + *result = B_OBJECT(val); + return B_SUCCESS; +} + +static enum b_status parse_table_inline(struct ctx *ctx, struct b_object **result) +{ + advance_token(ctx); + + struct b_dict *table = b_dict_create(); + if (!table) { + return B_ERR_NO_MEMORY; + } + + bool done = false; + while (!done) { + struct b_object *value; + enum b_status status = parse_key_value_pair(ctx, table); + if (!B_OK(status)) { + b_dict_release(table); + return status; + } + + struct token *tok = peek_token(ctx); + + if (!tok) { + b_dict_release(table); + return status; + } + + switch (tok->tok_type) { + case TOK_RIGHT_BRACE: + done = true; + break; + case TOK_COMMA: + advance_token(ctx); + break; + default: + b_dict_release(table); + return B_ERR_BAD_FORMAT; + } + } + + *result = B_OBJECT(table); + return B_SUCCESS; +} + +static void skip_newlines(struct ctx *ctx) +{ + struct token *tok = peek_token(ctx); + + while (tok && tok->tok_type == TOK_NEWLINE) { + advance_token(ctx); + tok = peek_token(ctx); + } +} + +static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **result) +{ + bool done = false; + advance_token(ctx); + + struct b_array *array = b_array_create(); + if (!array) { + return B_ERR_NO_MEMORY; + } + + struct token *tok = peek_token(ctx); + if (!tok) { + b_array_release(array); + return B_ERR_BAD_FORMAT; + } + + if (tok->tok_type == TOK_RIGHT_BRACKET) { + advance_token(ctx); + done = true; + } + + while (!done) { + skip_newlines(ctx); + + tok = peek_token(ctx); + + if (!tok) { + b_array_release(array); + return B_ERR_BAD_FORMAT; + } + + if (tok->tok_type == TOK_RIGHT_BRACKET) { + done = true; + break; + } + + struct b_object *value; + enum b_status status = parse_value(ctx, &value); + if (!B_OK(status)) { + b_array_release(array); + return status; + } + + b_array_append(array, B_RV(value)); + + skip_newlines(ctx); + + 
tok = peek_token(ctx); + + if (tok && tok->tok_type == TOK_RIGHT_BRACKET) { + done = true; + break; + } + + if (!tok || tok->tok_type != TOK_COMMA) { + b_array_release(array); + return B_ERR_BAD_FORMAT; + } + + advance_token(ctx); + } + + *result = B_OBJECT(array); + return B_SUCCESS; +} + +static enum b_status parse_value(struct ctx *ctx, struct b_object **result) +{ + + struct token *tok = peek_token(ctx); + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + switch (tok->tok_type) { + case TOK_STRING: + return parse_string(ctx, result); + case TOK_INT: + return parse_int(ctx, result); + case TOK_FLOAT: + return parse_float(ctx, result); + case TOK_BOOL: + return parse_bool(ctx, result); + case TOK_TIMESTAMP: + return parse_timestamp(ctx, result); + case TOK_LEFT_BRACKET: + return parse_array_inline(ctx, result); + case TOK_LEFT_BRACE: + return parse_table_inline(ctx, result); + default: + return B_ERR_BAD_FORMAT; + } +} + +static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *container) +{ + struct token *tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + char *key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + while (tok && tok->tok_type == TOK_DOT) { + struct b_object *sub_dict = b_dict_at(container, key); + if (!sub_dict) { + sub_dict = B_OBJECT(b_dict_create()); + b_dict_put(container, key, B_RV(sub_dict)); + } else if (sub_dict && !B_OBJECT_IS(sub_dict, DICT)) { + free(key); + return B_ERR_BAD_FORMAT; + } + + if (ctx_object_is_static(ctx, sub_dict)) { + free(key); + return B_ERR_BAD_FORMAT; + } + + ctx_add_static_object(ctx, sub_dict); + + advance_token(ctx); + tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + free(key); + return B_ERR_BAD_FORMAT; + } + + container = B_DICT(sub_dict); + free(key); + key = b_strdup(tok->tok_str); + if (!key) { + return 
B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + } + + if (b_dict_has_key(container, key)) { + return B_ERR_BAD_FORMAT; + } + + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + if (tok->tok_type != TOK_EQUAL) { + return B_ERR_BAD_FORMAT; + } + + ctx->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS; + ctx->ctx_flags |= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS + | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING; + advance_token(ctx); + + struct b_object *value = NULL; + enum b_status status = parse_value(ctx, &value); + + ctx->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS; + ctx->ctx_flags + &= ~(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS + | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING); + + if (!B_OK(status)) { + return status; + } + + advance_token(ctx); + + b_dict_put(container, key, B_RV(value)); + + if (B_OBJECT_IS(value, DICT) || B_OBJECT_IS(value, ARRAY)) { + ctx_add_static_object(ctx, value); + } + + return B_SUCCESS; +} + +static enum b_status parse_table_header( + struct ctx *ctx, struct b_dict *container, struct b_dict **new_container) +{ + advance_token(ctx); + struct token *tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + char *key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + while (tok && tok->tok_type == TOK_DOT) { + struct b_object *sub_dict = b_dict_at(container, key); + if (!sub_dict) { + sub_dict = B_OBJECT(b_dict_create()); + b_dict_put(container, key, B_RV(sub_dict)); + } else if (B_OBJECT_IS(sub_dict, ARRAY)) { + sub_dict = b_array_at( + B_ARRAY(sub_dict), + b_array_size(B_ARRAY(sub_dict)) - 1); + } else if (!B_OBJECT_IS(sub_dict, DICT)) { + return B_ERR_BAD_FORMAT; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + container = B_DICT(sub_dict); + free(key); + key = b_strdup(tok->tok_str); + if 
(!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + } + + if (!tok || tok->tok_type != TOK_RIGHT_BRACKET) { + return B_ERR_BAD_FORMAT; + } + + struct b_dict *new_table = B_DICT(b_dict_at(container, key)); + + if (new_table) { + if (!B_OBJECT_IS(new_table, DICT) + || ctx_object_is_static(ctx, B_OBJECT(new_table))) { + return B_ERR_BAD_FORMAT; + } + + ctx_add_static_object(ctx, B_OBJECT(new_table)); + } else { + new_table = b_dict_create(); + + if (!new_table) { + free(key); + return B_ERR_NO_MEMORY; + } + + b_dict_put(container, key, B_RV(new_table)); + ctx_add_static_object(ctx, B_OBJECT(new_table)); + } + + free(key); + + advance_token(ctx); + *new_container = new_table; + return B_SUCCESS; +} + +static enum b_status parse_array_header( + struct ctx *ctx, struct b_dict *container, struct b_dict **new_container) +{ + advance_token(ctx); + struct token *tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + char *key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!tok) { + return B_ERR_BAD_FORMAT; + } + + while (tok && tok->tok_type == TOK_DOT) { + struct b_object *sub_dict = b_dict_at(container, key); + if (!sub_dict) { + sub_dict = B_OBJECT(b_dict_create()); + b_dict_put(container, key, B_RV(sub_dict)); + } else if (B_OBJECT_IS(sub_dict, ARRAY)) { + sub_dict = b_array_at( + B_ARRAY(sub_dict), + b_array_size(B_ARRAY(sub_dict)) - 1); + } else if (!B_OBJECT_IS(sub_dict, DICT)) { + return B_ERR_BAD_FORMAT; + } + + advance_token(ctx); + tok = peek_token(ctx); + if (!IS_VALID_KEY_COMPONENT(tok)) { + return B_ERR_BAD_FORMAT; + } + + container = B_DICT(sub_dict); + free(key); + key = b_strdup(tok->tok_str); + if (!key) { + return B_ERR_NO_MEMORY; + } + + advance_token(ctx); + tok = peek_token(ctx); + } + + if (!tok || tok->tok_type != TOK_DOUBLE_RIGHT_BRACKET) { + return B_ERR_BAD_FORMAT; + } + + struct b_array *array 
= B_ARRAY(b_dict_get(container, key)); + if (!array) { + array = b_array_create(); + b_dict_put(container, key, B_RV(array)); + } else if ( + !B_OBJECT_IS(array, ARRAY) + || ctx_object_is_static(ctx, B_OBJECT(array))) { + return B_ERR_BAD_FORMAT; + } + + struct b_dict *new_table = b_dict_create(); + if (!new_table) { + free(key); + return B_ERR_NO_MEMORY; + } + + b_array_append(array, B_RV(new_table)); + free(key); + + advance_token(ctx); + *new_container = new_table; + return B_SUCCESS; +} + +static enum b_status parse_root(struct ctx *ctx, struct b_dict **result) +{ + enum b_status status = B_SUCCESS; + struct b_dict *root = b_dict_create(); + struct b_dict *current = root; + + while (!(ctx->ctx_flags & CTX_EOF) && B_OK(status)) { + struct token *tok = peek_token(ctx); + if (!tok) { + break; + } + + switch (tok->tok_type) { + case TOK_LEFT_BRACKET: + status = parse_table_header(ctx, root, ¤t); + if (!B_OK(status)) { + break; + } + + tok = peek_token(ctx); + if (tok && tok->tok_type != TOK_NEWLINE) { + status = B_ERR_BAD_FORMAT; + } + break; + case TOK_DOUBLE_LEFT_BRACKET: + status = parse_array_header(ctx, root, ¤t); + if (!B_OK(status)) { + break; + } + + tok = peek_token(ctx); + if (tok && tok->tok_type != TOK_NEWLINE) { + status = B_ERR_BAD_FORMAT; + } + break; + case TOK_WORD: + case TOK_STRING: + status = parse_key_value_pair(ctx, current); + if (!B_OK(status)) { + break; + } + + tok = peek_token(ctx); + if (tok && tok->tok_type != TOK_NEWLINE) { + status = B_ERR_BAD_FORMAT; + } + break; + case TOK_NEWLINE: + advance_token(ctx); + break; + default: + status = B_ERR_BAD_FORMAT; + break; + } + + if (!B_OK(ctx->ctx_status) && ctx->ctx_status != B_ERR_NO_DATA) { + status = ctx->ctx_status; + } + } + + if (!B_OK(status)) { + b_dict_release(root); + root = NULL; + } + + *result = root; + return status; +} + +static enum b_status toml_deserialise( + struct b_serial_ctx *serial, struct b_stream *src, + struct b_object **dest, enum b_serial_flags flags) +{ + struct 
ctx ctx = {0}; + enum b_status status = ctx_init(&ctx); + + if (!B_OK(status)) { + return status; + } + + ctx.ctx_src = src; + + status = advance_token(&ctx); + if (!B_OK(status)) { + return status; + } + + struct b_dict *result = NULL; + status = parse_root(&ctx, &result); + if (!B_OK(status)) { + return status; + } + + *dest = B_OBJECT(result); +#if 0 + ctx.ctx_flags + = CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS | CTX_ENABLE_BOOLS; + + while (!(ctx.ctx_flags & CTX_EOF) && B_OK(ctx.ctx_status)) { + struct token *tok = peek_token(&ctx); + print_token(tok); + status = advance_token(&ctx); + } +#endif + + return B_SUCCESS; +} + +const struct b_serial_format_ops z__b_toml_format_ops = { + .fmt_serialise = toml_serialise, + .fmt_deserialise = toml_deserialise, +}; From db3d1e31839a1fa649cc1e76c254b5691fd7534e Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:28:03 +0100 Subject: [PATCH 02/23] core: add a type to store unicode characters, as well as functions to query them --- core/encoding.c | 1193 +++++++++++++++++++++++++++++ core/include/blue/core/encoding.h | 31 + 2 files changed, 1224 insertions(+) create mode 100644 core/encoding.c create mode 100644 core/include/blue/core/encoding.h diff --git a/core/encoding.c b/core/encoding.c new file mode 100644 index 0000000..870784b --- /dev/null +++ b/core/encoding.c @@ -0,0 +1,1193 @@ +#include +#include + +bool b_wchar_is_number(b_wchar c) +{ + return iswnumber((wchar_t)c); +} + +bool b_wchar_is_alpha(b_wchar c) +{ + if (c == 0) { + return false; + } + + if (c >= 0x0041 && c <= 0x005A) { + return true; + } + + if (c >= 0x0061 && c <= 0x007A) { + return true; + } + + if (c >= 0x00C0 && c <= 0x00D6) { + return true; + } + + if (c >= 0x00D8 && c <= 0x00F6) { + return true; + } + + if (c >= 0x00F8 && c <= 0x02C1) { + return true; + } + + if (c >= 0x02C6 && c <= 0x02D1) { + return true; + } + + if (c >= 0x02E0 && c <= 0x02E4) { + return true; + } + + if (c >= 0x0370 && c <= 0x0374) { + return true; + } + + 
if (c >= 0x037A && c <= 0x037D) { + return true; + } + + if (c >= 0x0388 && c <= 0x038A) { + return true; + } + + if (c >= 0x038E && c <= 0x03A1) { + return true; + } + + if (c >= 0x03A3 && c <= 0x03F5) { + return true; + } + + if (c >= 0x03F7 && c <= 0x0481) { + return true; + } + + if (c >= 0x048A && c <= 0x0527) { + return true; + } + + if (c >= 0x0531 && c <= 0x0556) { + return true; + } + + if (c >= 0x0561 && c <= 0x0587) { + return true; + } + + if (c >= 0x05D0 && c <= 0x05EA) { + return true; + } + + if (c >= 0x05F0 && c <= 0x05F2) { + return true; + } + + if (c >= 0x0620 && c <= 0x064A) { + return true; + } + + if (c >= 0x0671 && c <= 0x06D3) { + return true; + } + + if (c >= 0x06FA && c <= 0x06FC) { + return true; + } + + if (c >= 0x0712 && c <= 0x072F) { + return true; + } + + if (c >= 0x074D && c <= 0x07A5) { + return true; + } + + if (c >= 0x07CA && c <= 0x07EA) { + return true; + } + + if (c >= 0x0800 && c <= 0x0815) { + return true; + } + + if (c >= 0x0840 && c <= 0x0858) { + return true; + } + + if (c >= 0x08A2 && c <= 0x08AC) { + return true; + } + + if (c >= 0x0904 && c <= 0x0939) { + return true; + } + + if (c >= 0x0958 && c <= 0x0961) { + return true; + } + + if (c >= 0x0971 && c <= 0x0977) { + return true; + } + + if (c >= 0x0979 && c <= 0x097F) { + return true; + } + + if (c >= 0x0985 && c <= 0x098C) { + return true; + } + + if (c >= 0x0993 && c <= 0x09A8) { + return true; + } + + if (c >= 0x09AA && c <= 0x09B0) { + return true; + } + + if (c >= 0x09B6 && c <= 0x09B9) { + return true; + } + + if (c >= 0x09DF && c <= 0x09E1) { + return true; + } + + if (c >= 0x0A05 && c <= 0x0A0A) { + return true; + } + + if (c >= 0x0A13 && c <= 0x0A28) { + return true; + } + + if (c >= 0x0A2A && c <= 0x0A30) { + return true; + } + + if (c >= 0x0A59 && c <= 0x0A5C) { + return true; + } + + if (c >= 0x0A72 && c <= 0x0A74) { + return true; + } + + if (c >= 0x0A85 && c <= 0x0A8D) { + return true; + } + + if (c >= 0x0A8F && c <= 0x0A91) { + return true; + } + + if 
(c >= 0x0A93 && c <= 0x0AA8) { + return true; + } + + if (c >= 0x0AAA && c <= 0x0AB0) { + return true; + } + + if (c >= 0x0AB5 && c <= 0x0AB9) { + return true; + } + + if (c >= 0x0B05 && c <= 0x0B0C) { + return true; + } + + if (c >= 0x0B13 && c <= 0x0B28) { + return true; + } + + if (c >= 0x0B2A && c <= 0x0B30) { + return true; + } + + if (c >= 0x0B35 && c <= 0x0B39) { + return true; + } + + if (c >= 0x0B5F && c <= 0x0B61) { + return true; + } + + if (c >= 0x0B85 && c <= 0x0B8A) { + return true; + } + + if (c >= 0x0B8E && c <= 0x0B90) { + return true; + } + + if (c >= 0x0B92 && c <= 0x0B95) { + return true; + } + + if (c >= 0x0BA8 && c <= 0x0BAA) { + return true; + } + + if (c >= 0x0BAE && c <= 0x0BB9) { + return true; + } + + if (c >= 0x0C05 && c <= 0x0C0C) { + return true; + } + + if (c >= 0x0C0E && c <= 0x0C10) { + return true; + } + + if (c >= 0x0C12 && c <= 0x0C28) { + return true; + } + + if (c >= 0x0C2A && c <= 0x0C33) { + return true; + } + + if (c >= 0x0C35 && c <= 0x0C39) { + return true; + } + + if (c >= 0x0C85 && c <= 0x0C8C) { + return true; + } + + if (c >= 0x0C8E && c <= 0x0C90) { + return true; + } + + if (c >= 0x0C92 && c <= 0x0CA8) { + return true; + } + + if (c >= 0x0CAA && c <= 0x0CB3) { + return true; + } + + if (c >= 0x0CB5 && c <= 0x0CB9) { + return true; + } + + if (c >= 0x0D05 && c <= 0x0D0C) { + return true; + } + + if (c >= 0x0D0E && c <= 0x0D10) { + return true; + } + + if (c >= 0x0D12 && c <= 0x0D3A) { + return true; + } + + if (c >= 0x0D7A && c <= 0x0D7F) { + return true; + } + + if (c >= 0x0D85 && c <= 0x0D96) { + return true; + } + + if (c >= 0x0D9A && c <= 0x0DB1) { + return true; + } + + if (c >= 0x0DB3 && c <= 0x0DBB) { + return true; + } + + if (c >= 0x0DC0 && c <= 0x0DC6) { + return true; + } + + if (c >= 0x0E01 && c <= 0x0E30) { + return true; + } + + if (c >= 0x0E40 && c <= 0x0E46) { + return true; + } + + if (c >= 0x0E94 && c <= 0x0E97) { + return true; + } + + if (c >= 0x0E99 && c <= 0x0E9F) { + return true; + } + + if (c 
>= 0x0EA1 && c <= 0x0EA3) { + return true; + } + + if (c >= 0x0EAD && c <= 0x0EB0) { + return true; + } + + if (c >= 0x0EC0 && c <= 0x0EC4) { + return true; + } + + if (c >= 0x0EDC && c <= 0x0EDF) { + return true; + } + + if (c >= 0x0F40 && c <= 0x0F47) { + return true; + } + + if (c >= 0x0F49 && c <= 0x0F6C) { + return true; + } + + if (c >= 0x0F88 && c <= 0x0F8C) { + return true; + } + + if (c >= 0x1000 && c <= 0x102A) { + return true; + } + + if (c >= 0x1050 && c <= 0x1055) { + return true; + } + + if (c >= 0x105A && c <= 0x105D) { + return true; + } + + if (c >= 0x106E && c <= 0x1070) { + return true; + } + + if (c >= 0x1075 && c <= 0x1081) { + return true; + } + + if (c >= 0x10A0 && c <= 0x10C5) { + return true; + } + + if (c >= 0x10D0 && c <= 0x10FA) { + return true; + } + + if (c >= 0x10FC && c <= 0x1248) { + return true; + } + + if (c >= 0x124A && c <= 0x124D) { + return true; + } + + if (c >= 0x1250 && c <= 0x1256) { + return true; + } + + if (c >= 0x125A && c <= 0x125D) { + return true; + } + + if (c >= 0x1260 && c <= 0x1288) { + return true; + } + + if (c >= 0x128A && c <= 0x128D) { + return true; + } + + if (c >= 0x1290 && c <= 0x12B0) { + return true; + } + + if (c >= 0x12B2 && c <= 0x12B5) { + return true; + } + + if (c >= 0x12B8 && c <= 0x12BE) { + return true; + } + + if (c >= 0x12C2 && c <= 0x12C5) { + return true; + } + + if (c >= 0x12C8 && c <= 0x12D6) { + return true; + } + + if (c >= 0x12D8 && c <= 0x1310) { + return true; + } + + if (c >= 0x1312 && c <= 0x1315) { + return true; + } + + if (c >= 0x1318 && c <= 0x135A) { + return true; + } + + if (c >= 0x1380 && c <= 0x138F) { + return true; + } + + if (c >= 0x13A0 && c <= 0x13F4) { + return true; + } + + if (c >= 0x1401 && c <= 0x166C) { + return true; + } + + if (c >= 0x166F && c <= 0x167F) { + return true; + } + + if (c >= 0x1681 && c <= 0x169A) { + return true; + } + + if (c >= 0x16A0 && c <= 0x16EA) { + return true; + } + + if (c >= 0x1700 && c <= 0x170C) { + return true; + } + + if (c >= 
0x170E && c <= 0x1711) { + return true; + } + + if (c >= 0x1720 && c <= 0x1731) { + return true; + } + + if (c >= 0x1740 && c <= 0x1751) { + return true; + } + + if (c >= 0x1760 && c <= 0x176C) { + return true; + } + + if (c >= 0x176E && c <= 0x1770) { + return true; + } + + if (c >= 0x1780 && c <= 0x17B3) { + return true; + } + + if (c >= 0x1820 && c <= 0x1877) { + return true; + } + + if (c >= 0x1880 && c <= 0x18A8) { + return true; + } + + if (c >= 0x18B0 && c <= 0x18F5) { + return true; + } + + if (c >= 0x1900 && c <= 0x191C) { + return true; + } + + if (c >= 0x1950 && c <= 0x196D) { + return true; + } + + if (c >= 0x1970 && c <= 0x1974) { + return true; + } + + if (c >= 0x1980 && c <= 0x19AB) { + return true; + } + + if (c >= 0x19C1 && c <= 0x19C7) { + return true; + } + + if (c >= 0x1A00 && c <= 0x1A16) { + return true; + } + + if (c >= 0x1A20 && c <= 0x1A54) { + return true; + } + + if (c >= 0x1B05 && c <= 0x1B33) { + return true; + } + + if (c >= 0x1B45 && c <= 0x1B4B) { + return true; + } + + if (c >= 0x1B83 && c <= 0x1BA0) { + return true; + } + + if (c >= 0x1BBA && c <= 0x1BE5) { + return true; + } + + if (c >= 0x1C00 && c <= 0x1C23) { + return true; + } + + if (c >= 0x1C4D && c <= 0x1C4F) { + return true; + } + + if (c >= 0x1C5A && c <= 0x1C7D) { + return true; + } + + if (c >= 0x1CE9 && c <= 0x1CEC) { + return true; + } + + if (c >= 0x1CEE && c <= 0x1CF1) { + return true; + } + + if (c >= 0x1D00 && c <= 0x1DBF) { + return true; + } + + if (c >= 0x1E00 && c <= 0x1F15) { + return true; + } + + if (c >= 0x1F18 && c <= 0x1F1D) { + return true; + } + + if (c >= 0x1F20 && c <= 0x1F45) { + return true; + } + + if (c >= 0x1F48 && c <= 0x1F4D) { + return true; + } + + if (c >= 0x1F50 && c <= 0x1F57) { + return true; + } + + if (c >= 0x1F5F && c <= 0x1F7D) { + return true; + } + + if (c >= 0x1F80 && c <= 0x1FB4) { + return true; + } + + if (c >= 0x1FB6 && c <= 0x1FBC) { + return true; + } + + if (c >= 0x1FC2 && c <= 0x1FC4) { + return true; + } + + if (c >= 
0x1FC6 && c <= 0x1FCC) { + return true; + } + + if (c >= 0x1FD0 && c <= 0x1FD3) { + return true; + } + + if (c >= 0x1FD6 && c <= 0x1FDB) { + return true; + } + + if (c >= 0x1FE0 && c <= 0x1FEC) { + return true; + } + + if (c >= 0x1FF2 && c <= 0x1FF4) { + return true; + } + + if (c >= 0x1FF6 && c <= 0x1FFC) { + return true; + } + + if (c >= 0x2090 && c <= 0x209C) { + return true; + } + + if (c >= 0x210A && c <= 0x2113) { + return true; + } + + if (c >= 0x2119 && c <= 0x211D) { + return true; + } + + if (c >= 0x212A && c <= 0x212D) { + return true; + } + + if (c >= 0x212F && c <= 0x2139) { + return true; + } + + if (c >= 0x213C && c <= 0x213F) { + return true; + } + + if (c >= 0x2145 && c <= 0x2149) { + return true; + } + + if (c >= 0x2C00 && c <= 0x2C2E) { + return true; + } + + if (c >= 0x2C30 && c <= 0x2C5E) { + return true; + } + + if (c >= 0x2C60 && c <= 0x2CE4) { + return true; + } + + if (c >= 0x2CEB && c <= 0x2CEE) { + return true; + } + + if (c >= 0x2D00 && c <= 0x2D25) { + return true; + } + + if (c >= 0x2D30 && c <= 0x2D67) { + return true; + } + + if (c >= 0x2D80 && c <= 0x2D96) { + return true; + } + + if (c >= 0x2DA0 && c <= 0x2DA6) { + return true; + } + + if (c >= 0x2DA8 && c <= 0x2DAE) { + return true; + } + + if (c >= 0x2DB0 && c <= 0x2DB6) { + return true; + } + + if (c >= 0x2DB8 && c <= 0x2DBE) { + return true; + } + + if (c >= 0x2DC0 && c <= 0x2DC6) { + return true; + } + + if (c >= 0x2DC8 && c <= 0x2DCE) { + return true; + } + + if (c >= 0x2DD0 && c <= 0x2DD6) { + return true; + } + + if (c >= 0x2DD8 && c <= 0x2DDE) { + return true; + } + + if (c >= 0x3031 && c <= 0x3035) { + return true; + } + + if (c >= 0x3041 && c <= 0x3096) { + return true; + } + + if (c >= 0x309D && c <= 0x309F) { + return true; + } + + if (c >= 0x30A1 && c <= 0x30FA) { + return true; + } + + if (c >= 0x30FC && c <= 0x30FF) { + return true; + } + + if (c >= 0x3105 && c <= 0x312D) { + return true; + } + + if (c >= 0x3131 && c <= 0x318E) { + return true; + } + + if (c >= 
0x31A0 && c <= 0x31BA) { + return true; + } + + if (c >= 0x31F0 && c <= 0x31FF) { + return true; + } + + if (c >= 0x3400 && c <= 0x4DB5) { + return true; + } + + if (c >= 0x4E00 && c <= 0x9FCC) { + return true; + } + + if (c >= 0xA000 && c <= 0xA48C) { + return true; + } + + if (c >= 0xA4D0 && c <= 0xA4FD) { + return true; + } + + if (c >= 0xA500 && c <= 0xA60C) { + return true; + } + + if (c >= 0xA610 && c <= 0xA61F) { + return true; + } + + if (c >= 0xA640 && c <= 0xA66E) { + return true; + } + + if (c >= 0xA67F && c <= 0xA697) { + return true; + } + + if (c >= 0xA6A0 && c <= 0xA6E5) { + return true; + } + + if (c >= 0xA717 && c <= 0xA71F) { + return true; + } + + if (c >= 0xA722 && c <= 0xA788) { + return true; + } + + if (c >= 0xA78B && c <= 0xA78E) { + return true; + } + + if (c >= 0xA790 && c <= 0xA793) { + return true; + } + + if (c >= 0xA7A0 && c <= 0xA7AA) { + return true; + } + + if (c >= 0xA7F8 && c <= 0xA801) { + return true; + } + + if (c >= 0xA803 && c <= 0xA805) { + return true; + } + + if (c >= 0xA807 && c <= 0xA80A) { + return true; + } + + if (c >= 0xA80C && c <= 0xA822) { + return true; + } + + if (c >= 0xA840 && c <= 0xA873) { + return true; + } + + if (c >= 0xA882 && c <= 0xA8B3) { + return true; + } + + if (c >= 0xA8F2 && c <= 0xA8F7) { + return true; + } + + if (c >= 0xA90A && c <= 0xA925) { + return true; + } + + if (c >= 0xA930 && c <= 0xA946) { + return true; + } + + if (c >= 0xA960 && c <= 0xA97C) { + return true; + } + + if (c >= 0xA984 && c <= 0xA9B2) { + return true; + } + + if (c >= 0xAA00 && c <= 0xAA28) { + return true; + } + + if (c >= 0xAA40 && c <= 0xAA42) { + return true; + } + + if (c >= 0xAA44 && c <= 0xAA4B) { + return true; + } + + if (c >= 0xAA60 && c <= 0xAA76) { + return true; + } + + if (c >= 0xAA80 && c <= 0xAAAF) { + return true; + } + + if (c >= 0xAAB9 && c <= 0xAABD) { + return true; + } + + if (c >= 0xAADB && c <= 0xAADD) { + return true; + } + + if (c >= 0xAAE0 && c <= 0xAAEA) { + return true; + } + + if (c >= 
0xAAF2 && c <= 0xAAF4) { + return true; + } + + if (c >= 0xAB01 && c <= 0xAB06) { + return true; + } + + if (c >= 0xAB09 && c <= 0xAB0E) { + return true; + } + + if (c >= 0xAB11 && c <= 0xAB16) { + return true; + } + + if (c >= 0xAB20 && c <= 0xAB26) { + return true; + } + + if (c >= 0xAB28 && c <= 0xAB2E) { + return true; + } + + if (c >= 0xABC0 && c <= 0xABE2) { + return true; + } + + if (c >= 0xAC00 && c <= 0xD7A3) { + return true; + } + + if (c >= 0xD7B0 && c <= 0xD7C6) { + return true; + } + + if (c >= 0xD7CB && c <= 0xD7FB) { + return true; + } + + if (c >= 0xF900 && c <= 0xFA6D) { + return true; + } + + if (c >= 0xFA70 && c <= 0xFAD9) { + return true; + } + + if (c >= 0xFB00 && c <= 0xFB06) { + return true; + } + + if (c >= 0xFB13 && c <= 0xFB17) { + return true; + } + + if (c >= 0xFB1F && c <= 0xFB28) { + return true; + } + + if (c >= 0xFB2A && c <= 0xFB36) { + return true; + } + + if (c >= 0xFB38 && c <= 0xFB3C) { + return true; + } + + if (c >= 0xFB46 && c <= 0xFBB1) { + return true; + } + + if (c >= 0xFBD3 && c <= 0xFD3D) { + return true; + } + + if (c >= 0xFD50 && c <= 0xFD8F) { + return true; + } + + if (c >= 0xFD92 && c <= 0xFDC7) { + return true; + } + + if (c >= 0xFDF0 && c <= 0xFDFB) { + return true; + } + + if (c >= 0xFE70 && c <= 0xFE74) { + return true; + } + + if (c >= 0xFE76 && c <= 0xFEFC) { + return true; + } + + if (c >= 0xFF21 && c <= 0xFF3A) { + return true; + } + + if (c >= 0xFF41 && c <= 0xFF5A) { + return true; + } + + if (c >= 0xFF66 && c <= 0xFFBE) { + return true; + } + + if (c >= 0xFFC2 && c <= 0xFFC7) { + return true; + } + + if (c >= 0xFFCA && c <= 0xFFCF) { + return true; + } + + if (c >= 0xFFD2 && c <= 0xFFD7) { + return true; + } + + if (c >= 0xFFDA && c <= 0xFFDC) { + return true; + } + + switch (c) { + case 0x00AA: + case 0x00B5: + case 0x00BA: + case 0x02EC: + case 0x02EE: + case 0x0376: + case 0x0377: + case 0x0386: + case 0x038C: + case 0x0559: + case 0x066E: + case 0x066F: + case 0x06D5: + case 0x06E5: + case 0x06E6: + 
case 0x06EE: + case 0x06EF: + case 0x06FF: + case 0x0710: + case 0x07B1: + case 0x07F4: + case 0x07F5: + case 0x07FA: + case 0x081A: + case 0x0824: + case 0x0828: + case 0x08A0: + case 0x093D: + case 0x0950: + case 0x098F: + case 0x0990: + case 0x09B2: + case 0x09BD: + case 0x09CE: + case 0x09DC: + case 0x09DD: + case 0x09F0: + case 0x09F1: + case 0x0A0F: + case 0x0A10: + case 0x0A32: + case 0x0A33: + case 0x0A35: + case 0x0A36: + case 0x0A38: + case 0x0A39: + case 0x0A5E: + case 0x0AB2: + case 0x0AB3: + case 0x0ABD: + case 0x0AD0: + case 0x0AE0: + case 0x0AE1: + case 0x0B0F: + case 0x0B10: + case 0x0B32: + case 0x0B33: + case 0x0B3D: + case 0x0B5C: + case 0x0B5D: + case 0x0B71: + case 0x0B83: + case 0x0B99: + case 0x0B9A: + case 0x0B9C: + case 0x0B9E: + case 0x0B9F: + case 0x0BA3: + case 0x0BA4: + case 0x0BD0: + case 0x0C3D: + case 0x0C58: + case 0x0C59: + case 0x0C60: + case 0x0C61: + case 0x0CBD: + case 0x0CDE: + case 0x0CE0: + case 0x0CE1: + case 0x0CF1: + case 0x0CF2: + case 0x0D3D: + case 0x0D4E: + case 0x0D60: + case 0x0D61: + case 0x0DBD: + case 0x0E32: + case 0x0E33: + case 0x0E81: + case 0x0E82: + case 0x0E84: + case 0x0E87: + case 0x0E88: + case 0x0E8A: + case 0x0E8D: + case 0x0EA5: + case 0x0EA7: + case 0x0EAA: + case 0x0EAB: + case 0x0EB2: + case 0x0EB3: + case 0x0EBD: + case 0x0EC6: + case 0x0F00: + case 0x103F: + case 0x1061: + case 0x1065: + case 0x1066: + case 0x108E: + case 0x10C7: + case 0x10CD: + case 0x1258: + case 0x12C0: + case 0x17D7: + case 0x17DC: + case 0x18AA: + case 0x1AA7: + case 0x1BAE: + case 0x1BAF: + case 0x1CF5: + case 0x1CF6: + case 0x1F59: + case 0x1F5B: + case 0x1F5D: + case 0x1FBE: + case 0x2071: + case 0x207F: + case 0x2102: + case 0x2107: + case 0x2115: + case 0x2124: + case 0x2126: + case 0x2128: + case 0x214E: + case 0x2183: + case 0x2184: + case 0x2CF2: + case 0x2CF3: + case 0x2D27: + case 0x2D2D: + case 0x2D6F: + case 0x2E2F: + case 0x3005: + case 0x3006: + case 0x303B: + case 0x303C: + case 0xA62A: + case 0xA62B: + case 
0xA8FB:
+	case 0xA9CF:
+	case 0xAA7A:
+	case 0xAAB1:
+	case 0xAAB5:
+	case 0xAAB6:
+	case 0xAAC0:
+	case 0xAAC2:
+	case 0xFB1D:
+	case 0xFB3E:
+	case 0xFB40:
+	case 0xFB41:
+	case 0xFB43:
+	case 0xFB44:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Report whether `c` is an ASCII hexadecimal digit (0-9, a-f, A-F).
+ *
+ * b_wchar is a 32-bit code point, while isxdigit() is only defined for
+ * arguments representable as unsigned char (or EOF); handing it anything
+ * above 0xFF is undefined behaviour. The ASCII ranges are compared directly
+ * so that every code point gets a well-defined answer.
+ */
+bool b_wchar_is_hex_digit(b_wchar c)
+{
+	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
+		|| (c >= 'A' && c <= 'F');
+}
+
+/* Report whether `c` is whitespace, as decided by iswspace(). */
+bool b_wchar_is_space(b_wchar c)
+{
+	return iswspace((wchar_t)c);
+}
+
+/* Report whether `c` is punctuation, as decided by iswpunct(). */
+bool b_wchar_is_punct(b_wchar c)
+{
+	return iswpunct((wchar_t)c);
+}
diff --git a/core/include/blue/core/encoding.h b/core/include/blue/core/encoding.h
new file mode 100644
index 0000000..97817ae
--- /dev/null
+++ b/core/include/blue/core/encoding.h
@@ -0,0 +1,31 @@
+#ifndef BLUE_CORE_ENCODING_H_
+#define BLUE_CORE_ENCODING_H_
+
+#include
+#include
+#include
+
+#define B_WCHAR_INVALID ((b_wchar) - 1)
+
+typedef int32_t b_wchar;
+
+BLUE_API bool b_wchar_is_alpha(b_wchar c);
+BLUE_API bool b_wchar_is_number(b_wchar c);
+static inline bool b_wchar_is_bin_digit(b_wchar c)
+{
+	return c >= '0' && c <= '1';
+}
+static inline bool b_wchar_is_oct_digit(b_wchar c)
+{
+	return c >= '0' && c <= '7';
+}
+BLUE_API bool b_wchar_is_hex_digit(b_wchar c);
+BLUE_API bool b_wchar_is_space(b_wchar c);
+static inline bool b_wchar_is_alnum(b_wchar c)
+{
+	return b_wchar_is_alpha(c) || b_wchar_is_number(c);
+}
+
+BLUE_API bool b_wchar_is_punct(b_wchar c);
+
+#endif
From 0a2a1c695a60554a9aafe40a592ea14953d08572 Mon Sep 17 00:00:00 2001
From: Max Wash
Date: Mon, 22 Sep 2025 10:29:01 +0100
Subject: [PATCH 03/23] core: error: add function to test if an error has a particular vendor/code
---
 core/error.c                   | 14 ++++++++++++++
 core/include/blue/core/error.h |  3 +++
 2 files changed, 17 insertions(+)

diff --git a/core/error.c b/core/error.c
index 12d6064..ede2f76 100644
--- a/core/error.c
+++ b/core/error.c
@@ -38,6 +38,20 @@ static void error_cleanup(void)
 	}
 }
 
+bool b_result_is(
+	struct b_error *err, const b_error_vendor *vendor, b_error_status_code code)
+{
+	if (!err) {
+		return false;
+	}
+
+	if
(err->err_vendor != vendor) { + return false; + } + + return err->err_code == code; +} + const struct b_error_vendor *b_error_vendor_get_builtin(void) { return &builtin_vendor; diff --git a/core/include/blue/core/error.h b/core/include/blue/core/error.h index 8ec3071..b052f66 100644 --- a/core/include/blue/core/error.h +++ b/core/include/blue/core/error.h @@ -330,6 +330,9 @@ BLUE_API b_error *z__b_error_caused_by(b_error *, b_error *); BLUE_API b_error *z__b_error_caused_by_b_status(b_error *, b_status); BLUE_API void z__b_error_throw(b_error *, const char *, unsigned int, const char *); +BLUE_API bool b_result_is( + b_result result, const b_error_vendor *vendor, b_error_status_code code); + BLUE_API const b_error_vendor *b_error_vendor_get_builtin(void); BLUE_API const b_error_vendor *b_error_vendor_get_errno(void); BLUE_API const b_error_definition *b_error_vendor_get_error_definition( From 072903f896cc4c62e0da08301873ac90f9ddfd79 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:29:48 +0100 Subject: [PATCH 04/23] core: error: fix b_error_caused_by_code calling an undefined function --- core/include/blue/core/error.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/include/blue/core/error.h b/core/include/blue/core/error.h index b052f66..a3c32ce 100644 --- a/core/include/blue/core/error.h +++ b/core/include/blue/core/error.h @@ -86,7 +86,7 @@ __FILE__, __LINE__, __FUNCTION__, NULL)) #define b_error_caused_by_code(vendor, code, cause_vendor, cause_code) \ (z__b_error_create( \ - vendor, code, z__b_error_with_code(cause_vendor, cause_code), \ + vendor, code, b_error_with_code(cause_vendor, cause_code), \ __FILE__, __LINE__, __FUNCTION__, NULL)) #define b_error_with_string(vendor, code, ...) 
\ (z__b_error_create( \ From d52992c8ba99144538e1f3eae10852708b67f7c9 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:30:40 +0100 Subject: [PATCH 05/23] core: printf: switch to an enhanced version of embedded printf --- core/printf.c | 2590 +++++++++++++++++++++++++++++++------------------ core/printf.h | 220 +++-- 2 files changed, 1794 insertions(+), 1016 deletions(-) diff --git a/core/printf.c b/core/printf.c index 662a1f1..f13a858 100644 --- a/core/printf.c +++ b/core/printf.c @@ -1,957 +1,1633 @@ -/////////////////////////////////////////////////////////////////////////////// -// \author (c) Marco Paland (info@paland.com) -// 2014-2019, PALANDesign Hannover, Germany -// -// \license The MIT License (MIT) -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// -// \brief Tiny printf, sprintf and (v)snprintf implementation, optimized for speed on -// embedded systems with a very limited resources. 
These routines are thread -// safe and reentrant! -// Use this instead of the bloated standard/newlib printf cause these use -// malloc for printf (and may not be thread safe). -// -/////////////////////////////////////////////////////////////////////////////// - -#include "printf.h" - -#include -#include - -// define this globally (e.g. gcc -DPRINTF_INCLUDE_CONFIG_H ...) to include the -// printf_config.h header file -// default: undefined -#ifdef PRINTF_INCLUDE_CONFIG_H -#include "printf_config.h" -#endif - -// 'ntoa' conversion buffer size, this must be big enough to hold one converted -// numeric number including padded zeros (dynamically created on stack) -// default: 32 byte -#ifndef PRINTF_NTOA_BUFFER_SIZE -#define PRINTF_NTOA_BUFFER_SIZE 32U -#endif - -// 'ftoa' conversion buffer size, this must be big enough to hold one converted -// float number including padded zeros (dynamically created on stack) -// default: 32 byte -#ifndef PRINTF_FTOA_BUFFER_SIZE -#define PRINTF_FTOA_BUFFER_SIZE 32U -#endif - -// support for the floating point type (%f) -// default: activated -#ifndef PRINTF_DISABLE_SUPPORT_FLOAT -#define PRINTF_SUPPORT_FLOAT -#endif - -// support for exponential floating point notation (%e/%g) -// default: activated -#ifndef PRINTF_DISABLE_SUPPORT_EXPONENTIAL -#define PRINTF_SUPPORT_EXPONENTIAL -#endif - -// define the default floating point precision -// default: 6 digits -#ifndef PRINTF_DEFAULT_FLOAT_PRECISION -#define PRINTF_DEFAULT_FLOAT_PRECISION 6U -#endif - -// define the largest float suitable to print with %f -// default: 1e9 -#ifndef PRINTF_MAX_FLOAT -#define PRINTF_MAX_FLOAT 1e9 -#endif - -// support for the long long types (%llu or %p) -// default: activated -#ifndef PRINTF_DISABLE_SUPPORT_LONG_LONG -#define PRINTF_SUPPORT_LONG_LONG -#endif - -// support for the ptrdiff_t type (%t) -// ptrdiff_t is normally defined in as long or long long type -// default: activated -#ifndef PRINTF_DISABLE_SUPPORT_PTRDIFF_T -#define 
PRINTF_SUPPORT_PTRDIFF_T -#endif - -/////////////////////////////////////////////////////////////////////////////// - -// internal flag definitions -#define FLAGS_ZEROPAD (1U << 0U) -#define FLAGS_LEFT (1U << 1U) -#define FLAGS_PLUS (1U << 2U) -#define FLAGS_SPACE (1U << 3U) -#define FLAGS_HASH (1U << 4U) -#define FLAGS_UPPERCASE (1U << 5U) -#define FLAGS_CHAR (1U << 6U) -#define FLAGS_SHORT (1U << 7U) -#define FLAGS_LONG (1U << 8U) -#define FLAGS_LONG_LONG (1U << 9U) -#define FLAGS_PRECISION (1U << 10U) -#define FLAGS_ADAPT_EXP (1U << 11U) - -// import float.h for DBL_MAX -#if defined(PRINTF_SUPPORT_FLOAT) -#include -#endif - -// output function type -typedef void (*out_fct_type)( - char character, void *buffer, size_t idx, size_t maxlen); - -// wrapper (used as buffer) for output function type -typedef struct { - void (*fct)(char character, void *arg); - void *arg; -} out_fct_wrap_type; - -// internal buffer output -static inline void _out_buffer( - char character, void *buffer, size_t idx, size_t maxlen) -{ - if (idx < maxlen) { - ((char *)buffer)[idx] = character; - } -} - -// internal null output -static inline void _out_null(char character, void *buffer, size_t idx, size_t maxlen) -{ - (void)character; - (void)buffer; - (void)idx; - (void)maxlen; -} - -// internal output function wrapper -static inline void _out_fct(char character, void *buffer, size_t idx, size_t maxlen) -{ - (void)idx; - (void)maxlen; - if (character) { - // buffer is the output fct pointer - ((out_fct_wrap_type *)buffer) - ->fct(character, ((out_fct_wrap_type *)buffer)->arg); - } -} - -// internal secure strlen -// \return The length of the string (excluding the terminating 0) limited by 'maxsize' -static inline unsigned int _strnlen_s(const char *str, size_t maxsize) -{ - const char *s; - for (s = str; *s && maxsize--; ++s) - ; - return (unsigned int)(s - str); -} - -// internal test if char is a digit (0-9) -// \return true if char is a digit -static inline bool _is_digit(char ch) -{ - 
return (ch >= '0') && (ch <= '9'); -} - -// internal ASCII string to unsigned int conversion -static unsigned int _atoi(const char **str) -{ - unsigned int i = 0U; - while (_is_digit(**str)) { - i = i * 10U + (unsigned int)(*((*str)++) - '0'); - } - return i; -} - -// output the specified string in reverse, taking care of any zero-padding -static size_t _out_rev( - out_fct_type out, char *buffer, size_t idx, size_t maxlen, - const char *buf, size_t len, unsigned int width, unsigned int flags) -{ - const size_t start_idx = idx; - - // pad spaces up to given width - if (!(flags & FLAGS_LEFT) && !(flags & FLAGS_ZEROPAD)) { - for (size_t i = len; i < width; i++) { - out(' ', buffer, idx++, maxlen); - } - } - - // reverse string - while (len) { - out(buf[--len], buffer, idx++, maxlen); - } - - // append pad spaces up to given width - if (flags & FLAGS_LEFT) { - while (idx - start_idx < width) { - out(' ', buffer, idx++, maxlen); - } - } - - return idx; -} - -// internal itoa format -static size_t _ntoa_format( - out_fct_type out, char *buffer, size_t idx, size_t maxlen, char *buf, - size_t len, bool negative, unsigned int base, unsigned int prec, - unsigned int width, unsigned int flags) -{ - // pad leading zeros - if (!(flags & FLAGS_LEFT)) { - if (width && (flags & FLAGS_ZEROPAD) - && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) { - width--; - } - while ((len < prec) && (len < PRINTF_NTOA_BUFFER_SIZE)) { - buf[len++] = '0'; - } - while ((flags & FLAGS_ZEROPAD) && (len < width) - && (len < PRINTF_NTOA_BUFFER_SIZE)) { - buf[len++] = '0'; - } - } - - // handle hash - if (flags & FLAGS_HASH) { - if (!(flags & FLAGS_PRECISION) && len - && ((len == prec) || (len == width))) { - len--; - if (len && (base == 16U)) { - len--; - } - } - if ((base == 16U) && !(flags & FLAGS_UPPERCASE) - && (len < PRINTF_NTOA_BUFFER_SIZE)) { - buf[len++] = 'x'; - } else if ( - (base == 16U) && (flags & FLAGS_UPPERCASE) - && (len < PRINTF_NTOA_BUFFER_SIZE)) { - buf[len++] = 'X'; - } else if 
((base == 2U) && (len < PRINTF_NTOA_BUFFER_SIZE)) { - buf[len++] = 'b'; - } - if (len < PRINTF_NTOA_BUFFER_SIZE) { - buf[len++] = '0'; - } - } - - if (len < PRINTF_NTOA_BUFFER_SIZE) { - if (negative) { - buf[len++] = '-'; - } else if (flags & FLAGS_PLUS) { - buf[len++] = '+'; // ignore the space if the '+' exists - } else if (flags & FLAGS_SPACE) { - buf[len++] = ' '; - } - } - - return _out_rev(out, buffer, idx, maxlen, buf, len, width, flags); -} - -// internal itoa for 'long' type -static size_t _ntoa_long( - out_fct_type out, char *buffer, size_t idx, size_t maxlen, - unsigned long value, bool negative, unsigned long base, - unsigned int prec, unsigned int width, unsigned int flags) -{ - char buf[PRINTF_NTOA_BUFFER_SIZE]; - size_t len = 0U; - - // no hash for 0 values - if (!value) { - flags &= ~FLAGS_HASH; - } - - // write if precision != 0 and value is != 0 - if (!(flags & FLAGS_PRECISION) || value) { - do { - const char digit = (char)(value % base); - buf[len++] = digit < 10 - ? '0' + digit - : (flags & FLAGS_UPPERCASE ? 'A' : 'a') - + digit - 10; - value /= base; - } while (value && (len < PRINTF_NTOA_BUFFER_SIZE)); - } - - return _ntoa_format( - out, buffer, idx, maxlen, buf, len, negative, - (unsigned int)base, prec, width, flags); -} - -// internal itoa for 'long long' type -#if defined(PRINTF_SUPPORT_LONG_LONG) -static size_t _ntoa_long_long( - out_fct_type out, char *buffer, size_t idx, size_t maxlen, - unsigned long long value, bool negative, unsigned long long base, - unsigned int prec, unsigned int width, unsigned int flags) -{ - char buf[PRINTF_NTOA_BUFFER_SIZE]; - size_t len = 0U; - - // no hash for 0 values - if (!value) { - flags &= ~FLAGS_HASH; - } - - // write if precision != 0 and value is != 0 - if (!(flags & FLAGS_PRECISION) || value) { - do { - const char digit = (char)(value % base); - buf[len++] = digit < 10 - ? '0' + digit - : (flags & FLAGS_UPPERCASE ? 
'A' : 'a') - + digit - 10; - value /= base; - } while (value && (len < PRINTF_NTOA_BUFFER_SIZE)); - } - - return _ntoa_format( - out, buffer, idx, maxlen, buf, len, negative, - (unsigned int)base, prec, width, flags); -} -#endif // PRINTF_SUPPORT_LONG_LONG - -#if defined(PRINTF_SUPPORT_FLOAT) - -#if defined(PRINTF_SUPPORT_EXPONENTIAL) -// forward declaration so that _ftoa can switch to exp notation for values > PRINTF_MAX_FLOAT -static size_t _etoa( - out_fct_type out, char *buffer, size_t idx, size_t maxlen, double value, - unsigned int prec, unsigned int width, unsigned int flags); -#endif - -// internal ftoa for fixed decimal floating point -static size_t _ftoa( - out_fct_type out, char *buffer, size_t idx, size_t maxlen, double value, - unsigned int prec, unsigned int width, unsigned int flags) -{ - char buf[PRINTF_FTOA_BUFFER_SIZE]; - size_t len = 0U; - double diff = 0.0; - - // powers of 10 - static const double pow10[] - = {1, 10, 100, 1000, 10000, - 100000, 1000000, 10000000, 100000000, 1000000000}; - - // test for special values - if (value != value) - return _out_rev(out, buffer, idx, maxlen, "nan", 3, width, flags); - if (value < -DBL_MAX) - return _out_rev(out, buffer, idx, maxlen, "fni-", 4, width, flags); - if (value > DBL_MAX) - return _out_rev( - out, buffer, idx, maxlen, - (flags & FLAGS_PLUS) ? "fni+" : "fni", - (flags & FLAGS_PLUS) ? 
4U : 3U, width, flags); - - // test for very large values - // standard printf behavior is to print EVERY whole number digit -- - // which could be 100s of characters overflowing your buffers == bad - if ((value > PRINTF_MAX_FLOAT) || (value < -PRINTF_MAX_FLOAT)) { -#if defined(PRINTF_SUPPORT_EXPONENTIAL) - return _etoa(out, buffer, idx, maxlen, value, prec, width, flags); -#else - return 0U; -#endif - } - - // test for negative - bool negative = false; - if (value < 0) { - negative = true; - value = 0 - value; - } - - // set default precision, if not set explicitly - if (!(flags & FLAGS_PRECISION)) { - prec = PRINTF_DEFAULT_FLOAT_PRECISION; - } - // limit precision to 9, cause a prec >= 10 can lead to overflow errors - while ((len < PRINTF_FTOA_BUFFER_SIZE) && (prec > 9U)) { - buf[len++] = '0'; - prec--; - } - - int whole = (int)value; - double tmp = (value - whole) * pow10[prec]; - unsigned long frac = (unsigned long)tmp; - diff = tmp - frac; - - if (diff > 0.5) { - ++frac; - // handle rollover, e.g. 
case 0.99 with prec 1 is 1.0 - if (frac >= pow10[prec]) { - frac = 0; - ++whole; - } - } else if (diff < 0.5) { - } else if ((frac == 0U) || (frac & 1U)) { - // if halfway, round up if odd OR if last digit is 0 - ++frac; - } - - if (prec == 0U) { - diff = value - (double)whole; - if ((!(diff < 0.5) || (diff > 0.5)) && (whole & 1)) { - // exactly 0.5 and ODD, then round up - // 1.5 -> 2, but 2.5 -> 2 - ++whole; - } - } else { - unsigned int count = prec; - // now do fractional part, as an unsigned number - while (len < PRINTF_FTOA_BUFFER_SIZE) { - --count; - buf[len++] = (char)(48U + (frac % 10U)); - if (!(frac /= 10U)) { - break; - } - } - // add extra 0s - while ((len < PRINTF_FTOA_BUFFER_SIZE) && (count-- > 0U)) { - buf[len++] = '0'; - } - if (len < PRINTF_FTOA_BUFFER_SIZE) { - // add decimal - buf[len++] = '.'; - } - } - - // do whole part, number is reversed - while (len < PRINTF_FTOA_BUFFER_SIZE) { - buf[len++] = (char)(48 + (whole % 10)); - if (!(whole /= 10)) { - break; - } - } - - // pad leading zeros - if (!(flags & FLAGS_LEFT) && (flags & FLAGS_ZEROPAD)) { - if (width && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) { - width--; - } - while ((len < width) && (len < PRINTF_FTOA_BUFFER_SIZE)) { - buf[len++] = '0'; - } - } - - if (len < PRINTF_FTOA_BUFFER_SIZE) { - if (negative) { - buf[len++] = '-'; - } else if (flags & FLAGS_PLUS) { - buf[len++] = '+'; // ignore the space if the '+' exists - } else if (flags & FLAGS_SPACE) { - buf[len++] = ' '; - } - } - - return _out_rev(out, buffer, idx, maxlen, buf, len, width, flags); -} - -#if defined(PRINTF_SUPPORT_EXPONENTIAL) -// internal ftoa variant for exponential floating-point type, contributed by Martijn Jasperse -static size_t _etoa( - out_fct_type out, char *buffer, size_t idx, size_t maxlen, double value, - unsigned int prec, unsigned int width, unsigned int flags) -{ - // check for NaN and special values - if ((value != value) || (value > DBL_MAX) || (value < -DBL_MAX)) { - return _ftoa(out, buffer, 
idx, maxlen, value, prec, width, flags); - } - - // determine the sign - const bool negative = value < 0; - if (negative) { - value = -value; - } - - // default precision - if (!(flags & FLAGS_PRECISION)) { - prec = PRINTF_DEFAULT_FLOAT_PRECISION; - } - - // determine the decimal exponent - // based on the algorithm by David Gay (https://www.ampl.com/netlib/fp/dtoa.c) - union { - uint64_t U; - double F; - } conv; - - conv.F = value; - int exp2 = (int)((conv.U >> 52U) & 0x07FFU) - 1023; // effectively log2 - conv.U = (conv.U & ((1ULL << 52U) - 1U)) - | (1023ULL << 52U); // drop the exponent so conv.F is now in [1,2) - // now approximate log10 from the log2 integer part and an expansion of ln around 1.5 - int expval = (int)(0.1760912590558 + exp2 * 0.301029995663981 - + (conv.F - 1.5) * 0.289529654602168); - // now we want to compute 10^expval but we want to be sure it won't overflow - exp2 = (int)(expval * 3.321928094887362 + 0.5); - const double z = expval * 2.302585092994046 - exp2 * 0.6931471805599453; - const double z2 = z * z; - conv.U = (uint64_t)(exp2 + 1023) << 52U; - // compute exp(z) using continued fractions, see https://en.wikipedia.org/wiki/Exponential_function#Continued_fractions_for_ex - conv.F *= 1 + 2 * z / (2 - z + (z2 / (6 + (z2 / (10 + z2 / 14))))); - // correct for rounding errors - if (value < conv.F) { - expval--; - conv.F /= 10; - } - - // the exponent format is "%+03d" and largest value is "307", so set aside 4-5 characters - unsigned int minwidth = ((expval < 100) && (expval > -100)) ? 4U : 5U; - - // in "%g" mode, "prec" is the number of *significant figures* not decimals - if (flags & FLAGS_ADAPT_EXP) { - // do we want to fall-back to "%f" mode? 
- if ((value >= 1e-4) && (value < 1e6)) { - if ((int)prec > expval) { - prec = (unsigned)((int)prec - expval - 1); - } else { - prec = 0; - } - flags |= FLAGS_PRECISION; // make sure _ftoa respects precision - // no characters in exponent - minwidth = 0U; - expval = 0; - } else { - // we use one sigfig for the whole part - if ((prec > 0) && (flags & FLAGS_PRECISION)) { - --prec; - } - } - } - - // will everything fit? - unsigned int fwidth = width; - if (width > minwidth) { - // we didn't fall-back so subtract the characters required for the exponent - fwidth -= minwidth; - } else { - // not enough characters, so go back to default sizing - fwidth = 0U; - } - if ((flags & FLAGS_LEFT) && minwidth) { - // if we're padding on the right, DON'T pad the floating part - fwidth = 0U; - } - - // rescale the float value - if (expval) { - value /= conv.F; - } - - // output the floating part - const size_t start_idx = idx; - idx - = _ftoa(out, buffer, idx, maxlen, negative ? -value : value, - prec, fwidth, flags & ~FLAGS_ADAPT_EXP); - - // output the exponent part - if (minwidth) { - // output the exponential symbol - out((flags & FLAGS_UPPERCASE) ? 'E' : 'e', buffer, idx++, maxlen); - // output the exponent value - idx = _ntoa_long( - out, buffer, idx, maxlen, - (expval < 0) ? -expval : expval, expval < 0, 10, 0, - minwidth - 1, FLAGS_ZEROPAD | FLAGS_PLUS); - // might need to right-pad spaces - if (flags & FLAGS_LEFT) { - while (idx - start_idx < width) - out(' ', buffer, idx++, maxlen); - } - } - return idx; -} -#endif // PRINTF_SUPPORT_EXPONENTIAL -#endif // PRINTF_SUPPORT_FLOAT - -// internal vsnprintf -static int _vsnprintf( - out_fct_type out, char *buffer, const size_t maxlen, const char *format, - va_list va) -{ - unsigned int flags, width, precision, n; - size_t idx = 0U; - - if (!buffer) { - // use null output function - out = _out_null; - } - - while (*format) { - // format specifier? 
%[flags][width][.precision][length] - if (*format != '%') { - // no - out(*format, buffer, idx++, maxlen); - format++; - continue; - } else { - // yes, evaluate it - format++; - } - - // evaluate flags - flags = 0U; - do { - switch (*format) { - case '0': - flags |= FLAGS_ZEROPAD; - format++; - n = 1U; - break; - case '-': - flags |= FLAGS_LEFT; - format++; - n = 1U; - break; - case '+': - flags |= FLAGS_PLUS; - format++; - n = 1U; - break; - case ' ': - flags |= FLAGS_SPACE; - format++; - n = 1U; - break; - case '#': - flags |= FLAGS_HASH; - format++; - n = 1U; - break; - default: - n = 0U; - break; - } - } while (n); - - // evaluate width field - width = 0U; - if (_is_digit(*format)) { - width = _atoi(&format); - } else if (*format == '*') { - const int w = va_arg(va, int); - if (w < 0) { - flags |= FLAGS_LEFT; // reverse padding - width = (unsigned int)-w; - } else { - width = (unsigned int)w; - } - format++; - } - - // evaluate precision field - precision = 0U; - if (*format == '.') { - flags |= FLAGS_PRECISION; - format++; - if (_is_digit(*format)) { - precision = _atoi(&format); - } else if (*format == '*') { - const int prec = (int)va_arg(va, int); - precision = prec > 0 ? (unsigned int)prec : 0U; - format++; - } - } - - // evaluate length field - switch (*format) { - case 'l': - flags |= FLAGS_LONG; - format++; - if (*format == 'l') { - flags |= FLAGS_LONG_LONG; - format++; - } - break; - case 'h': - flags |= FLAGS_SHORT; - format++; - if (*format == 'h') { - flags |= FLAGS_CHAR; - format++; - } - break; -#if defined(PRINTF_SUPPORT_PTRDIFF_T) - case 't': - flags - |= (sizeof(ptrdiff_t) == sizeof(long) - ? FLAGS_LONG - : FLAGS_LONG_LONG); - format++; - break; -#endif - case 'j': - flags - |= (sizeof(intmax_t) == sizeof(long) - ? FLAGS_LONG - : FLAGS_LONG_LONG); - format++; - break; - case 'z': - flags - |= (sizeof(size_t) == sizeof(long) - ? 
FLAGS_LONG - : FLAGS_LONG_LONG); - format++; - break; - default: - break; - } - - // evaluate specifier - switch (*format) { - case 'd': - case 'i': - case 'u': - case 'x': - case 'X': - case 'o': - case 'b': { - // set the base - unsigned int base; - if (*format == 'x' || *format == 'X') { - base = 16U; - } else if (*format == 'o') { - base = 8U; - } else if (*format == 'b') { - base = 2U; - } else { - base = 10U; - flags &= ~FLAGS_HASH; // no hash for dec format - } - // uppercase - if (*format == 'X') { - flags |= FLAGS_UPPERCASE; - } - - // no plus or space flag for u, x, X, o, b - if ((*format != 'i') && (*format != 'd')) { - flags &= ~(FLAGS_PLUS | FLAGS_SPACE); - } - - // ignore '0' flag when precision is given - if (flags & FLAGS_PRECISION) { - flags &= ~FLAGS_ZEROPAD; - } - - // convert the integer - if ((*format == 'i') || (*format == 'd')) { - // signed - if (flags & FLAGS_LONG_LONG) { -#if defined(PRINTF_SUPPORT_LONG_LONG) - const long long value - = va_arg(va, long long); - idx = _ntoa_long_long( - out, buffer, idx, maxlen, - (unsigned long long)(value > 0 ? value - : 0 - value), - value < 0, base, precision, - width, flags); -#endif - } else if (flags & FLAGS_LONG) { - const long value = va_arg(va, long); - idx = _ntoa_long( - out, buffer, idx, maxlen, - (unsigned long)(value > 0 ? value - : 0 - value), - value < 0, base, precision, - width, flags); - } else { - const int value - = (flags & FLAGS_CHAR) - ? (char)va_arg(va, int) - : (flags & FLAGS_SHORT) - ? (short int)va_arg(va, int) - : va_arg(va, int); - idx = _ntoa_long( - out, buffer, idx, maxlen, - (unsigned int)(value > 0 ? 
value - : 0 - value), - value < 0, base, precision, - width, flags); - } - } else { - // unsigned - if (flags & FLAGS_LONG_LONG) { -#if defined(PRINTF_SUPPORT_LONG_LONG) - idx = _ntoa_long_long( - out, buffer, idx, maxlen, - va_arg(va, unsigned long long), - false, base, precision, width, - flags); -#endif - } else if (flags & FLAGS_LONG) { - idx = _ntoa_long( - out, buffer, idx, maxlen, - va_arg(va, unsigned long), false, - base, precision, width, flags); - } else { - const unsigned int value - = (flags & FLAGS_CHAR) - ? (unsigned char)va_arg( - va, unsigned int) - : (flags & FLAGS_SHORT) - ? (unsigned short int)va_arg( - va, unsigned int) - : va_arg(va, unsigned int); - idx = _ntoa_long( - out, buffer, idx, maxlen, value, - false, base, precision, width, - flags); - } - } - format++; - break; - } -#if defined(PRINTF_SUPPORT_FLOAT) - case 'f': - case 'F': - if (*format == 'F') - flags |= FLAGS_UPPERCASE; - idx = _ftoa( - out, buffer, idx, maxlen, va_arg(va, double), - precision, width, flags); - format++; - break; -#if defined(PRINTF_SUPPORT_EXPONENTIAL) - case 'e': - case 'E': - case 'g': - case 'G': - if ((*format == 'g') || (*format == 'G')) - flags |= FLAGS_ADAPT_EXP; - if ((*format == 'E') || (*format == 'G')) - flags |= FLAGS_UPPERCASE; - idx = _etoa( - out, buffer, idx, maxlen, va_arg(va, double), - precision, width, flags); - format++; - break; -#endif // PRINTF_SUPPORT_EXPONENTIAL -#endif // PRINTF_SUPPORT_FLOAT - case 'c': { - unsigned int l = 1U; - // pre padding - if (!(flags & FLAGS_LEFT)) { - while (l++ < width) { - out(' ', buffer, idx++, maxlen); - } - } - // char output - out((char)va_arg(va, int), buffer, idx++, maxlen); - // post padding - if (flags & FLAGS_LEFT) { - while (l++ < width) { - out(' ', buffer, idx++, maxlen); - } - } - format++; - break; - } - - case 's': { - const char *p = va_arg(va, char *); - unsigned int l = _strnlen_s( - p, precision ? 
precision : (size_t)-1); - // pre padding - if (flags & FLAGS_PRECISION) { - l = (l < precision ? l : precision); - } - if (!(flags & FLAGS_LEFT)) { - while (l++ < width) { - out(' ', buffer, idx++, maxlen); - } - } - // string output - while ((*p != 0) - && (!(flags & FLAGS_PRECISION) || precision--)) { - out(*(p++), buffer, idx++, maxlen); - } - // post padding - if (flags & FLAGS_LEFT) { - while (l++ < width) { - out(' ', buffer, idx++, maxlen); - } - } - format++; - break; - } - - case 'p': { - width = sizeof(void *) * 2U; - flags |= FLAGS_ZEROPAD | FLAGS_UPPERCASE; -#if defined(PRINTF_SUPPORT_LONG_LONG) - const bool is_ll = sizeof(uintptr_t) == sizeof(long long); - if (is_ll) { - idx = _ntoa_long_long( - out, buffer, idx, maxlen, - (uintptr_t)va_arg(va, void *), false, - 16U, precision, width, flags); - } else { -#endif - idx = _ntoa_long( - out, buffer, idx, maxlen, - (unsigned long)((uintptr_t)va_arg( - va, void *)), - false, 16U, precision, width, flags); -#if defined(PRINTF_SUPPORT_LONG_LONG) - } -#endif - format++; - break; - } - - case '%': - out('%', buffer, idx++, maxlen); - format++; - break; - - default: - out(*format, buffer, idx++, maxlen); - format++; - break; - } - } - - // termination - out((char)0, buffer, idx < maxlen ? 
idx : maxlen - 1U, maxlen); - - // return written chars without terminating \0 - return (int)idx; -} - -/////////////////////////////////////////////////////////////////////////////// - -int z__b_fctprintf( - void (*out)(char character, void *arg), void *arg, const char *format, - va_list va) -{ - const out_fct_wrap_type out_fct_wrap = {out, arg}; - const int ret = _vsnprintf( - _out_fct, (char *)(uintptr_t)&out_fct_wrap, (size_t)-1, format, va); - return ret; -} +/** + * @author (c) Eyal Rozenberg + * 2021-2023, Haifa, Palestine/Israel + * @author (c) Marco Paland (info@paland.com) + * 2014-2019, PALANDesign Hannover, Germany + * + * @note Others have made smaller contributions to this file: see the + * contributors page at https://github.com/eyalroz/printf/graphs/contributors + * or ask one of the authors. The original code for exponential specifiers was + * contributed by Martijn Jasperse . + * + * @brief Small stand-alone implementation of the printf family of functions + * (`(v)printf`, `(v)s(n)printf` etc., geared towards use on embedded systems + * with limited resources. + * + * @note the implementations are thread-safe; re-entrant; use no functions from + * the standard library; and do not dynamically allocate any memory. + * + * @license The MIT License (MIT) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +// Define this globally (e.g. gcc -DPRINTF_INCLUDE_CONFIG_H=1 ...) to include +// the printf_config.h header file +#if PRINTF_INCLUDE_CONFIG_H +#include "printf_config.h" +#endif + +#include "printf.h" + +#ifdef __cplusplus +#include +#include +#else +#include +#include +#include +#endif // __cplusplus + +#if PRINTF_ALIAS_STANDARD_FUNCTION_NAMES_HARD +#define printf_ printf +#define sprintf_ sprintf +#define vsprintf_ vsprintf +#define snprintf_ snprintf +#define vsnprintf_ vsnprintf +#define vprintf_ vprintf +#endif + +// 'ntoa' conversion buffer size, this must be big enough to hold one converted +// numeric number including padded zeros (dynamically created on stack) +#ifndef PRINTF_INTEGER_BUFFER_SIZE +#define PRINTF_INTEGER_BUFFER_SIZE 32 +#endif + +// size of the fixed (on-stack) buffer for printing individual decimal numbers. +// this must be big enough to hold one converted floating-point value including +// padded zeros. 
+#ifndef PRINTF_DECIMAL_BUFFER_SIZE +#define PRINTF_DECIMAL_BUFFER_SIZE 32 +#endif + +// Support for the decimal notation floating point conversion specifiers (%f, %F) +#ifndef PRINTF_SUPPORT_DECIMAL_SPECIFIERS +#define PRINTF_SUPPORT_DECIMAL_SPECIFIERS 1 +#endif + +// Support for the exponential notation floating point conversion specifiers (%e, %g, %E, %G) +#ifndef PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS +#define PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS 1 +#endif + +// Support for the length write-back specifier (%n) +#ifndef PRINTF_SUPPORT_WRITEBACK_SPECIFIER +#define PRINTF_SUPPORT_WRITEBACK_SPECIFIER 1 +#endif + +// Default precision for the floating point conversion specifiers (the C standard sets this at 6) +#ifndef PRINTF_DEFAULT_FLOAT_PRECISION +#define PRINTF_DEFAULT_FLOAT_PRECISION 6 +#endif + +// Default choice of type to use for internal floating-point computations +#ifndef PRINTF_USE_DOUBLE_INTERNALLY +#define PRINTF_USE_DOUBLE_INTERNALLY 1 +#endif + +// According to the C languages standard, printf() and related functions must be +// able to print any integral number in floating-point notation, regardless of +// length, when using the %f specifier - possibly hundreds of characters, +// potentially overflowing your buffers. In this implementation, all values +// beyond this threshold are switched to exponential notation. +#ifndef PRINTF_MAX_INTEGRAL_DIGITS_FOR_DECIMAL +#define PRINTF_MAX_INTEGRAL_DIGITS_FOR_DECIMAL 9 +#endif + +// Support for the long long integral types (with the ll, z and t length +// modifiers for specifiers %d,%i,%o,%x,%X,%u, and with the %p specifier). +#ifndef PRINTF_SUPPORT_LONG_LONG +#define PRINTF_SUPPORT_LONG_LONG 1 +#endif + +// The number of terms in a Taylor series expansion of log_10(x) to +// use for approximation - including the power-zero term (i.e. the +// value at the point of expansion). 
+#ifndef PRINTF_LOG10_TAYLOR_TERMS +#define PRINTF_LOG10_TAYLOR_TERMS 4 +#endif + +#if PRINTF_LOG10_TAYLOR_TERMS <= 1 +#error "At least one non-constant Taylor expansion is necessary for the log10() calculation" +#endif + +// Be extra-safe, and don't assume format specifiers are completed correctly +// before the format string end. +#ifndef PRINTF_CHECK_FOR_NUL_IN_FORMAT_SPECIFIER +#define PRINTF_CHECK_FOR_NUL_IN_FORMAT_SPECIFIER 1 +#endif + +#define PRINTF_PREFER_DECIMAL false +#define PRINTF_PREFER_EXPONENTIAL true + +/////////////////////////////////////////////////////////////////////////////// + +// The following will convert the number-of-digits into an exponential-notation literal +#define PRINTF_CONCATENATE(s1, s2) s1##s2 +#define PRINTF_EXPAND_THEN_CONCATENATE(s1, s2) PRINTF_CONCATENATE(s1, s2) +#define PRINTF_FLOAT_NOTATION_THRESHOLD \ + ((floating_point_t)PRINTF_EXPAND_THEN_CONCATENATE( \ + 1e, PRINTF_MAX_INTEGRAL_DIGITS_FOR_DECIMAL)) + +// internal flag definitions +#define FLAGS_ZEROPAD (1U << 0U) +#define FLAGS_LEFT (1U << 1U) +#define FLAGS_PLUS (1U << 2U) +#define FLAGS_SPACE (1U << 3U) +#define FLAGS_HASH (1U << 4U) +#define FLAGS_UPPERCASE (1U << 5U) +#define FLAGS_CHAR (1U << 6U) +#define FLAGS_SHORT (1U << 7U) +#define FLAGS_INT (1U << 8U) +// Only used with PRINTF_SUPPORT_MSVC_STYLE_INTEGER_SPECIFIERS +#define FLAGS_LONG (1U << 9U) +#define FLAGS_LONG_LONG (1U << 10U) +#define FLAGS_PRECISION (1U << 11U) +#define FLAGS_ADAPT_EXP (1U << 12U) +#define FLAGS_POINTER (1U << 13U) +// Note: Similar, but not identical, effect as FLAGS_HASH +#define FLAGS_SIGNED (1U << 14U) +#define FLAGS_LONG_DOUBLE (1U << 15U) +// Only used with PRINTF_SUPPORT_MSVC_STYLE_INTEGER_SPECIFIERS + +#ifdef PRINTF_SUPPORT_MSVC_STYLE_INTEGER_SPECIFIERS + +#define FLAGS_INT8 FLAGS_CHAR + +#if (SHRT_MAX == 32767LL) +#define FLAGS_INT16 FLAGS_SHORT +#elif (INT_MAX == 32767LL) +#define FLAGS_INT16 FLAGS_INT +#elif (LONG_MAX == 32767LL) +#define FLAGS_INT16 FLAGS_LONG +#elif 
(LLONG_MAX == 32767LL) +#define FLAGS_INT16 FLAGS_LONG_LONG +#else +#error "No basic integer type has a size of 16 bits exactly" +#endif + +#if (SHRT_MAX == 2147483647LL) +#define FLAGS_INT32 FLAGS_SHORT +#elif (INT_MAX == 2147483647LL) +#define FLAGS_INT32 FLAGS_INT +#elif (LONG_MAX == 2147483647LL) +#define FLAGS_INT32 FLAGS_LONG +#elif (LLONG_MAX == 2147483647LL) +#define FLAGS_INT32 FLAGS_LONG_LONG +#else +#error "No basic integer type has a size of 32 bits exactly" +#endif + +#if (SHRT_MAX == 9223372036854775807LL) +#define FLAGS_INT64 FLAGS_SHORT +#elif (INT_MAX == 9223372036854775807LL) +#define FLAGS_INT64 FLAGS_INT +#elif (LONG_MAX == 9223372036854775807LL) +#define FLAGS_INT64 FLAGS_LONG +#elif (LLONG_MAX == 9223372036854775807LL) +#define FLAGS_INT64 FLAGS_LONG_LONG +#else +#error "No basic integer type has a size of 64 bits exactly" +#endif + +#endif // PRINTF_SUPPORT_MSVC_STYLE_INTEGER_SPECIFIERS + +typedef unsigned int printf_flags_t; + +#define BASE_BINARY 2 +#define BASE_OCTAL 8 +#define BASE_DECIMAL 10 +#define BASE_HEX 16 + +typedef uint8_t numeric_base_t; + +#if PRINTF_SUPPORT_LONG_LONG +typedef unsigned long long printf_unsigned_value_t; +typedef long long printf_signed_value_t; +#else +typedef unsigned long printf_unsigned_value_t; +typedef long printf_signed_value_t; +#endif + +// The printf()-family functions return an `int`; it is therefore +// unnecessary/inappropriate to use size_t - often larger than int +// in practice - for non-negative related values, such as widths, +// precisions, offsets into buffers used for printing and the sizes +// of these buffers. instead, we use: +typedef unsigned int printf_size_t; +#define PRINTF_MAX_POSSIBLE_BUFFER_SIZE INT_MAX +// If we were to nitpick, this would actually be INT_MAX + 1, +// since INT_MAX is the maximum return value, which excludes the +// trailing '\0'. 
+ +#if (PRINTF_SUPPORT_DECIMAL_SPECIFIERS || PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS) +#include +#if FLT_RADIX != 2 +#error "Non-binary-radix floating-point types are unsupported." +#endif + +/** + * This library supports taking float-point arguments up to and including + * long double's; but - it currently does _not_ support internal + * representation and manipulation of values as long doubles; the options + * are either single-precision `float` or double-precision `double`. + */ +#if PRINTF_USE_DOUBLE_INTERNALLY +typedef double floating_point_t; +#define FP_TYPE_MANT_DIG DBL_MANT_DIG +#else +typedef float floating_point_t; +#define FP_TYPE_MANT_DIG FLT_MANT_DIG +#endif + +#define NUM_DECIMAL_DIGITS_IN_INT64_T 18 + +#if FP_TYPE_MANT_DIG == 24 + +typedef uint32_t printf_fp_uint_t; +#define FP_TYPE_SIZE_IN_BITS 32 +#define FP_TYPE_EXPONENT_MASK 0xFFU +#define FP_TYPE_BASE_EXPONENT 127 +#define FP_TYPE_MAX FLT_MAX +#define FP_TYPE_MAX_10_EXP FLT_MAX_10_EXP +#define FP_TYPE_MAX_SUBNORMAL_EXPONENT_OF_10 -38 +#define FP_TYPE_MAX_SUBNORMAL_POWER_OF_10 1e-38f +#define PRINTF_MAX_PRECOMPUTED_POWER_OF_10 10 + +#elif FP_TYPE_MANT_DIG == 53 + +typedef uint64_t printf_fp_uint_t; +#define FP_TYPE_SIZE_IN_BITS 64 +#define FP_TYPE_EXPONENT_MASK 0x7FFU +#define FP_TYPE_BASE_EXPONENT 1023 +#define FP_TYPE_MAX DBL_MAX +#define FP_TYPE_MAX_10_EXP DBL_MAX_10_EXP +#define FP_TYPE_MAX_10_EXP DBL_MAX_10_EXP +#define FP_TYPE_MAX_SUBNORMAL_EXPONENT_OF_10 -308 +#define FP_TYPE_MAX_SUBNORMAL_POWER_OF_10 1e-308 +#define PRINTF_MAX_PRECOMPUTED_POWER_OF_10 NUM_DECIMAL_DIGITS_IN_INT64_T - 1 + +#else +#error "Unsupported floating point type configuration" +#endif +#define FP_TYPE_STORED_MANTISSA_BITS (FP_TYPE_MANT_DIG - 1) + +typedef union { + printf_fp_uint_t U; + floating_point_t F; +} floating_point_with_bit_access; + +// This is unnecessary in C99, since compound initializers can be used, +// but: +// 1. Some compilers are finicky about this; +// 2. 
Some people may want to convert this to C89; +// 3. If you try to use it as C++, only C++20 supports compound literals +static inline floating_point_with_bit_access get_bit_access(floating_point_t x) +{ + floating_point_with_bit_access dwba; + dwba.F = x; + return dwba; +} + +static inline int get_sign_bit(floating_point_t x) +{ + // The sign is stored in the highest bit + return (int)(get_bit_access(x).U >> (FP_TYPE_SIZE_IN_BITS - 1)); +} + +static inline int get_exp2(floating_point_with_bit_access x) +{ + // The exponent in an IEEE-754 floating-point number occupies a + // contiguous sequence of bits (e.g. 52..62 for 64-bit doubles), but + // with a non-trivial representation: An unsigned offset from some + // negative value (with the extremal offset values reserved for special + // use). + return (int)((x.U >> FP_TYPE_STORED_MANTISSA_BITS) & FP_TYPE_EXPONENT_MASK) + - FP_TYPE_BASE_EXPONENT; +} +#define PRINTF_ABS(_x) ((_x) > 0 ? (_x) : -(_x)) + +#endif // (PRINTF_SUPPORT_DECIMAL_SPECIFIERS || PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS) + +// Note in particular the behavior here on LONG_MIN or LLONG_MIN; it is valid +// and well-defined, but if you're not careful you can easily trigger undefined +// behavior with -LONG_MIN or -LLONG_MIN +#define ABS_FOR_PRINTING(_x) \ + ((printf_unsigned_value_t)((_x) > 0 ? (_x) : -((printf_signed_value_t)_x))) + +// wrapper (used as buffer) for output function type +// +// One of the following must hold: +// 1. max_chars is 0 +// 2. buffer is non-null +// 3. function is non-null +// +// ... otherwise bad things will happen. +typedef struct { + void (*function)(char c, void *extra_arg); + void *extra_function_arg; + char *buffer; + printf_size_t pos; + printf_size_t max_chars; +} output_gadget_t; + +// Note: This function currently assumes it is not passed a '\0' c, +// or alternatively, that '\0' can be passed to the function in the output +// gadget. The former assumption holds within the printf library. 
It also +// assumes that the output gadget has been properly initialized. +static inline void putchar_via_gadget(output_gadget_t *gadget, char c) +{ + printf_size_t write_pos = gadget->pos++; + // We're _always_ increasing pos, so as to count how may characters + // _would_ have been written if not for the max_chars limitation + if (write_pos >= gadget->max_chars) { + return; + } + if (gadget->function != NULL) { + // No check for c == '\0' . + gadget->function(c, gadget->extra_function_arg); + } else { + // it must be the case that gadget->buffer != NULL , due to the constraint + // on output_gadget_t ; and note we're relying on write_pos being non-negative. + gadget->buffer[write_pos] = c; + } +} + +// Possibly-write the string-terminating '\0' character +static inline void append_termination_with_gadget(output_gadget_t *gadget) +{ + if (gadget->function != NULL || gadget->max_chars == 0) { + return; + } + if (gadget->buffer == NULL) { + return; + } + printf_size_t null_char_pos = gadget->pos < gadget->max_chars + ? gadget->pos + : gadget->max_chars - 1; + gadget->buffer[null_char_pos] = '\0'; +} + +// We can't use putchar_ as is, since our output gadget +// only takes pointers to functions with an extra argument +static inline void putchar_wrapper(char c, void *unused) +{ + (void)unused; + putchar_(c); +} + +static inline output_gadget_t discarding_gadget(void) +{ + output_gadget_t gadget; + gadget.function = NULL; + gadget.extra_function_arg = NULL; + gadget.buffer = NULL; + gadget.pos = 0; + gadget.max_chars = 0; + return gadget; +} + +static inline output_gadget_t buffer_gadget(char *buffer, size_t buffer_size) +{ + printf_size_t usable_buffer_size + = (buffer_size > PRINTF_MAX_POSSIBLE_BUFFER_SIZE) + ? 
PRINTF_MAX_POSSIBLE_BUFFER_SIZE + : (printf_size_t)buffer_size; + output_gadget_t result = discarding_gadget(); + if (buffer != NULL) { + result.buffer = buffer; + result.max_chars = usable_buffer_size; + } + return result; +} + +static inline output_gadget_t function_gadget( + void (*function)(char, void *), void *extra_arg) +{ + output_gadget_t result = discarding_gadget(); + result.function = function; + result.extra_function_arg = extra_arg; + result.max_chars = PRINTF_MAX_POSSIBLE_BUFFER_SIZE; + return result; +} + +static inline output_gadget_t extern_putchar_gadget(void) +{ + return function_gadget(putchar_wrapper, NULL); +} + +// internal secure strlen +// @return The length of the string (excluding the terminating 0) limited by 'maxsize' +// @note strlen uses size_t, but wes only use this function with printf_size_t +// variables - hence the signature. +static inline printf_size_t strnlen_s_(const char *str, printf_size_t maxsize) +{ + const char *s; + for (s = str; *s && maxsize--; ++s) + ; + return (printf_size_t)(s - str); +} + +// internal test if char is a digit (0-9) +// @return true if char is a digit +static inline bool is_digit_(char ch) +{ + return (ch >= '0') && (ch <= '9'); +} + +// internal ASCII string to printf_size_t conversion +static printf_size_t atou_(const char **str) +{ + printf_size_t i = 0U; + while (is_digit_(**str)) { + i = i * 10U + (printf_size_t)(*((*str)++) - '0'); + } + return i; +} + +// output the specified string in reverse, taking care of any zero-padding +static void out_rev_( + output_gadget_t *output, const char *buf, printf_size_t len, + printf_size_t width, printf_flags_t flags) +{ + const printf_size_t start_pos = output->pos; + + // pad spaces up to given width + if (!(flags & FLAGS_LEFT) && !(flags & FLAGS_ZEROPAD)) { + for (printf_size_t i = len; i < width; i++) { + putchar_via_gadget(output, ' '); + } + } + + // reverse string + while (len) { + putchar_via_gadget(output, buf[--len]); + } + + // append pad spaces 
up to given width + if (flags & FLAGS_LEFT) { + while (output->pos - start_pos < width) { + putchar_via_gadget(output, ' '); + } + } +} + +// Invoked by print_integer after the actual number has been printed, performing necessary +// work on the number's prefix (as the number is initially printed in reverse order) +static void print_integer_finalization( + output_gadget_t *output, char *buf, printf_size_t len, bool negative, + numeric_base_t base, printf_size_t precision, printf_size_t width, + printf_flags_t flags) +{ + printf_size_t unpadded_len = len; + + // pad with leading zeros + { + if (!(flags & FLAGS_LEFT)) { + if (width && (flags & FLAGS_ZEROPAD) + && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) { + width--; + } + while ((flags & FLAGS_ZEROPAD) && (len < width) + && (len < PRINTF_INTEGER_BUFFER_SIZE)) { + buf[len++] = '0'; + } + } + + while ((len < precision) && (len < PRINTF_INTEGER_BUFFER_SIZE)) { + buf[len++] = '0'; + } + + if (base == BASE_OCTAL && (len > unpadded_len)) { + // Since we've written some zeros, we've satisfied the alternative format leading space requirement + flags &= ~FLAGS_HASH; + } + } + + // handle hash + if (flags & (FLAGS_HASH | FLAGS_POINTER)) { + if (!(flags & FLAGS_PRECISION) && len + && ((len == precision) || (len == width))) { + // Let's take back some padding digits to fit in what + // will eventually be the format-specific prefix + if (unpadded_len < len) { + len--; // This should suffice for BASE_OCTAL + } + if (len && (base == BASE_HEX || base == BASE_BINARY) + && (unpadded_len < len)) { + len--; // ... 
and an extra one for 0x or 0b + } + } + if ((base == BASE_HEX) && !(flags & FLAGS_UPPERCASE) + && (len < PRINTF_INTEGER_BUFFER_SIZE)) { + buf[len++] = 'x'; + } else if ( + (base == BASE_HEX) && (flags & FLAGS_UPPERCASE) + && (len < PRINTF_INTEGER_BUFFER_SIZE)) { + buf[len++] = 'X'; + } else if ( + (base == BASE_BINARY) + && (len < PRINTF_INTEGER_BUFFER_SIZE)) { + buf[len++] = 'b'; + } + if (len < PRINTF_INTEGER_BUFFER_SIZE) { + buf[len++] = '0'; + } + } + + if (len < PRINTF_INTEGER_BUFFER_SIZE) { + if (negative) { + buf[len++] = '-'; + } else if (flags & FLAGS_PLUS) { + buf[len++] = '+'; // ignore the space if the '+' exists + } else if (flags & FLAGS_SPACE) { + buf[len++] = ' '; + } + } + + out_rev_(output, buf, len, width, flags); +} + +// An internal itoa-like function +static void print_integer( + output_gadget_t *output, printf_unsigned_value_t value, bool negative, + numeric_base_t base, printf_size_t precision, printf_size_t width, + printf_flags_t flags) +{ + char buf[PRINTF_INTEGER_BUFFER_SIZE]; + printf_size_t len = 0U; + + if (!value) { + if (!(flags & FLAGS_PRECISION)) { + buf[len++] = '0'; + flags &= ~FLAGS_HASH; + // We drop this flag this since either the alternative + // and regular modes of the specifier don't differ on 0 + // values, or (in the case of octal) we've already + // provided the special handling for this mode. + } else if (base == BASE_HEX) { + flags &= ~FLAGS_HASH; + // We drop this flag this since either the alternative + // and regular modes of the specifier don't differ on 0 + // values + } + } else { + do { + const char digit = (char)(value % base); + buf[len++] = (char)(digit < 10 ? '0' + digit + : (flags & FLAGS_UPPERCASE + ? 
'A' + : 'a') + + digit - 10); + value /= base; + } while (value && (len < PRINTF_INTEGER_BUFFER_SIZE)); + } + + print_integer_finalization( + output, buf, len, negative, base, precision, width, flags); +} + +#if (PRINTF_SUPPORT_DECIMAL_SPECIFIERS || PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS) + +// Stores a fixed-precision representation of a floating-point number relative +// to a fixed precision (which cannot be determined by examining this structure) +struct floating_point_components { + int_fast64_t integral; + int_fast64_t fractional; + // ... truncation of the actual fractional part of the floating_point_t + // value, scaled by the precision value + bool is_negative; +}; + +static const floating_point_t powers_of_10[PRINTF_MAX_PRECOMPUTED_POWER_OF_10 + 1] + = {1e00, + 1e01, + 1e02, + 1e03, + 1e04, + 1e05, + 1e06, + 1e07, + 1e08, + 1e09, + 1e10 +#if PRINTF_MAX_PRECOMPUTED_POWER_OF_10 > 10 + , + 1e11, + 1e12, + 1e13, + 1e14, + 1e15, + 1e16, + 1e17 +#endif +}; + +// Note: This value does not mean that all floating-point values printed with +// the library will be correct up to this precision; it is just an upper-bound +// for avoiding buffer overruns and such +#define PRINTF_MAX_SUPPORTED_PRECISION (NUM_DECIMAL_DIGITS_IN_INT64_T - 1) + +// Break up a floating-point number - which is known to be a finite non-negative number - +// into its base-10 parts: integral - before the decimal point, and fractional - after it. +// Taken the precision into account, but does not change it even internally. +static struct floating_point_components get_components( + floating_point_t number, printf_size_t precision) +{ + struct floating_point_components number_; + number_.is_negative = get_sign_bit(number); + floating_point_t abs_number = (number_.is_negative) ? 
-number : number; + number_.integral = (int_fast64_t)abs_number; + floating_point_t scaled_remainder + = (abs_number - (floating_point_t)number_.integral) + * powers_of_10[precision]; + number_.fractional = (int_fast64_t) + scaled_remainder; // for precision == 0U, this will be 0 + + floating_point_t remainder + = scaled_remainder - (floating_point_t)number_.fractional; + const floating_point_t one_half = (floating_point_t)0.5; + + if (remainder > one_half) { + ++number_.fractional; + // handle rollover, e.g. case 0.99 with precision 1 is 1.0 + if ((floating_point_t)number_.fractional + >= powers_of_10[precision]) { + number_.fractional = 0; + ++number_.integral; + } + } else if ((remainder == one_half) && (number_.fractional & 1U)) { + // Banker's rounding, i.e. round half to even: + // 1.5 -> 2, but 2.5 -> 2 + ++number_.fractional; + } + + if (precision == 0U) { + remainder = abs_number - (floating_point_t)number_.integral; + if ((remainder == one_half) && (number_.integral & 1U)) { + // Banker's rounding, i.e. round half to even: + // 1.5 -> 2, but 2.5 -> 2 + ++number_.integral; + } + } + return number_; +} + +#if PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS +struct scaling_factor { + floating_point_t raw_factor; + bool multiply; // if true, need to multiply by raw_factor; otherwise need to divide by it +}; + +static floating_point_t apply_scaling( + floating_point_t num, struct scaling_factor normalization) +{ + return normalization.multiply ? num * normalization.raw_factor + : num / normalization.raw_factor; +} + +static floating_point_t unapply_scaling( + floating_point_t normalized, struct scaling_factor normalization) +{ +#ifdef __GNUC__ +// accounting for a static analysis bug in GCC 6.x and earlier +#pragma GCC diagnostic push +#if !defined(__has_warning) +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#elif __has_warning("-Wmaybe-uninitialized") +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#endif + return normalization.multiply ? 
normalized / normalization.raw_factor + : normalized * normalization.raw_factor; +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +} + +static struct scaling_factor update_normalization( + struct scaling_factor sf, floating_point_t extra_multiplicative_factor) +{ + struct scaling_factor result; + if (sf.multiply) { + result.multiply = true; + result.raw_factor = sf.raw_factor * extra_multiplicative_factor; + } else { + int factor_exp2 = get_exp2(get_bit_access(sf.raw_factor)); + int extra_factor_exp2 + = get_exp2(get_bit_access(extra_multiplicative_factor)); + + // Divide the larger-exponent raw raw_factor by the smaller + if (PRINTF_ABS(factor_exp2) > PRINTF_ABS(extra_factor_exp2)) { + result.multiply = false; + result.raw_factor + = sf.raw_factor / extra_multiplicative_factor; + } else { + result.multiply = true; + result.raw_factor + = extra_multiplicative_factor / sf.raw_factor; + } + } + return result; +} + +static struct floating_point_components get_normalized_components( + bool negative, printf_size_t precision, floating_point_t non_normalized, + struct scaling_factor normalization, int floored_exp10) +{ + struct floating_point_components components; + components.is_negative = negative; + floating_point_t scaled = apply_scaling(non_normalized, normalization); + + bool close_to_representation_extremum + = ((-floored_exp10 + (int)precision) >= FP_TYPE_MAX_10_EXP - 1); + if (close_to_representation_extremum) { + // We can't have a normalization factor which also accounts for the precision, i.e. moves + // some decimal digits into the mantissa, since it's unrepresentable, or nearly unrepresentable. + // So, we'll give up early on getting extra precision... + return get_components(negative ? 
-scaled : scaled, precision); + } + components.integral = (int_fast64_t)scaled; + floating_point_t remainder + = non_normalized + - unapply_scaling( + (floating_point_t)components.integral, normalization); + floating_point_t prec_power_of_10 = powers_of_10[precision]; + struct scaling_factor account_for_precision + = update_normalization(normalization, prec_power_of_10); + floating_point_t scaled_remainder + = apply_scaling(remainder, account_for_precision); + floating_point_t rounding_threshold = 0.5; + + components.fractional = (int_fast64_t) + scaled_remainder; // when precision == 0, the assigned value should be 0 + scaled_remainder + -= (floating_point_t)components + .fractional; // when precision == 0, this will not change scaled_remainder + + components.fractional += (scaled_remainder >= rounding_threshold); + if (scaled_remainder == rounding_threshold) { + // banker's rounding: Round towards the even number (making the mean error 0) + components.fractional &= ~((int_fast64_t)0x1); + } + // handle rollover, e.g. the case of 0.99 with precision 1 becoming (0,100), + // and must then be corrected into (1, 0). + // Note: for precision = 0, this will "translate" the rounding effect from + // the fractional part to the integral part where it should actually be + // felt (as prec_power_of_10 is 1) + if ((floating_point_t)components.fractional >= prec_power_of_10) { + components.fractional = 0; + ++components.integral; + } + return components; +} +#endif // PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS + +static void print_broken_up_decimal( + struct floating_point_components number_, output_gadget_t *output, + printf_size_t precision, printf_size_t width, printf_flags_t flags, + char *buf, printf_size_t len) +{ + if (precision != 0U) { + // do fractional part, as an unsigned number + + printf_size_t count = precision; + + // %g/%G mandates we skip the trailing 0 digits... 
+ if ((flags & FLAGS_ADAPT_EXP) && !(flags & FLAGS_HASH) + && (number_.fractional > 0)) { + while (true) { + int_fast64_t digit = number_.fractional % 10U; + if (digit != 0) { + break; + } + --count; + number_.fractional /= 10U; + } + // ... and even the decimal point if there are no + // non-zero fractional part digits (see below) + } + + if (number_.fractional > 0 || !(flags & FLAGS_ADAPT_EXP) + || (flags & FLAGS_HASH)) { + while (len < PRINTF_DECIMAL_BUFFER_SIZE) { + --count; + buf[len++] + = (char)('0' + number_.fractional % 10U); + if (!(number_.fractional /= 10U)) { + break; + } + } + // add extra 0s + while ((len < PRINTF_DECIMAL_BUFFER_SIZE) && (count > 0U)) { + buf[len++] = '0'; + --count; + } + if (len < PRINTF_DECIMAL_BUFFER_SIZE) { + buf[len++] = '.'; + } + } + } else { + if ((flags & FLAGS_HASH) && (len < PRINTF_DECIMAL_BUFFER_SIZE)) { + buf[len++] = '.'; + } + } + + // Write the integer part of the number (it comes after the fractional + // since the character order is reversed) + while (len < PRINTF_DECIMAL_BUFFER_SIZE) { + buf[len++] = (char)('0' + (number_.integral % 10)); + if (!(number_.integral /= 10)) { + break; + } + } + + // pad leading zeros + if (!(flags & FLAGS_LEFT) && (flags & FLAGS_ZEROPAD)) { + if (width + && (number_.is_negative + || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) { + width--; + } + while ((len < width) && (len < PRINTF_DECIMAL_BUFFER_SIZE)) { + buf[len++] = '0'; + } + } + + if (len < PRINTF_DECIMAL_BUFFER_SIZE) { + if (number_.is_negative) { + buf[len++] = '-'; + } else if (flags & FLAGS_PLUS) { + buf[len++] = '+'; // ignore the space if the '+' exists + } else if (flags & FLAGS_SPACE) { + buf[len++] = ' '; + } + } + + out_rev_(output, buf, len, width, flags); +} + +// internal ftoa for fixed decimal floating point +static void print_decimal_number( + output_gadget_t *output, floating_point_t number, printf_size_t precision, + printf_size_t width, printf_flags_t flags, char *buf, printf_size_t len) +{ + struct 
floating_point_components value_ + = get_components(number, precision); + print_broken_up_decimal(value_, output, precision, width, flags, buf, len); +} + +#if PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS + +// A floor function - but one which only works for numbers whose +// floor value is representable by an int. +static int bastardized_floor(floating_point_t x) +{ + if (x >= 0) { + return (int)x; + } + int n = (int)x; + return (((floating_point_t)n) == x) ? n : n - 1; +} + +// Computes the base-10 logarithm of the input number - which must be an actual +// positive number (not infinity or NaN, nor a sub-normal) +static floating_point_t log10_of_positive(floating_point_t positive_number) +{ + // The implementation follows David Gay (https://www.ampl.com/netlib/fp/dtoa.c). + // + // Since log_10 ( M * 2^x ) = log_10(M) + x , we can separate the components of + // our input number, and need only solve log_10(M) for M between 1 and 2 (as + // the base-2 mantissa is always 1-point-something). In that limited range, a + // Taylor series expansion of log10(x) should serve us well enough; and we'll + // take the mid-point, 1.5, as the point of expansion. 
+ + floating_point_with_bit_access dwba = get_bit_access(positive_number); + // based on the algorithm by David Gay (https://www.ampl.com/netlib/fp/dtoa.c) + int exp2 = get_exp2(dwba); + // drop the exponent, so dwba.F comes into the range [1,2) + dwba.U = (dwba.U + & (((printf_fp_uint_t)(1) << FP_TYPE_STORED_MANTISSA_BITS) - 1U)) + | ((printf_fp_uint_t)FP_TYPE_BASE_EXPONENT + << FP_TYPE_STORED_MANTISSA_BITS); + floating_point_t z = (dwba.F - (floating_point_t)1.5); + return ( + // Taylor expansion around 1.5: + (floating_point_t)0.1760912590556812420 // Expansion term 0: ln(1.5) / ln(10) + + z * (floating_point_t)0.2895296546021678851 // Expansion term 1: (M - 1.5) * 2/3 / ln(10) +#if PRINTF_LOG10_TAYLOR_TERMS > 2 + - z * z * (floating_point_t)0.0965098848673892950 // Expansion term 2: (M - 1.5)^2 * 2/9 / ln(10) +#if PRINTF_LOG10_TAYLOR_TERMS > 3 + + z + * z * z * (floating_point_t)0.0428932821632841311 // Expansion term 2: (M - 1.5)^3 * 8/81 / ln(10) +#endif +#endif + // exact log_2 of the exponent x, with logarithm base change + + (floating_point_t)exp2 * (floating_point_t)0.30102999566398119521 // = exp2 * log_10(2) = exp2 * ln(2)/ln(10) + ); +} + +static floating_point_t pow10_of_int(int floored_exp10) +{ + // A crude hack for avoiding undesired behavior with barely-normal or slightly-subnormal values. 
+ if (floored_exp10 == FP_TYPE_MAX_SUBNORMAL_EXPONENT_OF_10) { + return FP_TYPE_MAX_SUBNORMAL_POWER_OF_10; + } + // Compute 10^(floored_exp10) but (try to) make sure that doesn't overflow + floating_point_with_bit_access dwba; + int exp2 = bastardized_floor( + (floating_point_t)(floored_exp10 * 3.321928094887362 + 0.5)); + const floating_point_t z + = (floating_point_t)(floored_exp10 * 2.302585092994046 + - exp2 * 0.6931471805599453); + const floating_point_t z2 = z * z; + dwba.U = ((printf_fp_uint_t)(exp2) + FP_TYPE_BASE_EXPONENT) + << FP_TYPE_STORED_MANTISSA_BITS; + // compute exp(z) using continued fractions, + // see https://en.wikipedia.org/wiki/Exponential_function#Continued_fractions_for_ex + dwba.F *= 1 + 2 * z / (2 - z + (z2 / (6 + (z2 / (10 + z2 / 14))))); + return dwba.F; +} + +static void print_exponential_number( + output_gadget_t *output, floating_point_t number, printf_size_t precision, + printf_size_t width, printf_flags_t flags, char *buf, printf_size_t len) +{ + const bool negative = get_sign_bit(number); + // This number will decrease gradually (by factors of 10) as we "extract" the exponent out of it + floating_point_t abs_number = negative ? -number : number; + + int floored_exp10; + bool abs_exp10_covered_by_powers_table; + struct scaling_factor normalization; + + // Determine the decimal exponent + if (abs_number == (floating_point_t)0.0) { + // TODO: This is a special-case for 0.0 (and -0.0); but proper handling is required for denormals more generally. + floored_exp10 + = 0; // ... 
and no need to set a normalization factor or check the powers table + } else { + floating_point_t exp10 = log10_of_positive(abs_number); + floored_exp10 = bastardized_floor(exp10); + floating_point_t p10 = pow10_of_int(floored_exp10); + // correct for rounding errors + if (abs_number < p10) { + floored_exp10--; + p10 /= 10; + } + abs_exp10_covered_by_powers_table + = PRINTF_ABS(floored_exp10) + < PRINTF_MAX_PRECOMPUTED_POWER_OF_10; + normalization.raw_factor + = abs_exp10_covered_by_powers_table + ? powers_of_10[PRINTF_ABS(floored_exp10)] + : p10; + } + + // We now begin accounting for the widths of the two parts of our + // printed field: the decimal part after decimal exponent extraction, + // and the base-10 exponent part. For both of these, the value of 0 has + // a special meaning, but not the same one: a 0 exponent-part width + // means "don't print the exponent"; a 0 decimal-part width means "use + // as many characters as necessary". + + bool fall_back_to_decimal_only_mode = false; + if (flags & FLAGS_ADAPT_EXP) { + int required_significant_digits + = (precision == 0) ? 1 : (int)precision; + // Should we want to fall-back to "%f" mode, and only print the decimal part? + fall_back_to_decimal_only_mode + = (floored_exp10 >= -4 + && floored_exp10 < required_significant_digits); + // Now, let's adjust the precision + // This also decided how we adjust the precision value - as in + // "%g" mode, "precision" is the number of _significant digits_, + // and this is when we "translate" the precision value to an + // actual number of decimal digits. + int precision_ + = fall_back_to_decimal_only_mode ? (int)precision - 1 + - floored_exp10 + : (int)precision - 1; // the presence of the exponent ensures only one significant digit comes before the decimal point + precision = (precision_ > 0 ? 
(unsigned)precision_ : 0U); + flags |= FLAGS_PRECISION; // make sure print_broken_up_decimal respects our choice above + } + +#ifdef __GNUC__ +// accounting for a static analysis bug in GCC 6.x and earlier +#pragma GCC diagnostic push +#if !defined(__has_warning) +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#elif __has_warning("-Wmaybe-uninitialized") +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#endif + normalization.multiply + = (floored_exp10 < 0 && abs_exp10_covered_by_powers_table); +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + bool should_skip_normalization + = (fall_back_to_decimal_only_mode || floored_exp10 == 0); + struct floating_point_components decimal_part_components + = should_skip_normalization + ? get_components( + negative ? -abs_number : abs_number, precision) + : get_normalized_components( + negative, precision, abs_number, + normalization, floored_exp10); + + // Account for roll-over, e.g. rounding from 9.99 to 100.0 - which + // effects the exponent and may require additional tweaking of the parts + if (fall_back_to_decimal_only_mode) { + if ((flags & FLAGS_ADAPT_EXP) && floored_exp10 >= -1 + && decimal_part_components.integral + == powers_of_10[floored_exp10 + 1]) { + floored_exp10++; // Not strictly necessary, since floored_exp10 is no longer really used + if (precision > 0U) { + precision--; + } + // ... and it should already be the case that decimal_part_components.fractional == 0 + } + // TODO: What about rollover strictly within the fractional part? + } else { + if (decimal_part_components.integral >= 10) { + floored_exp10++; + decimal_part_components.integral = 1; + decimal_part_components.fractional = 0; + } + } + + // the floored_exp10 format is "E%+03d" and largest possible floored_exp10 value for a 64-bit double + // is "307" (for 2^1023), so we set aside 4-5 characters overall + printf_size_t exp10_part_width = fall_back_to_decimal_only_mode ? 0U + : (PRINTF_ABS(floored_exp10) < 100) ? 
4U + : 5U; + + printf_size_t decimal_part_width + = ((flags & FLAGS_LEFT) && exp10_part_width) + ? + // We're padding on the right, so the width constraint + // is the exponent part's problem, not the decimal + // part's, so we'll use as many characters as we need: + 0U + : + // We're padding on the left; so the width constraint is the decimal part's + // problem. Well, can both the decimal part and the exponent part fit within our overall width? + ((width > exp10_part_width) + ? + // Yes, so we limit our decimal part's width. + // (Note this is trivially valid even if we've fallen back to "%f" mode) + width - exp10_part_width + : + // No; we just give up on any restriction on + // the decimal part and use as many + // characters as we need + 0U); + + const printf_size_t printed_exponential_start_pos = output->pos; + print_broken_up_decimal( + decimal_part_components, output, precision, decimal_part_width, + flags, buf, len); + + if (!fall_back_to_decimal_only_mode) { + putchar_via_gadget(output, (flags & FLAGS_UPPERCASE) ? 'E' : 'e'); + print_integer( + output, ABS_FOR_PRINTING(floored_exp10), floored_exp10 < 0, + 10, 0, exp10_part_width - 1, FLAGS_ZEROPAD | FLAGS_PLUS); + if (flags & FLAGS_LEFT) { + // We need to right-pad with spaces to meet the width requirement + while (output->pos - printed_exponential_start_pos < width) { + putchar_via_gadget(output, ' '); + } + } + } +} +#endif // PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS + +static void print_floating_point( + output_gadget_t *output, floating_point_t value, printf_size_t precision, + printf_size_t width, printf_flags_t flags, bool prefer_exponential) +{ + char buf[PRINTF_DECIMAL_BUFFER_SIZE]; + printf_size_t len = 0U; + + // test for special values + if (value != value) { + out_rev_(output, "nan", 3, width, flags); + return; + } + if (value < -FP_TYPE_MAX) { + out_rev_(output, "fni-", 4, width, flags); + return; + } + if (value > FP_TYPE_MAX) { + out_rev_( + output, (flags & FLAGS_PLUS) ? 
"fni+" : "fni", + (flags & FLAGS_PLUS) ? 4U : 3U, width, flags); + return; + } + + if (!prefer_exponential + && ((value > PRINTF_FLOAT_NOTATION_THRESHOLD) + || (value < -PRINTF_FLOAT_NOTATION_THRESHOLD))) { + // The required behavior of standard printf is to print _every_ + // integral-part digit -- which could mean printing hundreds of + // characters, overflowing any fixed internal buffer and + // necessitating a more complicated implementation. +#if PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS + print_exponential_number( + output, value, precision, width, flags, buf, len); +#endif + return; + } + + // set default precision, if not set explicitly + if (!(flags & FLAGS_PRECISION)) { + precision = PRINTF_DEFAULT_FLOAT_PRECISION; + } + + // limit precision so that our integer holding the fractional part does not overflow + while ((len < PRINTF_DECIMAL_BUFFER_SIZE) + && (precision > PRINTF_MAX_SUPPORTED_PRECISION)) { + buf[len++] = '0'; // This respects the precision in terms of result length only + precision--; + } + +#if PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS + if (prefer_exponential) + print_exponential_number( + output, value, precision, width, flags, buf, len); + else +#endif + print_decimal_number( + output, value, precision, width, flags, buf, len); +} + +#endif // (PRINTF_SUPPORT_DECIMAL_SPECIFIERS || PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS) + +// Advances the format pointer past the flags, and returns the parsed flags +// due to the characters passed +static printf_flags_t parse_flags(const char **format) +{ + printf_flags_t flags = 0U; + do { + switch (**format) { + case '0': + flags |= FLAGS_ZEROPAD; + (*format)++; + break; + case '-': + flags |= FLAGS_LEFT; + (*format)++; + break; + case '+': + flags |= FLAGS_PLUS; + (*format)++; + break; + case ' ': + flags |= FLAGS_SPACE; + (*format)++; + break; + case '#': + flags |= FLAGS_HASH; + (*format)++; + break; + default: + return flags; + } + } while (true); +} + +static inline void format_string_loop( + output_gadget_t 
*output, const char *format, va_list args) +{ +#if PRINTF_CHECK_FOR_NUL_IN_FORMAT_SPECIFIER +#define ADVANCE_IN_FORMAT_STRING(cptr_) \ + do { \ + (cptr_)++; \ + if (!*(cptr_)) \ + return; \ + } while (0) +#else +#define ADVANCE_IN_FORMAT_STRING(cptr_) (cptr_)++ +#endif + + while (*format) { + if (*format != '%') { + // A regular content character + putchar_via_gadget(output, *format); + format++; + continue; + } + // We're parsing a format specifier: %[flags][width][.precision][length] + ADVANCE_IN_FORMAT_STRING(format); + + printf_flags_t flags = parse_flags(&format); + + // evaluate width field + printf_size_t width = 0U; + if (is_digit_(*format)) { + width = (printf_size_t)atou_(&format); + } else if (*format == '*') { + const int w = va_arg(args, int); + if (w < 0) { + flags |= FLAGS_LEFT; // reverse padding + width = (printf_size_t)-w; + } else { + width = (printf_size_t)w; + } + ADVANCE_IN_FORMAT_STRING(format); + } + + // evaluate precision field + printf_size_t precision = 0U; + if (*format == '.') { + flags |= FLAGS_PRECISION; + ADVANCE_IN_FORMAT_STRING(format); + if (is_digit_(*format)) { + precision = (printf_size_t)atou_(&format); + } else if (*format == '*') { + const int precision_ = va_arg(args, int); + precision = precision_ > 0 + ? 
(printf_size_t)precision_ + : 0U; + ADVANCE_IN_FORMAT_STRING(format); + } + } + + // evaluate length field + switch (*format) { +#ifdef PRINTF_SUPPORT_MSVC_STYLE_INTEGER_SPECIFIERS + case 'I': { + ADVANCE_IN_FORMAT_STRING(format); + // Greedily parse for size in bits: 8, 16, 32 or 64 + switch (*format) { + case '8': + flags |= FLAGS_INT8; + ADVANCE_IN_FORMAT_STRING(format); + break; + case '1': + ADVANCE_IN_FORMAT_STRING(format); + if (*format == '6') { + format++; + flags |= FLAGS_INT16; + } + break; + case '3': + ADVANCE_IN_FORMAT_STRING(format); + if (*format == '2') { + ADVANCE_IN_FORMAT_STRING(format); + flags |= FLAGS_INT32; + } + break; + case '6': + ADVANCE_IN_FORMAT_STRING(format); + if (*format == '4') { + ADVANCE_IN_FORMAT_STRING(format); + flags |= FLAGS_INT64; + } + break; + default: + break; + } + break; + } +#endif + case 'l': + flags |= FLAGS_LONG; + ADVANCE_IN_FORMAT_STRING(format); + if (*format == 'l') { + flags |= FLAGS_LONG_LONG; + ADVANCE_IN_FORMAT_STRING(format); + } + break; + case 'L': + flags |= FLAGS_LONG_DOUBLE; + ADVANCE_IN_FORMAT_STRING(format); + break; + case 'h': + flags |= FLAGS_SHORT; + ADVANCE_IN_FORMAT_STRING(format); + if (*format == 'h') { + flags |= FLAGS_CHAR; + ADVANCE_IN_FORMAT_STRING(format); + } + break; + case 't': + flags |= (sizeof(ptrdiff_t) <= sizeof(int)) ? FLAGS_INT + : (sizeof(ptrdiff_t) == sizeof(long)) + ? FLAGS_LONG + : FLAGS_LONG_LONG; + ADVANCE_IN_FORMAT_STRING(format); + break; + case 'j': + flags + |= (sizeof(intmax_t) == sizeof(long) + ? FLAGS_LONG + : FLAGS_LONG_LONG); + ADVANCE_IN_FORMAT_STRING(format); + break; + case 'z': + flags |= (sizeof(size_t) <= sizeof(int)) ? FLAGS_INT + : (sizeof(size_t) == sizeof(long)) + ? 
FLAGS_LONG + : FLAGS_LONG_LONG; + ADVANCE_IN_FORMAT_STRING(format); + break; + default: + break; + } + + // evaluate specifier + switch (*format) { + case 'd': + case 'i': + case 'u': + case 'x': + case 'X': + case 'o': + case 'b': { + + if (*format == 'd' || *format == 'i') { + flags |= FLAGS_SIGNED; + } + + numeric_base_t base; + if (*format == 'x' || *format == 'X') { + base = BASE_HEX; + } else if (*format == 'o') { + base = BASE_OCTAL; + } else if (*format == 'b') { + base = BASE_BINARY; + } else { + base = BASE_DECIMAL; + flags &= ~FLAGS_HASH; // decimal integers have no alternative presentation + } + + if (*format == 'X') { + flags |= FLAGS_UPPERCASE; + } + + format++; + // ignore '0' flag when precision is given + if (flags & FLAGS_PRECISION) { + flags &= ~FLAGS_ZEROPAD; + } + + if (flags & FLAGS_SIGNED) { + // A signed specifier: d, i or possibly I + bit size if enabled + + if (flags & FLAGS_LONG_LONG) { +#if PRINTF_SUPPORT_LONG_LONG + const long long value + = va_arg(args, long long); + print_integer( + output, ABS_FOR_PRINTING(value), + value < 0, base, precision, + width, flags); +#endif + } else if (flags & FLAGS_LONG) { + const long value = va_arg(args, long); + print_integer( + output, ABS_FOR_PRINTING(value), + value < 0, base, precision, + width, flags); + } else { + // We never try to interpret the argument as something potentially-smaller than int, + // due to integer promotion rules: Even if the user passed a short int, short unsigned + // etc. - these will come in after promotion, as int's (or unsigned for the case of + // short unsigned when it has the same size as int) + const int value + = (flags & FLAGS_CHAR) + ? (signed char)va_arg( + args, int) + : (flags & FLAGS_SHORT) + ? 
(short int)va_arg(args, int) + : va_arg(args, int); + print_integer( + output, ABS_FOR_PRINTING(value), + value < 0, base, precision, + width, flags); + } + } else { + // An unsigned specifier: u, x, X, o, b + + flags &= ~(FLAGS_PLUS | FLAGS_SPACE); + + if (flags & FLAGS_LONG_LONG) { +#if PRINTF_SUPPORT_LONG_LONG + print_integer( + output, + (printf_unsigned_value_t)va_arg( + args, unsigned long long), + false, base, precision, width, + flags); +#endif + } else if (flags & FLAGS_LONG) { + print_integer( + output, + (printf_unsigned_value_t)va_arg( + args, unsigned long), + false, base, precision, width, + flags); + } else { + const unsigned int value + = (flags & FLAGS_CHAR) + ? (unsigned char)va_arg( + args, unsigned int) + : (flags & FLAGS_SHORT) + ? (unsigned short int)va_arg( + args, unsigned int) + : va_arg(args, + unsigned int); + print_integer( + output, + (printf_unsigned_value_t)value, + false, base, precision, width, + flags); + } + } + break; + } +#if PRINTF_SUPPORT_DECIMAL_SPECIFIERS + case 'f': + case 'F': { + floating_point_t value + = (floating_point_t)(flags & FLAGS_LONG_DOUBLE + ? va_arg(args, long double) + : va_arg(args, double)); + if (*format == 'F') + flags |= FLAGS_UPPERCASE; + print_floating_point( + output, value, precision, width, flags, + PRINTF_PREFER_DECIMAL); + format++; + break; + } +#endif +#if PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS + case 'e': + case 'E': + case 'g': + case 'G': { + floating_point_t value + = (floating_point_t)(flags & FLAGS_LONG_DOUBLE + ? 
va_arg(args, long double) + : va_arg(args, double)); + if ((*format == 'g') || (*format == 'G')) + flags |= FLAGS_ADAPT_EXP; + if ((*format == 'E') || (*format == 'G')) + flags |= FLAGS_UPPERCASE; + print_floating_point( + output, value, precision, width, flags, + PRINTF_PREFER_EXPONENTIAL); + format++; + break; + } +#endif // PRINTF_SUPPORT_EXPONENTIAL_SPECIFIERS + case 'c': { + printf_size_t l = 1U; + // pre padding + if (!(flags & FLAGS_LEFT)) { + while (l++ < width) { + putchar_via_gadget(output, ' '); + } + } + // char output + putchar_via_gadget(output, (char)va_arg(args, int)); + // post padding + if (flags & FLAGS_LEFT) { + while (l++ < width) { + putchar_via_gadget(output, ' '); + } + } + format++; + break; + } + + case 's': { + const char *p = va_arg(args, char *); + if (p == NULL) { + out_rev_(output, ")llun(", 6, width, flags); + } else { + printf_size_t l = strnlen_s_( + p, precision ? precision + : PRINTF_MAX_POSSIBLE_BUFFER_SIZE); + // pre padding + if (flags & FLAGS_PRECISION) { + l = (l < precision ? l : precision); + } + if (!(flags & FLAGS_LEFT)) { + while (l++ < width) { + putchar_via_gadget(output, ' '); + } + } + // string output + while ((*p != 0) + && (!(flags & FLAGS_PRECISION) || precision)) { + putchar_via_gadget(output, *(p++)); + --precision; + } + // post padding + if (flags & FLAGS_LEFT) { + while (l++ < width) { + putchar_via_gadget(output, ' '); + } + } + } + format++; + break; + } + + case 'p': { + width = sizeof(void *) * 2U + + 2; // 2 hex chars per byte + the "0x" prefix + flags |= FLAGS_ZEROPAD | FLAGS_POINTER; + uintptr_t value = (uintptr_t)va_arg(args, void *); + (value == (uintptr_t)NULL) + ? 
out_rev_(output, ")lin(", 5, width, flags) + : print_integer( + output, + (printf_unsigned_value_t)value, false, + BASE_HEX, precision, width, flags); + format++; + break; + } + + case '%': + putchar_via_gadget(output, '%'); + format++; + break; + + // Many people prefer to disable support for %n, as it + // lets the caller engineer a write to an arbitrary + // location, of a value the caller effectively controls + // - which could be a security concern in some cases. +#if PRINTF_SUPPORT_WRITEBACK_SPECIFIER + case 'n': { + if (flags & FLAGS_CHAR) + *(va_arg(args, char *)) = (char)output->pos; + else if (flags & FLAGS_SHORT) + *(va_arg(args, short *)) = (short)output->pos; + else if (flags & FLAGS_LONG) + *(va_arg(args, long *)) = (long)output->pos; +#if PRINTF_SUPPORT_LONG_LONG + else if (flags & FLAGS_LONG_LONG) + *(va_arg(args, long long *)) + = (long long int)output->pos; +#endif // PRINTF_SUPPORT_LONG_LONG + else + *(va_arg(args, int *)) = (int)output->pos; + format++; + break; + } +#endif // PRINTF_SUPPORT_WRITEBACK_SPECIFIER + + default: + putchar_via_gadget(output, *format); + format++; + break; + } + } +} + +// internal vsnprintf - used for implementing _all library functions +static int vsnprintf_impl(output_gadget_t *output, const char *format, va_list args) +{ + // Note: The library only calls vsnprintf_impl() with output->pos being 0. However, it is + // possible to call this function with a non-zero pos value for some "remedial printing". 
+ format_string_loop(output, format, args); + + // termination + append_termination_with_gadget(output); + + // return written chars without terminating \0 + return (int)output->pos; +} + +int z__b_fctprintf( + void (*out)(char c, void *extra_arg), void *extra_arg, + const char *format, va_list arg) +{ + if (out == NULL) { + return 0; + } + output_gadget_t gadget = function_gadget(out, extra_arg); + return vsnprintf_impl(&gadget, format, arg); +} diff --git a/core/printf.h b/core/printf.h index 0c1f31a..59d92cf 100644 --- a/core/printf.h +++ b/core/printf.h @@ -1,59 +1,161 @@ -/////////////////////////////////////////////////////////////////////////////// -// \author (c) Marco Paland (info@paland.com) -// 2014-2019, PALANDesign Hannover, Germany -// -// \license The MIT License (MIT) -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
-// -// \brief Tiny printf, sprintf and snprintf implementation, optimized for speed on -// embedded systems with a very limited resources. -// Use this instead of bloated standard/newlib printf. -// These routines are thread safe and reentrant. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef _PRINTF_H_ -#define _PRINTF_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * printf with output function - * You may use this as dynamic alternative to printf() with its fixed _putchar() - * output \param out An output function which takes one character and an - * argument pointer \param arg An argument pointer for user data passed to - * output function \param format A string that specifies the format of the - * output \return The number of characters that are sent to the output function, - * not counting the terminating null character - */ -int z__b_fctprintf( - void (*out)(char character, void *arg), void *arg, const char *format, - va_list va); - -#ifdef __cplusplus -} -#endif - -#endif // _PRINTF_H_ +/** + * @author (c) Eyal Rozenberg + * 2021-2023, Haifa, Palestine/Israel + * @author (c) Marco Paland (info@paland.com) + * 2014-2019, PALANDesign Hannover, Germany + * + * @note Others have made smaller contributions to this file: see the + * contributors page at https://github.com/eyalroz/printf/graphs/contributors + * or ask one of the authors. + * + * @brief Small stand-alone implementation of the printf family of functions + * (`(v)printf`, `(v)s(n)printf` etc., geared towards use on embedded systems + * with a very limited resources. + * + * @note the implementations are thread-safe; re-entrant; use no functions from + * the standard library; and do not dynamically allocate any memory. 
+ * + * @license The MIT License (MIT) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef PRINTF_H_ +#define PRINTF_H_ + +#ifdef __cplusplus +#include +#include +extern "C" { +#else +#include +#include +#endif + +#ifdef __GNUC__ +#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __GNUC__ > 4) +#define ATTR_PRINTF(one_based_format_index, first_arg) \ + __attribute__((format(gnu_printf, (one_based_format_index), (first_arg)))) +#else +#define ATTR_PRINTF(one_based_format_index, first_arg) \ + __attribute__((format(printf, (one_based_format_index), (first_arg)))) +#endif +#define ATTR_VPRINTF(one_based_format_index) \ + ATTR_PRINTF((one_based_format_index), 0) +#else +#define ATTR_PRINTF(one_based_format_index, first_arg) +#define ATTR_VPRINTF(one_based_format_index) +#endif + +#ifndef PRINTF_ALIAS_STANDARD_FUNCTION_NAMES_SOFT +#define PRINTF_ALIAS_STANDARD_FUNCTION_NAMES_SOFT 0 +#endif + +#ifndef PRINTF_ALIAS_STANDARD_FUNCTION_NAMES_HARD +#define PRINTF_ALIAS_STANDARD_FUNCTION_NAMES_HARD 0 +#endif + +#if PRINTF_ALIAS_STANDARD_FUNCTION_NAMES_HARD +#define printf_ printf +#define sprintf_ sprintf +#define vsprintf_ vsprintf +#define snprintf_ snprintf +#define vsnprintf_ vsnprintf +#define vprintf_ vprintf +#endif + +// If you want to include this implementation file directly rather than +// link against it, this will let you control the functions' visibility, +// e.g. make them static so as not to clash with other objects also +// using them. 
+#ifndef PRINTF_VISIBILITY +#define PRINTF_VISIBILITY +#endif + +/** + * Prints/send a single character to some opaque output entity + * + * @note This function is not implemented by the library, only declared; you + * must provide an implementation if you wish to use the @ref printf / @ref + * vprintf function (and possibly for linking against the library, if your + * toolchain does not support discarding unused functions) + * + * @note The output could be as simple as a wrapper for the `write()` system + * call on a Unix-like * system, or even libc's @ref putchar , for replicating + * actual functionality of libc's @ref printf * function; but on an embedded + * system it may involve interaction with a special output device, like a UART, + * etc. + * + * @note in libc's @ref putchar, the parameter type is an int; this was intended + * to support the representation of either a proper character or EOF in a + * variable - but this is really not meaningful to pass into @ref putchar and is + * discouraged today. See further discussion in: + * @link https://stackoverflow.com/q/17452847/1593077 + * + * @param c the single character to print + */ +PRINTF_VISIBILITY +void putchar_(char c); + +/** + * printf/vprintf with user-specified output function + * + * An alternative to @ref printf_, in which the output function is specified + * dynamically (rather than @ref putchar_ being used) + * + * @param out An output function which takes one character and a type-erased + * additional parameters + * @param extra_arg The type-erased argument to pass to the output function @p + * out with each call + * @param format A string specifying the format of the output, with %-marked + * specifiers of how to interpret additional arguments. 
+ * @param arg Additional arguments to the function, one for each specifier in + * @p format + * @return The number of characters for which the output f unction was invoked, + * not counting the terminating null character + * + */ +PRINTF_VISIBILITY +int z__b_fctprintf( + void (*out)(char c, void *extra_arg), void *extra_arg, + const char *format, va_list arg) ATTR_VPRINTF(3); + +#ifdef __cplusplus +} // extern "C" +#endif + +#if PRINTF_ALIAS_STANDARD_FUNCTION_NAMES_HARD +#undef printf_ +#undef sprintf_ +#undef vsprintf_ +#undef snprintf_ +#undef vsnprintf_ +#undef vprintf_ +#else +#if PRINTF_ALIAS_STANDARD_FUNCTION_NAMES_SOFT +#define printf printf_ +#define sprintf sprintf_ +#define vsprintf vsprintf_ +#define snprintf snprintf_ +#define vsnprintf vsnprintf_ +#define vprintf vprintf_ +#endif +#endif + +#endif // PRINTF_H_ From 4ab524a66b2fff87f002f58b46f21a06b34b5f96 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:31:30 +0100 Subject: [PATCH 06/23] core: hash: rename b_hash_string to avoid conflict with b_string --- core/hash/hash.c | 6 +++--- core/include/blue/core/hash.h | 4 ++-- core/include/blue/core/rope.h | 4 ++-- core/rope.c | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/core/hash/hash.c b/core/hash/hash.c index b0a3da9..a9f6353 100644 --- a/core/hash/hash.c +++ b/core/hash/hash.c @@ -41,13 +41,13 @@ static const struct b_hash_function_ops *hash_functions[] = { static const size_t nr_hash_functions = sizeof hash_functions / sizeof hash_functions[0]; -uint64_t b_hash_string(const char *s) +uint64_t b_hash_cstr(const char *s) { size_t x = 0; - return b_hash_string_ex(s, &x); + return b_hash_cstr_ex(s, &x); } -uint64_t b_hash_string_ex(const char *s, size_t *len) +uint64_t b_hash_cstr_ex(const char *s, size_t *len) { uint64_t hash = FNV1_OFFSET_BASIS; size_t i = 0; diff --git a/core/include/blue/core/hash.h b/core/include/blue/core/hash.h index e43452c..f309369 100644 --- a/core/include/blue/core/hash.h +++ 
b/core/include/blue/core/hash.h @@ -97,8 +97,8 @@ typedef struct b_hash_ctx { } ctx_state; } b_hash_ctx; -BLUE_API uint64_t b_hash_string(const char *s); -BLUE_API uint64_t b_hash_string_ex(const char *s, size_t *len); +BLUE_API uint64_t b_hash_cstr(const char *s); +BLUE_API uint64_t b_hash_cstr_ex(const char *s, size_t *len); BLUE_API b_status b_hash_ctx_init(b_hash_ctx *ctx, b_hash_function func); BLUE_API b_status b_hash_ctx_reset(b_hash_ctx *ctx); diff --git a/core/include/blue/core/rope.h b/core/include/blue/core/rope.h index dd3666d..dd1632d 100644 --- a/core/include/blue/core/rope.h +++ b/core/include/blue/core/rope.h @@ -24,7 +24,7 @@ struct b_string; .r_v = { \ .v_cstr = { \ .s = (str), \ - .hash = b_hash_string(str), \ + .hash = b_hash_cstr(str), \ }, \ }, \ } @@ -36,7 +36,7 @@ struct b_string; .r_v = { \ .v_cstr = { \ .s = (str), \ - .hash = b_hash_string(str), \ + .hash = b_hash_cstr(str), \ }, \ }, \ } diff --git a/core/rope.c b/core/rope.c index ad146d6..ecab2ae 100644 --- a/core/rope.c +++ b/core/rope.c @@ -16,7 +16,7 @@ void b_rope_init_cstr(struct b_rope *rope, const char *s) { memset(rope, 0x0, sizeof *rope); rope->r_flags = B_ROPE_F_CSTR; - rope->r_v.v_cstr.hash = b_hash_string_ex(s, &rope->r_len_total); + rope->r_v.v_cstr.hash = b_hash_cstr_ex(s, &rope->r_len_total); rope->r_len_left = rope->r_len_total; char *s2 = malloc(rope->r_len_total + 1); @@ -36,7 +36,7 @@ void b_rope_init_cstr_borrowed(struct b_rope *rope, const char *s) memset(rope, 0x0, sizeof *rope); rope->r_flags = B_ROPE_F_CSTR_BORROWED; rope->r_v.v_cstr.s = s; - rope->r_v.v_cstr.hash = b_hash_string_ex(s, &rope->r_len_total); + rope->r_v.v_cstr.hash = b_hash_cstr_ex(s, &rope->r_len_total); rope->r_len_left = rope->r_len_total; } @@ -45,7 +45,7 @@ void b_rope_init_cstr_static(struct b_rope *rope, const char *s) memset(rope, 0x0, sizeof *rope); rope->r_flags = B_ROPE_F_CSTR_STATIC; rope->r_v.v_cstr.s = s; - rope->r_v.v_cstr.hash = b_hash_string_ex(s, &rope->r_len_total); + 
rope->r_v.v_cstr.hash = b_hash_cstr_ex(s, &rope->r_len_total); rope->r_len_left = rope->r_len_total; } From bc5986150c730e62188cf02426b7ed7f8a7fc2a4 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:31:53 +0100 Subject: [PATCH 07/23] core: stream: fix buffer overflow in stream indentation stack --- core/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/stream.c b/core/stream.c index dee1ba3..87f3323 100644 --- a/core/stream.c +++ b/core/stream.c @@ -256,7 +256,7 @@ enum b_status b_stream_push_indent(b_stream *stream, int indent) stream->s_istack_ptr = 0; } - if (stream->s_istack_ptr + 1 > stream->s_istack_size) { + if (stream->s_istack_ptr + 1 >= stream->s_istack_size) { int *buf = realloc( stream->s_istack, (stream->s_istack_size + 4) * sizeof(int)); From cbaeb002f88f433f320c4c33a130b61a5cecd0f0 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:32:37 +0100 Subject: [PATCH 08/23] core: stream: b_stream_read_line_s now includes linefeed in output and correctly reports EOF condition --- core/stream.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/core/stream.c b/core/stream.c index 87f3323..fa7ea46 100644 --- a/core/stream.c +++ b/core/stream.c @@ -386,14 +386,19 @@ enum b_status b_stream_read_line_s(struct b_stream *src, b_stream *dest) break; } + b_stream_write_char(dest, c); + i++; + if (c == '\n') { break; } - - b_stream_write_char(dest, c); } - return B_SUCCESS; + if (status == B_ERR_NO_DATA && i > 0) { + status = B_SUCCESS; + } + + return status; } enum b_status b_stream_read_all_bytes( From 2fcadf7f398a529781da9ef3c10055852adcf763 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:36:26 +0100 Subject: [PATCH 09/23] core: string: add UTF-8 and null-char support; and some new string functions b_string now uses UTF-8 internally, and can correctly manipulate strings that contain non-ASCII and multi-byte codepoints. 
b_string now tracks the length of a string in both bytes and unicode codepoints. string insertion functions have been updated to correctly handle strings with multi-byte codepoints, so the index parameter of each function now refers to codepoints rather than bytes. inserting single-byte chars into a string with no multi-byte codepoints is still optimised to use array indexing and memmove. a b_string_iterator has been added to simplify iterating through a UTF-8 string, without having to use a charAt()-style interface that would incur performance penalties. strings can now also contain null bytes. new functions include: - b_string_tokenise: a b_iterator interface for iterating through tokens in a string. similar to strtok except that: * it is re-entrant, and uses no global state. * it supports delimiters that are longer than one character and/or contain multi-byte UTF-8 codepoints. * it doesn't modify the string that is being iterated over. * it correctly handles strings with multi-byte UTF-8 codepoints and null chars. - b_string_compare: for comparing strings. necessary to use this rather than strcmp as b_strings can now contain null chars.
--- object/include/blue/object/string.h | 77 +- object/string.c | 1364 ++++++++++++++++++++++++--- object/string.h | 9 +- 3 files changed, 1288 insertions(+), 162 deletions(-) diff --git a/object/include/blue/object/string.h b/object/include/blue/object/string.h index c8853e2..bc9550f 100644 --- a/object/include/blue/object/string.h +++ b/object/include/blue/object/string.h @@ -1,6 +1,7 @@ #ifndef BLUELIB_STRING_H_ #define BLUELIB_STRING_H_ +#include #include #include #include @@ -13,16 +14,44 @@ struct b_stream; #define B_CSTR(s) (b_string_create_from_cstr(s)) #define B_RV_CSTR(s) (B_RV(b_string_create_from_cstr(s))) +#define b_string_foreach(it, str) \ + for (int z__b_unique_name() = b_string_iterator_begin(str, it); \ + b_string_iterator_is_valid(it); b_string_iterator_next(it)) + typedef struct b_string b_string; +typedef struct b_string_iterator { + b_iterator _base; + int _m, _f; + b_string *_s, *_tmp; + const char **_d; + size_t _nd, _ds; + + b_status status; + size_t iteration_index; + size_t byte_index; + size_t codepoint_index; + b_wchar char_value; + const char *string_value; + size_t string_length; + size_t string_codepoints; +} b_string_iterator; + typedef enum b_strlen_flags { B_STRLEN_NORMAL = 0, B_STRLEN_IGNORE_ESC = 0x01u, B_STRLEN_IGNORE_MOD = 0x02u, + B_STRLEN_CODEPOINTS = 0x04u, } b_strlen_flags; +typedef enum b_string_tokenise_flags { + B_STRING_TOK_F_NORMAL = 0x00u, + B_STRING_TOK_F_INCLUDE_EMPTY_TOKENS = 0x01u, +} b_string_tokenise_flags; + BLUE_API b_string *b_string_create(void); BLUE_API b_string *b_string_create_from_cstr(const char *s); +BLUE_API b_string *b_string_create_from_wstr(const b_wchar *s); BLUE_API b_string *b_string_create_from_c(char c, size_t count); BLUE_API b_string *b_string_duplicate(const b_string *str); @@ -41,6 +70,7 @@ BLUE_API b_status b_string_replace( BLUE_API b_status b_string_replace_all(b_string *str, const char *new_data); BLUE_API b_status b_string_remove(b_string *str, size_t start, size_t length); BLUE_API 
b_status b_string_transform(b_string *str, int (*transformer)(int)); +BLUE_API b_status b_string_trim(b_string *str); static inline b_status b_string_toupper(b_string *str) { return b_string_transform(str, toupper); @@ -51,22 +81,42 @@ static inline b_status b_string_tolower(b_string *str) } BLUE_API b_status b_string_open_stream(b_string *str, struct b_stream **out); -BLUE_API void b_string_append_s(b_string *dest, const b_string *src); -BLUE_API void b_string_append_cstr(b_string *dest, const char *src); -BLUE_API void b_string_append_cstrf(b_string *dest, const char *format, ...); -BLUE_API void b_string_prepend_cstr(b_string *dest, const char *src); -BLUE_API void b_string_prepend_cstrf(b_string *dest, const char *format, ...); -BLUE_API void b_string_insert_s(b_string *dest, const b_string *src, size_t at); -BLUE_API void b_string_insert_cstr(b_string *dest, const char *src, size_t at); -BLUE_API void b_string_insert_cstrn( +BLUE_API b_status b_string_append_c(b_string *dest, char c); +BLUE_API b_status b_string_append_wc(b_string *dest, b_wchar c); +BLUE_API b_status b_string_append_s(b_string *dest, const b_string *src); +BLUE_API b_status b_string_append_cstr(b_string *dest, const char *src); +BLUE_API b_status b_string_append_wstr(b_string *dest, const b_wchar *src); +BLUE_API b_status b_string_append_cstrf(b_string *dest, const char *format, ...); + +BLUE_API b_status b_string_prepend_c(b_string *dest, char c); +BLUE_API b_status b_string_prepend_wc(b_string *dest, b_wchar c); +BLUE_API b_status b_string_prepend_cstr(b_string *dest, const char *src); +BLUE_API b_status b_string_prepend_wstr(b_string *dest, const b_wchar *src); +BLUE_API b_status b_string_prepend_cstrf(b_string *dest, const char *format, ...); + +BLUE_API b_status b_string_insert_c(b_string *dest, char c, size_t at); +BLUE_API b_status b_string_insert_wc(b_string *dest, b_wchar c, size_t at); +BLUE_API b_status b_string_insert_s(b_string *dest, const b_string *src, size_t at); +BLUE_API 
b_status b_string_insert_cstr(b_string *dest, const char *src, size_t at); +BLUE_API b_status b_string_insert_wstr( + b_string *dest, const b_wchar *src, size_t at); +BLUE_API b_status b_string_insert_cstrn( b_string *dest, const char *src, size_t len, size_t at); -BLUE_API void b_string_insert_cstrf( +BLUE_API b_status b_string_insert_wstrn( + b_string *dest, const char *src, size_t len, size_t at); +BLUE_API b_status b_string_insert_cstrf( b_string *dest, size_t at, const char *format, ...); BLUE_API void b_string_clear(b_string *str); +BLUE_API b_status b_string_tokenise( + b_string *str, const char *delims[], size_t nr_delims, + b_string_tokenise_flags flags, b_string_iterator *it); + BLUE_API size_t b_string_get_size(const b_string *str, b_strlen_flags flags); BLUE_API size_t b_string_get_capacity(const b_string *str); +BLUE_API bool b_string_compare(const b_string *a, const b_string *b); + BLUE_API char b_string_front(const b_string *str); BLUE_API char b_string_back(const b_string *str); @@ -75,9 +125,16 @@ BLUE_API void b_string_pop_back(b_string *str); BLUE_API const char *b_string_ptr(const b_string *str); BLUE_API b_string *b_string_substr(const b_string *str, size_t start, size_t len); +BLUE_API int b_string_iterator_begin(const b_string *string, b_string_iterator *it); +BLUE_API bool b_string_iterator_next(b_string_iterator *it); +// BLUE_API b_status b_string_iterator_erase(b_string_iterator *it); +BLUE_API bool b_string_iterator_is_valid(const b_string_iterator *it); + BLUE_API char *b_strdup(const char *s); BLUE_API size_t b_strlen(const char *s, b_strlen_flags flags); +BLUE_API b_wchar *b_wstrdup(const b_wchar *s); +BLUE_API size_t b_wstrlen(const b_wchar *s); -BLUE_API uint64_t b_cstr_hash(const char *s); +BLUE_API uint64_t b_string_hash(const b_string *s); #endif diff --git a/object/string.c b/object/string.c index d948a29..d2d3077 100644 --- a/object/string.c +++ b/object/string.c @@ -11,6 +11,17 @@ #include #include +#define 
IS_VALID_UTF8_SCALAR(x) \ + (((x) >= 0x0000 && (x) <= 0xD7FF) || ((x) >= 0xE000 && (x) <= 0x10FFFF)) + +#define STRING_TOK_F_FOUND_DELIM 0x80 + +enum iterator_mode { + ITERATOR_MODE_NONE = 0, + ITERATOR_MODE_CHARS, + ITERATOR_MODE_TOKENS, +}; + static void string_release(struct b_object *obj); static void string_to_string(struct b_object *obj, struct b_stream *out); @@ -23,6 +34,225 @@ static struct b_object_type string_type = { .t_to_string = string_to_string, }; +static size_t utf8_codepoint_size(b_wchar c) +{ + if (!IS_VALID_UTF8_SCALAR(c)) { + return 0; + } + + if (c <= 0x7F) { + return 1; + } + + if (c <= 0x7FF) { + return 2; + } + + if (c <= 0xFFFF) { + return 3; + } + + if (c <= 0x10FFFF) { + return 4; + } + + return 0; +} + +int32_t decode_utf8_trailer_byte(char c) +{ + if (!(c & 0x80) || (c & 0x40)) { + return -1; + } + + return c & 0x3F; +} + +static b_wchar utf8_codepoint_decode(const char *s) +{ + b_wchar result = 0; + int len = 0; + + if (!(s[0] & 0x80)) { + len = 1; + result = s[0] & 0x7F; + } else if (s[0] & 0xC0 && !(s[0] & 0x20)) { + len = 2; + result = s[0] & 0x1F; + result <<= 6; + } else if (s[0] & 0xE0 && !(s[0] & 0x10)) { + len = 3; + result = s[0] & 0x0F; + result <<= 12; + } else if (s[0] & 0xF0 && !(s[0] & 0x08)) { + len = 4; + result = s[0] & 0x07; + result <<= 18; + } else { + return B_WCHAR_INVALID; + } + + for (int i = 1; i < len; i++) { + int32_t c = decode_utf8_trailer_byte(s[i]); + if (c == -1) { + return B_WCHAR_INVALID; + } + + c <<= 6 * (len - i - 1); + result |= c; + } + + if (!IS_VALID_UTF8_SCALAR(result)) { + return B_WCHAR_INVALID; + } + + return result; +} + +static size_t utf8_codepoint_encode(b_wchar c, char s[4]) +{ + size_t len = utf8_codepoint_size(c); + + switch (len) { + case 1: + s[0] = c & 0x7F; + break; + case 2: + s[0] = ((c >> 6) & 0x1F) | 0xC0; + s[1] = (c & 0x3F) | 0x80; + break; + case 3: + s[0] = ((c >> 12) & 0x0F) | 0xE0; + s[1] = ((c >> 6) & 0x3F) | 0x80; + s[2] = (c & 0x3F) | 0x80; + break; + case 4: + s[0] 
= ((c >> 18) & 0x07) | 0xF0; + s[1] = ((c >> 12) & 0x3F) | 0x80; + s[2] = ((c >> 6) & 0x3F) | 0x80; + s[3] = (c & 0x3F) | 0x80; + break; + default: + return 0; + } + + return len; +} + +static size_t codepoint_stride(const char *s) +{ + char c = *s; + + if (!(c & 0x80)) { + return 1; + } + + if ((c & 0xC0) && !(c & 0x20)) { + return 2; + } + + if ((c & 0xE0) && !(c & 0x10)) { + return 3; + } + + if ((c & 0xF0) && !(c & 0x08)) { + return 4; + } + + return 0; +} + +static size_t get_number_of_codepoints(const char *s, size_t len) +{ + size_t nr_codepoints = 0; + const char *end = s + len; + + while (*s && s < end) { + size_t stride = codepoint_stride(s); + if (stride == 0) { + /* invalid codepoint */ + return 0; + } + + nr_codepoints++; + s += stride; + } + + if (*s != 0) { + /* string is not null-terminated */ + return 0; + } + + return nr_codepoints; +} + +static size_t get_utf8_encoded_size(const b_wchar *s, size_t nr_codepoints) +{ + size_t len = 0; + for (size_t i = 0; i < nr_codepoints; i++) { + size_t l = utf8_codepoint_size(s[i]); + if (l == 0) { + /* invalid codepoint */ + return 0; + } + + len += l; + } + + return len; +} + +static enum b_status convert_codepoint_range_to_byte_range( + const struct b_string *str, size_t cp_start, size_t cp_length, + size_t *out_byte_start, size_t *out_byte_length) +{ + const char *s = b_string_ptr(str); + size_t byte_offset = 0, byte_length = 0; + + for (size_t i = 0; i < cp_start; i++) { + const char *cp = &s[byte_offset]; + if (!cp || byte_offset >= str->s_len) { + /* out of range */ + return B_ERR_OUT_OF_BOUNDS; + } + + size_t stride = codepoint_stride(cp); + if (!stride) { + /* invalid codepoint */ + return B_ERR_BAD_STATE; + } + + byte_offset += stride; + } + + for (size_t i = 0; i < cp_length; i++) { + size_t cp_offset = byte_offset + byte_length; + const char *cp = &s[cp_offset]; + if (!cp || (cp_offset >= str->s_len)) { + /* out of range */ + return B_ERR_OUT_OF_BOUNDS; + } + + size_t stride = codepoint_stride(cp); 
+ if (!stride) { + /* invalid codepoint */ + return B_ERR_BAD_STATE; + } + + byte_length += stride; + } + + if (out_byte_start) { + *out_byte_start = byte_offset; + } + + if (out_byte_length) { + *out_byte_length = byte_length; + } + + return B_SUCCESS; +} + struct b_string *b_string_create(void) { struct b_string *str @@ -32,6 +262,7 @@ struct b_string *b_string_create(void) } str->s_len = 0; + str->s_codepoints = 0; str->s_max = STRING_INLINE_CAPACITY; return str; @@ -52,6 +283,67 @@ static char *string_ptr(struct b_string *str) return str->s_data.d_external; } +static char *get_next_codepoint(struct b_string *str, char *this_codepoint) +{ + char c = *this_codepoint; + char *end = this_codepoint - 1; + size_t len = 0; + if (!(c & 0x80)) { + len = 1; + } else if ((c & 0xC0) && !(c & 0x20)) { + len = 2; + } else if ((c & 0xE0) && !(c & 0x10)) { + len = 3; + } else if ((c & 0xF0) && !(c & 0x08)) { + len = 4; + } else { + return NULL; + } + + return this_codepoint + len; +} + +static char *get_previous_codepoint(struct b_string *str, char *this_codepoint) +{ + char *start = string_ptr(str); + char *end = this_codepoint - 1; + + while (end >= start) { + char c = *end; + if ((c & 0x80) && !(c & 0x40)) { + end--; + continue; + } + + if ((c & 0xF0) && !(c & 0x08)) { + return end; + } + + if ((c & 0xE0) && !(c & 0x10)) { + return end; + } + + if ((c & 0xC0) && !(c & 0x20)) { + return end; + } + + if (!(c & 0x80)) { + return end; + } + } + + return NULL; +} + +static char *get_last_codepoint(struct b_string *str) +{ + if (str->s_len == 0) { + return NULL; + } + + return get_previous_codepoint(str, string_ptr(str) + str->s_len); +} + static int string_make_inline(struct b_string *str) { char *buffer = string_ptr(str); @@ -143,12 +435,16 @@ struct b_string *b_string_create_from_cstr(const char *s) return str; } - str->s_len = strlen(s); - string_change_capacity(str, str->s_len); + size_t s_len = strlen(s); + size_t s_codepoints = get_number_of_codepoints(s, s_len); + 
b_string_reserve(str, s_len); char *dest = string_ptr(str); - memcpy(dest, s, str->s_len); - dest[str->s_len] = 0; + memcpy(dest, s, s_len); + dest[s_len] = 0; + + str->s_len = s_len; + str->s_codepoints = s_codepoints; return str; } @@ -167,6 +463,7 @@ struct b_string *b_string_create_from_c(char c, size_t count) } str->s_len = count; + str->s_codepoints = count; return str; } @@ -183,6 +480,7 @@ struct b_string *b_string_duplicate(const struct b_string *str) memcpy(dst, src, str->s_len); new_str->s_len = str->s_len; + new_str->s_codepoints = str->s_codepoints; return new_str; } @@ -193,7 +491,9 @@ char *b_string_steal(struct b_string *str) char *src = string_ptr(str); if (string_is_inline(str)) { - dest = b_strdup(src); + dest = malloc(str->s_len + 1); + memcpy(dest, src, str->s_len); + dest[str->s_len] = 0; src[0] = 0; } else { dest = src; @@ -202,6 +502,7 @@ char *b_string_steal(struct b_string *str) } str->s_len = 0; + str->s_codepoints = 0; return dest; } @@ -216,7 +517,7 @@ b_status b_string_reserve(struct b_string *str, size_t capacity) return err == 0 ? 
B_SUCCESS : B_ERR_NO_MEMORY; } -b_status b_string_replace( +static enum b_status replace_ansi( struct b_string *str, size_t start, size_t length, const char *new_data) { b_status status = B_SUCCESS; @@ -255,6 +556,70 @@ b_status b_string_replace( return B_SUCCESS; } +static enum b_status replace_utf8( + struct b_string *str, size_t start, size_t length, const char *new_data) +{ + if (start >= str->s_codepoints) { + return B_ERR_INVALID_ARGUMENT; + } + + if (start + length >= str->s_codepoints) { + length = str->s_codepoints - start; + } + + size_t new_data_nr_bytes = strlen(new_data); + size_t new_data_nr_codepoints + = get_number_of_codepoints(new_data, new_data_nr_bytes); + if (new_data_nr_codepoints == 0) { + /* new_data is not a valid utf-8 string */ + return B_ERR_INVALID_ARGUMENT; + } + + size_t old_data_offset = 0, old_data_nr_bytes = 0; + size_t old_data_nr_codepoints = length; + enum b_status status = convert_codepoint_range_to_byte_range( + str, start, length, &old_data_offset, &old_data_nr_bytes); + if (!B_OK(status)) { + return status; + } + + size_t new_total_bytes = str->s_len - old_data_nr_bytes + new_data_nr_bytes; + if (new_total_bytes > str->s_max) { + status = b_string_reserve(str, new_total_bytes); + } + + if (!B_OK(status)) { + return status; + } + + char *s = string_ptr(str); + + char *substitution_start = s + old_data_offset; + char *excess_src = s + old_data_offset + old_data_nr_bytes; + size_t excess_length = str->s_len - old_data_offset - old_data_nr_bytes; + char *excess_dest = substitution_start + new_data_nr_bytes; + + memmove(excess_dest, excess_src, excess_length); + memmove(substitution_start, new_data, new_data_nr_bytes); + s[new_total_bytes] = '\0'; + + str->s_len = new_total_bytes; + str->s_codepoints -= old_data_nr_codepoints; + str->s_codepoints += new_data_nr_codepoints; + + return B_SUCCESS; +} + +b_status b_string_replace( + struct b_string *str, size_t start, size_t length, const char *new_data) +{ + if (str->s_len == 
str->s_codepoints) { + return replace_ansi(str, start, length, new_data); + } + + return replace_utf8(str, start, length, new_data); +} + b_status b_string_replace_all(b_string *str, const char *new_data) { size_t new_len = strlen(new_data); @@ -267,7 +632,7 @@ b_status b_string_replace_all(b_string *str, const char *new_data) return B_SUCCESS; } -b_status b_string_remove(b_string *str, size_t start, size_t length) +static enum b_status remove_ansi(struct b_string *str, size_t start, size_t length) { b_status status = B_SUCCESS; @@ -295,7 +660,42 @@ b_status b_string_remove(b_string *str, size_t start, size_t length) return B_SUCCESS; } -b_status b_string_transform(b_string *str, int (*transformer)(int)) +static enum b_status remove_utf8(struct b_string *str, size_t start, size_t length) +{ + size_t remove_offset = 0, remove_nr_bytes = 0; + enum b_status status = convert_codepoint_range_to_byte_range( + str, start, length, &remove_offset, &remove_nr_bytes); + if (!B_OK(status)) { + return status; + } + + size_t new_total_bytes = str->s_len - remove_nr_bytes; + + char *s = string_ptr(str); + + char *removal_start = s + remove_offset; + char *excess_src = s + remove_offset + remove_nr_bytes; + size_t excess_length = str->s_len - remove_offset - remove_nr_bytes; + + memmove(removal_start, excess_src, excess_length); + s[new_total_bytes] = '\0'; + + str->s_len = new_total_bytes; + str->s_codepoints -= length; + + return B_SUCCESS; +} + +enum b_status b_string_remove(struct b_string *str, size_t start, size_t length) +{ + if (str->s_len == str->s_codepoints) { + return remove_ansi(str, start, length); + } + + return remove_utf8(str, start, length); +} + +b_status b_string_transform(struct b_string *str, int (*transformer)(int)) { char *s = string_ptr(str); for (size_t i = 0; i < str->s_len; i++) { @@ -309,220 +709,397 @@ b_status b_string_transform(b_string *str, int (*transformer)(int)) return B_SUCCESS; } -static enum b_status stream_close(struct b_stream *stream) 
+static enum b_status trim_ansi(struct b_string *str) { - struct b_string *str = stream->s_ptr; - b_string_release(str); - - return B_SUCCESS; -} - -static enum b_status stream_getc(struct b_stream *stream, int *out) -{ - struct b_string *str = stream->s_ptr; - if (stream->s_cursor >= str->s_len) { - return B_ERR_NO_DATA; + char *s = string_ptr(str); + size_t whitespace_end = 0; + for (size_t i = 0; i < str->s_len; i++) { + if (!isspace(s[i])) { + whitespace_end = i; + break; + } } - char *s = string_ptr(str); - *out = s[stream->s_cursor]; - stream->s_cursor++; + memmove(s, s + whitespace_end, str->s_len - whitespace_end); + str->s_len -= whitespace_end; + + for (long i = str->s_len - 1; i >= 0; i--) { + if (isspace(s[i])) { + s[i] = 0; + str->s_len--; + } else { + break; + } + } return B_SUCCESS; } -static enum b_status stream_read( - struct b_stream *stream, unsigned char *buf, size_t count, size_t *nr_read) +static enum b_status trim_utf8(struct b_string *str) { - struct b_string *str = stream->s_ptr; - if (stream->s_cursor >= str->s_len) { - *nr_read = 0; + char *s = string_ptr(str); + size_t whitespace_end = 0; + size_t nr_whitespace_codepoints = 0; + for (size_t i = 0; i < str->s_len;) { + b_wchar c = utf8_codepoint_decode(&s[i]); + + if (!b_wchar_is_space(s[i])) { + whitespace_end = i; + break; + } + + nr_whitespace_codepoints++; + } + + memmove(s, s + whitespace_end, str->s_len - whitespace_end); + str->s_len -= whitespace_end; + str->s_codepoints -= nr_whitespace_codepoints; + + char *p = get_last_codepoint(str); + if (!p) { + return B_ERR_BAD_STATE; + } + + for (long i = str->s_len - 1; i >= 0;) { + b_wchar c = utf8_codepoint_decode(p); + size_t c_size = utf8_codepoint_size(c); + + if (b_wchar_is_space(c)) { + memset(p, 0, c_size); + str->s_len -= c_size; + str->s_codepoints--; + } else { + break; + } + + p = get_previous_codepoint(str, p); + } + + return B_SUCCESS; +} + +b_status b_string_trim(struct b_string *str) +{ + if (str->s_len == 0) { return 
B_SUCCESS; } - size_t available = str->s_len - stream->s_cursor; - size_t to_read = b_min(size_t, count, available); - - char *s = string_ptr(str) + stream->s_cursor; - - memcpy(buf, s, to_read); - - *nr_read = to_read; - - return B_SUCCESS; -} - -static enum b_status stream_write( - struct b_stream *stream, const unsigned char *buf, size_t count, - size_t *nr_written) -{ - struct b_string *str = stream->s_ptr; - enum b_status status = B_SUCCESS; - - if (stream->s_cursor + count > str->s_max) { - status = b_string_reserve(str, stream->s_cursor + count); + if (str->s_len == str->s_codepoints) { + return trim_ansi(str); } - if (!B_OK(status)) { - return status; - } - - char *s = string_ptr(str) + stream->s_cursor; - memcpy(s, buf, count); - s[str->s_max] = '\0'; - stream->s_cursor += count; - str->s_len = b_max(size_t, str->s_len, stream->s_cursor + count); - - *nr_written = count; - - return B_SUCCESS; + return trim_utf8(str); } -static enum b_status stream_seek( - struct b_stream *stream, long long offset, b_stream_seek_origin origin) -{ - struct b_string *str = stream->s_ptr; - - size_t abs_offset; - switch (origin) { - case B_STREAM_SEEK_START: - abs_offset = offset; - break; - case B_STREAM_SEEK_CURRENT: - abs_offset = stream->s_cursor + offset; - break; - case B_STREAM_SEEK_END: - abs_offset = str->s_len + offset; - break; - default: - return B_ERR_INVALID_ARGUMENT; - } - - stream->s_cursor = abs_offset; - - return B_SUCCESS; -} - -static enum b_status stream_reserve(struct b_stream *stream, size_t len) -{ - struct b_string *str = stream->s_ptr; - - size_t new_capacity = str->s_len + len; - return b_string_reserve(str, new_capacity); -} - -enum b_status b_string_open_stream(struct b_string *str, struct b_stream **out) -{ - struct b_stream *stream = malloc(sizeof *stream); - if (!stream) { - return B_ERR_NO_MEMORY; - } - - memset(stream, 0x0, sizeof *stream); - - stream->s_mode |= B_STREAM_READ | B_STREAM_WRITE; - - stream->s_ptr = b_string_retain(str); - 
stream->s_close = stream_close; - stream->s_getc = stream_getc; - stream->s_read = stream_read; - stream->s_write = stream_write; - stream->s_seek = stream_seek; - stream->s_reserve = stream_reserve; - - *out = stream; - - return B_SUCCESS; -} - -static void string_insert( - struct b_string *dest, const char *src, size_t len, size_t at) +static enum b_status string_insert_cstr_ansi( + struct b_string *dest, const char *src, size_t nr_bytes, size_t at) { if (at >= dest->s_len) { at = dest->s_len; } - size_t new_size = dest->s_len + len; + size_t new_size = dest->s_len + nr_bytes; if (dest->s_max < new_size) { string_change_capacity(dest, new_size); } char *dest_buf = string_ptr(dest); char *from = dest_buf + at; - char *to = dest_buf + at + len; + char *to = dest_buf + at + nr_bytes; memmove(to, from, dest->s_len - at); - memcpy(from, src, len); + memcpy(from, src, nr_bytes); dest_buf[new_size] = '\0'; dest->s_len = new_size; + dest->s_codepoints += nr_bytes; + return B_SUCCESS; } -static void string_insertf( +static enum b_status string_insert_cstr_utf8( + struct b_string *dest, const char *src, size_t nr_bytes, + size_t codepoint_offset) +{ + if (codepoint_offset >= dest->s_codepoints) { + codepoint_offset = dest->s_codepoints; + } + + size_t byte_offset = 0; + enum b_status status = B_SUCCESS; + + if (codepoint_offset == dest->s_codepoints) { + byte_offset = dest->s_len; + } else { + status = convert_codepoint_range_to_byte_range( + dest, 0, codepoint_offset, NULL, &byte_offset); + } + + if (!B_OK(status)) { + return status; + } + + size_t new_total_bytes = dest->s_len + nr_bytes; + if (dest->s_max < new_total_bytes) { + string_change_capacity(dest, new_total_bytes); + } + + char *dest_buf = string_ptr(dest); + char *from = dest_buf + byte_offset; + char *to = dest_buf + byte_offset + nr_bytes; + + memmove(to, from, dest->s_len - byte_offset); + memcpy(from, src, nr_bytes); + dest_buf[new_total_bytes] = '\0'; + + dest->s_len += nr_bytes; + dest->s_codepoints += 
get_number_of_codepoints(src, nr_bytes); + + return B_SUCCESS; +} + +static enum b_status string_insert_wstr_ansi( + struct b_string *dest, const b_wchar *src, size_t nr_codepoints, size_t at) +{ + if (at >= dest->s_len) { + at = dest->s_len; + } + + size_t utf8_encoded_size = get_utf8_encoded_size(src, nr_codepoints); + if (utf8_encoded_size == 0) { + return B_ERR_INVALID_ARGUMENT; + } + + size_t new_total_bytes = dest->s_len + utf8_encoded_size; + if (dest->s_max < new_total_bytes) { + string_change_capacity(dest, new_total_bytes); + } + + char *dest_buf = string_ptr(dest); + char *from = dest_buf + at; + char *to = dest_buf + at + utf8_encoded_size; + memmove(to, from, dest->s_len - at); + + char *ptr = dest_buf + at; + for (size_t i = 0; i < nr_codepoints; i++) { + char c[4]; + size_t c_len = utf8_codepoint_encode(src[i], c); + if (c_len == 0) { + /* the input string was already checked by + * get_utf8_encoded_size, so this should never happen */ + return B_ERR_INVALID_ARGUMENT; + } + + memcpy(ptr, c, c_len); + ptr += c_len; + } + + dest_buf[new_total_bytes] = '\0'; + + dest->s_len += utf8_encoded_size; + dest->s_codepoints += nr_codepoints; + + return B_SUCCESS; +} + +static enum b_status string_insert_wstr_utf8( + struct b_string *dest, const b_wchar *src, size_t nr_codepoints, + size_t codepoint_offset) +{ + if (codepoint_offset >= dest->s_codepoints) { + codepoint_offset = dest->s_codepoints; + } + + size_t utf8_encoded_size = get_utf8_encoded_size(src, nr_codepoints); + if (utf8_encoded_size == 0) { + return B_ERR_INVALID_ARGUMENT; + } + + size_t new_total_bytes = dest->s_len + utf8_encoded_size; + if (dest->s_max < new_total_bytes) { + string_change_capacity(dest, new_total_bytes); + } + + size_t move_offset = 0; + enum b_status status = B_SUCCESS; + + if (codepoint_offset == dest->s_codepoints) { + move_offset = dest->s_len; + } else { + status = convert_codepoint_range_to_byte_range( + dest, 0, codepoint_offset, NULL, &move_offset); + } + + if 
(!B_OK(status)) { + return status; + } + + char *dest_buf = string_ptr(dest); + char *from = dest_buf + move_offset; + char *to = dest_buf + move_offset + utf8_encoded_size; + memmove(to, from, dest->s_len - move_offset); + + char *ptr = dest_buf + move_offset; + for (size_t i = 0; i < nr_codepoints; i++) { + char c[4]; + size_t c_len = utf8_codepoint_encode(src[i], c); + if (c_len == 0) { + /* the input string was already checked by + * get_utf8_encoded_size, so this should never happen */ + return B_ERR_INVALID_ARGUMENT; + } + + memcpy(ptr, c, c_len); + ptr += c_len; + } + + dest_buf[new_total_bytes] = '\0'; + + dest->s_len += utf8_encoded_size; + dest->s_codepoints += nr_codepoints; + + return B_SUCCESS; +} + +static enum b_status string_insert_cstr( + struct b_string *dest, const char *src, size_t nr_bytes, size_t at) +{ + if (dest->s_len == dest->s_codepoints) { + return string_insert_cstr_ansi(dest, src, nr_bytes, at); + } + + return string_insert_cstr_utf8(dest, src, nr_bytes, at); +} + +static enum b_status string_insert_wstr( + struct b_string *dest, const b_wchar *src, size_t nr_codepoints, size_t at) +{ + if (dest->s_len == dest->s_codepoints) { + return string_insert_wstr_ansi(dest, src, nr_codepoints, at); + } + + return string_insert_wstr_utf8(dest, src, nr_codepoints, at); +} + +static enum b_status string_insertf( struct b_string *dest, size_t at, const char *format, va_list arg) { char buf[1024]; size_t len = vsnprintf(buf, sizeof buf, format, arg); - string_insert(dest, buf, len, at); + return string_insert_cstr(dest, buf, len, at); } -void b_string_insert_s(struct b_string *dest, const struct b_string *src, size_t at) +enum b_status b_string_insert_c(struct b_string *dest, char c, size_t at) { - string_insert(dest, b_string_ptr(src), src->s_len, at); + return string_insert_cstr(dest, &c, 1, at); } -void b_string_insert_cstr(struct b_string *dest, const char *src, size_t at) +enum b_status b_string_insert_wc(struct b_string *dest, b_wchar c, 
size_t at) { - string_insert(dest, src, strlen(src), at); + return string_insert_wstr(dest, &c, 1, at); } -void b_string_insert_cstrf(struct b_string *dest, size_t at, const char *format, ...) +enum b_status b_string_insert_s( + struct b_string *dest, const struct b_string *src, size_t at) +{ + return string_insert_cstr(dest, b_string_ptr(src), src->s_len, at); +} + +enum b_status b_string_insert_cstr(struct b_string *dest, const char *src, size_t at) +{ + return string_insert_cstr(dest, src, strlen(src), at); +} + +enum b_status b_string_insert_wstr( + struct b_string *dest, const b_wchar *src, size_t at) +{ + return string_insert_wstr(dest, src, b_wstrlen(src), at); +} + +enum b_status b_string_insert_cstrf( + struct b_string *dest, size_t at, const char *format, ...) { va_list arg; va_start(arg, format); - string_insertf(dest, at, format, arg); + enum b_status status = string_insertf(dest, at, format, arg); va_end(arg); + + return status; } -void b_string_insert_cstrn(b_string *dest, const char *src, size_t len, size_t at) +enum b_status b_string_insert_cstrn( + b_string *dest, const char *src, size_t len, size_t at) { - string_insert(dest, src, len, at); + return string_insert_cstr(dest, src, len, at); } -void b_string_append_s(struct b_string *dest, const struct b_string *src) +enum b_status b_string_append_c(struct b_string *dest, char c) { - b_string_insert_s(dest, src, SIZE_MAX); + return b_string_insert_c(dest, c, SIZE_MAX); } -void b_string_append_cstr(struct b_string *dest, const char *src) +enum b_status b_string_append_wc(struct b_string *dest, b_wchar c) { - b_string_insert_cstr(dest, src, SIZE_MAX); + return b_string_insert_wc(dest, c, SIZE_MAX); } -void b_string_append_cstrf(struct b_string *dest, const char *format, ...) 
+enum b_status b_string_append_s(struct b_string *dest, const struct b_string *src) +{ + return b_string_insert_s(dest, src, SIZE_MAX); +} + +enum b_status b_string_append_cstr(struct b_string *dest, const char *src) +{ + return b_string_insert_cstr(dest, src, SIZE_MAX); +} + +enum b_status b_string_append_wstr(struct b_string *dest, const b_wchar *src) +{ + return b_string_insert_wstr(dest, src, SIZE_MAX); +} + +enum b_status b_string_append_cstrf(struct b_string *dest, const char *format, ...) { va_list arg; va_start(arg, format); - string_insertf(dest, SIZE_MAX, format, arg); + enum b_status status = string_insertf(dest, SIZE_MAX, format, arg); va_end(arg); + + return status; } -void b_string_prepend_s(struct b_string *dest, const struct b_string *src) +enum b_status b_string_prepend_c(struct b_string *dest, char c) { - b_string_insert_s(dest, src, 0); + return b_string_insert_c(dest, c, 0); } -void b_string_prepend_cstr(struct b_string *dest, const char *src) +enum b_status b_string_prepend_wc(struct b_string *dest, b_wchar c) { - b_string_insert_cstr(dest, src, 0); + return b_string_insert_wc(dest, c, 0); } -void b_string_prepend_cstrf(struct b_string *dest, const char *format, ...) +enum b_status b_string_prepend_s(struct b_string *dest, const struct b_string *src) +{ + return b_string_insert_s(dest, src, 0); +} + +enum b_status b_string_prepend_cstr(struct b_string *dest, const char *src) +{ + return b_string_insert_cstr(dest, src, 0); +} + +enum b_status b_string_prepend_wstr(struct b_string *dest, const b_wchar *src) +{ + return b_string_insert_wstr(dest, src, 0); +} + +enum b_status b_string_prepend_cstrf(struct b_string *dest, const char *format, ...) 
{ va_list arg; va_start(arg, format); - string_insertf(dest, 0, format, arg); + enum b_status status = string_insertf(dest, 0, format, arg); va_end(arg); + + return status; } void b_string_clear(struct b_string *str) @@ -534,15 +1111,150 @@ void b_string_clear(struct b_string *str) char *s = string_ptr(str); *s = '\0'; str->s_len = 0; + str->s_codepoints = 0; +} + +static struct b_iterator_ops it_ops; + +static bool has_prefix(const char *s, const char *prefix, size_t *prefix_len) +{ + size_t len = 0; + for (size_t i = 0;; i++) { + if (s[i] == 0 || prefix[i] == 0) { + break; + } + + if (s[i] != prefix[i]) { + return false; + } + + len++; + } + + *prefix_len = len; + return true; +} + +static bool has_prefixes( + const char *s, const char **prefixes, size_t nr_prefixes, + size_t *selected_prefix_len) +{ + for (size_t i = 0; i < nr_prefixes; i++) { + const char *delim = prefixes[i]; + if (has_prefix(s, delim, selected_prefix_len)) { + return true; + } + } + + return false; +} + +static enum b_status find_next_token(struct b_string_iterator *it) +{ + size_t offset = it->_ds; + size_t prefix_len = 0; + char *start = string_ptr(it->_s); + bool found_delim_last_time = (it->_f & STRING_TOK_F_FOUND_DELIM) != 0; + bool found_delim = false; + bool include_empty = (it->_f & B_STRING_TOK_F_INCLUDE_EMPTY_TOKENS); + bool found_null = false; + b_string_clear(it->_tmp); + + while (1) { + char *s = start + offset; + if (*s == 0) { + it->_f &= ~STRING_TOK_F_FOUND_DELIM; + break; + } + + found_delim = has_prefixes(s, it->_d, it->_nd, &prefix_len); + if (found_delim) { + if (it->_tmp->s_len == 0 && !include_empty) { + /* this token is empty, skip it */ + offset += prefix_len; + found_delim = false; + continue; + } + + it->_f |= STRING_TOK_F_FOUND_DELIM; + break; + } + + b_wchar c = utf8_codepoint_decode(s); + if (c == B_WCHAR_INVALID) { + return B_ERR_BAD_STATE; + } + + b_string_append_wc(it->_tmp, c); + offset += utf8_codepoint_size(c); + + if (offset > it->_s->s_len) { + break; + } 
+ } + + bool end = !found_delim && it->_tmp->s_len == 0; + + if (include_empty && found_delim_last_time) { + end = false; + } + + if (end) { + it->string_value = NULL; + it->string_length = 0; + it->string_codepoints = 0; + return B_ERR_NO_DATA; + } + + it->_ds = offset + prefix_len; + it->string_value = b_string_ptr(it->_tmp); + it->string_length = it->_tmp->s_len; + it->string_codepoints = it->_tmp->s_codepoints; + return B_SUCCESS; +} + +enum b_status b_string_tokenise( + struct b_string *str, const char *delims[], size_t nr_delims, + b_string_tokenise_flags flags, struct b_string_iterator *it) +{ + memset(it, 0x0, sizeof *it); + + if (!nr_delims) { + return B_ERR_INVALID_ARGUMENT; + } + + struct b_string *tmp = b_string_create(); + if (!tmp) { + return B_ERR_NO_MEMORY; + } + + it->_base.it_ops = &it_ops; + it->_m = ITERATOR_MODE_TOKENS; + it->_d = delims; + it->_nd = nr_delims; + it->_s = str; + it->_f = flags; + it->_tmp = tmp; + + enum b_status status = find_next_token(it); + if (!B_OK(status)) { + b_string_release(tmp); + it->_tmp = NULL; + } + + return status; } size_t b_string_get_size(const struct b_string *str, b_strlen_flags flags) { - if (flags != B_STRLEN_NORMAL) { + switch (flags) { + case B_STRLEN_NORMAL: + return str->s_len; + case B_STRLEN_CODEPOINTS: + return str->s_codepoints; + default: return b_strlen(b_string_ptr(str), flags); } - - return str->s_len; } size_t b_string_get_capacity(const struct b_string *str) @@ -550,6 +1262,28 @@ size_t b_string_get_capacity(const struct b_string *str) return str->s_max; } +bool b_string_compare(const struct b_string *a, const struct b_string *b) +{ + if (a->s_len != b->s_len) { + return false; + } + + if (a == b) { + return true; + } + + const char *ap = b_string_ptr(a); + const char *bp = b_string_ptr(b); + + for (size_t i = 0; i < a->s_len; i++) { + if (ap[i] != bp[i]) { + return false; + } + } + + return true; +} + char b_string_front(const struct b_string *str) { if (str->s_len == 0) { @@ -613,6 
+1347,295 @@ struct b_string *b_string_substr(const struct b_string *str, size_t start, size_ return newstr; } +static enum b_status stream_close(struct b_stream *stream) +{ + struct b_string *str = stream->s_ptr; + b_string_release(str); + + return B_SUCCESS; +} + +static enum b_status stream_getc(struct b_stream *stream, int *out) +{ + struct b_string *str = stream->s_ptr; + if (stream->s_cursor >= str->s_len) { + return B_ERR_NO_DATA; + } + + char *s = string_ptr(str); + *out = s[stream->s_cursor]; + stream->s_cursor++; + + return B_SUCCESS; +} + +static enum b_status stream_read( + struct b_stream *stream, unsigned char *buf, size_t count, size_t *nr_read) +{ + struct b_string *str = stream->s_ptr; + if (stream->s_cursor >= str->s_len) { + *nr_read = 0; + return B_SUCCESS; + } + + size_t available = str->s_len - stream->s_cursor; + size_t to_read = b_min(size_t, count, available); + + char *s = string_ptr(str) + stream->s_cursor; + + memcpy(buf, s, to_read); + + *nr_read = to_read; + + return B_SUCCESS; +} + +static enum b_status stream_write( + struct b_stream *stream, const unsigned char *buf, size_t count, + size_t *nr_written) +{ + struct b_string *str = stream->s_ptr; + enum b_status status = B_SUCCESS; + + if (stream->s_cursor + count > str->s_max) { + status = b_string_reserve(str, stream->s_cursor + count); + } + + if (!B_OK(status)) { + return status; + } + + string_insert_cstr(str, (const char *)buf, count, stream->s_cursor); + stream->s_cursor += count; + + *nr_written = count; + + return B_SUCCESS; +} + +static enum b_status stream_seek( + struct b_stream *stream, long long offset, b_stream_seek_origin origin) +{ + struct b_string *str = stream->s_ptr; + + size_t abs_offset; + switch (origin) { + case B_STREAM_SEEK_START: + abs_offset = offset; + break; + case B_STREAM_SEEK_CURRENT: + abs_offset = stream->s_cursor + offset; + break; + case B_STREAM_SEEK_END: + abs_offset = str->s_len + offset; + break; + default: + return B_ERR_INVALID_ARGUMENT; + } 
+ + stream->s_cursor = abs_offset; + + return B_SUCCESS; +} + +static enum b_status stream_reserve(struct b_stream *stream, size_t len) +{ + struct b_string *str = stream->s_ptr; + + size_t new_capacity = str->s_len + len; + return b_string_reserve(str, new_capacity); +} + +enum b_status b_string_open_stream(struct b_string *str, struct b_stream **out) +{ + struct b_stream *stream = malloc(sizeof *stream); + if (!stream) { + return B_ERR_NO_MEMORY; + } + + memset(stream, 0x0, sizeof *stream); + + stream->s_mode |= B_STREAM_READ | B_STREAM_WRITE; + + stream->s_ptr = b_string_retain(str); + stream->s_close = stream_close; + stream->s_getc = stream_getc; + stream->s_read = stream_read; + stream->s_write = stream_write; + stream->s_seek = stream_seek; + stream->s_reserve = stream_reserve; + + *out = stream; + + return B_SUCCESS; +} + +static bool string_iterator_next(struct b_iterator *it) +{ + return b_string_iterator_next((struct b_string_iterator *)it); +} + +static bool string_iterator_is_valid(const struct b_iterator *it) +{ + return b_string_iterator_is_valid((struct b_string_iterator *)it); +} + +static struct b_iterator_ops it_ops = { + .it_next = string_iterator_next, + .it_close = NULL, + .it_is_valid = string_iterator_is_valid, +}; + +static void iterator_cleanup(b_string_iterator *it) +{ + if (it->_tmp) { + b_string_release(it->_tmp); + } + + memset(it, 0x0, sizeof *it); +} + +int b_string_iterator_begin(const struct b_string *string, b_string_iterator *it) +{ + memset(it, 0x0, sizeof *it); + + it->_base.it_ops = &it_ops; + + if (!string->s_len) { + it->status = B_ERR_NO_DATA; + return -1; + } + + const char *p = b_string_ptr(string); + it->_m = ITERATOR_MODE_CHARS; + it->_s = B_STRING(string); + it->char_value = utf8_codepoint_decode(p); + + if (it->char_value == B_WCHAR_INVALID) { + it->status = B_ERR_BAD_FORMAT; + return -1; + } + + return 0; +} + +static bool chars_iterator_next(b_string_iterator *it) +{ + if (!b_string_iterator_is_valid(it)) { + return 
false; + } + + size_t stride = utf8_codepoint_size(it->char_value); + if (stride == 0) { + iterator_cleanup(it); + return false; + } + + it->byte_index += stride; + it->codepoint_index += 1; + + if (it->byte_index >= it->_s->s_len) { + iterator_cleanup(it); + it->_s = NULL; + it->byte_index = 0; + it->codepoint_index = 0; + it->char_value = B_WCHAR_INVALID; + it->status = B_ERR_NO_DATA; + return false; + } + + char *p = string_ptr(it->_s) + it->byte_index; + it->char_value = utf8_codepoint_decode(p); + if (it->char_value == B_WCHAR_INVALID) { + iterator_cleanup(it); + it->_s = NULL; + it->byte_index = 0; + it->codepoint_index = 0; + it->char_value = B_WCHAR_INVALID; + it->status = B_ERR_BAD_FORMAT; + return false; + } + + it->iteration_index++; + return true; +} + +static bool tokens_iterator_next(b_string_iterator *it) +{ + if (!b_string_iterator_is_valid(it)) { + return false; + } + + enum b_status status = find_next_token(it); + if (!B_OK(status)) { + iterator_cleanup(it); + return false; + } + + it->string_value = string_ptr(it->_tmp); + it->iteration_index++; + + return true; +} + +bool b_string_iterator_next(b_string_iterator *it) +{ + switch (it->_m) { + case ITERATOR_MODE_CHARS: + return chars_iterator_next(it); + case ITERATOR_MODE_TOKENS: + return tokens_iterator_next(it); + default: + return false; + } +} + +static bool chars_iterator_is_valid(const struct b_string_iterator *it) +{ + if (!it->_s) { + return false; + } + + if (it->byte_index >= it->_s->s_len) { + return false; + } + + if (it->char_value == B_WCHAR_INVALID) { + return false; + } + + return true; +} + +static bool tokens_iterator_is_valid(const struct b_string_iterator *it) +{ + if (!it->_s) { + return false; + } + + if (it->byte_index >= it->_s->s_len) { + return false; + } + + if (!it->string_value) { + return false; + } + + return true; +} + +bool b_string_iterator_is_valid(const struct b_string_iterator *it) +{ + switch (it->_m) { + case ITERATOR_MODE_CHARS: + return 
chars_iterator_is_valid(it); + case ITERATOR_MODE_TOKENS: + return tokens_iterator_is_valid(it); + default: + return false; + } +} + static void string_release(struct b_object *obj) { struct b_string *str = B_STRING(obj); @@ -624,7 +1647,10 @@ static void string_release(struct b_object *obj) static void string_to_string(struct b_object *obj, struct b_stream *out) { b_string *str = B_STRING(obj); - b_stream_write_fmt(out, NULL, "%s", b_string_ptr(str)); + const char *s = b_string_ptr(str); + for (size_t i = 0; i < str->s_len; i++) { + b_stream_write_char(out, s[i]); + } } char *b_strdup(const char *s) @@ -677,6 +1703,44 @@ size_t b_strlen(const char *s, b_strlen_flags flags) return out; } +b_wchar *b_wstrdup(const b_wchar *s) +{ + size_t len = b_wstrlen(s); + b_wchar *buf = calloc(len + 1, sizeof(b_wchar)); + if (!buf) { + return NULL; + } + + memcpy(buf, s, len * sizeof(b_wchar)); + + return buf; +} + +size_t b_wstrlen(const b_wchar *s) +{ + size_t len; + for (len = 0; s[len] != 0; len++) + ; + return len; +} + +uint64_t b_string_hash(const struct b_string *str) +{ +#define FNV1_OFFSET_BASIS 0xcbf29ce484222325 +#define FNV1_PRIME 0x100000001b3 + uint64_t hash = FNV1_OFFSET_BASIS; + size_t i = 0; + + const char *s = b_string_ptr(str); + + for (i = 0; i < str->s_len; i++) { + hash ^= s[i]; + hash *= FNV1_PRIME; + } + + return hash; +} + b_object_type_id b_string_type_id(void) { return (b_object_type_id)&string_type; diff --git a/object/string.h b/object/string.h index fecf196..4b39c5f 100644 --- a/object/string.h +++ b/object/string.h @@ -8,9 +8,14 @@ struct b_string { struct b_object s_base; - /* length of string, not including null-terminator */ + /* length of string in bytes, not including null-terminator. 
+ * a multi-byte utf-8 codepoint will be counted as multiple bytes here */ unsigned int s_len; - /* maximum length of string storable in the currently-allocated buffer, not including null terminator */ + /* length of string in codepoints, not including null-terminator. + * a multi-byte utf-8 codepoint will be counted as one codepoint here */ + unsigned int s_codepoints; + /* maximum length of string storable in the currently-allocated buffer + * in bytes, not including null terminator */ unsigned int s_max; union { char d_inline[STRING_INLINE_CAPACITY + 1]; From 16b68b6fbaadf1c714e1b4d6a0501c040666d0f6 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:44:56 +0100 Subject: [PATCH 10/23] object: add a type for storing, parsing, and stringifying date/time values --- object/datetime.c | 489 ++++++++++++++++++++++++++ object/datetime.h | 17 + object/include/blue/object/datetime.h | 49 +++ object/include/blue/object/type.h | 1 + 4 files changed, 556 insertions(+) create mode 100644 object/datetime.c create mode 100644 object/datetime.h create mode 100644 object/include/blue/object/datetime.h diff --git a/object/datetime.c b/object/datetime.c new file mode 100644 index 0000000..4e6e6eb --- /dev/null +++ b/object/datetime.c @@ -0,0 +1,489 @@ +#include "datetime.h" + +#include "blue/core/stream.h" +#include "blue/object/string.h" + +#include + +static void datetime_to_string(const struct b_object *obj, struct b_stream *out); + +static struct b_object_type string_type = { + .t_name = "corelib::string", + .t_flags = B_OBJECT_FUNDAMENTAL, + .t_id = B_OBJECT_TYPE_DATETIME, + .t_instance_size = sizeof(struct b_datetime), + .t_to_string = datetime_to_string, +}; + +struct b_datetime *b_datetime_create(void) +{ + return (struct b_datetime *)b_object_type_instantiate(&string_type); +} + +static bool is_leap_year(const struct b_datetime *dt) +{ + if ((dt->dt_year % 400) == 0) { + return true; + } + + if ((dt->dt_year % 4) == 0 && (dt->dt_year % 100) != 0) { + return 
true; + } + + return false; +} + +static bool is_year_valid(const struct b_datetime *dt) +{ + return dt->dt_year >= 0; +} + +static bool is_month_valid(const struct b_datetime *dt) +{ + return dt->dt_month >= 1 && dt->dt_month <= 12; +} + +static bool is_day_valid(const struct b_datetime *dt) +{ + if (dt->dt_day < 1) { + return false; + } + + switch (dt->dt_month) { + case 2: + return dt->dt_day <= (is_leap_year(dt) ? 29 : 28); + case 4: + case 6: + case 9: + case 11: + return dt->dt_day <= 30; + case 1: + case 3: + case 5: + case 7: + case 8: + case 10: + case 12: + return dt->dt_day <= 31; + default: + return false; + } +} + +static bool is_time_valid(const struct b_datetime *dt) +{ + if (!(dt->dt_hour >= 0 && dt->dt_hour <= 23)) { + return false; + } + + if (!(dt->dt_min >= 0 && dt->dt_min <= 59)) { + return false; + } + + if (!(dt->dt_sec >= 0 && dt->dt_sec <= 60)) { + return false; + } + + return true; +} + +static bool is_zone_valid(const struct b_datetime *dt) +{ + if (!(dt->dt_zone_offset_hour >= 0 && dt->dt_zone_offset_hour <= 23)) { + return false; + } + + if (!(dt->dt_zone_offset_minute >= 0 && dt->dt_zone_offset_minute <= 59)) { + return false; + } + + return true; +} + +static bool validate(const struct b_datetime *dt) +{ + if (dt->dt_has_date) { + if (!is_year_valid(dt)) { + return false; + } + + if (!is_month_valid(dt)) { + return false; + } + + if (!is_day_valid(dt)) { + return false; + } + } + + if (dt->dt_has_time) { + if (!is_time_valid(dt)) { + return false; + } + + if (!is_zone_valid(dt)) { + return false; + } + } + + return true; +} + +struct b_datetime *parse_rfc3339(const char *s) +{ + struct b_datetime *dt = b_datetime_create(); + if (!dt) { + return NULL; + } + + size_t len = strlen(s); + + size_t i = 0, c = 0; + + bool has_date = false, has_time = false; + dt->dt_localtime = true; + + if (len >= 10 && s[4] == '-' && s[7] == '-') { + has_date = true; + } + + if (len >= 8 && s[2] == ':' && s[5] == ':') { + has_time = true; + } + + if (len 
>= 19 && s[4] == '-' && s[7] == '-' + && (s[10] == 'T' || s[10] == 't' || s[10] == ' ') && s[13] == ':' + && s[16] == ':') { + has_date = true; + has_time = true; + } + + if (!has_date && !has_time) { + goto fail; + } + + if (has_date) { + for (c = 0; c < 4; c++, i++) { + if (!isdigit(s[i])) { + goto fail; + } + + dt->dt_year *= 10; + dt->dt_year += (s[i] - '0'); + } + + if (s[i++] != '-') { + goto fail; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + goto fail; + } + + dt->dt_month *= 10; + dt->dt_month += (s[i] - '0'); + } + + if (s[i++] != '-' || dt->dt_month > 12) { + goto fail; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + goto fail; + } + + dt->dt_day *= 10; + dt->dt_day += (s[i] - '0'); + } + + if (dt->dt_day > 31) { + goto fail; + } + } + + if ((s[i] == 'T' || s[i] == 't' || s[i] == ' ') && !has_time) { + goto fail; + } + + if (has_date && has_time) { + if (s[i] != 'T' && s[i] != 't' && s[i] != ' ') { + goto fail; + } + + i++; + } + + if (has_time) { + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + goto fail; + } + + dt->dt_hour *= 10; + dt->dt_hour += (s[i] - '0'); + } + + if (s[i++] != ':') { + goto fail; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + goto fail; + } + + dt->dt_min *= 10; + dt->dt_min += (s[i] - '0'); + } + + if (s[i++] != ':') { + goto fail; + } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + goto fail; + } + + dt->dt_sec *= 10; + dt->dt_sec += (s[i] - '0'); + } + + if (s[i] == '.') { + i++; + for (c = 0; s[i]; c++, i++) { + if (!isdigit(s[i])) { + break; + } + + dt->dt_msec *= 10; + dt->dt_msec += (s[i] - '0'); + } + + if (c == 0) { + goto fail; + } + } + + if (s[i] == '+' || s[i] == '-') { + dt->dt_localtime = false; + dt->dt_zone_offset_negative = s[i] == '-'; + i++; + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + goto fail; + } + + dt->dt_zone_offset_hour *= 10; + dt->dt_zone_offset_hour += (s[i] - '0'); + } + + if (s[i++] != ':') { + goto fail; 
+ } + + for (c = 0; c < 2; c++, i++) { + if (!isdigit(s[i])) { + goto fail; + } + + dt->dt_zone_offset_minute *= 10; + dt->dt_zone_offset_minute += (s[i] - '0'); + } + } else if (s[i] == 'Z' || s[i] == 'z') { + dt->dt_localtime = false; + i++; + } + } + + if (s[i] != 0) { + goto fail; + } + + dt->dt_has_date = has_date; + dt->dt_has_time = has_time; + return dt; +fail: + b_datetime_release(dt); + return NULL; +} + +struct b_datetime *b_datetime_parse(enum b_datetime_format format, const char *s) +{ + struct b_datetime *dt = NULL; + + switch (format) { + case B_DATETIME_FORMAT_RFC3339: + dt = parse_rfc3339(s); + break; + default: + return NULL; + } + + if (!dt) { + return NULL; + } + + if (!validate(dt)) { + b_datetime_release(dt); + return NULL; + } + + return dt; +} + +enum b_status encode_rfc3339(const struct b_datetime *dt, struct b_stream *out) +{ + if (dt->dt_has_date) { + b_stream_write_fmt( + out, NULL, "%04ld-%02ld-%02ld", dt->dt_year, + dt->dt_month, dt->dt_day); + } + + if (dt->dt_has_date && dt->dt_has_time) { + b_stream_write_char(out, 'T'); + } + + if (dt->dt_has_time) { + b_stream_write_fmt( + out, NULL, "%02ld:%02ld:%02ld", dt->dt_hour, dt->dt_min, + dt->dt_sec); + + if (dt->dt_msec > 0) { + b_stream_write_fmt(out, NULL, ".%04ld", dt->dt_msec); + } + + if (!dt->dt_localtime) { + if (dt->dt_zone_offset_hour == 0 + && dt->dt_zone_offset_minute == 0) { + b_stream_write_char(out, 'Z'); + } else { + b_stream_write_fmt( + out, NULL, "%c%02ld:%02ld", + dt->dt_zone_offset_negative ? 
'-' : '+', + dt->dt_zone_offset_hour, + dt->dt_zone_offset_minute); + } + } + } + + return B_SUCCESS; +} + +void b_datetime_to_string( + const b_datetime *dt, b_datetime_format format, struct b_string *dest) +{ + struct b_stream *out; + b_string_open_stream(dest, &out); + + switch (format) { + case B_DATETIME_FORMAT_RFC3339: + encode_rfc3339(dt, out); + break; + default: + break; + } + + b_stream_close(out); +} + +bool b_datetime_is_localtime(const b_datetime *dt) +{ + return dt->dt_localtime; +} + +bool b_datetime_has_date(const b_datetime *dt) +{ + return dt->dt_has_date; +} + +bool b_datetime_has_time(const b_datetime *dt) +{ + return dt->dt_has_time; +} + +long b_datetime_year(const b_datetime *dt) +{ + return dt->dt_year; +} + +long b_datetime_month(const b_datetime *dt) +{ + return dt->dt_month; +} + +long b_datetime_day(const b_datetime *dt) +{ + return dt->dt_day; +} + +long b_datetime_hour(const b_datetime *dt) +{ + return dt->dt_hour; +} + +long b_datetime_minute(const b_datetime *dt) +{ + return dt->dt_min; +} + +long b_datetime_second(const b_datetime *dt) +{ + return dt->dt_sec; +} + +long b_datetime_subsecond(const b_datetime *dt) +{ + return dt->dt_msec; +} + +bool b_datetime_zone_offset_is_negative(const b_datetime *dt) +{ + return dt->dt_zone_offset_negative; +} + +long b_datetime_zone_offset_hour(const b_datetime *dt) +{ + return dt->dt_zone_offset_hour; +} + +long b_datetime_zone_offset_minute(const b_datetime *dt) +{ + return dt->dt_zone_offset_minute; +} + +static void datetime_to_string(const struct b_object *obj, struct b_stream *out) +{ + struct b_datetime *dt = B_DATETIME(obj); + + if (dt->dt_has_date) { + b_stream_write_fmt( + out, NULL, "%04ld-%02ld-%02ld", dt->dt_year, + dt->dt_month, dt->dt_day); + } + + if (dt->dt_has_date && dt->dt_has_time) { + b_stream_write_char(out, ' '); + } + + if (dt->dt_has_time) { + b_stream_write_fmt( + out, NULL, "%02ld:%02ld:%02ld", dt->dt_hour, dt->dt_min, + dt->dt_sec); + + if (dt->dt_msec > 0) { + 
b_stream_write_fmt(out, NULL, ".%04ld", dt->dt_msec); + } + + if (!dt->dt_localtime) { + b_stream_write_fmt( + out, NULL, " %c%02ld:%02ld", + dt->dt_zone_offset_negative ? '-' : '+', + dt->dt_zone_offset_hour, + dt->dt_zone_offset_minute); + } + } +} diff --git a/object/datetime.h b/object/datetime.h new file mode 100644 index 0000000..ff391f8 --- /dev/null +++ b/object/datetime.h @@ -0,0 +1,17 @@ +#ifndef _BLUELIB_DATETIME_H_ +#define _BLUELIB_DATETIME_H_ + +#include "object.h" + +struct b_datetime { + struct b_object dt_base; + unsigned int dt_year, dt_month, dt_day; + unsigned short dt_hour, dt_min, dt_sec; + unsigned int dt_msec; + + bool dt_has_date, dt_has_time, dt_localtime; + unsigned short dt_zone_offset_hour, dt_zone_offset_minute; + bool dt_zone_offset_negative; +}; + +#endif diff --git a/object/include/blue/object/datetime.h b/object/include/blue/object/datetime.h new file mode 100644 index 0000000..0f07fbd --- /dev/null +++ b/object/include/blue/object/datetime.h @@ -0,0 +1,49 @@ +#ifndef BLUELIB_DATETIME_H_ +#define BLUELIB_DATETIME_H_ + +#include +#include +#include +#include + +struct b_string; + +#define B_DATETIME(p) ((b_datetime *)(p)) + +typedef struct b_datetime b_datetime; + +typedef enum b_datetime_format { + B_DATETIME_FORMAT_RFC3339 = 1, +} b_datetime_format; + +BLUE_API b_datetime *b_datetime_create(void); +BLUE_API b_datetime *b_datetime_parse(b_datetime_format format, const char *s); +BLUE_API void b_datetime_to_string( + const b_datetime *dt, b_datetime_format format, struct b_string *dest); + +static inline b_datetime *b_datetime_retain(b_datetime *dt) +{ + return B_DATETIME(b_retain(B_OBJECT(dt))); +} +static inline void b_datetime_release(b_datetime *dt) +{ + b_release(B_OBJECT(dt)); +} + +BLUE_API bool b_datetime_is_localtime(const b_datetime *dt); +BLUE_API bool b_datetime_has_date(const b_datetime *dt); +BLUE_API bool b_datetime_has_time(const b_datetime *dt); + +BLUE_API long b_datetime_year(const b_datetime *dt); +BLUE_API long 
b_datetime_month(const b_datetime *dt); +BLUE_API long b_datetime_day(const b_datetime *dt); +BLUE_API long b_datetime_hour(const b_datetime *dt); +BLUE_API long b_datetime_minute(const b_datetime *dt); +BLUE_API long b_datetime_second(const b_datetime *dt); +BLUE_API long b_datetime_subsecond(const b_datetime *dt); + +BLUE_API bool b_datetime_zone_offset_is_negative(const b_datetime *dt); +BLUE_API long b_datetime_zone_offset_hour(const b_datetime *dt); +BLUE_API long b_datetime_zone_offset_minute(const b_datetime *dt); + +#endif diff --git a/object/include/blue/object/type.h b/object/include/blue/object/type.h index b45c200..2888813 100644 --- a/object/include/blue/object/type.h +++ b/object/include/blue/object/type.h @@ -30,6 +30,7 @@ typedef enum b_fundamental_type_id { B_OBJECT_TYPE_PATH, B_OBJECT_TYPE_FILE, B_OBJECT_TYPE_DIRECTORY, + B_OBJECT_TYPE_DATETIME, } b_fundamental_type_id; typedef enum b_object_type_flags { From d9041cda3f1a1e95f8555fd753812fc00c92c0eb Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:47:33 +0100 Subject: [PATCH 11/23] object: to_string() now takes a const object pointer --- object/array.c | 4 ++-- object/dict.c | 4 ++-- object/include/blue/object/object.h | 2 +- object/include/blue/object/type.h | 2 +- object/number.c | 4 ++-- object/object.c | 2 +- object/string.c | 4 ++-- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/object/array.c b/object/array.c index b8cf20c..325a6fa 100644 --- a/object/array.c +++ b/object/array.c @@ -8,7 +8,7 @@ #include static void array_release(struct b_object *obj); -static void array_to_string(struct b_object *obj, struct b_stream *out); +static void array_to_string(const struct b_object *obj, struct b_stream *out); static struct b_object_type array_type = { .t_flags = B_OBJECT_FUNDAMENTAL, @@ -221,7 +221,7 @@ size_t b_array_capacity(const struct b_array *array) return array->ar_cap; } -static void array_to_string(struct b_object *obj, struct b_stream *out) +static void 
array_to_string(const struct b_object *obj, struct b_stream *out) { struct b_array *array = B_ARRAY(obj); diff --git a/object/dict.c b/object/dict.c index c6711ea..d4b1ff0 100644 --- a/object/dict.c +++ b/object/dict.c @@ -24,7 +24,7 @@ uint64_t b_cstr_hash(const char *s) } static void dict_release(struct b_object *obj); -static void dict_to_string(struct b_object *obj, struct b_stream *out); +static void dict_to_string(const struct b_object *obj, struct b_stream *out); static struct b_object_type dict_type = { .t_name = "corelib::dict", @@ -186,7 +186,7 @@ bool b_dict_is_empty(const b_dict *dict) return false; } -static void dict_to_string(struct b_object *obj, struct b_stream *out) +static void dict_to_string(const struct b_object *obj, struct b_stream *out) { struct b_dict *dict = B_DICT(obj); diff --git a/object/include/blue/object/object.h b/object/include/blue/object/object.h index 185d6ff..997d1c7 100644 --- a/object/include/blue/object/object.h +++ b/object/include/blue/object/object.h @@ -32,7 +32,7 @@ BLUE_API b_object *b_make_rvalue(b_object *obj); BLUE_API b_object *b_retain(b_object *obj); BLUE_API void b_release(b_object *obj); -BLUE_API void b_to_string(b_object *obj, struct b_stream *out); +BLUE_API void b_to_string(const b_object *obj, struct b_stream *out); BLUE_API b_object_type_id b_typeid(const b_object *obj); BLUE_API b_comparison_result_t b_compare(const b_object *a, const b_object *b); diff --git a/object/include/blue/object/type.h b/object/include/blue/object/type.h index 2888813..56a3baf 100644 --- a/object/include/blue/object/type.h +++ b/object/include/blue/object/type.h @@ -45,7 +45,7 @@ typedef struct b_object_type { b_queue_entry t_entry; void (*t_init)(struct b_object *); void (*t_release)(struct b_object *); - void (*t_to_string)(struct b_object *, struct b_stream *); + void (*t_to_string)(const struct b_object *, struct b_stream *); } b_object_type; BLUE_API b_status b_object_type_register(b_object_type *type); diff --git 
a/object/number.c b/object/number.c index ca18f6c..a031cfe 100644 --- a/object/number.c +++ b/object/number.c @@ -10,7 +10,7 @@ typedef int (*number_converter_t)(const struct b_number *, void *); static number_converter_t converters[B_NUMBER_TYPE_COUNT][B_NUMBER_TYPE_COUNT]; -static void number_to_string(struct b_object *obj, struct b_stream *out); +static void number_to_string(const struct b_object *obj, struct b_stream *out); static struct b_object_type number_type = { .t_name = "corelib::number", @@ -175,7 +175,7 @@ size_t b_number_data_size(const struct b_number *number) } } -static void number_to_string(struct b_object *obj, struct b_stream *out) +static void number_to_string(const struct b_object *obj, struct b_stream *out) { struct b_number *number = B_NUMBER(obj); switch (number->n_type) { diff --git a/object/object.c b/object/object.c index 812e9c4..750a08d 100644 --- a/object/object.c +++ b/object/object.c @@ -41,7 +41,7 @@ void b_release(struct b_object *obj) free(obj); } -void b_to_string(struct b_object *obj, struct b_stream *out) +void b_to_string(const struct b_object *obj, struct b_stream *out) { if (obj->ob_type->t_to_string) { obj->ob_type->t_to_string(obj, out); diff --git a/object/string.c b/object/string.c index d2d3077..2d92538 100644 --- a/object/string.c +++ b/object/string.c @@ -23,7 +23,7 @@ enum iterator_mode { }; static void string_release(struct b_object *obj); -static void string_to_string(struct b_object *obj, struct b_stream *out); +static void string_to_string(const struct b_object *obj, struct b_stream *out); static struct b_object_type string_type = { .t_name = "corelib::string", @@ -1644,7 +1644,7 @@ static void string_release(struct b_object *obj) } } -static void string_to_string(struct b_object *obj, struct b_stream *out) +static void string_to_string(const struct b_object *obj, struct b_stream *out) { b_string *str = B_STRING(obj); const char *s = b_string_ptr(str); From b7da91ac9357e0fb5c0f00ce0bfee32db21ee7c7 Mon Sep 17 
00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:48:05 +0100 Subject: [PATCH 12/23] object: hashmap: implement integer-based hashmap keys rather than always interpreting a b_hashmap_key as a buffer to be hashed, b_hashmap can now be told to consider the value of the key_data pointer itself as the key, treating it as a buffer of size sizeof(void*). --- object/hashmap.c | 67 ++++++++++++++++++++-------- object/include/blue/object/hashmap.h | 6 +++ 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/object/hashmap.c b/object/hashmap.c index 7f50283..4017f20 100644 --- a/object/hashmap.c +++ b/object/hashmap.c @@ -38,6 +38,45 @@ static uint64_t hash_data(const void *p, size_t size) return hash; } +static uint64_t hash_key(const struct b_hashmap_key *key) +{ + if (key->key_flags & B_HASHMAP_KEY_F_INTVALUE) { + return hash_data(&key->key_data, sizeof key->key_data); + } else { + return hash_data(key->key_data, key->key_size); + } +} + +static bool compare_key( + const struct b_hashmap_key *a, const struct b_hashmap_key *b) +{ + const void *a_data = NULL, *b_data = NULL; + size_t a_len = 0, b_len = 0; + + if (a->key_flags & B_HASHMAP_KEY_F_INTVALUE) { + a_data = &a->key_data; + a_len = sizeof a->key_data; + } else { + a_data = a->key_data; + a_len = a->key_size; + } + + if (b->key_flags & B_HASHMAP_KEY_F_INTVALUE) { + b_data = &b->key_data; + b_len = sizeof b->key_data; + } else { + b_data = b->key_data; + b_len = b->key_size; + } + + if (a_len != b_len) { + return false; + } + + size_t cmp_len = a_len; + return memcmp(a_data, b_data, cmp_len) == 0; +} + static void hashmap_release(struct b_object *obj); static struct b_object_type hashmap_type = { @@ -101,7 +140,7 @@ b_status b_hashmap_put( struct b_hashmap *hashmap, const b_hashmap_key *key, const b_hashmap_value *value) { - uint64_t hash = hash_data(key->key_data, key->key_size); + uint64_t hash = hash_key(key); struct b_hashmap_bucket *bucket = get_bucket(&hashmap->h_buckets, hash); if (!bucket) { @@ 
-119,12 +158,9 @@ b_status b_hashmap_put( struct b_hashmap_bucket_item *item = b_unbox( struct b_hashmap_bucket_item, it.entry, bi_entry); - if (item->bi_key.key_size != key->key_size) { - continue; - } - - if (!memcmp(item->bi_key.key_data, key->key_data, key->key_size)) { - return B_ERR_NAME_EXISTS; + if (compare_key(&item->bi_key, key)) { + memcpy(&item->bi_value, value, sizeof *value); + return B_SUCCESS; } } @@ -144,7 +180,8 @@ b_status b_hashmap_put( const struct b_hashmap_value *b_hashmap_get( const struct b_hashmap *hashmap, const struct b_hashmap_key *key) { - uint64_t hash = hash_data(key->key_data, key->key_size); + uint64_t hash = hash_key(key); + struct b_hashmap_bucket *bucket = get_bucket(&hashmap->h_buckets, hash); if (!bucket) { return NULL; @@ -155,11 +192,7 @@ const struct b_hashmap_value *b_hashmap_get( struct b_hashmap_bucket_item *item = b_unbox( struct b_hashmap_bucket_item, it.entry, bi_entry); - if (item->bi_key.key_size != key->key_size) { - continue; - } - - if (!memcmp(item->bi_key.key_data, key->key_data, key->key_size)) { + if (compare_key(&item->bi_key, key)) { return &item->bi_value; } } @@ -169,7 +202,7 @@ const struct b_hashmap_value *b_hashmap_get( bool b_hashmap_has_key(const struct b_hashmap *hashmap, const b_hashmap_key *key) { - uint64_t hash = hash_data(key->key_data, key->key_size); + uint64_t hash = hash_key(key); struct b_hashmap_bucket *bucket = get_bucket(&hashmap->h_buckets, hash); if (!bucket) { return false; @@ -180,11 +213,7 @@ bool b_hashmap_has_key(const struct b_hashmap *hashmap, const b_hashmap_key *key struct b_hashmap_bucket_item *item = b_unbox( struct b_hashmap_bucket_item, it.entry, bi_entry); - if (item->bi_key.key_size != key->key_size) { - continue; - } - - if (!memcmp(item->bi_key.key_data, key->key_data, key->key_size)) { + if (compare_key(&item->bi_key, key)) { return true; } } diff --git a/object/include/blue/object/hashmap.h b/object/include/blue/object/hashmap.h index 225b2f2..217aafd 100644 --- 
a/object/include/blue/object/hashmap.h +++ b/object/include/blue/object/hashmap.h @@ -7,6 +7,7 @@ #include #include #include +#include #define B_HASHMAP(p) ((b_hashmap *)(p)) @@ -38,7 +39,12 @@ typedef struct b_hashmap b_hashmap; typedef void (*b_hashmap_key_destructor)(void *); typedef void (*b_hashmap_value_destructor)(void *); +typedef enum b_hashmap_key_flags { + B_HASHMAP_KEY_F_INTVALUE = 0x01u, +} b_hashmap_key_flags; + typedef struct b_hashmap_key { + b_hashmap_key_flags key_flags; const void *key_data; size_t key_size; } b_hashmap_key; From be47176524be470b6f0c4e9086b47d1a9dab280a Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:50:07 +0100 Subject: [PATCH 13/23] object: dict: use b_strings as keys rather than plain c-strings. now that strings can contain UTF-8 codepoints and null chars, the b_dict api has been enhanced to accept keys as b_strings as well as regular c-strings. keys are now stored as b_strings internally, to allow a wider range of keys to be used. 
--- object/dict.c | 82 ++++++++++++++++++++++++++++--- object/dict.h | 4 +- object/include/blue/object/dict.h | 9 +++- 3 files changed, 85 insertions(+), 10 deletions(-) diff --git a/object/dict.c b/object/dict.c index d4b1ff0..8e98b83 100644 --- a/object/dict.c +++ b/object/dict.c @@ -105,7 +105,35 @@ b_status b_dict_put(struct b_dict *dict, const char *key, b_object *value) return B_ERR_NO_MEMORY; } - item->bi_str = b_strdup(key); + item->bi_str = b_string_create_from_cstr(key); + item->bi_value = b_retain(value); + + b_queue_push_back(&bucket->bk_items, &item->bi_entry); + + return B_SUCCESS; +} + +b_status b_dict_put_sk( + struct b_dict *dict, const struct b_string *key, b_object *value) +{ + uint64_t hash = b_string_hash(key); + struct b_dict_bucket *bucket = get_bucket(&dict->d_buckets, hash); + if (!bucket) { + bucket = create_bucket(); + if (!bucket) { + return B_ERR_NO_MEMORY; + } + + bucket->bk_hash = hash; + put_bucket(&dict->d_buckets, bucket); + } + + struct b_dict_bucket_item *item = create_bucket_item(); + if (!item) { + return B_ERR_NO_MEMORY; + } + + item->bi_str = b_string_duplicate(key); item->bi_value = b_retain(value); b_queue_push_back(&bucket->bk_items, &item->bi_entry); @@ -126,7 +154,28 @@ b_object *b_dict_at(const struct b_dict *dict, const char *key) struct b_dict_bucket_item *item = b_unbox(struct b_dict_bucket_item, it.entry, bi_entry); - if (!strcmp(item->bi_str, key)) { + if (!strcmp(b_string_ptr(item->bi_str), key)) { + return item->bi_value; + } + } + + return NULL; +} + +b_object *b_dict_at_sk(const struct b_dict *dict, const struct b_string *key) +{ + uint64_t hash = b_string_hash(key); + struct b_dict_bucket *bucket = get_bucket(&dict->d_buckets, hash); + if (!bucket) { + return NULL; + } + + b_queue_iterator it; + b_queue_foreach (&it, &bucket->bk_items) { + struct b_dict_bucket_item *item + = b_unbox(struct b_dict_bucket_item, it.entry, bi_entry); + + if (b_string_compare(item->bi_str, key)) { return item->bi_value; } } @@ 
-144,11 +193,26 @@ b_object *b_dict_get(struct b_dict *dict, const char *key) return value; } +b_object *b_dict_get_sk(struct b_dict *dict, const struct b_string *key) +{ + b_object *value = b_dict_at_sk(dict, key); + if (value) { + b_retain(value); + } + + return value; +} + bool b_dict_has_key(const struct b_dict *dict, const char *key) { return b_dict_at(dict, key) != NULL; } +bool b_dict_has_skey(const struct b_dict *dict, const struct b_string *key) +{ + return b_dict_at_sk(dict, key) != NULL; +} + size_t b_dict_get_size(const struct b_dict *dict) { size_t count = 0; @@ -203,7 +267,8 @@ static void dict_to_string(const struct b_object *obj, struct b_stream *out) b_dict_iterator it; b_dict_foreach(&it, dict) { - b_stream_write_fmt(out, NULL, "%s: ", it.key); + b_to_string(B_OBJECT(it.key), out); + b_stream_write_string(out, ": ", NULL); bool is_string = b_typeid(it.value) == B_OBJECT_TYPE_STRING; @@ -243,11 +308,12 @@ static bool dict_iterator_is_valid(const struct b_iterator *it) return b_dict_iterator_is_valid((struct b_dict_iterator *)it); } -static struct b_iterator_ops it_ops - = {.it_next = dict_iterator_next, - .it_close = NULL, - .it_erase = dict_iterator_erase, - .it_is_valid = dict_iterator_is_valid}; +static struct b_iterator_ops it_ops = { + .it_next = dict_iterator_next, + .it_close = NULL, + .it_erase = dict_iterator_erase, + .it_is_valid = dict_iterator_is_valid, +}; int b_dict_iterator_begin(struct b_dict *dict, b_dict_iterator *it) { diff --git a/object/dict.h b/object/dict.h index e0bc013..3f8eb94 100644 --- a/object/dict.h +++ b/object/dict.h @@ -6,9 +6,11 @@ #include #include +struct b_string; + struct b_dict_bucket_item { b_queue_entry bi_entry; - char *bi_str; + struct b_string *bi_str; struct b_object *bi_value; }; diff --git a/object/include/blue/object/dict.h b/object/include/blue/object/dict.h index 531e069..7ba5604 100644 --- a/object/include/blue/object/dict.h +++ b/object/include/blue/object/dict.h @@ -8,6 +8,8 @@ #include #include 
+struct b_string; + #define B_DICT(p) ((b_dict *)(p)) #define B_DICT_ITEM(k, v) \ @@ -28,7 +30,7 @@ typedef struct b_dict b_dict; typedef struct b_dict_iterator { b_iterator _base; size_t i; - const char *key; + const struct b_string *key; b_object *value; b_dict *_d; @@ -54,10 +56,15 @@ static inline void b_dict_release(b_dict *dict) } BLUE_API b_status b_dict_put(b_dict *dict, const char *key, b_object *value); +BLUE_API b_status b_dict_put_sk( + b_dict *dict, const struct b_string *key, b_object *value); BLUE_API b_object *b_dict_at(const b_dict *dict, const char *key); +BLUE_API b_object *b_dict_at_sk(const b_dict *dict, const struct b_string *key); BLUE_API b_object *b_dict_get(b_dict *dict, const char *key); +BLUE_API b_object *b_dict_get_sk(b_dict *dict, const struct b_string *key); BLUE_API bool b_dict_has_key(const b_dict *dict, const char *key); +BLUE_API bool b_dict_has_skey(const b_dict *dict, const struct b_string *key); BLUE_API size_t b_dict_get_size(const b_dict *dict); BLUE_API bool b_dict_is_empty(const b_dict *dict); From 15cb22553320d495905c7eecac42223eb94c8739 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:52:02 +0100 Subject: [PATCH 14/23] object: number: add support for positive/negative infinite and NaN values --- object/include/blue/object/number.h | 11 + object/number.c | 523 ++++++++++++++++++++++++++++ object/number.h | 6 + 3 files changed, 540 insertions(+) diff --git a/object/include/blue/object/number.h b/object/include/blue/object/number.h index b8eaa0c..0051846 100644 --- a/object/include/blue/object/number.h +++ b/object/include/blue/object/number.h @@ -237,6 +237,17 @@ static inline size_t b_number_get_size_t(const b_number *number) BLUE_API bool b_number_is_integer(const b_number *number); BLUE_API bool b_number_is_float(const b_number *number); +BLUE_API bool b_number_is_inf(const b_number *number); +BLUE_API bool b_number_is_inf_positive(const b_number *number); +BLUE_API bool b_number_is_inf_negative(const 
b_number *number); +BLUE_API bool b_number_is_nan(const b_number *number); +BLUE_API bool b_number_is_nan_positive(const b_number *number); +BLUE_API bool b_number_is_nan_negative(const b_number *number); + +BLUE_API void b_number_set_inf_positive(b_number *number, bool v); +BLUE_API void b_number_set_inf_negative(b_number *number, bool v); +BLUE_API void b_number_set_nan_positive(b_number *number, bool v); +BLUE_API void b_number_set_nan_negative(b_number *number, bool v); BLUE_API size_t b_number_data_size(const b_number *number); diff --git a/object/number.c b/object/number.c index a031cfe..075520f 100644 --- a/object/number.c +++ b/object/number.c @@ -139,6 +139,408 @@ bool b_number_is_float(const struct b_number *number) } } +bool b_number_is_inf(const b_number *number) +{ + return (number->n_flags & NUMBER_F_INF) != 0; +} + +bool b_number_is_inf_positive(const b_number *number) +{ + if (!(number->n_flags & NUMBER_F_INF)) { + return false; + } + + switch (number->n_type) { + case B_NUMBER_INT8: + return number->n_value.v_int8 >= 0; + case B_NUMBER_INT16: + return number->n_value.v_int16 >= 0; + case B_NUMBER_INT32: + return number->n_value.v_int32 >= 0; + case B_NUMBER_INT64: + return number->n_value.v_int64 >= 0; + case B_NUMBER_FLOAT32: + return number->n_value.v_float32 >= 0; + case B_NUMBER_FLOAT64: + return number->n_value.v_float64 >= 0; + case B_NUMBER_CHAR: + return number->n_value.v_char >= 0; + case B_NUMBER_SHORT: + return number->n_value.v_short >= 0; + case B_NUMBER_INT: + return number->n_value.v_int >= 0; + case B_NUMBER_LONG: + return number->n_value.v_long >= 0; + case B_NUMBER_LONGLONG: + return number->n_value.v_longlong >= 0; + case B_NUMBER_FLOAT: + return number->n_value.v_float >= 0; + case B_NUMBER_DOUBLE: + return number->n_value.v_double >= 0; + case B_NUMBER_SIZE_T: + return number->n_value.v_size_t >= 0; + default: + return true; + } +} + +bool b_number_is_inf_negative(const b_number *number) +{ + if (!(number->n_flags & 
NUMBER_F_INF)) { + return false; + } + + switch (number->n_type) { + case B_NUMBER_INT8: + return number->n_value.v_int8 < 0; + case B_NUMBER_INT16: + return number->n_value.v_int16 < 0; + case B_NUMBER_INT32: + return number->n_value.v_int32 < 0; + case B_NUMBER_INT64: + return number->n_value.v_int64 < 0; + case B_NUMBER_FLOAT32: + return number->n_value.v_float32 < 0; + case B_NUMBER_FLOAT64: + return number->n_value.v_float64 < 0; + case B_NUMBER_CHAR: + return number->n_value.v_char < 0; + case B_NUMBER_SHORT: + return number->n_value.v_short < 0; + case B_NUMBER_INT: + return number->n_value.v_int < 0; + case B_NUMBER_LONG: + return number->n_value.v_long < 0; + case B_NUMBER_LONGLONG: + return number->n_value.v_longlong < 0; + case B_NUMBER_FLOAT: + return number->n_value.v_float < 0; + case B_NUMBER_DOUBLE: + return number->n_value.v_double < 0; + case B_NUMBER_SIZE_T: + return number->n_value.v_size_t < 0; + default: + return false; + } +} + +bool b_number_is_nan(const b_number *number) +{ + return (number->n_flags & NUMBER_F_NAN) != 0; +} + +bool b_number_is_nan_positive(const b_number *number) +{ + if (!(number->n_flags & NUMBER_F_NAN)) { + return false; + } + + switch (number->n_type) { + case B_NUMBER_INT8: + return number->n_value.v_int8 >= 0; + case B_NUMBER_INT16: + return number->n_value.v_int16 >= 0; + case B_NUMBER_INT32: + return number->n_value.v_int32 >= 0; + case B_NUMBER_INT64: + return number->n_value.v_int64 >= 0; + case B_NUMBER_FLOAT32: + return number->n_value.v_float32 >= 0; + case B_NUMBER_FLOAT64: + return number->n_value.v_float64 >= 0; + case B_NUMBER_CHAR: + return number->n_value.v_char >= 0; + case B_NUMBER_SHORT: + return number->n_value.v_short >= 0; + case B_NUMBER_INT: + return number->n_value.v_int >= 0; + case B_NUMBER_LONG: + return number->n_value.v_long >= 0; + case B_NUMBER_LONGLONG: + return number->n_value.v_longlong >= 0; + case B_NUMBER_FLOAT: + return number->n_value.v_float >= 0; + case B_NUMBER_DOUBLE: + return 
number->n_value.v_double >= 0; + case B_NUMBER_SIZE_T: + return number->n_value.v_size_t >= 0; + default: + return true; + } +} + +bool b_number_is_nan_negative(const b_number *number) +{ + if (!(number->n_flags & NUMBER_F_NAN)) { + return false; + } + + switch (number->n_type) { + case B_NUMBER_INT8: + return number->n_value.v_int8 < 0; + case B_NUMBER_INT16: + return number->n_value.v_int16 < 0; + case B_NUMBER_INT32: + return number->n_value.v_int32 < 0; + case B_NUMBER_INT64: + return number->n_value.v_int64 < 0; + case B_NUMBER_FLOAT32: + return number->n_value.v_float32 < 0; + case B_NUMBER_FLOAT64: + return number->n_value.v_float64 < 0; + case B_NUMBER_CHAR: + return number->n_value.v_char < 0; + case B_NUMBER_SHORT: + return number->n_value.v_short < 0; + case B_NUMBER_INT: + return number->n_value.v_int < 0; + case B_NUMBER_LONG: + return number->n_value.v_long < 0; + case B_NUMBER_LONGLONG: + return number->n_value.v_longlong < 0; + case B_NUMBER_FLOAT: + return number->n_value.v_float < 0; + case B_NUMBER_DOUBLE: + return number->n_value.v_double < 0; + case B_NUMBER_SIZE_T: + return number->n_value.v_size_t < 0; + default: + return false; + } +} + +void b_number_set_inf_positive(b_number *number, bool v) +{ + if (!v) { + number->n_flags &= ~NUMBER_F_INF; + return; + } + + number->n_flags &= ~NUMBER_F_NAN; + number->n_flags |= NUMBER_F_INF; + + switch (number->n_type) { + case B_NUMBER_INT8: + number->n_value.v_int8 = 0; + break; + case B_NUMBER_INT16: + number->n_value.v_int16 = 0; + break; + case B_NUMBER_INT32: + number->n_value.v_int32 = 0; + break; + case B_NUMBER_INT64: + number->n_value.v_int64 = 0; + break; + case B_NUMBER_FLOAT32: + number->n_value.v_float32 = 0; + break; + case B_NUMBER_FLOAT64: + number->n_value.v_float64 = 0; + break; + case B_NUMBER_CHAR: + number->n_value.v_char = 0; + break; + case B_NUMBER_SHORT: + number->n_value.v_short = 0; + break; + case B_NUMBER_INT: + number->n_value.v_int = 0; + break; + case B_NUMBER_LONG: + 
number->n_value.v_long = 0; + break; + case B_NUMBER_LONGLONG: + number->n_value.v_longlong = 0; + break; + case B_NUMBER_FLOAT: + number->n_value.v_float = 0; + break; + case B_NUMBER_DOUBLE: + number->n_value.v_double = 0; + break; + case B_NUMBER_SIZE_T: + number->n_value.v_size_t = 0; + break; + default: + break; + } +} + +void b_number_set_inf_negative(b_number *number, bool v) +{ + if (!v) { + number->n_flags &= ~NUMBER_F_INF; + return; + } + + number->n_flags &= ~NUMBER_F_NAN; + number->n_flags |= NUMBER_F_INF; + + switch (number->n_type) { + case B_NUMBER_INT8: + number->n_value.v_int8 = -1; + break; + case B_NUMBER_INT16: + number->n_value.v_int16 = -1; + break; + case B_NUMBER_INT32: + number->n_value.v_int32 = -1; + break; + case B_NUMBER_INT64: + number->n_value.v_int64 = -1; + break; + case B_NUMBER_FLOAT32: + number->n_value.v_float32 = -1; + break; + case B_NUMBER_FLOAT64: + number->n_value.v_float64 = -1; + break; + case B_NUMBER_CHAR: + number->n_value.v_char = -1; + break; + case B_NUMBER_SHORT: + number->n_value.v_short = -1; + break; + case B_NUMBER_INT: + number->n_value.v_int = -1; + break; + case B_NUMBER_LONG: + number->n_value.v_long = -1; + break; + case B_NUMBER_LONGLONG: + number->n_value.v_longlong = -1; + break; + case B_NUMBER_FLOAT: + number->n_value.v_float = -1; + break; + case B_NUMBER_DOUBLE: + number->n_value.v_double = -1; + break; + case B_NUMBER_SIZE_T: + number->n_value.v_size_t = -1; + break; + default: + break; + } +} + +void b_number_set_nan_positive(b_number *number, bool v) +{ + if (!v) { + number->n_flags &= ~NUMBER_F_NAN; + return; + } + + number->n_flags &= ~NUMBER_F_INF; + number->n_flags |= NUMBER_F_NAN; + + switch (number->n_type) { + case B_NUMBER_INT8: + number->n_value.v_int8 = 0; + break; + case B_NUMBER_INT16: + number->n_value.v_int16 = 0; + break; + case B_NUMBER_INT32: + number->n_value.v_int32 = 0; + break; + case B_NUMBER_INT64: + number->n_value.v_int64 = 0; + break; + case B_NUMBER_FLOAT32: + 
number->n_value.v_float32 = 0; + break; + case B_NUMBER_FLOAT64: + number->n_value.v_float64 = 0; + break; + case B_NUMBER_CHAR: + number->n_value.v_char = 0; + break; + case B_NUMBER_SHORT: + number->n_value.v_short = 0; + break; + case B_NUMBER_INT: + number->n_value.v_int = 0; + break; + case B_NUMBER_LONG: + number->n_value.v_long = 0; + break; + case B_NUMBER_LONGLONG: + number->n_value.v_longlong = 0; + break; + case B_NUMBER_FLOAT: + number->n_value.v_float = 0; + break; + case B_NUMBER_DOUBLE: + number->n_value.v_double = 0; + break; + case B_NUMBER_SIZE_T: + number->n_value.v_size_t = 0; + break; + default: + break; + } +} + +void b_number_set_nan_negative(b_number *number, bool v) +{ + if (!v) { + number->n_flags &= ~NUMBER_F_NAN; + return; + } + + number->n_flags &= ~NUMBER_F_INF; + number->n_flags |= NUMBER_F_NAN; + + switch (number->n_type) { + case B_NUMBER_INT8: + number->n_value.v_int8 = -1; + break; + case B_NUMBER_INT16: + number->n_value.v_int16 = -1; + break; + case B_NUMBER_INT32: + number->n_value.v_int32 = -1; + break; + case B_NUMBER_INT64: + number->n_value.v_int64 = -1; + break; + case B_NUMBER_FLOAT32: + number->n_value.v_float32 = -1; + break; + case B_NUMBER_FLOAT64: + number->n_value.v_float64 = -1; + break; + case B_NUMBER_CHAR: + number->n_value.v_char = -1; + break; + case B_NUMBER_SHORT: + number->n_value.v_short = -1; + break; + case B_NUMBER_INT: + number->n_value.v_int = -1; + break; + case B_NUMBER_LONG: + number->n_value.v_long = -1; + break; + case B_NUMBER_LONGLONG: + number->n_value.v_longlong = -1; + break; + case B_NUMBER_FLOAT: + number->n_value.v_float = -1; + break; + case B_NUMBER_DOUBLE: + number->n_value.v_double = -1; + break; + case B_NUMBER_SIZE_T: + number->n_value.v_size_t = -1; + break; + default: + break; + } +} + size_t b_number_data_size(const struct b_number *number) { switch (number->n_type) { @@ -175,9 +577,130 @@ size_t b_number_data_size(const struct b_number *number) } } +static void print_inf(const 
struct b_number *n, struct b_stream *out) +{ + switch (n->n_type) { + case B_NUMBER_INT8: + b_stream_write_string(out, n->n_value.v_int8 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_INT16: + b_stream_write_string(out, n->n_value.v_int16 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_INT32: + b_stream_write_string(out, n->n_value.v_int32 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_INT64: + b_stream_write_string(out, n->n_value.v_int64 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_FLOAT32: + b_stream_write_string( + out, n->n_value.v_float32 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_FLOAT64: + b_stream_write_string( + out, n->n_value.v_float64 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_CHAR: + b_stream_write_string(out, n->n_value.v_char < 0 ? "-" : "", NULL); + break; + case B_NUMBER_SHORT: + b_stream_write_string(out, n->n_value.v_short < 0 ? "-" : "", NULL); + break; + case B_NUMBER_INT: + b_stream_write_string(out, n->n_value.v_int < 0 ? "-" : "", NULL); + break; + case B_NUMBER_LONG: + b_stream_write_string(out, n->n_value.v_long < 0 ? "-" : "", NULL); + break; + case B_NUMBER_LONGLONG: + b_stream_write_string( + out, n->n_value.v_longlong < 0 ? "-" : "", NULL); + break; + case B_NUMBER_FLOAT: + b_stream_write_string(out, n->n_value.v_float < 0 ? "-" : "", NULL); + break; + case B_NUMBER_DOUBLE: + b_stream_write_string(out, n->n_value.v_double < 0 ? "-" : "", NULL); + break; + case B_NUMBER_SIZE_T: + b_stream_write_string(out, n->n_value.v_size_t < 0 ? "-" : "", NULL); + break; + default: + break; + } + + b_stream_write_string(out, "INF", NULL); +} + +static void print_nan(const struct b_number *n, struct b_stream *out) +{ + switch (n->n_type) { + case B_NUMBER_INT8: + b_stream_write_string(out, n->n_value.v_int8 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_INT16: + b_stream_write_string(out, n->n_value.v_int16 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_INT32: + b_stream_write_string(out, n->n_value.v_int32 < 0 ? 
"-" : "", NULL); + break; + case B_NUMBER_INT64: + b_stream_write_string(out, n->n_value.v_int64 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_FLOAT32: + b_stream_write_string( + out, n->n_value.v_float32 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_FLOAT64: + b_stream_write_string( + out, n->n_value.v_float64 < 0 ? "-" : "", NULL); + break; + case B_NUMBER_CHAR: + b_stream_write_string(out, n->n_value.v_char < 0 ? "-" : "", NULL); + break; + case B_NUMBER_SHORT: + b_stream_write_string(out, n->n_value.v_short < 0 ? "-" : "", NULL); + break; + case B_NUMBER_INT: + b_stream_write_string(out, n->n_value.v_int < 0 ? "-" : "", NULL); + break; + case B_NUMBER_LONG: + b_stream_write_string(out, n->n_value.v_long < 0 ? "-" : "", NULL); + break; + case B_NUMBER_LONGLONG: + b_stream_write_string( + out, n->n_value.v_longlong < 0 ? "-" : "", NULL); + break; + case B_NUMBER_FLOAT: + b_stream_write_string(out, n->n_value.v_float < 0 ? "-" : "", NULL); + break; + case B_NUMBER_DOUBLE: + b_stream_write_string(out, n->n_value.v_double < 0 ? "-" : "", NULL); + break; + case B_NUMBER_SIZE_T: + b_stream_write_string(out, n->n_value.v_size_t < 0 ? 
"-" : "", NULL); + break; + default: + break; + } + + b_stream_write_string(out, "NaN", NULL); +} + static void number_to_string(const struct b_object *obj, struct b_stream *out) { struct b_number *number = B_NUMBER(obj); + + if (number->n_flags & NUMBER_F_INF) { + print_inf(number, out); + return; + } + + if (number->n_flags & NUMBER_F_NAN) { + print_nan(number, out); + return; + } + switch (number->n_type) { case B_NUMBER_INT8: b_stream_write_fmt(out, NULL, "%" PRIu8, number->n_value.v_int8); diff --git a/object/number.h b/object/number.h index fe29158..2aa7048 100644 --- a/object/number.h +++ b/object/number.h @@ -5,9 +5,15 @@ #include +enum b_number_flags { + NUMBER_F_INF = 0x01u, + NUMBER_F_NAN = 0x02u, +}; + struct b_number { struct b_object n_base; b_number_type n_type; + enum b_number_flags n_flags; union { int8_t v_int8; int16_t v_int16; From 49ad80bcf22c9287c7cf96da0a5fdb7e60366c8a Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:52:56 +0100 Subject: [PATCH 15/23] io: path: use new to_string() and b_string apis --- io/sys/darwin/path.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/io/sys/darwin/path.c b/io/sys/darwin/path.c index ff098d1..c420843 100644 --- a/io/sys/darwin/path.c +++ b/io/sys/darwin/path.c @@ -18,7 +18,7 @@ struct b_path { }; static void path_release(struct b_object *obj); -static void path_to_string(struct b_object *obj, struct b_stream *out); +static void path_to_string(const struct b_object *obj, struct b_stream *out); static struct b_object_type path_type = { .t_name = "corelib::path", @@ -58,7 +58,7 @@ struct b_path *b_path_create_root() b_string_append_cstr(path->pathstr, system_drive); if (system_drive[system_drive_len - 1] != '\\') { - b_string_append_cstr(path->pathstr, "\\"); + b_string_append_c(path->pathstr, '\\'); } return path; @@ -108,8 +108,7 @@ struct b_path *b_path_create_from_cstr(const char *cstr) continue; } - char s[] = {c, 0}; - b_string_append_cstr(path->pathstr, s); + 
b_string_append_c(path->pathstr, c); prev = c; } @@ -312,7 +311,7 @@ void path_release(struct b_object *obj) b_string_release(path->pathstr); } -void path_to_string(struct b_object *obj, struct b_stream *out) +void path_to_string(const struct b_object *obj, struct b_stream *out) { struct b_path *path = (struct b_path *)obj; From d90291c0b170d4dca50cb80264508b0d2331ecc6 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:53:23 +0100 Subject: [PATCH 16/23] io: path: add function to query the path used to open a file --- io/include/blue/io/file.h | 1 + io/sys/darwin/file.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/io/include/blue/io/file.h b/io/include/blue/io/file.h index bcf801d..e665c22 100644 --- a/io/include/blue/io/file.h +++ b/io/include/blue/io/file.h @@ -62,6 +62,7 @@ BLUE_API b_status b_file_size(b_file *file, size_t *out_len); BLUE_API b_status b_file_cursor(b_file *file, size_t *out_pos); BLUE_API b_status b_file_resize(b_file *file, size_t len); BLUE_API b_status b_file_seek(b_file *file, long long offset, b_seek_basis basis); +BLUE_API const struct b_path *b_file_path(const b_file *file); BLUE_API b_status b_file_swap_shadow(b_file *main_file, b_file *shadow_file); diff --git a/io/sys/darwin/file.c b/io/sys/darwin/file.c index 7d8a7d8..ddfe6d7 100644 --- a/io/sys/darwin/file.c +++ b/io/sys/darwin/file.c @@ -274,6 +274,11 @@ static enum b_status stream_seek( return b_file_cursor(file, &stream->s_cursor); } +const struct b_path *b_file_path(const struct b_file *file) +{ + return file->path; +} + enum b_status b_file_open_stream(struct b_file *file, struct b_stream **out) { struct b_stream *stream = malloc(sizeof *stream); From 9fa2168599fc4912991a82a4fae06dd497dd0525 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:54:11 +0100 Subject: [PATCH 17/23] serial: json: update b_dict usage --- serial/json.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/serial/json.c b/serial/json.c index 
c73e15e..34700ef 100644 --- a/serial/json.c +++ b/serial/json.c @@ -2978,7 +2978,7 @@ static enum b_status serialise_dict(const struct b_object *object, cJSON **out) return status; } - cJSON_AddItemToObject(json_dict, it.key, child); + cJSON_AddItemToObject(json_dict, b_string_ptr(it.key), child); } *out = json_dict; From 30210d67e0cf0fb32521117c6d688b6883409553 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:54:33 +0100 Subject: [PATCH 18/23] serial: add toml parser --- serial/include/blue/serial.h | 1 + serial/serial.c | 2 + serial/toml.c | 918 ++++++++++++++++++++++++++--------- serial/toml.h | 4 + 4 files changed, 699 insertions(+), 226 deletions(-) create mode 100644 serial/toml.h diff --git a/serial/include/blue/serial.h b/serial/include/blue/serial.h index 8db05db..3c90e64 100644 --- a/serial/include/blue/serial.h +++ b/serial/include/blue/serial.h @@ -11,6 +11,7 @@ typedef enum b_serial_format { B_SERIAL_FORMAT_NONE = 0, B_SERIAL_FORMAT_BITCODE, B_SERIAL_FORMAT_JSON, + B_SERIAL_FORMAT_TOML, } b_serial_format; typedef enum b_serial_flags { diff --git a/serial/serial.c b/serial/serial.c index 07df3b0..795ca43 100644 --- a/serial/serial.c +++ b/serial/serial.c @@ -6,11 +6,13 @@ extern const struct b_serial_format_ops z__b_bitcode_format_ops; extern const struct b_serial_format_ops z__b_json_format_ops; +extern const struct b_serial_format_ops z__b_toml_format_ops; static const struct b_serial_format_ops *format_ops[] = { [B_SERIAL_FORMAT_NONE] = NULL, [B_SERIAL_FORMAT_BITCODE] = &z__b_bitcode_format_ops, [B_SERIAL_FORMAT_JSON] = &z__b_json_format_ops, + [B_SERIAL_FORMAT_TOML] = &z__b_toml_format_ops, }; static const size_t nr_format_ops = sizeof format_ops / sizeof format_ops[0]; diff --git a/serial/toml.c b/serial/toml.c index d35699b..dbf6bf9 100644 --- a/serial/toml.c +++ b/serial/toml.c @@ -1,8 +1,8 @@ -#include "blue/core/status.h" -#include "blue/object/datetime.h" #include "serial.h" +#include #include +#include #include #include 
#include @@ -15,6 +15,28 @@ #define IS_VALID_KEY_COMPONENT(tok) \ ((tok) && ((tok)->tok_type == TOK_WORD || (tok)->tok_type == TOK_STRING)) +#define ENABLE_EXTENDED_LEXING(ctx) \ + do { \ + ctx->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS; \ + ctx->ctx_flags |= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS \ + | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING; \ + } while (0) + +#define DISABLE_EXTENDED_LEXING(ctx) \ + do { \ + ctx->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS; \ + ctx->ctx_flags \ + &= ~(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS \ + | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING); \ + } while (0) + +enum object_flags { + OBJECT_HEADER_MID_DEFINED = 0x01u, + OBJECT_HEADER_END_DEFINED = 0x02u, + OBJECT_KV_MID_DEFINED = 0x04u, + OBJECT_KV_END_DEFINED = 0x08u, +}; + enum token_type { TOK_NONE = 0, TOK_WORD, @@ -36,6 +58,22 @@ enum token_type { TOK_RIGHT_BRACE, }; +enum ctx_flags { + CTX_EOF = 0x01u, + CTX_ENABLE_NUMBERS = 0x02u, + CTX_ENABLE_TIMESTAMPS = 0x04u, + CTX_ENABLE_BOOLS = 0x08u, + CTX_ENABLE_EXTENDED_SYMBOLS = 0x10u, + CTX_ENABLE_LONG_SYMBOLS = 0x20u, + CTX_ENABLE_MULTILINE_STRING = 0x40u, +}; + +enum ctx_state { + CTX_STATE_NONE = 0, + CTX_STATE_IN_TABLE, + CTX_STATE_IN_ARRAY, +}; + struct timestamp { unsigned int ts_year, ts_month, ts_day; unsigned short ts_hour, ts_min, ts_sec; @@ -48,17 +86,17 @@ struct timestamp { struct token { enum token_type tok_type; struct b_queue_entry tok_entry; - char *tok_str; + b_string *tok_str; union { struct { int64_t v; - bool nan; + bool inf, nan; } i; struct { double v; - bool nan; + bool inf, nan; } f; bool b; @@ -67,68 +105,119 @@ struct token { } tok_value; }; -enum ctx_flags { - CTX_EOF = 0x01u, - CTX_ENABLE_NUMBERS = 0x02u, - CTX_ENABLE_TIMESTAMPS = 0x04u, - CTX_ENABLE_BOOLS = 0x08u, - CTX_ENABLE_LONG_SYMBOLS = 0x10u, - CTX_ENABLE_MULTILINE_STRING = 0x20u, -}; - -enum ctx_state { - CTX_STATE_NONE = 0, - CTX_STATE_IN_TABLE, - CTX_STATE_IN_ARRAY, -}; - struct ctx { enum ctx_flags ctx_flags; b_stream *ctx_src; 
b_string *ctx_wordbuf; b_string *ctx_linebuf; b_stream *ctx_linebuf_stream; - size_t ctx_linebuf_pos; + b_string_iterator ctx_linebuf_ptr; enum b_status ctx_status; - b_hashmap *ctx_static_objects; + b_hashmap *ctx_objects_flags; b_queue ctx_tokens; }; -static void ctx_add_static_object(struct ctx *ctx, struct b_object *obj) +static void ctx_set_object_flags( + struct ctx *ctx, struct b_object *obj, enum object_flags flags) { + if (!obj) { + return; + } + b_hashmap_key key = { .key_data = obj, .key_size = sizeof(struct b_object *), .key_flags = B_HASHMAP_KEY_F_INTVALUE, }; - b_hashmap_value value = {}; + const b_hashmap_value *old_value + = b_hashmap_get(ctx->ctx_objects_flags, &key); - b_hashmap_put(ctx->ctx_static_objects, &key, &value); + enum object_flags new_flags = 0; + if (old_value) { + new_flags = (enum object_flags)(uintptr_t)old_value->value_data; + } + + new_flags |= flags; + + b_hashmap_value value = { + .value_data = (void *)new_flags, + .value_size = sizeof new_flags, + }; + + b_hashmap_put(ctx->ctx_objects_flags, &key, &value); } -static bool ctx_object_is_static(struct ctx *ctx, struct b_object *obj) +static void ctx_clear_object_flags( + struct ctx *ctx, struct b_object *obj, enum object_flags mask) { + if (!obj) { + return; + } + b_hashmap_key key = { .key_data = obj, .key_size = sizeof(struct b_object *), .key_flags = B_HASHMAP_KEY_F_INTVALUE, }; - return b_hashmap_has_key(ctx->ctx_static_objects, &key); + const b_hashmap_value *old_value + = b_hashmap_get(ctx->ctx_objects_flags, &key); + + enum object_flags new_flags = 0; + if (old_value) { + new_flags = (enum object_flags)(uintptr_t)old_value->value_data; + } + + new_flags &= ~mask; + + b_hashmap_value value = { + .value_data = (void *)new_flags, + .value_size = sizeof new_flags, + }; + + b_hashmap_put(ctx->ctx_objects_flags, &key, &value); } -static bool data_available(struct ctx *ctx) +static enum object_flags ctx_get_object_flags(struct ctx *ctx, struct b_object *obj) +{ + if (!obj) { + 
return 0; + } + + b_hashmap_key key = { + .key_data = obj, + .key_size = sizeof(struct b_object *), + .key_flags = B_HASHMAP_KEY_F_INTVALUE, + }; + + const b_hashmap_value *value = b_hashmap_get(ctx->ctx_objects_flags, &key); + if (value) { + return (enum object_flags)(uintptr_t)value->value_data; + } + + return 0; +} + +static enum b_status data_available(struct ctx *ctx) { size_t len = b_string_get_size(ctx->ctx_linebuf, B_STRLEN_NORMAL); - return len != 0 && ctx->ctx_linebuf_pos < len; + if (len == 0) { + return B_ERR_NO_DATA; + } + + if (!B_OK(ctx->ctx_linebuf_ptr.status)) { + return ctx->ctx_linebuf_ptr.status; + } + + return b_string_iterator_is_valid(&ctx->ctx_linebuf_ptr) ? B_SUCCESS + : B_ERR_NO_DATA; } static enum b_status refill_linebuf(struct ctx *ctx) { b_string_clear(ctx->ctx_linebuf); - ctx->ctx_linebuf_pos = 0; b_stream_seek(ctx->ctx_linebuf_stream, 0, B_STREAM_SEEK_START); enum b_status status @@ -137,7 +226,7 @@ static enum b_status refill_linebuf(struct ctx *ctx) return status; } - b_string_append_cstr(ctx->ctx_linebuf, "\n"); + b_string_iterator_begin(ctx->ctx_linebuf, &ctx->ctx_linebuf_ptr); return B_SUCCESS; } @@ -148,7 +237,7 @@ static struct b_string *get_wordbuf(struct ctx *ctx) return ctx->ctx_wordbuf; } -static bool is_valid_char(int c) +static bool is_valid_char(b_wchar c) { if (c <= 0) { return false; @@ -156,12 +245,10 @@ static bool is_valid_char(int c) switch (c) { case '\0': - case '\r': case '\b': case 0x0C: case 0x1F: case 0x7F: - case 0xFF: case 0x10: return false; default: @@ -169,10 +256,10 @@ static bool is_valid_char(int c) } } -static int advance_char(struct ctx *ctx) +static b_wchar advance_char(struct ctx *ctx) { - enum b_status status = B_SUCCESS; - if (!data_available(ctx)) { + enum b_status status = data_available(ctx); + if (status == B_ERR_NO_DATA) { status = refill_linebuf(ctx); } @@ -181,25 +268,34 @@ static int advance_char(struct ctx *ctx) return -1; } - if (!data_available(ctx)) { + status = data_available(ctx); 
+ if (!B_OK(status)) { + ctx->ctx_status = status; return -1; } const char *s = b_string_ptr(ctx->ctx_linebuf); - int c = s[ctx->ctx_linebuf_pos++]; + if (!B_OK(ctx->ctx_linebuf_ptr.status)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return -1; + } + + b_wchar c = ctx->ctx_linebuf_ptr.char_value; if (!is_valid_char(c)) { ctx->ctx_status = B_ERR_BAD_FORMAT; return -1; } + b_string_iterator_next(&ctx->ctx_linebuf_ptr); + return c; } -static int peek_char(struct ctx *ctx) +static b_wchar peek_char(struct ctx *ctx) { - enum b_status status = B_SUCCESS; - if (!data_available(ctx)) { + enum b_status status = data_available(ctx); + if (status == B_ERR_NO_DATA) { status = refill_linebuf(ctx); } @@ -208,12 +304,19 @@ static int peek_char(struct ctx *ctx) return -1; } - if (!data_available(ctx)) { + status = data_available(ctx); + if (!B_OK(status)) { + ctx->ctx_status = status; return -1; } const char *s = b_string_ptr(ctx->ctx_linebuf); - int c = s[ctx->ctx_linebuf_pos]; + if (!B_OK(ctx->ctx_linebuf_ptr.status)) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return -1; + } + + b_wchar c = ctx->ctx_linebuf_ptr.char_value; if (!is_valid_char(c)) { ctx->ctx_status = B_ERR_BAD_FORMAT; @@ -226,7 +329,7 @@ static int peek_char(struct ctx *ctx) #if 0 static int peek_char(struct ctx *ctx) { - int c = __peek_char(ctx); + b_wchar c = __peek_char(ctx); if (c != '#') { return c; @@ -243,7 +346,7 @@ static int peek_char(struct ctx *ctx) static int advance_char(struct ctx *ctx) { - int c = __advance_char(ctx); + b_wchar c = __advance_char(ctx); if (c != '#') { return c; @@ -300,7 +403,7 @@ static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *toke } struct token *tok = enqueue_token(ctx, TOK_TIMESTAMP); - tok->tok_str = b_string_steal(token_str); + tok->tok_str = b_string_duplicate(token_str); tok->tok_value.time = dt; return true; @@ -474,143 +577,238 @@ static bool try_convert_word_to_timestamp(struct ctx *ctx, struct b_string *toke } #endif +static bool 
is_valid_digit(b_wchar c, int base) +{ + switch (base) { + case 2: + return b_wchar_is_bin_digit(c); + case 8: + return b_wchar_is_oct_digit(c); + case 10: + return b_wchar_is_number(c); + case 16: + return b_wchar_is_hex_digit(c); + default: + return false; + } +} + +static bool has_trailing_zero(const char *s) +{ + int nr_zero = 0; + for (size_t i = 0; s[i]; i++) { + char c = s[i]; + + switch (c) { + case '0': + nr_zero++; + break; + case '.': + case 'e': + return false; + default: + return nr_zero > 0; + } + } + + return false; +} + static bool try_convert_word_to_number(struct ctx *ctx, struct b_string *token_str) { size_t len = b_string_get_size(token_str, B_STRLEN_NORMAL); struct b_string *str = b_string_duplicate(token_str); + struct token *tok = NULL; const char *s = b_string_ptr(str); if (len == 0) { return false; } - size_t offset = 0; + size_t validation_offset = 0; + + bool is_decimal = false; bool has_sign = false; - - int mul = 1; - if (s[0] == '+') { - offset++; - has_sign = true; - } else if (s[0] == '-') { - offset++; - mul = -1; - has_sign = true; - } - int base = 10; - if (*(s + offset) == '0') { - char b = *(s + offset + 1); - switch (b) { - case 'x': - offset += 2; - base = 16; - break; + switch (*s) { + case '+': + case '-': + validation_offset++; + has_sign = true; + break; + case '0': + switch (*(s + 1)) { case 'b': - offset += 2; base = 2; + s += 2; break; case 'o': - offset += 2; base = 8; + s += 2; break; + case 'x': + base = 16; + s += 2; + break; + case '.': + case 'e': + case 'E': + break; + case '\0': + tok = enqueue_token(ctx, is_decimal ? TOK_FLOAT : TOK_INT); + tok->tok_value.i.v = 0; + return true; default: - b_string_release(str); return false; } + break; + default: + break; } - if (has_sign && base != 10) { - b_string_release(str); - return false; + if (!strcmp(s + validation_offset, "inf")) { + tok = enqueue_token(ctx, TOK_FLOAT); + tok->tok_value.f.v = (*s == '-') ? 
-1 : 0; + tok->tok_value.f.inf = true; + return true; + } else if (!strcmp(s + validation_offset, "nan")) { + tok = enqueue_token(ctx, TOK_FLOAT); + tok->tok_value.f.v = (*s == '-') ? -1 : 0; + tok->tok_value.f.nan = true; + return true; } - if (offset == len) { - b_string_release(str); - return false; - } - - bool is_valid = true; - bool is_double = false; - char previous = 0; - for (size_t i = offset; i < len; i++) { + + for (size_t i = validation_offset; s[i]; i++) { char c = s[i]; - if (previous == '_' && !isnumber(c)) { - is_valid = false; - break; - } - if (c == '_') { - if (!isnumber(previous)) { - is_valid = false; - break; + if (!is_valid_digit(previous, base)) { + return false; } - b_string_remove(str, i, 1); - len--; + size_t to_remove = (s - b_string_ptr(str)) + i; + b_string_remove(str, to_remove, 1); i--; previous = c; continue; } - if (c == 'e' || c == '.') { - if (!isnumber(c)) { - is_valid = false; - break; + if (c == '.') { + if (base != 10) { + return false; } - is_double = true; + + if (is_decimal) { + return false; + } + + if (!is_valid_digit(previous, base)) { + return false; + } + + is_decimal = true; previous = c; continue; } - if ((c == '-' || c == '+') && previous != 'e') { - is_valid = false; + if (c == 'e' || c == 'E') { + if (base == 16) { + previous = c; + continue; + } + + if (base != 10) { + return false; + } + + if (!is_valid_digit(previous, base)) { + return false; + } + + is_decimal = true; + previous = c; + continue; + } + + if (c == '+' || c == '-') { + if (base != 10) { + return false; + } + + if (previous != 'e' && previous != 'E') { + return false; + } + + previous = c; + continue; + } + + if (!is_valid_digit(c, base)) { + return false; + } + + switch (previous) { + case 0: + case 'e': + case 'E': + case '_': + case '+': + case '-': + case '.': + break; + default: + if (!is_valid_digit(previous, base)) { + return false; + } break; } previous = c; } - if (previous == '_' || previous == '.') { - is_valid = false; + switch 
(previous) { + case 'e': + case 'E': + if (base == 16) { + break; + } + + case '.': + case '_': + case 0: + return false; + default: + break; } - if (is_double && base != 10) { - is_valid = false; - } - - if (!is_valid) { - b_string_release(str); + if (has_trailing_zero(s + validation_offset) && base == 10) { return false; } - double d = 0; - long long i = 0; + long long i; + double d; + bool is_valid; - if (is_double) { + if (is_decimal) { int r = 0; - int len = strlen(s + offset); + int len = strlen(s); // d = strtold(s + offset, &ep) * mul; - int ret = sscanf(s + offset, "%lf%n", &d, &r); - d *= mul; + int ret = sscanf(s, "%lf%n", &d, &r); is_valid = (ret == 1) && r == len; } else { char *ep; - i = strtoll(s + offset, &ep, base) * mul; + i = strtoll(s, &ep, base); is_valid = ((*ep) == 0); } - b_string_release(str); - if (!is_valid) { return false; } - struct token *tok = enqueue_token(ctx, is_double ? TOK_FLOAT : TOK_INT); - tok->tok_str = b_string_steal(token_str); - - if (is_double) { + tok = enqueue_token(ctx, is_decimal ? 
TOK_FLOAT : TOK_INT); + if (is_decimal) { tok->tok_value.f.v = d; } else { tok->tok_value.i.v = i; @@ -626,11 +824,11 @@ static bool try_convert_word_to_bool(struct ctx *ctx, struct b_string *token_str if (!strcmp(s, "true")) { tok = enqueue_token(ctx, TOK_BOOL); - tok->tok_str = b_string_steal(token_str); + tok->tok_str = b_string_duplicate(token_str); tok->tok_value.b = true; } else if (!strcmp(s, "false")) { tok = enqueue_token(ctx, TOK_BOOL); - tok->tok_str = b_string_steal(token_str); + tok->tok_str = b_string_duplicate(token_str); tok->tok_value.b = false; } else { return false; @@ -641,6 +839,7 @@ static bool try_convert_word_to_bool(struct ctx *ctx, struct b_string *token_str static void split_word(struct ctx *ctx, struct b_string *wordbuf) { +#if 0 long len = b_string_get_size(wordbuf, B_STRLEN_NORMAL); if (!len) { return; @@ -700,11 +899,31 @@ static void split_word(struct ctx *ctx, struct b_string *wordbuf) } free(s); +#endif + const char *delims[] = {"."}; + size_t nr_delims = sizeof delims / sizeof delims[0]; + b_string_iterator it; + b_string_tokenise( + wordbuf, delims, nr_delims, B_STRING_TOK_F_INCLUDE_EMPTY_TOKENS, + &it); + + while (b_string_iterator_is_valid(&it)) { + if (it.iteration_index > 0) { + enqueue_token(ctx, TOK_DOT); + } + + if (it.string_length > 0) { + struct token *word = enqueue_token(ctx, TOK_WORD); + word->tok_str = b_string_create_from_cstr(it.string_value); + } + + b_string_iterator_next(&it); + } } static void read_number(struct ctx *ctx) { - int c = 0; + b_wchar c = 0; struct b_string *wordbuf = get_wordbuf(ctx); while (1) { @@ -714,15 +933,14 @@ static void read_number(struct ctx *ctx) break; } - bool ok = isalnum(c) || c == '_' || c == '-' || c == '.' - || c == '+'; + bool ok = b_wchar_is_alnum(c) || c == '_' || c == '-' + || c == '.' 
|| c == '+'; if (!ok) { break; } - char s[] = {c, 0}; - b_string_append_cstr(wordbuf, s); + b_string_append_wc(wordbuf, c); advance_char(ctx); } @@ -735,7 +953,7 @@ static void read_number(struct ctx *ctx) static void read_word(struct ctx *ctx) { - int c = 0; + b_wchar c = 0; struct b_string *wordbuf = get_wordbuf(ctx); while (1) { @@ -745,7 +963,7 @@ static void read_word(struct ctx *ctx) break; } - bool ok = isalnum(c) || c == '_' || c == '-' || c == '.'; + bool ok = b_wchar_is_alnum(c) || c == '_' || c == '-' || c == '.'; if (ctx->ctx_flags & CTX_ENABLE_TIMESTAMPS) { ok = ok || c == ':' || c == ' ' || c == '+'; @@ -759,14 +977,18 @@ static void read_word(struct ctx *ctx) break; } - char s[] = {c, 0}; - b_string_append_cstr(wordbuf, s); + b_string_append_wc(wordbuf, c); advance_char(ctx); } bool parsed = false; b_string_trim(wordbuf); + if (b_string_get_size(wordbuf, B_STRLEN_NORMAL) == 0) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return; + } + if (ctx->ctx_flags & CTX_ENABLE_BOOLS) { parsed = try_convert_word_to_bool(ctx, wordbuf); } @@ -783,10 +1005,12 @@ static void read_word(struct ctx *ctx) return; } - const char *s = b_string_ptr(wordbuf); - for (size_t i = 0; s[i]; i++) { - c = s[i]; - bool ok = isalnum(c) || c == '_' || c == '-' || c == '.'; + b_string_iterator it; + b_string_foreach(&it, wordbuf) + { + /* only allow ASCII numbers/letters here */ + bool ok = isalnum(it.char_value) || it.char_value == '_' + || it.char_value == '-' || it.char_value == '.'; if (!ok) { ctx->ctx_status = B_ERR_BAD_FORMAT; return; @@ -796,6 +1020,59 @@ static void read_word(struct ctx *ctx) split_word(ctx, wordbuf); } +static b_wchar read_unicode_sequence(struct ctx *ctx) +{ + b_wchar c = peek_char(ctx); + + int expected_len = 0; + switch (c) { + case 'u': + expected_len = 4; + break; + case 'U': + expected_len = 8; + break; + default: + return B_WCHAR_INVALID; + } + + advance_char(ctx); + + char s[9] = {0}; + int len = 0; + + while (1) { + if (len >= expected_len) { + break; + 
} + + b_wchar c = peek_char(ctx); + if (c == -1 || c == B_WCHAR_INVALID) { + break; + } + + if (!b_wchar_is_hex_digit(c)) { + break; + } + + s[len++] = (char)c; + s[len] = 0; + advance_char(ctx); + } + + if (len != expected_len) { + return B_WCHAR_INVALID; + } + + char *ep; + c = strtoul(s, &ep, 16); + if (*ep != 0) { + return B_WCHAR_INVALID; + } + + return c; +} + static void read_string(struct ctx *ctx, bool squote) { advance_char(ctx); @@ -809,7 +1086,7 @@ static void read_string(struct ctx *ctx, bool squote) struct token *tok = enqueue_token(ctx, TOK_STRING); struct b_string *str = get_wordbuf(ctx); - int c = peek_char(ctx); + b_wchar c = peek_char(ctx); if (c == term) { advance_char(ctx); c = peek_char(ctx); @@ -819,6 +1096,7 @@ static void read_string(struct ctx *ctx, bool squote) c = peek_char(ctx); multiline = true; } else { + tok->tok_str = b_string_duplicate(str); return; } @@ -834,6 +1112,7 @@ static void read_string(struct ctx *ctx, bool squote) bool fail = false; bool esc = false; + bool cr = false; tok->tok_type = TOK_STRING; @@ -845,7 +1124,17 @@ static void read_string(struct ctx *ctx, bool squote) break; } - char s[] = {c, 0}; + if (c == '\r') { + if (!cr) { + advance_char(ctx); + cr = true; + continue; + } else { + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + } if (esc) { if (c == '\n') { @@ -854,6 +1143,29 @@ static void read_string(struct ctx *ctx, bool squote) c = peek_char(ctx); } + cr = false; + esc = false; + continue; + } + + if (isspace(c)) { + while (c != -1 && isspace(c) && c != '\n') { + advance_char(ctx); + c = peek_char(ctx); + } + + if (c != '\n') { + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + + while (c != -1 && isspace(c)) { + advance_char(ctx); + c = peek_char(ctx); + } + + cr = false; esc = false; continue; } @@ -861,27 +1173,42 @@ static void read_string(struct ctx *ctx, bool squote) switch (c) { case '"': case '\\': - b_string_append_cstr(str, s); + b_string_append_wc(str, c); + 
advance_char(ctx); break; case 'b': - s[0] = '\b'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\b'); + advance_char(ctx); break; case 't': - s[0] = '\t'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\t'); + advance_char(ctx); break; case 'n': - s[0] = '\n'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\n'); + advance_char(ctx); break; case 'r': - s[0] = '\r'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\r'); + advance_char(ctx); break; case 'f': - s[0] = '\f'; - b_string_append_cstr(str, s); + b_string_append_c(str, '\f'); + advance_char(ctx); + break; + case 'u': + case 'U': + c = read_unicode_sequence(ctx); + if (c == B_WCHAR_INVALID) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + fail = true; + break; + } + + ctx->ctx_status = B_OK(b_string_append_wc(str, c)) + ? B_SUCCESS + : B_ERR_BAD_FORMAT; + fail = !B_OK(ctx->ctx_status); break; default: ctx->ctx_status = B_ERR_BAD_FORMAT; @@ -890,7 +1217,6 @@ static void read_string(struct ctx *ctx, bool squote) } esc = false; - advance_char(ctx); continue; } @@ -905,7 +1231,12 @@ static void read_string(struct ctx *ctx, bool squote) break; } - b_string_append_cstr(str, s); + if (cr) { + b_string_append_wc(str, '\r'); + cr = false; + } + + b_string_append_wc(str, c); } else if (c == term) { @@ -917,45 +1248,62 @@ static void read_string(struct ctx *ctx, bool squote) c = peek_char(ctx); if (c != term) { - b_string_append_cstr(str, s); + b_string_append_wc(str, term); continue; } advance_char(ctx); c = peek_char(ctx); if (c != term) { - b_string_append_cstr(str, s); - b_string_append_cstr(str, s); + b_string_append_wc(str, term); + b_string_append_wc(str, term); continue; } advance_char(ctx); c = peek_char(ctx); - if (c == term) { - b_string_append_cstr(str, s); - advance_char(ctx); - } break; } else { - b_string_append_cstr(str, s); + b_string_append_wc(str, c); } advance_char(ctx); } + if (cr) { + fail = true; + } + if (fail) { discard_token(ctx); return; } - tok->tok_str = 
b_string_steal(str); + if (!multiline) { + goto done; + } + + c = peek_char(ctx); + if (c == term) { + b_string_append_wc(str, c); + advance_char(ctx); + } + + c = peek_char(ctx); + if (c == term) { + b_string_append_wc(str, c); + advance_char(ctx); + } + +done: + tok->tok_str = b_string_duplicate(str); } static void read_symbol(struct ctx *ctx) { - int c = peek_char(ctx); + b_wchar c = peek_char(ctx); advance_char(ctx); struct token *tok = enqueue_token(ctx, TOK_NONE); @@ -1022,23 +1370,48 @@ static void read_symbol(struct ctx *ctx) static void read_newline(struct ctx *ctx) { - int c = peek_char(ctx); + b_wchar c = peek_char(ctx); while (c == '\n') { advance_char(ctx); c = peek_char(ctx); } enqueue_token(ctx, TOK_NEWLINE); + ctx->ctx_status = B_SUCCESS; } static void read_comment(struct ctx *ctx) { - int c = peek_char(ctx); - while (c != '\n' && c != -1) { + b_wchar c = peek_char(ctx); + bool cr = false; + + while (1) { + if (c == '\n') { + cr = false; + break; + } + + if (c == -1) { + break; + } + + if (cr) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + break; + } + + if (c == '\r') { + cr = true; + } + advance_char(ctx); c = peek_char(ctx); } + if (cr) { + ctx->ctx_status = B_ERR_BAD_FORMAT; + } + if (!B_OK(ctx->ctx_status)) { return; } @@ -1047,16 +1420,34 @@ static void read_comment(struct ctx *ctx) enqueue_token(ctx, TOK_NEWLINE); } +static bool is_symbol(b_wchar c) +{ + switch (c) { + case '=': + case '.': + case ',': + case '[': + case ']': + case '{': + case '}': + return true; + default: + return false; + } +} + static enum b_status advance_token(struct ctx *ctx) { + b_wchar c = B_WCHAR_INVALID; discard_token(ctx); if (!b_queue_empty(&ctx->ctx_tokens)) { return B_SUCCESS; } - int c = peek_char(ctx); - while (isspace(c) && c != '\n') { +start: + c = peek_char(ctx); + while (isspace(c) && c != '\n' && c != '\r') { advance_char(ctx); c = peek_char(ctx); } @@ -1067,9 +1458,9 @@ static enum b_status advance_token(struct ctx *ctx) } #if 1 - while (c == '#') { + if 
(c == '#') { read_comment(ctx); - c = peek_char(ctx); + goto start; } #endif @@ -1077,13 +1468,23 @@ static enum b_status advance_token(struct ctx *ctx) return ctx->ctx_status; } + if (c == '\r') { + advance_char(ctx); + c = peek_char(ctx); + + if (c != '\n') { + ctx->ctx_status = B_ERR_BAD_FORMAT; + return ctx->ctx_status; + } + } + if (c == '"') { read_string(ctx, false); } else if (c == '\'') { read_string(ctx, true); } else if ((c == '+' || c == '-') && ctx->ctx_flags & CTX_ENABLE_NUMBERS) { read_number(ctx); - } else if (ispunct(c)) { + } else if (is_symbol(c)) { read_symbol(ctx); } else if (c == '\n') { read_newline(ctx); @@ -1121,9 +1522,9 @@ static void ctx_cleanup(struct ctx *ctx) ctx->ctx_wordbuf = NULL; } - if (ctx->ctx_static_objects) { - b_hashmap_release(ctx->ctx_static_objects); - ctx->ctx_static_objects = NULL; + if (ctx->ctx_objects_flags) { + b_hashmap_release(ctx->ctx_objects_flags); + ctx->ctx_objects_flags = NULL; } } @@ -1136,7 +1537,7 @@ static enum b_status ctx_init(struct ctx *ctx) b_string_open_stream(ctx->ctx_linebuf, &ctx->ctx_linebuf_stream); - ctx->ctx_static_objects = b_hashmap_create(NULL, NULL); + ctx->ctx_objects_flags = b_hashmap_create(NULL, NULL); return B_SUCCESS; } @@ -1155,10 +1556,10 @@ static void print_token(struct token *tok) printf("TOK_NONE\n"); break; case TOK_WORD: - printf("TOK_WORD %s\n", tok->tok_str); + printf("TOK_WORD %s\n", b_string_ptr(tok->tok_str)); break; case TOK_STRING: - printf("TOK_STRING %s\n", tok->tok_str); + printf("TOK_STRING %s\n", b_string_ptr(tok->tok_str)); break; case TOK_TIMESTAMP: printf("TOK_TIMESTAMP %04ld-%02ld-%02ld " @@ -1251,7 +1652,7 @@ static enum b_status parse_timestamp(struct ctx *ctx, struct b_object **result) static enum b_status parse_string(struct ctx *ctx, struct b_object **result) { struct token *tok = peek_token(ctx); - struct b_string *str = b_string_create_from_cstr(tok->tok_str); + struct b_string *str = b_string_duplicate(tok->tok_str); if (!str) { return 
B_ERR_NO_MEMORY; } @@ -1263,11 +1664,25 @@ static enum b_status parse_string(struct ctx *ctx, struct b_object **result) static enum b_status parse_int(struct ctx *ctx, struct b_object **result) { struct token *tok = peek_token(ctx); - struct b_number *val = B_INT64(tok->tok_value.i.v); + struct b_number *val = B_LONGLONG(tok->tok_value.i.v); if (!val) { return B_ERR_NO_MEMORY; } + if (tok->tok_value.i.inf) { + if (tok->tok_value.i.v >= 0) { + b_number_set_inf_positive(val, true); + } else { + b_number_set_inf_negative(val, true); + } + } else if (tok->tok_value.i.nan) { + if (tok->tok_value.i.v >= 0) { + b_number_set_nan_positive(val, true); + } else { + b_number_set_nan_negative(val, true); + } + } + *result = B_OBJECT(val); return B_SUCCESS; } @@ -1280,6 +1695,20 @@ static enum b_status parse_float(struct ctx *ctx, struct b_object **result) return B_ERR_NO_MEMORY; } + if (tok->tok_value.f.inf) { + if (tok->tok_value.f.v >= 0) { + b_number_set_inf_positive(val, true); + } else { + b_number_set_inf_negative(val, true); + } + } else if (tok->tok_value.f.nan) { + if (tok->tok_value.f.v >= 0) { + b_number_set_nan_positive(val, true); + } else { + b_number_set_nan_negative(val, true); + } + } + *result = B_OBJECT(val); return B_SUCCESS; } @@ -1298,6 +1727,8 @@ static enum b_status parse_bool(struct ctx *ctx, struct b_object **result) static enum b_status parse_table_inline(struct ctx *ctx, struct b_object **result) { + DISABLE_EXTENDED_LEXING(ctx); + advance_token(ctx); struct b_dict *table = b_dict_create(); @@ -1305,8 +1736,15 @@ static enum b_status parse_table_inline(struct ctx *ctx, struct b_object **resul return B_ERR_NO_MEMORY; } + struct token *tok = peek_token(ctx); + if (tok && tok->tok_type == TOK_RIGHT_BRACE) { + *result = B_OBJECT(table); + return B_SUCCESS; + } + bool done = false; while (!done) { + struct b_object *value; enum b_status status = parse_key_value_pair(ctx, table); if (!B_OK(status)) { @@ -1314,7 +1752,7 @@ static enum b_status 
parse_table_inline(struct ctx *ctx, struct b_object **resul return status; } - struct token *tok = peek_token(ctx); + tok = peek_token(ctx); if (!tok) { b_dict_release(table); @@ -1351,6 +1789,8 @@ static void skip_newlines(struct ctx *ctx) static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **result) { bool done = false; + ENABLE_EXTENDED_LEXING(ctx); + advance_token(ctx); struct b_array *array = b_array_create(); @@ -1365,7 +1805,6 @@ static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **resul } if (tok->tok_type == TOK_RIGHT_BRACKET) { - advance_token(ctx); done = true; } @@ -1392,7 +1831,9 @@ static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **resul } b_array_append(array, B_RV(value)); + ENABLE_EXTENDED_LEXING(ctx); + advance_token(ctx); skip_newlines(ctx); tok = peek_token(ctx); @@ -1407,9 +1848,11 @@ static enum b_status parse_array_inline(struct ctx *ctx, struct b_object **resul return B_ERR_BAD_FORMAT; } + ENABLE_EXTENDED_LEXING(ctx); advance_token(ctx); } + DISABLE_EXTENDED_LEXING(ctx); *result = B_OBJECT(array); return B_SUCCESS; } @@ -1449,7 +1892,7 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai return B_ERR_BAD_FORMAT; } - char *key = b_strdup(tok->tok_str); + b_string *key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1461,21 +1904,24 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai } while (tok && tok->tok_type == TOK_DOT) { - struct b_object *sub_dict = b_dict_at(container, key); + struct b_object *sub_dict = b_dict_at_sk(container, key); if (!sub_dict) { sub_dict = B_OBJECT(b_dict_create()); - b_dict_put(container, key, B_RV(sub_dict)); + b_dict_put_sk(container, key, B_RV(sub_dict)); } else if (sub_dict && !B_OBJECT_IS(sub_dict, DICT)) { free(key); return B_ERR_BAD_FORMAT; } - if (ctx_object_is_static(ctx, sub_dict)) { +#if 1 + enum object_flags flags = ctx_get_object_flags(ctx, 
sub_dict); + if (flags & (OBJECT_KV_END_DEFINED | OBJECT_HEADER_END_DEFINED)) { free(key); return B_ERR_BAD_FORMAT; } +#endif - ctx_add_static_object(ctx, sub_dict); + ctx_set_object_flags(ctx, sub_dict, OBJECT_KV_MID_DEFINED); advance_token(ctx); tok = peek_token(ctx); @@ -1485,8 +1931,8 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai } container = B_DICT(sub_dict); - free(key); - key = b_strdup(tok->tok_str); + b_string_release(key); + key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1495,7 +1941,7 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai tok = peek_token(ctx); } - if (b_dict_has_key(container, key)) { + if (b_dict_has_skey(container, key)) { return B_ERR_BAD_FORMAT; } @@ -1507,29 +1953,27 @@ static enum b_status parse_key_value_pair(struct ctx *ctx, struct b_dict *contai return B_ERR_BAD_FORMAT; } - ctx->ctx_flags &= ~CTX_ENABLE_LONG_SYMBOLS; - ctx->ctx_flags |= CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS - | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING; + ENABLE_EXTENDED_LEXING(ctx); advance_token(ctx); struct b_object *value = NULL; enum b_status status = parse_value(ctx, &value); - ctx->ctx_flags |= CTX_ENABLE_LONG_SYMBOLS; - ctx->ctx_flags - &= ~(CTX_ENABLE_NUMBERS | CTX_ENABLE_TIMESTAMPS - | CTX_ENABLE_BOOLS | CTX_ENABLE_MULTILINE_STRING); + DISABLE_EXTENDED_LEXING(ctx); if (!B_OK(status)) { return status; } - advance_token(ctx); + status = advance_token(ctx); + if (!B_OK(status) && status != B_ERR_NO_DATA) { + return status; + } - b_dict_put(container, key, B_RV(value)); + b_dict_put_sk(container, key, B_RV(value)); if (B_OBJECT_IS(value, DICT) || B_OBJECT_IS(value, ARRAY)) { - ctx_add_static_object(ctx, value); + ctx_set_object_flags(ctx, value, OBJECT_KV_END_DEFINED); } return B_SUCCESS; @@ -1544,7 +1988,7 @@ static enum b_status parse_table_header( return B_ERR_BAD_FORMAT; } - char *key = b_strdup(tok->tok_str); + b_string *key = 
b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1556,11 +2000,13 @@ static enum b_status parse_table_header( } while (tok && tok->tok_type == TOK_DOT) { - struct b_object *sub_dict = b_dict_at(container, key); + struct b_object *sub_dict = b_dict_at_sk(container, key); + enum object_flags flags = ctx_get_object_flags(ctx, sub_dict); if (!sub_dict) { sub_dict = B_OBJECT(b_dict_create()); - b_dict_put(container, key, B_RV(sub_dict)); + b_dict_put_sk(container, key, B_RV(sub_dict)); } else if (B_OBJECT_IS(sub_dict, ARRAY)) { + sub_dict = b_array_at( B_ARRAY(sub_dict), b_array_size(B_ARRAY(sub_dict)) - 1); @@ -1568,15 +2014,21 @@ static enum b_status parse_table_header( return B_ERR_BAD_FORMAT; } + if (flags & OBJECT_KV_END_DEFINED) { + return B_ERR_BAD_FORMAT; + } + advance_token(ctx); tok = peek_token(ctx); if (!IS_VALID_KEY_COMPONENT(tok)) { return B_ERR_BAD_FORMAT; } + ctx_set_object_flags(ctx, sub_dict, OBJECT_HEADER_MID_DEFINED); + container = B_DICT(sub_dict); - free(key); - key = b_strdup(tok->tok_str); + b_string_release(key); + key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1589,16 +2041,9 @@ static enum b_status parse_table_header( return B_ERR_BAD_FORMAT; } - struct b_dict *new_table = B_DICT(b_dict_at(container, key)); + struct b_dict *new_table = B_DICT(b_dict_at_sk(container, key)); - if (new_table) { - if (!B_OBJECT_IS(new_table, DICT) - || ctx_object_is_static(ctx, B_OBJECT(new_table))) { - return B_ERR_BAD_FORMAT; - } - - ctx_add_static_object(ctx, B_OBJECT(new_table)); - } else { + if (!new_table) { new_table = b_dict_create(); if (!new_table) { @@ -1606,11 +2051,22 @@ static enum b_status parse_table_header( return B_ERR_NO_MEMORY; } - b_dict_put(container, key, B_RV(new_table)); - ctx_add_static_object(ctx, B_OBJECT(new_table)); + b_dict_put_sk(container, key, B_RV(new_table)); } - free(key); + if (!B_OBJECT_IS(B_OBJECT(new_table), DICT)) { + return B_ERR_BAD_FORMAT; + } + + enum 
object_flags flags = ctx_get_object_flags(ctx, B_OBJECT(new_table)); + if (flags + & (OBJECT_HEADER_END_DEFINED | OBJECT_KV_MID_DEFINED + | OBJECT_KV_END_DEFINED)) { + return B_ERR_BAD_FORMAT; + } + + ctx_set_object_flags(ctx, B_OBJECT(new_table), OBJECT_HEADER_END_DEFINED); + b_string_release(key); advance_token(ctx); *new_container = new_table; @@ -1626,7 +2082,7 @@ static enum b_status parse_array_header( return B_ERR_BAD_FORMAT; } - char *key = b_strdup(tok->tok_str); + b_string *key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1638,10 +2094,10 @@ static enum b_status parse_array_header( } while (tok && tok->tok_type == TOK_DOT) { - struct b_object *sub_dict = b_dict_at(container, key); + struct b_object *sub_dict = b_dict_at_sk(container, key); if (!sub_dict) { sub_dict = B_OBJECT(b_dict_create()); - b_dict_put(container, key, B_RV(sub_dict)); + b_dict_put_sk(container, key, B_RV(sub_dict)); } else if (B_OBJECT_IS(sub_dict, ARRAY)) { sub_dict = b_array_at( B_ARRAY(sub_dict), @@ -1657,8 +2113,8 @@ static enum b_status parse_array_header( } container = B_DICT(sub_dict); - free(key); - key = b_strdup(tok->tok_str); + b_string_release(key); + key = b_string_duplicate(tok->tok_str); if (!key) { return B_ERR_NO_MEMORY; } @@ -1671,24 +2127,26 @@ static enum b_status parse_array_header( return B_ERR_BAD_FORMAT; } - struct b_array *array = B_ARRAY(b_dict_get(container, key)); + struct b_array *array = B_ARRAY(b_dict_get_sk(container, key)); if (!array) { array = b_array_create(); - b_dict_put(container, key, B_RV(array)); - } else if ( - !B_OBJECT_IS(array, ARRAY) - || ctx_object_is_static(ctx, B_OBJECT(array))) { + b_dict_put_sk(container, key, B_RV(array)); + } else if (!B_OBJECT_IS(array, ARRAY)) { return B_ERR_BAD_FORMAT; } + free(key); + + enum object_flags flags = ctx_get_object_flags(ctx, B_OBJECT(array)); + if (flags & OBJECT_KV_END_DEFINED) { + return B_ERR_NO_MEMORY; + } struct b_dict *new_table = b_dict_create(); if 
(!new_table) { - free(key); return B_ERR_NO_MEMORY; } b_array_append(array, B_RV(new_table)); - free(key); advance_token(ctx); *new_container = new_table; @@ -1741,6 +2199,7 @@ static enum b_status parse_root(struct ctx *ctx, struct b_dict **result) if (tok && tok->tok_type != TOK_NEWLINE) { status = B_ERR_BAD_FORMAT; } + advance_token(ctx); break; case TOK_NEWLINE: advance_token(ctx); @@ -1776,10 +2235,17 @@ static enum b_status toml_deserialise( } ctx.ctx_src = src; + ctx.ctx_flags = CTX_ENABLE_LONG_SYMBOLS; status = advance_token(&ctx); - if (!B_OK(status)) { - return status; + + if (!B_OK(ctx.ctx_status) && ctx.ctx_status != B_ERR_NO_DATA) { + return ctx.ctx_status; + } + + if (ctx.ctx_flags & CTX_EOF) { + *dest = B_OBJECT(b_dict_create()); + return B_SUCCESS; } struct b_dict *result = NULL; diff --git a/serial/toml.h b/serial/toml.h new file mode 100644 index 0000000..205237b --- /dev/null +++ b/serial/toml.h @@ -0,0 +1,4 @@ +#ifndef TOML_H_ +#define TOML_H_ + +#endif From 11fd147031de55adf7140c29cdef92729a2b8c12 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:55:04 +0100 Subject: [PATCH 19/23] term: tty: update hash api usage --- term/tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/term/tty.c b/term/tty.c index d782d40..05d1cb0 100644 --- a/term/tty.c +++ b/term/tty.c @@ -51,7 +51,7 @@ static void apply_code_to_vmode(struct tty_format_buf *fmt) { const char *modifier = fmt->buf; - uint64_t mod_hash = b_hash_string(modifier); + uint64_t mod_hash = b_hash_cstr(modifier); if (COMPARE_MOD_NAME(modifier, "black", mod_hash, MOD_HASH_BLACK)) { fmt->vmode.v_fg.c_mode = B_TTY_COLOUR_16; From b4360e9bdc4968147a7cc69e72f07fc55887d72e Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 22 Sep 2025 10:55:20 +0100 Subject: [PATCH 20/23] test: add and update a few tests --- object-test/trees.c | 2 +- object-test/unicode-strings.c | 26 ++++ serial-test/toml-decode.c | 233 ++++++++++++++++++++++++++++++++++ serial-test/toml-encode.c | 
28 ++++ test/test.toml | 1 + test/toml-read.c | 48 +++++++ 6 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 object-test/unicode-strings.c create mode 100644 serial-test/toml-decode.c create mode 100644 serial-test/toml-encode.c create mode 100644 test/test.toml create mode 100644 test/toml-read.c diff --git a/object-test/trees.c b/object-test/trees.c index e3fd35c..0cb6f51 100644 --- a/object-test/trees.c +++ b/object-test/trees.c @@ -32,7 +32,7 @@ int main(void) b_dict_iterator it; b_dict_foreach(&it, dict) { - printf("item %zu: %s=%d\n", it.i, it.key, + printf("item %zu: %s=%d\n", it.i, b_string_ptr(it.key), b_number_get_int(B_NUMBER(it.value))); } diff --git a/object-test/unicode-strings.c b/object-test/unicode-strings.c new file mode 100644 index 0000000..2aa9e98 --- /dev/null +++ b/object-test/unicode-strings.c @@ -0,0 +1,26 @@ +#include +#include +#include +#include + +int main(void) +{ + printf("здравс\u26A0твуите\n"); + b_string *str = b_string_create_from_cstr("здравствуите"); + const char *s = b_string_ptr(str); + printf("%s\n", s); + printf("len: %zu\n", b_string_get_size(str, B_STRLEN_NORMAL)); + printf("codepoints: %zu\n", b_string_get_size(str, B_STRLEN_CODEPOINTS)); + + b_string_iterator it; + const char *delims[] = {"в"}; + size_t nr_delims = sizeof delims / sizeof delims[0]; + + b_string_tokenise(str, delims, nr_delims, 0, &it); + while (b_string_iterator_is_valid(&it)) { + printf("%s\n", it.string_value); + b_string_iterator_next(&it); + } + + return 0; +} diff --git a/serial-test/toml-decode.c b/serial-test/toml-decode.c new file mode 100644 index 0000000..c6f51fe --- /dev/null +++ b/serial-test/toml-decode.c @@ -0,0 +1,233 @@ +#include "blue/object/datetime.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +void write_tagged_value(b_object *data); + +void write_raw_string(const b_string *data) +{ + b_stream_write_string(b_stdout, "\"", NULL); + + b_string_iterator it; + 
b_string_foreach(&it, data) + { + b_wchar c = it.char_value; + + if (c >= 0x10000) { + c -= 0x10000; + long hi = 0xD800 | ((c >> 10) & 0x3FF); + long lo = 0xDC00 | (c & 0x3FF); + b_stream_write_fmt(b_stdout, NULL, "\\u%04x\\u%04x", hi, lo); + } else if (c <= 0x1F || c >= 0x7F) { + b_stream_write_fmt(b_stdout, NULL, "\\u%04x", c); + } else if (c == '\\' || c == '"') { + b_stream_write_fmt(b_stdout, NULL, "\\%c", c); + } else { + b_stream_write_char(b_stdout, c); + } + } + + b_stream_write_string(b_stdout, "\"", NULL); +} + +void write_tagged_string(b_string *data) +{ + b_stream_write_string(b_stdout, "{ \"type\": \"string\", \"value\": ", NULL); + + write_raw_string(data); + + b_stream_write_string(b_stdout, " }", NULL); +} + +void write_tagged_integer(b_number *data) +{ + b_stream_write_string( + b_stdout, "{ \"type\": \"integer\", \"value\": \"", NULL); + + if (b_number_is_inf_positive(data)) { + b_stream_write_string(b_stdout, "inf", NULL); + } else if (b_number_is_inf_negative(data)) { + b_stream_write_string(b_stdout, "-inf", NULL); + } else if (b_number_is_nan_positive(data)) { + b_stream_write_string(b_stdout, "nan", NULL); + } else if (b_number_is_nan_negative(data)) { + b_stream_write_string(b_stdout, "-nan", NULL); + } else { + b_stream_write_fmt( + b_stdout, NULL, "%lld", b_number_get_longlong(data), NULL); + } + + b_stream_write_string(b_stdout, "\" }", NULL); +} + +void write_tagged_float(b_number *data) +{ + b_stream_write_string( + b_stdout, "{ \"type\": \"float\", \"value\": \"", NULL); + + if (b_number_is_inf_positive(data)) { + b_stream_write_string(b_stdout, "inf", NULL); + } else if (b_number_is_inf_negative(data)) { + b_stream_write_string(b_stdout, "-inf", NULL); + } else if (b_number_is_nan_positive(data)) { + b_stream_write_string(b_stdout, "nan", NULL); + } else if (b_number_is_nan_negative(data)) { + b_stream_write_string(b_stdout, "-nan", NULL); + } else { + double v = b_number_get_double(data); + if ((v <= 0.00000001 && v > 0) || (v >= 
-0.00000001 && v < 0) + || (v >= 1000000000) || (v <= -1000000000)) { + b_stream_write_fmt(b_stdout, NULL, "%.15e", v, NULL); + } else { + b_stream_write_fmt(b_stdout, NULL, "%.15f", v, NULL); + } + } + + b_stream_write_string(b_stdout, "\" }", NULL); +} + +void write_tagged_bool(b_number *data) +{ + int v = b_number_get_int8(data); + b_stream_write_fmt( + b_stdout, NULL, "{ \"type\": \"bool\", \"value\": \"%s\" }", + (v > 0) ? "true" : "false", NULL); +} + +void write_tagged_datetime(b_datetime *data) +{ + bool has_date = b_datetime_has_date(data); + bool has_time = b_datetime_has_time(data); + bool localtime = b_datetime_is_localtime(data); + + b_stream_write_string(b_stdout, "{ \"type\": \"", NULL); + + if (has_date && has_time) { + b_stream_write_string( + b_stdout, localtime ? "datetime-local" : "datetime", NULL); + } else if (has_date) { + b_stream_write_string( + b_stdout, localtime ? "date-local" : "date", NULL); + } else if (has_time) { + b_stream_write_string( + b_stdout, localtime ? 
"time-local" : "time", NULL); + } + + b_stream_write_string(b_stdout, "\", \"value\": \"", NULL); + + b_string *new_data = b_string_create(); + b_datetime_to_string(data, B_DATETIME_FORMAT_RFC3339, new_data); + b_stream_write_string(b_stdout, b_string_ptr(new_data), NULL); + + b_stream_write_string(b_stdout, "\" }", NULL); + + b_string_release(new_data); +} + +void write_tagged_dict(b_dict *data) +{ + b_stream_write_string(b_stdout, "{ ", NULL); + + int i = 0; + + b_dict_iterator it; + b_dict_foreach(&it, data) + { + if (i++ > 0) { + b_stream_write_string(b_stdout, ", ", NULL); + } + + write_raw_string(it.key); + b_stream_write_string(b_stdout, ": ", NULL); + write_tagged_value(it.value); + } + + b_stream_write_string(b_stdout, " }", NULL); +} + +void write_tagged_array(b_array *data) +{ + b_stream_write_string(b_stdout, "[ ", NULL); + + int i = 0; + b_array_iterator it; + b_array_foreach(&it, data) + { + if (i++ > 0) { + b_stream_write_string(b_stdout, ", ", NULL); + } + + write_tagged_value(it.value); + } + + b_stream_write_string(b_stdout, " ]", NULL); +} + +void write_tagged_value(b_object *data) +{ + b_object_type_id typeid = B_TYPEID(data); + + switch (typeid) { + case B_OBJECT_TYPE_DICT: + write_tagged_dict(B_DICT(data)); + break; + case B_OBJECT_TYPE_ARRAY: + write_tagged_array(B_ARRAY(data)); + break; + case B_OBJECT_TYPE_STRING: + write_tagged_string(B_STRING(data)); + break; + case B_OBJECT_TYPE_DATETIME: + write_tagged_datetime(B_DATETIME(data)); + break; + case B_OBJECT_TYPE_NUMBER: + switch (b_number_get_type(B_NUMBER(data))) { + case B_NUMBER_LONGLONG: + write_tagged_integer(B_NUMBER(data)); + break; + case B_NUMBER_INT8: + write_tagged_bool(B_NUMBER(data)); + break; + case B_NUMBER_DOUBLE: + write_tagged_float(B_NUMBER(data)); + break; + default: + break; + } + break; + default: + break; + } +} + +int main(void) +{ + b_stream *src = b_stdin; + b_stream *dest = b_stdout; + + b_serial_ctx *ctx; + b_serial_ctx_create(&ctx); + + b_object *data; + 
b_status status = b_serial_ctx_deserialise( + ctx, B_SERIAL_FORMAT_TOML, src, &data, 0); + if (!B_OK(status)) { + return 1; + } + + write_tagged_value(data); + + b_stream_write_char(b_stdout, '\n'); + + b_serial_ctx_destroy(ctx); + b_release(data); + + return 0; +} diff --git a/serial-test/toml-encode.c b/serial-test/toml-encode.c new file mode 100644 index 0000000..27e3faf --- /dev/null +++ b/serial-test/toml-encode.c @@ -0,0 +1,28 @@ +#include +#include + +int main(void) +{ + b_stream *src = b_stdin; + b_stream *dest = b_stdout; + +#if 0 + b_serial_ctx *ctx; + b_serial_ctx_create(&ctx); + + b_object *data; + b_status status = b_serial_ctx_deserialise( + ctx, B_SERIAL_FORMAT_JSON, src, &data, 0); + if (!B_OK(status)) { + return -1; + } + + b_to_string(B_OBJECT(data), dest); + b_stream_write_char(b_stdout, '\n'); + + b_release(data); + b_serial_ctx_destroy(ctx); +#endif + + return 0; +} diff --git a/test/test.toml b/test/test.toml new file mode 100644 index 0000000..cf80572 --- /dev/null +++ b/test/test.toml @@ -0,0 +1 @@ +tab = 32 diff --git a/test/toml-read.c b/test/toml-read.c new file mode 100644 index 0000000..3bf6463 --- /dev/null +++ b/test/toml-read.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +int main(int argc, const char **argv) +{ + if (argc < 2) { + return -1; + } + + const char *path_cstr = argv[1]; + b_path *path = b_path_create_from_cstr(path_cstr); + + b_file *file = NULL; + b_result result = b_file_open(NULL, path, B_FILE_READ_ONLY, &file); + if (b_result_is_error(result)) { + b_throw(result); + return -1; + } + + b_stream *src_stream; + + b_file_open_stream(file, &src_stream); + + b_serial_ctx *ctx; + b_serial_ctx_create(&ctx); + + b_object *data = NULL; + b_status status = b_serial_ctx_deserialise( + ctx, B_SERIAL_FORMAT_TOML, src_stream, &data, 0); + if (!B_OK(status)) { + fprintf(stderr, "cannot read data\n"); + return -1; + } + + if (!data) { + return 0; + } + + b_to_string(B_OBJECT(data), b_stdout); + 
b_stream_write_char(b_stdout, '\n'); + + b_release(data); + b_serial_ctx_destroy(ctx); + + return 0; +} From 31cdc7064d871871b38ae45e46949bec2b1e02eb Mon Sep 17 00:00:00 2001 From: Max Wash Date: Sun, 12 Oct 2025 17:29:03 +0100 Subject: [PATCH 21/23] cmd: fix coloured arg usage string output --- cmd/arg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/arg.c b/cmd/arg.c index f7e4963..9e625ac 100644 --- a/cmd/arg.c +++ b/cmd/arg.c @@ -127,7 +127,7 @@ void z__b_get_arg_usage_string( if (optional) { b_string_append_cstrf( - out, colour ? F_GREEN "[%s]" : "[%s]", arg->arg_name); + out, colour ? F_GREEN "[[%s]" : "[[%s]", arg->arg_name); } else { b_string_append_cstrf( out, colour ? F_GREEN "<%s>" : "<%s>", arg->arg_name); From 57f21020fc86c43ee038a5076583cc10876f5594 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Sun, 12 Oct 2025 17:29:22 +0100 Subject: [PATCH 22/23] cmd: add asserts to b_arglist_report_missing_args --- cmd/report.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/report.c b/cmd/report.c index aebb08a..f14b9bb 100644 --- a/cmd/report.c +++ b/cmd/report.c @@ -1,5 +1,6 @@ #include "command.h" +#include #include #include #include @@ -132,11 +133,13 @@ enum b_status b_arglist_report_missing_args( if (opt_id != B_COMMAND_INVALID_ID) { opt = b_command_get_option_with_id(args->list_command, opt_id); + assert(opt); } if (arg_id != B_COMMAND_INVALID_ID) { arg = opt ? 
b_command_option_get_arg_with_id(opt, arg_id) : b_command_get_arg_with_id(args->list_command, arg_id); + assert(arg); } struct b_string *usage = z__b_command_default_usage_string( From 407a0972c1fef6d947f3b2e34b1d903b4f38785f Mon Sep 17 00:00:00 2001 From: Max Wash Date: Sun, 12 Oct 2025 17:29:49 +0100 Subject: [PATCH 23/23] io: file: implement tell() callback for file streams --- io/sys/darwin/file.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/io/sys/darwin/file.c b/io/sys/darwin/file.c index ddfe6d7..d0b7e39 100644 --- a/io/sys/darwin/file.c +++ b/io/sys/darwin/file.c @@ -274,6 +274,18 @@ static enum b_status stream_seek( return b_file_cursor(file, &stream->s_cursor); } +static enum b_status stream_tell(const struct b_stream *stream, size_t *pos) +{ + const struct b_file *file = stream->s_ptr; + off_t v = lseek(file->fd, 0, SEEK_CUR); + if (v == (off_t)-1) { + return b_status_from_errno(errno, B_ERR_IO_FAILURE); + } + + *pos = v; + return B_SUCCESS; +} + const struct b_path *b_file_path(const struct b_file *file) { return file->path; @@ -306,6 +318,7 @@ enum b_status b_file_open_stream(struct b_file *file, struct b_stream **out) stream->s_read = stream_read; stream->s_write = stream_write; stream->s_seek = stream_seek; + stream->s_tell = stream_tell; *out = stream; @@ -387,11 +400,11 @@ enum b_status b_file_seek( } int err = lseek(file->fd, offset, whence); - if (err == 0) { - return B_SUCCESS; + if (err == (off_t)-1) { + return b_status_from_errno(errno, B_ERR_IO_FAILURE); } - return b_status_from_errno(errno, B_ERR_IO_FAILURE); + return B_SUCCESS; } enum b_status b_file_swap_shadow(struct b_file *main_file, struct b_file *shadow_file)