diff --git a/ds/include/blue/ds/string.h b/ds/include/blue/ds/string.h index b030e87..1d27b6a 100644 --- a/ds/include/blue/ds/string.h +++ b/ds/include/blue/ds/string.h @@ -1,16 +1,23 @@ -#ifndef BLUELIB_STRING_H_ -#define BLUELIB_STRING_H_ +#ifndef BLUE_DS_STRING_H_ +#define BLUE_DS_STRING_H_ #include #include +#include #include -#include -#include #include -struct b_stream; +B_DECLS_BEGIN; -#define B_STRING(p) ((b_string *)(p)) +struct b_stream; +struct b_string_p; + +#define B_TYPE_STRING (b_string_get_type()) + +B_DECLARE_TYPE(b_string); + +B_TYPE_CLASS_DECLARATION_BEGIN(b_string) +B_TYPE_CLASS_DECLARATION_END(b_string) #define B_CSTR(s) (b_string_create_from_cstr(s)) #define B_RV_CSTR(s) (B_RV(b_string_create_from_cstr(s))) @@ -19,12 +26,12 @@ struct b_stream; for (int z__b_unique_name() = b_string_iterator_begin(str, it); \ b_string_iterator_is_valid(it); b_string_iterator_next(it)) -typedef struct b_string b_string; - typedef struct b_string_iterator { b_iterator _base; int _m, _f; - b_string *_s, *_tmp; + b_string *_tmp; + struct b_string_p *_s_p, *_tmp_p; + const char **_d; size_t _nd, _ds; @@ -50,20 +57,14 @@ typedef enum b_string_tokenise_flags { B_STRING_TOK_F_INCLUDE_EMPTY_TOKENS = 0x01u, } b_string_tokenise_flags; -BLUE_API b_string *b_string_create(void); +BLUE_API b_type b_string_get_type(void); + +B_TYPE_DEFAULT_CONSTRUCTOR(b_string, B_TYPE_STRING); BLUE_API b_string *b_string_create_from_cstr(const char *s); BLUE_API b_string *b_string_create_from_wstr(const b_wchar *s); BLUE_API b_string *b_string_create_from_c(char c, size_t count); BLUE_API b_string *b_string_duplicate(const b_string *str); -static inline b_string *b_string_retain(b_string *str) -{ - return B_STRING(b_retain(B_DSREF(str))); -} -static inline void b_string_release(b_string *str) -{ - b_release(B_DSREF(str)); -} BLUE_API char *b_string_steal(b_string *str); BLUE_API b_status b_string_reserve(b_string *str, size_t capacity); BLUE_API b_status b_string_replace( @@ -138,4 +139,6 @@ 
BLUE_API size_t b_wstrlen(const b_wchar *s); BLUE_API uint64_t b_string_hash(const b_string *s); +B_DECLS_END; + #endif diff --git a/ds/string.c b/ds/string.c index a18914f..7a42fae 100644 --- a/ds/string.c +++ b/ds/string.c @@ -1,9 +1,6 @@ -#include "string.h" - #include #include #include -#include #include #include #include @@ -11,29 +8,57 @@ #include #include +/* maximum length of string that can be stored inline, not including null-terminator */ +#define STRING_INLINE_CAPACITY 15 + #define IS_VALID_UTF8_SCALAR(x) \ (((x) >= 0x0000 && (x) <= 0xD7FF) || ((x) >= 0xE000 && (x) <= 0x10FFFF)) #define STRING_TOK_F_FOUND_DELIM 0x80 +/*** PRIVATE DATA *************************************************************/ + +static struct b_iterator_ops it_ops; + enum iterator_mode { ITERATOR_MODE_NONE = 0, ITERATOR_MODE_CHARS, ITERATOR_MODE_TOKENS, }; -static void string_release(struct b_dsref *obj); -static void string_to_string(const struct b_dsref *obj, struct b_stream *out); - -static struct b_dsref_type string_type = { - .t_name = "corelib::string", - .t_flags = B_DSREF_FUNDAMENTAL, - .t_id = B_DSREF_TYPE_STRING, - .t_instance_size = sizeof(struct b_string), - .t_release = string_release, - .t_to_string = string_to_string, +struct b_string_p { + /* length of string in bytes, not including null-terminator. + * a multi-byte utf-8 codepoint will be counted as multiple bytes here */ + unsigned int s_len; + /* length of string in codepoints, not including null-terminator. 
+ * a multi-byte utf-8 codepoint will be counted as one codepoint here */ + unsigned int s_codepoints; + /* maximum length of string storable in the currently-allocated buffer + * in bytes, not including null terminator */ + unsigned int s_max; + union { + char d_inline[STRING_INLINE_CAPACITY + 1]; + char *d_external; + } s_data; }; +/*** PRIVATE FUNCTIONS ********************************************************/ + +static bool string_is_inline(const struct b_string_p *str) +{ + /* strings cannot go below STRING_INLINE_CAPACITY capacity */ + return str->s_max == STRING_INLINE_CAPACITY; +} + +static char *string_ptr(const struct b_string_p *str) +{ + if (string_is_inline(str)) { + return (char *)str->s_data.d_inline; + } + + return str->s_data.d_external; +} + static size_t utf8_codepoint_size(b_wchar c) { if (!IS_VALID_UTF8_SCALAR(c)) { @@ -59,7 +84,7 @@ static size_t utf8_codepoint_size(b_wchar c) return 0; } -int32_t decode_utf8_trailer_byte(char c) +static int32_t decode_utf8_trailer_byte(char c) { if (!(c & 0x80) || (c & 0x40)) { return -1; @@ -203,10 +228,10 @@ static size_t get_utf8_encoded_size(const b_wchar *s, size_t nr_codepoints) } static enum b_status convert_codepoint_range_to_byte_range( - const struct b_string *str, size_t cp_start, size_t cp_length, + const struct b_string_p *str, size_t cp_start, size_t cp_length, size_t *out_byte_start, size_t *out_byte_length) { - const char *s = b_string_ptr(str); + const char *s = string_ptr(str); size_t byte_offset = 0, byte_length = 0; for (size_t i = 0; i < cp_start; i++) { @@ -253,37 +278,7 @@ static enum b_status convert_codepoint_range_to_byte_range( return B_SUCCESS; } -struct b_string *b_string_create(void) -{ - struct b_string *str - = (struct b_string *)b_dsref_type_instantiate(&string_type); - if (!str) { - return NULL; - } - - str->s_len = 0; - str->s_codepoints = 0; - str->s_max = STRING_INLINE_CAPACITY; - - return str; -} - -static bool string_is_inline(const struct b_string *str) -{ - /* strings 
cannot go below STRING_INLINE_CAPACITY capacity */ - return str->s_max == STRING_INLINE_CAPACITY; -} - -static char *string_ptr(struct b_string *str) -{ - if (string_is_inline(str)) { - return str->s_data.d_inline; - } - - return str->s_data.d_external; -} - -static char *get_next_codepoint(struct b_string *str, char *this_codepoint) +static char *get_next_codepoint(struct b_string_p *str, char *this_codepoint) { char c = *this_codepoint; char *end = this_codepoint - 1; @@ -303,7 +298,7 @@ static char *get_next_codepoint(struct b_string *str, char *this_codepoint) return this_codepoint + len; } -static char *get_previous_codepoint(struct b_string *str, char *this_codepoint) +static char *get_previous_codepoint(struct b_string_p *str, char *this_codepoint) { char *start = string_ptr(str); char *end = this_codepoint - 1; @@ -335,7 +330,7 @@ static char *get_previous_codepoint(struct b_string *str, char *this_codepoint) return NULL; } -static char *get_last_codepoint(struct b_string *str) +static char *get_last_codepoint(struct b_string_p *str) { if (str->s_len == 0) { return NULL; @@ -344,7 +339,7 @@ static char *get_last_codepoint(struct b_string *str) return get_previous_codepoint(str, string_ptr(str) + str->s_len); } -static int string_make_inline(struct b_string *str) +static int string_make_inline(struct b_string_p *str) { char *buffer = string_ptr(str); memcpy(str->s_data.d_inline, buffer, sizeof str->s_data.d_inline); @@ -360,7 +355,7 @@ static int string_make_inline(struct b_string *str) return 0; } -static int string_resize_large(struct b_string *str, size_t capacity) +static int string_resize_large(struct b_string_p *str, size_t capacity) { char *buffer = string_ptr(str); char *new_buffer = realloc(buffer, capacity + 1); @@ -373,7 +368,7 @@ static int string_resize_large(struct b_string *str, size_t capacity) return 0; } -static int string_make_large(struct b_string *str, size_t capacity) +static int string_make_large(struct b_string_p *str, size_t 
capacity) { const char *old_buffer = string_ptr(str); char *buffer = malloc(capacity + 1); @@ -389,7 +384,7 @@ static int string_make_large(struct b_string *str, size_t capacity) return 0; } -static int string_change_capacity(struct b_string *str, size_t capacity) +static int string_change_capacity(struct b_string_p *str, size_t capacity) { size_t old_capacity = str->s_max; @@ -424,68 +419,28 @@ static int string_change_capacity(struct b_string *str, size_t capacity) return 0; } -struct b_string *b_string_create_from_cstr(const char *s) +static b_string *string_duplicate(const struct b_string_p *str) { - struct b_string *str = b_string_create(); + b_string *new_str = b_string_create(); - if (!str) { + if (!str || !new_str) { /* also bail out when the new object could not be created; NOTE(review): new_str is still leaked when only str is NULL */ return NULL; } - if (!s) { - return str; - } + struct b_string_p *new_str_p + = b_object_get_private(new_str, B_TYPE_STRING); - size_t s_len = strlen(s); - size_t s_codepoints = get_number_of_codepoints(s, s_len); - b_string_reserve(str, s_len); - - char *dest = string_ptr(str); - memcpy(dest, s, s_len); - dest[s_len] = 0; - - str->s_len = s_len; - str->s_codepoints = s_codepoints; - - return str; -} - -struct b_string *b_string_create_from_c(char c, size_t count) -{ - struct b_string *str = b_string_create(); - if (!str) { - return NULL; - } - - string_change_capacity(str, count); - char *s = string_ptr(str); - for (size_t i = 0; i < count; i++) { - s[i] = c; - } - - str->s_len = count; - str->s_codepoints = count; - return str; -} - -struct b_string *b_string_duplicate(const struct b_string *str) -{ - struct b_string *new_str = b_string_create(); - if (!str) { - return NULL; - } - - string_change_capacity(new_str, str->s_len); - const char *src = b_string_ptr(str); - char *dst = string_ptr(new_str); + string_change_capacity(new_str_p, str->s_len); + const char *src = string_ptr(str); + char *dst = string_ptr(new_str_p); memcpy(dst, src, str->s_len); - new_str->s_len = str->s_len; - new_str->s_codepoints = str->s_codepoints; + new_str_p->s_len = str->s_len; + 
new_str_p->s_codepoints = str->s_codepoints; return new_str; } -char *b_string_steal(struct b_string *str) +static char *string_steal(struct b_string_p *str) { char *dest = NULL; char *src = string_ptr(str); @@ -506,7 +461,7 @@ char *b_string_steal(struct b_string *str) return dest; } -b_status b_string_reserve(struct b_string *str, size_t capacity) +static b_status string_reserve(struct b_string_p *str, size_t capacity) { if (str->s_max >= capacity) { return B_SUCCESS; @@ -518,7 +473,7 @@ b_status b_string_reserve(struct b_string *str, size_t capacity) } static enum b_status replace_ansi( - struct b_string *str, size_t start, size_t length, const char *new_data) + struct b_string_p *str, size_t start, size_t length, const char *new_data) { b_status status = B_SUCCESS; size_t new_data_len = strlen(new_data); @@ -533,7 +488,7 @@ static enum b_status replace_ansi( size_t new_str_len = str->s_len - length + new_data_len; if (new_str_len > str->s_max) { - status = b_string_reserve(str, new_str_len); + status = string_reserve(str, new_str_len); } if (!B_OK(status)) { @@ -557,7 +512,7 @@ static enum b_status replace_ansi( } static enum b_status replace_utf8( - struct b_string *str, size_t start, size_t length, const char *new_data) + struct b_string_p *str, size_t start, size_t length, const char *new_data) { if (start >= str->s_codepoints) { return B_ERR_INVALID_ARGUMENT; @@ -585,7 +540,7 @@ static enum b_status replace_utf8( size_t new_total_bytes = str->s_len - old_data_nr_bytes + new_data_nr_bytes; if (new_total_bytes > str->s_max) { - status = b_string_reserve(str, new_total_bytes); + status = string_reserve(str, new_total_bytes); } if (!B_OK(status)) { @@ -610,8 +565,8 @@ static enum b_status replace_utf8( return B_SUCCESS; } -b_status b_string_replace( - struct b_string *str, size_t start, size_t length, const char *new_data) +static b_status string_replace( + struct b_string_p *str, size_t start, size_t length, const char *new_data) { if (str->s_len == 
str->s_codepoints) { return replace_ansi(str, start, length, new_data); @@ -620,11 +575,11 @@ b_status b_string_replace( return replace_utf8(str, start, length, new_data); } -b_status b_string_replace_all(b_string *str, const char *new_data) +static b_status string_replace_all(struct b_string_p *str, const char *new_data) { size_t new_len = strlen(new_data); - b_string_reserve(str, new_len); - char *dest = (char *)b_string_ptr(str); + string_reserve(str, new_len); + char *dest = string_ptr(str); memcpy(dest, new_data, new_len); dest[new_len] = '\0'; str->s_len = new_len; @@ -632,7 +587,7 @@ b_status b_string_replace_all(b_string *str, const char *new_data) return B_SUCCESS; } -static enum b_status remove_ansi(struct b_string *str, size_t start, size_t length) +static enum b_status remove_ansi(struct b_string_p *str, size_t start, size_t length) { b_status status = B_SUCCESS; @@ -660,7 +615,7 @@ static enum b_status remove_ansi(struct b_string *str, size_t start, size_t leng return B_SUCCESS; } -static enum b_status remove_utf8(struct b_string *str, size_t start, size_t length) +static enum b_status remove_utf8(struct b_string_p *str, size_t start, size_t length) { size_t remove_offset = 0, remove_nr_bytes = 0; enum b_status status = convert_codepoint_range_to_byte_range( @@ -686,7 +641,8 @@ static enum b_status remove_utf8(struct b_string *str, size_t start, size_t leng return B_SUCCESS; } -enum b_status b_string_remove(struct b_string *str, size_t start, size_t length) +static enum b_status string_remove( + struct b_string_p *str, size_t start, size_t length) { if (str->s_len == str->s_codepoints) { return remove_ansi(str, start, length); @@ -695,7 +651,7 @@ enum b_status b_string_remove(struct b_string *str, size_t start, size_t length) return remove_utf8(str, start, length); } -b_status b_string_transform(struct b_string *str, int (*transformer)(int)) +static b_status string_transform(struct b_string_p *str, int (*transformer)(int)) { char *s = string_ptr(str); 
for (size_t i = 0; i < str->s_len; i++) { @@ -709,7 +665,7 @@ b_status b_string_transform(struct b_string *str, int (*transformer)(int)) return B_SUCCESS; } -static enum b_status trim_ansi(struct b_string *str) +static enum b_status trim_ansi(struct b_string_p *str) { char *s = string_ptr(str); size_t whitespace_end = 0; @@ -735,7 +691,7 @@ static enum b_status trim_ansi(struct b_string *str) return B_SUCCESS; } -static enum b_status trim_utf8(struct b_string *str) +static enum b_status trim_utf8(struct b_string_p *str) { char *s = string_ptr(str); size_t whitespace_end = 0; @@ -778,7 +734,7 @@ static enum b_status trim_utf8(struct b_string *str) return B_SUCCESS; } -b_status b_string_trim(struct b_string *str) +static b_status string_trim(struct b_string_p *str) { if (str->s_len == 0) { return B_SUCCESS; @@ -792,7 +748,7 @@ b_status b_string_trim(struct b_string *str) } static enum b_status string_insert_cstr_ansi( - struct b_string *dest, const char *src, size_t nr_bytes, size_t at) + struct b_string_p *dest, const char *src, size_t nr_bytes, size_t at) { if (at >= dest->s_len) { at = dest->s_len; @@ -817,7 +773,7 @@ static enum b_status string_insert_cstr_ansi( } static enum b_status string_insert_cstr_utf8( - struct b_string *dest, const char *src, size_t nr_bytes, + struct b_string_p *dest, const char *src, size_t nr_bytes, size_t codepoint_offset) { if (codepoint_offset >= dest->s_codepoints) { @@ -858,7 +814,7 @@ static enum b_status string_insert_cstr_utf8( } static enum b_status string_insert_wstr_ansi( - struct b_string *dest, const b_wchar *src, size_t nr_codepoints, size_t at) + struct b_string_p *dest, const b_wchar *src, size_t nr_codepoints, size_t at) { if (at >= dest->s_len) { at = dest->s_len; @@ -902,7 +858,7 @@ static enum b_status string_insert_wstr_ansi( } static enum b_status string_insert_wstr_utf8( - struct b_string *dest, const b_wchar *src, size_t nr_codepoints, + struct b_string_p *dest, const b_wchar *src, size_t nr_codepoints, size_t 
codepoint_offset) { if (codepoint_offset >= dest->s_codepoints) { @@ -961,7 +917,7 @@ static enum b_status string_insert_wstr_utf8( } static enum b_status string_insert_cstr( - struct b_string *dest, const char *src, size_t nr_bytes, size_t at) + struct b_string_p *dest, const char *src, size_t nr_bytes, size_t at) { if (dest->s_len == dest->s_codepoints) { return string_insert_cstr_ansi(dest, src, nr_bytes, at); @@ -971,7 +927,7 @@ static enum b_status string_insert_cstr( } static enum b_status string_insert_wstr( - struct b_string *dest, const b_wchar *src, size_t nr_codepoints, size_t at) + struct b_string_p *dest, const b_wchar *src, size_t nr_codepoints, size_t at) { if (dest->s_len == dest->s_codepoints) { return string_insert_wstr_ansi(dest, src, nr_codepoints, at); @@ -981,128 +937,30 @@ static enum b_status string_insert_wstr( } static enum b_status string_insertf( - struct b_string *dest, size_t at, const char *format, va_list arg) + struct b_string_p *dest, size_t at, const char *format, va_list arg) { char buf[1024]; - size_t len = vsnprintf(buf, sizeof buf, format, arg); - return string_insert_cstr(dest, buf, len, at); + int n = vsnprintf(buf, sizeof buf, format, arg); /* n is the untruncated length, or negative on a formatting error */ + return n < 0 ? B_ERR_INVALID_ARGUMENT : string_insert_cstr(dest, buf, (size_t)n < sizeof buf ? (size_t)n : sizeof buf - 1, at); /* never hand over more bytes than buf actually holds */ } -enum b_status b_string_insert_c(struct b_string *dest, char c, size_t at) +static enum b_status string_insert_c(struct b_string_p *dest, char c, size_t at) { return string_insert_cstr(dest, &c, 1, at); } -enum b_status b_string_insert_wc(struct b_string *dest, b_wchar c, size_t at) +static enum b_status string_insert_wc(struct b_string_p *dest, b_wchar c, size_t at) { return string_insert_wstr(dest, &c, 1, at); } -enum b_status b_string_insert_s( - struct b_string *dest, const struct b_string *src, size_t at) +static enum b_status string_insert_s( + struct b_string_p *dest, const struct b_string_p *src, size_t at) { - return string_insert_cstr(dest, b_string_ptr(src), src->s_len, at); + return string_insert_cstr(dest, string_ptr(src), src->s_len, at); } -enum b_status b_string_insert_cstr(struct b_string *dest, const char *src, size_t at) 
-{ - return string_insert_cstr(dest, src, strlen(src), at); -} - -enum b_status b_string_insert_wstr( - struct b_string *dest, const b_wchar *src, size_t at) -{ - return string_insert_wstr(dest, src, b_wstrlen(src), at); -} - -enum b_status b_string_insert_cstrf( - struct b_string *dest, size_t at, const char *format, ...) -{ - va_list arg; - va_start(arg, format); - enum b_status status = string_insertf(dest, at, format, arg); - va_end(arg); - - return status; -} - -enum b_status b_string_insert_cstrn( - b_string *dest, const char *src, size_t len, size_t at) -{ - return string_insert_cstr(dest, src, len, at); -} - -enum b_status b_string_append_c(struct b_string *dest, char c) -{ - return b_string_insert_c(dest, c, SIZE_MAX); -} - -enum b_status b_string_append_wc(struct b_string *dest, b_wchar c) -{ - return b_string_insert_wc(dest, c, SIZE_MAX); -} - -enum b_status b_string_append_s(struct b_string *dest, const struct b_string *src) -{ - return b_string_insert_s(dest, src, SIZE_MAX); -} - -enum b_status b_string_append_cstr(struct b_string *dest, const char *src) -{ - return b_string_insert_cstr(dest, src, SIZE_MAX); -} - -enum b_status b_string_append_wstr(struct b_string *dest, const b_wchar *src) -{ - return b_string_insert_wstr(dest, src, SIZE_MAX); -} - -enum b_status b_string_append_cstrf(struct b_string *dest, const char *format, ...) 
-{ - va_list arg; - va_start(arg, format); - enum b_status status = string_insertf(dest, SIZE_MAX, format, arg); - va_end(arg); - - return status; -} - -enum b_status b_string_prepend_c(struct b_string *dest, char c) -{ - return b_string_insert_c(dest, c, 0); -} - -enum b_status b_string_prepend_wc(struct b_string *dest, b_wchar c) -{ - return b_string_insert_wc(dest, c, 0); -} - -enum b_status b_string_prepend_s(struct b_string *dest, const struct b_string *src) -{ - return b_string_insert_s(dest, src, 0); -} - -enum b_status b_string_prepend_cstr(struct b_string *dest, const char *src) -{ - return b_string_insert_cstr(dest, src, 0); -} - -enum b_status b_string_prepend_wstr(struct b_string *dest, const b_wchar *src) -{ - return b_string_insert_wstr(dest, src, 0); -} - -enum b_status b_string_prepend_cstrf(struct b_string *dest, const char *format, ...) -{ - va_list arg; - va_start(arg, format); - enum b_status status = string_insertf(dest, 0, format, arg); - va_end(arg); - - return status; -} - -void b_string_clear(struct b_string *str) +static void string_clear(struct b_string_p *str) { if (str->s_len == 0) { return; @@ -1114,8 +972,6 @@ void b_string_clear(struct b_string *str) str->s_codepoints = 0; } -static struct b_iterator_ops it_ops; - static bool has_prefix(const char *s, const char *prefix, size_t *prefix_len) { size_t len = 0; @@ -1153,7 +1009,7 @@ static enum b_status find_next_token(struct b_string_iterator *it) { size_t offset = it->_ds; size_t prefix_len = 0; - char *start = string_ptr(it->_s); + char *start = string_ptr(it->_s_p); bool found_delim_last_time = (it->_f & STRING_TOK_F_FOUND_DELIM) != 0; bool found_delim = false; bool include_empty = (it->_f & B_STRING_TOK_F_INCLUDE_EMPTY_TOKENS); @@ -1169,7 +1025,7 @@ static enum b_status find_next_token(struct b_string_iterator *it) found_delim = has_prefixes(s, it->_d, it->_nd, &prefix_len); if (found_delim) { - if (it->_tmp->s_len == 0 && !include_empty) { + if (it->_tmp_p->s_len == 0 && 
!include_empty) { /* this token is empty, skip it */ offset += prefix_len; found_delim = false; @@ -1188,12 +1044,12 @@ static enum b_status find_next_token(struct b_string_iterator *it) b_string_append_wc(it->_tmp, c); offset += utf8_codepoint_size(c); - if (offset > it->_s->s_len) { + if (offset > it->_s_p->s_len) { break; } } - bool end = !found_delim && it->_tmp->s_len == 0; + bool end = !found_delim && it->_tmp_p->s_len == 0; if (include_empty && found_delim_last_time) { end = false; @@ -1208,13 +1064,13 @@ static enum b_status find_next_token(struct b_string_iterator *it) it->_ds = offset + prefix_len; it->string_value = b_string_ptr(it->_tmp); - it->string_length = it->_tmp->s_len; - it->string_codepoints = it->_tmp->s_codepoints; + it->string_length = it->_tmp_p->s_len; + it->string_codepoints = it->_tmp_p->s_codepoints; return B_SUCCESS; } -enum b_status b_string_tokenise( - struct b_string *str, const char *delims[], size_t nr_delims, +static enum b_status string_tokenise( + struct b_string_p *str, const char *delims[], size_t nr_delims, b_string_tokenise_flags flags, struct b_string_iterator *it) { memset(it, 0x0, sizeof *it); @@ -1223,7 +1079,7 @@ enum b_status b_string_tokenise( return B_ERR_INVALID_ARGUMENT; } - struct b_string *tmp = b_string_create(); + b_string *tmp = b_string_create(); if (!tmp) { return B_ERR_NO_MEMORY; } @@ -1232,20 +1088,22 @@ enum b_status b_string_tokenise( it->_m = ITERATOR_MODE_TOKENS; it->_d = delims; it->_nd = nr_delims; - it->_s = str; + it->_s_p = str; it->_f = flags; it->_tmp = tmp; + it->_tmp_p = b_object_get_private(tmp, B_TYPE_STRING); enum b_status status = find_next_token(it); if (!B_OK(status)) { - b_string_release(tmp); + b_string_unref(tmp); it->_tmp = NULL; + it->_tmp_p = NULL; } return status; } -size_t b_string_get_size(const struct b_string *str, b_strlen_flags flags) +static size_t string_get_size(const struct b_string_p *str, b_strlen_flags flags) { switch (flags) { case B_STRLEN_NORMAL: @@ -1253,16 
+1111,16 @@ size_t b_string_get_size(const struct b_string *str, b_strlen_flags flags) case B_STRLEN_CODEPOINTS: return str->s_codepoints; default: - return b_strlen(b_string_ptr(str), flags); + return b_strlen(string_ptr(str), flags); } } -size_t b_string_get_capacity(const struct b_string *str) +static size_t string_get_capacity(const struct b_string_p *str) { return str->s_max; } -bool b_string_compare(const struct b_string *a, const struct b_string *b) +static bool string_compare(const struct b_string_p *a, const struct b_string_p *b) { if (a->s_len != b->s_len) { return false; @@ -1272,8 +1130,8 @@ bool b_string_compare(const struct b_string *a, const struct b_string *b) return true; } - const char *ap = b_string_ptr(a); - const char *bp = b_string_ptr(b); + const char *ap = string_ptr(a); + const char *bp = string_ptr(b); for (size_t i = 0; i < a->s_len; i++) { if (ap[i] != bp[i]) { @@ -1284,27 +1142,27 @@ bool b_string_compare(const struct b_string *a, const struct b_string *b) return true; } -char b_string_front(const struct b_string *str) +static char string_front(const struct b_string_p *str) { if (str->s_len == 0) { return 0; } - const char *s = b_string_ptr(str); + const char *s = string_ptr(str); return s[0]; } -char b_string_back(const struct b_string *str) +static char string_back(const struct b_string_p *str) { if (str->s_len == 0) { return 0; } - const char *s = b_string_ptr(str); + const char *s = string_ptr(str); return s[str->s_len - 1]; } -void b_string_pop_back(struct b_string *str) +static void string_pop_back(struct b_string_p *str) { if (str->s_len == 0) { return; @@ -1316,48 +1174,59 @@ void b_string_pop_back(struct b_string *str) str->s_len--; } -const char *b_string_ptr(const struct b_string *str) +static b_string *string_substr(const struct b_string_p *str, size_t start, size_t len) { - if (string_is_inline(str)) { - return str->s_data.d_inline; - } - - return str->s_data.d_external; -} - -struct b_string *b_string_substr(const struct 
b_string *str, size_t start, size_t len) -{ - if (start > b_string_get_size(str, B_STRLEN_NORMAL)) { + if (start > string_get_size(str, B_STRLEN_NORMAL)) { return NULL; } - if (start + len > b_string_get_size(str, B_STRLEN_NORMAL)) { - len = b_string_get_size(str, B_STRLEN_NORMAL) - start; + if (len > string_get_size(str, B_STRLEN_NORMAL) - start) { /* overflow-safe clamp: start <= size was checked above, so the subtraction cannot wrap */ + len = string_get_size(str, B_STRLEN_NORMAL) - start; } - struct b_string *newstr = b_string_create(); - b_string_reserve(newstr, len); + b_string *newstr = b_string_create(); + struct b_string_p *newstr_p = b_object_get_private(newstr, B_TYPE_STRING); + string_reserve(newstr_p, len); - const char *src = b_string_ptr(str) + start; - char *dest = string_ptr(newstr); + const char *src = string_ptr(str) + start; + char *dest = string_ptr(newstr_p); memcpy(dest, src, len); - newstr->s_len = len; + newstr_p->s_len = len; return newstr; } +static uint64_t string_hash(const struct b_string_p *str) +{ +#define FNV1_OFFSET_BASIS 0xcbf29ce484222325 +#define FNV1_PRIME 0x100000001b3 + uint64_t hash = FNV1_OFFSET_BASIS; + size_t i = 0; + + const char *s = string_ptr(str); + + for (i = 0; i < str->s_len; i++) { + hash ^= (unsigned char)s[i]; /* mix in the raw byte value; a plain (possibly signed) char would sign-extend bytes >= 0x80 */ + hash *= FNV1_PRIME; + } + + return hash; +} + +/*** STREAM FUNCTIONS *********************************************************/ + static enum b_status stream_close(struct b_stream *stream) { - struct b_string *str = stream->s_ptr; - b_string_release(str); + b_string *str = stream->s_ptr0; + b_string_unref(str); return B_SUCCESS; } static enum b_status stream_getc(struct b_stream *stream, int *out) { - struct b_string *str = stream->s_ptr; + struct b_string_p *str = stream->s_ptr1; if (stream->s_cursor >= str->s_len) { return B_ERR_NO_DATA; } @@ -1372,7 +1241,7 @@ static enum b_status stream_getc(struct b_stream *stream, int *out) static enum b_status stream_read( struct b_stream *stream, unsigned char *buf, size_t count, size_t *nr_read) { - struct b_string *str = stream->s_ptr; + struct b_string_p *str = 
stream->s_ptr1; if (stream->s_cursor >= str->s_len) { *nr_read = 0; return B_SUCCESS; @@ -1394,11 +1263,11 @@ static enum b_status stream_write( struct b_stream *stream, const unsigned char *buf, size_t count, size_t *nr_written) { - struct b_string *str = stream->s_ptr; + struct b_string_p *str = stream->s_ptr1; enum b_status status = B_SUCCESS; if (stream->s_cursor + count > str->s_max) { - status = b_string_reserve(str, stream->s_cursor + count); + status = string_reserve(str, stream->s_cursor + count); } if (!B_OK(status)) { @@ -1416,7 +1285,7 @@ static enum b_status stream_write( static enum b_status stream_seek( struct b_stream *stream, long long offset, b_stream_seek_origin origin) { - struct b_string *str = stream->s_ptr; + struct b_string_p *str = stream->s_ptr1; size_t abs_offset; switch (origin) { @@ -1440,13 +1309,237 @@ static enum b_status stream_seek( static enum b_status stream_reserve(struct b_stream *stream, size_t len) { - struct b_string *str = stream->s_ptr; + struct b_string_p *str = stream->s_ptr1; size_t new_capacity = str->s_len + len; - return b_string_reserve(str, new_capacity); + return string_reserve(str, new_capacity); } -enum b_status b_string_open_stream(struct b_string *str, struct b_stream **out) +/*** PUBLIC FUNCTIONS *********************************************************/ + +b_string *b_string_create_from_cstr(const char *s) +{ + b_string *str = b_string_create(); + if (!str) { + return NULL; + } + + if (!s) { + return str; + } + + struct b_string_p *p = b_object_get_private(str, B_TYPE_STRING); + + size_t s_len = strlen(s); + size_t s_codepoints = get_number_of_codepoints(s, s_len); + b_string_reserve(str, s_len); + + char *dest = string_ptr(p); + memcpy(dest, s, s_len); + dest[s_len] = 0; + + p->s_len = s_len; + p->s_codepoints = s_codepoints; + + return str; +} + +b_string *b_string_create_from_c(char c, size_t count) +{ + b_string *str = b_string_create(); + if (!str) { + return NULL; + } + + struct b_string_p *p = 
b_object_get_private(str, B_TYPE_STRING); + + string_change_capacity(p, count); + char *s = string_ptr(p); + for (size_t i = 0; i < count; i++) { + s[i] = c; + } + + p->s_len = count; + p->s_codepoints = count; + return str; +} + +b_string *b_string_duplicate(const b_string *str) +{ + B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_duplicate, str); +} + +char *b_string_steal(b_string *str) +{ + B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_steal, str); +} + +b_status b_string_reserve(b_string *str, size_t capacity) +{ + B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_reserve, str, capacity); +} + +b_status b_string_replace( + b_string *str, size_t start, size_t length, const char *new_data) +{ + B_CLASS_DISPATCH_STATIC( + B_TYPE_STRING, string_replace, str, start, length, new_data); +} + +b_status b_string_replace_all(b_string *str, const char *new_data) +{ + B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_replace_all, str, new_data); +} + +enum b_status b_string_remove(b_string *str, size_t start, size_t length) +{ + B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_remove, str, start, length); +} + +b_status b_string_transform(b_string *str, int (*transformer)(int)) +{ + B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_transform, str, transformer); +} + +b_status b_string_trim(b_string *str) +{ + B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_trim, str); +} + +enum b_status b_string_insert_c(b_string *dest, char c, size_t at) +{ + B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_insert_c, dest, c, at); +} + +enum b_status b_string_insert_wc(b_string *dest, b_wchar c, size_t at) +{ + B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_insert_wc, dest, c, at); +} + +enum b_status b_string_insert_s(b_string *dest, const b_string *src, size_t at) +{ + struct b_string_p *dest_p = b_object_get_private(dest, B_TYPE_STRING); + const struct b_string_p *src_p = b_object_get_private(src, B_TYPE_STRING); + return string_insert_s(dest_p, src_p, at); +} + +enum b_status 
b_string_insert_cstr(b_string *dest, const char *src, size_t at) +{ + struct b_string_p *dest_p = b_object_get_private(dest, B_TYPE_STRING); + return string_insert_cstr(dest_p, src, strlen(src), at); +} + +enum b_status b_string_insert_wstr(b_string *dest, const b_wchar *src, size_t at) +{ + struct b_string_p *dest_p = b_object_get_private(dest, B_TYPE_STRING); + return string_insert_wstr(dest_p, src, b_wstrlen(src), at); +} + +enum b_status b_string_insert_cstrf( + b_string *dest, size_t at, const char *format, ...) +{ + struct b_string_p *dest_p = b_object_get_private(dest, B_TYPE_STRING); + + va_list arg; + va_start(arg, format); + enum b_status status = string_insertf(dest_p, at, format, arg); + va_end(arg); + + return status; +} + +enum b_status b_string_insert_cstrn( + b_string *dest, const char *src, size_t len, size_t at) +{ + B_CLASS_DISPATCH_STATIC( + B_TYPE_STRING, string_insert_cstr, dest, src, len, at); +} + +enum b_status b_string_append_cstrf(b_string *dest, const char *format, ...) +{ + struct b_string_p *dest_p = b_object_get_private(dest, B_TYPE_STRING); + + va_list arg; + va_start(arg, format); + enum b_status status = string_insertf(dest_p, SIZE_MAX, format, arg); + va_end(arg); + + return status; +} + +enum b_status b_string_prepend_cstrf(b_string *dest, const char *format, ...) 
+{
+	struct b_string_p *dest_p = b_object_get_private(dest, B_TYPE_STRING);
+
+	va_list arg;
+	va_start(arg, format);
+	enum b_status status = string_insertf(dest_p, 0, format, arg);
+	va_end(arg);
+
+	return status;
+}
+
+void b_string_clear(b_string *str)
+{
+	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_clear, str);
+}
+
+enum b_status b_string_tokenise(
+	b_string *str, const char *delims[], size_t nr_delims,
+	b_string_tokenise_flags flags, struct b_string_iterator *it)
+{
+	B_CLASS_DISPATCH_STATIC(
+		B_TYPE_STRING, string_tokenise, str, delims, nr_delims, flags, it);
+}
+
+size_t b_string_get_size(const b_string *str, b_strlen_flags flags)
+{
+	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_get_size, str, flags);
+}
+
+size_t b_string_get_capacity(const b_string *str)
+{
+	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_get_capacity, str);
+}
+
+bool b_string_compare(const b_string *a, const b_string *b)
+{
+	struct b_string_p *ap = b_object_get_private(a, B_TYPE_STRING);
+	struct b_string_p *bp = b_object_get_private(b, B_TYPE_STRING); /* right-hand operand comes from b, not a */
+	return string_compare(ap, bp);
+}
+
+char b_string_front(const b_string *str)
+{
+	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_front, str);
+}
+
+char b_string_back(const b_string *str)
+{
+	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_back, str);
+}
+
+void b_string_pop_back(b_string *str)
+{
+	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_pop_back, str);
+}
+
+const char *b_string_ptr(const b_string *str)
+{
+	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_ptr, str);
+}
+
+b_string *b_string_substr(const b_string *str, size_t start, size_t len)
+{
+	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_substr, str, start, len);
+}
+
+uint64_t b_string_hash(const b_string *str)
+{
+	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_hash, str);
+}
+
+enum b_status b_string_open_stream(b_string *str, struct b_stream **out)
 {
 	struct b_stream *stream = malloc(sizeof *stream);
 	if (!stream) {
@@ -1457,7 +1550,8 @@ enum b_status
b_string_open_stream(struct b_string *str, struct b_stream **out) stream->s_mode |= B_STREAM_READ | B_STREAM_WRITE; - stream->s_ptr = b_string_retain(str); + stream->s_ptr0 = b_string_ref(str); + stream->s_ptr1 = b_object_get_private(str, B_TYPE_STRING); stream->s_close = stream_close; stream->s_getc = stream_getc; stream->s_read = stream_read; @@ -1470,6 +1564,103 @@ enum b_status b_string_open_stream(struct b_string *str, struct b_stream **out) return B_SUCCESS; } +/*** PUBLIC ALIAS FUNCTIONS ***************************************************/ + +enum b_status b_string_append_c(b_string *dest, char c) +{ + return b_string_insert_c(dest, c, SIZE_MAX); +} + +enum b_status b_string_append_wc(b_string *dest, b_wchar c) +{ + return b_string_insert_wc(dest, c, SIZE_MAX); +} + +enum b_status b_string_append_s(b_string *dest, const b_string *src) +{ + return b_string_insert_s(dest, src, SIZE_MAX); +} + +enum b_status b_string_append_cstr(b_string *dest, const char *src) +{ + return b_string_insert_cstr(dest, src, SIZE_MAX); +} + +enum b_status b_string_append_wstr(b_string *dest, const b_wchar *src) +{ + return b_string_insert_wstr(dest, src, SIZE_MAX); +} + +enum b_status b_string_prepend_c(b_string *dest, char c) +{ + return b_string_insert_c(dest, c, 0); +} + +enum b_status b_string_prepend_wc(b_string *dest, b_wchar c) +{ + return b_string_insert_wc(dest, c, 0); +} + +enum b_status b_string_prepend_s(b_string *dest, const b_string *src) +{ + return b_string_insert_s(dest, src, 0); +} + +enum b_status b_string_prepend_cstr(b_string *dest, const char *src) +{ + return b_string_insert_cstr(dest, src, 0); +} + +enum b_status b_string_prepend_wstr(b_string *dest, const b_wchar *src) +{ + return b_string_insert_wstr(dest, src, 0); +} + +/*** VIRTUAL FUNCTIONS ********************************************************/ + +static void string_init(b_object *obj, void *priv) +{ + struct b_string_p *str = priv; + + str->s_len = 0; + str->s_codepoints = 0; + str->s_max = 
STRING_INLINE_CAPACITY;
+}
+
+static void string_fini(b_object *obj, void *priv)
+{
+	struct b_string_p *str = priv;
+	if (!string_is_inline(str)) {
+		free(string_ptr(str));
+	}
+}
+
+static void string_to_string(const b_object *obj, struct b_stream *out)
+{
+	struct b_string_p *str = b_object_get_private(obj, B_TYPE_STRING);
+	const char *s = string_ptr(str);
+	for (size_t i = 0; i < str->s_len; i++) {
+		b_stream_write_char(out, s[i]);
+	}
+}
+
+/*** CLASS DEFINITION *********************************************************/
+
+B_TYPE_CLASS_DEFINITION_BEGIN(b_string)
+	B_TYPE_CLASS_INTERFACE_BEGIN(b_object, B_TYPE_OBJECT)
+		B_INTERFACE_ENTRY(to_string) = string_to_string;
+	B_TYPE_CLASS_INTERFACE_END(b_object, B_TYPE_OBJECT)
+B_TYPE_CLASS_DEFINITION_END(b_string)
+
+B_TYPE_DEFINITION_BEGIN(b_string)
+	B_TYPE_ID(0x200194f6, 0x0327, 0x4a82, 0xb9c9, 0xb62ddd038c33);
+	B_TYPE_CLASS(b_string_class);
+	B_TYPE_INSTANCE_INIT(string_init);
+	B_TYPE_INSTANCE_FINI(string_fini); /* NOTE(review): assumes B_TYPE_INSTANCE_FINI is the finaliser counterpart of B_TYPE_INSTANCE_INIT - confirm */
+B_TYPE_DEFINITION_END(b_string)
+
+/*** ITERATOR FUNCTIONS *******************************************************/
+
 static bool string_iterator_next(struct b_iterator *it)
 {
 	return b_string_iterator_next((struct b_string_iterator *)it);
@@ -1489,27 +1680,29 @@ static struct b_iterator_ops it_ops = {
 static void iterator_cleanup(b_string_iterator *it)
 {
 	if (it->_tmp) {
-		b_string_release(it->_tmp);
+		b_string_unref(it->_tmp);
 	}
 
 	memset(it, 0x0, sizeof *it);
 }
 
-int b_string_iterator_begin(const struct b_string *string, b_string_iterator *it)
+int b_string_iterator_begin(const b_string *string, b_string_iterator *it)
 {
 	memset(it, 0x0, sizeof *it);
 
+	struct b_string_p *p = b_object_get_private(string, B_TYPE_STRING);
+
 	it->_base.it_ops = &it_ops;
 
-	if (!string->s_len) {
+	if (!p->s_len) {
 		it->status = B_ERR_NO_DATA;
 		return -1;
 	}
 
-	const char *p = b_string_ptr(string);
+	const char *s = string_ptr(p); /* it->_s_p is still NULL from the memset above; it is assigned below */
 	it->_m = ITERATOR_MODE_CHARS;
-	it->_s = B_STRING(string);
-	it->char_value =
utf8_codepoint_decode(p); + it->_s_p = p; + it->char_value = utf8_codepoint_decode(s); if (it->char_value == B_WCHAR_INVALID) { it->status = B_ERR_BAD_FORMAT; @@ -1534,9 +1727,9 @@ static bool chars_iterator_next(b_string_iterator *it) it->byte_index += stride; it->codepoint_index += 1; - if (it->byte_index >= it->_s->s_len) { + if (it->byte_index >= it->_s_p->s_len) { iterator_cleanup(it); - it->_s = NULL; + it->_s_p = NULL; it->byte_index = 0; it->codepoint_index = 0; it->char_value = B_WCHAR_INVALID; @@ -1544,11 +1737,11 @@ static bool chars_iterator_next(b_string_iterator *it) return false; } - char *p = string_ptr(it->_s) + it->byte_index; + char *p = string_ptr(it->_s_p) + it->byte_index; it->char_value = utf8_codepoint_decode(p); if (it->char_value == B_WCHAR_INVALID) { iterator_cleanup(it); - it->_s = NULL; + it->_s_p = NULL; it->byte_index = 0; it->codepoint_index = 0; it->char_value = B_WCHAR_INVALID; @@ -1572,7 +1765,7 @@ static bool tokens_iterator_next(b_string_iterator *it) return false; } - it->string_value = string_ptr(it->_tmp); + it->string_value = string_ptr(it->_tmp_p); it->iteration_index++; return true; @@ -1592,11 +1785,11 @@ bool b_string_iterator_next(b_string_iterator *it) static bool chars_iterator_is_valid(const struct b_string_iterator *it) { - if (!it->_s) { + if (!it->_s_p) { return false; } - if (it->byte_index >= it->_s->s_len) { + if (it->byte_index >= it->_s_p->s_len) { return false; } @@ -1609,11 +1802,11 @@ static bool chars_iterator_is_valid(const struct b_string_iterator *it) static bool tokens_iterator_is_valid(const struct b_string_iterator *it) { - if (!it->_s) { + if (!it->_s_p) { return false; } - if (it->byte_index >= it->_s->s_len) { + if (it->byte_index >= it->_s_p->s_len) { return false; } @@ -1636,22 +1829,7 @@ bool b_string_iterator_is_valid(const struct b_string_iterator *it) } } -static void string_release(struct b_dsref *obj) -{ - struct b_string *str = B_STRING(obj); - if (!string_is_inline(str)) { - 
free(string_ptr(str)); - } -} - -static void string_to_string(const struct b_dsref *obj, struct b_stream *out) -{ - b_string *str = B_STRING(obj); - const char *s = b_string_ptr(str); - for (size_t i = 0; i < str->s_len; i++) { - b_stream_write_char(out, s[i]); - } -} +/*** MISC FUNCTIONS ***********************************************************/ char *b_strdup(const char *s) { @@ -1723,25 +1901,3 @@ size_t b_wstrlen(const b_wchar *s) ; return len; } - -uint64_t b_string_hash(const struct b_string *str) -{ -#define FNV1_OFFSET_BASIS 0xcbf29ce484222325 -#define FNV1_PRIME 0x100000001b3 - uint64_t hash = FNV1_OFFSET_BASIS; - size_t i = 0; - - const char *s = b_string_ptr(str); - - for (i = 0; i < str->s_len; i++) { - hash ^= s[i]; - hash *= FNV1_PRIME; - } - - return hash; -} - -b_dsref_type_id b_string_type_id(void) -{ - return (b_dsref_type_id)&string_type; -} diff --git a/ds/string.h b/ds/string.h deleted file mode 100644 index 163aaa4..0000000 --- a/ds/string.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _BLUELIB_STRING_H_ -#define _BLUELIB_STRING_H_ - -#include "object.h" - -/* maximum length of string that can be stored inline, not including null-terminator */ -#define STRING_INLINE_CAPACITY 15 - -struct b_string { - struct b_dsref s_base; - /* length of string in bytes, not including null-terminator. - * a multi-byte utf-8 codepoint will be counted as multiple bytes here */ - unsigned int s_len; - /* length of string in codepoints, not including null-terminator. - * a multi-byte utf-8 codepoint will be counted as one codepoint here */ - unsigned int s_codepoints; - /* maximum length of string storable in the currently-allocated buffer - * in bytes, not including null terminator */ - unsigned int s_max; - union { - char d_inline[STRING_INLINE_CAPACITY + 1]; - char *d_external; - } s_data; -}; - -#endif