From b292168c70f1d3d6ed3735166a2a2978b24bdaee Mon Sep 17 00:00:00 2001 From: Max Wash Date: Wed, 29 Oct 2025 14:35:36 +0000 Subject: [PATCH] ds: string: update iterator interface --- ds/include/blue/ds/string.h | 35 +--- ds/string.c | 352 ++++++++++++++++++------------------ 2 files changed, 180 insertions(+), 207 deletions(-) diff --git a/ds/include/blue/ds/string.h b/ds/include/blue/ds/string.h index 2cd2c4c..f00e7f0 100644 --- a/ds/include/blue/ds/string.h +++ b/ds/include/blue/ds/string.h @@ -13,39 +13,21 @@ B_DECLS_BEGIN; struct b_stream; struct b_string_p; -#define B_TYPE_STRING (b_string_get_type()) +#define B_TYPE_STRING (b_string_get_type()) +#define B_TYPE_STRING_ITERATOR (b_string_iterator_get_type()) B_DECLARE_TYPE(b_string); +B_DECLARE_TYPE(b_string_iterator); B_TYPE_CLASS_DECLARATION_BEGIN(b_string) B_TYPE_CLASS_DECLARATION_END(b_string) +B_TYPE_CLASS_DECLARATION_BEGIN(b_string_iterator) +B_TYPE_CLASS_DECLARATION_END(b_string_iterator) + #define B_CSTR(s) (b_string_create_from_cstr(s)) #define B_RV_CSTR(s) (B_RV(b_string_create_from_cstr(s))) -#define b_string_foreach(it, str) \ - for (int z__b_unique_name() = b_string_iterator_begin(str, it); \ - b_string_iterator_is_valid(it); b_string_iterator_next(it)) - -typedef struct b_string_iterator { - b_iterator _base; - int _m, _f; - b_string *_tmp; - struct b_string_p *_s_p, *_tmp_p; - - const char **_d; - size_t _nd, _ds; - - b_status status; - size_t iteration_index; - size_t byte_index; - size_t codepoint_index; - b_wchar char_value; - const char *string_value; - size_t string_length; - size_t string_codepoints; -} b_string_iterator; - typedef enum b_strlen_flags { B_STRLEN_NORMAL = 0, B_STRLEN_IGNORE_ESC = 0x01u, @@ -59,6 +41,7 @@ typedef enum b_string_tokenise_flags { } b_string_tokenise_flags; BLUE_API b_type b_string_get_type(void); +BLUE_API b_type b_string_iterator_get_type(void); B_TYPE_DEFAULT_CONSTRUCTOR(b_string, B_TYPE_STRING); BLUE_API b_string *b_string_create_from_cstr(const char *s); @@ -112,9 +95,9 @@ BLUE_API b_status b_string_insert_cstrf( b_string *dest, size_t at, const char *format, ...); BLUE_API void b_string_clear(b_string *str); -BLUE_API b_status b_string_tokenise( +BLUE_API b_iterator *b_string_tokenise( b_string *str, const char *delims[], size_t nr_delims, - b_string_tokenise_flags flags, b_string_iterator *it); + b_string_tokenise_flags flags); BLUE_API size_t b_string_get_size(const b_string *str, b_strlen_flags flags); BLUE_API size_t b_string_get_capacity(const b_string *str); diff --git a/ds/string.c b/ds/string.c index 955d318..842a561 100644 --- a/ds/string.c +++ b/ds/string.c @@ -18,8 +18,6 @@ /*** PRIVATE DATA *************************************************************/ -static struct b_iterator_ops it_ops; - enum iterator_mode { ITERATOR_MODE_NONE = 0, ITERATOR_MODE_CHARS, @@ -42,6 +40,24 @@ struct b_string_p { } s_data; }; +struct b_string_iterator_p { + int _m, _f; + b_string *_tmp; + struct b_string_p *_s_p, *_tmp_p; + + const char **_d; + size_t _nd, _ds; + + b_status status; + size_t iteration_index; + size_t byte_index; + size_t codepoint_index; + b_wchar char_value; + const char *string_value; + size_t string_length; + size_t string_codepoints; +}; + /*** PRIVATE FUNCTIONS ********************************************************/ static bool string_is_inline(const struct b_string_p *str) @@ -854,7 +870,7 @@ static bool has_prefixes( return false; } -static enum b_status find_next_token(struct b_string_iterator *it) +static enum b_status find_next_token(struct b_string_iterator_p *it) { size_t offset = it->_ds; size_t prefix_len = 0; @@ -918,22 +934,23 @@ static enum b_status find_next_token(struct b_string_iterator *it) return B_SUCCESS; } -static enum b_status string_tokenise( +static b_iterator *string_tokenise( struct b_string_p *str, const char *delims[], size_t nr_delims, - b_string_tokenise_flags flags, struct b_string_iterator *it) + b_string_tokenise_flags flags) { - memset(it, 0x0, sizeof *it); - if (!nr_delims) { - return B_ERR_INVALID_ARGUMENT; + return NULL; } b_string *tmp = b_string_create(); if (!tmp) { - return B_ERR_NO_MEMORY; + return NULL; } - it->_base.it_ops = &it_ops; + b_string_iterator *it_obj = b_object_create(B_TYPE_STRING_ITERATOR); + struct b_string_iterator_p *it + = b_object_get_private(it_obj, B_TYPE_STRING_ITERATOR); + it->_m = ITERATOR_MODE_TOKENS; it->_d = delims; it->_nd = nr_delims; @@ -949,7 +966,7 @@ static enum b_status string_tokenise( it->_tmp_p = NULL; } - return status; + return it_obj; } static size_t string_get_size(const struct b_string_p *str, b_strlen_flags flags) @@ -1239,12 +1256,12 @@ void b_string_clear(b_string *str) B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_clear, str); } -enum b_status b_string_tokenise( +b_iterator *b_string_tokenise( b_string *str, const char *delims[], size_t nr_delims, - b_string_tokenise_flags flags, struct b_string_iterator *it) + b_string_tokenise_flags flags) { B_CLASS_DISPATCH_STATIC( - B_TYPE_STRING, string_tokenise, str, delims, nr_delims, flags, it); + B_TYPE_STRING, string_tokenise, str, delims, nr_delims, flags); } size_t b_string_get_size(const b_string *str, b_strlen_flags flags) @@ -1374,8 +1391,131 @@ static void string_to_string(const b_object *obj, b_stream *out) } } +/*** ITERATOR FUNCTIONS *******************************************************/ + +static void iterator_fini(b_iterator *obj) +{ + struct b_string_iterator_p *it + = b_object_get_private(obj, B_TYPE_STRING_ITERATOR); + if (it->_tmp) { + b_string_unref(it->_tmp); + } + + memset(it, 0x0, sizeof *it); +} + +static b_iterator *iterator_begin(b_object *obj) +{ + b_string_iterator *it_obj = b_object_create(B_TYPE_STRING_ITERATOR); + struct b_string_iterator_p *it + = b_object_get_private(it_obj, B_TYPE_STRING_ITERATOR); + struct b_string_p *p = b_object_get_private(obj, B_TYPE_STRING); + + if (!p->s_len) { + it->status = B_ERR_NO_DATA; + return it_obj; + } + + const char *s = string_ptr(p); + it->_m = ITERATOR_MODE_CHARS; + it->_s_p = p; + it->char_value = b_wchar_utf8_codepoint_decode(s); + + if (it->char_value == B_WCHAR_INVALID) { + it->status = B_ERR_BAD_FORMAT; + } + + return it_obj; +} + +static enum b_status chars_iterator_move_next(struct b_string_iterator_p *it) +{ + if (!it->_s_p) { + return B_ERR_NO_DATA; + } + + size_t stride = b_wchar_utf8_codepoint_size(it->char_value); + if (stride == 0) { + return B_ERR_NO_DATA; + } + + it->byte_index += stride; + it->codepoint_index += 1; + + if (it->byte_index >= it->_s_p->s_len) { + return B_ERR_NO_DATA; + } + + char *p = string_ptr(it->_s_p) + it->byte_index; + it->char_value = b_wchar_utf8_codepoint_decode(p); + if (it->char_value == B_WCHAR_INVALID) { + return B_ERR_BAD_FORMAT; + } + + it->iteration_index++; + return B_SUCCESS; +} + +static enum b_status tokens_iterator_move_next(struct b_string_iterator_p *it) +{ + if (!it->_s_p) { + return B_ERR_NO_DATA; + } + + enum b_status status = find_next_token(it); + if (!B_OK(status)) { + return status; + } + + it->string_value = string_ptr(it->_tmp_p); + it->iteration_index++; + + return B_SUCCESS; +} + +static enum b_status iterator_move_next(const b_iterator *obj) +{ + struct b_string_iterator_p *it + = b_object_get_private(obj, B_TYPE_STRING_ITERATOR); + + switch (it->_m) { + case ITERATOR_MODE_CHARS: + return chars_iterator_move_next(it); + case ITERATOR_MODE_TOKENS: + return tokens_iterator_move_next(it); + default: + return B_ERR_BAD_STATE; + } +} + +static b_iterator_value chars_iterator_get_value(struct b_string_iterator_p *it) +{ + return B_ITERATOR_VALUE_INT(it->char_value); +} + +static b_iterator_value tokens_iterator_get_value(struct b_string_iterator_p *it) +{ + return B_ITERATOR_VALUE_CPTR(it->string_value); +} + +static b_iterator_value iterator_get_value(b_iterator *obj) +{ + struct b_string_iterator_p *it + = b_object_get_private(obj, B_TYPE_STRING_ITERATOR); + + switch (it->_m) { + case ITERATOR_MODE_CHARS: + return chars_iterator_get_value(it); + case ITERATOR_MODE_TOKENS: + return tokens_iterator_get_value(it); + default: + return B_ITERATOR_VALUE_NULL; + } +} + /*** CLASS DEFINITION *********************************************************/ +// ---- b_string DEFINITION B_TYPE_CLASS_DEFINITION_BEGIN(b_string) B_TYPE_CLASS_INTERFACE_BEGIN(b_object, B_TYPE_OBJECT) B_INTERFACE_ENTRY(to_string) = string_to_string; @@ -1390,175 +1530,25 @@ B_TYPE_DEFINITION_BEGIN(b_string) B_TYPE_INSTANCE_FINI(string_fini); B_TYPE_DEFINITION_END(b_string) -/*** ITERATOR FUNCTIONS *******************************************************/ +// ---- b_string_iterator DEFINITION +B_TYPE_CLASS_DEFINITION_BEGIN(b_string_iterator) + B_TYPE_CLASS_INTERFACE_BEGIN(b_object, B_TYPE_OBJECT) + B_INTERFACE_ENTRY(to_string) = NULL; + B_TYPE_CLASS_INTERFACE_END(b_object, B_TYPE_OBJECT) -static bool string_iterator_next(struct b_iterator *it) -{ - return b_string_iterator_next((struct b_string_iterator *)it); -} + B_TYPE_CLASS_INTERFACE_BEGIN(b_iterator, B_TYPE_ITERATOR) + B_INTERFACE_ENTRY(it_move_next) = iterator_move_next; + B_INTERFACE_ENTRY(it_erase) = NULL; + B_INTERFACE_ENTRY(it_get_value) = iterator_get_value; + B_TYPE_CLASS_INTERFACE_END(b_iterator, B_TYPE_ITERATOR) +B_TYPE_CLASS_DEFINITION_END(b_string_iterator) -static bool string_iterator_is_valid(const struct b_iterator *it) -{ - return b_string_iterator_is_valid((struct b_string_iterator *)it); -} - -static struct b_iterator_ops it_ops = { - .it_next = string_iterator_next, - .it_close = NULL, - .it_is_valid = string_iterator_is_valid, -}; - -static void iterator_cleanup(b_string_iterator *it) -{ - if (it->_tmp) { - b_string_unref(it->_tmp); - } - - memset(it, 0x0, sizeof *it); -} - -int b_string_iterator_begin(const b_string *string, b_string_iterator *it) -{ - memset(it, 0x0, sizeof *it); - - struct b_string_p *p = b_object_get_private(string, B_TYPE_STRING); - - it->_base.it_ops = &it_ops; - - if (!p->s_len) { - it->status = B_ERR_NO_DATA; - return -1; - } - - const char *s = string_ptr(p); - it->_m = ITERATOR_MODE_CHARS; - it->_s_p = p; - it->char_value = b_wchar_utf8_codepoint_decode(s); - - if (it->char_value == B_WCHAR_INVALID) { - it->status = B_ERR_BAD_FORMAT; - return -1; - } - - return 0; -} - -static bool chars_iterator_next(b_string_iterator *it) -{ - if (!b_string_iterator_is_valid(it)) { - return false; - } - - size_t stride = b_wchar_utf8_codepoint_size(it->char_value); - if (stride == 0) { - iterator_cleanup(it); - return false; - } - - it->byte_index += stride; - it->codepoint_index += 1; - - if (it->byte_index >= it->_s_p->s_len) { - iterator_cleanup(it); - it->_s_p = NULL; - it->byte_index = 0; - it->codepoint_index = 0; - it->char_value = B_WCHAR_INVALID; - it->status = B_ERR_NO_DATA; - return false; - } - - char *p = string_ptr(it->_s_p) + it->byte_index; - it->char_value = b_wchar_utf8_codepoint_decode(p); - if (it->char_value == B_WCHAR_INVALID) { - iterator_cleanup(it); - it->_s_p = NULL; - it->byte_index = 0; - it->codepoint_index = 0; - it->char_value = B_WCHAR_INVALID; - it->status = B_ERR_BAD_FORMAT; - return false; - } - - it->iteration_index++; - return true; -} - -static bool tokens_iterator_next(b_string_iterator *it) -{ - if (!b_string_iterator_is_valid(it)) { - return false; - } - - enum b_status status = find_next_token(it); - if (!B_OK(status)) { - iterator_cleanup(it); - return false; - } - - it->string_value = string_ptr(it->_tmp_p); - it->iteration_index++; - - return true; -} - -bool b_string_iterator_next(b_string_iterator *it) -{ - switch (it->_m) { - case ITERATOR_MODE_CHARS: - return chars_iterator_next(it); - case ITERATOR_MODE_TOKENS: - return tokens_iterator_next(it); - default: - return false; - } -} - -static bool chars_iterator_is_valid(const struct b_string_iterator *it) -{ - if (!it->_s_p) { - return false; - } - - if (it->byte_index >= it->_s_p->s_len) { - return false; - } - - if (it->char_value == B_WCHAR_INVALID) { - return false; - } - - return true; -} - -static bool tokens_iterator_is_valid(const struct b_string_iterator *it) -{ - if (!it->_s_p) { - return false; - } - - if (it->byte_index >= it->_s_p->s_len) { - return false; - } - - if (!it->string_value) { - return false; - } - - return true; -} - -bool b_string_iterator_is_valid(const struct b_string_iterator *it) -{ - switch (it->_m) { - case ITERATOR_MODE_CHARS: - return chars_iterator_is_valid(it); - case ITERATOR_MODE_TOKENS: - return tokens_iterator_is_valid(it); - default: - return false; - } -} +B_TYPE_DEFINITION_BEGIN(b_string_iterator) + B_TYPE_ID(0xfc06cee1, 0xb63a, 0x4718, 0x9b8e, 0x3bd2eb7a8608); + B_TYPE_EXTENDS(B_TYPE_ITERATOR); + B_TYPE_CLASS(b_string_iterator_class); + B_TYPE_INSTANCE_PRIVATE(struct b_string_iterator_p); +B_TYPE_DEFINITION_END(b_string_iterator) /*** MISC FUNCTIONS ***********************************************************/