ds: string: update iterator interface

This commit is contained in:
2025-10-29 14:35:36 +00:00
parent 0f89901239
commit b292168c70
2 changed files with 180 additions and 207 deletions

View File

@@ -18,8 +18,6 @@
/*** PRIVATE DATA *************************************************************/
static struct b_iterator_ops it_ops;
enum iterator_mode {
ITERATOR_MODE_NONE = 0,
ITERATOR_MODE_CHARS,
@@ -42,6 +40,24 @@ struct b_string_p {
} s_data;
};
/*
 * Private per-instance state of a b_string_iterator object.
 *
 * The underscore-prefixed members are bookkeeping used by the iterator
 * implementation; the remaining members describe the current position and
 * the current value.
 */
struct b_string_iterator_p {
	/* Iteration mode: one of the ITERATOR_MODE_* constants. */
	int _m;
	/* NOTE(review): presumably the tokenise flags; not written in the
	 * code visible here -- confirm. */
	int _f;
	/* String object released with b_string_unref() by iterator_fini(). */
	b_string *_tmp;
	/* Private data of the string being iterated; NULL means there is
	 * nothing to advance over. */
	struct b_string_p *_s_p;
	/* Token text is read from here via string_ptr() in token mode.
	 * NOTE(review): presumably the private data of `_tmp` -- confirm. */
	struct b_string_p *_tmp_p;
	/* Delimiter array supplied to string_tokenise(). */
	const char **_d;
	/* Number of entries in `_d`. */
	size_t _nd;
	/* Offset that find_next_token() starts from. */
	size_t _ds;

	/* Status recorded when the iterator was set up. */
	b_status status;
	/* Incremented each time the iterator advances successfully. */
	size_t iteration_index;
	/* Byte offset of the current codepoint (character mode). */
	size_t byte_index;
	/* Index of the current codepoint (character mode). */
	size_t codepoint_index;
	/* Current decoded codepoint (character mode). */
	b_wchar char_value;
	/* Current token text (token mode). */
	const char *string_value;
	/* NOTE(review): presumably the byte length of the current token;
	 * not written in the code visible here -- confirm. */
	size_t string_length;
	/* NOTE(review): presumably the codepoint count of the current
	 * token; not written in the code visible here -- confirm. */
	size_t string_codepoints;
};
/*** PRIVATE FUNCTIONS ********************************************************/
static bool string_is_inline(const struct b_string_p *str)
@@ -854,7 +870,7 @@ static bool has_prefixes(
return false;
}
static enum b_status find_next_token(struct b_string_iterator *it)
static enum b_status find_next_token(struct b_string_iterator_p *it)
{
size_t offset = it->_ds;
size_t prefix_len = 0;
@@ -918,22 +934,23 @@ static enum b_status find_next_token(struct b_string_iterator *it)
return B_SUCCESS;
}
static enum b_status string_tokenise(
static b_iterator *string_tokenise(
struct b_string_p *str, const char *delims[], size_t nr_delims,
b_string_tokenise_flags flags, struct b_string_iterator *it)
b_string_tokenise_flags flags)
{
memset(it, 0x0, sizeof *it);
if (!nr_delims) {
return B_ERR_INVALID_ARGUMENT;
return NULL;
}
b_string *tmp = b_string_create();
if (!tmp) {
return B_ERR_NO_MEMORY;
return NULL;
}
it->_base.it_ops = &it_ops;
b_string_iterator *it_obj = b_object_create(B_TYPE_STRING_ITERATOR);
struct b_string_iterator_p *it
= b_object_get_private(it_obj, B_TYPE_STRING_ITERATOR);
it->_m = ITERATOR_MODE_TOKENS;
it->_d = delims;
it->_nd = nr_delims;
@@ -949,7 +966,7 @@ static enum b_status string_tokenise(
it->_tmp_p = NULL;
}
return status;
return it_obj;
}
static size_t string_get_size(const struct b_string_p *str, b_strlen_flags flags)
@@ -1239,12 +1256,12 @@ void b_string_clear(b_string *str)
B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_clear, str);
}
/*
 * Public tokenising entry point: forwards its arguments to the static
 * string_tokenise() implementation through B_CLASS_DISPATCH_STATIC for
 * B_TYPE_STRING.
 *
 * NOTE(review): this span contains two signatures and two dispatch argument
 * lists (a status-returning form with an iterator out-parameter, and a form
 * returning a b_iterator pointer). It looks like the pre- and post-change
 * lines of a diff rendered together -- confirm which one is current.
 */
enum b_status b_string_tokenise(
b_iterator *b_string_tokenise(
b_string *str, const char *delims[], size_t nr_delims,
b_string_tokenise_flags flags, struct b_string_iterator *it)
b_string_tokenise_flags flags)
{
B_CLASS_DISPATCH_STATIC(
B_TYPE_STRING, string_tokenise, str, delims, nr_delims, flags, it);
B_TYPE_STRING, string_tokenise, str, delims, nr_delims, flags);
}
size_t b_string_get_size(const b_string *str, b_strlen_flags flags)
@@ -1374,8 +1391,131 @@ static void string_to_string(const b_object *obj, b_stream *out)
}
}
/*** ITERATOR FUNCTIONS *******************************************************/
/*
 * Tear-down helper for a string iterator object: drops the reference held
 * in `_tmp` (when there is one) and then zeroes the whole private
 * structure.
 */
static void iterator_fini(b_iterator *obj)
{
	struct b_string_iterator_p *priv
		= b_object_get_private(obj, B_TYPE_STRING_ITERATOR);
	b_string *scratch = priv->_tmp;

	if (scratch != NULL) {
		b_string_unref(scratch);
	}

	memset(priv, 0x0, sizeof *priv);
}
/*
 * Create a character-mode iterator positioned on the first codepoint of the
 * string object `obj`.
 *
 * Returns the new iterator object, or NULL when the iterator object could
 * not be created. On a returned iterator the private `status` field is set
 * to:
 *   - B_ERR_NO_DATA    when the string length (s_len) is zero; the iterator
 *                      mode is left unset in that case;
 *   - B_ERR_BAD_FORMAT when the first codepoint fails to decode.
 * Otherwise `status` is left as the object was created.
 */
static b_iterator *iterator_begin(b_object *obj)
{
	b_string_iterator *it_obj = b_object_create(B_TYPE_STRING_ITERATOR);
	if (!it_obj) {
		/* Object creation is treated as fallible, in the same way
		 * string_tokenise() checks b_string_create(); without this
		 * check the private data lookup below would work on NULL. */
		return NULL;
	}

	struct b_string_iterator_p *it
		= b_object_get_private(it_obj, B_TYPE_STRING_ITERATOR);
	struct b_string_p *p = b_object_get_private(obj, B_TYPE_STRING);

	if (!p->s_len) {
		it->status = B_ERR_NO_DATA;
		return it_obj;
	}

	const char *s = string_ptr(p);
	it->_m = ITERATOR_MODE_CHARS;
	it->_s_p = p;
	it->char_value = b_wchar_utf8_codepoint_decode(s);

	if (it->char_value == B_WCHAR_INVALID) {
		it->status = B_ERR_BAD_FORMAT;
	}

	return it_obj;
}
static enum b_status chars_iterator_move_next(struct b_string_iterator_p *it)
{
if (!it->_s_p) {
return B_ERR_NO_DATA;
}
size_t stride = b_wchar_utf8_codepoint_size(it->char_value);
if (stride == 0) {
return B_ERR_NO_DATA;
}
it->byte_index += stride;
it->codepoint_index += 1;
if (it->byte_index >= it->_s_p->s_len) {
return B_ERR_NO_DATA;
}
char *p = string_ptr(it->_s_p) + it->byte_index;
it->char_value = b_wchar_utf8_codepoint_decode(p);
if (it->char_value == B_WCHAR_INVALID) {
return B_ERR_BAD_FORMAT;
}
it->iteration_index++;
return B_SUCCESS;
}
static enum b_status tokens_iterator_move_next(struct b_string_iterator_p *it)
{
if (!it->_s_p) {
return B_ERR_NO_DATA;
}
enum b_status status = find_next_token(it);
if (!B_OK(status)) {
return status;
}
it->string_value = string_ptr(it->_tmp_p);
it->iteration_index++;
return B_SUCCESS;
}
/*
 * it_move_next entry of the string iterator class: advances the iterator
 * according to its mode. Returns B_ERR_BAD_STATE when the mode is neither
 * ITERATOR_MODE_CHARS nor ITERATOR_MODE_TOKENS.
 */
static enum b_status iterator_move_next(const b_iterator *obj)
{
	struct b_string_iterator_p *priv
		= b_object_get_private(obj, B_TYPE_STRING_ITERATOR);

	if (priv->_m == ITERATOR_MODE_CHARS) {
		return chars_iterator_move_next(priv);
	}

	if (priv->_m == ITERATOR_MODE_TOKENS) {
		return tokens_iterator_move_next(priv);
	}

	return B_ERR_BAD_STATE;
}
/* Wrap the current codepoint of a character-mode iterator as an integer
 * iterator value. */
static b_iterator_value chars_iterator_get_value(struct b_string_iterator_p *it)
{
	b_iterator_value current = B_ITERATOR_VALUE_INT(it->char_value);
	return current;
}
/* Wrap the current token text of a token-mode iterator as a const-pointer
 * iterator value. */
static b_iterator_value tokens_iterator_get_value(struct b_string_iterator_p *it)
{
	b_iterator_value current = B_ITERATOR_VALUE_CPTR(it->string_value);
	return current;
}
/*
 * it_get_value entry of the string iterator class: returns the current
 * value according to the iterator mode, or B_ITERATOR_VALUE_NULL when the
 * mode is neither ITERATOR_MODE_CHARS nor ITERATOR_MODE_TOKENS.
 */
static b_iterator_value iterator_get_value(b_iterator *obj)
{
	struct b_string_iterator_p *priv
		= b_object_get_private(obj, B_TYPE_STRING_ITERATOR);

	if (priv->_m == ITERATOR_MODE_CHARS) {
		return chars_iterator_get_value(priv);
	}

	if (priv->_m == ITERATOR_MODE_TOKENS) {
		return tokens_iterator_get_value(priv);
	}

	return B_ITERATOR_VALUE_NULL;
}
/*** CLASS DEFINITION *********************************************************/
// ---- b_string DEFINITION
B_TYPE_CLASS_DEFINITION_BEGIN(b_string)
B_TYPE_CLASS_INTERFACE_BEGIN(b_object, B_TYPE_OBJECT)
B_INTERFACE_ENTRY(to_string) = string_to_string;
@@ -1390,175 +1530,25 @@ B_TYPE_DEFINITION_BEGIN(b_string)
B_TYPE_INSTANCE_FINI(string_fini);
B_TYPE_DEFINITION_END(b_string)
/*** ITERATOR FUNCTIONS *******************************************************/
// ---- b_string_iterator DEFINITION
/*
 * Class definition for b_string_iterator. It fills in two interfaces:
 * b_object (no to_string implementation) and b_iterator (move-next and
 * get-value are implemented, erase is not).
 */
B_TYPE_CLASS_DEFINITION_BEGIN(b_string_iterator)
B_TYPE_CLASS_INTERFACE_BEGIN(b_object, B_TYPE_OBJECT)
/* No string conversion is provided for iterator objects. */
B_INTERFACE_ENTRY(to_string) = NULL;
B_TYPE_CLASS_INTERFACE_END(b_object, B_TYPE_OBJECT)
/*
 * Adapter with the generic b_iterator signature: forwards to
 * b_string_iterator_next().
 * NOTE(review): this function definition sits between two interface
 * sections of the class definition; it looks like a leftover of the
 * previous ops-table interface shown together with the new lines --
 * confirm.
 */
static bool string_iterator_next(struct b_iterator *it)
{
return b_string_iterator_next((struct b_string_iterator *)it);
}
B_TYPE_CLASS_INTERFACE_BEGIN(b_iterator, B_TYPE_ITERATOR)
/* Advancing dispatches on the iterator mode (characters or tokens). */
B_INTERFACE_ENTRY(it_move_next) = iterator_move_next;
/* Erasing through the iterator is not implemented. */
B_INTERFACE_ENTRY(it_erase) = NULL;
B_INTERFACE_ENTRY(it_get_value) = iterator_get_value;
B_TYPE_CLASS_INTERFACE_END(b_iterator, B_TYPE_ITERATOR)
B_TYPE_CLASS_DEFINITION_END(b_string_iterator)
/* Adapter with the generic b_iterator signature: forwards the validity
 * query to b_string_iterator_is_valid(). */
static bool string_iterator_is_valid(const struct b_iterator *it)
{
	const struct b_string_iterator *string_it
		= (const struct b_string_iterator *)it;

	return b_string_iterator_is_valid(string_it);
}
/*
 * Operations table installed in `_base.it_ops` of string iterators:
 * advancing and validity checks are implemented, there is no close
 * operation.
 */
static struct b_iterator_ops it_ops = {
	.it_next = string_iterator_next,
	.it_is_valid = string_iterator_is_valid,
	.it_close = NULL,
};
/*
 * Release the string referenced by `_tmp` (when there is one) and zero the
 * whole iterator structure.
 */
static void iterator_cleanup(b_string_iterator *it)
{
	b_string *scratch = it->_tmp;

	if (scratch != NULL) {
		b_string_unref(scratch);
	}

	memset(it, 0x0, sizeof *it);
}
/*
 * Initialise the caller-provided iterator `it` for character-by-character
 * iteration over `string`.
 *
 * The iterator is zeroed first and its operations table is installed.
 * Returns 0 when the iterator is positioned on the first codepoint.
 * Returns -1 with `it->status` set to B_ERR_NO_DATA when the string length
 * is zero, or to B_ERR_BAD_FORMAT when the first codepoint does not decode.
 */
int b_string_iterator_begin(const b_string *string, b_string_iterator *it)
{
	memset(it, 0x0, sizeof *it);
	it->_base.it_ops = &it_ops;

	struct b_string_p *priv = b_object_get_private(string, B_TYPE_STRING);
	if (priv->s_len == 0) {
		it->status = B_ERR_NO_DATA;
		return -1;
	}

	const char *text = string_ptr(priv);

	it->_m = ITERATOR_MODE_CHARS;
	it->_s_p = priv;
	it->char_value = b_wchar_utf8_codepoint_decode(text);

	if (it->char_value != B_WCHAR_INVALID) {
		return 0;
	}

	it->status = B_ERR_BAD_FORMAT;
	return -1;
}
/*
 * Advance a character-mode iterator by one codepoint.
 *
 * Returns true when a new codepoint was decoded. Returns false when the
 * iterator is not valid, when the current codepoint has zero encoded size
 * (the iterator is cleaned up), when the end of the string is reached
 * (status B_ERR_NO_DATA) or when the next codepoint fails to decode
 * (status B_ERR_BAD_FORMAT). In the last two cases the iterator is cleaned
 * up and its position fields are reset.
 */
static bool chars_iterator_next(b_string_iterator *it)
{
	if (!b_string_iterator_is_valid(it)) {
		return false;
	}

	size_t width = b_wchar_utf8_codepoint_size(it->char_value);
	if (width == 0) {
		iterator_cleanup(it);
		return false;
	}

	it->byte_index += width;
	it->codepoint_index += 1;

	enum b_status failure;
	if (it->byte_index >= it->_s_p->s_len) {
		failure = B_ERR_NO_DATA;
	} else {
		char *cursor = string_ptr(it->_s_p) + it->byte_index;
		it->char_value = b_wchar_utf8_codepoint_decode(cursor);

		if (it->char_value != B_WCHAR_INVALID) {
			it->iteration_index++;
			return true;
		}

		failure = B_ERR_BAD_FORMAT;
	}

	/* Shared failure path: release resources, then leave the iterator
	 * in an explicit "finished" state carrying the reason. */
	iterator_cleanup(it);
	it->_s_p = NULL;
	it->byte_index = 0;
	it->codepoint_index = 0;
	it->char_value = B_WCHAR_INVALID;
	it->status = failure;
	return false;
}
/*
 * Advance a token-mode iterator to the next token.
 *
 * Returns false when the iterator is not valid, or when find_next_token()
 * fails (the iterator is then cleaned up). Returns true otherwise, with
 * `string_value` pointing at the text obtained from `_tmp_p` and
 * `iteration_index` incremented.
 */
static bool tokens_iterator_next(b_string_iterator *it)
{
	if (!b_string_iterator_is_valid(it)) {
		return false;
	}

	enum b_status result = find_next_token(it);
	if (B_OK(result)) {
		it->string_value = string_ptr(it->_tmp_p);
		it->iteration_index++;
		return true;
	}

	iterator_cleanup(it);
	return false;
}
/*
 * Advance `it` according to its mode. Returns false when the mode is
 * neither ITERATOR_MODE_CHARS nor ITERATOR_MODE_TOKENS; otherwise returns
 * the result of the mode-specific advance function.
 */
bool b_string_iterator_next(b_string_iterator *it)
{
	if (it->_m == ITERATOR_MODE_CHARS) {
		return chars_iterator_next(it);
	}

	if (it->_m == ITERATOR_MODE_TOKENS) {
		return tokens_iterator_next(it);
	}

	return false;
}
static bool chars_iterator_is_valid(const struct b_string_iterator *it)
{
if (!it->_s_p) {
return false;
}
if (it->byte_index >= it->_s_p->s_len) {
return false;
}
if (it->char_value == B_WCHAR_INVALID) {
return false;
}
return true;
}
static bool tokens_iterator_is_valid(const struct b_string_iterator *it)
{
if (!it->_s_p) {
return false;
}
if (it->byte_index >= it->_s_p->s_len) {
return false;
}
if (!it->string_value) {
return false;
}
return true;
}
/*
 * Report whether `it` currently refers to a value, according to its mode.
 * An iterator whose mode is neither ITERATOR_MODE_CHARS nor
 * ITERATOR_MODE_TOKENS is never valid.
 */
bool b_string_iterator_is_valid(const struct b_string_iterator *it)
{
	if (it->_m == ITERATOR_MODE_CHARS) {
		return chars_iterator_is_valid(it);
	}

	if (it->_m == ITERATOR_MODE_TOKENS) {
		return tokens_iterator_is_valid(it);
	}

	return false;
}
/*
 * Type definition for b_string_iterator: extends B_TYPE_ITERATOR, uses
 * b_string_iterator_class as its class, and stores a
 * struct b_string_iterator_p as per-instance private data.
 *
 * NOTE(review): no B_TYPE_INSTANCE_FINI entry is listed here, whereas the
 * b_string type definition registers string_fini. iterator_fini() in this
 * file releases `_tmp`, but nothing visible registers or calls it, so the
 * `_tmp` reference may never be dropped. Confirm whether iterator_fini
 * should be registered, and whether its signature matches what
 * B_TYPE_INSTANCE_FINI expects.
 */
B_TYPE_DEFINITION_BEGIN(b_string_iterator)
B_TYPE_ID(0xfc06cee1, 0xb63a, 0x4718, 0x9b8e, 0x3bd2eb7a8608);
B_TYPE_EXTENDS(B_TYPE_ITERATOR);
B_TYPE_CLASS(b_string_iterator_class);
B_TYPE_INSTANCE_PRIVATE(struct b_string_iterator_p);
B_TYPE_DEFINITION_END(b_string_iterator)
/*** MISC FUNCTIONS ***********************************************************/