1904 lines
40 KiB
C
1904 lines
40 KiB
C
#include <blue/core/stream.h>
|
|
#include <blue/core/stringstream.h>
|
|
#include <blue/ds/string.h>
|
|
#include <ctype.h>
|
|
#include <stdarg.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* maximum length of string that can be stored inline, not including null-terminator */
|
|
#define STRING_INLINE_CAPACITY 15
|
|
|
|
/* true when x lies in 0x0000-0xD7FF or 0xE000-0x10FFFF, i.e. every value up
 * to 0x10FFFF except the range 0xD800-0xDFFF. the argument is evaluated more
 * than once, so it must not have side effects. */
#define IS_VALID_UTF8_SCALAR(x) \
|
|
(((x) >= 0x0000 && (x) <= 0xD7FF) || ((x) >= 0xE000 && (x) <= 0x10FFFF))
|
|
|
|
/* internal iterator flag: set by find_next_token when the token it produced
 * was ended by a delimiter, cleared when the end of the string was reached */
#define STRING_TOK_F_FOUND_DELIM 0x80
|
|
|
|
/*** PRIVATE DATA *************************************************************/
|
|
|
|
/* iterator operations table installed on string iterators by string_tokenise.
 * NOTE(review): this is only a tentative definition; the initialiser is
 * presumably further down the file -- confirm. */
static struct b_iterator_ops it_ops;
|
|
|
|
/* what a string iterator is walking over. string_tokenise stores
 * ITERATOR_MODE_TOKENS in the iterator's _m field. */
enum iterator_mode {
|
|
/* zero value; an iterator cleared with memset is in this mode */
ITERATOR_MODE_NONE = 0,
|
|
/* NOTE(review): presumably per-character iteration -- no user of this
 * value is visible in this part of the file, confirm */
ITERATOR_MODE_CHARS,
|
|
/* delimiter-separated tokens, see string_tokenise/find_next_token */
ITERATOR_MODE_TOKENS,
|
|
};
|
|
|
|
/* private state of a string object. the helpers in this file treat the string
 * as single-byte ("ansi") when s_len == s_codepoints and as multi-byte utf-8
 * otherwise. */
struct b_string_p {
|
|
/* length of string in bytes, not including null-terminator.
 * a multi-byte utf-8 codepoint will be counted as multiple bytes here */
|
|
unsigned int s_len;
|
|
/* length of string in codepoints, not including null-terminator.
 * a multi-byte utf-8 codepoint will be counted as one codepoint here */
|
|
unsigned int s_codepoints;
|
|
/* maximum length of string storable in the currently-allocated buffer
 * in bytes, not including null terminator */
|
|
unsigned int s_max;
|
|
/* character storage. d_inline is the active member exactly when
 * s_max == STRING_INLINE_CAPACITY (see string_is_inline); otherwise
 * d_external points at a buffer obtained from malloc/realloc. */
union {
|
|
char d_inline[STRING_INLINE_CAPACITY + 1];
|
|
char *d_external;
|
|
} s_data;
|
|
};
|
|
|
|
/*** PRIVATE FUNCTIONS ********************************************************/
|
|
|
|
static bool string_is_inline(const struct b_string_p *str)
|
|
{
|
|
/* strings cannot go below STRING_INLINE_CAPACITY capacity */
|
|
return str->s_max == STRING_INLINE_CAPACITY;
|
|
}
|
|
|
|
static char *string_ptr(const struct b_string_p *str)
|
|
{
|
|
if (string_is_inline(str)) {
|
|
return (char *)str->s_data.d_inline;
|
|
}
|
|
|
|
return str->s_data.d_external;
|
|
}
|
|
|
|
/* number of bytes needed to encode codepoint c as utf-8 (1 to 4), or 0 when
 * c is rejected by IS_VALID_UTF8_SCALAR. */
static size_t utf8_codepoint_size(b_wchar c)
{
	/* largest codepoint representable in 1, 2, 3 and 4 bytes */
	static const unsigned long widest[] = {0x7F, 0x7FF, 0xFFFF, 0x10FFFF};

	if (!IS_VALID_UTF8_SCALAR(c)) {
		return 0;
	}

	/* c is known to be within 0..0x10FFFF here, so the cast is lossless */
	for (size_t n = 0; n < sizeof widest / sizeof widest[0]; n++) {
		if ((unsigned long)c <= widest[n]) {
			return n + 1;
		}
	}

	return 0;
}
|
|
|
|
/* extract the six payload bits of a utf-8 continuation byte (10xxxxxx).
 * returns -1 when c does not have the continuation-byte bit pattern. */
static int32_t decode_utf8_trailer_byte(char c)
{
	const bool top_bit_set = (c & 0x80) != 0;
	const bool next_bit_set = (c & 0x40) != 0;

	if (top_bit_set && !next_bit_set) {
		return c & 0x3F;
	}

	return -1;
}
|
|
|
|
static b_wchar utf8_codepoint_decode(const char *s)
|
|
{
|
|
b_wchar result = 0;
|
|
int len = 0;
|
|
|
|
if (!(s[0] & 0x80)) {
|
|
len = 1;
|
|
result = s[0] & 0x7F;
|
|
} else if (s[0] & 0xC0 && !(s[0] & 0x20)) {
|
|
len = 2;
|
|
result = s[0] & 0x1F;
|
|
result <<= 6;
|
|
} else if (s[0] & 0xE0 && !(s[0] & 0x10)) {
|
|
len = 3;
|
|
result = s[0] & 0x0F;
|
|
result <<= 12;
|
|
} else if (s[0] & 0xF0 && !(s[0] & 0x08)) {
|
|
len = 4;
|
|
result = s[0] & 0x07;
|
|
result <<= 18;
|
|
} else {
|
|
return B_WCHAR_INVALID;
|
|
}
|
|
|
|
for (int i = 1; i < len; i++) {
|
|
int32_t c = decode_utf8_trailer_byte(s[i]);
|
|
if (c == -1) {
|
|
return B_WCHAR_INVALID;
|
|
}
|
|
|
|
c <<= 6 * (len - i - 1);
|
|
result |= c;
|
|
}
|
|
|
|
if (!IS_VALID_UTF8_SCALAR(result)) {
|
|
return B_WCHAR_INVALID;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/* encode codepoint c as utf-8 into s (room for 4 bytes, no terminator is
 * written). returns the number of bytes stored, or 0 without touching s when
 * utf8_codepoint_size rejects c. */
static size_t utf8_codepoint_encode(b_wchar c, char s[4])
{
	/* indexed by sequence length: marker bits and payload mask of byte 0 */
	static const unsigned char lead_marker[] = {0x00, 0x00, 0xC0, 0xE0, 0xF0};
	static const unsigned char lead_mask[] = {0x00, 0x7F, 0x1F, 0x0F, 0x07};

	const size_t len = utf8_codepoint_size(c);
	if (len < 1 || len > 4) {
		return 0;
	}

	/* fill continuation bytes from the last one backwards, six bits each */
	b_wchar remaining = c;
	for (size_t i = len - 1; i > 0; i--) {
		s[i] = (remaining & 0x3F) | 0x80;
		remaining >>= 6;
	}

	s[0] = (remaining & lead_mask[len]) | lead_marker[len];
	return len;
}
|
|
|
|
/* length in bytes (1 to 4) of the utf-8 sequence announced by the lead byte
 * at s, or 0 when that byte cannot start a sequence.
 *
 * only the first byte is inspected. continuation bytes (10xxxxxx) and the
 * bytes 0xF8-0xFF yield 0; the previous bit tests accepted continuation bytes
 * as lead bytes, so malformed input was silently walked over. */
static size_t codepoint_stride(const char *s)
{
	const unsigned char lead = (unsigned char)*s;

	if ((lead & 0x80) == 0x00) {
		return 1;
	}

	if ((lead & 0xE0) == 0xC0) {
		return 2;
	}

	if ((lead & 0xF0) == 0xE0) {
		return 3;
	}

	if ((lead & 0xF8) == 0xF0) {
		return 4;
	}

	return 0;
}
|
|
|
|
/* count the codepoints in the first len bytes of s, stopping early at a null
 * byte.
 *
 * returns 0 when an invalid lead byte is found or when the last sequence
 * would extend past s + len. note that 0 is also the result for an empty
 * range, so callers that need to tell the two apart must check len.
 *
 * only bytes inside [s, s + len) are read: the bound is tested before the
 * byte is dereferenced and no terminator is required at s[len], so the
 * function is safe on buffers that are not null-terminated. */
static size_t get_number_of_codepoints(const char *s, size_t len)
{
	size_t nr_codepoints = 0;
	const char *end = s + len;

	while (s < end && *s) {
		size_t stride = codepoint_stride(s);
		if (stride == 0 || stride > (size_t)(end - s)) {
			/* invalid or truncated codepoint */
			return 0;
		}

		nr_codepoints++;
		s += stride;
	}

	return nr_codepoints;
}
|
|
|
|
/* total number of bytes needed to encode the nr_codepoints codepoints at s
 * as utf-8. returns 0 as soon as one of them is rejected by
 * utf8_codepoint_size (and also for an empty input). */
static size_t get_utf8_encoded_size(const b_wchar *s, size_t nr_codepoints)
{
	const b_wchar *const stop = s + nr_codepoints;
	size_t total = 0;

	for (const b_wchar *cursor = s; cursor != stop; cursor++) {
		const size_t width = utf8_codepoint_size(*cursor);
		if (width == 0) {
			/* invalid codepoint */
			return 0;
		}

		total += width;
	}

	return total;
}
|
|
|
|
static enum b_status convert_codepoint_range_to_byte_range(
|
|
const struct b_string_p *str, size_t cp_start, size_t cp_length,
|
|
size_t *out_byte_start, size_t *out_byte_length)
|
|
{
|
|
const char *s = string_ptr(str);
|
|
size_t byte_offset = 0, byte_length = 0;
|
|
|
|
for (size_t i = 0; i < cp_start; i++) {
|
|
const char *cp = &s[byte_offset];
|
|
if (!cp || byte_offset >= str->s_len) {
|
|
/* out of range */
|
|
return B_ERR_OUT_OF_BOUNDS;
|
|
}
|
|
|
|
size_t stride = codepoint_stride(cp);
|
|
if (!stride) {
|
|
/* invalid codepoint */
|
|
return B_ERR_BAD_STATE;
|
|
}
|
|
|
|
byte_offset += stride;
|
|
}
|
|
|
|
for (size_t i = 0; i < cp_length; i++) {
|
|
size_t cp_offset = byte_offset + byte_length;
|
|
const char *cp = &s[cp_offset];
|
|
if (!cp || (cp_offset >= str->s_len)) {
|
|
/* out of range */
|
|
return B_ERR_OUT_OF_BOUNDS;
|
|
}
|
|
|
|
size_t stride = codepoint_stride(cp);
|
|
if (!stride) {
|
|
/* invalid codepoint */
|
|
return B_ERR_BAD_STATE;
|
|
}
|
|
|
|
byte_length += stride;
|
|
}
|
|
|
|
if (out_byte_start) {
|
|
*out_byte_start = byte_offset;
|
|
}
|
|
|
|
if (out_byte_length) {
|
|
*out_byte_length = byte_length;
|
|
}
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
/* return a pointer to the byte following the utf-8 sequence that starts at
 * this_codepoint, or NULL when this_codepoint does not point at a valid lead
 * byte (continuation bytes and 0xF8-0xFF are rejected).
 *
 * only the lead byte is examined; str is not used. the previous version
 * also computed an unused pointer one before this_codepoint, which is not a
 * valid pointer when this_codepoint is the start of the buffer. */
static char *get_next_codepoint(struct b_string_p *str, char *this_codepoint)
{
	(void)str;

	const unsigned char lead = (unsigned char)*this_codepoint;
	size_t len = 0;

	if ((lead & 0x80) == 0x00) {
		len = 1;
	} else if ((lead & 0xE0) == 0xC0) {
		len = 2;
	} else if ((lead & 0xF0) == 0xE0) {
		len = 3;
	} else if ((lead & 0xF8) == 0xF0) {
		len = 4;
	} else {
		return NULL;
	}

	return this_codepoint + len;
}
|
|
|
|
/* return a pointer to the lead byte of the codepoint that ends just before
 * this_codepoint, or NULL when there is none.
 *
 * scans backwards over continuation bytes (10xxxxxx) until a byte that can
 * start a sequence is found. NULL is returned when the start of the string
 * is reached first, or when the scan hits a byte that is never valid in
 * utf-8 (0xF8-0xFF); the previous version looped forever on such a byte
 * because no branch matched and the cursor was not moved. */
static char *get_previous_codepoint(struct b_string_p *str, char *this_codepoint)
{
	char *start = string_ptr(str);
	char *cursor = this_codepoint;

	while (cursor > start) {
		cursor--;

		const unsigned char byte = (unsigned char)*cursor;
		if ((byte & 0xC0) == 0x80) {
			/* continuation byte, keep looking for the lead byte */
			continue;
		}

		if (byte >= 0xF8) {
			/* not a legal lead byte */
			return NULL;
		}

		return cursor;
	}

	return NULL;
}
|
|
|
|
static char *get_last_codepoint(struct b_string_p *str)
|
|
{
|
|
if (str->s_len == 0) {
|
|
return NULL;
|
|
}
|
|
|
|
return get_previous_codepoint(str, string_ptr(str) + str->s_len);
|
|
}
|
|
|
|
static int string_make_inline(struct b_string_p *str)
|
|
{
|
|
char *buffer = string_ptr(str);
|
|
memcpy(str->s_data.d_inline, buffer, sizeof str->s_data.d_inline);
|
|
str->s_data.d_inline[sizeof str->s_data.d_inline - 1] = '\0';
|
|
|
|
str->s_max = STRING_INLINE_CAPACITY;
|
|
|
|
if (str->s_len >= str->s_max) {
|
|
str->s_len = str->s_max;
|
|
}
|
|
|
|
free(buffer);
|
|
return 0;
|
|
}
|
|
|
|
static int string_resize_large(struct b_string_p *str, size_t capacity)
|
|
{
|
|
char *buffer = string_ptr(str);
|
|
char *new_buffer = realloc(buffer, capacity + 1);
|
|
if (!new_buffer) {
|
|
return -1;
|
|
}
|
|
|
|
str->s_max = capacity;
|
|
str->s_data.d_external = new_buffer;
|
|
return 0;
|
|
}
|
|
|
|
static int string_make_large(struct b_string_p *str, size_t capacity)
|
|
{
|
|
const char *old_buffer = string_ptr(str);
|
|
char *buffer = malloc(capacity + 1);
|
|
if (!buffer) {
|
|
return -1;
|
|
}
|
|
|
|
memcpy(buffer, old_buffer, sizeof str->s_data.d_inline);
|
|
buffer[str->s_len] = '\0';
|
|
|
|
str->s_max = capacity;
|
|
str->s_data.d_external = buffer;
|
|
return 0;
|
|
}
|
|
|
|
static int string_change_capacity(struct b_string_p *str, size_t capacity)
|
|
{
|
|
size_t old_capacity = str->s_max;
|
|
|
|
if (capacity < STRING_INLINE_CAPACITY) {
|
|
capacity = STRING_INLINE_CAPACITY;
|
|
}
|
|
|
|
bool was_inline = string_is_inline(str);
|
|
bool is_now_inline = capacity == STRING_INLINE_CAPACITY;
|
|
|
|
if (capacity == old_capacity) {
|
|
/* this also handles the case where the old and new capacity both fit into the inline buffer. */
|
|
return 0;
|
|
}
|
|
|
|
if (!was_inline && is_now_inline) {
|
|
/* string was large, is now small enough to fit inline. */
|
|
return string_make_inline(str);
|
|
}
|
|
|
|
if (!was_inline) {
|
|
/* string was large, and is still large. */
|
|
return string_resize_large(str, capacity);
|
|
}
|
|
|
|
if (!is_now_inline) {
|
|
/* string was inline, and now large enough to require a buffer. */
|
|
return string_make_large(str, capacity);
|
|
}
|
|
|
|
/* nothing to do */
|
|
return 0;
|
|
}
|
|
|
|
static b_string *string_duplicate(const struct b_string_p *str)
|
|
{
|
|
b_string *new_str = b_string_create();
|
|
if (!str) {
|
|
return NULL;
|
|
}
|
|
|
|
struct b_string_p *new_str_p
|
|
= b_object_get_private(new_str, B_TYPE_STRING);
|
|
|
|
string_change_capacity(new_str_p, str->s_len);
|
|
const char *src = string_ptr(str);
|
|
char *dst = string_ptr(new_str_p);
|
|
|
|
memcpy(dst, src, str->s_len);
|
|
new_str_p->s_len = str->s_len;
|
|
new_str_p->s_codepoints = str->s_codepoints;
|
|
|
|
return new_str;
|
|
}
|
|
|
|
static char *string_steal(struct b_string_p *str)
|
|
{
|
|
char *dest = NULL;
|
|
char *src = string_ptr(str);
|
|
|
|
if (string_is_inline(str)) {
|
|
dest = malloc(str->s_len + 1);
|
|
memcpy(dest, src, str->s_len);
|
|
dest[str->s_len] = 0;
|
|
src[0] = 0;
|
|
} else {
|
|
dest = src;
|
|
str->s_data.d_external = NULL;
|
|
str->s_max = STRING_INLINE_CAPACITY;
|
|
}
|
|
|
|
str->s_len = 0;
|
|
str->s_codepoints = 0;
|
|
return dest;
|
|
}
|
|
|
|
static b_status string_reserve(struct b_string_p *str, size_t capacity)
|
|
{
|
|
if (str->s_max >= capacity) {
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
int err = string_change_capacity(str, capacity);
|
|
|
|
return err == 0 ? B_SUCCESS : B_ERR_NO_MEMORY;
|
|
}
|
|
|
|
static enum b_status replace_ansi(
|
|
struct b_string_p *str, size_t start, size_t length, const char *new_data)
|
|
{
|
|
b_status status = B_SUCCESS;
|
|
size_t new_data_len = strlen(new_data);
|
|
|
|
if (start >= str->s_len) {
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
if (start + length >= str->s_len) {
|
|
length = str->s_len - start;
|
|
}
|
|
|
|
size_t new_str_len = str->s_len - length + new_data_len;
|
|
if (new_str_len > str->s_max) {
|
|
status = string_reserve(str, new_str_len);
|
|
}
|
|
|
|
if (!B_OK(status)) {
|
|
return status;
|
|
}
|
|
|
|
char *s = string_ptr(str);
|
|
|
|
char *substitution_start = s + start;
|
|
char *excess_src = s + start + length;
|
|
size_t excess_length = str->s_len - start - length;
|
|
char *excess_dest = substitution_start + new_data_len;
|
|
|
|
memmove(excess_dest, excess_src, excess_length);
|
|
memmove(substitution_start, new_data, new_data_len);
|
|
s[new_str_len] = '\0';
|
|
|
|
str->s_len = new_str_len;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status replace_utf8(
|
|
struct b_string_p *str, size_t start, size_t length, const char *new_data)
|
|
{
|
|
if (start >= str->s_codepoints) {
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
if (start + length >= str->s_codepoints) {
|
|
length = str->s_codepoints - start;
|
|
}
|
|
|
|
size_t new_data_nr_bytes = strlen(new_data);
|
|
size_t new_data_nr_codepoints
|
|
= get_number_of_codepoints(new_data, new_data_nr_bytes);
|
|
if (new_data_nr_codepoints == 0) {
|
|
/* new_data is not a valid utf-8 string */
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
size_t old_data_offset = 0, old_data_nr_bytes = 0;
|
|
size_t old_data_nr_codepoints = length;
|
|
enum b_status status = convert_codepoint_range_to_byte_range(
|
|
str, start, length, &old_data_offset, &old_data_nr_bytes);
|
|
if (!B_OK(status)) {
|
|
return status;
|
|
}
|
|
|
|
size_t new_total_bytes = str->s_len - old_data_nr_bytes + new_data_nr_bytes;
|
|
if (new_total_bytes > str->s_max) {
|
|
status = string_reserve(str, new_total_bytes);
|
|
}
|
|
|
|
if (!B_OK(status)) {
|
|
return status;
|
|
}
|
|
|
|
char *s = string_ptr(str);
|
|
|
|
char *substitution_start = s + old_data_offset;
|
|
char *excess_src = s + old_data_offset + old_data_nr_bytes;
|
|
size_t excess_length = str->s_len - old_data_offset - old_data_nr_bytes;
|
|
char *excess_dest = substitution_start + new_data_nr_bytes;
|
|
|
|
memmove(excess_dest, excess_src, excess_length);
|
|
memmove(substitution_start, new_data, new_data_nr_bytes);
|
|
s[new_total_bytes] = '\0';
|
|
|
|
str->s_len = new_total_bytes;
|
|
str->s_codepoints -= old_data_nr_codepoints;
|
|
str->s_codepoints += new_data_nr_codepoints;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static b_status string_replace(
|
|
struct b_string_p *str, size_t start, size_t length, const char *new_data)
|
|
{
|
|
if (str->s_len == str->s_codepoints) {
|
|
return replace_ansi(str, start, length, new_data);
|
|
}
|
|
|
|
return replace_utf8(str, start, length, new_data);
|
|
}
|
|
|
|
static b_status string_replace_all(struct b_string_p *str, const char *new_data)
|
|
{
|
|
size_t new_len = strlen(new_data);
|
|
string_reserve(str, new_len);
|
|
char *dest = string_ptr(str);
|
|
memcpy(dest, new_data, new_len);
|
|
dest[new_len] = '\0';
|
|
str->s_len = new_len;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status remove_ansi(struct b_string_p *str, size_t start, size_t length)
|
|
{
|
|
b_status status = B_SUCCESS;
|
|
|
|
if (start >= str->s_len) {
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
if (start + length >= str->s_len) {
|
|
length = str->s_len - start;
|
|
}
|
|
|
|
size_t new_str_len = str->s_len - length;
|
|
|
|
char *s = string_ptr(str);
|
|
|
|
char *removal_start = s + start;
|
|
char *excess_src = s + start + length;
|
|
size_t excess_length = str->s_len - start - length;
|
|
|
|
memmove(removal_start, excess_src, excess_length);
|
|
s[new_str_len] = '\0';
|
|
|
|
str->s_len = new_str_len;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status remove_utf8(struct b_string_p *str, size_t start, size_t length)
|
|
{
|
|
size_t remove_offset = 0, remove_nr_bytes = 0;
|
|
enum b_status status = convert_codepoint_range_to_byte_range(
|
|
str, start, length, &remove_offset, &remove_nr_bytes);
|
|
if (!B_OK(status)) {
|
|
return status;
|
|
}
|
|
|
|
size_t new_total_bytes = str->s_len - remove_nr_bytes;
|
|
|
|
char *s = string_ptr(str);
|
|
|
|
char *removal_start = s + remove_offset;
|
|
char *excess_src = s + remove_offset + remove_nr_bytes;
|
|
size_t excess_length = str->s_len - remove_offset - remove_nr_bytes;
|
|
|
|
memmove(removal_start, excess_src, excess_length);
|
|
s[new_total_bytes] = '\0';
|
|
|
|
str->s_len = new_total_bytes;
|
|
str->s_codepoints -= length;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status string_remove(
|
|
struct b_string_p *str, size_t start, size_t length)
|
|
{
|
|
if (str->s_len == str->s_codepoints) {
|
|
return remove_ansi(str, start, length);
|
|
}
|
|
|
|
return remove_utf8(str, start, length);
|
|
}
|
|
|
|
static b_status string_transform(struct b_string_p *str, int (*transformer)(int))
|
|
{
|
|
char *s = string_ptr(str);
|
|
for (size_t i = 0; i < str->s_len; i++) {
|
|
int c = transformer(s[i]);
|
|
|
|
if (c != 0) {
|
|
s[i] = c;
|
|
}
|
|
}
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status trim_ansi(struct b_string_p *str)
|
|
{
|
|
char *s = string_ptr(str);
|
|
size_t whitespace_end = 0;
|
|
for (size_t i = 0; i < str->s_len; i++) {
|
|
if (!isspace(s[i])) {
|
|
whitespace_end = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
memmove(s, s + whitespace_end, str->s_len - whitespace_end);
|
|
str->s_len -= whitespace_end;
|
|
|
|
for (long i = str->s_len - 1; i >= 0; i--) {
|
|
if (isspace(s[i])) {
|
|
s[i] = 0;
|
|
str->s_len--;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status trim_utf8(struct b_string_p *str)
|
|
{
|
|
char *s = string_ptr(str);
|
|
size_t whitespace_end = 0;
|
|
size_t nr_whitespace_codepoints = 0;
|
|
for (size_t i = 0; i < str->s_len;) {
|
|
b_wchar c = utf8_codepoint_decode(&s[i]);
|
|
|
|
if (!b_wchar_is_space(s[i])) {
|
|
whitespace_end = i;
|
|
break;
|
|
}
|
|
|
|
nr_whitespace_codepoints++;
|
|
}
|
|
|
|
memmove(s, s + whitespace_end, str->s_len - whitespace_end);
|
|
str->s_len -= whitespace_end;
|
|
str->s_codepoints -= nr_whitespace_codepoints;
|
|
|
|
char *p = get_last_codepoint(str);
|
|
if (!p) {
|
|
return B_ERR_BAD_STATE;
|
|
}
|
|
|
|
for (long i = str->s_len - 1; i >= 0;) {
|
|
b_wchar c = utf8_codepoint_decode(p);
|
|
size_t c_size = utf8_codepoint_size(c);
|
|
|
|
if (b_wchar_is_space(c)) {
|
|
memset(p, 0, c_size);
|
|
str->s_len -= c_size;
|
|
str->s_codepoints--;
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
p = get_previous_codepoint(str, p);
|
|
}
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static b_status string_trim(struct b_string_p *str)
|
|
{
|
|
if (str->s_len == 0) {
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
if (str->s_len == str->s_codepoints) {
|
|
return trim_ansi(str);
|
|
}
|
|
|
|
return trim_utf8(str);
|
|
}
|
|
|
|
static enum b_status string_insert_cstr_ansi(
|
|
struct b_string_p *dest, const char *src, size_t nr_bytes, size_t at)
|
|
{
|
|
if (at >= dest->s_len) {
|
|
at = dest->s_len;
|
|
}
|
|
|
|
size_t new_size = dest->s_len + nr_bytes;
|
|
if (dest->s_max < new_size) {
|
|
string_change_capacity(dest, new_size);
|
|
}
|
|
|
|
char *dest_buf = string_ptr(dest);
|
|
char *from = dest_buf + at;
|
|
char *to = dest_buf + at + nr_bytes;
|
|
|
|
memmove(to, from, dest->s_len - at);
|
|
memcpy(from, src, nr_bytes);
|
|
dest_buf[new_size] = '\0';
|
|
|
|
dest->s_len = new_size;
|
|
dest->s_codepoints += nr_bytes;
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status string_insert_cstr_utf8(
|
|
struct b_string_p *dest, const char *src, size_t nr_bytes,
|
|
size_t codepoint_offset)
|
|
{
|
|
if (codepoint_offset >= dest->s_codepoints) {
|
|
codepoint_offset = dest->s_codepoints;
|
|
}
|
|
|
|
size_t byte_offset = 0;
|
|
enum b_status status = B_SUCCESS;
|
|
|
|
if (codepoint_offset == dest->s_codepoints) {
|
|
byte_offset = dest->s_len;
|
|
} else {
|
|
status = convert_codepoint_range_to_byte_range(
|
|
dest, 0, codepoint_offset, NULL, &byte_offset);
|
|
}
|
|
|
|
if (!B_OK(status)) {
|
|
return status;
|
|
}
|
|
|
|
size_t new_total_bytes = dest->s_len + nr_bytes;
|
|
if (dest->s_max < new_total_bytes) {
|
|
string_change_capacity(dest, new_total_bytes);
|
|
}
|
|
|
|
char *dest_buf = string_ptr(dest);
|
|
char *from = dest_buf + byte_offset;
|
|
char *to = dest_buf + byte_offset + nr_bytes;
|
|
|
|
memmove(to, from, dest->s_len - byte_offset);
|
|
memcpy(from, src, nr_bytes);
|
|
dest_buf[new_total_bytes] = '\0';
|
|
|
|
dest->s_len += nr_bytes;
|
|
dest->s_codepoints += get_number_of_codepoints(src, nr_bytes);
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status string_insert_wstr_ansi(
|
|
struct b_string_p *dest, const b_wchar *src, size_t nr_codepoints, size_t at)
|
|
{
|
|
if (at >= dest->s_len) {
|
|
at = dest->s_len;
|
|
}
|
|
|
|
size_t utf8_encoded_size = get_utf8_encoded_size(src, nr_codepoints);
|
|
if (utf8_encoded_size == 0) {
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
size_t new_total_bytes = dest->s_len + utf8_encoded_size;
|
|
if (dest->s_max < new_total_bytes) {
|
|
string_change_capacity(dest, new_total_bytes);
|
|
}
|
|
|
|
char *dest_buf = string_ptr(dest);
|
|
char *from = dest_buf + at;
|
|
char *to = dest_buf + at + utf8_encoded_size;
|
|
memmove(to, from, dest->s_len - at);
|
|
|
|
char *ptr = dest_buf + at;
|
|
for (size_t i = 0; i < nr_codepoints; i++) {
|
|
char c[4];
|
|
size_t c_len = utf8_codepoint_encode(src[i], c);
|
|
if (c_len == 0) {
|
|
/* the input string was already checked by
|
|
* get_utf8_encoded_size, so this should never happen */
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
memcpy(ptr, c, c_len);
|
|
ptr += c_len;
|
|
}
|
|
|
|
dest_buf[new_total_bytes] = '\0';
|
|
|
|
dest->s_len += utf8_encoded_size;
|
|
dest->s_codepoints += nr_codepoints;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status string_insert_wstr_utf8(
|
|
struct b_string_p *dest, const b_wchar *src, size_t nr_codepoints,
|
|
size_t codepoint_offset)
|
|
{
|
|
if (codepoint_offset >= dest->s_codepoints) {
|
|
codepoint_offset = dest->s_codepoints;
|
|
}
|
|
|
|
size_t utf8_encoded_size = get_utf8_encoded_size(src, nr_codepoints);
|
|
if (utf8_encoded_size == 0) {
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
size_t new_total_bytes = dest->s_len + utf8_encoded_size;
|
|
if (dest->s_max < new_total_bytes) {
|
|
string_change_capacity(dest, new_total_bytes);
|
|
}
|
|
|
|
size_t move_offset = 0;
|
|
enum b_status status = B_SUCCESS;
|
|
|
|
if (codepoint_offset == dest->s_codepoints) {
|
|
move_offset = dest->s_len;
|
|
} else {
|
|
status = convert_codepoint_range_to_byte_range(
|
|
dest, 0, codepoint_offset, NULL, &move_offset);
|
|
}
|
|
|
|
if (!B_OK(status)) {
|
|
return status;
|
|
}
|
|
|
|
char *dest_buf = string_ptr(dest);
|
|
char *from = dest_buf + move_offset;
|
|
char *to = dest_buf + move_offset + utf8_encoded_size;
|
|
memmove(to, from, dest->s_len - move_offset);
|
|
|
|
char *ptr = dest_buf + move_offset;
|
|
for (size_t i = 0; i < nr_codepoints; i++) {
|
|
char c[4];
|
|
size_t c_len = utf8_codepoint_encode(src[i], c);
|
|
if (c_len == 0) {
|
|
/* the input string was already checked by
|
|
* get_utf8_encoded_size, so this should never happen */
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
memcpy(ptr, c, c_len);
|
|
ptr += c_len;
|
|
}
|
|
|
|
dest_buf[new_total_bytes] = '\0';
|
|
|
|
dest->s_len += utf8_encoded_size;
|
|
dest->s_codepoints += nr_codepoints;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status string_insert_cstr(
|
|
struct b_string_p *dest, const char *src, size_t nr_bytes, size_t at)
|
|
{
|
|
if (dest->s_len == dest->s_codepoints) {
|
|
return string_insert_cstr_ansi(dest, src, nr_bytes, at);
|
|
}
|
|
|
|
return string_insert_cstr_utf8(dest, src, nr_bytes, at);
|
|
}
|
|
|
|
static enum b_status string_insert_wstr(
|
|
struct b_string_p *dest, const b_wchar *src, size_t nr_codepoints, size_t at)
|
|
{
|
|
if (dest->s_len == dest->s_codepoints) {
|
|
return string_insert_wstr_ansi(dest, src, nr_codepoints, at);
|
|
}
|
|
|
|
return string_insert_wstr_utf8(dest, src, nr_codepoints, at);
|
|
}
|
|
|
|
static enum b_status string_insertf(
|
|
struct b_string_p *dest, size_t at, const char *format, va_list arg)
|
|
{
|
|
char buf[1024];
|
|
size_t len = vsnprintf(buf, sizeof buf, format, arg);
|
|
return string_insert_cstr(dest, buf, len, at);
|
|
}
|
|
|
|
/* insert the single byte c into dest at `at`.
 *
 * the byte is passed on inside a null-terminated two-byte buffer: the
 * codepoint counting done further down the insert path looks at the byte
 * following the data, which was a read past the end of the lone `char`
 * parameter before. */
static enum b_status string_insert_c(struct b_string_p *dest, char c, size_t at)
{
	const char one_char[2] = {c, '\0'};

	return string_insert_cstr(dest, one_char, 1, at);
}
|
|
|
|
/* insert the single codepoint c into dest at `at` by treating it as a
 * one-element codepoint array. */
static enum b_status string_insert_wc(struct b_string_p *dest, b_wchar c, size_t at)
{
	const b_wchar *single = &c;
	const size_t nr_codepoints = 1;

	return string_insert_wstr(dest, single, nr_codepoints, at);
}
|
|
|
|
static enum b_status string_insert_s(
|
|
struct b_string_p *dest, const struct b_string_p *src, size_t at)
|
|
{
|
|
return string_insert_cstr(dest, string_ptr(src), src->s_len, at);
|
|
}
|
|
|
|
static void string_clear(struct b_string_p *str)
|
|
{
|
|
if (str->s_len == 0) {
|
|
return;
|
|
}
|
|
|
|
char *s = string_ptr(str);
|
|
*s = '\0';
|
|
str->s_len = 0;
|
|
str->s_codepoints = 0;
|
|
}
|
|
|
|
/* test whether the null-terminated string s begins with `prefix`.
 *
 * on a match, stores the length of prefix in *prefix_len and returns true;
 * otherwise returns false and leaves *prefix_len untouched. an empty prefix
 * matches every string with a length of 0.
 *
 * the previous version stopped comparing as soon as either string ended and
 * then reported a match, so "ab" was said to start with "abc". the whole
 * prefix must now be present in s. */
static bool has_prefix(const char *s, const char *prefix, size_t *prefix_len)
{
	size_t len = 0;

	while (prefix[len] != '\0') {
		/* also fails when s ends first: its terminator differs from
		 * the non-null prefix byte */
		if (s[len] != prefix[len]) {
			return false;
		}

		len++;
	}

	*prefix_len = len;
	return true;
}
|
|
|
|
/* test whether s begins with any of the nr_prefixes strings in `prefixes`,
 * trying them in order. on the first match its length is stored in
 * *selected_prefix_len and true is returned.
 *
 * NULL and empty entries are skipped. an empty delimiter would match at
 * every position with length 0, which makes find_next_token stop advancing
 * through the string; a NULL entry would be dereferenced. */
static bool has_prefixes(
	const char *s, const char **prefixes, size_t nr_prefixes,
	size_t *selected_prefix_len)
{
	for (size_t i = 0; i < nr_prefixes; i++) {
		const char *delim = prefixes[i];
		if (!delim || delim[0] == '\0') {
			continue;
		}

		if (has_prefix(s, delim, selected_prefix_len)) {
			return true;
		}
	}

	return false;
}
|
|
|
|
static enum b_status find_next_token(struct b_string_iterator *it)
|
|
{
|
|
size_t offset = it->_ds;
|
|
size_t prefix_len = 0;
|
|
char *start = string_ptr(it->_s_p);
|
|
bool found_delim_last_time = (it->_f & STRING_TOK_F_FOUND_DELIM) != 0;
|
|
bool found_delim = false;
|
|
bool include_empty = (it->_f & B_STRING_TOK_F_INCLUDE_EMPTY_TOKENS);
|
|
bool found_null = false;
|
|
b_string_clear(it->_tmp);
|
|
|
|
while (1) {
|
|
char *s = start + offset;
|
|
if (*s == 0) {
|
|
it->_f &= ~STRING_TOK_F_FOUND_DELIM;
|
|
break;
|
|
}
|
|
|
|
found_delim = has_prefixes(s, it->_d, it->_nd, &prefix_len);
|
|
if (found_delim) {
|
|
if (it->_tmp_p->s_len == 0 && !include_empty) {
|
|
/* this token is empty, skip it */
|
|
offset += prefix_len;
|
|
found_delim = false;
|
|
continue;
|
|
}
|
|
|
|
it->_f |= STRING_TOK_F_FOUND_DELIM;
|
|
break;
|
|
}
|
|
|
|
b_wchar c = utf8_codepoint_decode(s);
|
|
if (c == B_WCHAR_INVALID) {
|
|
return B_ERR_BAD_STATE;
|
|
}
|
|
|
|
b_string_append_wc(it->_tmp, c);
|
|
offset += utf8_codepoint_size(c);
|
|
|
|
if (offset > it->_s_p->s_len) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool end = !found_delim && it->_tmp_p->s_len == 0;
|
|
|
|
if (include_empty && found_delim_last_time) {
|
|
end = false;
|
|
}
|
|
|
|
if (end) {
|
|
it->string_value = NULL;
|
|
it->string_length = 0;
|
|
it->string_codepoints = 0;
|
|
return B_ERR_NO_DATA;
|
|
}
|
|
|
|
it->_ds = offset + prefix_len;
|
|
it->string_value = b_string_ptr(it->_tmp);
|
|
it->string_length = it->_tmp_p->s_len;
|
|
it->string_codepoints = it->_tmp_p->s_codepoints;
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status string_tokenise(
|
|
struct b_string_p *str, const char *delims[], size_t nr_delims,
|
|
b_string_tokenise_flags flags, struct b_string_iterator *it)
|
|
{
|
|
memset(it, 0x0, sizeof *it);
|
|
|
|
if (!nr_delims) {
|
|
return B_ERR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
b_string *tmp = b_string_create();
|
|
if (!tmp) {
|
|
return B_ERR_NO_MEMORY;
|
|
}
|
|
|
|
it->_base.it_ops = &it_ops;
|
|
it->_m = ITERATOR_MODE_TOKENS;
|
|
it->_d = delims;
|
|
it->_nd = nr_delims;
|
|
it->_s_p = str;
|
|
it->_f = flags;
|
|
it->_tmp = tmp;
|
|
it->_tmp_p = b_object_get_private(tmp, B_TYPE_STRING);
|
|
|
|
enum b_status status = find_next_token(it);
|
|
if (!B_OK(status)) {
|
|
b_string_unref(tmp);
|
|
it->_tmp = NULL;
|
|
it->_tmp_p = NULL;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
/* length of the string in the unit selected by flags: bytes for
 * B_STRLEN_NORMAL, codepoints for B_STRLEN_CODEPOINTS. any other flag value
 * is handed to b_strlen together with the string's bytes. */
static size_t string_get_size(const struct b_string_p *str, b_strlen_flags flags)
{
	if (flags == B_STRLEN_NORMAL) {
		return str->s_len;
	}

	if (flags == B_STRLEN_CODEPOINTS) {
		return str->s_codepoints;
	}

	return b_strlen(string_ptr(str), flags);
}
|
|
|
|
static size_t string_get_capacity(const struct b_string_p *str)
|
|
{
|
|
return str->s_max;
|
|
}
|
|
|
|
static bool string_compare(const struct b_string_p *a, const struct b_string_p *b)
|
|
{
|
|
if (a->s_len != b->s_len) {
|
|
return false;
|
|
}
|
|
|
|
if (a == b) {
|
|
return true;
|
|
}
|
|
|
|
const char *ap = string_ptr(a);
|
|
const char *bp = string_ptr(b);
|
|
|
|
for (size_t i = 0; i < a->s_len; i++) {
|
|
if (ap[i] != bp[i]) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static char string_front(const struct b_string_p *str)
|
|
{
|
|
if (str->s_len == 0) {
|
|
return 0;
|
|
}
|
|
|
|
const char *s = string_ptr(str);
|
|
return s[0];
|
|
}
|
|
|
|
static char string_back(const struct b_string_p *str)
|
|
{
|
|
if (str->s_len == 0) {
|
|
return 0;
|
|
}
|
|
|
|
const char *s = string_ptr(str);
|
|
return s[str->s_len - 1];
|
|
}
|
|
|
|
static void string_pop_back(struct b_string_p *str)
|
|
{
|
|
if (str->s_len == 0) {
|
|
return;
|
|
}
|
|
|
|
char *s = string_ptr(str);
|
|
|
|
s[str->s_len - 1] = '\0';
|
|
str->s_len--;
|
|
}
|
|
|
|
static b_string *string_substr(const struct b_string_p *str, size_t start, size_t len)
|
|
{
|
|
if (start > string_get_size(str, B_STRLEN_NORMAL)) {
|
|
return NULL;
|
|
}
|
|
|
|
if (start + len > string_get_size(str, B_STRLEN_NORMAL)) {
|
|
len = string_get_size(str, B_STRLEN_NORMAL) - start;
|
|
}
|
|
|
|
b_string *newstr = b_string_create();
|
|
struct b_string_p *newstr_p = b_object_get_private(newstr, B_TYPE_STRING);
|
|
string_reserve(newstr_p, len);
|
|
|
|
const char *src = string_ptr(str) + start;
|
|
char *dest = string_ptr(newstr_p);
|
|
|
|
memcpy(dest, src, len);
|
|
newstr_p->s_len = len;
|
|
|
|
return newstr;
|
|
}
|
|
|
|
static uint64_t string_hash(const struct b_string_p *str)
|
|
{
|
|
#define FNV1_OFFSET_BASIS 0xcbf29ce484222325
|
|
#define FNV1_PRIME 0x100000001b3
|
|
uint64_t hash = FNV1_OFFSET_BASIS;
|
|
size_t i = 0;
|
|
|
|
const char *s = string_ptr(str);
|
|
|
|
for (i = 0; i < str->s_len; i++) {
|
|
hash ^= s[i];
|
|
hash *= FNV1_PRIME;
|
|
}
|
|
|
|
return hash;
|
|
}
|
|
|
|
/*** STREAM FUNCTIONS *********************************************************/
|
|
|
|
static enum b_status stream_close(struct b_stream *stream)
|
|
{
|
|
b_string *str = stream->s_ptr0;
|
|
b_string_unref(str);
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status stream_getc(struct b_stream *stream, int *out)
|
|
{
|
|
struct b_string_p *str = stream->s_ptr1;
|
|
if (stream->s_cursor >= str->s_len) {
|
|
return B_ERR_NO_DATA;
|
|
}
|
|
|
|
char *s = string_ptr(str);
|
|
*out = s[stream->s_cursor];
|
|
stream->s_cursor++;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status stream_read(
|
|
struct b_stream *stream, unsigned char *buf, size_t count, size_t *nr_read)
|
|
{
|
|
struct b_string_p *str = stream->s_ptr1;
|
|
if (stream->s_cursor >= str->s_len) {
|
|
*nr_read = 0;
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
size_t available = str->s_len - stream->s_cursor;
|
|
size_t to_read = b_min(size_t, count, available);
|
|
|
|
char *s = string_ptr(str) + stream->s_cursor;
|
|
|
|
memcpy(buf, s, to_read);
|
|
|
|
*nr_read = to_read;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
static enum b_status stream_write(
|
|
struct b_stream *stream, const unsigned char *buf, size_t count,
|
|
size_t *nr_written)
|
|
{
|
|
struct b_string_p *str = stream->s_ptr1;
|
|
enum b_status status = B_SUCCESS;
|
|
|
|
if (stream->s_cursor + count > str->s_max) {
|
|
status = string_reserve(str, stream->s_cursor + count);
|
|
}
|
|
|
|
if (!B_OK(status)) {
|
|
return status;
|
|
}
|
|
|
|
string_insert_cstr(str, (const char *)buf, count, stream->s_cursor);
|
|
stream->s_cursor += count;
|
|
|
|
*nr_written = count;
|
|
|
|
return B_SUCCESS;
|
|
}
|
|
|
|
/* stream callback: move the cursor to `offset` bytes relative to the start
 * of the string, the current cursor, or the end of the string.
 *
 * returns B_ERR_INVALID_ARGUMENT for an unknown origin, and also when the
 * resulting position would lie before the start of the string; previously
 * such a negative position wrapped around to a huge unsigned cursor value.
 * positions beyond the end are still accepted. */
static enum b_status stream_seek(
	struct b_stream *stream, long long offset, b_stream_seek_origin origin)
{
	struct b_string_p *str = stream->s_ptr1;

	size_t base;
	switch (origin) {
	case B_STREAM_SEEK_START:
		base = 0;
		break;
	case B_STREAM_SEEK_CURRENT:
		base = stream->s_cursor;
		break;
	case B_STREAM_SEEK_END:
		base = str->s_len;
		break;
	default:
		return B_ERR_INVALID_ARGUMENT;
	}

	if (offset < 0) {
		/* -(offset + 1) cannot overflow, unlike -offset */
		unsigned long long back = (unsigned long long)(-(offset + 1)) + 1;
		if (back > base) {
			return B_ERR_INVALID_ARGUMENT;
		}

		stream->s_cursor = base - (size_t)back;
		return B_SUCCESS;
	}

	stream->s_cursor = base + (size_t)offset;
	return B_SUCCESS;
}
|
|
|
|
static enum b_status stream_reserve(struct b_stream *stream, size_t len)
|
|
{
|
|
struct b_string_p *str = stream->s_ptr1;
|
|
|
|
size_t new_capacity = str->s_len + len;
|
|
return string_reserve(str, new_capacity);
|
|
}
|
|
|
|
/*** PUBLIC FUNCTIONS *********************************************************/
|
|
|
|
b_string *b_string_create_from_cstr(const char *s)
|
|
{
|
|
b_string *str = b_string_create();
|
|
if (!str) {
|
|
return NULL;
|
|
}
|
|
|
|
if (!s) {
|
|
return str;
|
|
}
|
|
|
|
struct b_string_p *p = b_object_get_private(str, B_TYPE_STRING);
|
|
|
|
size_t s_len = strlen(s);
|
|
size_t s_codepoints = get_number_of_codepoints(s, s_len);
|
|
b_string_reserve(str, s_len);
|
|
|
|
char *dest = string_ptr(p);
|
|
memcpy(dest, s, s_len);
|
|
dest[s_len] = 0;
|
|
|
|
p->s_len = s_len;
|
|
p->s_codepoints = s_codepoints;
|
|
|
|
return str;
|
|
}
|
|
|
|
b_string *b_string_create_from_c(char c, size_t count)
|
|
{
|
|
b_string *str = b_string_create();
|
|
if (!str) {
|
|
return NULL;
|
|
}
|
|
|
|
struct b_string_p *p = b_object_get_private(str, B_TYPE_STRING);
|
|
|
|
string_change_capacity(p, count);
|
|
char *s = string_ptr(p);
|
|
for (size_t i = 0; i < count; i++) {
|
|
s[i] = c;
|
|
}
|
|
|
|
p->s_len = count;
|
|
p->s_codepoints = count;
|
|
return str;
|
|
}
|
|
|
|
b_string *b_string_duplicate(const b_string *str)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_duplicate, str);
|
|
}
|
|
|
|
/* public entry point: forwards `str` to the private string_steal routine
 * through the B_TYPE_STRING static class dispatch. */
char *b_string_steal(b_string *str)
{
	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_steal, str);
}
|
|
|
|
/* public entry point: forwards `str` and `capacity` to the private
 * string_reserve routine through the B_TYPE_STRING static class dispatch. */
b_status b_string_reserve(b_string *str, size_t capacity)
{
	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_reserve, str, capacity);
}
|
|
|
|
b_status b_string_replace(
|
|
b_string *str, size_t start, size_t length, const char *new_data)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC(
|
|
B_TYPE_STRING, string_replace, str, start, length, new_data);
|
|
}
|
|
|
|
/* public entry point: forwards `new_data` to the private string_replace_all
 * routine through the B_TYPE_STRING static class dispatch. */
b_status b_string_replace_all(b_string *str, const char *new_data)
{
	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_replace_all, str, new_data);
}
|
|
|
|
/* public entry point: forwards `start` and `length` to the private
 * string_remove routine through the B_TYPE_STRING static class dispatch. */
enum b_status b_string_remove(b_string *str, size_t start, size_t length)
{
	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_remove, str, start, length);
}
|
|
|
|
/* public entry point: forwards the `transformer` callback to the private
 * string_transform routine through the B_TYPE_STRING static class dispatch. */
b_status b_string_transform(b_string *str, int (*transformer)(int))
{
	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_transform, str, transformer);
}
|
|
|
|
/* public entry point: forwards `str` to the private string_trim routine
 * through the B_TYPE_STRING static class dispatch. */
b_status b_string_trim(b_string *str)
{
	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_trim, str);
}
|
|
|
|
/* public entry point: forwards the byte `c` and position `at` to the private
 * string_insert_c routine through the B_TYPE_STRING static class dispatch. */
enum b_status b_string_insert_c(b_string *dest, char c, size_t at)
{
	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_insert_c, dest, c, at);
}
|
|
|
|
/* public entry point: forwards the codepoint `c` and position `at` to the
 * private string_insert_wc routine through the B_TYPE_STRING static class
 * dispatch. */
enum b_status b_string_insert_wc(b_string *dest, b_wchar c, size_t at)
{
	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_insert_wc, dest, c, at);
}
|
|
|
|
/* insert the contents of `src` into `dest` at position `at`.
 * resolves the private data of both strings and hands them to
 * string_insert_s, whose status is returned unchanged. */
enum b_status b_string_insert_s(b_string *dest, const b_string *src, size_t at)
{
	const struct b_string_p *from = b_object_get_private(src, B_TYPE_STRING);
	struct b_string_p *into = b_object_get_private(dest, B_TYPE_STRING);

	return string_insert_s(into, from, at);
}
|
|
|
|
/* insert the null-terminated string `src` into `dest` at position `at`.
 * the length of `src` is measured with strlen and passed on to
 * string_insert_cstr, whose status is returned unchanged. */
enum b_status b_string_insert_cstr(b_string *dest, const char *src, size_t at)
{
	struct b_string_p *into = b_object_get_private(dest, B_TYPE_STRING);
	const size_t src_len = strlen(src);

	return string_insert_cstr(into, src, src_len, at);
}
|
|
|
|
/* insert the zero-terminated wide string `src` into `dest` at position `at`.
 * the length of `src` is measured with b_wstrlen and passed on to
 * string_insert_wstr, whose status is returned unchanged. */
enum b_status b_string_insert_wstr(b_string *dest, const b_wchar *src, size_t at)
{
	struct b_string_p *into = b_object_get_private(dest, B_TYPE_STRING);
	const size_t src_len = b_wstrlen(src);

	return string_insert_wstr(into, src, src_len, at);
}
|
|
|
|
enum b_status b_string_insert_cstrf(
|
|
b_string *dest, size_t at, const char *format, ...)
|
|
{
|
|
struct b_string_p *dest_p = b_object_get_private(dest, B_TYPE_STRING);
|
|
|
|
va_list arg;
|
|
va_start(arg, format);
|
|
enum b_status status = string_insertf(dest_p, at, format, arg);
|
|
va_end(arg);
|
|
|
|
return status;
|
|
}
|
|
|
|
enum b_status b_string_insert_cstrn(
|
|
b_string *dest, const char *src, size_t len, size_t at)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC(
|
|
B_TYPE_STRING, string_insert_cstr, dest, src, len, at);
|
|
}
|
|
|
|
/* printf-style append: same as the formatted insert, but with SIZE_MAX as the
 * insertion position (the value the append aliases in this file use). */
enum b_status b_string_append_cstrf(b_string *dest, const char *format, ...)
{
	struct b_string_p *into = b_object_get_private(dest, B_TYPE_STRING);
	enum b_status result;
	va_list ap;

	va_start(ap, format);
	result = string_insertf(into, SIZE_MAX, format, ap);
	va_end(ap);

	return result;
}
|
|
|
|
/* printf-style prepend: same as the formatted insert, but with 0 as the
 * insertion position. */
enum b_status b_string_prepend_cstrf(b_string *dest, const char *format, ...)
{
	struct b_string_p *into = b_object_get_private(dest, B_TYPE_STRING);
	enum b_status result;
	va_list ap;

	va_start(ap, format);
	result = string_insertf(into, 0, format, ap);
	va_end(ap);

	return result;
}
|
|
|
|
/* public entry point: forwards `str` to the private string_clear routine
 * through the B_TYPE_STRING static class dispatch. */
void b_string_clear(b_string *str)
{
	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_clear, str);
}
|
|
|
|
enum b_status b_string_tokenise(
|
|
b_string *str, const char *delims[], size_t nr_delims,
|
|
b_string_tokenise_flags flags, struct b_string_iterator *it)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC(
|
|
B_TYPE_STRING, string_tokenise, str, delims, nr_delims, flags, it);
|
|
}
|
|
|
|
/* public entry point: forwards `flags` to the private string_get_size routine
 * through the B_TYPE_STRING static class dispatch. */
size_t b_string_get_size(const b_string *str, b_strlen_flags flags)
{
	B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_get_size, str, flags);
}
|
|
|
|
size_t b_string_get_capacity(const b_string *str)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_get_capacity, str);
|
|
}
|
|
|
|
bool b_string_compare(const b_string *a, const b_string *b)
|
|
{
|
|
struct b_string_p *ap = b_object_get_private(a, B_TYPE_STRING);
|
|
struct b_string_p *bp = b_object_get_private(a, B_TYPE_STRING);
|
|
return string_compare(ap, bp);
|
|
}
|
|
|
|
char b_string_front(const b_string *str)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_front, str);
|
|
}
|
|
|
|
char b_string_back(const b_string *str)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_back, str);
|
|
}
|
|
|
|
/* public entry point: forwards `str` to the private string_pop_back routine
 * through the B_TYPE_STRING static class dispatch. */
void b_string_pop_back(b_string *str)
{
	B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_pop_back, str);
}
|
|
|
|
const char *b_string_ptr(const b_string *str)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_ptr, str);
|
|
}
|
|
|
|
b_string *b_string_substr(const b_string *str, size_t start, size_t len)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC(B_TYPE_STRING, string_substr, str, start, len);
|
|
}
|
|
|
|
uint64_t b_string_hash(const b_string *str)
|
|
{
|
|
B_CLASS_DISPATCH_STATIC_0(B_TYPE_STRING, string_hash, str);
|
|
}
|
|
|
|
/* open a read/write stream over `str`.
 *
 * a zero-initialised b_stream is heap-allocated, given a reference to `str`
 * (s_ptr0) plus its private data (s_ptr1), and wired to the string stream
 * callbacks defined in this file. the new stream is stored in `*out`.
 * returns B_ERR_NO_MEMORY if the stream cannot be allocated. */
enum b_status b_string_open_stream(b_string *str, struct b_stream **out)
{
	struct b_stream *s = calloc(1, sizeof *s);
	if (s == NULL) {
		return B_ERR_NO_MEMORY;
	}

	s->s_mode |= B_STREAM_READ | B_STREAM_WRITE;

	/* backing object and its private data */
	s->s_ptr0 = b_string_ref(str);
	s->s_ptr1 = b_object_get_private(str, B_TYPE_STRING);

	/* stream operations */
	s->s_close = stream_close;
	s->s_getc = stream_getc;
	s->s_read = stream_read;
	s->s_write = stream_write;
	s->s_seek = stream_seek;
	s->s_reserve = stream_reserve;

	*out = s;
	return B_SUCCESS;
}
|
|
|
|
/*** PUBLIC ALIAS FUNCTIONS ***************************************************/
|
|
|
|
/* alias: b_string_insert_c with SIZE_MAX as the insertion position. */
enum b_status b_string_append_c(b_string *dest, char c)
{
	const size_t at = SIZE_MAX;
	return b_string_insert_c(dest, c, at);
}
|
|
|
|
/* alias: b_string_insert_wc with SIZE_MAX as the insertion position. */
enum b_status b_string_append_wc(b_string *dest, b_wchar c)
{
	const size_t at = SIZE_MAX;
	return b_string_insert_wc(dest, c, at);
}
|
|
|
|
/* alias: b_string_insert_s with SIZE_MAX as the insertion position. */
enum b_status b_string_append_s(b_string *dest, const b_string *src)
{
	const size_t at = SIZE_MAX;
	return b_string_insert_s(dest, src, at);
}
|
|
|
|
/* alias: b_string_insert_cstr with SIZE_MAX as the insertion position. */
enum b_status b_string_append_cstr(b_string *dest, const char *src)
{
	const size_t at = SIZE_MAX;
	return b_string_insert_cstr(dest, src, at);
}
|
|
|
|
/* alias: b_string_insert_wstr with SIZE_MAX as the insertion position. */
enum b_status b_string_append_wstr(b_string *dest, const b_wchar *src)
{
	const size_t at = SIZE_MAX;
	return b_string_insert_wstr(dest, src, at);
}
|
|
|
|
/* alias: b_string_insert_c with 0 as the insertion position. */
enum b_status b_string_prepend_c(b_string *dest, char c)
{
	const size_t at = 0;
	return b_string_insert_c(dest, c, at);
}
|
|
|
|
/* alias: b_string_insert_wc with 0 as the insertion position. */
enum b_status b_string_prepend_wc(b_string *dest, b_wchar c)
{
	const size_t at = 0;
	return b_string_insert_wc(dest, c, at);
}
|
|
|
|
/* alias: b_string_insert_s with 0 as the insertion position. */
enum b_status b_string_prepend_s(b_string *dest, const b_string *src)
{
	const size_t at = 0;
	return b_string_insert_s(dest, src, at);
}
|
|
|
|
/* alias: b_string_insert_cstr with 0 as the insertion position. */
enum b_status b_string_prepend_cstr(b_string *dest, const char *src)
{
	const size_t at = 0;
	return b_string_insert_cstr(dest, src, at);
}
|
|
|
|
/* alias: b_string_insert_wstr with 0 as the insertion position. */
enum b_status b_string_prepend_wstr(b_string *dest, const b_wchar *src)
{
	const size_t at = 0;
	return b_string_insert_wstr(dest, src, at);
}
|
|
|
|
/*** VIRTUAL FUNCTIONS ********************************************************/
|
|
|
|
/* instance initialiser: puts the private data in the empty state.
 * capacity is set to STRING_INLINE_CAPACITY, which is what string_is_inline
 * tests for, so a fresh string uses its inline buffer. `obj` is unused. */
static void string_init(b_object *obj, void *priv)
{
	struct b_string_p *self = priv;

	self->s_max = STRING_INLINE_CAPACITY;
	self->s_codepoints = 0;
	self->s_len = 0;
}
|
|
|
|
/* instance finaliser: frees the external data buffer, if there is one.
 * inline strings keep their data inside the private struct, so nothing is
 * freed for them. `obj` is unused. */
static void string_fini(b_object *obj, void *priv)
{
	struct b_string_p *self = priv;

	if (string_is_inline(self)) {
		return;
	}

	free(string_ptr(self));
}
|
|
|
|
static void string_to_string(const b_object *obj, struct b_stream *out)
|
|
{
|
|
struct b_string_p *str = b_object_get_private(obj, B_TYPE_STRING);
|
|
const char *s = string_ptr(str);
|
|
for (size_t i = 0; i < str->s_len; i++) {
|
|
b_stream_write_char(out, s[i]);
|
|
}
|
|
}
|
|
|
|
/*** CLASS DEFINITION *********************************************************/
|
|
|
|
B_TYPE_CLASS_DEFINITION_BEGIN(b_string)
|
|
B_TYPE_CLASS_INTERFACE_BEGIN(b_object, B_TYPE_OBJECT)
|
|
B_INTERFACE_ENTRY(to_string) = string_to_string;
|
|
B_TYPE_CLASS_INTERFACE_END(b_object, B_TYPE_OBJECT)
|
|
B_TYPE_CLASS_DEFINITION_END(b_string)
|
|
|
|
B_TYPE_DEFINITION_BEGIN(b_string)
|
|
B_TYPE_ID(0x200194f6, 0x0327, 0x4a82, 0xb9c9, 0xb62ddd038c33);
|
|
B_TYPE_CLASS(b_string_class);
|
|
B_TYPE_INSTANCE_INIT(string_init);
|
|
B_TYPE_INSTANCE_INIT(string_fini);
|
|
B_TYPE_DEFINITION_END(b_string)
|
|
|
|
/*** ITERATOR FUNCTIONS *******************************************************/
|
|
|
|
/* generic-iterator adapter: reinterprets `it` as a b_string_iterator and
 * advances it with b_string_iterator_next. */
static bool string_iterator_next(struct b_iterator *it)
{
	struct b_string_iterator *self = (struct b_string_iterator *)it;
	return b_string_iterator_next(self);
}
|
|
|
|
/* generic-iterator adapter: reinterprets `it` as a b_string_iterator and
 * queries it with b_string_iterator_is_valid. */
static bool string_iterator_is_valid(const struct b_iterator *it)
{
	const struct b_string_iterator *self
		= (const struct b_string_iterator *)it;
	return b_string_iterator_is_valid(self);
}
|
|
|
|
static struct b_iterator_ops it_ops = {
|
|
.it_next = string_iterator_next,
|
|
.it_close = NULL,
|
|
.it_is_valid = string_iterator_is_valid,
|
|
};
|
|
|
|
/* release the iterator's temporary string (if any) and reset every field of
 * the iterator to zero. */
static void iterator_cleanup(b_string_iterator *it)
{
	if (it->_tmp != NULL) {
		b_string_unref(it->_tmp);
	}

	memset(it, 0, sizeof *it);
}
|
|
|
|
/* start a codepoint-by-codepoint iteration over `string`.
 *
 * `it` is zeroed, bound to the string iterator operations, and positioned on
 * the first codepoint, which is decoded into it->char_value.
 * returns 0 on success. returns -1 with it->status set to B_ERR_NO_DATA when
 * the string is empty, or to B_ERR_BAD_FORMAT when the first codepoint cannot
 * be decoded.
 *
 * the data pointer is taken from the string's private data `p`; `it->_s_p` is
 * still NULL at that point because the iterator has just been zeroed, so
 * reading the pointer through it would dereference NULL. */
int b_string_iterator_begin(const b_string *string, b_string_iterator *it)
{
	memset(it, 0x0, sizeof *it);

	struct b_string_p *p = b_object_get_private(string, B_TYPE_STRING);

	it->_base.it_ops = &it_ops;

	if (!p->s_len) {
		it->status = B_ERR_NO_DATA;
		return -1;
	}

	const char *s = string_ptr(p);
	it->_m = ITERATOR_MODE_CHARS;
	it->_s_p = p;
	it->char_value = utf8_codepoint_decode(s);

	if (it->char_value == B_WCHAR_INVALID) {
		it->status = B_ERR_BAD_FORMAT;
		return -1;
	}

	return 0;
}
|
|
|
|
/* advance a chars-mode iterator to the next codepoint.
 *
 * the byte index moves forward by the encoded size of the current codepoint
 * and the next codepoint is decoded into it->char_value.
 * returns true when a new codepoint is available. returns false when the
 * iterator was already invalid, when the current codepoint has no valid
 * encoded size (iterator is cleaned up, no status recorded), when the end of
 * the string is reached (status B_ERR_NO_DATA), or when decoding fails
 * (status B_ERR_BAD_FORMAT). in the last two cases the iterator is cleaned up
 * and its char_value is set to B_WCHAR_INVALID. */
static bool chars_iterator_next(b_string_iterator *it)
{
	if (!b_string_iterator_is_valid(it)) {
		return false;
	}

	const size_t width = utf8_codepoint_size(it->char_value);
	if (width == 0) {
		iterator_cleanup(it);
		return false;
	}

	it->byte_index += width;
	it->codepoint_index += 1;

	/* work out whether iteration has to stop, and why */
	enum b_status stop_reason = B_SUCCESS;
	if (it->byte_index >= it->_s_p->s_len) {
		stop_reason = B_ERR_NO_DATA;
	} else {
		char *next = string_ptr(it->_s_p) + it->byte_index;
		it->char_value = utf8_codepoint_decode(next);
		if (it->char_value == B_WCHAR_INVALID) {
			stop_reason = B_ERR_BAD_FORMAT;
		}
	}

	if (stop_reason != B_SUCCESS) {
		/* cleanup zeroes the iterator; then mark it as finished */
		iterator_cleanup(it);
		it->_s_p = NULL;
		it->byte_index = 0;
		it->codepoint_index = 0;
		it->char_value = B_WCHAR_INVALID;
		it->status = stop_reason;
		return false;
	}

	it->iteration_index++;
	return true;
}
|
|
|
|
/* advance a tokens-mode iterator to the next token.
 *
 * returns false if the iterator is already invalid, or if find_next_token
 * reports a failure (the iterator is cleaned up in that case). otherwise
 * it->string_value is pointed at the data of the iterator's temporary string
 * and the iteration index is incremented. */
static bool tokens_iterator_next(b_string_iterator *it)
{
	if (!b_string_iterator_is_valid(it)) {
		return false;
	}

	if (!B_OK(find_next_token(it))) {
		iterator_cleanup(it);
		return false;
	}

	it->string_value = string_ptr(it->_tmp_p);
	it->iteration_index++;
	return true;
}
|
|
|
|
/* advance a string iterator according to its mode (chars or tokens).
 * an iterator in any other mode cannot advance and yields false. */
bool b_string_iterator_next(b_string_iterator *it)
{
	if (it->_m == ITERATOR_MODE_CHARS) {
		return chars_iterator_next(it);
	}

	if (it->_m == ITERATOR_MODE_TOKENS) {
		return tokens_iterator_next(it);
	}

	return false;
}
|
|
|
|
static bool chars_iterator_is_valid(const struct b_string_iterator *it)
|
|
{
|
|
if (!it->_s_p) {
|
|
return false;
|
|
}
|
|
|
|
if (it->byte_index >= it->_s_p->s_len) {
|
|
return false;
|
|
}
|
|
|
|
if (it->char_value == B_WCHAR_INVALID) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool tokens_iterator_is_valid(const struct b_string_iterator *it)
|
|
{
|
|
if (!it->_s_p) {
|
|
return false;
|
|
}
|
|
|
|
if (it->byte_index >= it->_s_p->s_len) {
|
|
return false;
|
|
}
|
|
|
|
if (!it->string_value) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool b_string_iterator_is_valid(const struct b_string_iterator *it)
|
|
{
|
|
switch (it->_m) {
|
|
case ITERATOR_MODE_CHARS:
|
|
return chars_iterator_is_valid(it);
|
|
case ITERATOR_MODE_TOKENS:
|
|
return tokens_iterator_is_valid(it);
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/*** MISC FUNCTIONS ***********************************************************/
|
|
|
|
/* return a freshly malloc'd copy of the null-terminated string `s`, or NULL
 * if the allocation fails. the caller owns the result and releases it with
 * free(). */
char *b_strdup(const char *s)
{
	/* size of the copy, including the null terminator */
	const size_t nbytes = strlen(s) + 1;

	char *copy = malloc(nbytes);
	if (copy != NULL) {
		memcpy(copy, s, nbytes);
	}

	return copy;
}
|
|
|
|
/* measure the length of `s`, optionally ignoring some character sequences.
 *
 * with neither B_STRLEN_IGNORE_ESC nor B_STRLEN_IGNORE_MOD set, this is
 * strlen(s). otherwise characters are counted one by one, except that:
 *  - with B_STRLEN_IGNORE_ESC, everything from an ESC ('\033') up to and
 *    including the next alphabetic character is not counted;
 *  - with B_STRLEN_IGNORE_MOD, a "[...]" sequence up to and including the
 *    closing ']' is not counted, while "[[" counts as a single character.
 *
 * a sequence that is cut short by the end of the string simply ends the scan:
 * the index is never moved past the null terminator. (stepping over a
 * terminator reached inside a sequence would read beyond the string.)
 * characters are converted to unsigned char before being given to isalpha,
 * as <ctype.h> requires. */
size_t b_strlen(const char *s, b_strlen_flags flags)
{
	if (!(flags & (B_STRLEN_IGNORE_ESC | B_STRLEN_IGNORE_MOD))) {
		return strlen(s);
	}

	size_t out = 0;
	size_t i = 0;

	while (s[i]) {
		if (s[i] == '\033' && (flags & B_STRLEN_IGNORE_ESC)) {
			while (s[i] && !isalpha((unsigned char)s[i])) {
				i++;
			}

			/* step over the final letter, but never over the terminator */
			if (s[i]) {
				i++;
			}

			continue;
		}

		if (s[i] == '[' && (flags & B_STRLEN_IGNORE_MOD)) {
			i++;
			if (s[i] == '[') {
				/* "[[" is an escaped, visible '[' */
				out++;
				i++;
				continue;
			}

			while (s[i] && s[i] != ']') {
				i++;
			}

			/* step over the ']', but never over the terminator */
			if (s[i]) {
				i++;
			}

			continue;
		}

		out++;
		i++;
	}

	return out;
}
|
|
|
|
b_wchar *b_wstrdup(const b_wchar *s)
|
|
{
|
|
size_t len = b_wstrlen(s);
|
|
b_wchar *buf = calloc(len + 1, sizeof(b_wchar));
|
|
if (!buf) {
|
|
return NULL;
|
|
}
|
|
|
|
memcpy(buf, s, len * sizeof(b_wchar));
|
|
|
|
return buf;
|
|
}
|
|
|
|
/* count the characters in the wide string `s` that precede its zero
 * terminator. */
size_t b_wstrlen(const b_wchar *s)
{
	size_t count = 0;

	while (s[count] != 0) {
		count++;
	}

	return count;
}
|