b_string now uses UTF-8 internally, and can correctly manipulate strings
that contain non-ASCII and multi-byte codepoints.
b_string now tracks the length of a string in both bytes and unicode codepoints.
string insertion functions have been updated to correctly handle strings with
multi-byte codepoints, so the index parameter of each function now refers to codepoints
rather than bytes. inserting single-byte chars into a string with no multi-byte codepoints
is still optimised to use array indexing and memmove.
a b_string_iterator has been added to simplify iterating through a UTF-8 string, without
having to use a charAt()-style interface that would incur performance penalties.
strings can now also contain null bytes.
new functions include:
- b_string_tokenise: a b_iterator interface for iterating through tokens
in a string. similar to strtok except that:
* it is re-entrant, and uses no global state.
* it supports delimiters that are longer than one character and/or contain
multi-byte UTF-8 codepoints.
* it doesn't modify the string that is being iterated over.
* it correctly handles strings with multi-byte UTF-8 codepoints and null chars.
- b_string_compare: for comparing strings. necessary to use this rather than strcmp
as b_strings can now contain null chars, and strcmp stops at the first null byte.
27 lines
805 B
C
27 lines
805 B
C
#ifndef _BLUELIB_STRING_H_
|
|
#define _BLUELIB_STRING_H_
|
|
|
|
#include "object.h"
|
|
|
|
/* maximum length, in bytes, of a string that can be stored inline.
 * this does not include the null-terminator, so an inline buffer needs
 * STRING_INLINE_CAPACITY + 1 bytes of storage. */
#define STRING_INLINE_CAPACITY 15
|
|
|
|
struct b_string {
|
|
struct b_object s_base;
|
|
/* length of string in bytes, not including null-terminator.
|
|
* a multi-byte utf-8 codepoint will be counted as multiple bytes here */
|
|
unsigned int s_len;
|
|
/* length of string in codepoints, not including null-terminator.
|
|
* a multi-byte utf-8 codepoint will be counted as one codepoint here */
|
|
unsigned int s_codepoints;
|
|
/* maximum length of string storable in the currently-allocated buffer
|
|
* in bytes, not including null terminator */
|
|
unsigned int s_max;
|
|
union {
|
|
char d_inline[STRING_INLINE_CAPACITY + 1];
|
|
char *d_external;
|
|
} s_data;
|
|
};
|
|
|
|
#endif
|