From a429e76721884b90b7967758bd2e0b0ea934d9e8 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Sat, 25 Oct 2025 00:02:33 +0100 Subject: [PATCH] core: stream: add utf-8 awareness to b_stream interface b_stream can now read/write utf-8 encoded data, and will do so unless the stream is in binary mode. b_stream will also report an error if it encounters invalid utf-8 data (unless binary mode is enabled). --- core/include/blue/core/stream.h | 7 +- core/stream.c | 118 ++++++++++++++++++++++++++------ 2 files changed, 101 insertions(+), 24 deletions(-) diff --git a/core/include/blue/core/stream.h b/core/include/blue/core/stream.h index ff69f6c..634f13d 100644 --- a/core/include/blue/core/stream.h +++ b/core/include/blue/core/stream.h @@ -1,6 +1,7 @@ #ifndef BLUE_CORE_STREAM_H_ #define BLUE_CORE_STREAM_H_ +#include #include #include #include @@ -40,7 +41,7 @@ B_TYPE_CLASS_DECLARATION_BEGIN(b_stream) b_status (*s_close)(b_stream *); b_status (*s_seek)(b_stream *, long long, b_stream_seek_origin); b_status (*s_tell)(const b_stream *, size_t *); - b_status (*s_getc)(b_stream *, int *); + b_status (*s_getc)(b_stream *, b_wchar *); b_status (*s_read)(b_stream *, unsigned char *, size_t, size_t *); b_status (*s_write)(b_stream *, const unsigned char *, size_t, size_t *); b_status (*s_reserve)(b_stream *, size_t); @@ -69,7 +70,7 @@ BLUE_API size_t b_stream_cursor(const b_stream *stream); BLUE_API b_status b_stream_push_indent(b_stream *stream, int indent); BLUE_API b_status b_stream_pop_indent(b_stream *stream); -BLUE_API b_status b_stream_read_char(b_stream *stream, int *c); +BLUE_API b_status b_stream_read_char(b_stream *stream, b_wchar *c); BLUE_API b_status b_stream_read_bytes( b_stream *stream, void *buf, size_t count, size_t *nr_read); @@ -82,7 +83,7 @@ BLUE_API b_status b_stream_read_all_bytes( BLUE_API b_status b_stream_read_all_bytes_s( b_stream *src, b_stream *dest, b_stream_buffer *buffer, size_t *nr_read); -BLUE_API b_status b_stream_write_char(b_stream *stream, char c); 
+BLUE_API b_status b_stream_write_char(b_stream *stream, b_wchar c); BLUE_API b_status b_stream_write_string( b_stream *stream, const char *s, size_t *nr_written); diff --git a/core/stream.c b/core/stream.c index 6f99528..44d77d0 100644 --- a/core/stream.c +++ b/core/stream.c @@ -155,7 +155,67 @@ static enum b_status stream_reserve(struct stream_data *stream, size_t len) return stream->s_ops->s_reserve(stream->s_obj, len); } -static enum b_status stream_read_char(struct stream_data *stream, int *c) +static enum b_status read_char_binary(struct stream_data *stream, b_wchar *c) +{ + size_t r; + unsigned char v = 0; + enum b_status status = stream->s_ops->s_read(stream->s_obj, &v, 1, &r); + + *c = v; + + if (status == B_SUCCESS && r < 1) { + status = B_ERR_NO_DATA; + } + + return status; +} + +static enum b_status read_char_utf8(struct stream_data *stream, b_wchar *c) +{ + size_t r; + unsigned char s[4]; + unsigned int len = 0; + enum b_status status = stream->s_ops->s_read(stream->s_obj, s, 1, &r); + + if (!B_OK(status)) { + return status; + } + + if (r < 1) { + return B_ERR_NO_DATA; + } + + len = b_wchar_utf8_header_decode(s[0]); + if (len <= 0 || len > 4) { + return B_ERR_BAD_FORMAT; + } + + if (len == 1) { + *c = s[0]; + return B_SUCCESS; + } + + status = stream->s_ops->s_read(stream->s_obj, s + 1, len - 1, &r); + + if (!B_OK(status)) { + return status; + } + + if (r != len - 1) { + return B_ERR_BAD_FORMAT; + } + + b_wchar result = b_wchar_utf8_codepoint_decode((char *)s); + + if (result == B_WCHAR_INVALID) { + return B_ERR_BAD_FORMAT; + } + + *c = result; + return B_SUCCESS; +} + +static enum b_status stream_read_char(struct stream_data *stream, b_wchar *c) { if (!(stream->s_cfg->s_mode & B_STREAM_READ)) { return B_ERR_NOT_SUPPORTED; @@ -163,37 +223,53 @@ static enum b_status stream_read_char(struct stream_data *stream, int *c) enum b_status status = B_ERR_NOT_SUPPORTED; +#if 0 if (stream->s_ops->s_getc) { - status = stream->s_ops->s_getc(stream->s_obj, c); - } 
else if (stream->s_ops->s_read) { - size_t r; - unsigned char v = 0; - status = stream->s_ops->s_read(stream->s_obj, &v, 1, &r); + return stream->s_ops->s_getc(stream->s_obj, c); + } +#endif - *c = v; + if (!stream->s_ops->s_read) { + return B_ERR_NOT_SUPPORTED; + } - if (status == B_SUCCESS && r < 1) { - status = B_ERR_NO_DATA; + if (stream->s_cfg->s_mode & B_STREAM_BINARY) { + return read_char_binary(stream, c); + } + + return read_char_utf8(stream, c); +} + +static enum b_status __write_char(struct stream_data *stream, b_wchar c) +{ + size_t w, len; + enum b_status status; + char s[4]; + + if (stream->s_cfg->s_mode & B_STREAM_BINARY) { + s[0] = c & 0xFF; + len = 1; + } else { + len = b_wchar_utf8_codepoint_encode(c, s); + if (len == 0 || len > 4) { + return B_ERR_INVALID_ARGUMENT; } } - return status; -} + status = stream->s_ops->s_write(stream->s_obj, (unsigned char *)s, len, &w); -static enum b_status __write_char(struct stream_data *stream, char c) -{ - size_t w; - enum b_status status = stream->s_ops->s_write( - stream->s_obj, (unsigned char *)&c, 1, &w); + if (!B_OK(status)) { + return status; + } - if (status == B_SUCCESS && w < 1) { + if (status == B_SUCCESS && w < len) { status = B_ERR_IO_FAILURE; } return status; } -static enum b_status stream_write_char(struct stream_data *stream, char c) +static enum b_status stream_write_char(struct stream_data *stream, b_wchar c) { if (!(stream->s_cfg->s_mode & B_STREAM_WRITE)) { return B_ERR_NOT_SUPPORTED; @@ -257,7 +333,7 @@ static enum b_status stream_read_line(struct stream_data *stream, char *s, size_ enum b_status status = B_SUCCESS; size_t i = 0; - int c = 0; + b_wchar c = 0; while (1) { if (i >= max) { @@ -294,7 +370,7 @@ static enum b_status stream_read_line_s( enum b_status status = B_SUCCESS; size_t i = 0; - int c = 0; + b_wchar c = 0; while (1) { status = stream_read_char(src, &c); @@ -698,7 +774,7 @@ enum b_status b_stream_read_all_bytes_s( return stream_read_all_bytes_s(&src_p, &dest_p, buffer_p, 
out_nr_read); } -enum b_status b_stream_write_char(b_stream *stream, char c) +enum b_status b_stream_write_char(b_stream *stream, b_wchar c) { STREAM_DISPATCH_VIRTUAL(stream_write_char, stream, c); }