core: stream: add utf-8 awareness to b_stream interface

b_stream can now read/write utf-8 encoded data, and will do so unless
the stream is in binary mode.

b_stream will also report an error if it encounters invalid utf-8 data
(unless binary mode is enabled).
This commit is contained in:
2025-10-25 00:02:33 +01:00
parent f6f49faf97
commit a429e76721
2 changed files with 101 additions and 24 deletions

View File

@@ -1,6 +1,7 @@
#ifndef BLUE_CORE_STREAM_H_ #ifndef BLUE_CORE_STREAM_H_
#define BLUE_CORE_STREAM_H_ #define BLUE_CORE_STREAM_H_
#include <blue/core/encoding.h>
#include <blue/core/macros.h> #include <blue/core/macros.h>
#include <blue/core/misc.h> #include <blue/core/misc.h>
#include <blue/core/status.h> #include <blue/core/status.h>
@@ -40,7 +41,7 @@ B_TYPE_CLASS_DECLARATION_BEGIN(b_stream)
b_status (*s_close)(b_stream *); b_status (*s_close)(b_stream *);
b_status (*s_seek)(b_stream *, long long, b_stream_seek_origin); b_status (*s_seek)(b_stream *, long long, b_stream_seek_origin);
b_status (*s_tell)(const b_stream *, size_t *); b_status (*s_tell)(const b_stream *, size_t *);
b_status (*s_getc)(b_stream *, int *); b_status (*s_getc)(b_stream *, b_wchar *);
b_status (*s_read)(b_stream *, unsigned char *, size_t, size_t *); b_status (*s_read)(b_stream *, unsigned char *, size_t, size_t *);
b_status (*s_write)(b_stream *, const unsigned char *, size_t, size_t *); b_status (*s_write)(b_stream *, const unsigned char *, size_t, size_t *);
b_status (*s_reserve)(b_stream *, size_t); b_status (*s_reserve)(b_stream *, size_t);
@@ -69,7 +70,7 @@ BLUE_API size_t b_stream_cursor(const b_stream *stream);
BLUE_API b_status b_stream_push_indent(b_stream *stream, int indent); BLUE_API b_status b_stream_push_indent(b_stream *stream, int indent);
BLUE_API b_status b_stream_pop_indent(b_stream *stream); BLUE_API b_status b_stream_pop_indent(b_stream *stream);
BLUE_API b_status b_stream_read_char(b_stream *stream, int *c); BLUE_API b_status b_stream_read_char(b_stream *stream, b_wchar *c);
BLUE_API b_status b_stream_read_bytes( BLUE_API b_status b_stream_read_bytes(
b_stream *stream, void *buf, size_t count, size_t *nr_read); b_stream *stream, void *buf, size_t count, size_t *nr_read);
@@ -82,7 +83,7 @@ BLUE_API b_status b_stream_read_all_bytes(
BLUE_API b_status b_stream_read_all_bytes_s( BLUE_API b_status b_stream_read_all_bytes_s(
b_stream *src, b_stream *dest, b_stream_buffer *buffer, size_t *nr_read); b_stream *src, b_stream *dest, b_stream_buffer *buffer, size_t *nr_read);
BLUE_API b_status b_stream_write_char(b_stream *stream, char c); BLUE_API b_status b_stream_write_char(b_stream *stream, b_wchar c);
BLUE_API b_status b_stream_write_string( BLUE_API b_status b_stream_write_string(
b_stream *stream, const char *s, size_t *nr_written); b_stream *stream, const char *s, size_t *nr_written);

View File

@@ -155,7 +155,67 @@ static enum b_status stream_reserve(struct stream_data *stream, size_t len)
return stream->s_ops->s_reserve(stream->s_obj, len); return stream->s_ops->s_reserve(stream->s_obj, len);
} }
static enum b_status stream_read_char(struct stream_data *stream, int *c) static enum b_status read_char_binary(struct stream_data *stream, b_wchar *c)
{
size_t r;
unsigned char v = 0;
enum b_status status = stream->s_ops->s_read(stream->s_obj, &v, 1, &r);
*c = v;
if (status == B_SUCCESS && r < 1) {
status = B_ERR_NO_DATA;
}
return status;
}
/*
 * Read one UTF-8 encoded character from the stream and store the decoded
 * value in *c.
 *
 * The lead byte is read first; the sequence length is derived from it and
 * any remaining bytes of the sequence are then fetched with a second s_read
 * call before the whole sequence is decoded.
 *
 * Returns:
 *   B_SUCCESS         - a character was decoded and written to *c.
 *   B_ERR_NO_DATA     - the first s_read succeeded but returned zero bytes.
 *   B_ERR_BAD_FORMAT  - the lead byte gave a length outside 1..4, fewer
 *                       trailing bytes than required were read, or the
 *                       decoder returned B_WCHAR_INVALID.
 *   any non-OK status reported by s_read is passed through unchanged.
 *
 * *c is only written on the success paths; it is left untouched on error.
 * The caller is expected to have checked that s_ops->s_read is non-NULL.
 */
static enum b_status read_char_utf8(struct stream_data *stream, b_wchar *c)
{
size_t r;
/* Scratch buffer for one encoded sequence (the length check below caps it at 4 bytes). */
unsigned char s[4];
unsigned int len = 0;
/* Fetch the lead byte only; the sequence length is not known yet. */
enum b_status status = stream->s_ops->s_read(stream->s_obj, s, 1, &r);
if (!B_OK(status)) {
return status;
}
if (r < 1) {
return B_ERR_NO_DATA;
}
len = b_wchar_utf8_header_decode(s[0]);
/*
 * len is unsigned, so `len <= 0` is effectively `len == 0`.
 * NOTE(review): if b_wchar_utf8_header_decode returns a negative value for
 * an invalid lead byte, it wraps to a large unsigned value here and is
 * rejected by the `len > 4` test - confirm against the helper's contract.
 */
if (len <= 0 || len > 4) {
return B_ERR_BAD_FORMAT;
}
/* Single-byte sequence: the lead byte is stored directly, no decode call needed. */
if (len == 1) {
*c = s[0];
return B_SUCCESS;
}
/* Read the remaining bytes of the sequence right after the lead byte. */
status = stream->s_ops->s_read(stream->s_obj, s + 1, len - 1, &r);
if (!B_OK(status)) {
return status;
}
/*
 * Any short read here is reported as malformed input.
 * NOTE(review): this does not distinguish a truncated sequence at end of
 * data from a backend that returns fewer bytes than requested - confirm
 * that s_read implementations never do partial reads.
 */
if (r != len - 1) {
return B_ERR_BAD_FORMAT;
}
/*
 * s is not NUL-terminated; presumably the decoder derives the sequence
 * length from the lead byte - TODO confirm.
 */
b_wchar result = b_wchar_utf8_codepoint_decode((char *)s);
if (result == B_WCHAR_INVALID) {
return B_ERR_BAD_FORMAT;
}
*c = result;
return B_SUCCESS;
}
static enum b_status stream_read_char(struct stream_data *stream, b_wchar *c)
{ {
if (!(stream->s_cfg->s_mode & B_STREAM_READ)) { if (!(stream->s_cfg->s_mode & B_STREAM_READ)) {
return B_ERR_NOT_SUPPORTED; return B_ERR_NOT_SUPPORTED;
@@ -163,37 +223,53 @@ static enum b_status stream_read_char(struct stream_data *stream, int *c)
enum b_status status = B_ERR_NOT_SUPPORTED; enum b_status status = B_ERR_NOT_SUPPORTED;
#if 0
if (stream->s_ops->s_getc) { if (stream->s_ops->s_getc) {
status = stream->s_ops->s_getc(stream->s_obj, c); return stream->s_ops->s_getc(stream->s_obj, c);
} else if (stream->s_ops->s_read) {
size_t r;
unsigned char v = 0;
status = stream->s_ops->s_read(stream->s_obj, &v, 1, &r);
*c = v;
if (status == B_SUCCESS && r < 1) {
status = B_ERR_NO_DATA;
} }
#endif
if (!stream->s_ops->s_read) {
return B_ERR_NOT_SUPPORTED;
} }
return status; if (stream->s_cfg->s_mode & B_STREAM_BINARY) {
return read_char_binary(stream, c);
}
return read_char_utf8(stream, c);
} }
static enum b_status __write_char(struct stream_data *stream, char c) static enum b_status __write_char(struct stream_data *stream, b_wchar c)
{ {
size_t w; size_t w, len;
enum b_status status = stream->s_ops->s_write( enum b_status status;
stream->s_obj, (unsigned char *)&c, 1, &w); char s[4];
if (status == B_SUCCESS && w < 1) { if (stream->s_cfg->s_mode & B_STREAM_BINARY) {
s[0] = c & 0xFF;
len = 1;
} else {
len = b_wchar_utf8_codepoint_encode(c, s);
if (len == 0 || len > 4) {
return B_ERR_INVALID_ARGUMENT;
}
}
status = stream->s_ops->s_write(stream->s_obj, (unsigned char *)s, len, &w);
if (!B_OK(status)) {
return status;
}
if (status == B_SUCCESS && w < len) {
status = B_ERR_IO_FAILURE; status = B_ERR_IO_FAILURE;
} }
return status; return status;
} }
static enum b_status stream_write_char(struct stream_data *stream, char c) static enum b_status stream_write_char(struct stream_data *stream, b_wchar c)
{ {
if (!(stream->s_cfg->s_mode & B_STREAM_WRITE)) { if (!(stream->s_cfg->s_mode & B_STREAM_WRITE)) {
return B_ERR_NOT_SUPPORTED; return B_ERR_NOT_SUPPORTED;
@@ -257,7 +333,7 @@ static enum b_status stream_read_line(struct stream_data *stream, char *s, size_
enum b_status status = B_SUCCESS; enum b_status status = B_SUCCESS;
size_t i = 0; size_t i = 0;
int c = 0; b_wchar c = 0;
while (1) { while (1) {
if (i >= max) { if (i >= max) {
@@ -294,7 +370,7 @@ static enum b_status stream_read_line_s(
enum b_status status = B_SUCCESS; enum b_status status = B_SUCCESS;
size_t i = 0; size_t i = 0;
int c = 0; b_wchar c = 0;
while (1) { while (1) {
status = stream_read_char(src, &c); status = stream_read_char(src, &c);
@@ -698,7 +774,7 @@ enum b_status b_stream_read_all_bytes_s(
return stream_read_all_bytes_s(&src_p, &dest_p, buffer_p, out_nr_read); return stream_read_all_bytes_s(&src_p, &dest_p, buffer_p, out_nr_read);
} }
enum b_status b_stream_write_char(b_stream *stream, char c) enum b_status b_stream_write_char(b_stream *stream, b_wchar c)
{ {
STREAM_DISPATCH_VIRTUAL(stream_write_char, stream, c); STREAM_DISPATCH_VIRTUAL(stream_write_char, stream, c);
} }