From dad7c27bf6c1e1210076c85b2125f2661fe2d4c0 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Thu, 30 Jan 2025 18:10:38 +0000 Subject: [PATCH] add cluster i/o pipeline --- CMakeLists.txt | 4 +- doc/format.txt | 12 +- src/aes256.c | 27 ++++ src/b-tree.c | 0 src/b-tree.h | 29 ++++ src/bin.h | 269 ++++++++++++++++++++++++++++++++---- src/compress.h | 24 ++++ src/create.c | 44 ++++-- src/encrypt.h | 19 +++ src/file.c | 39 ++++++ src/pipeline.c | 156 +++++++++++++++++++++ src/pipeline.h | 74 ++++++++++ src/status.h | 3 + src/tag.h | 4 + src/wrap.c | 69 +++++++++- src/write.c | 368 +++++++++++++++++++++++++++++++++++++++++++++++++ src/write.h | 26 +++- src/zstd.c | 30 ++++ 18 files changed, 1147 insertions(+), 50 deletions(-) create mode 100644 src/aes256.c create mode 100644 src/b-tree.c create mode 100644 src/b-tree.h create mode 100644 src/compress.h create mode 100644 src/encrypt.h create mode 100644 src/file.c create mode 100644 src/pipeline.c create mode 100644 src/pipeline.h create mode 100644 src/tag.h create mode 100644 src/write.c create mode 100644 src/zstd.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 7295dc0..bd707ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ set(Bluelib_STATIC 1) find_package(Bluelib REQUIRED) find_package(ZSTD REQUIRED) -file(GLOB ec3_sources +file(GLOB_RECURSE ec3_sources src/*.c src/*.h) @@ -19,4 +19,4 @@ target_link_libraries(ec3 Bluelib::Cmd ${ZSTD_LIBRARY}) -target_include_directories(ec3 PRIVATE ${ZSTD_INCLUDE_DIR}) \ No newline at end of file +target_include_directories(ec3 PRIVATE ${ZSTD_INCLUDE_DIR}) diff --git a/doc/format.txt b/doc/format.txt index e5c3a03..9808022 100755 --- a/doc/format.txt +++ b/doc/format.txt @@ -299,13 +299,11 @@ version: 1.0 Header Value Cluster Size (bytes) Cluster Size (kilobytes) -------------------------------------------------------------------- - 0x00 16,384 16 - 0x01 32,768 32 - 0x02 65,536 64 - 0x03 131,072 128 - 0x04 262,144 256 - 0x05 524,288 512 - 0x06 1,048,576 1,024 + 0x00 4,096 4 + 0x01 8,192 8 + 0x02 16,384 16 + 0x03 32,768 32 + 0x04 65,536 64 5.1.4 Tag Table Offset This specifies the offset in bytes from the beginning of the image file diff --git a/src/aes256.c b/src/aes256.c new file mode 100644 index 0000000..9634d51 --- /dev/null +++ b/src/aes256.c @@ -0,0 +1,27 @@ +#include "pipeline.h" + +static enum ec3_status encrypt( + struct ec3_pipeline_stage *stage, + const void *src, + size_t len, + void *dest, + size_t *nr_written) +{ + return EC3_SUCCESS; +} + +static enum ec3_status decrypt( + struct ec3_pipeline_stage *stage, + const void *src, + void *dest, + size_t *nr_read) +{ + return EC3_SUCCESS; +} + +const struct ec3_pipeline_stage_type pipeline_aes256 = { + .t_id = EC3_PIPELINE_AES256, + .t_flags = EC3_PIPELINE_F_NONE, + .t_data_in = decrypt, + .t_data_out = encrypt, +}; diff --git a/src/b-tree.c b/src/b-tree.c new file mode 100644 index 0000000..e69de29 diff --git a/src/b-tree.h b/src/b-tree.h new file mode 100644 index 0000000..29b2665 --- /dev/null +++ b/src/b-tree.h @@ -0,0 +1,29 @@ +#ifndef B_TREE_H_ +#define B_TREE_H_ + +#include + +struct b_tree; +typedef void *b_tree_node; +typedef void *b_tree_node_entry; + +struct b_tree_ops { + size_t node_size; + size_t entry_size; + + int (*tree_get_node)(struct b_tree *, int, void *); + int (*tree_put_node)(struct b_tree *, int, void *); + int (*tree_alloc_node)(struct b_tree *); + + int (*node_get_nr_entries)(b_tree_node *); + b_tree_node_entry *(*node_get_entry)(b_tree_node *, int); + int (*node_get_child)(b_tree_node *, int); + + int (*entry_compare)(b_tree_node_entry *, b_tree_node_entry *); +}; + +struct b_tree { + const struct b_tree_ops *tree_ops; +}; + +#endif diff --git a/src/bin.h b/src/bin.h index ecacf47..0cc8fe9 100644 --- a/src/bin.h +++ b/src/bin.h @@ -3,39 +3,110 @@ #include -#define EC3_SIGNATURE 0x45433358 +#define EC3_SIGNATURE 0x45433358 -#define EC3_VERSION_1_0 0x0100 +#define EC3_VERSION_1_0 0x0100 -#define EC3_CLUSTER_16K 0x00u -#define EC3_CLUSTER_32K 0x01u -#define EC3_CLUSTER_64K 0x02u -#define EC3_CLUSTER_128K 0x03u -#define EC3_CLUSTER_256K 0x04u -#define EC3_CLUSTER_512K 0x05u -#define EC3_CLUSTER_1M 0x06u +#define EC3_CLUSTER_4K 0x00u +#define EC3_CLUSTER_8K 0x01u +#define EC3_CLUSTER_16K 0x02u +#define EC3_CLUSTER_32K 0x03u +#define EC3_CLUSTER_64K 0x04u -#define EC3_TAG_VOLU 0x564F4C55 -#define EC3_TAG_CTAB 0x43544142 -#define EC3_TAG_XATR 0x58415452 -#define EC3_TAG_STAB 0x53544142 -#define EC3_TAG_MFST 0x4D465354 -#define EC3_TAG_BLOB 0x424C4F42 -#define EC3_TAG_EXEC 0x45584543 -#define EC3_TAG_CERT 0x43455254 -#define EC3_TAG_CSIG 0x43534947 +#define EC3_COMPRESSION_NONE 0x00u +#define EC3_COMPRESSION_ZSTD 0x01u -#define EC3_TAG_SIGNED 0x00000001u -#define EC3_TAG_COMPRESSED 0x00000002u -#define EC3_TAG_ENCRYPTED 0x00000004u +#define EC3_ENCRYPTION_NONE 0x00u +#define EC3_ENCRYPTION_AES256 0x01u + +#define EC3_TAG_VOLU 0x564F4C55 +#define EC3_TAG_CTAB 0x43544142 +#define EC3_TAG_XATR 0x58415452 +#define EC3_TAG_STAB 0x53544142 +#define EC3_TAG_MFST 0x4D465354 +#define EC3_TAG_BLOB 0x424C4F42 +#define EC3_TAG_EXEC 0x45584543 +#define EC3_TAG_CERT 0x43455254 +#define EC3_TAG_CSIG 0x43534947 + +#define EC3_TAG_SIGNED 0x00000001u +#define EC3_TAG_COMPRESSED 0x00000002u +#define EC3_TAG_ENCRYPTED 0x00000004u + +/* 32K per cluster group */ +#define EC3_CLUSTERS_PER_GROUP 1635 +/* 1K per extent group */ +#define EC3_EXTENTS_PER_GROUP 36 + +/* each chunk group occupies one cluster, so the number of chunks per group + * depends on the cluster size */ +#define EC3_CHUNKS_PER_GROUP_4K 146 +#define EC3_CHUNKS_PER_GROUP_8K 292 +#define EC3_CHUNKS_PER_GROUP_16K 584 +#define EC3_CHUNKS_PER_GROUP_32K 1170 +#define EC3_CHUNKS_PER_GROUP_64K 2340 + +/* each vnode group occupies one cluster, so the number of vnodes per group + * depends on the cluster size */ +#define EC3_VNODES_PER_GROUP_4K 255 +#define EC3_VNODES_PER_GROUP_8K 511 +#define EC3_VNODES_PER_GROUP_16K 1023 +#define EC3_VNODES_PER_GROUP_32K 2047 +#define EC3_VNODES_PER_GROUP_64K 4095 + +/* each vnode/chunk link group occupies one cluster, so the number of links per + * group depends on the cluster size */ +#define EC3_VNCH_PER_GROUP_4K 146 +#define EC3_VNCH_PER_GROUP_8K 292 +#define EC3_VNCH_PER_GROUP_16K 584 +#define EC3_VNCH_PER_GROUP_32K 1170 +#define EC3_VNCH_PER_GROUP_64K 2340 + +/* Chunks are identified by a 128-bit (16-byte) hash */ +#define EC3_CHUNK_ID_SIZE 16 + +typedef uint8_t ec3_chunk_id[EC3_CHUNK_ID_SIZE]; struct ec3_header { - b_i32 h_sig; + b_i32 h_magic; b_i16 h_version; b_i16 h_cluster_size; b_i64 h_tag_table_offset; - b_i64 h_tag_count; + b_i64 h_extent_table_offset; + b_i64 h_cluster_table_offset; + b_i32 h_tag_count; + b_i32 h_extent_count; + b_i32 h_cluster_group_count; + b_i16 h_compression; + b_i16 h_encryption; b_i64 h_app_magic; + uint8_t h_reserved[8]; +}; + +struct ec3_cluster { + /* cluster identifier */ + b_i32 c_id; + /* lower 32-bits of the cluster bounds */ + b_i32 c_bounds0; + /* upper 32-bits of the cluster bounds */ + b_i32 c_bounds1; + /* CRC-16 of the on-disk cluster data */ + b_i16 c_checksum; + /* flags that apply to this cluster */ + b_i16 c_flags; +}; + +struct ec3_cluster_group { + /* the number of clusters that this group contains */ + b_i16 g_nr_clusters; + uint8_t g_reserved[2]; + /* array of clusters contained within the group. */ + struct ec3_cluster g_clusters[EC3_CLUSTERS_PER_GROUP]; + /* offsets to other cluster groups, relative to the start of the + * cluster group table. the cluster groups form a B-tree. */ + b_i32 g_child_offsets[EC3_CLUSTERS_PER_GROUP + 1]; + /* pad the group out to an even 32K */ + uint8_t g_padding[20]; }; struct ec3_tag_table_entry { @@ -44,9 +115,157 @@ struct ec3_tag_table_entry { b_i32 tag_checksum; b_i32 tag_reserved1; b_i64 tag_ident; - b_i64 tag_offset; - b_i64 tag_size; b_i64 tag_reserved2; }; +/* Extents serve two purposes: + * 1. They specify which clusters in an image are allocated to which tags. + * 2. They define the mapping between "logical" clusters and "physical" + * clusters. + * + * For example, logical cluster 0 of a certain tag (i.e. the cluster that + * contains the first 16K of the tag's data) might be mapped to physical + * cluster 17 (i.e. the cluster that has an id of 17). + */ +struct ec3_extent { + /* the id of the tag that this range of clusters belongs to */ + b_i64 ex_owner; + /* the id of the first logical cluster that this extent represents. */ + b_i32 ex_logical_cluster; + /* the id of the first physical cluster that this extent represents. */ + b_i32 ex_physical_cluster; + /* the number of clusters included in this extent. */ + b_i32 ex_count; +}; + +struct ec3_chunk { + ec3_chunk_id c_id; + b_i16 c_length; + b_i16 c_offset1; + b_i32 c_offset0; +}; + +struct ec3_chunk_group { + /* the number of chunks that this group contains */ + b_i16 g_nr_chunks; + uint8_t g_reserved[2]; + + union { + struct { + struct ec3_chunk g_chunks[EC3_CHUNKS_PER_GROUP_4K]; + b_i32 g_child_offsets[EC3_CHUNKS_PER_GROUP_4K + 1]; + } g_4k; + +#if 0 + struct { + struct ec3_chunk g_chunks[EC3_CHUNKS_PER_GROUP_8K]; + b_i32 g_child_offsets[EC3_CHUNKS_PER_GROUP_8K + 1]; + } g_8k; + + struct { + struct ec3_chunk g_chunks[EC3_CHUNKS_PER_GROUP_16K]; + b_i32 g_child_offsets[EC3_CHUNKS_PER_GROUP_16K + 1]; + } g_16k; + + struct { + struct ec3_chunk g_chunks[EC3_CHUNKS_PER_GROUP_32K]; + b_i32 g_child_offsets[EC3_CHUNKS_PER_GROUP_32K + 1]; + } g_32k; + + struct { + struct ec3_chunk g_chunks[EC3_CHUNKS_PER_GROUP_64K]; + b_i32 g_child_offsets[EC3_CHUNKS_PER_GROUP_64K + 1]; + } g_64k; +#endif + }; +}; + +struct ec3_directory_entry { + b_i32 d_name; + b_i32 d_vnode; +}; + +struct ec3_vnode { + b_i32 n_id; + b_i32 n_attrib; + /* length of file in chunks */ + b_i32 n_length; +}; + +struct ec3_vnode_group { + /* the number of vnodes that this group contains */ + b_i16 g_nr_vnodes; + uint8_t g_reserved[2]; + + union { + struct { + struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_4K]; + b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_4K + 1]; + } g_4k; + +#if 0 + struct { + struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_8K]; + b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_8K + 1]; + } g_8k; + + struct { + struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_16K]; + b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_16K + 1]; + } g_16k; + + struct { + struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_32K]; + b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_32K + 1]; + } g_32k; + + struct { + struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_64K]; + b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_64K + 1]; + } g_64k; +#endif + }; +}; + +struct ec3_vnode_chunk { + ec3_chunk_id c_chunk_id; + b_i32 c_vnode_id; + b_i32 c_vnode_index; +}; + +struct ec3_vnode_chunk_group { + /* the number of vnode/chunk links that this group contains */ + b_i16 g_nr_entries; + uint8_t g_reserved[2]; + + union { + struct { + struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_4K]; + b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_4K + 1]; + } g_4k; + +#if 0 + struct { + struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_8K]; + b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_8K + 1]; + } g_8k; + + struct { + struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_16K]; + b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_16K + 1]; + } g_16k; + + struct { + struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_32K]; + b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_32K + 1]; + } g_32k; + + struct { + struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_64K]; + b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_64K + 1]; + } g_64k; +#endif + }; +}; + #endif diff --git a/src/compress.h b/src/compress.h new file mode 100644 index 0000000..618e6b0 --- /dev/null +++ b/src/compress.h @@ -0,0 +1,24 @@ +#ifndef COMPRESS_H_ +#define COMPRESS_H_ + +#include + +struct ec3_compression_function { + enum ec3_status (*e_compress)( + const void *in, + size_t in_size, + void *out, + size_t out_max, + size_t *out_size); + + enum ec3_status (*e_decompress)( + const void *in, + size_t in_size, + void *out, + size_t out_size); +}; + +extern const struct ec3_compression_function *ec3_compression_function_get( + unsigned int type); + +#endif diff --git a/src/create.c b/src/create.c index 07db466..c4986f7 100644 --- a/src/create.c +++ b/src/create.c @@ -1,9 +1,10 @@ +#include "bin.h" #include "commands.h" #include -#include #include #include +#include enum { ARG_INPATH, @@ -18,7 +19,22 @@ static int create( { const char *in_path = NULL, *out_path = NULL; - b_arglist_get_string(opt, B_COMMAND_INVALID_ID, ARG_INPATH, 0, &in_path); + printf("cluster group = %zu\n", sizeof(struct ec3_cluster_group)); + printf("chunk group = %zu\n", sizeof(struct ec3_chunk_group)); + printf("vnode group = %zu\n", sizeof(struct ec3_vnode_group)); + printf("vnch group = %zu\n", sizeof(struct ec3_vnode_chunk_group)); + printf("vnode = %zu\n", sizeof(struct ec3_vnode)); + printf("chunk = %zu\n", sizeof(struct ec3_chunk)); + printf("link = %zu\n", sizeof(struct ec3_vnode_chunk)); + + return 0; + + b_arglist_get_string( + opt, + B_COMMAND_INVALID_ID, + ARG_INPATH, + 0, + &in_path); b_arglist_get_string(opt, OPT_OUTPATH, OPT_OUTPATH_PATH, 0, &out_path); printf("in path: %s\n", in_path); @@ -61,17 +77,23 @@ static int create( total += r; bool last_chunk = r < in_bufsz; - ZSTD_EndDirective mode = last_chunk ? ZSTD_e_end : ZSTD_e_continue; + ZSTD_EndDirective mode + = last_chunk ? ZSTD_e_end : ZSTD_e_continue; - ZSTD_inBuffer input = { in_buf, r, 0 }; + ZSTD_inBuffer input = {in_buf, r, 0}; int finished; do { - ZSTD_outBuffer output = { out_buf, out_bufsz, 0 }; - size_t remaining = ZSTD_compressStream2(zstd, &output, &input, mode); - + ZSTD_outBuffer output = {out_buf, out_bufsz, 0}; + size_t remaining = ZSTD_compressStream2( + zstd, + &output, + &input, + mode); + fwrite(out_buf, 1, output.pos, out); - finished = last_chunk ? (remaining == 0) : (input.pos == input.size); + finished = last_chunk ? (remaining == 0) + : (input.pos == input.size); } while (!finished); if (last_chunk) { @@ -111,12 +133,14 @@ B_COMMAND(CMD_CREATE, CMD_ROOT) } } - B_COMMAND_ARG(ARG_INPATH) { + B_COMMAND_ARG(ARG_INPATH) + { B_ARG_NAME("input file"); B_ARG_NR_VALUES(1); } - B_COMMAND_USAGE() { + B_COMMAND_USAGE() + { B_COMMAND_USAGE_ARG(ARG_INPATH); B_COMMAND_USAGE_OPT(OPT_OUTPATH); } diff --git a/src/encrypt.h b/src/encrypt.h new file mode 100644 index 0000000..f7a74c8 --- /dev/null +++ b/src/encrypt.h @@ -0,0 +1,19 @@ +#ifndef ENCRYPT_H_ +#define ENCRYPT_H_ + +#include + +struct ec3_encryption_function { + const size_t e_key_size; + const size_t e_block_size; + enum ec3_status ( + *e_encrypt)(const void *in, void *out, const void *key); + + enum ec3_status ( + *e_decrypt)(const void *in, void *out, const void *key); +}; + +extern const struct ec3_encryption_function *ec3_encryption_function_get( + unsigned int type); + +#endif diff --git a/src/file.c b/src/file.c new file mode 100644 index 0000000..113259d --- /dev/null +++ b/src/file.c @@ -0,0 +1,39 @@ +#include "pipeline.h" + +#include + +static enum ec3_status write( + struct ec3_pipeline_stage *stage, + const void *src, + size_t len, + void *dest, + size_t *nr_written) +{ + FILE *fp = stage->s_arg; + size_t r = fwrite(src, 1, len, fp); + + *nr_written = r; + + if (r < len) { + return EC3_ERR_IO_FAILURE; + } + + return EC3_SUCCESS; +} + +static enum ec3_status read( + struct ec3_pipeline_stage *stage, + const void *src, + void *dest, + size_t *nr_read) +{ + FILE *fp = stage->s_arg; + return EC3_SUCCESS; +} + +const struct ec3_pipeline_stage_type pipeline_file = { + .t_id = EC3_PIPELINE_FILE, + .t_flags = EC3_PIPELINE_F_NONE, + .t_data_in = read, + .t_data_out = write, +}; diff --git a/src/pipeline.c b/src/pipeline.c new file mode 100644 index 0000000..59d8737 --- /dev/null +++ b/src/pipeline.c @@ -0,0 +1,156 @@ +#include "pipeline.h" + +#include +#include + +extern const struct ec3_pipeline_stage_type pipeline_zstd; +extern const struct ec3_pipeline_stage_type pipeline_aes256; +extern const struct ec3_pipeline_stage_type pipeline_file; + +static const struct ec3_pipeline_stage_type *stage_types[] = { + [EC3_PIPELINE_ZSTD] = &pipeline_zstd, + [EC3_PIPELINE_AES256] = &pipeline_aes256, + [EC3_PIPELINE_FILE] = &pipeline_file, +}; +static const size_t nr_stage_types = sizeof stage_types / sizeof stage_types[0]; + +static enum ec3_status create_pipeline_stage( + const struct ec3_pipeline_stage_type *type, + size_t cluster_size, + void *arg, + struct ec3_pipeline_stage **out) +{ + struct ec3_pipeline_stage *stage = malloc(sizeof *stage); + if (!stage) { + return EC3_ERR_NO_MEMORY; + } + + memset(stage, 0x0, sizeof *stage); + + stage->s_type = type; + stage->s_arg = arg; + + if (type->t_flags & EC3_PIPELINE_F_BUFFERED) { + stage->s_buf = malloc(cluster_size); + + if (!stage) { + free(stage); + return EC3_ERR_NO_MEMORY; + } + + memset(stage->s_buf, 0x0, cluster_size); + } + + *out = stage; + return EC3_SUCCESS; +} + +extern enum ec3_status ec3_pipeline_create( + struct ec3_pipeline_stage_args stages[], + size_t nr_stages, + size_t cluster_size, + struct ec3_pipeline **out) +{ + enum ec3_status status = EC3_SUCCESS; + struct ec3_pipeline *pipeline = malloc(sizeof *pipeline); + + if (!pipeline) { + return EC3_ERR_NO_MEMORY; + } + + memset(pipeline, 0x0, sizeof *pipeline); + + for (size_t i = 0; i < nr_stages; i++) { + struct ec3_pipeline_stage_args *args = &stages[i]; + + if (args->type == EC3_PIPELINE_NONE) { + continue; + } + + if (args->type < 0 || args->type >= nr_stage_types) { + return EC3_ERR_NOT_SUPPORTED; + } + + const struct ec3_pipeline_stage_type *type + = stage_types[stages[i].type]; + + if (!type) { + return EC3_ERR_NOT_SUPPORTED; + } + + struct ec3_pipeline_stage *stage = NULL; + status = create_pipeline_stage( + type, + cluster_size, + args->arg, + &stage); + if (status != EC3_SUCCESS) { + return status; + } + + b_queue_push_back(&pipeline->p_stages, &stage->s_entry); + } + + *out = pipeline; + return status; +} + +void ec3_pipeline_destroy(struct ec3_pipeline *p) +{ +} + +enum ec3_status ec3_pipeline_data_out( + struct ec3_pipeline *pipeline, + void *p, + size_t len, + size_t *nr_written) +{ + b_queue_entry *cur = b_queue_first(&pipeline->p_stages); + enum ec3_status status = EC3_SUCCESS; + void *src = p; + + size_t stage_in_size = len; + size_t stage_out_size = 0; + + while (cur) { + struct ec3_pipeline_stage *stage + = b_unbox(struct ec3_pipeline_stage, cur, s_entry); + + void *dest; + if (stage->s_type->t_flags & EC3_PIPELINE_F_BUFFERED) { + dest = stage->s_buf; + } else { + dest = src; + } + + status = stage->s_type->t_data_out( + stage, + src, + stage_in_size, + dest, + &stage_out_size); + + if (status != EC3_SUCCESS) { + return status; + } + + src = dest; + stage_in_size = stage_out_size; + cur = b_queue_next(cur); + } + + if (nr_written) { + *nr_written = stage_out_size; + } + + return EC3_SUCCESS; +} + +enum ec3_status ec3_pipeline_data_in( + struct ec3_pipeline *pipeline, + void *p, + size_t max, + size_t *nr_read) +{ + return EC3_ERR_NOT_SUPPORTED; +} diff --git a/src/pipeline.h b/src/pipeline.h new file mode 100644 index 0000000..4bff5d6 --- /dev/null +++ b/src/pipeline.h @@ -0,0 +1,74 @@ +#ifndef PIPELINE_H_ +#define PIPELINE_H_ + +#include "status.h" + +#include +#include + +enum ec3_pipeline_stage_type_id { + EC3_PIPELINE_NONE = 0, + EC3_PIPELINE_AES256, + EC3_PIPELINE_ZSTD, + EC3_PIPELINE_FILE, +}; + +enum ec3_pipeline_stage_type_flags { + EC3_PIPELINE_F_NONE = 0x00u, + EC3_PIPELINE_F_BUFFERED = 0x01u, +}; + +struct ec3_pipeline_stage; + +struct ec3_pipeline_stage_type { + enum ec3_pipeline_stage_type_id t_id; + enum ec3_pipeline_stage_type_flags t_flags; + + enum ec3_status (*t_data_in)( + struct ec3_pipeline_stage *, + const void *, + void *, + size_t *); + enum ec3_status (*t_data_out)( + struct ec3_pipeline_stage *, + const void *, + size_t, + void *, + size_t *); +}; + +struct ec3_pipeline_stage { + const struct ec3_pipeline_stage_type *s_type; + void *s_buf; + void *s_arg; + b_queue_entry s_entry; +}; + +struct ec3_pipeline_stage_args { + enum ec3_pipeline_stage_type_id type; + void *arg; +}; + +struct ec3_pipeline { + b_queue p_stages; +}; + +extern enum ec3_status ec3_pipeline_create( + struct ec3_pipeline_stage_args stages[], + size_t nr_stages, + size_t cluster_size, + struct ec3_pipeline **out); +extern void ec3_pipeline_destroy(struct ec3_pipeline *p); + +extern enum ec3_status ec3_pipeline_data_out( + struct ec3_pipeline *pipeline, + void *p, + size_t len, + size_t *nr_written); +extern enum ec3_status ec3_pipeline_data_in( + struct ec3_pipeline *pipeline, + void *p, + size_t max, + size_t *nr_read); + +#endif diff --git a/src/status.h b/src/status.h index 0f0a72c..1dcebf1 100644 --- a/src/status.h +++ b/src/status.h @@ -7,6 +7,9 @@ enum ec3_status { EC3_ERR_NO_ENTRY, EC3_ERR_NOT_SUPPORTED, EC3_ERR_BAD_STATE, + EC3_ERR_INVALID_VALUE, + EC3_ERR_NAME_EXISTS, + EC3_ERR_IO_FAILURE, }; #endif diff --git a/src/tag.h b/src/tag.h new file mode 100644 index 0000000..6fd64d5 --- /dev/null +++ b/src/tag.h @@ -0,0 +1,4 @@ +#ifndef TAG_H_ +#define TAG_H_ + +#endif diff --git a/src/wrap.c b/src/wrap.c index cda0809..141bd56 100644 --- a/src/wrap.c +++ b/src/wrap.c @@ -1,14 +1,16 @@ +#include "bin.h" #include "commands.h" +#include "write.h" #include -#include +#include +#include enum { OPT_OUTPATH, OPT_OUTPATH_PATH, ARG_FILE, - ARG_FILE_PATH, OPT_TAGGED_FILE, OPT_TAGGED_FILE_TAG, @@ -20,6 +22,65 @@ static int wrap( const b_arglist *opt, const b_array *args) { + const char *in_path = NULL, *out_path = NULL; + b_arglist_get_string(opt, B_COMMAND_INVALID_ID, ARG_FILE, 0, &in_path); + b_arglist_get_string(opt, OPT_OUTPATH, OPT_OUTPATH_PATH, 0, &out_path); + + printf("in path: %s\n", in_path); + printf("out path: %s\n", out_path); + + FILE *inp = fopen(in_path, "rb"); + if (!inp) { + b_err("cannot open '%s'", in_path); + b_i("reason: %s", strerror(errno)); + return -1; + } + + FILE *outp = fopen(out_path, "wb"); + if (!outp) { + b_err("cannot open '%s'", out_path); + b_i("reason: %s", strerror(errno)); + return -1; + } + + struct ec3_writer *writer = NULL; + struct ec3_parameters param = { + .p_outp = outp, + .p_cluster_size = EC3_CLUSTER_16K, + .p_compression_func = EC3_COMPRESSION_ZSTD, + }; + enum ec3_status status = ec3_writer_create(¶m, &writer); + + if (status != EC3_SUCCESS) { + b_err("cannot initialise EC3 writer"); + return -1; + } + + struct ec3_tag_writer *tag = NULL; + status = ec3_writer_create_tag(writer, EC3_TAG_BLOB, 0, 0, &tag); + + if (status != EC3_SUCCESS) { + b_err("cannot initialise EC3 tag writer"); + return -1; + } + + char buf[4096]; + + while (1) { + size_t r = fread(buf, 1, sizeof buf, inp); + status = ec3_tag_writer_write(tag, buf, r); + + if (r < sizeof buf) { + break; + } + } + + ec3_tag_writer_finish(tag); + ec3_writer_finish(writer); + + fclose(inp); + fclose(outp); + return 0; } @@ -29,8 +90,8 @@ B_COMMAND(CMD_WRAP, CMD_ROOT) B_COMMAND_SHORT_NAME('W'); B_COMMAND_DESC( "wrap one or more files into an ec3 container. each file will " - "be " - "stored in a separate blob tag within the created container."); + "be stored in a separate blob tag within the created " + "container."); B_COMMAND_FLAGS(B_COMMAND_SHOW_HELP_BY_DEFAULT); B_COMMAND_FUNCTION(wrap); diff --git a/src/write.c b/src/write.c new file mode 100644 index 0000000..4112818 --- /dev/null +++ b/src/write.c @@ -0,0 +1,368 @@ +#include "write.h" + +#include "bin.h" +#include "pipeline.h" + +#include +#include +#include +#include + +struct ec3_writer { + b_queue w_tag_writers; + unsigned long w_next_tag_ident; + struct ec3_parameters w_param; + + uint64_t w_extent_tag; + size_t w_extent_logical_start; + size_t w_extent_physical_start; + size_t w_extent_nr_clusters; + + size_t w_data_offset; + + FILE *w_data; + FILE *w_cluster_table; + FILE *w_extent_table; + FILE *w_tag_table; + + struct ec3_pipeline *w_pipeline; +}; + +struct ec3_tag_writer { + struct ec3_writer *w_parent; + unsigned long w_type; + uint64_t w_ident; + unsigned long w_flags; + unsigned char *w_buf; + size_t w_ptr; + size_t w_nr_clusters; + b_queue_entry w_entry; +}; + +static const size_t cluster_sizes[] = { + [EC3_CLUSTER_4K] = 0x1000, + [EC3_CLUSTER_8K] = 0x2000, + [EC3_CLUSTER_16K] = 0x4000, + [EC3_CLUSTER_32K] = 0x8000, + [EC3_CLUSTER_64K] = 0x10000, +}; + +static enum ec3_pipeline_stage_type_id pipeline_stage_for_compression_func( + unsigned int func) +{ + switch (func) { + case EC3_COMPRESSION_ZSTD: + return EC3_PIPELINE_ZSTD; + default: + return EC3_PIPELINE_NONE; + } +} + +static enum ec3_pipeline_stage_type_id pipeline_stage_for_encryption_func( + unsigned int func) +{ + switch (func) { + case EC3_ENCRYPTION_AES256: + return EC3_PIPELINE_AES256; + default: + return EC3_PIPELINE_NONE; + } +} + +enum ec3_status ec3_writer_create( + const struct ec3_parameters *param, + struct ec3_writer **out) +{ + struct ec3_writer *writer = malloc(sizeof *writer); + if (!writer) { + return EC3_ERR_NO_MEMORY; + } + + memset(writer, 0x0, sizeof *writer); + memcpy(&writer->w_param, param, sizeof *param); + + size_t cluster_size = cluster_sizes[param->p_cluster_size]; + + writer->w_data = param->p_outp; + writer->w_cluster_table = tmpfile(); + writer->w_extent_table = tmpfile(); + writer->w_tag_table = tmpfile(); + + struct ec3_pipeline_stage_args stages[3] = {0}; + + if (param->p_compression_func != EC3_COMPRESSION_NONE) { + stages[0].type = pipeline_stage_for_compression_func( + param->p_compression_func); + } + + if (param->p_encryption_func != EC3_ENCRYPTION_NONE) { + stages[1].type = pipeline_stage_for_encryption_func( + param->p_encryption_func); + } + + stages[2].type = EC3_PIPELINE_FILE; + stages[2].arg = writer->w_data; + + struct ec3_pipeline *pipeline = NULL; + enum ec3_status status = ec3_pipeline_create( + stages, + sizeof stages / sizeof stages[0], + cluster_size, + &pipeline); + + if (status != EC3_SUCCESS) { + return status; + } + + writer->w_pipeline = pipeline; + + struct ec3_header header = {0}; + size_t written = fwrite(&header, sizeof header, 1, writer->w_data); + if (written != 1) { + return EC3_ERR_IO_FAILURE; + } + + *out = writer; + return EC3_SUCCESS; +} + +static enum ec3_status copy_file(FILE *src, FILE *dest) +{ + const size_t buf_len = 16384; + char *buf = malloc(buf_len); + + if (!buf) { + return EC3_ERR_IO_FAILURE; + } + + fseek(src, 0x0, SEEK_SET); + + enum ec3_status status = EC3_SUCCESS; + + while (1) { + size_t r = fread(buf, 1, buf_len, src); + if (ferror(src)) { + status = EC3_ERR_IO_FAILURE; + break; + } + + size_t w = fwrite(buf, 1, r, dest); + if (w != r) { + status = EC3_ERR_IO_FAILURE; + break; + } + + if (r < buf_len) { + break; + } + } + + free(buf); + return status; +} + +static enum ec3_status flush_extent_entry(struct ec3_writer *w) +{ + struct ec3_extent extent = {0}; + extent.ex_owner = b_i64_htob(w->w_extent_tag); + extent.ex_physical_cluster = b_i32_htob(w->w_extent_physical_start); + extent.ex_logical_cluster = b_i32_htob(w->w_extent_logical_start); + extent.ex_count = b_i32_htob(w->w_extent_nr_clusters); + + size_t written = fwrite(&extent, sizeof extent, 1, w->w_extent_table); + if (written != 1) { + return EC3_ERR_IO_FAILURE; + } + + return EC3_SUCCESS; +} + +void ec3_writer_finish(struct ec3_writer *w) +{ + enum ec3_status status = EC3_SUCCESS; + + if (w->w_extent_nr_clusters > 0) { + status = flush_extent_entry(w); + } + + if (status != EC3_SUCCESS) { + return; + } + + size_t cluster_table_offset = ftell(w->w_data); + status = copy_file(w->w_cluster_table, w->w_data); + + size_t extent_table_offset = ftell(w->w_data); + status = copy_file(w->w_extent_table, w->w_data); + + size_t tag_table_offset = ftell(w->w_data); + status = copy_file(w->w_tag_table, w->w_data); + + fseek(w->w_data, 0x0, SEEK_SET); + + struct ec3_header header = {0}; + header.h_magic = b_i32_htob(EC3_SIGNATURE); + header.h_version = b_i16_htob(EC3_VERSION_1_0); + header.h_cluster_size = b_i16_htob(w->w_param.p_cluster_size); + header.h_tag_table_offset = b_i64_htob(tag_table_offset); + header.h_extent_table_offset = b_i64_htob(extent_table_offset); + header.h_cluster_table_offset = b_i64_htob(cluster_table_offset); + + fwrite(&header, sizeof header, 1, w->w_data); +} + +void ec3_writer_write_image(struct ec3_writer *w, FILE *fp) +{ +} + +static bool is_tag_ident_free(struct ec3_writer *w, uint64_t ident) +{ + b_queue_iterator it = {0}; + b_queue_foreach(&it, &w->w_tag_writers) + { + struct ec3_tag_writer *tag + = b_unbox(struct ec3_tag_writer, it.entry, w_entry); + + if (tag->w_ident == ident) { + return false; + } + } + + return true; +} + +enum ec3_status ec3_writer_create_tag( + struct ec3_writer *w, + uint32_t tag_type, + uint64_t tag_ident, + unsigned int flags, + struct ec3_tag_writer **out_writer) +{ + struct ec3_tag_writer *tag = malloc(sizeof *tag); + if (!tag) { + return EC3_ERR_NO_MEMORY; + } + + memset(tag, 0x0, sizeof *tag); + + size_t cluster_size = cluster_sizes[w->w_param.p_cluster_size]; + + tag->w_parent = w; + tag->w_flags = flags; + tag->w_type = tag_type; + tag->w_ident = tag_ident; + tag->w_buf = malloc(cluster_size); + + if (!tag->w_buf) { + free(tag); + return EC3_ERR_NO_MEMORY; + } + + struct ec3_tag_table_entry entry = {0}; + entry.tag_type = b_i32_htob(tag_type); + entry.tag_ident = b_i64_htob(tag_ident); + entry.tag_flags = b_i32_htob(flags); + + size_t written = fwrite(&entry, sizeof entry, 1, w->w_tag_table); + if (written != 1) { + free(tag->w_buf); + free(tag); + return EC3_ERR_IO_FAILURE; + } + + *out_writer = tag; + return EC3_SUCCESS; +} + +static enum ec3_status flush_tag_buffer(struct ec3_tag_writer *w) +{ + struct ec3_writer *container = w->w_parent; + unsigned char *buf = w->w_buf; + enum ec3_status status = EC3_SUCCESS; + size_t nr_written = 0; + + status = ec3_pipeline_data_out( + container->w_pipeline, + buf, + w->w_ptr, + &nr_written); + + if (status != EC3_SUCCESS) { + return status; + } + + container->w_data_offset += nr_written; + w->w_ptr = 0; + w->w_nr_clusters++; + + if (container->w_extent_tag == w->w_ident) { + container->w_extent_nr_clusters++; + return EC3_SUCCESS; + } + + if (container->w_extent_nr_clusters) { + status = flush_extent_entry(container); + } + + if (status != EC3_SUCCESS) { + return status; + } + + container->w_extent_tag = w->w_ident; + container->w_extent_physical_start += container->w_extent_nr_clusters; + container->w_extent_logical_start = w->w_nr_clusters; + container->w_extent_nr_clusters = 0; + + return EC3_SUCCESS; +} + +enum ec3_status ec3_tag_writer_write( + struct ec3_tag_writer *w, + const void *p, + size_t len) +{ + size_t cluster_size + = cluster_sizes[w->w_parent->w_param.p_cluster_size]; + enum ec3_status status = EC3_SUCCESS; + void *buf = w->w_buf; + + while (len > 0) { + size_t remaining = cluster_size - w->w_ptr; + + size_t to_write = len; + if (len > remaining) { + to_write = remaining; + } + + memcpy(buf + w->w_ptr, p, to_write); + + remaining -= to_write; + w->w_ptr += to_write; + if (remaining == 0) { + status = flush_tag_buffer(w); + } + + if (status != EC3_SUCCESS) { + return status; + } + + len -= to_write; + } + + return status; +} + +enum ec3_status ec3_tag_writer_finish(struct ec3_tag_writer *w) +{ + enum ec3_status status = EC3_SUCCESS; + + if (w->w_ptr > 0) { + status = flush_tag_buffer(w); + } + + if (status != EC3_SUCCESS) { + return status; + } + + return status; +} diff --git a/src/write.h b/src/write.h index cda7dcb..817a48f 100644 --- a/src/write.h +++ b/src/write.h @@ -6,17 +6,39 @@ #include #include +struct ec3_parameters { + FILE *p_outp; + + unsigned int p_cluster_size; + + unsigned int p_compression_func; + unsigned int p_encryption_func; + uint64_t p_ident; + + const void *p_encryption_key; + size_t p_encryption_key_size; +}; + struct ec3_writer; struct ec3_tag_writer; -extern enum ec3_status ec3_writer_create(void); -extern void ec3_writer_destroy(struct ec3_writer *w); +extern enum ec3_status ec3_writer_create( + const struct ec3_parameters *param, + struct ec3_writer **out); +extern void ec3_writer_finish(struct ec3_writer *w); extern void ec3_writer_write_image(struct ec3_writer *w, FILE *fp); extern enum ec3_status ec3_writer_create_tag( struct ec3_writer *w, uint32_t tag_type, uint64_t tag_ident, + unsigned int flags, struct ec3_tag_writer **out_writer); +extern enum ec3_status ec3_tag_writer_write( + struct ec3_tag_writer *w, + const void *p, + size_t len); +extern enum ec3_status ec3_tag_writer_finish(struct ec3_tag_writer *w); + #endif diff --git a/src/zstd.c b/src/zstd.c new file mode 100644 index 0000000..3a5bbec --- /dev/null +++ b/src/zstd.c @@ -0,0 +1,30 @@ +#include "pipeline.h" + +#include + +static enum ec3_status compress( + struct ec3_pipeline_stage *stage, + const void *src, + size_t len, + void *dest, + size_t *nr_written) +{ + *nr_written = ZSTD_compress(dest, len, src, len, 10); + return EC3_SUCCESS; +} + +static enum ec3_status decompress( + struct ec3_pipeline_stage *stage, + const void *src, + void *dest, + size_t *nr_read) +{ + return EC3_SUCCESS; +} + +const struct ec3_pipeline_stage_type pipeline_zstd = { + .t_id = EC3_PIPELINE_ZSTD, + .t_flags = EC3_PIPELINE_F_BUFFERED, + .t_data_in = decompress, + .t_data_out = compress, +};