From 1d11e6dce7ba7ca304cb6d851104fa464d5e77ee Mon Sep 17 00:00:00 2001 From: Max Wash Date: Sat, 1 Mar 2025 19:27:54 +0000 Subject: [PATCH] chunk: file contents is now stored in a single, variable-length chunk --- src/bin.h | 81 +++++++------------------------ src/capture.c | 29 ++++++++---- src/chunk-table.c | 118 +++++++++++++++++++++++++++++++++++++--------- src/chunk-table.h | 16 ++++++- 4 files changed, 147 insertions(+), 97 deletions(-) diff --git a/src/bin.h b/src/bin.h index b06ca65..e9f02c0 100644 --- a/src/bin.h +++ b/src/bin.h @@ -57,27 +57,19 @@ enum ec3_tag_flags { /* each chunk group occupies one cluster, so the number of chunks per group * depends on the cluster size */ -#define EC3_CHUNKS_PER_GROUP_4K 146 -#define EC3_CHUNKS_PER_GROUP_8K 292 -#define EC3_CHUNKS_PER_GROUP_16K 584 -#define EC3_CHUNKS_PER_GROUP_32K 1170 -#define EC3_CHUNKS_PER_GROUP_64K 2340 +#define EC3_CHUNKS_PER_GROUP_4K 145 +#define EC3_CHUNKS_PER_GROUP_8K 291 +#define EC3_CHUNKS_PER_GROUP_16K 583 +#define EC3_CHUNKS_PER_GROUP_32K 1169 +#define EC3_CHUNKS_PER_GROUP_64K 2339 /* each vnode group occupies one cluster, so the number of vnodes per group * depends on the cluster size */ -#define EC3_VNODES_PER_GROUP_4K 255 -#define EC3_VNODES_PER_GROUP_8K 511 -#define EC3_VNODES_PER_GROUP_16K 1023 -#define EC3_VNODES_PER_GROUP_32K 2047 -#define EC3_VNODES_PER_GROUP_64K 4095 - -/* each vnode/chunk link group occupies one cluster, so the number of links per - * group depends on the cluster size */ -#define EC3_VNCH_PER_GROUP_4K 146 -#define EC3_VNCH_PER_GROUP_8K 292 -#define EC3_VNCH_PER_GROUP_16K 584 -#define EC3_VNCH_PER_GROUP_32K 1170 -#define EC3_VNCH_PER_GROUP_64K 2340 +#define EC3_VNODES_PER_GROUP_4K 102 +#define EC3_VNODES_PER_GROUP_8K 204 +#define EC3_VNODES_PER_GROUP_16K 409 +#define EC3_VNODES_PER_GROUP_32K 819 +#define EC3_VNODES_PER_GROUP_64K 1638 /* Chunks are identified by a 128-bit (16-byte) hash */ #define EC3_CHUNK_ID_SIZE 16 @@ -156,9 +148,8 @@ struct ec3_extent { struct ec3_chunk { ec3_chunk_id c_id; - b_i16 c_length; - b_i16 c_offset1; - b_i32 c_offset0; + b_i32 c_first_cluster; + b_i32 c_nr_clusters; }; struct ec3_chunk_group { @@ -201,9 +192,11 @@ struct ec3_directory_entry { struct ec3_vnode { b_i32 n_id; - b_i32 n_attrib; - /* length of file in chunks */ - b_i32 n_length; + b_i32 n_atime, n_mtime; + b_i16 n_mode, n_reserved; + b_i16 n_uid, n_gid; + /* id of the chunk containing the vnode data */ + ec3_chunk_id n_data; }; struct ec3_vnode_group { @@ -216,7 +209,6 @@ struct ec3_vnode_group { struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_4K]; b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_4K + 1]; } g_4k; - struct { struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_8K]; b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_8K + 1]; @@ -239,43 +231,4 @@ struct ec3_vnode_group { }; }; -struct ec3_vnode_chunk { - ec3_chunk_id c_chunk_id; - b_i32 c_vnode_id; - b_i32 c_vnode_index; -}; - -struct ec3_vnode_chunk_group { - /* the number of vnode/chunk links that this group contains */ - b_i16 g_nr_entries; - uint8_t g_reserved[2]; - - union { - struct { - struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_4K]; - b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_4K + 1]; - } g_4k; - - struct { - struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_8K]; - b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_8K + 1]; - } g_8k; - - struct { - struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_16K]; - b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_16K + 1]; - } g_16k; - - struct { - struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_32K]; - b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_32K + 1]; - } g_32k; - - struct { - struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_64K]; - b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_64K + 1]; - } g_64k; - }; -}; - #endif diff --git a/src/capture.c b/src/capture.c index 5f2d5ed..b6c75f8 100644 --- a/src/capture.c +++ b/src/capture.c @@ -67,6 +67,9 @@ static enum ec3_status capture_file( } enum ec3_status s2 = EC3_SUCCESS; + chunk_table_begin_chunk(&ctx->ctx_chunks); + + size_t chunk_size = 0; while (1) { size_t nr_read = 0; @@ -84,22 +87,31 @@ static enum ec3_status capture_file( break; } - ec3_chunk_id chunk; - enum ec3_status s2 = chunk_table_put( - &ctx->ctx_chunks, - buf, - nr_read, - chunk); + enum ec3_status s2 + = chunk_table_put(&ctx->ctx_chunks, buf, nr_read); if (s2 != EC3_SUCCESS) { break; } + chunk_size += nr_read; + if (nr_read < buf_len) { break; } } + ec3_chunk_id id = {0}; + s2 = chunk_table_end_chunk(&ctx->ctx_chunks, id); + + if (s2 != EC3_SUCCESS) { + return s2; + } + + char id_str[128]; + ec3_chunk_id_to_string(id, id_str, sizeof id_str); + printf("wrote %zu byte chunk %s\n", chunk_size, id_str); + free(buf); b_file_release(src); return s2; @@ -175,10 +187,6 @@ static int capture( const b_arglist *opt, const b_array *args) { - printf("sizeof(struct ec3_vnode_group) = %zu\n", - sizeof(struct ec3_vnode_group)); - return 0; - const char *out_path = NULL; b_arglist_get_string(opt, OPT_OUTPATH, OPT_OUTPATH_PATH, 0, &out_path); @@ -351,6 +359,7 @@ static int capture( ec3_tag_ioctx_close(stab); ec3_tag_ioctx_close(ctab); ec3_tag_ioctx_close(cdat); + ec3_image_ioctx_close(image); return 0; diff --git a/src/chunk-table.c b/src/chunk-table.c index fe58789..587a61b 100644 --- a/src/chunk-table.c +++ b/src/chunk-table.c @@ -444,21 +444,102 @@ enum ec3_status chunk_table_get( return EC3_ERR_NOT_SUPPORTED; } +enum ec3_status chunk_table_begin_chunk(struct chunk_table *tab) +{ + b_hash_ctx_init(&tab->tab_hash, B_HASH_SHAKE128); + size_t nr_written = 0; + tab->tab_first_chunk_cdat_cluster = tab->tab_next_cdat_cluster; + return EC3_SUCCESS; +} + +static enum ec3_status flush_cluster_buf(struct chunk_table *tab) +{ + size_t nr_written = 0; + size_t cdat_cluster = tab->tab_next_cdat_cluster++; + + enum ec3_status status = ec3_tag_ioctx_write_cluster( + tab->tab_cdat, + cdat_cluster, + tab->tab_cluster_buf, + tab->tab_cluster_buf_pos, + &nr_written); + + if (status != EC3_SUCCESS) { + return status; + } + + tab->tab_cluster_buf_pos = 0; + return EC3_SUCCESS; +} + enum ec3_status chunk_table_put( struct chunk_table *tab, const void *data, - size_t len, - ec3_chunk_id out_id) + size_t len) { - b_hash_ctx hash; - b_hash_ctx_init(&hash, B_HASH_SHAKE128); - b_hash_ctx_update(&hash, data, len); + enum ec3_status status = EC3_SUCCESS; + + size_t cluster_size + = ec3_cluster_size_id_to_bytes(tab->tab_cluster_size); + size_t copied = 0; + + b_hash_ctx_update(&tab->tab_hash, data, len); + + while (len > 0) { + size_t remaining = cluster_size - tab->tab_cluster_buf_pos; + + size_t to_copy = len; + if (to_copy > remaining) { + to_copy = remaining; + } + + unsigned char *src = (unsigned char *)data + copied; + unsigned char *dest = (unsigned char *)tab->tab_cluster_buf + + tab->tab_cluster_buf_pos; + + memcpy(dest, src, to_copy); + + copied += to_copy; + tab->tab_cluster_buf_pos += to_copy; + + len -= to_copy; + remaining -= to_copy; + + if (remaining == 0) { + status = flush_cluster_buf(tab); + } + + if (status != EC3_SUCCESS) { + break; + } + } + + if (status != EC3_SUCCESS) { + return status; + } + + return EC3_SUCCESS; +} + +enum ec3_status chunk_table_end_chunk( + struct chunk_table *tab, + ec3_chunk_id out_chunk_id) +{ + enum ec3_status status = EC3_SUCCESS; + if (tab->tab_cluster_buf_pos > 0) { + status = flush_cluster_buf(tab); + } + + if (status != EC3_SUCCESS) { + return status; + } ec3_chunk_id id; - b_hash_ctx_finish(&hash, id, sizeof id); + b_hash_ctx_finish(&tab->tab_hash, id, sizeof id); struct cache_entry *entry = get_cache_entry(&tab->tab_cache, id); if (entry) { + memcpy(out_chunk_id, id, sizeof id); return EC3_SUCCESS; } @@ -467,21 +548,6 @@ enum ec3_status chunk_table_put( return EC3_ERR_NO_MEMORY; } - size_t nr_written = 0; - - size_t cdat_cluster = tab->tab_next_cdat_cluster++; - enum ec3_status status = ec3_tag_ioctx_write_cluster( - tab->tab_cdat, - cdat_cluster, - data, - len, - &nr_written); - - if (status != EC3_SUCCESS) { - free(entry); - return status; - } - struct ec3_chunk chunk = {0}; memcpy(chunk.c_id, id, sizeof id); int err = b_tree_put( @@ -494,8 +560,18 @@ enum ec3_status chunk_table_put( memset(entry, 0x0, sizeof *entry); memcpy(entry->e_id, id, sizeof id); + memcpy(out_chunk_id, id, sizeof id); put_cache_entry(&tab->tab_cache, entry); return EC3_SUCCESS; } + +void ec3_chunk_id_to_string(const ec3_chunk_id id, char *out, size_t max) +{ + max = b_min(size_t, max, EC3_CHUNK_ID_SIZE * 2 + 1); + + for (size_t i = 0; i < max; i++) { + snprintf(out + (i * 2), max - (i * 2), "%02x", id[i]); + } +} diff --git a/src/chunk-table.h b/src/chunk-table.h index 46f123f..3b29bf4 100644 --- a/src/chunk-table.h +++ b/src/chunk-table.h @@ -16,11 +16,15 @@ struct chunk_table { b_btree tab_cache; unsigned char *tab_cluster_buf; + size_t tab_cluster_buf_pos; size_t tab_next_cdat_cluster; struct ec3_tag_ioctx *tab_ctab; struct ec3_tag_ioctx *tab_cdat; + + b_hash_ctx tab_hash; + size_t tab_first_chunk_cdat_cluster; }; extern enum ec3_status chunk_table_init( @@ -37,12 +41,20 @@ extern enum ec3_status chunk_table_get( void *out_data, size_t *out_len); +extern enum ec3_status chunk_table_begin_chunk(struct chunk_table *tab); extern enum ec3_status chunk_table_put( struct chunk_table *tab, const void *data, - size_t len, - ec3_chunk_id out_id); + size_t len); +extern enum ec3_status chunk_table_end_chunk( + struct chunk_table *tab, + ec3_chunk_id out_chunk_id); extern size_t chunk_table_bytes_per_chunk(struct chunk_table *tab); +extern void ec3_chunk_id_to_string( + const ec3_chunk_id id, + char *out, + size_t max); + #endif