chunk: file contents are now stored in a single, variable-length chunk

This commit is contained in:
2025-03-01 19:27:54 +00:00
parent 962a5dae31
commit 1d11e6dce7
4 changed files with 147 additions and 97 deletions

View File

@@ -57,27 +57,19 @@ enum ec3_tag_flags {
/* each chunk group occupies one cluster, so the number of chunks per group /* each chunk group occupies one cluster, so the number of chunks per group
* depends on the cluster size */ * depends on the cluster size */
#define EC3_CHUNKS_PER_GROUP_4K 146 #define EC3_CHUNKS_PER_GROUP_4K 145
#define EC3_CHUNKS_PER_GROUP_8K 292 #define EC3_CHUNKS_PER_GROUP_8K 291
#define EC3_CHUNKS_PER_GROUP_16K 584 #define EC3_CHUNKS_PER_GROUP_16K 583
#define EC3_CHUNKS_PER_GROUP_32K 1170 #define EC3_CHUNKS_PER_GROUP_32K 1169
#define EC3_CHUNKS_PER_GROUP_64K 2340 #define EC3_CHUNKS_PER_GROUP_64K 2339
/* each vnode group occupies one cluster, so the number of vnodes per group /* each vnode group occupies one cluster, so the number of vnodes per group
* depends on the cluster size */ * depends on the cluster size */
#define EC3_VNODES_PER_GROUP_4K 255 #define EC3_VNODES_PER_GROUP_4K 102
#define EC3_VNODES_PER_GROUP_8K 511 #define EC3_VNODES_PER_GROUP_8K 204
#define EC3_VNODES_PER_GROUP_16K 1023 #define EC3_VNODES_PER_GROUP_16K 409
#define EC3_VNODES_PER_GROUP_32K 2047 #define EC3_VNODES_PER_GROUP_32K 819
#define EC3_VNODES_PER_GROUP_64K 4095 #define EC3_VNODES_PER_GROUP_64K 1638
/* each vnode/chunk link group occupies one cluster, so the number of links per
* group depends on the cluster size */
#define EC3_VNCH_PER_GROUP_4K 146
#define EC3_VNCH_PER_GROUP_8K 292
#define EC3_VNCH_PER_GROUP_16K 584
#define EC3_VNCH_PER_GROUP_32K 1170
#define EC3_VNCH_PER_GROUP_64K 2340
/* Chunks are identified by a 128-bit (16-byte) hash */ /* Chunks are identified by a 128-bit (16-byte) hash */
#define EC3_CHUNK_ID_SIZE 16 #define EC3_CHUNK_ID_SIZE 16
@@ -156,9 +148,8 @@ struct ec3_extent {
struct ec3_chunk { struct ec3_chunk {
ec3_chunk_id c_id; ec3_chunk_id c_id;
b_i16 c_length; b_i32 c_first_cluster;
b_i16 c_offset1; b_i32 c_nr_clusters;
b_i32 c_offset0;
}; };
struct ec3_chunk_group { struct ec3_chunk_group {
@@ -201,9 +192,11 @@ struct ec3_directory_entry {
struct ec3_vnode { struct ec3_vnode {
b_i32 n_id; b_i32 n_id;
b_i32 n_attrib; b_i32 n_atime, n_mtime;
/* length of file in chunks */ b_i16 n_mode, n_reserved;
b_i32 n_length; b_i16 n_uid, n_gid;
/* id of the chunk containing the vnode data */
ec3_chunk_id n_data;
}; };
struct ec3_vnode_group { struct ec3_vnode_group {
@@ -216,7 +209,6 @@ struct ec3_vnode_group {
struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_4K]; struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_4K];
b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_4K + 1]; b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_4K + 1];
} g_4k; } g_4k;
struct { struct {
struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_8K]; struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_8K];
b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_8K + 1]; b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_8K + 1];
@@ -239,43 +231,4 @@ struct ec3_vnode_group {
}; };
}; };
/* A single vnode/chunk link record: pairs a chunk id with a vnode id and an
 * index value. Records of this type are stored in the g_links arrays of
 * struct ec3_vnode_chunk_group. */
struct ec3_vnode_chunk {
/* id (hash) of the linked chunk */
ec3_chunk_id c_chunk_id;
/* id of the linked vnode -- presumably matches ec3_vnode.n_id; TODO confirm */
b_i32 c_vnode_id;
/* NOTE(review): presumably the position of this chunk within the vnode's
 * data; confirm against the code that fills this in */
b_i32 c_vnode_index;
};
/* A group of vnode/chunk links. The union provides one layout per supported
 * cluster size (4K, 8K, 16K, 32K, 64K); each layout holds
 * EC3_VNCH_PER_GROUP_* link records followed by one more child offset than
 * there are links. */
struct ec3_vnode_chunk_group {
/* the number of vnode/chunk links that this group contains */
b_i16 g_nr_entries;
/* reserved bytes; NOTE(review): presumably padding so the union starts on a
 * 4-byte boundary -- confirm */
uint8_t g_reserved[2];
union {
/* layout used with 4K clusters */
struct {
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_4K];
/* NOTE(review): N links + 1 offsets looks like B-tree child
 * pointers; confirm the unit (cluster index vs. byte offset) */
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_4K + 1];
} g_4k;
/* layout used with 8K clusters */
struct {
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_8K];
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_8K + 1];
} g_8k;
/* layout used with 16K clusters */
struct {
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_16K];
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_16K + 1];
} g_16k;
/* layout used with 32K clusters */
struct {
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_32K];
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_32K + 1];
} g_32k;
/* layout used with 64K clusters */
struct {
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_64K];
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_64K + 1];
} g_64k;
};
};
#endif #endif

View File

@@ -67,6 +67,9 @@ static enum ec3_status capture_file(
} }
enum ec3_status s2 = EC3_SUCCESS; enum ec3_status s2 = EC3_SUCCESS;
chunk_table_begin_chunk(&ctx->ctx_chunks);
size_t chunk_size = 0;
while (1) { while (1) {
size_t nr_read = 0; size_t nr_read = 0;
@@ -84,22 +87,31 @@ static enum ec3_status capture_file(
break; break;
} }
ec3_chunk_id chunk; enum ec3_status s2
enum ec3_status s2 = chunk_table_put( = chunk_table_put(&ctx->ctx_chunks, buf, nr_read);
&ctx->ctx_chunks,
buf,
nr_read,
chunk);
if (s2 != EC3_SUCCESS) { if (s2 != EC3_SUCCESS) {
break; break;
} }
chunk_size += nr_read;
if (nr_read < buf_len) { if (nr_read < buf_len) {
break; break;
} }
} }
ec3_chunk_id id = {0};
s2 = chunk_table_end_chunk(&ctx->ctx_chunks, id);
if (s2 != EC3_SUCCESS) {
return s2;
}
char id_str[128];
ec3_chunk_id_to_string(id, id_str, sizeof id_str);
printf("wrote %zu byte chunk %s\n", chunk_size, id_str);
free(buf); free(buf);
b_file_release(src); b_file_release(src);
return s2; return s2;
@@ -175,10 +187,6 @@ static int capture(
const b_arglist *opt, const b_arglist *opt,
const b_array *args) const b_array *args)
{ {
printf("sizeof(struct ec3_vnode_group) = %zu\n",
sizeof(struct ec3_vnode_group));
return 0;
const char *out_path = NULL; const char *out_path = NULL;
b_arglist_get_string(opt, OPT_OUTPATH, OPT_OUTPATH_PATH, 0, &out_path); b_arglist_get_string(opt, OPT_OUTPATH, OPT_OUTPATH_PATH, 0, &out_path);
@@ -351,6 +359,7 @@ static int capture(
ec3_tag_ioctx_close(stab); ec3_tag_ioctx_close(stab);
ec3_tag_ioctx_close(ctab); ec3_tag_ioctx_close(ctab);
ec3_tag_ioctx_close(cdat); ec3_tag_ioctx_close(cdat);
ec3_image_ioctx_close(image); ec3_image_ioctx_close(image);
return 0; return 0;

View File

@@ -444,21 +444,102 @@ enum ec3_status chunk_table_get(
return EC3_ERR_NOT_SUPPORTED; return EC3_ERR_NOT_SUPPORTED;
} }
/*
 * Start a new chunk on `tab`.
 *
 * Re-initialises the table's running SHAKE128 hash context and records the
 * next free chunk-data cluster index as the first cluster of the new chunk.
 * Data is then supplied with chunk_table_put() and the chunk is completed
 * with chunk_table_end_chunk().
 *
 * Always returns EC3_SUCCESS.
 */
enum ec3_status chunk_table_begin_chunk(struct chunk_table *tab)
{
	/* every chunk is hashed from scratch */
	b_hash_ctx_init(&tab->tab_hash, B_HASH_SHAKE128);

	/* remember where this chunk's data starts in the chunk-data stream */
	tab->tab_first_chunk_cdat_cluster = tab->tab_next_cdat_cluster;

	return EC3_SUCCESS;
}
static enum ec3_status flush_cluster_buf(struct chunk_table *tab)
{
size_t nr_written = 0;
size_t cdat_cluster = tab->tab_next_cdat_cluster++;
enum ec3_status status = ec3_tag_ioctx_write_cluster(
tab->tab_cdat,
cdat_cluster,
tab->tab_cluster_buf,
tab->tab_cluster_buf_pos,
&nr_written);
if (status != EC3_SUCCESS) {
return status;
}
tab->tab_cluster_buf_pos = 0;
return EC3_SUCCESS;
}
enum ec3_status chunk_table_put( enum ec3_status chunk_table_put(
struct chunk_table *tab, struct chunk_table *tab,
const void *data, const void *data,
size_t len, size_t len)
ec3_chunk_id out_id)
{ {
b_hash_ctx hash; enum ec3_status status = EC3_SUCCESS;
b_hash_ctx_init(&hash, B_HASH_SHAKE128);
b_hash_ctx_update(&hash, data, len); size_t cluster_size
= ec3_cluster_size_id_to_bytes(tab->tab_cluster_size);
size_t copied = 0;
b_hash_ctx_update(&tab->tab_hash, data, len);
while (len > 0) {
size_t remaining = cluster_size - tab->tab_cluster_buf_pos;
size_t to_copy = len;
if (to_copy > remaining) {
to_copy = remaining;
}
unsigned char *src = (unsigned char *)data + copied;
unsigned char *dest = (unsigned char *)tab->tab_cluster_buf
+ tab->tab_cluster_buf_pos;
memcpy(dest, src, to_copy);
copied += to_copy;
tab->tab_cluster_buf_pos += to_copy;
len -= to_copy;
remaining -= to_copy;
if (remaining == 0) {
status = flush_cluster_buf(tab);
}
if (status != EC3_SUCCESS) {
break;
}
}
if (status != EC3_SUCCESS) {
return status;
}
return EC3_SUCCESS;
}
enum ec3_status chunk_table_end_chunk(
struct chunk_table *tab,
ec3_chunk_id out_chunk_id)
{
enum ec3_status status = EC3_SUCCESS;
if (tab->tab_cluster_buf_pos > 0) {
status = flush_cluster_buf(tab);
}
if (status != EC3_SUCCESS) {
return status;
}
ec3_chunk_id id; ec3_chunk_id id;
b_hash_ctx_finish(&hash, id, sizeof id); b_hash_ctx_finish(&tab->tab_hash, id, sizeof id);
struct cache_entry *entry = get_cache_entry(&tab->tab_cache, id); struct cache_entry *entry = get_cache_entry(&tab->tab_cache, id);
if (entry) { if (entry) {
memcpy(out_chunk_id, id, sizeof id);
return EC3_SUCCESS; return EC3_SUCCESS;
} }
@@ -467,21 +548,6 @@ enum ec3_status chunk_table_put(
return EC3_ERR_NO_MEMORY; return EC3_ERR_NO_MEMORY;
} }
size_t nr_written = 0;
size_t cdat_cluster = tab->tab_next_cdat_cluster++;
enum ec3_status status = ec3_tag_ioctx_write_cluster(
tab->tab_cdat,
cdat_cluster,
data,
len,
&nr_written);
if (status != EC3_SUCCESS) {
free(entry);
return status;
}
struct ec3_chunk chunk = {0}; struct ec3_chunk chunk = {0};
memcpy(chunk.c_id, id, sizeof id); memcpy(chunk.c_id, id, sizeof id);
int err = b_tree_put( int err = b_tree_put(
@@ -494,8 +560,18 @@ enum ec3_status chunk_table_put(
memset(entry, 0x0, sizeof *entry); memset(entry, 0x0, sizeof *entry);
memcpy(entry->e_id, id, sizeof id); memcpy(entry->e_id, id, sizeof id);
memcpy(out_chunk_id, id, sizeof id);
put_cache_entry(&tab->tab_cache, entry); put_cache_entry(&tab->tab_cache, entry);
return EC3_SUCCESS; return EC3_SUCCESS;
} }
void ec3_chunk_id_to_string(const ec3_chunk_id id, char *out, size_t max)
{
max = b_min(size_t, max, EC3_CHUNK_ID_SIZE * 2 + 1);
for (size_t i = 0; i < max; i++) {
snprintf(out + (i * 2), max - (i * 2), "%02x", id[i]);
}
}

View File

@@ -16,11 +16,15 @@ struct chunk_table {
b_btree tab_cache; b_btree tab_cache;
unsigned char *tab_cluster_buf; unsigned char *tab_cluster_buf;
size_t tab_cluster_buf_pos;
size_t tab_next_cdat_cluster; size_t tab_next_cdat_cluster;
struct ec3_tag_ioctx *tab_ctab; struct ec3_tag_ioctx *tab_ctab;
struct ec3_tag_ioctx *tab_cdat; struct ec3_tag_ioctx *tab_cdat;
b_hash_ctx tab_hash;
size_t tab_first_chunk_cdat_cluster;
}; };
extern enum ec3_status chunk_table_init( extern enum ec3_status chunk_table_init(
@@ -37,12 +41,20 @@ extern enum ec3_status chunk_table_get(
void *out_data, void *out_data,
size_t *out_len); size_t *out_len);
extern enum ec3_status chunk_table_begin_chunk(struct chunk_table *tab);
extern enum ec3_status chunk_table_put( extern enum ec3_status chunk_table_put(
struct chunk_table *tab, struct chunk_table *tab,
const void *data, const void *data,
size_t len, size_t len);
ec3_chunk_id out_id); extern enum ec3_status chunk_table_end_chunk(
struct chunk_table *tab,
ec3_chunk_id out_chunk_id);
extern size_t chunk_table_bytes_per_chunk(struct chunk_table *tab); extern size_t chunk_table_bytes_per_chunk(struct chunk_table *tab);
extern void ec3_chunk_id_to_string(
const ec3_chunk_id id,
char *out,
size_t max);
#endif #endif