chunk: file contents are now stored in a single, variable-length chunk
This commit is contained in:
81
src/bin.h
81
src/bin.h
@@ -57,27 +57,19 @@ enum ec3_tag_flags {
|
||||
|
||||
/* each chunk group occupies one cluster, so the number of chunks per group
|
||||
* depends on the cluster size */
|
||||
#define EC3_CHUNKS_PER_GROUP_4K 146
|
||||
#define EC3_CHUNKS_PER_GROUP_8K 292
|
||||
#define EC3_CHUNKS_PER_GROUP_16K 584
|
||||
#define EC3_CHUNKS_PER_GROUP_32K 1170
|
||||
#define EC3_CHUNKS_PER_GROUP_64K 2340
|
||||
#define EC3_CHUNKS_PER_GROUP_4K 145
|
||||
#define EC3_CHUNKS_PER_GROUP_8K 291
|
||||
#define EC3_CHUNKS_PER_GROUP_16K 583
|
||||
#define EC3_CHUNKS_PER_GROUP_32K 1169
|
||||
#define EC3_CHUNKS_PER_GROUP_64K 2339
|
||||
|
||||
/* each vnode group occupies one cluster, so the number of vnodes per group
|
||||
* depends on the cluster size */
|
||||
#define EC3_VNODES_PER_GROUP_4K 255
|
||||
#define EC3_VNODES_PER_GROUP_8K 511
|
||||
#define EC3_VNODES_PER_GROUP_16K 1023
|
||||
#define EC3_VNODES_PER_GROUP_32K 2047
|
||||
#define EC3_VNODES_PER_GROUP_64K 4095
|
||||
|
||||
/* each vnode/chunk link group occupies one cluster, so the number of links per
|
||||
* group depends on the cluster size */
|
||||
#define EC3_VNCH_PER_GROUP_4K 146
|
||||
#define EC3_VNCH_PER_GROUP_8K 292
|
||||
#define EC3_VNCH_PER_GROUP_16K 584
|
||||
#define EC3_VNCH_PER_GROUP_32K 1170
|
||||
#define EC3_VNCH_PER_GROUP_64K 2340
|
||||
#define EC3_VNODES_PER_GROUP_4K 102
|
||||
#define EC3_VNODES_PER_GROUP_8K 204
|
||||
#define EC3_VNODES_PER_GROUP_16K 409
|
||||
#define EC3_VNODES_PER_GROUP_32K 819
|
||||
#define EC3_VNODES_PER_GROUP_64K 1638
|
||||
|
||||
/* Chunks are identified by a 128-bit (16-byte) hash */
|
||||
#define EC3_CHUNK_ID_SIZE 16
|
||||
@@ -156,9 +148,8 @@ struct ec3_extent {
|
||||
|
||||
struct ec3_chunk {
|
||||
ec3_chunk_id c_id;
|
||||
b_i16 c_length;
|
||||
b_i16 c_offset1;
|
||||
b_i32 c_offset0;
|
||||
b_i32 c_first_cluster;
|
||||
b_i32 c_nr_clusters;
|
||||
};
|
||||
|
||||
struct ec3_chunk_group {
|
||||
@@ -201,9 +192,11 @@ struct ec3_directory_entry {
|
||||
|
||||
struct ec3_vnode {
|
||||
b_i32 n_id;
|
||||
b_i32 n_attrib;
|
||||
/* length of file in chunks */
|
||||
b_i32 n_length;
|
||||
b_i32 n_atime, n_mtime;
|
||||
b_i16 n_mode, n_reserved;
|
||||
b_i16 n_uid, n_gid;
|
||||
/* id of the chunk containing the vnode data */
|
||||
ec3_chunk_id n_data;
|
||||
};
|
||||
|
||||
struct ec3_vnode_group {
|
||||
@@ -216,7 +209,6 @@ struct ec3_vnode_group {
|
||||
struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_4K];
|
||||
b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_4K + 1];
|
||||
} g_4k;
|
||||
|
||||
struct {
|
||||
struct ec3_vnode g_vnodes[EC3_VNODES_PER_GROUP_8K];
|
||||
b_i32 g_child_offsets[EC3_VNODES_PER_GROUP_8K + 1];
|
||||
@@ -239,43 +231,4 @@ struct ec3_vnode_group {
|
||||
};
|
||||
};
|
||||
|
||||
struct ec3_vnode_chunk {
|
||||
ec3_chunk_id c_chunk_id;
|
||||
b_i32 c_vnode_id;
|
||||
b_i32 c_vnode_index;
|
||||
};
|
||||
|
||||
struct ec3_vnode_chunk_group {
|
||||
/* the number of vnode/chunk links that this group contains */
|
||||
b_i16 g_nr_entries;
|
||||
uint8_t g_reserved[2];
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_4K];
|
||||
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_4K + 1];
|
||||
} g_4k;
|
||||
|
||||
struct {
|
||||
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_8K];
|
||||
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_8K + 1];
|
||||
} g_8k;
|
||||
|
||||
struct {
|
||||
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_16K];
|
||||
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_16K + 1];
|
||||
} g_16k;
|
||||
|
||||
struct {
|
||||
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_32K];
|
||||
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_32K + 1];
|
||||
} g_32k;
|
||||
|
||||
struct {
|
||||
struct ec3_vnode_chunk g_links[EC3_VNCH_PER_GROUP_64K];
|
||||
b_i32 g_child_offsets[EC3_VNCH_PER_GROUP_64K + 1];
|
||||
} g_64k;
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -67,6 +67,9 @@ static enum ec3_status capture_file(
|
||||
}
|
||||
|
||||
enum ec3_status s2 = EC3_SUCCESS;
|
||||
chunk_table_begin_chunk(&ctx->ctx_chunks);
|
||||
|
||||
size_t chunk_size = 0;
|
||||
|
||||
while (1) {
|
||||
size_t nr_read = 0;
|
||||
@@ -84,22 +87,31 @@ static enum ec3_status capture_file(
|
||||
break;
|
||||
}
|
||||
|
||||
ec3_chunk_id chunk;
|
||||
enum ec3_status s2 = chunk_table_put(
|
||||
&ctx->ctx_chunks,
|
||||
buf,
|
||||
nr_read,
|
||||
chunk);
|
||||
enum ec3_status s2
|
||||
= chunk_table_put(&ctx->ctx_chunks, buf, nr_read);
|
||||
|
||||
if (s2 != EC3_SUCCESS) {
|
||||
break;
|
||||
}
|
||||
|
||||
chunk_size += nr_read;
|
||||
|
||||
if (nr_read < buf_len) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ec3_chunk_id id = {0};
|
||||
s2 = chunk_table_end_chunk(&ctx->ctx_chunks, id);
|
||||
|
||||
if (s2 != EC3_SUCCESS) {
|
||||
return s2;
|
||||
}
|
||||
|
||||
char id_str[128];
|
||||
ec3_chunk_id_to_string(id, id_str, sizeof id_str);
|
||||
printf("wrote %zu byte chunk %s\n", chunk_size, id_str);
|
||||
|
||||
free(buf);
|
||||
b_file_release(src);
|
||||
return s2;
|
||||
@@ -175,10 +187,6 @@ static int capture(
|
||||
const b_arglist *opt,
|
||||
const b_array *args)
|
||||
{
|
||||
printf("sizeof(struct ec3_vnode_group) = %zu\n",
|
||||
sizeof(struct ec3_vnode_group));
|
||||
return 0;
|
||||
|
||||
const char *out_path = NULL;
|
||||
b_arglist_get_string(opt, OPT_OUTPATH, OPT_OUTPATH_PATH, 0, &out_path);
|
||||
|
||||
@@ -351,6 +359,7 @@ static int capture(
|
||||
ec3_tag_ioctx_close(stab);
|
||||
ec3_tag_ioctx_close(ctab);
|
||||
ec3_tag_ioctx_close(cdat);
|
||||
|
||||
ec3_image_ioctx_close(image);
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -444,21 +444,102 @@ enum ec3_status chunk_table_get(
|
||||
return EC3_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
enum ec3_status chunk_table_begin_chunk(struct chunk_table *tab)
|
||||
{
|
||||
b_hash_ctx_init(&tab->tab_hash, B_HASH_SHAKE128);
|
||||
size_t nr_written = 0;
|
||||
tab->tab_first_chunk_cdat_cluster = tab->tab_next_cdat_cluster;
|
||||
return EC3_SUCCESS;
|
||||
}
|
||||
|
||||
static enum ec3_status flush_cluster_buf(struct chunk_table *tab)
|
||||
{
|
||||
size_t nr_written = 0;
|
||||
size_t cdat_cluster = tab->tab_next_cdat_cluster++;
|
||||
|
||||
enum ec3_status status = ec3_tag_ioctx_write_cluster(
|
||||
tab->tab_cdat,
|
||||
cdat_cluster,
|
||||
tab->tab_cluster_buf,
|
||||
tab->tab_cluster_buf_pos,
|
||||
&nr_written);
|
||||
|
||||
if (status != EC3_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
tab->tab_cluster_buf_pos = 0;
|
||||
return EC3_SUCCESS;
|
||||
}
|
||||
|
||||
enum ec3_status chunk_table_put(
|
||||
struct chunk_table *tab,
|
||||
const void *data,
|
||||
size_t len,
|
||||
ec3_chunk_id out_id)
|
||||
size_t len)
|
||||
{
|
||||
b_hash_ctx hash;
|
||||
b_hash_ctx_init(&hash, B_HASH_SHAKE128);
|
||||
b_hash_ctx_update(&hash, data, len);
|
||||
enum ec3_status status = EC3_SUCCESS;
|
||||
|
||||
size_t cluster_size
|
||||
= ec3_cluster_size_id_to_bytes(tab->tab_cluster_size);
|
||||
size_t copied = 0;
|
||||
|
||||
b_hash_ctx_update(&tab->tab_hash, data, len);
|
||||
|
||||
while (len > 0) {
|
||||
size_t remaining = cluster_size - tab->tab_cluster_buf_pos;
|
||||
|
||||
size_t to_copy = len;
|
||||
if (to_copy > remaining) {
|
||||
to_copy = remaining;
|
||||
}
|
||||
|
||||
unsigned char *src = (unsigned char *)data + copied;
|
||||
unsigned char *dest = (unsigned char *)tab->tab_cluster_buf
|
||||
+ tab->tab_cluster_buf_pos;
|
||||
|
||||
memcpy(dest, src, to_copy);
|
||||
|
||||
copied += to_copy;
|
||||
tab->tab_cluster_buf_pos += to_copy;
|
||||
|
||||
len -= to_copy;
|
||||
remaining -= to_copy;
|
||||
|
||||
if (remaining == 0) {
|
||||
status = flush_cluster_buf(tab);
|
||||
}
|
||||
|
||||
if (status != EC3_SUCCESS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (status != EC3_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
return EC3_SUCCESS;
|
||||
}
|
||||
|
||||
enum ec3_status chunk_table_end_chunk(
|
||||
struct chunk_table *tab,
|
||||
ec3_chunk_id out_chunk_id)
|
||||
{
|
||||
enum ec3_status status = EC3_SUCCESS;
|
||||
if (tab->tab_cluster_buf_pos > 0) {
|
||||
status = flush_cluster_buf(tab);
|
||||
}
|
||||
|
||||
if (status != EC3_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
ec3_chunk_id id;
|
||||
b_hash_ctx_finish(&hash, id, sizeof id);
|
||||
b_hash_ctx_finish(&tab->tab_hash, id, sizeof id);
|
||||
|
||||
struct cache_entry *entry = get_cache_entry(&tab->tab_cache, id);
|
||||
if (entry) {
|
||||
memcpy(out_chunk_id, id, sizeof id);
|
||||
return EC3_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -467,21 +548,6 @@ enum ec3_status chunk_table_put(
|
||||
return EC3_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
size_t nr_written = 0;
|
||||
|
||||
size_t cdat_cluster = tab->tab_next_cdat_cluster++;
|
||||
enum ec3_status status = ec3_tag_ioctx_write_cluster(
|
||||
tab->tab_cdat,
|
||||
cdat_cluster,
|
||||
data,
|
||||
len,
|
||||
&nr_written);
|
||||
|
||||
if (status != EC3_SUCCESS) {
|
||||
free(entry);
|
||||
return status;
|
||||
}
|
||||
|
||||
struct ec3_chunk chunk = {0};
|
||||
memcpy(chunk.c_id, id, sizeof id);
|
||||
int err = b_tree_put(
|
||||
@@ -494,8 +560,18 @@ enum ec3_status chunk_table_put(
|
||||
|
||||
memset(entry, 0x0, sizeof *entry);
|
||||
memcpy(entry->e_id, id, sizeof id);
|
||||
memcpy(out_chunk_id, id, sizeof id);
|
||||
|
||||
put_cache_entry(&tab->tab_cache, entry);
|
||||
|
||||
return EC3_SUCCESS;
|
||||
}
|
||||
|
||||
void ec3_chunk_id_to_string(const ec3_chunk_id id, char *out, size_t max)
|
||||
{
|
||||
max = b_min(size_t, max, EC3_CHUNK_ID_SIZE * 2 + 1);
|
||||
|
||||
for (size_t i = 0; i < max; i++) {
|
||||
snprintf(out + (i * 2), max - (i * 2), "%02x", id[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,11 +16,15 @@ struct chunk_table {
|
||||
b_btree tab_cache;
|
||||
|
||||
unsigned char *tab_cluster_buf;
|
||||
size_t tab_cluster_buf_pos;
|
||||
|
||||
size_t tab_next_cdat_cluster;
|
||||
|
||||
struct ec3_tag_ioctx *tab_ctab;
|
||||
struct ec3_tag_ioctx *tab_cdat;
|
||||
|
||||
b_hash_ctx tab_hash;
|
||||
size_t tab_first_chunk_cdat_cluster;
|
||||
};
|
||||
|
||||
extern enum ec3_status chunk_table_init(
|
||||
@@ -37,12 +41,20 @@ extern enum ec3_status chunk_table_get(
|
||||
void *out_data,
|
||||
size_t *out_len);
|
||||
|
||||
extern enum ec3_status chunk_table_begin_chunk(struct chunk_table *tab);
|
||||
extern enum ec3_status chunk_table_put(
|
||||
struct chunk_table *tab,
|
||||
const void *data,
|
||||
size_t len,
|
||||
ec3_chunk_id out_id);
|
||||
size_t len);
|
||||
extern enum ec3_status chunk_table_end_chunk(
|
||||
struct chunk_table *tab,
|
||||
ec3_chunk_id out_chunk_id);
|
||||
|
||||
extern size_t chunk_table_bytes_per_chunk(struct chunk_table *tab);
|
||||
|
||||
extern void ec3_chunk_id_to_string(
|
||||
const ec3_chunk_id id,
|
||||
char *out,
|
||||
size_t max);
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user