dev: implement reading from block devices
Reading from block devices is done using the block cache (bcache). This cache stores sectors from a block device in pages of memory marked as 'cached', which will allow them to be reclaimed when memory pressure is high (TODO). While block device drivers implement callbacks that allow reading/writing at block granularity, the device subsystem uses the block cache to implement reading/writing at byte granularity in a driver-agnostic way. Block drivers can disable the block cache for their devices, but this requires that all clients communicate with those devices at block granularity. This commit also adds an offset parameter to the device and object read/write functions/callbacks.
This commit is contained in:
287
dev/block.c
287
dev/block.c
@@ -1,4 +1,5 @@
|
||||
#include <socks/device.h>
|
||||
#include <socks/block.h>
|
||||
#include <socks/util.h>
|
||||
#include <socks/printk.h>
|
||||
#include <socks/libc/stdio.h>
|
||||
@@ -24,9 +25,191 @@ struct block_device *block_device_from_generic(struct device *dev)
|
||||
return BLOCK_DEVICE(dev);
|
||||
}
|
||||
|
||||
kern_status_t block_device_read(struct device *dev, void *buf, size_t size, size_t *bytes_read, socks_flags_t flags)
|
||||
/*
 * Read whole sectors directly from the driver, bypassing the block cache.
 *
 * blockdev:     device to read from; b_ops->read_blocks must be non-NULL
 *               (callers are responsible for checking this).
 * buf:          destination, at least nr_sectors * b_sector_size bytes.
 * offset:       first sector to read.
 * nr_sectors:   number of sectors requested.
 * sectors_read: receives the sector count left in nr_sectors after the
 *               driver callback returns.
 *
 * Returns whatever status the driver's read_blocks callback returns.
 */
static kern_status_t do_read_blocks(struct block_device *blockdev, void *buf, sectors_t offset, size_t nr_sectors, size_t *sectors_read, socks_flags_t flags)
{
	struct device *dev = block_device_base(blockdev);
	struct iovec vec = {
		.io_buf = buf,
		.io_len = nr_sectors * blockdev->b_sector_size,
	};

	/* nr_sectors is passed by pointer; presumably the driver writes back
	 * the number of sectors it actually transferred. */
	kern_status_t status = blockdev->b_ops->read_blocks(dev, offset, &nr_sectors, &vec, 1, flags);

	*sectors_read = nr_sectors;
	return status;
}
|
||||
|
||||
/*
 * Look up (creating if necessary) the cache slot for `sector` and make sure
 * it holds data, reading the sector from the device on a miss.
 *
 * On success *bufp is set to the cached sector buffer and KERN_OK is
 * returned. Returns KERN_UNSUPPORTED when the device has
 * BLOCK_DEVICE_NO_BCACHE set, or the failing status from bcache_get() /
 * do_read_blocks(). *bufp is left untouched on failure.
 */
extern kern_status_t get_cached_sector(struct block_device *blockdev, sectors_t sector, socks_flags_t flags, void **bufp)
{
	struct bcache_sector entry;
	kern_status_t rc;

	if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) {
		return KERN_UNSUPPORTED;
	}

	rc = bcache_get(&blockdev->b_cache, sector, true, &entry);
	if (rc != KERN_OK) {
		return rc;
	}

	if (entry.sect_present) {
		/* cache hit: nothing to fetch. */
		*bufp = entry.sect_buf;
		return KERN_OK;
	}

	/* TODO read all missing blocks in one go */
	size_t filled = 0;
	rc = do_read_blocks(blockdev, entry.sect_buf, sector, 1, &filled, flags);
	if (rc != KERN_OK) {
		return rc;
	}

	bcache_mark_present(&entry);
	*bufp = entry.sect_buf;
	return KERN_OK;
}
|
||||
|
||||
/*
 * Read up to `nr_sectors` whole sectors starting at sector `offset`.
 *
 * The request is clamped to the device capacity; a read that starts at or
 * past the end of the device succeeds with *sectors_read == 0. When the
 * device has BLOCK_DEVICE_NO_BCACHE set the driver is called directly,
 * otherwise each sector is served from (and, on a miss, loaded into) the
 * block cache and copied into `buf`.
 *
 * Returns KERN_INVALID_ARGUMENT if BLOCK_DEVICE(dev) yields NULL,
 * KERN_UNSUPPORTED if the driver has no read_blocks callback, or the first
 * failing status from the lower layers. On a mid-transfer failure
 * *sectors_read holds the number of sectors already copied.
 */
kern_status_t block_device_read_blocks(struct device *dev, void *buf, sectors_t offset, size_t nr_sectors, size_t *sectors_read, socks_flags_t flags)
{
	struct block_device *blockdev = BLOCK_DEVICE(dev);
	if (!blockdev) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!blockdev->b_ops || !blockdev->b_ops->read_blocks) {
		return KERN_UNSUPPORTED;
	}

	if (offset >= blockdev->b_capacity) {
		*sectors_read = 0;
		return KERN_OK;
	}

	/* offset < b_capacity here, so the subtraction cannot underflow; this
	 * form also avoids the wrap-around that `offset + nr_sectors` could hit. */
	if (nr_sectors > blockdev->b_capacity - offset) {
		nr_sectors = blockdev->b_capacity - offset;
	}

	if (!nr_sectors) {
		*sectors_read = 0;
		return KERN_OK;
	}

	if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) {
		return do_read_blocks(blockdev, buf, offset, nr_sectors, sectors_read, flags);
	}

	bcache_lock(&blockdev->b_cache);

	size_t nr_read = 0;
	kern_status_t status = KERN_OK;

	for (sectors_t i = 0; i < nr_sectors; i++) {
		sectors_t sect = offset + i;
		void *sect_cache_buf;

		status = get_cached_sector(blockdev, sect, flags, &sect_cache_buf);
		if (status != KERN_OK) {
			/* report the sectors copied so far alongside the error. */
			break;
		}

		char *sect_dest_buf = (char *)buf + (i * blockdev->b_sector_size);
		memcpy(sect_dest_buf, sect_cache_buf, blockdev->b_sector_size);
		nr_read++;
	}

	bcache_unlock(&blockdev->b_cache);
	*sectors_read = nr_read;
	return status;
}
|
||||
|
||||
/*
 * Read `size` bytes starting at byte `offset` from a block device.
 *
 * Devices with BLOCK_DEVICE_NO_BCACHE set can only be read at sector
 * granularity: `offset` and `size` must both be multiples of the sector
 * size (KERN_INVALID_ARGUMENT otherwise) and the request is forwarded to
 * block_device_read_blocks().
 *
 * For cached devices the request may be arbitrarily aligned. It is served
 * sector by sector from the block cache and is clamped to the device
 * capacity, mirroring block_device_read_blocks(): a read that starts at or
 * past the end of the device succeeds with *bytes_read == 0.
 *
 * Returns KERN_INVALID_ARGUMENT if BLOCK_DEVICE(dev) yields NULL,
 * KERN_UNSUPPORTED if a cached device's driver has no read_blocks callback,
 * or the first failing status from the lower layers. On a mid-transfer
 * failure *bytes_read holds the number of bytes already copied.
 */
kern_status_t block_device_read(struct device *dev, void *buf, size_t offset, size_t size, size_t *bytes_read, socks_flags_t flags)
{
	struct block_device *blockdev = BLOCK_DEVICE(dev);
	if (!blockdev) {
		return KERN_INVALID_ARGUMENT;
	}

	if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) {
		/* no bcache for this device, so the client has to read data at sector granularity. */
		sectors_t sect_offset = offset / blockdev->b_sector_size;
		size_t nr_sectors = size / blockdev->b_sector_size;

		if ((sect_offset * blockdev->b_sector_size != offset) || (nr_sectors * blockdev->b_sector_size != size)) {
			/* args are not sector-aligned */
			*bytes_read = 0;
			return KERN_INVALID_ARGUMENT;
		}

		size_t sectors_read = 0;
		kern_status_t blk_status = block_device_read_blocks(dev, buf, sect_offset, nr_sectors, &sectors_read, flags);
		*bytes_read = sectors_read * blockdev->b_sector_size;
		return blk_status;
	}

	/* a cache miss ends up calling b_ops->read_blocks, so it must exist. */
	if (!blockdev->b_ops || !blockdev->b_ops->read_blocks) {
		*bytes_read = 0;
		return KERN_UNSUPPORTED;
	}

	const size_t sector_size = blockdev->b_sector_size;
	sectors_t sect = offset / sector_size;
	/* only the first sector can start mid-sector; reset to 0 afterwards. */
	size_t in_sect_offset = offset % sector_size;

	char *dest = buf;
	size_t nr_read = 0;
	kern_status_t status = KERN_OK;

	bcache_lock(&blockdev->b_cache);

	/* walk the sectors covered by the request. the partial head, the full
	 * middle sectors and the partial tail all reduce to "copy
	 * min(bytes left in sector, bytes left in request)". stopping at
	 * b_capacity keeps the read inside the device without ever computing
	 * offset + size, which could wrap. */
	while (nr_read < size && sect < blockdev->b_capacity) {
		void *sector_cachebuf;

		status = get_cached_sector(blockdev, sect, flags, &sector_cachebuf);
		if (status != KERN_OK) {
			break;
		}

		size_t chunk = sector_size - in_sect_offset;
		if (chunk > size - nr_read) {
			chunk = size - nr_read;
		}

		memcpy(dest + nr_read, (char *)sector_cachebuf + in_sect_offset, chunk);

		nr_read += chunk;
		in_sect_offset = 0;
		sect++;
	}

	bcache_unlock(&blockdev->b_cache);
	*bytes_read = nr_read;
	return status;
}
|
||||
|
||||
static kern_status_t generate_name(struct block_device *dev, char out[DEV_NAME_MAX])
|
||||
@@ -37,13 +220,21 @@ static kern_status_t generate_name(struct block_device *dev, char out[DEV_NAME_M
|
||||
|
||||
kern_status_t block_device_register(struct device *dev)
|
||||
{
|
||||
struct block_device *blockdev = &dev->blk;
|
||||
|
||||
if (!(blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE)) {
|
||||
kern_status_t status = bcache_init(&blockdev->b_cache, blockdev->b_sector_size);
|
||||
if (status != KERN_OK) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&block_device_ids_lock, &flags);
|
||||
unsigned int id = bitmap_lowest_clear(block_device_ids, BLOCK_DEVICE_MAX);
|
||||
bitmap_set(block_device_ids, id);
|
||||
spin_unlock_irqrestore(&block_device_ids_lock, flags);
|
||||
|
||||
struct block_device *blockdev = &dev->blk;
|
||||
blockdev->b_id = id;
|
||||
|
||||
char name[DEV_NAME_MAX];
|
||||
@@ -52,7 +243,7 @@ kern_status_t block_device_register(struct device *dev)
|
||||
snprintf(path, sizeof path, "/dev/block/%s", name);
|
||||
|
||||
char size_string[32];
|
||||
data_size_to_string(blockdev->sector_size * blockdev->capacity, size_string, sizeof size_string);
|
||||
data_size_to_string(blockdev->b_sector_size * blockdev->b_capacity, size_string, sizeof size_string);
|
||||
|
||||
printk("dev: found %s %s block device '%s'", size_string, dev->dev_owner->drv_name, dev->dev_model_name);
|
||||
|
||||
@@ -63,3 +254,91 @@ struct device_type_ops block_type_ops = {
|
||||
.register_device = block_device_register,
|
||||
.read = block_device_read,
|
||||
};
|
||||
|
||||
/* Instantiates get_block_page(): a btree lookup of a struct vm_page (linked
 * through p_bnode) by its p_blockid key of type sectors_t. bcache_get() calls
 * it as get_block_page(&cache->b_pagetree, key) and treats a NULL result as
 * "not found". */
static BTREE_DEFINE_SIMPLE_GET(struct vm_page, sectors_t, p_bnode, p_blockid, get_block_page)
|
||||
/* Instantiates put_block_page(): presumably the matching btree insert for
 * struct vm_page nodes keyed by p_blockid (TODO confirm the generated
 * signature against the BTREE_DEFINE_SIMPLE_INSERT definition). */
static BTREE_DEFINE_SIMPLE_INSERT(struct vm_page, p_bnode, p_blockid, put_block_page)
|
||||
|
||||
struct bcache *bcache_create(unsigned int block_size)
|
||||
{
|
||||
struct bcache *out = kmalloc(sizeof *out, VM_NORMAL);
|
||||
if (!out) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (bcache_init(out, block_size) != KERN_OK) {
|
||||
kfree(out);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
 * Tear down a cache and release its allocation with kfree(); intended as
 * the counterpart of bcache_create().
 *
 * Passing NULL is a no-op, so callers can destroy unconditionally on their
 * cleanup paths.
 */
void bcache_destroy(struct bcache *cache)
{
	if (!cache) {
		/* bcache_deinit() dereferences the cache, so bail out early. */
		return;
	}

	bcache_deinit(cache);
	kfree(cache);
}
|
||||
|
||||
kern_status_t bcache_init(struct bcache *cache, unsigned int block_size)
|
||||
{
|
||||
memset(cache, 0x0, sizeof *cache);
|
||||
cache->b_sector_size = block_size;
|
||||
cache->b_sectors_per_page = VM_PAGE_SIZE / block_size;
|
||||
cache->b_lock = SPIN_LOCK_INIT;
|
||||
|
||||
return KERN_OK;
|
||||
}
|
||||
|
||||
void bcache_deinit(struct bcache *cache)
|
||||
{
|
||||
struct btree_node *first_node = btree_first(&cache->b_pagetree);
|
||||
if (!first_node) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct vm_page *cur = BTREE_CONTAINER(struct vm_page, p_bnode, first_node);
|
||||
|
||||
while (cur) {
|
||||
struct btree_node *next_node = btree_next(&cur->p_bnode);
|
||||
struct vm_page *next = BTREE_CONTAINER(struct vm_page, p_bnode, next_node);
|
||||
|
||||
cur->p_flags &= ~(VM_PAGE_CACHE);
|
||||
btree_delete(&cache->b_pagetree, &cur->p_bnode);
|
||||
vm_page_free(cur);
|
||||
cur = next;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Find the cache slot for sector `at`.
 *
 * Sectors are grouped b_sectors_per_page to a page; pages are keyed in
 * b_pagetree by (sector / b_sectors_per_page). When the page is missing and
 * `create` is true a fresh page is allocated, marked VM_PAGE_CACHE with all
 * "present" bits cleared, and inserted into the tree.
 *
 * On success `out` describes the slot: the backing page, the sector's index
 * within that page, a pointer to the sector's bytes inside the page, and
 * whether the slot has been marked present.
 *
 * Returns KERN_OK on success, KERN_NO_ENTRY if the page is not cached and
 * `create` is false, or KERN_NO_MEMORY if a page could not be allocated.
 */
kern_status_t bcache_get(struct bcache *cache, sectors_t at, bool create, struct bcache_sector *out)
{
	unsigned int page_index = at % cache->b_sectors_per_page;
	at /= cache->b_sectors_per_page;

	struct vm_page *page = get_block_page(&cache->b_pagetree, at);
	if (!page) {
		if (!create) {
			return KERN_NO_ENTRY;
		}

		page = vm_page_alloc(VM_PAGE_MIN_ORDER, VM_NORMAL);
		if (!page) {
			return KERN_NO_MEMORY;
		}

		page->p_flags |= VM_PAGE_CACHE;
		bitmap_zero(page->p_blockbits, VM_MAX_SECTORS_PER_PAGE);
		page->p_blockid = at;

		/* link the page into the tree so later lookups hit it and
		 * bcache_deinit() can find and free it.
		 * NOTE(review): assumes the macro-generated insert takes
		 * (tree, page) - confirm against BTREE_DEFINE_SIMPLE_INSERT. */
		put_block_page(&cache->b_pagetree, page);
	}

	out->sect_page = page;
	out->sect_index = page_index;
	/* each sector owns its own b_sector_size slice of the page; pointing
	 * every sector at the page base would make them overwrite each other. */
	out->sect_buf = (char *)vm_page_get_vaddr(page) + ((size_t)page_index * cache->b_sector_size);
	out->sect_present = bitmap_check(page->p_blockbits, page_index);

	return KERN_OK;
}
|
||||
|
||||
void bcache_mark_present(struct bcache_sector *sect)
|
||||
{
|
||||
bitmap_set(sect->sect_page->p_blockbits, sect->sect_index);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user