dev: implement reading from block devices

Reading from block devices is done through the block cache (bcache).
The cache stores sectors from a block device in pages of memory
marked as 'cached', which will allow those pages to be reclaimed when
memory pressure is high (reclaim is not implemented yet; TODO).

Block device drivers only implement callbacks for reading/writing
at block granularity; the device subsystem uses the block cache to
provide reading/writing at byte granularity in a driver-agnostic way.

Block drivers can disable the block cache for their devices, but
clients of such devices must then communicate with them at
block granularity.

Also adds an offset parameter to the device and object read/write functions and callbacks.
This commit is contained in:
2023-07-09 21:58:40 +01:00
parent 53440653f2
commit 3233169f25
14 changed files with 435 additions and 52 deletions

View File

@@ -1,4 +1,5 @@
#include <socks/device.h>
#include <socks/block.h>
#include <socks/util.h>
#include <socks/printk.h>
#include <socks/libc/stdio.h>
@@ -24,9 +25,191 @@ struct block_device *block_device_from_generic(struct device *dev)
return BLOCK_DEVICE(dev);
}
kern_status_t block_device_read(struct device *dev, void *buf, size_t size, size_t *bytes_read, socks_flags_t flags)
/*
 * Read `nr_sectors` whole sectors, starting at sector `offset`, directly from
 * the driver (no block cache involved).
 *
 * `buf` is described to the driver as a single iovec of
 * nr_sectors * b_sector_size bytes. After the call, the value the driver's
 * read_blocks callback left in `nr_sectors` is stored in `*sectors_read`
 * (presumably the number of sectors actually transferred).
 *
 * This helper does not check b_ops or b_ops->read_blocks for NULL; callers
 * are expected to have done so.
 *
 * Returns whatever status the driver callback returns.
 */
static kern_status_t do_read_blocks(struct block_device *blockdev, void *buf, sectors_t offset, size_t nr_sectors, size_t *sectors_read, socks_flags_t flags)
{
	struct device *dev = block_device_base(blockdev);
	struct iovec vec = {
		.io_buf = buf,
		.io_len = nr_sectors * blockdev->b_sector_size,
	};

	kern_status_t status = blockdev->b_ops->read_blocks(dev, offset, &nr_sectors, &vec, 1, flags);
	*sectors_read = nr_sectors;
	return status;
}
/*
 * Look up (creating if necessary) the cache slot for `sector` and make sure
 * it holds the sector's data, reading it from the device on a miss.
 *
 * On success `*bufp` points at the cached copy of the sector; on failure
 * `*bufp` is left untouched.
 *
 * Returns KERN_UNSUPPORTED for devices registered with
 * BLOCK_DEVICE_NO_BCACHE, otherwise the first error reported by bcache_get()
 * or the driver read, or KERN_OK.
 *
 * Both callers in this file hold the bcache lock around this call.
 */
extern kern_status_t get_cached_sector(struct block_device *blockdev, sectors_t sector, socks_flags_t flags, void **bufp)
{
	if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) {
		return KERN_UNSUPPORTED;
	}

	struct bcache_sector entry;
	kern_status_t err = bcache_get(&blockdev->b_cache, sector, true, &entry);
	if (err != KERN_OK) {
		return err;
	}

	if (entry.sect_present) {
		/* cache hit: nothing to fetch. */
		*bufp = entry.sect_buf;
		return KERN_OK;
	}

	/* cache miss: fill the slot from the device, then flag it as valid. */
	size_t filled = 0;
	/* TODO read all missing blocks in one go */
	err = do_read_blocks(blockdev, entry.sect_buf, sector, 1, &filled, flags);
	if (err != KERN_OK) {
		return err;
	}

	bcache_mark_present(&entry);
	*bufp = entry.sect_buf;
	return KERN_OK;
}
/*
 * Read up to `nr_sectors` whole sectors starting at sector `offset` into
 * `buf`, which must have room for nr_sectors * b_sector_size bytes.
 *
 * The request is truncated at the end of the device (b_capacity); a read that
 * starts at or beyond the end succeeds with *sectors_read == 0.
 *
 * Devices flagged BLOCK_DEVICE_NO_BCACHE are read directly through the
 * driver. All other devices are served sector-by-sector from the block cache,
 * which is locked for the duration of the copy.
 *
 * `*sectors_read` receives the number of sectors copied into `buf`; it is
 * also valid (possibly short) when an error is returned part-way through.
 *
 * Returns KERN_INVALID_ARGUMENT if `dev` is not a block device,
 * KERN_UNSUPPORTED if the driver has no read_blocks callback, otherwise the
 * status of the underlying read.
 */
kern_status_t block_device_read_blocks(struct device *dev, void *buf, sectors_t offset, size_t nr_sectors, size_t *sectors_read, socks_flags_t flags)
{
	*sectors_read = 0;

	struct block_device *blockdev = BLOCK_DEVICE(dev);
	if (!blockdev) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!blockdev->b_ops || !blockdev->b_ops->read_blocks) {
		return KERN_UNSUPPORTED;
	}

	if (offset >= blockdev->b_capacity) {
		return KERN_OK;
	}

	/*
	 * clamp to the end of the device. compare against the remaining sector
	 * count rather than computing offset + nr_sectors, which could wrap
	 * around and let an oversized request through unclamped.
	 */
	if (nr_sectors > blockdev->b_capacity - offset) {
		nr_sectors = blockdev->b_capacity - offset;
	}

	if (!nr_sectors) {
		return KERN_OK;
	}

	if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) {
		return do_read_blocks(blockdev, buf, offset, nr_sectors, sectors_read, flags);
	}

	bcache_lock(&blockdev->b_cache);

	size_t nr_read = 0;
	kern_status_t status = KERN_OK;
	while (nr_read < nr_sectors) {
		void *sect_cache_buf;
		status = get_cached_sector(blockdev, offset + nr_read, flags, &sect_cache_buf);
		if (status != KERN_OK) {
			break;
		}

		char *sect_dest_buf = (char *)buf + (nr_read * blockdev->b_sector_size);
		memcpy(sect_dest_buf, sect_cache_buf, blockdev->b_sector_size);
		nr_read++;
	}

	bcache_unlock(&blockdev->b_cache);

	*sectors_read = nr_read;
	return status;
}
/*
 * Read `size` bytes starting at byte `offset` from a block device into `buf`.
 *
 * Devices flagged BLOCK_DEVICE_NO_BCACHE only accept sector-aligned `offset`
 * and `size` (KERN_INVALID_ARGUMENT otherwise) and are forwarded to
 * block_device_read_blocks().
 *
 * For cached devices the request may start and end anywhere: each sector the
 * range touches is fetched through the block cache and the relevant part of
 * it is copied out. The read stops at the end of the device (b_capacity), so
 * a read that starts at or beyond the end succeeds with *bytes_read == 0.
 *
 * `*bytes_read` receives the number of bytes copied into `buf`; it is also
 * valid (possibly short) when an error is returned part-way through.
 *
 * Returns KERN_INVALID_ARGUMENT if `dev` is not a block device,
 * KERN_UNSUPPORTED if the driver has no read_blocks callback, otherwise the
 * status of the underlying sector reads.
 */
kern_status_t block_device_read(struct device *dev, void *buf, size_t offset, size_t size, size_t *bytes_read, socks_flags_t flags)
{
	struct block_device *blockdev = BLOCK_DEVICE(dev);
	if (!blockdev) {
		return KERN_INVALID_ARGUMENT;
	}

	kern_status_t status = KERN_OK;

	if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) {
		/* no bcache for this device, so the client has to read data at sector granularity. */
		sectors_t sect_offset = offset / blockdev->b_sector_size;
		size_t nr_sectors = size / blockdev->b_sector_size;

		if ((sect_offset * blockdev->b_sector_size != offset) || (nr_sectors * blockdev->b_sector_size != size)) {
			/* args are not sector-aligned */
			return KERN_INVALID_ARGUMENT;
		}

		size_t sectors_read = 0;
		status = block_device_read_blocks(dev, buf, sect_offset, nr_sectors, &sectors_read, flags);
		*bytes_read = sectors_read * blockdev->b_sector_size;
		return status;
	}

	/* get_cached_sector() calls the driver on a cache miss without checking for it. */
	if (!blockdev->b_ops || !blockdev->b_ops->read_blocks) {
		return KERN_UNSUPPORTED;
	}

	/* keep offset + size from wrapping around the top of the address range. */
	const size_t max_size = (size_t)-1 - offset;
	if (size > max_size) {
		size = max_size;
	}

	const size_t sector_size = blockdev->b_sector_size;
	char *dest = buf;
	size_t nr_read = 0;

	bcache_lock(&blockdev->b_cache);

	/*
	 * walk the byte range one sector at a time. the first and last sectors
	 * may only be partially covered by the request; everything in between
	 * is copied whole.
	 */
	while (nr_read < size) {
		size_t pos = offset + nr_read;
		sectors_t sector = pos / sector_size;
		if (sector >= blockdev->b_capacity) {
			/* ran off the end of the device: short read. */
			break;
		}

		void *sector_cachebuf;
		status = get_cached_sector(blockdev, sector, flags, &sector_cachebuf);
		if (status != KERN_OK) {
			break;
		}

		size_t in_sect_offset = pos % sector_size;
		size_t in_sect_size = sector_size - in_sect_offset;
		if (in_sect_size > size - nr_read) {
			in_sect_size = size - nr_read;
		}

		memcpy(dest + nr_read, (char *)sector_cachebuf + in_sect_offset, in_sect_size);
		nr_read += in_sect_size;
	}

	bcache_unlock(&blockdev->b_cache);

	*bytes_read = nr_read;
	return status;
}
static kern_status_t generate_name(struct block_device *dev, char out[DEV_NAME_MAX])
@@ -37,13 +220,21 @@ static kern_status_t generate_name(struct block_device *dev, char out[DEV_NAME_M
kern_status_t block_device_register(struct device *dev)
{
struct block_device *blockdev = &dev->blk;
if (!(blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE)) {
kern_status_t status = bcache_init(&blockdev->b_cache, blockdev->b_sector_size);
if (status != KERN_OK) {
return status;
}
}
unsigned long flags;
spin_lock_irqsave(&block_device_ids_lock, &flags);
unsigned int id = bitmap_lowest_clear(block_device_ids, BLOCK_DEVICE_MAX);
bitmap_set(block_device_ids, id);
spin_unlock_irqrestore(&block_device_ids_lock, flags);
struct block_device *blockdev = &dev->blk;
blockdev->b_id = id;
char name[DEV_NAME_MAX];
@@ -52,7 +243,7 @@ kern_status_t block_device_register(struct device *dev)
snprintf(path, sizeof path, "/dev/block/%s", name);
char size_string[32];
data_size_to_string(blockdev->sector_size * blockdev->capacity, size_string, sizeof size_string);
data_size_to_string(blockdev->b_sector_size * blockdev->b_capacity, size_string, sizeof size_string);
printk("dev: found %s %s block device '%s'", size_string, dev->dev_owner->drv_name, dev->dev_model_name);
@@ -63,3 +254,91 @@ struct device_type_ops block_type_ops = {
.register_device = block_device_register,
.read = block_device_read,
};
/*
 * btree accessors for the per-cache page tree. nodes are struct vm_page,
 * linked through p_bnode and keyed by p_blockid (a sectors_t).
 * get_block_page is used by bcache_get() to look a page up by that key.
 */
static BTREE_DEFINE_SIMPLE_GET(struct vm_page, sectors_t, p_bnode, p_blockid, get_block_page)
static BTREE_DEFINE_SIMPLE_INSERT(struct vm_page, p_bnode, p_blockid, put_block_page)
/*
 * Allocate a block cache with kmalloc() and initialise it for sectors of
 * `block_size` bytes.
 *
 * Returns the new cache, or NULL if either the allocation or bcache_init()
 * fails (the allocation is released in the latter case).
 */
struct bcache *bcache_create(unsigned int block_size)
{
	struct bcache *cache = kmalloc(sizeof *cache, VM_NORMAL);

	if (cache && bcache_init(cache, block_size) != KERN_OK) {
		kfree(cache);
		cache = NULL;
	}

	return cache;
}
/*
 * Tear down a cache via bcache_deinit() and then kfree() the cache
 * structure itself.
 */
void bcache_destroy(struct bcache *cache)
{
	/* release the cached pages first, then the structure that tracked them. */
	bcache_deinit(cache);

	kfree(cache);
}
kern_status_t bcache_init(struct bcache *cache, unsigned int block_size)
{
memset(cache, 0x0, sizeof *cache);
cache->b_sector_size = block_size;
cache->b_sectors_per_page = VM_PAGE_SIZE / block_size;
cache->b_lock = SPIN_LOCK_INIT;
return KERN_OK;
}
/*
 * Release every page held by `cache`: each page has its VM_PAGE_CACHE flag
 * cleared, is unlinked from the cache's page tree and is handed back with
 * vm_page_free(). The cache structure itself is not freed.
 */
void bcache_deinit(struct bcache *cache)
{
	/*
	 * iterate over tree nodes rather than their containing pages, so that
	 * the end of the tree is detected on the node pointer itself. applying
	 * BTREE_CONTAINER to a NULL node and testing the result is not a
	 * reliable end-of-tree test.
	 */
	struct btree_node *node = btree_first(&cache->b_pagetree);

	while (node) {
		/* fetch the successor before this node is unlinked and freed. */
		struct btree_node *next_node = btree_next(node);
		struct vm_page *page = BTREE_CONTAINER(struct vm_page, p_bnode, node);

		page->p_flags &= ~(VM_PAGE_CACHE);
		btree_delete(&cache->b_pagetree, &page->p_bnode);
		vm_page_free(page);

		node = next_node;
	}
}
/*
 * Locate the cache slot for sector `at`.
 *
 * Sectors are grouped b_sectors_per_page to a page; pages are kept in
 * b_pagetree keyed by (sector / b_sectors_per_page). If the page does not
 * exist yet and `create` is true, a fresh page is allocated, flagged
 * VM_PAGE_CACHE, given an all-clear presence bitmap and inserted into the
 * tree so that later lookups find it.
 *
 * On success `out` describes the slot: the page, the sector's index within
 * the page, a pointer to the sector's bytes within the page, and whether the
 * slot currently holds valid data.
 *
 * Returns KERN_NO_ENTRY if the page is missing and `create` is false,
 * KERN_NO_MEMORY if a page could not be allocated, KERN_INVALID_ARGUMENT if
 * the cache has no valid sectors-per-page value, KERN_OK otherwise.
 */
kern_status_t bcache_get(struct bcache *cache, sectors_t at, bool create, struct bcache_sector *out)
{
	if (!cache->b_sectors_per_page) {
		/* uninitialised or mis-sized cache; avoid dividing by zero. */
		return KERN_INVALID_ARGUMENT;
	}

	unsigned int page_index = at % cache->b_sectors_per_page;
	sectors_t page_id = at / cache->b_sectors_per_page;

	struct vm_page *page = get_block_page(&cache->b_pagetree, page_id);
	if (!page) {
		if (!create) {
			return KERN_NO_ENTRY;
		}

		page = vm_page_alloc(VM_PAGE_MIN_ORDER, VM_NORMAL);
		if (!page) {
			return KERN_NO_MEMORY;
		}

		page->p_flags |= VM_PAGE_CACHE;
		bitmap_zero(page->p_blockbits, VM_MAX_SECTORS_PER_PAGE);
		page->p_blockid = page_id;

		/* the key (p_blockid) is set, so the page can now be linked into the
		 * tree. without this, every miss would allocate a new page that is
		 * never found again and never freed by bcache_deinit(). */
		put_block_page(&cache->b_pagetree, page);
	}

	out->sect_page = page;
	out->sect_index = page_index;
	/* each sector owns its own b_sector_size-byte slot within the page. */
	out->sect_buf = (char *)vm_page_get_vaddr(page) + ((size_t)page_index * cache->b_sector_size);
	out->sect_present = bitmap_check(page->p_blockbits, page_index);

	return KERN_OK;
}
void bcache_mark_present(struct bcache_sector *sect)
{
bitmap_set(sect->sect_page->p_blockbits, sect->sect_index);
}

View File

@@ -18,25 +18,25 @@ struct char_device *char_device_from_generic(struct device *dev)
return CHAR_DEVICE(dev);
}
static kern_status_t char_device_read(struct device *dev, void *buf, size_t size, size_t *bytes_read, socks_flags_t flags)
/*
 * Device-type read hook for character devices: forwards the request,
 * including the byte `offset`, to the driver's read callback.
 *
 * Returns KERN_UNSUPPORTED if the driver provides no ops table or no read
 * callback, otherwise whatever the callback returns.
 */
static kern_status_t char_device_read(struct device *dev, void *buf, size_t offset, size_t size, size_t *bytes_read, socks_flags_t flags)
{
	struct char_device *cdev = CHAR_DEVICE(dev);

	if (!cdev->c_ops || !cdev->c_ops->read) {
		return KERN_UNSUPPORTED;
	}

	/* invoke the driver exactly once, using the offset-aware signature. */
	return cdev->c_ops->read(dev, buf, offset, size, bytes_read, flags);
}
static kern_status_t char_device_write(struct device *dev, const void *buf, size_t size, size_t *bytes_read, socks_flags_t flags)
static kern_status_t char_device_write(struct device *dev, const void *buf, size_t offset, size_t size, size_t *bytes_read, socks_flags_t flags)
{
kern_status_t status = KERN_UNSUPPORTED;
struct char_device *cdev = CHAR_DEVICE(dev);
if (cdev->c_ops && cdev->c_ops->write) {
status = cdev->c_ops->write(dev, buf, size, bytes_read, flags);
status = cdev->c_ops->write(dev, buf, offset, size, bytes_read, flags);
}
return status;

View File

@@ -11,8 +11,8 @@ static struct object *dev_folder = NULL;
static struct device *__root_device = NULL;
static struct device *__misc_device = NULL;
static kern_status_t device_object_destroy(struct object *);
static kern_status_t device_object_read(struct object *obj, void *, size_t *, socks_flags_t);
static kern_status_t device_object_write(struct object *obj, const void *, size_t *, socks_flags_t);
static kern_status_t device_object_read(struct object *obj, void *, size_t, size_t *, socks_flags_t);
static kern_status_t device_object_write(struct object *obj, const void *, size_t, size_t *, socks_flags_t);
static kern_status_t device_object_query_name(struct object *, char out[OBJECT_NAME_MAX]);
static kern_status_t device_object_get_child_at(struct object *, size_t, struct object **);
static kern_status_t device_object_get_child_named(struct object *, const char *, struct object **);
@@ -123,23 +123,23 @@ struct device *generic_device_create(void)
return dev;
}
kern_status_t device_read(struct device *dev, void *buf, size_t size, size_t *bytes_read, socks_flags_t flags)
/*
 * Read `size` bytes at byte `offset` from `dev` by dispatching to the read
 * hook registered for the device's type (type_ops[dev->dev_type]).
 *
 * Returns KERN_UNSUPPORTED if the device type has no ops table or no read
 * hook, otherwise whatever the hook returns.
 */
kern_status_t device_read(struct device *dev, void *buf, size_t offset, size_t size, size_t *bytes_read, socks_flags_t flags)
{
	if (!type_ops[dev->dev_type] || !type_ops[dev->dev_type]->read) {
		return KERN_UNSUPPORTED;
	}

	/* invoke the type hook exactly once, using the offset-aware signature. */
	return type_ops[dev->dev_type]->read(dev, buf, offset, size, bytes_read, flags);
}
kern_status_t device_write(struct device *dev, const void *buf, size_t size, size_t *bytes_written, socks_flags_t flags)
kern_status_t device_write(struct device *dev, const void *buf, size_t offset, size_t size, size_t *bytes_written, socks_flags_t flags)
{
kern_status_t status = KERN_UNSUPPORTED;
if (type_ops[dev->dev_type] && type_ops[dev->dev_type]->write) {
status = type_ops[dev->dev_type]->write(dev, buf, size, bytes_written, flags);
status = type_ops[dev->dev_type]->write(dev, buf, offset, size, bytes_written, flags);
}
return status;
@@ -150,16 +150,16 @@ struct device *cast_to_device(struct object *obj)
return DEVICE_CAST(obj);
}
static kern_status_t device_object_read(struct object *obj, void *p, size_t *count, socks_flags_t flags)
/*
 * Object-layer read hook for devices.
 *
 * `*count` is the number of bytes requested on entry and is overwritten
 * with the number of bytes device_read() reports on return. `offset` is the
 * byte position to read from.
 */
static kern_status_t device_object_read(struct object *obj, void *p, size_t offset, size_t *count, socks_flags_t flags)
{
	struct device *dev = DEVICE_CAST(obj);

	/* device_read() takes (dev, buf, offset, size, bytes_read, flags):
	 * offset comes before the size. */
	return device_read(dev, p, offset, *count, count, flags);
}
static kern_status_t device_object_write(struct object *obj, const void *p, size_t *count, socks_flags_t flags)
/*
 * Object-layer write hook for devices.
 *
 * `*count` is the number of bytes to write on entry and is overwritten with
 * the number of bytes device_write() reports on return. `offset` is the
 * byte position to write at.
 */
static kern_status_t device_object_write(struct object *obj, const void *p, size_t offset, size_t *count, socks_flags_t flags)
{
	struct device *dev = DEVICE_CAST(obj);

	/* device_write() takes (dev, buf, offset, size, bytes_written, flags):
	 * offset comes before the size. */
	return device_write(dev, p, offset, *count, count, flags);
}
static kern_status_t device_object_destroy(struct object *obj)

View File

@@ -60,7 +60,8 @@ kern_status_t input_device_report_event(struct input_device *dev, const struct i
return r == sizeof *ev ? KERN_OK : KERN_WOULD_BLOCK;
}
kern_status_t input_device_read(struct device *dev, void *buf, size_t size, size_t *bytes_read, socks_flags_t flags)
kern_status_t input_device_read(struct device *dev, void *buf, size_t offset,
size_t size, size_t *bytes_read, socks_flags_t flags)
{
if (dev->dev_type != DEV_TYPE_INPUT || (size % sizeof (struct input_event)) != 0) {
return KERN_INVALID_ARGUMENT;