#include #include #include #include #include static DECLARE_BITMAP(block_device_ids, BLOCK_DEVICE_MAX); static spin_lock_t block_device_ids_lock = SPIN_LOCK_INIT; struct block_device *block_device_create(void) { struct device *dev = device_alloc(); if (!dev) { return NULL; } dev->dev_type = DEV_TYPE_BLOCK; return BLOCK_DEVICE(dev); } struct block_device *block_device_from_generic(struct device *dev) { dev->dev_type = DEV_TYPE_BLOCK; return BLOCK_DEVICE(dev); } static kern_status_t do_read_blocks(struct block_device *blockdev, void *buf, sectors_t offset, size_t nr_sectors, size_t *sectors_read, socks_flags_t flags) { struct device *dev = block_device_base(blockdev); struct iovec vec = { .io_buf = buf, .io_len = nr_sectors * blockdev->b_sector_size }; kern_status_t status = blockdev->b_ops->read_blocks(dev, offset, &nr_sectors, &vec, 1, flags); *sectors_read = nr_sectors; return status; } extern kern_status_t get_cached_sector(struct block_device *blockdev, sectors_t sector, socks_flags_t flags, void **bufp) { if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) { return KERN_UNSUPPORTED; } kern_status_t status = KERN_OK; struct bcache_sector cache_buf; status = bcache_get(&blockdev->b_cache, sector, true, &cache_buf); if (status != KERN_OK) { return status; } if (!cache_buf.sect_present) { size_t nr_read = 0; /* TODO read all missing blocks in one go */ status = do_read_blocks(blockdev, cache_buf.sect_buf, sector, 1, &nr_read, flags); if (status != KERN_OK) { return status; } bcache_mark_present(&cache_buf); } *bufp = cache_buf.sect_buf; return KERN_OK; } kern_status_t block_device_read_blocks(struct device *dev, void *buf, sectors_t offset, size_t nr_sectors, size_t *sectors_read, socks_flags_t flags) { struct block_device *blockdev = BLOCK_DEVICE(dev); if (!blockdev) { return KERN_INVALID_ARGUMENT; } if (!blockdev->b_ops || !blockdev->b_ops->read_blocks) { return KERN_UNSUPPORTED; } if (offset >= blockdev->b_capacity) { *sectors_read = 0; return KERN_OK; } if (offset + nr_sectors >= blockdev->b_capacity) { nr_sectors = blockdev->b_capacity - offset; } if (!nr_sectors) { *sectors_read = 0; return KERN_OK; } if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) { return do_read_blocks(blockdev, buf, offset, nr_sectors, sectors_read, flags); } bcache_lock(&blockdev->b_cache); size_t nr_read = 0; kern_status_t status = KERN_OK; for (sectors_t i = 0; i < nr_sectors; i++) { sectors_t sect = offset + i; void *sect_cache_buf; status = get_cached_sector(blockdev, sect, flags, §_cache_buf); if (status != KERN_OK) { bcache_unlock(&blockdev->b_cache); *sectors_read = nr_read; return status; } char *sect_dest_buf = (char *)buf + (i * blockdev->b_sector_size); memcpy(sect_dest_buf, sect_cache_buf, blockdev->b_sector_size); nr_read++; } bcache_unlock(&blockdev->b_cache); *sectors_read = nr_read; return KERN_OK; } kern_status_t block_device_read(struct device *dev, void *buf, size_t offset, size_t size, size_t *bytes_read, socks_flags_t flags) { struct block_device *blockdev = BLOCK_DEVICE(dev); if (!blockdev) { return KERN_INVALID_ARGUMENT; } kern_status_t status = KERN_OK; if (blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE) { /* no bcache for this device, so the client has to read data at sector granularity. */ sectors_t sect_offset = offset / blockdev->b_sector_size; size_t nr_sectors = size / blockdev->b_sector_size; if ((sect_offset * blockdev->b_sector_size != offset) || (nr_sectors * blockdev->b_sector_size != size)) { /* args are not sector-aligned */ return KERN_INVALID_ARGUMENT; } size_t sectors_read = 0; status = block_device_read_blocks(dev, buf, sect_offset, nr_sectors, §ors_read, flags); *bytes_read = sectors_read * blockdev->b_sector_size; return status; } bcache_lock(&blockdev->b_cache); char *dest = buf; sectors_t first_sect = offset / blockdev->b_sector_size; sectors_t last_sect = (offset + size) / blockdev->b_sector_size; size_t nr_read = 0; if (first_sect * blockdev->b_sector_size < offset) { /* non-sector sized chunk at the start of the buffer. */ void *sector_cachebuf; status = get_cached_sector(blockdev, first_sect, flags, §or_cachebuf); if (status != KERN_OK) { bcache_unlock(&blockdev->b_cache); *bytes_read = nr_read; return status; } unsigned int in_sect_offset = (offset - (first_sect * blockdev->b_sector_size)); unsigned int in_sect_size = MIN(blockdev->b_sector_size - in_sect_offset, size); char *p = (char *)sector_cachebuf + in_sect_offset; memcpy(dest, p, in_sect_size); dest += in_sect_size; nr_read += in_sect_size; first_sect++; } for (sectors_t i = first_sect; i < last_sect; i++) { void *sector_cachebuf; status = get_cached_sector(blockdev, i, flags, §or_cachebuf); if (status != KERN_OK) { bcache_unlock(&blockdev->b_cache); *bytes_read = nr_read; return status; } char *p = sector_cachebuf; memcpy(dest, p, blockdev->b_sector_size); dest += blockdev->b_sector_size; nr_read += blockdev->b_sector_size; } if (last_sect * blockdev->b_sector_size < offset + size && nr_read < size) { /* non-sector sized chunk at the end of the buffer. */ void *sector_cachebuf; status = get_cached_sector(blockdev, last_sect, flags, §or_cachebuf); if (status != KERN_OK) { bcache_unlock(&blockdev->b_cache); *bytes_read = nr_read; return status; } unsigned int in_sect_size = (offset + size) - (last_sect * blockdev->b_sector_size); char *p = sector_cachebuf; memcpy(dest, p, in_sect_size); nr_read += in_sect_size; } bcache_unlock(&blockdev->b_cache); *bytes_read = nr_read; return KERN_OK; } static kern_status_t generate_name(struct block_device *dev, char out[DEV_NAME_MAX]) { snprintf(out, DEV_NAME_MAX, "disk%u", dev->b_id); return KERN_OK; } kern_status_t block_device_register(struct device *dev) { struct block_device *blockdev = &dev->blk; if (!(blockdev->b_flags & BLOCK_DEVICE_NO_BCACHE)) { kern_status_t status = bcache_init(&blockdev->b_cache, blockdev->b_sector_size); if (status != KERN_OK) { return status; } } unsigned long flags; spin_lock_irqsave(&block_device_ids_lock, &flags); unsigned int id = bitmap_lowest_clear(block_device_ids, BLOCK_DEVICE_MAX); bitmap_set(block_device_ids, id); spin_unlock_irqrestore(&block_device_ids_lock, flags); blockdev->b_id = id; char name[DEV_NAME_MAX]; generate_name(blockdev, name); char path[OBJECT_PATH_MAX]; snprintf(path, sizeof path, "/dev/block/%s", name); char size_string[32]; data_size_to_string(blockdev->b_sector_size * blockdev->b_capacity, size_string, sizeof size_string); printk("dev: found %s %s block device '%s'", size_string, dev->dev_owner->drv_name, dev->dev_model_name); return object_namespace_create_link(global_namespace(), path, &dev->dev_base); } struct device_type_ops block_type_ops = { .register_device = block_device_register, .read = block_device_read, }; static BTREE_DEFINE_SIMPLE_GET(struct vm_page, sectors_t, p_bnode, p_blockid, get_block_page) static BTREE_DEFINE_SIMPLE_INSERT(struct vm_page, p_bnode, p_blockid, put_block_page) struct bcache *bcache_create(unsigned int block_size) { struct bcache *out = kmalloc(sizeof *out, VM_NORMAL); if (!out) { return NULL; } if (bcache_init(out, block_size) != KERN_OK) { kfree(out); return NULL; } return out; } void bcache_destroy(struct bcache *cache) { bcache_deinit(cache); kfree(cache); } kern_status_t bcache_init(struct bcache *cache, unsigned int block_size) { memset(cache, 0x0, sizeof *cache); cache->b_sector_size = block_size; cache->b_sectors_per_page = VM_PAGE_SIZE / block_size; cache->b_lock = SPIN_LOCK_INIT; return KERN_OK; } void bcache_deinit(struct bcache *cache) { struct btree_node *first_node = btree_first(&cache->b_pagetree); if (!first_node) { return; } struct vm_page *cur = BTREE_CONTAINER(struct vm_page, p_bnode, first_node); while (cur) { struct btree_node *next_node = btree_next(&cur->p_bnode); struct vm_page *next = BTREE_CONTAINER(struct vm_page, p_bnode, next_node); cur->p_flags &= ~(VM_PAGE_CACHE); btree_delete(&cache->b_pagetree, &cur->p_bnode); vm_page_free(cur); cur = next; } } kern_status_t bcache_get(struct bcache *cache, sectors_t at, bool create, struct bcache_sector *out) { unsigned int page_index = at % cache->b_sectors_per_page; at /= cache->b_sectors_per_page; struct vm_page *page = get_block_page(&cache->b_pagetree, at); if (!page) { if (!create) { return KERN_NO_ENTRY; } page = vm_page_alloc(VM_PAGE_MIN_ORDER, VM_NORMAL); if (!page) { return KERN_NO_MEMORY; } page->p_flags |= VM_PAGE_CACHE; bitmap_zero(page->p_blockbits, VM_MAX_SECTORS_PER_PAGE); page->p_blockid = at; } out->sect_page = page; out->sect_index = page_index; out->sect_buf = vm_page_get_vaddr(page); out->sect_present = bitmap_check(page->p_blockbits, page_index); return KERN_OK; } void bcache_mark_present(struct bcache_sector *sect) { bitmap_set(sect->sect_page->p_blockbits, sect->sect_index); }