diff --git a/vm/vm-region.c b/vm/vm-region.c
index 0e2a774..b4ac9e6 100644
--- a/vm/vm-region.c
+++ b/vm/vm-region.c
@@ -22,6 +22,25 @@ region_find_free_area_linear(region, length)
 #endif
 
+/* iterates over a range of mapped virtual memory in a region, and provides
+ * a moving buffer through which the memory can be accessed */
+struct vm_iterator {
+    struct vm_region *it_region;
+    struct vm_region_mapping *it_mapping;
+    virt_addr_t it_base;
+    vm_prot_t it_prot;
+    void *it_buf;
+    size_t it_max;
+};
+
+/* iterates recursively over the entries in a region */
+struct entry_iterator {
+    struct vm_region *it_root;
+    struct vm_region_entry *it_entry;
+    /* depth of it_entry relative to it_root */
+    unsigned int it_depth;
+};
+
 enum search_direction {
     SEARCH_LEFT,
     SEARCH_RIGHT,
@@ -62,7 +81,7 @@ static struct vm_region_mapping *mapping_from_entry(
     return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry);
 }
 
-static virt_addr_t entry_absolute_address(struct vm_region_entry *entry)
+static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry)
 {
     virt_addr_t result = 0;
     while (entry) {
@@ -285,11 +304,11 @@ static virt_addr_t region_find_free_area_ex(
     switch (direction) {
     case SEARCH_LEFT:
         right_node = start;
-        left_node = start ? btree_left(start) : NULL;
+        left_node = start ? btree_prev(start) : NULL;
         break;
     case SEARCH_RIGHT:
         left_node = start;
-        right_node = start ? btree_left(start) : NULL;
+        right_node = start ? btree_next(start) : NULL;
         break;
     default:
         return 0;
@@ -361,7 +380,7 @@ static off_t region_find_free_area_linear(
     size_t target_length)
 {
     if (!region->vr_entries.b_root) {
-        return region->vr_entry.e_offset;
+        return 0;
     }
 
     return region_find_free_area_ex(
@@ -403,16 +422,16 @@ static bool region_is_area_free(
     off_t base,
     size_t len)
 {
-    if (len >= region->vr_entry.e_size) {
-        return false;
-    }
-
-    if (base + len > region->vr_entry.e_size) {
-        return false;
-    }
-
     off_t limit = base + len - 1;
 
+    if (base >= region->vr_entry.e_size) {
+        return false;
+    }
+
+    if (limit >= region->vr_entry.e_size) {
+        return false;
+    }
+
     struct btree_node *cur = region->vr_entries.b_root;
     if (!cur) {
         return true;
@@ -441,7 +460,7 @@ static bool region_is_area_free(
 
 static kern_status_t region_validate_allocation(
     struct vm_region *parent,
-    enum vm_prot prot,
+    vm_prot_t prot,
     off_t *offp,
     size_t len)
 {
@@ -455,10 +474,13 @@ static kern_status_t region_validate_allocation(
 
     if (offset == VM_REGION_ANY_OFFSET) {
         offset = region_find_free_area(parent, len);
-        if (offset == 0) {
-            return KERN_NO_MEMORY;
-        }
-    } else if (!region_is_area_free(parent, offset, len)) {
+        *offp = offset;
+        return (offset == INVALID_OFFSET) ? KERN_NO_MEMORY : KERN_OK;
+    }
+
+    offset &= ~VM_PAGE_MASK;
+
+    if (!region_is_area_free(parent, offset, len)) {
         return KERN_INVALID_ARGUMENT;
     }
 
@@ -466,6 +488,248 @@ static kern_status_t region_validate_allocation(
     return KERN_OK;
 }
 
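As an aside, a minimal sketch of how a caller would consume the new contract above: with VM_REGION_ANY_OFFSET the chosen offset now comes back through offp and INVALID_OFFSET signals exhaustion, while fixed offsets are page-aligned before the overlap check. The wrapper function here (example_reserve) is hypothetical, not part of the patch.

/* hypothetical caller of region_validate_allocation */
static kern_status_t example_reserve(
    struct vm_region *parent,
    size_t len,
    off_t *offp)
{
    /* ask for any free page-aligned offset in parent */
    *offp = VM_REGION_ANY_OFFSET;

    kern_status_t status
        = region_validate_allocation(parent, VM_PROT_READ, offp, len);
    if (status != KERN_OK) {
        /* KERN_NO_MEMORY if the search failed, or
         * KERN_INVALID_ARGUMENT for a busy fixed offset */
        return status;
    }

    /* *offp now holds a page-aligned, free offset inside parent */
    return KERN_OK;
}
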
+static void vm_iterator_begin(
+    struct vm_iterator *it,
+    struct vm_region *region,
+    virt_addr_t base,
+    vm_prot_t prot)
+{
+    memset(it, 0x0, sizeof *it);
+    it->it_base = base;
+    it->it_region = region;
+    it->it_prot = prot;
+
+    off_t offset = base - vm_region_get_base_address(region);
+    it->it_mapping = region_get_mapping_recursive(region, &offset, 1);
+    if (!it->it_mapping || (it->it_mapping->m_prot & prot) != prot) {
+        return;
+    }
+
+    off_t object_offset = offset - it->it_mapping->m_entry.e_offset
+        + it->it_mapping->m_object_offset;
+    struct vm_page *pg = NULL;
+    if (prot & VM_PROT_WRITE) {
+        pg = vm_object_alloc_page(
+            it->it_mapping->m_object,
+            object_offset,
+            VM_PAGE_4K);
+    } else {
+        pg = vm_object_get_page(
+            it->it_mapping->m_object,
+            object_offset);
+    }
+
+    if (!pg) {
+        return;
+    }
+
+    void *buffer_base = vm_page_get_vaddr(pg);
+    phys_addr_t pg_addr = vm_page_get_paddr(pg);
+    size_t buffer_size = vm_page_get_size_bytes(pg);
+
+    /* extend the window across physically contiguous neighbouring pages */
+    while (1) {
+        struct btree_node *next_node = btree_next(&pg->p_bnode);
+        struct vm_page *next
+            = BTREE_CONTAINER(struct vm_page, p_bnode, next_node);
+        if (!next) {
+            break;
+        }
+
+        phys_addr_t next_addr = vm_page_get_paddr(next);
+        if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) {
+            break;
+        }
+
+        pg = next;
+        pg_addr = next_addr;
+        buffer_size += vm_page_get_size_bytes(next);
+    }
+
+    it->it_buf = buffer_base;
+    it->it_max = buffer_size;
+}
+
+static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
+{
+    if (nr_bytes < it->it_max) {
+        it->it_base += nr_bytes;
+        it->it_buf = (char *)it->it_buf + nr_bytes;
+        it->it_max -= nr_bytes;
+        return KERN_OK;
+    }
+
+    it->it_base += nr_bytes;
+    off_t offset = it->it_base - vm_region_get_base_address(it->it_region);
+
+    struct vm_region_mapping *next_mapping
+        = region_get_mapping_recursive(it->it_region, &offset, 1);
+    if (!next_mapping) {
+        it->it_buf = NULL;
+        it->it_max = 0;
+        return KERN_MEMORY_FAULT;
+    }
+
+    if ((next_mapping->m_prot & it->it_prot) != it->it_prot) {
+        it->it_buf = NULL;
+        it->it_max = 0;
+        return KERN_MEMORY_FAULT;
+    }
+
+    /* advance the iterator to the mapping that now backs it_base */
+    it->it_mapping = next_mapping;
+
+    off_t object_offset = offset - it->it_mapping->m_entry.e_offset
+        + it->it_mapping->m_object_offset;
+    struct vm_page *pg = NULL;
+    if (it->it_prot & VM_PROT_WRITE) {
+        pg = vm_object_alloc_page(
+            it->it_mapping->m_object,
+            object_offset,
+            VM_PAGE_4K);
+    } else {
+        pg = vm_object_get_page(
+            it->it_mapping->m_object,
+            object_offset);
+    }
+
+    if (!pg) {
+        return KERN_NO_MEMORY;
+    }
+
+    void *buffer_base = vm_page_get_vaddr(pg);
+    phys_addr_t pg_addr = vm_page_get_paddr(pg);
+    size_t buffer_size = vm_page_get_size_bytes(pg);
+
+    /* extend the window across physically contiguous neighbouring pages */
+    while (1) {
+        struct btree_node *next_node = btree_next(&pg->p_bnode);
+        struct vm_page *next
+            = BTREE_CONTAINER(struct vm_page, p_bnode, next_node);
+        if (!next) {
+            break;
+        }
+
+        phys_addr_t next_addr = vm_page_get_paddr(next);
+        if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) {
+            break;
+        }
+
+        pg = next;
+        pg_addr = next_addr;
+        buffer_size += vm_page_get_size_bytes(next);
+    }
+
+    it->it_buf = buffer_base;
+    it->it_max = buffer_size;
+    return KERN_OK;
+}
+
+static void entry_iterator_begin(
+    struct entry_iterator *it,
+    struct vm_region *root)
+{
+    memset(it, 0x0, sizeof *it);
+    it->it_root = root;
+    it->it_entry = &root->vr_entry;
+}
+
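A minimal usage sketch of the iterator pair above (example_checksum is hypothetical and assumes the MIN macro used elsewhere in this file): it_buf/it_max expose the contiguous buffer backing it_base, and vm_iterator_seek refills them when the current run is exhausted, failing on unmapped or insufficiently protected addresses.

/* hypothetical example: sum the bytes of a mapped range */
static kern_status_t example_checksum(
    struct vm_region *region,
    virt_addr_t base,
    size_t len,
    uint8_t *sum)
{
    struct vm_iterator it;
    vm_iterator_begin(&it, region, base, VM_PROT_READ);

    uint8_t acc = 0;
    while (len && it.it_max) {
        size_t chunk = MIN(it.it_max, len);
        const uint8_t *p = it.it_buf;

        for (size_t i = 0; i < chunk; i++) {
            acc += p[i];
        }

        len -= chunk;
        if (len) {
            /* refill it_buf/it_max from the next mapping or page run */
            kern_status_t status = vm_iterator_seek(&it, chunk);
            if (status != KERN_OK) {
                return status;
            }
        }
    }

    *sum = acc;
    return len ? KERN_MEMORY_FAULT : KERN_OK;
}
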
+static void entry_iterator_move_next(struct entry_iterator *it)
+{
+    struct vm_region *region = region_from_entry(it->it_entry);
+    bool has_children = (region && !btree_empty(&region->vr_entries));
+
+    if (has_children) {
+        /* visit the first child */
+        struct btree_node *node = btree_first(&region->vr_entries);
+        struct vm_region_entry *entry
+            = BTREE_CONTAINER(struct vm_region_entry, e_node, node);
+        it->it_depth++;
+        it->it_entry = entry;
+        return;
+    }
+
+    /* go back up until we find a right sibling. */
+    struct vm_region_entry *cur = it->it_entry;
+
+    while (1) {
+        /* never walk past the root of the iteration, even if the root
+         * itself has siblings in its parent's tree */
+        if (cur == &it->it_root->vr_entry) {
+            it->it_entry = NULL;
+            return;
+        }
+
+        struct btree_node *sibling = btree_next(&cur->e_node);
+        if (sibling) {
+            it->it_entry = BTREE_CONTAINER(
+                struct vm_region_entry,
+                e_node,
+                sibling);
+            return;
+        }
+
+        struct vm_region_entry *parent_entry = cur->e_parent;
+        struct vm_region *parent = region_from_entry(parent_entry);
+
+        if (!parent) {
+            it->it_entry = NULL;
+            return;
+        }
+
+        it->it_depth--;
+        cur = parent_entry;
+    }
+}
+
+static void mapping_iterator_begin(
+    struct entry_iterator *it,
+    struct vm_region *root,
+    off_t offset,
+    size_t length,
+    off_t *offp)
+{
+    entry_iterator_begin(it, root);
+    while (it->it_entry) {
+        off_t base = entry_absolute_address(it->it_entry)
+            - root->vr_entry.e_offset;
+        off_t limit = base + it->it_entry->e_size - 1;
+
+        /* stop at the first mapping that overlaps
+         * [offset, offset + length); this includes mappings that lie
+         * entirely inside the range */
+        if (it->it_entry->e_type == VM_REGION_ENTRY_MAPPING
+            && base < (off_t)(offset + length)
+            && limit >= offset) {
+            *offp = base;
+            return;
+        }
+
+        entry_iterator_move_next(it);
+    }
+}
+
+static void mapping_iterator_move_next(
+    struct entry_iterator *it,
+    off_t offset,
+    size_t length,
+    off_t *offp)
+{
+    do {
+        entry_iterator_move_next(it);
+    } while (it->it_entry
+        && it->it_entry->e_type != VM_REGION_ENTRY_MAPPING);
+
+    if (!it->it_entry) {
+        return;
+    }
+
+    off_t base = entry_absolute_address(it->it_entry)
+        - it->it_root->vr_entry.e_offset;
+
+    if (base >= offset + length) {
+        it->it_entry = NULL;
+    } else {
+        *offp = base;
+    }
+}
+
 /*** PUBLIC API **************************************************************/
 
 kern_status_t vm_region_type_init(void)
@@ -550,17 +814,32 @@ kern_status_t vm_region_map_object(
     struct vm_object *object,
     off_t object_offset,
     size_t length,
-    enum vm_prot prot,
+    vm_prot_t prot,
     virt_addr_t *out)
 {
     object_offset &= ~VM_PAGE_MASK;
 
+    if (region_offset != VM_REGION_ANY_OFFSET) {
+        off_t limit = region_offset + length;
+
+        if (region_offset & VM_PAGE_MASK) {
+            region_offset &= ~VM_PAGE_MASK;
+        }
+
+        if (limit & VM_PAGE_MASK) {
+            limit &= ~VM_PAGE_MASK;
+            limit += VM_PAGE_SIZE;
+        }
+
+        length = limit - region_offset;
+    }
+
     if (length & VM_PAGE_MASK) {
         length &= ~VM_PAGE_MASK;
         length += VM_PAGE_SIZE;
     }
 
-    if (!region || !object || !out) {
+    if (!region || !object) {
         return KERN_INVALID_ARGUMENT;
     }
 
@@ -611,9 +890,6 @@ kern_status_t vm_region_map_object(
     mapping->m_entry.e_offset = region_offset;
     mapping->m_entry.e_size = length;
 
-    region_put_entry(region, &mapping->m_entry);
-    queue_push_back(&object->vo_mappings, &mapping->m_object_entry);
-
 #ifdef TRACE
     virt_addr_t abs_base = entry_absolute_address(&mapping->m_entry);
     tracek("mapping %s at [%llx-%llx]",
@@ -621,11 +897,304 @@ kern_status_t vm_region_map_object(
         abs_base,
         abs_base + length);
 #endif
+    region_put_entry(region, &mapping->m_entry);
+    queue_push_back(&object->vo_mappings, &mapping->m_object_entry);
+
+    if (out) {
+        *out = entry_absolute_address(&mapping->m_entry);
+    }
 
-    *out = entry_absolute_address(&mapping->m_entry);
     return KERN_OK;
 }
 
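A worked example of the fixed-offset rounding introduced above, assuming 4 KiB pages (VM_PAGE_SIZE 0x1000, VM_PAGE_MASK 0xfff): a request at region_offset 0x1100 for 0x2000 bytes expands outward so every touched page is covered. The helper below is illustrative only; its round-up is written as a single mask expression, equivalent to the patch's conditional form.

/* hypothetical illustration of the page rounding in vm_region_map_object */
void example_rounding(void)
{
    off_t region_offset = 0x1100;
    size_t length = 0x2000;
    off_t limit = region_offset + length;               /* 0x3100 */

    region_offset &= ~VM_PAGE_MASK;                     /* 0x1000 */
    limit = (limit + VM_PAGE_MASK) & ~VM_PAGE_MASK;     /* 0x4000 */
    length = limit - region_offset;                     /* 0x3000 */

    (void)length;
}
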
+/* unmap some pages in the middle of a mapping, splitting it into two separate
+ * mappings */
+static kern_status_t split_mapping(
+    struct vm_region_mapping *mapping,
+    struct vm_region *root,
+    off_t mapping_offset,
+    off_t unmap_offset,
+    off_t unmap_limit)
+{
+    tracek("split mapping [%zx-%zx] subtract [%zx-%zx]",
+        mapping_offset,
+        mapping_offset + mapping->m_entry.e_size,
+        unmap_offset,
+        unmap_limit);
+
+    off_t mapping_limit = mapping_offset + mapping->m_entry.e_size;
+
+    struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);
+    struct vm_region_mapping *left = mapping;
+    struct vm_region_mapping *right
+        = vm_cache_alloc(&mapping_cache, VM_NORMAL);
+    if (!right) {
+        return KERN_NO_MEMORY;
+    }
+
+    off_t left_offset = mapping->m_entry.e_offset;
+    /* right's entry offset is parent-relative, like left's */
+    off_t right_offset = left_offset + (unmap_limit - mapping_offset);
+    off_t left_object_offset = mapping->m_object_offset;
+    size_t left_length = unmap_offset - mapping_offset;
+    size_t right_length = mapping_limit - unmap_limit;
+    off_t right_object_offset = mapping->m_object_offset
+        + mapping->m_entry.e_size - right_length;
+
+    tracek("mapping=[%zx-%zx]->[%zx-%zx]",
+        mapping_offset,
+        mapping_limit,
+        mapping->m_object_offset,
+        mapping->m_object_offset + mapping->m_entry.e_size);
+    tracek("left=[%zx-%zx]->[%zx-%zx], right=[%zx-%zx]->[%zx-%zx]",
+        left_offset,
+        left_offset + left_length,
+        left_object_offset,
+        left_object_offset + left_length,
+        right_offset,
+        right_offset + right_length,
+        right_object_offset,
+        right_object_offset + right_length);
+
+    left->m_object_offset = left_object_offset;
+    left->m_entry.e_offset = left_offset;
+    left->m_entry.e_size = left_length;
+
+    right->m_object = left->m_object;
+    right->m_prot = left->m_prot;
+    right->m_entry.e_type = VM_REGION_ENTRY_MAPPING;
+    right->m_entry.e_parent = left->m_entry.e_parent;
+
+    right->m_object_offset = right_object_offset;
+    right->m_entry.e_offset = right_offset;
+    right->m_entry.e_size = right_length;
+
+    virt_addr_t unmap_base = root->vr_entry.e_offset + unmap_offset;
+    size_t unmap_length = unmap_limit - unmap_offset;
+
+    for (size_t i = 0; i < unmap_length; i += VM_PAGE_SIZE) {
+        tracek("unmapping %zx", unmap_base + i);
+        pmap_remove(root->vr_pmap, unmap_base + i);
+    }
+
+    region_put_entry(parent, &right->m_entry);
+    /* the new right half references the object too, so delete_mapping
+     * can later unlink it */
+    queue_push_back(&right->m_object->vo_mappings, &right->m_object_entry);
+
+    return KERN_OK;
+}
+
+/* unmap some pages from the left side of a mapping to somewhere in the
+ * middle. */
+static kern_status_t left_reduce_mapping(
+    struct vm_region_mapping *mapping,
+    struct vm_region *root,
+    off_t mapping_offset,
+    off_t unmap_offset,
+    off_t unmap_limit)
+{
+    /* unmap_limit falls somewhere between mapping_offset and
+     * mapping_offset+length */
+    tracek("left reduce mapping [%zx-%zx] subtract [%zx-%zx]",
+        mapping_offset,
+        mapping_offset + mapping->m_entry.e_size,
+        unmap_offset,
+        unmap_limit);
+
+    virt_addr_t base = root->vr_entry.e_offset + mapping_offset;
+    off_t limit = mapping_offset + mapping->m_entry.e_size;
+    size_t length = mapping->m_entry.e_size - (limit - unmap_limit);
+    tracek("  unmapping %zx-%zx (%zx bytes)", base, base + length, length);
+
+    for (size_t i = 0; i < length; i += VM_PAGE_SIZE) {
+        pmap_remove(root->vr_pmap, base + i);
+    }
+
+    mapping->m_entry.e_offset += length;
+    mapping->m_object_offset += length;
+    mapping->m_entry.e_size -= length;
+
+    return KERN_OK;
+}
+
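Concrete numbers make the split arithmetic above easier to check. The sketch below (hypothetical, not part of the patch) takes a mapping at parent offset 0x2000 of size 0x6000 backed from object offset 0, with [0x4000, 0x6000) unmapped; it yields left = [0x2000, 0x4000) -> object 0x0 and right = [0x6000, 0x8000) -> object 0x4000.

/* hypothetical sanity check of split_mapping's offset math */
void example_split_math(void)
{
    off_t mapping_offset = 0x2000, unmap_offset = 0x4000;
    off_t unmap_limit = 0x6000, mapping_limit = 0x8000;
    off_t left_offset = 0x2000, object_offset = 0x0;

    size_t left_length = unmap_offset - mapping_offset;        /* 0x2000 */
    size_t right_length = mapping_limit - unmap_limit;         /* 0x2000 */
    off_t right_offset
        = left_offset + (unmap_limit - mapping_offset);        /* 0x6000 */
    off_t right_object_offset = object_offset
        + (mapping_limit - mapping_offset) - right_length;     /* 0x4000 */

    (void)left_length;
    (void)right_offset;
    (void)right_object_offset;
}
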
+/* unmap some pages from the middle of a mapping to the right side. */
+static kern_status_t right_reduce_mapping(
+    struct vm_region_mapping *mapping,
+    struct vm_region *root,
+    off_t mapping_offset,
+    off_t unmap_offset,
+    off_t unmap_limit)
+{
+    /* unmap_offset falls somewhere between mapping_offset and
+     * mapping_offset+length */
+    tracek("right reduce mapping [%zx-%zx] subtract [%zx-%zx]",
+        mapping_offset,
+        mapping_offset + mapping->m_entry.e_size,
+        unmap_offset,
+        unmap_limit);
+
+    virt_addr_t base = root->vr_entry.e_offset + unmap_offset;
+    off_t limit = mapping_offset + mapping->m_entry.e_size;
+    size_t length = limit - unmap_offset;
+    tracek("  unmapping %zx-%zx (%zx bytes)", base, base + length, length);
+
+    for (size_t i = 0; i < length; i += VM_PAGE_SIZE) {
+        pmap_remove(root->vr_pmap, base + i);
+    }
+
+    mapping->m_entry.e_size -= length;
+
+    return KERN_OK;
+}
+
+/* completely unmap and delete an entire mapping */
+static kern_status_t delete_mapping(
+    struct vm_region_mapping *mapping,
+    struct vm_region *root,
+    off_t mapping_offset)
+{
+    virt_addr_t base = root->vr_entry.e_offset + mapping_offset;
+    tracek("delete mapping [%zx-%zx]",
+        base,
+        base + mapping->m_entry.e_size);
+
+    for (size_t i = 0; i < mapping->m_entry.e_size; i += VM_PAGE_SIZE) {
+        pmap_remove(root->vr_pmap, base + i);
+    }
+
+    struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);
+
+    queue_delete(&mapping->m_object->vo_mappings, &mapping->m_object_entry);
+    btree_delete(&parent->vr_entries, &mapping->m_entry.e_node);
+
+    vm_cache_free(&mapping_cache, mapping);
+
+    return KERN_OK;
+}
+
+kern_status_t vm_region_unmap(
+    struct vm_region *region,
+    off_t unmap_area_offset,
+    size_t unmap_area_length)
+{
+    kern_status_t status = KERN_OK;
+    struct entry_iterator it;
+    off_t unmap_area_limit = unmap_area_offset + unmap_area_length;
+    tracek("unmapping %zx-%zx", unmap_area_offset, unmap_area_limit);
+    off_t tmp = 0;
+
+    mapping_iterator_begin(
+        &it,
+        region,
+        unmap_area_offset,
+        unmap_area_length,
+        &tmp);
+    while (it.it_entry) {
+        struct vm_region_mapping *mapping
+            = mapping_from_entry(it.it_entry);
+        off_t mapping_offset = tmp;
+        off_t mapping_limit = mapping_offset + it.it_entry->e_size;
+
+        /* advance before the current mapping is resized or deleted */
+        mapping_iterator_move_next(
+            &it,
+            unmap_area_offset,
+            unmap_area_length,
+            &tmp);
+
+        bool split
+            = (unmap_area_offset > mapping_offset
+               && unmap_area_limit < mapping_limit);
+        bool delete
+            = (unmap_area_offset <= mapping_offset
+               && unmap_area_limit >= mapping_limit);
+        bool left_reduce
+            = (unmap_area_offset <= mapping_offset
+               && unmap_area_limit < mapping_limit);
+        bool right_reduce
+            = (unmap_area_offset > mapping_offset
+               && unmap_area_limit >= mapping_limit);
+
+        if (split) {
+            status = split_mapping(
+                mapping,
+                region,
+                mapping_offset,
+                unmap_area_offset,
+                unmap_area_limit);
+        } else if (delete) {
+            status = delete_mapping(
+                mapping,
+                region,
+                mapping_offset);
+        } else if (left_reduce) {
+            status = left_reduce_mapping(
+                mapping,
+                region,
+                mapping_offset,
+                unmap_area_offset,
+                unmap_area_limit);
+        } else if (right_reduce) {
+            status = right_reduce_mapping(
+                mapping,
+                region,
+                mapping_offset,
+                unmap_area_offset,
+                unmap_area_limit);
+        } else {
+            panic("don't know what to do with this mapping");
+        }
+
+        if (status != KERN_OK) {
+            break;
+        }
+    }
+
+    return status;
+}
+
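A hypothetical end-to-end use of the unmap path above, exercising the split case by punching a one-page hole out of a three-page mapping. It assumes the full parameter list of vm_region_map_object is (region, region_offset, object, object_offset, length, prot, out); only the tail of that signature is visible in this diff.

/* hypothetical example: map three pages, then unmap the middle one */
static kern_status_t example_punch_hole(
    struct vm_region *region,
    struct vm_object *object)
{
    virt_addr_t base = 0;
    kern_status_t status = vm_region_map_object(
        region,
        VM_REGION_ANY_OFFSET,
        object,
        0,
        3 * VM_PAGE_SIZE,
        VM_PROT_READ | VM_PROT_WRITE,
        &base);
    if (status != KERN_OK) {
        return status;
    }

    /* vm_region_unmap takes root-region-relative offsets */
    off_t offset = (base + VM_PAGE_SIZE)
        - vm_region_get_base_address(region);
    return vm_region_unmap(region, offset, VM_PAGE_SIZE);
}
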
+bool vm_region_validate_access(
+    struct vm_region *region,
+    virt_addr_t ptr,
+    size_t len,
+    vm_prot_t prot)
+{
+    if (len == 0) {
+        return true;
+    }
+
+    if (ptr < region->vr_entry.e_offset) {
+        return false;
+    }
+
+    off_t offset = ptr - region->vr_entry.e_offset;
+
+    if (len > region->vr_entry.e_size) {
+        return false;
+    }
+
+    if (offset + len > region->vr_entry.e_size) {
+        return false;
+    }
+
+    /* widen to whole pages so the last page of an unaligned access is
+     * still checked */
+    len += offset & VM_PAGE_MASK;
+    offset &= ~VM_PAGE_MASK;
+
+    /* TODO improve this to not require a per-page loop */
+    for (off_t i = 0; i < len; i += VM_PAGE_SIZE) {
+        off_t x = offset + i;
+        struct vm_region_mapping *mapping
+            = region_get_mapping_recursive(
+                region,
+                &x,
+                VM_PAGE_SIZE);
+        if (!mapping) {
+            return false;
+        }
+
+        if ((mapping->m_prot & prot) != prot) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 kern_status_t vm_region_demand_map(
     struct vm_region *region,
     virt_addr_t addr,
@@ -665,68 +1234,148 @@ kern_status_t vm_region_demand_map(
         PMAP_NORMAL);
 }
 
-#ifdef TRACE
-void vm_region_dump(struct vm_region *region, int depth)
+virt_addr_t vm_region_get_base_address(const struct vm_region *region)
 {
-    char line[128] = {0};
+    return entry_absolute_address(&region->vr_entry);
+}
+
+kern_status_t vm_region_memmove(
+    struct vm_region *dest_region,
+    virt_addr_t dest_ptr,
+    struct vm_region *src_region,
+    virt_addr_t src_ptr,
+    size_t count,
+    size_t *nr_moved)
+{
+    struct vm_iterator src, dest;
+    vm_iterator_begin(
+        &src,
+        src_region,
+        src_ptr,
+        VM_PROT_READ | VM_PROT_USER);
+    vm_iterator_begin(
+        &dest,
+        dest_region,
+        dest_ptr,
+        VM_PROT_WRITE | VM_PROT_USER);
+
+    kern_status_t status = KERN_OK;
+    size_t r = 0;
+
+    while (count && src.it_max && dest.it_max) {
+        size_t to_move = MIN(MIN(src.it_max, dest.it_max), count);
+        memmove(dest.it_buf, src.it_buf, to_move);
+
+        status = vm_iterator_seek(&src, to_move);
+        if (status != KERN_OK) {
+            break;
+        }
+
+        status = vm_iterator_seek(&dest, to_move);
+        if (status != KERN_OK) {
+            break;
+        }
+
+        count -= to_move;
+        r += to_move;
+    }
+
+    if (nr_moved) {
+        *nr_moved = r;
+    }
+
+    return status;
+}
+
+kern_status_t vm_region_memmove_v(
+    struct vm_region *dest_region,
+    size_t dest_offset,
+    struct iovec *dest_vecs,
+    size_t nr_dest_vecs,
+    struct vm_region *src_region,
+    size_t src_offset,
+    const struct iovec *src_vecs,
+    size_t nr_src_vecs,
+    size_t bytes_to_move)
+{
+    struct iovec_iterator src, dest;
+    iovec_iterator_begin(&src, src_vecs, nr_src_vecs);
+    iovec_iterator_begin(&dest, dest_vecs, nr_dest_vecs);
+
+    iovec_iterator_seek(&src, src_offset);
+    iovec_iterator_seek(&dest, dest_offset);
+
+    while (bytes_to_move && src.it_len && dest.it_len) {
+        size_t to_move
+            = MIN(MIN(src.it_len, dest.it_len), bytes_to_move);
+
+        kern_status_t status = vm_region_memmove(
+            dest_region,
+            dest.it_base,
+            src_region,
+            src.it_base,
+            to_move,
+            NULL);
+        if (status != KERN_OK) {
+            return status;
+        }
+
+        iovec_iterator_seek(&src, to_move);
+        iovec_iterator_seek(&dest, to_move);
+        bytes_to_move -= to_move;
+    }
+
+    return KERN_OK;
+}
+
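A sketch of a user-copy primitive built on vm_region_memmove (example_copy_in and its region naming are hypothetical): both sides must be user-accessible with the right protections, otherwise the move stops short and reports a fault status.

/* hypothetical example: copy len bytes between two regions */
static kern_status_t example_copy_in(
    struct vm_region *dest_region,
    virt_addr_t dst,
    struct vm_region *src_region,
    virt_addr_t src,
    size_t len)
{
    size_t moved = 0;
    kern_status_t status = vm_region_memmove(
        dest_region, dst, src_region, src, len, &moved);

    /* a short move with KERN_OK means an iterator started empty */
    if (status == KERN_OK && moved != len) {
        return KERN_MEMORY_FAULT;
    }

    return status;
}
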
+#ifdef TRACE
+void vm_region_dump(struct vm_region *region)
+{
+    char s[128] = {0};
     size_t p = 0;
 
-    for (int i = 0; i < depth; i++) {
-        p += snprintf(line + p, sizeof line - p, " ");
-    }
-
-    p += snprintf(
-        line + p,
-        sizeof line - p,
-        "region: %s [%llx-%llx]",
-        region->vr_name,
-        region->vr_entry.e_offset,
-        region->vr_entry.e_offset + region->vr_entry.e_size);
-
-    printk("%s", line);
-
-    struct btree_node *cur = btree_first(&region->vr_entries);
-    while (cur) {
-        memset(line, 0x0, sizeof line);
+    struct entry_iterator it;
+    entry_iterator_begin(&it, region);
+    while (it.it_entry) {
         p = 0;
-        for (int i = 0; i < depth + 1; i++) {
-            p += snprintf(line + p, sizeof line - p, " ");
+        for (unsigned int i = 0; i < it.it_depth; i++) {
+            p += snprintf(s + p, sizeof s - p, " ");
         }
 
-        struct vm_region_entry *entry
-            = BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
-        struct vm_region *child_region = region_from_entry(entry);
-        struct vm_region_mapping *child_mapping
-            = mapping_from_entry(entry);
-
-        switch (entry->e_type) {
-        case VM_REGION_ENTRY_REGION:
-            break;
-        case VM_REGION_ENTRY_MAPPING:
-            p += snprintf(
-                line + p,
-                sizeof line - p,
-                "mapping: %s p:[%llx-%llx] -> v:[%llx-%llx]",
-                child_mapping->m_object->vo_name,
-                child_mapping->m_object_offset,
-                child_mapping->m_object_offset
-                    + child_mapping->m_entry.e_size,
-                child_mapping->m_entry.e_offset,
-                child_mapping->m_entry.e_offset
-                    + child_mapping->m_entry.e_size);
-            printk("%s", line);
-            break;
-        default:
-            p += snprintf(line + p, sizeof line - p, "invalid");
-            printk("%s", line);
-            break;
-        }
-
-        if (child_region) {
-            vm_region_dump(child_region, depth + 1);
-        }
-        cur = btree_next(cur);
+        switch (it.it_entry->e_type) {
+        case VM_REGION_ENTRY_REGION: {
+            struct vm_region *child
+                = region_from_entry(it.it_entry);
+            p += snprintf(
+                s + p,
+                sizeof s - p,
+                "-region [%zx-%zx] %s",
+                child->vr_entry.e_offset,
+                child->vr_entry.e_offset
+                    + child->vr_entry.e_size,
+                child->vr_name);
+            break;
+        }
+        case VM_REGION_ENTRY_MAPPING: {
+            struct vm_region_mapping *mapping
+                = mapping_from_entry(it.it_entry);
+            p += snprintf(
+                s + p,
+                sizeof s - p,
+                "+mapping [%zx-%zx] %s",
+                mapping->m_entry.e_offset,
+                mapping->m_entry.e_offset
+                    + mapping->m_entry.e_size,
+                mapping->m_object->vo_name);
+            break;
+        }
+        default:
+            break;
+        }
+
+        tracek("%s", s);
+        entry_iterator_move_next(&it);
     }
 }
 #endif