diff --git a/include/kernel/syscall.h b/include/kernel/syscall.h index b7dd0ad..6f7908b 100644 --- a/include/kernel/syscall.h +++ b/include/kernel/syscall.h @@ -24,6 +24,7 @@ VM_PROT_READ | VM_PROT_WRITE | VM_PROT_USER) extern kern_status_t sys_task_exit(int status); +extern kern_status_t sys_task_self(kern_handle_t *out); extern kern_status_t sys_task_create( kern_handle_t parent_handle, const char *name, @@ -37,6 +38,9 @@ extern kern_status_t sys_task_create_thread( uintptr_t *args, size_t nr_args, kern_handle_t *out_thread); +extern kern_status_t sys_task_get_address_space( + kern_handle_t task, + kern_handle_t *out); extern kern_status_t sys_thread_start(kern_handle_t thread); @@ -75,6 +79,7 @@ extern kern_status_t sys_vm_region_create( vm_prot_t prot, kern_handle_t *out, virt_addr_t *out_base_address); +extern kern_status_t sys_vm_region_kill(kern_handle_t region); extern kern_status_t sys_vm_region_read( kern_handle_t region, void *dst, diff --git a/libmango/arch/x86_64/syscall.S b/libmango/arch/x86_64/syscall.S index ea4b2af..a2059df 100644 --- a/libmango/arch/x86_64/syscall.S +++ b/libmango/arch/x86_64/syscall.S @@ -56,8 +56,10 @@ .endm SYSCALL_GATE task_exit SYS_TASK_EXIT 1 +SYSCALL_GATE task_self SYS_TASK_SELF 0 SYSCALL_GATE task_create SYS_TASK_CREATE 5 SYSCALL_GATE task_create_thread SYS_TASK_CREATE_THREAD 6 +SYSCALL_GATE task_get_address_space SYS_TASK_GET_ADDRESS_SPACE 1 SYSCALL_GATE thread_start SYS_THREAD_START 1 @@ -67,6 +69,7 @@ SYSCALL_GATE vm_object_write SYS_VM_OBJECT_WRITE 5 SYSCALL_GATE vm_object_copy SYS_VM_OBJECT_COPY 6 SYSCALL_GATE vm_region_create SYS_VM_REGION_CREATE 8 +SYSCALL_GATE vm_region_kill SYS_VM_REGION_KILL 1 SYSCALL_GATE vm_region_read SYS_VM_REGION_READ 5 SYSCALL_GATE vm_region_write SYS_VM_REGION_WRITE 5 SYSCALL_GATE vm_region_map_absolute SYS_VM_REGION_MAP_ABSOLUTE 7 diff --git a/libmango/include-user/mango/task.h b/libmango/include-user/mango/task.h index 236ee14..e33c8f7 100644 --- a/libmango/include-user/mango/task.h +++ 
b/libmango/include-user/mango/task.h @@ -5,6 +5,7 @@ #include extern kern_status_t task_exit(int status); +extern kern_status_t task_self(kern_handle_t *out); extern kern_status_t task_create( kern_handle_t parent, @@ -19,6 +20,9 @@ extern kern_status_t task_create_thread( uintptr_t *args, size_t nr_args, kern_handle_t *out_thread); +extern kern_status_t task_get_address_space( + kern_handle_t task, + kern_handle_t *out); extern kern_status_t thread_start(kern_handle_t thread); diff --git a/libmango/include-user/mango/vm.h b/libmango/include-user/mango/vm.h index 67ee6db..d044291 100644 --- a/libmango/include-user/mango/vm.h +++ b/libmango/include-user/mango/vm.h @@ -39,6 +39,7 @@ extern kern_status_t vm_region_create( vm_prot_t prot, kern_handle_t *out, virt_addr_t *out_base_address); +extern kern_status_t vm_region_kill(kern_handle_t region); extern kern_status_t vm_region_read( kern_handle_t region, void *dst, diff --git a/libmango/include/mango/syscall.h b/libmango/include/mango/syscall.h index 2d17fcb..03fd379 100644 --- a/libmango/include/mango/syscall.h +++ b/libmango/include/mango/syscall.h @@ -2,14 +2,17 @@ #define MANGO_SYSCALL_H_ #define SYS_TASK_EXIT 1 +#define SYS_TASK_SELF 31 #define SYS_TASK_CREATE 2 #define SYS_TASK_CREATE_THREAD 3 +#define SYS_TASK_GET_ADDRESS_SPACE 33 #define SYS_THREAD_START 30 #define SYS_VM_OBJECT_CREATE 4 #define SYS_VM_OBJECT_READ 5 #define SYS_VM_OBJECT_WRITE 6 #define SYS_VM_OBJECT_COPY 29 #define SYS_VM_REGION_CREATE 7 +#define SYS_VM_REGION_KILL 34 #define SYS_VM_REGION_READ 8 #define SYS_VM_REGION_WRITE 9 #define SYS_VM_REGION_MAP_ABSOLUTE 10 diff --git a/syscall/dispatch.c b/syscall/dispatch.c index c96d23b..e714af9 100644 --- a/syscall/dispatch.c +++ b/syscall/dispatch.c @@ -6,14 +6,17 @@ static const virt_addr_t syscall_table[] = { SYSCALL_TABLE_ENTRY(TASK_EXIT, task_exit), + SYSCALL_TABLE_ENTRY(TASK_SELF, task_self), SYSCALL_TABLE_ENTRY(TASK_CREATE, task_create), SYSCALL_TABLE_ENTRY(TASK_CREATE_THREAD, 
task_create_thread), + SYSCALL_TABLE_ENTRY(TASK_GET_ADDRESS_SPACE, task_get_address_space), SYSCALL_TABLE_ENTRY(THREAD_START, thread_start), SYSCALL_TABLE_ENTRY(VM_OBJECT_CREATE, vm_object_create), SYSCALL_TABLE_ENTRY(VM_OBJECT_READ, vm_object_read), SYSCALL_TABLE_ENTRY(VM_OBJECT_WRITE, vm_object_write), SYSCALL_TABLE_ENTRY(VM_OBJECT_COPY, vm_object_copy), SYSCALL_TABLE_ENTRY(VM_REGION_CREATE, vm_region_create), + SYSCALL_TABLE_ENTRY(VM_REGION_KILL, vm_region_kill), SYSCALL_TABLE_ENTRY(VM_REGION_READ, vm_region_read), SYSCALL_TABLE_ENTRY(VM_REGION_WRITE, vm_region_write), SYSCALL_TABLE_ENTRY(VM_REGION_MAP_ABSOLUTE, vm_region_map_absolute), diff --git a/syscall/task.c b/syscall/task.c index de06597..daa9c61 100644 --- a/syscall/task.c +++ b/syscall/task.c @@ -14,6 +14,35 @@ extern kern_status_t sys_task_exit(int status) return KERN_UNIMPLEMENTED; } +kern_status_t sys_task_self(kern_handle_t *out) +{ + struct task *self = current_task(); + if (!validate_access_w(self, out, sizeof *out)) { + return KERN_MEMORY_FAULT; + } + + unsigned long flags; + task_lock_irqsave(self, &flags); + + struct handle *handle_slot = NULL; + kern_handle_t handle; + kern_status_t status = handle_table_alloc_handle( + self->t_handles, + &handle_slot, + &handle); + task_unlock_irqrestore(self, flags); + + if (status != KERN_OK) { + return status; + } + + object_add_handle(&self->t_base); + handle_slot->h_object = &self->t_base; + + *out = handle; + return KERN_OK; +} + kern_status_t sys_task_create( kern_handle_t parent_handle, const char *name, @@ -175,6 +204,57 @@ kern_status_t sys_task_create_thread( return KERN_OK; } +kern_status_t sys_task_get_address_space( + kern_handle_t task_handle, + kern_handle_t *out) +{ + struct task *self = current_task(); + if (!validate_access_w(self, out, sizeof *out)) { + return KERN_MEMORY_FAULT; + } + + unsigned long flags; + task_lock_irqsave(self, &flags); + + struct handle *handle_slot = NULL; + kern_handle_t handle; + struct object *task_obj = NULL; + 
handle_flags_t handle_flags = 0; + kern_status_t status = task_resolve_handle( + self, + task_handle, + &task_obj, + &handle_flags); + if (status != KERN_OK) { + task_unlock_irqrestore(self, flags); + return status; + } + + status = handle_table_alloc_handle( + self->t_handles, + &handle_slot, + &handle); + if (status != KERN_OK) { + task_unlock_irqrestore(self, flags); + return status; + } + + struct task *task = task_cast(task_obj); + + if (!task) { + handle_table_free_handle(self->t_handles, handle); + task_unlock_irqrestore(self, flags); + return KERN_INVALID_ARGUMENT; + } + + handle_slot->h_object = &task->t_address_space->vr_base; + object_add_handle(&task->t_address_space->vr_base); + task_unlock_irqrestore(self, flags); + + *out = handle; + return KERN_OK; +} + kern_status_t sys_thread_start(kern_handle_t thread_handle) { unsigned long flags; diff --git a/vm/vm-region.c b/vm/vm-region.c index 04bd44b..98b3b0a 100644 --- a/vm/vm-region.c +++ b/vm/vm-region.c @@ -8,6 +8,20 @@ #include #include +/* NOTE Locking Rules + * To avoid deadlocks and crashes, the following locking rules should be + * followed: + * 1. Do NOT lock more than one region at a time IF the regions are siblings. + * 2. When locking a region and its child(ren) or ancestors, always lock + * the parent region BEFORE the child region. + * 3. When locking a region and a vm-object mapped into that region, always + * lock the region BEFORE the vm-object. + * 4. An entry MUST be locked before any of its data can be read/written, + * including its children (if it's a region) and its e_parent pointer. + * 5. vm_region_mapping has no lock. Instead, its immediate parent region must + * be locked before any child mappings can be accessed. 
+ */ + /*** STATIC DATA + MACROS *****************************************************/ #undef ASLR @@ -49,10 +63,21 @@ enum search_direction { #define VM_REGION_CAST(p) \ OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p) +static kern_status_t vm_region_object_destroy(struct object *obj); + +static kern_status_t region_object_destroy(struct object *obj, struct queue *q); +static kern_status_t region_object_destroy_recurse( + struct queue_entry *entry, + struct object **out); + static struct object_type vm_region_type = { .ob_name = "vm-region", .ob_size = sizeof(struct vm_region), .ob_header_offset = offsetof(struct vm_region, vr_base), + .ob_ops = { + .destroy = region_object_destroy, + .destroy_recurse = region_object_destroy_recurse, + }, }; static struct vm_cache mapping_cache = { @@ -81,17 +106,53 @@ static struct vm_region_mapping *mapping_from_entry( return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry); } -static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry) +kern_status_t region_object_destroy(struct object *obj, struct queue *q) { - virt_addr_t result = 0; - while (entry) { - result += entry->e_offset; - entry = entry->e_parent; + struct vm_region *region = VM_REGION_CAST(obj); + if (region->vr_status == VM_REGION_ONLINE) { + panic("last reference closed on an online vm-region"); } - return result; + struct btree_node *node = btree_first(®ion->vr_entries); + while (node) { + struct btree_node *next = btree_next(node); + btree_delete(®ion->vr_entries, node); + + struct vm_region_entry *entry + = BTREE_CONTAINER(struct vm_region_entry, e_node, node); + if (entry->e_type != VM_REGION_ENTRY_REGION) { + panic("offline vm-region still contains non-region " + "children."); + } + + queue_push_back(q, &entry->e_entry); + node = next; + } + + return KERN_OK; } +kern_status_t region_object_destroy_recurse( + struct queue_entry *entry, + struct object **out) +{ + struct vm_region_entry *region_entry + = 
BTREE_CONTAINER(struct vm_region_entry, e_entry, entry); + if (region_entry->e_type != VM_REGION_ENTRY_REGION) { + panic("offline vm-region still contains non-region " + "children."); + } + struct vm_region *region = region_from_entry(region_entry); + *out = ®ion->vr_base; + return KERN_OK; +} + +static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry) +{ + return entry->e_address; +} + +/* this function must be called with `parent` locked */ static void region_put_entry( struct vm_region *parent, struct vm_region_entry *child) @@ -119,7 +180,15 @@ static void region_put_entry( } else if (child_base > cur_limit) { next = btree_right(cur); } else { - panic("tried to add an overlapping entry to vm-region"); +#ifdef TRACE + vm_region_dump(parent); +#endif + panic("tried to add an overlapping entry [%zx-%zx] to " + "vm-region (overlaps [%zx-%zx])", + child_base, + child_limit, + cur_base, + cur_limit); } if (next) { @@ -179,12 +248,17 @@ static struct vm_region_entry *region_get_entry( /* find the child region that covers the area [*offp,len]. searches recursively * the value in `offp` is updated to the offset of the returned entry relative - * to its parent */ + * to its parent. + * this function should be called with `region` locked. + * the region returned by this function will also be locked. any intermediary + * regions traversed by this function will be locked temporarily, but will + * be unlocked by the time the function returns. 
*/ static struct vm_region *region_get_child_region_recursive( struct vm_region *region, off_t *offp, size_t len) { + struct vm_region *root = region; off_t offset = *offp; if (offset >= region->vr_entry.e_size) { return NULL; @@ -197,6 +271,14 @@ static struct vm_region *region_get_child_region_recursive( struct vm_region *next_region = region_from_entry(next); if (next_region) { offset -= next->e_offset; + /* since `region` is locked, interrupts are already + * disabled, so don't use lock_irq() here */ + vm_region_lock(next_region); + + if (region != root) { + vm_region_unlock(region); + } + region = next_region; } else { break; @@ -207,6 +289,12 @@ static struct vm_region *region_get_child_region_recursive( return region; } +/* find the vm_region_mapping that contains a given memory area. + * `offp` should be a pointer to an off_t value that contains the offset + * of the area relative to the start of `region`. this value will be updated + * to the offset of the mapping relative to its immediate parent. + * this function should be called with `region` locked. if a mapping is found, + * it will be returned with its immediate parent locked. */ static struct vm_region_mapping *region_get_mapping_recursive( struct vm_region *region, off_t *offp, @@ -218,9 +306,12 @@ static struct vm_region_mapping *region_get_mapping_recursive( return NULL; } + /* if `region` is a different region than what was originally passed to + * us, it has now been locked, and its children can be accessed. 
*/ struct vm_region_entry *entry = region_get_entry(region, offset, len); *offp = offset; + /* return the mapping with the parent region still locked */ return mapping_from_entry(entry); } @@ -488,6 +579,7 @@ static kern_status_t region_validate_allocation( return KERN_OK; } +/* this function should be called with `region` locked */ static void vm_iterator_begin( struct vm_iterator *it, struct vm_region *region, @@ -551,6 +643,15 @@ static void vm_iterator_begin( static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) { +#define UNLOCK_MAPPING_PARENT(p) \ + do { \ + struct vm_region *parent \ + = region_from_entry(p->m_entry.e_parent); \ + if (parent != it->it_region) { \ + vm_region_unlock(parent); \ + } \ + } while (0) + if (nr_bytes < it->it_max) { it->it_base += nr_bytes; it->it_buf = (char *)it->it_buf + nr_bytes; @@ -558,6 +659,10 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) return KERN_OK; } + /* the parent region of it->it_mapping is locked here. if it is + * different from it->it_region, it must be unlocked */ + UNLOCK_MAPPING_PARENT(it->it_mapping); + it->it_base += nr_bytes; off_t offset = it->it_base - vm_region_get_base_address(it->it_region); @@ -569,9 +674,13 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) return KERN_MEMORY_FAULT; } + /* past this point, if we encounter an error, must remember to unlock + * the parent region of next_mapping */ + if ((next_mapping->m_prot & it->it_prot) != it->it_prot) { it->it_buf = NULL; it->it_max = 0; + UNLOCK_MAPPING_PARENT(next_mapping); return KERN_MEMORY_FAULT; } @@ -590,6 +699,7 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) } if (!pg) { + UNLOCK_MAPPING_PARENT(next_mapping); return KERN_NO_MEMORY; } @@ -620,6 +730,9 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) return KERN_OK; } +/* this function must be called with `root` locked. 
`root` will be the first + * entry visited by the iterator. from there, child entries are visited in + * depth-first order. */ static void entry_iterator_begin( struct entry_iterator *it, struct vm_region *root) @@ -629,8 +742,43 @@ static void entry_iterator_begin( it->it_entry = &root->vr_entry; } +/* this function must be called when you are finished with an entry_iterator, + * to ensure that all held locks are released. */ +static void entry_iterator_finish(struct entry_iterator *it) +{ + struct vm_region_entry *cur = it->it_entry; + if (!cur) { + return; + } + + struct vm_region *region = NULL; + if (cur->e_type == VM_REGION_ENTRY_MAPPING) { + region = region_from_entry(cur->e_parent); + } else { + region = region_from_entry(cur); + } + + while (region && region != it->it_root) { + struct vm_region *parent + = region_from_entry(region->vr_entry.e_parent); + vm_region_unlock(region); + region = parent; + } + + memset(it, 0x0, sizeof *it); +} + +/* move to the next entry in the traversal order. + * when this function returns: + * 1. if the visited entry is a region, it will be locked. + * 2. if the visited entry is a mapping, its parent region will be locked. + * a region will remain locked until all of its children and n-grand-children + * have been visited. once iteration is finished, only `it->it_root` will be + * locked. 
+ */ static void entry_iterator_move_next(struct entry_iterator *it) { + /* `region` is locked */ struct vm_region *region = region_from_entry(it->it_entry); bool has_children = (region && !btree_empty(®ion->vr_entries)); @@ -639,6 +787,15 @@ static void entry_iterator_move_next(struct entry_iterator *it) struct btree_node *node = btree_first(®ion->vr_entries); struct vm_region_entry *entry = BTREE_CONTAINER(struct vm_region_entry, e_node, node); + + if (entry->e_type == VM_REGION_ENTRY_REGION) { + struct vm_region *child_region + = region_from_entry(entry); + /* since `region` is locked, interrupts are already + * disabled, so don't use lock_irq() here */ + vm_region_lock(child_region); + } + it->it_depth++; it->it_entry = entry; return; @@ -670,6 +827,72 @@ static void entry_iterator_move_next(struct entry_iterator *it) return; } + if (cur->e_type == VM_REGION_ENTRY_REGION) { + struct vm_region *child_region = region_from_entry(cur); + if (child_region != it->it_root) { + vm_region_unlock(child_region); + } + } + + it->it_depth--; + cur = parent_entry; + } +} + +/* erase the current entry and move to the next entry in the traversal order. + * the current entry MUST be a mapping, otherwise nothing will happen. + */ +static void entry_iterator_erase(struct entry_iterator *it) +{ + /* the parent region of `mapping` is locked */ + struct vm_region_mapping *mapping = mapping_from_entry(it->it_entry); + if (!mapping) { + return; + } + + struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent); + + /* go back up until we find a right sibling. 
*/ + struct vm_region_entry *cur = it->it_entry; + + while (1) { + struct btree_node *sibling = btree_next(&cur->e_node); + if (mapping) { + btree_delete( + &parent->vr_entries, + &mapping->m_entry.e_node); + vm_cache_free(&mapping_cache, mapping); + mapping = NULL; + } + + if (sibling) { + it->it_entry = BTREE_CONTAINER( + struct vm_region_entry, + e_node, + sibling); + return; + } + + if (cur == &it->it_root->vr_entry) { + it->it_entry = NULL; + return; + } + + struct vm_region_entry *parent_entry = cur->e_parent; + struct vm_region *parent = region_from_entry(parent_entry); + + if (!parent) { + it->it_entry = NULL; + return; + } + + if (cur->e_type == VM_REGION_ENTRY_REGION) { + struct vm_region *child_region = region_from_entry(cur); + if (child_region != it->it_root) { + vm_region_unlock(child_region); + } + } + it->it_depth--; cur = parent_entry; } @@ -705,6 +928,11 @@ static void mapping_iterator_begin( } } +static void mapping_iterator_finish(struct entry_iterator *it) +{ + entry_iterator_finish(it); +} + static void mapping_iterator_move_next( struct entry_iterator *it, off_t offset, @@ -730,6 +958,34 @@ static void mapping_iterator_move_next( } } +static void mapping_iterator_erase( + struct entry_iterator *it, + off_t offset, + size_t length, + off_t *offp) +{ + entry_iterator_erase(it); + + while (it->it_entry + && it->it_entry->e_type != VM_REGION_ENTRY_MAPPING) { + + entry_iterator_move_next(it); + } + + if (!it->it_entry) { + return; + } + + off_t base = entry_absolute_address(it->it_entry) + - it->it_root->vr_entry.e_offset; + + if (base >= offset + length) { + it->it_entry = NULL; + } else { + *offp = base; + } +} + /*** PUBLIC API ***************************************************************/ kern_status_t vm_region_type_init(void) @@ -743,6 +999,8 @@ struct vm_region *vm_region_cast(struct object *obj) return VM_REGION_CAST(obj); } +/* this function should be called with `parent` locked (if parent is non-NULL) + */ kern_status_t 
vm_region_create( struct vm_region *parent, const char *name, @@ -752,6 +1010,10 @@ kern_status_t vm_region_create( vm_prot_t prot, struct vm_region **out) { + if (parent && parent->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + if (!offset || !region_len) { return KERN_INVALID_ARGUMENT; } @@ -781,8 +1043,10 @@ kern_status_t vm_region_create( struct vm_region *region = VM_REGION_CAST(region_object); + region->vr_status = VM_REGION_ONLINE; region->vr_prot = prot; region->vr_entry.e_type = VM_REGION_ENTRY_REGION; + region->vr_entry.e_address = offset; region->vr_entry.e_offset = offset; region->vr_entry.e_size = region_len; @@ -794,8 +1058,11 @@ kern_status_t vm_region_create( if (parent) { region->vr_entry.e_parent = &parent->vr_entry; + region->vr_entry.e_address += parent->vr_entry.e_address; region->vr_pmap = parent->vr_pmap; region_put_entry(parent, ®ion->vr_entry); + /* `parent` holds a reference to child `region` */ + object_ref(®ion->vr_base); } if (name && name_len) { @@ -808,8 +1075,67 @@ kern_status_t vm_region_create( return KERN_OK; } -kern_status_t vm_region_map_object( +kern_status_t vm_region_kill( struct vm_region *region, + unsigned long *lock_flags) +{ + if (region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + + if (region->vr_entry.e_parent) { + struct vm_region *parent + = region_from_entry(region->vr_entry.e_parent); + region->vr_entry.e_parent = NULL; + + /* locks must be acquired in parent->child order. 
since we're + * going backwards here, unlock `region` before locking its + * parent */ + vm_region_unlock_irqrestore(region, *lock_flags); + vm_region_lock_irqsave(parent, lock_flags); + btree_delete(&parent->vr_entries, ®ion->vr_entry.e_node); + vm_region_unlock_irqrestore(parent, *lock_flags); + vm_region_lock_irqsave(region, lock_flags); + /* `region` lock is held, and e_parent is NULL */ + } + + struct entry_iterator it; + entry_iterator_begin(&it, region); + + while (it.it_entry) { + if (it.it_entry->e_type == VM_REGION_ENTRY_REGION) { + struct vm_region *region + = region_from_entry(it.it_entry); + region->vr_status = VM_REGION_DEAD; + entry_iterator_move_next(&it); + continue; + } + + struct vm_region_mapping *mapping + = mapping_from_entry(it.it_entry); + + virt_addr_t base = entry_absolute_address(it.it_entry); + + for (size_t i = 0; i < mapping->m_entry.e_size; + i += VM_PAGE_SIZE) { + pmap_remove(region->vr_pmap, base + i); + } + + unsigned long flags; + vm_object_lock_irqsave(mapping->m_object, &flags); + queue_delete( + &mapping->m_object->vo_mappings, + &mapping->m_object_entry); + vm_object_unlock_irqrestore(mapping->m_object, flags); + + entry_iterator_erase(&it); + } + + return KERN_OK; +} + +kern_status_t vm_region_map_object( + struct vm_region *root, off_t region_offset, struct vm_object *object, off_t object_offset, @@ -839,10 +1165,24 @@ kern_status_t vm_region_map_object( length += VM_PAGE_SIZE; } - if (!region || !object) { + if (!root || !object) { return KERN_INVALID_ARGUMENT; } + struct vm_region *region = root; + if (region_offset != VM_REGION_ANY_OFFSET) { + region = region_get_child_region_recursive( + root, + ®ion_offset, + length); + /* if `region` != `root`, it will need to be unlocked at the end + * of the function */ + } + + if (region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + if ((prot & region->vr_prot) != prot) { return KERN_INVALID_ARGUMENT; } @@ -855,13 +1195,6 @@ kern_status_t vm_region_map_object( 
return KERN_INVALID_ARGUMENT; } - if (region_offset != VM_REGION_ANY_OFFSET) { - region = region_get_child_region_recursive( - region, - ®ion_offset, - length); - } - if (!region) { return KERN_INVALID_ARGUMENT; } @@ -896,6 +1229,7 @@ kern_status_t vm_region_map_object( mapping->m_object_offset = object_offset; mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING; mapping->m_entry.e_parent = ®ion->vr_entry; + mapping->m_entry.e_address = region->vr_entry.e_address + region_offset; mapping->m_entry.e_offset = region_offset; mapping->m_entry.e_size = length; @@ -907,7 +1241,14 @@ kern_status_t vm_region_map_object( abs_base + length); #endif region_put_entry(region, &mapping->m_entry); + if (region != root) { + vm_region_unlock(region); + } + + unsigned long lock_flags; + vm_object_lock_irqsave(object, &lock_flags); queue_push_back(&object->vo_mappings, &mapping->m_object_entry); + vm_object_unlock_irqrestore(object, lock_flags); if (out) { *out = entry_absolute_address(&mapping->m_entry); @@ -1068,12 +1409,13 @@ static kern_status_t delete_mapping( pmap_remove(root->vr_pmap, base + i); } - struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent); - + unsigned long flags; + vm_object_lock_irqsave(mapping->m_object, &flags); queue_delete(&mapping->m_object->vo_mappings, &mapping->m_object_entry); - btree_delete(&parent->vr_entries, &mapping->m_entry.e_node); + vm_object_unlock_irqrestore(mapping->m_object, flags); - vm_cache_free(&mapping_cache, mapping); + /* don't actually delete the mapping yet. 
that will be done by + * vm_region_unmap */ return KERN_OK; } @@ -1083,6 +1425,10 @@ kern_status_t vm_region_unmap( off_t unmap_area_offset, size_t unmap_area_length) { + if (region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + kern_status_t status = KERN_OK; struct entry_iterator it; off_t unmap_area_limit = unmap_area_offset + unmap_area_length; @@ -1101,12 +1447,6 @@ kern_status_t vm_region_unmap( off_t mapping_offset = tmp; off_t mapping_limit = mapping_offset + it.it_entry->e_size; - mapping_iterator_move_next( - &it, - unmap_area_offset, - unmap_area_length, - &tmp); - bool split = (unmap_area_offset > mapping_offset && unmap_area_limit < mapping_limit); @@ -1127,6 +1467,7 @@ kern_status_t vm_region_unmap( mapping_offset, unmap_area_offset, unmap_area_limit); + delete = true; } else if (delete) { status = delete_mapping( mapping, @@ -1150,11 +1491,27 @@ kern_status_t vm_region_unmap( panic("don't know what to do with this mapping"); } + if (delete) { + mapping_iterator_erase( + &it, + unmap_area_offset, + unmap_area_length, + &tmp); + } else { + mapping_iterator_move_next( + &it, + unmap_area_offset, + unmap_area_length, + &tmp); + } + if (status != KERN_OK) { break; } } + mapping_iterator_finish(&it); + return status; } @@ -1164,6 +1521,10 @@ bool vm_region_validate_access( size_t len, vm_prot_t prot) { + if (region->vr_status != VM_REGION_ONLINE) { + return false; + } + if (len == 0) { return true; } @@ -1199,16 +1560,27 @@ bool vm_region_validate_access( if ((mapping->m_prot & prot) != prot) { return false; } + + struct vm_region *parent + = region_from_entry(mapping->m_entry.e_parent); + if (parent != region) { + vm_region_unlock(parent); + } } return true; } +/* this function must be called with `region` locked */ kern_status_t vm_region_demand_map( struct vm_region *region, virt_addr_t addr, enum pmap_fault_flags flags) { + if (region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + addr &= ~VM_PAGE_MASK; if (addr < 
region->vr_entry.e_offset || addr > region->vr_entry.e_offset + region->vr_entry.e_size) { @@ -1230,21 +1602,35 @@ kern_status_t vm_region_demand_map( mapping->m_object->vo_name, object_offset); + unsigned long lock_flags; + vm_object_lock_irqsave(mapping->m_object, &lock_flags); struct vm_page *pg = vm_object_alloc_page( mapping->m_object, object_offset, VM_PAGE_4K); + vm_object_unlock_irqrestore(mapping->m_object, lock_flags); tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr); - return pmap_add( + kern_status_t status = pmap_add( region->vr_pmap, addr, vm_page_get_pfn(pg), mapping->m_prot, PMAP_NORMAL); + + struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent); + if (parent != region) { + vm_region_unlock(parent); + } + + return status; } virt_addr_t vm_region_get_base_address(const struct vm_region *region) { + if (region->vr_status != VM_REGION_ONLINE) { + return 0; + } + return entry_absolute_address(®ion->vr_entry); } @@ -1255,6 +1641,10 @@ kern_status_t vm_region_read_kernel( void *destp, size_t *nr_read) { + if (src_region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + struct vm_iterator src; char *dest = destp; @@ -1296,6 +1686,14 @@ kern_status_t vm_region_memmove( size_t count, size_t *nr_moved) { + if (src_region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + + if (dest_region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + struct vm_iterator src, dest; vm_iterator_begin( &src, @@ -1347,6 +1745,14 @@ extern kern_status_t vm_region_memmove_v( size_t nr_src_vecs, size_t bytes_to_move) { + if (src_region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + + if (dest_region->vr_status != VM_REGION_ONLINE) { + return KERN_BAD_STATE; + } + struct iovec_iterator src, dest; iovec_iterator_begin_user(&src, src_region, src_vecs, nr_src_vecs); iovec_iterator_begin_user(&dest, dest_region, dest_vecs, nr_dest_vecs);