From 0af35c70efb6680477d2cf9f0e0c09a473c1ad01 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Sat, 14 Mar 2026 22:39:14 +0000 Subject: [PATCH] vm: implement demand-paging via userspace services with vm-controller --- arch/x86_64/pmap.c | 9 +- include/kernel/address-space.h | 4 +- include/kernel/syscall.h | 27 +++ include/kernel/vm-controller.h | 23 ++- include/kernel/vm-object.h | 32 +++- libmango/arch/x86_64/syscall.S | 7 + libmango/include-user/mango/vm.h | 4 +- syscall/dispatch.c | 12 ++ syscall/vm-controller.c | 309 +++++++++++++++++++++++++++++++ vm/address-space.c | 135 +++++++++++--- vm/vm-controller.c | 215 ++++++++++++++++++++- vm/vm-object.c | 149 ++++++++++----- 12 files changed, 826 insertions(+), 100 deletions(-) create mode 100644 syscall/vm-controller.c diff --git a/arch/x86_64/pmap.c b/arch/x86_64/pmap.c index a16761e..13b9e14 100644 --- a/arch/x86_64/pmap.c +++ b/arch/x86_64/pmap.c @@ -372,13 +372,8 @@ kern_status_t pmap_handle_fault( return KERN_FATAL_ERROR; } - unsigned long lock_flags; - address_space_lock_irqsave(space, &lock_flags); - kern_status_t status - = address_space_demand_map(space, fault_addr, flags); - address_space_unlock_irqrestore(space, lock_flags); - - return status; + /* this must be called with `space` unlocked. */ + return address_space_demand_map(space, fault_addr, flags); } kern_status_t pmap_add( diff --git a/include/kernel/address-space.h b/include/kernel/address-space.h index 6cc81e3..29a3c46 100644 --- a/include/kernel/address-space.h +++ b/include/kernel/address-space.h @@ -110,7 +110,9 @@ extern bool address_space_validate_access( /* find the mapping corresponding to the given virtual address, and page-in the * necessary vm_page to allow the memory access to succeed. if the relevant - * vm-object page hasn't been allocated yet, it will be allocated here. */ + * vm-object page hasn't been allocated yet, it will be allocated here. + * this function must be called with `region` UNLOCKED and interrupts ENABLED. + */ extern kern_status_t address_space_demand_map( struct address_space *region, virt_addr_t addr, diff --git a/include/kernel/syscall.h b/include/kernel/syscall.h index edd16b9..f3c8b8d 100644 --- a/include/kernel/syscall.h +++ b/include/kernel/syscall.h @@ -170,6 +170,33 @@ extern kern_status_t sys_kern_object_wait( kern_wait_item_t *items, size_t nr_items); +extern kern_status_t sys_vm_controller_create(kern_handle_t *out); +extern kern_status_t sys_vm_controller_recv( + kern_handle_t ctrl, + equeue_packet_page_request_t *out); +extern kern_status_t sys_vm_controller_recv_async( + kern_handle_t ctrl, + kern_handle_t eq, + equeue_key_t key); +extern kern_status_t sys_vm_controller_create_object( + kern_handle_t ctrl, + const char *name, + size_t name_len, + equeue_key_t key, + size_t data_len, + vm_prot_t prot, + kern_handle_t *out); +extern kern_status_t sys_vm_controller_detach_object( + kern_handle_t ctrl, + kern_handle_t vmo); +extern kern_status_t sys_vm_controller_supply_pages( + kern_handle_t ctrl, + kern_handle_t dst_vmo, + off_t dst_offset, + kern_handle_t src_vmo, + off_t src_offset, + size_t count); + extern virt_addr_t syscall_get_function(unsigned int sysid); #endif diff --git a/include/kernel/vm-controller.h b/include/kernel/vm-controller.h index e5b88ac..f800157 100644 --- a/include/kernel/vm-controller.h +++ b/include/kernel/vm-controller.h @@ -18,16 +18,24 @@ enum page_request_status { struct vm_controller { struct object vc_base; /* tree of struct vm_objects bound to this controller, keyed with the - * vm_key_t specified when the object(s) were created. */ + * equeue_key_t specified when the object(s) were created. */ struct btree vc_objects; /* tree of pending page requests */ struct btree vc_requests; + /* the equeue to send async page requests to */ + struct equeue *vc_eq; + equeue_key_t vc_eq_key; + /* the number of page requests queued with status PAGE_REQUEST_PENDING. + * used to assert/clear VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED */ + size_t vc_requests_waiting; }; struct page_request { uint64_t req_id; + unsigned int req_type; enum page_request_status req_status; kern_status_t req_result; + spin_lock_t req_lock; struct vm_object *req_object; struct thread *req_sender; struct btree_node req_node; @@ -45,7 +53,8 @@ extern kern_status_t vm_controller_recv( equeue_packet_page_request_t *out); extern kern_status_t vm_controller_recv_async( struct vm_controller *ctrl, - struct equeue *eq); + struct equeue *eq, + equeue_key_t key); extern kern_status_t vm_controller_create_object( struct vm_controller *ctrl, @@ -58,10 +67,18 @@ extern kern_status_t vm_controller_create_object( extern kern_status_t vm_controller_detach_object( struct vm_controller *ctrl, struct vm_object *vmo); +extern kern_status_t vm_controller_supply_pages( + struct vm_controller *ctrl, + struct vm_object *dst, + off_t dst_offset, + struct vm_object *src, + off_t src_offset, + size_t count); extern kern_status_t vm_controller_send_request( struct vm_controller *ctrl, - struct page_request *req); + struct page_request *req, + unsigned long *irq_flags); DEFINE_OBJECT_LOCK_FUNCTION(vm_controller, vc_base) diff --git a/include/kernel/vm-object.h b/include/kernel/vm-object.h index ab20a06..a5137d9 100644 --- a/include/kernel/vm-object.h +++ b/include/kernel/vm-object.h @@ -6,10 +6,27 @@ #define VM_OBJECT_NAME_MAX 64 +struct vm_controller; + enum vm_object_flags { /* the memory behind this vm-object wasn't allocated by us, and * therefore shouldn't be freed by us */ VMO_IN_PLACE = 0x01u, + /* this vm-object is/was attached to a vm-controller */ + VMO_CONTROLLER = 0x02u, + + /* these flags are for use with vm_object_get_page */ + /**************************************************/ + + /* if the relevant page hasn't been allocated yet, it will be allocated + * and returned. if this flag isn't specified, NULL will be returned. */ + VMO_ALLOCATE_MISSING_PAGE = 0x04u, + /* if the vm-object is attached to a vm-controller, and the relevant + * page is uncommitted, send a request to the vm-controller to provide + * the missing page. will result in the vm-object being unlocked and + * the current thread sleeping until the request is fulfilled. the + * vm-object will be re-locked before the function returns. */ + VMO_REQUEST_MISSING_PAGE = 0x08u, }; struct vm_object { @@ -21,8 +38,12 @@ struct vm_object { /* queue of struct vm_region_mapping */ struct queue vo_mappings; - /* memory protection flags. mappings of this vm_object can only use - * a subset of the flags set in this mask. */ + struct vm_controller *vo_ctrl; + equeue_key_t vo_key; + struct btree_node vo_ctrl_node; + + /* memory protection flags. mappings of this vm_object can only + * use a subset of the flags set in this mask. */ vm_prot_t vo_prot; /* btree of vm_pages that have been allocated to this vm_object. @@ -58,13 +79,10 @@ extern struct vm_object *vm_object_create_in_place( vm_prot_t prot); extern struct vm_page *vm_object_get_page( - const struct vm_object *vo, - off_t offset); - -extern struct vm_page *vm_object_alloc_page( struct vm_object *vo, off_t offset, - enum vm_page_order size); + enum vm_object_flags flags, + unsigned long *irq_flags); extern kern_status_t vm_object_read( struct vm_object *vo, diff --git a/libmango/arch/x86_64/syscall.S b/libmango/arch/x86_64/syscall.S index ed728fb..4be1048 100644 --- a/libmango/arch/x86_64/syscall.S +++ b/libmango/arch/x86_64/syscall.S @@ -91,5 +91,12 @@ SYSCALL_GATE msg_reply SYS_MSG_REPLY 4 SYSCALL_GATE msg_read SYS_MSG_READ 6 SYSCALL_GATE msg_write SYS_MSG_WRITE 6 +SYSCALL_GATE vm_controller_create SYS_VM_CONTROLLER_CREATE 1 +SYSCALL_GATE vm_controller_recv SYS_VM_CONTROLLER_RECV 2 +SYSCALL_GATE vm_controller_recv_async SYS_VM_CONTROLLER_RECV_ASYNC 3 +SYSCALL_GATE vm_controller_create_object SYS_VM_CONTROLLER_CREATE_OBJECT 7 +SYSCALL_GATE vm_controller_detach_object SYS_VM_CONTROLLER_DETACH_OBJECT 2 +SYSCALL_GATE vm_controller_supply_pages SYS_VM_CONTROLLER_SUPPLY_PAGES 6 + SYSCALL_GATE kern_object_wait SYS_KERN_OBJECT_WAIT 2 diff --git a/libmango/include-user/mango/vm.h b/libmango/include-user/mango/vm.h index ad10908..eb085d9 100644 --- a/libmango/include-user/mango/vm.h +++ b/libmango/include-user/mango/vm.h @@ -70,11 +70,13 @@ extern kern_status_t vm_controller_recv( equeue_packet_page_request_t *out); extern kern_status_t vm_controller_recv_async( kern_handle_t ctrl, - kern_handle_t eq); + kern_handle_t eq, + equeue_key_t key); extern kern_status_t vm_controller_create_object( kern_handle_t ctrl, const char *name, size_t name_len, + equeue_key_t key, size_t data_len, vm_prot_t prot, kern_handle_t *out); diff --git a/syscall/dispatch.c b/syscall/dispatch.c index f9bca21..c14deda 100644 --- a/syscall/dispatch.c +++ b/syscall/dispatch.c @@ -35,6 +35,18 @@ static const virt_addr_t syscall_table[] = { SYSCALL_TABLE_ENTRY(MSG_REPLY, msg_reply), SYSCALL_TABLE_ENTRY(MSG_READ, msg_read), SYSCALL_TABLE_ENTRY(MSG_WRITE, msg_write), + SYSCALL_TABLE_ENTRY(VM_CONTROLLER_CREATE, vm_controller_create), + SYSCALL_TABLE_ENTRY(VM_CONTROLLER_RECV, vm_controller_recv), + SYSCALL_TABLE_ENTRY(VM_CONTROLLER_RECV_ASYNC, vm_controller_recv_async), + SYSCALL_TABLE_ENTRY( + VM_CONTROLLER_CREATE_OBJECT, + vm_controller_create_object), + SYSCALL_TABLE_ENTRY( + VM_CONTROLLER_DETACH_OBJECT, + vm_controller_detach_object), + SYSCALL_TABLE_ENTRY( + VM_CONTROLLER_SUPPLY_PAGES, + vm_controller_supply_pages), SYSCALL_TABLE_ENTRY(KERN_OBJECT_WAIT, kern_object_wait), }; static const size_t syscall_table_count diff --git a/syscall/vm-controller.c b/syscall/vm-controller.c new file mode 100644 index 0000000..31c1257 --- /dev/null +++ b/syscall/vm-controller.c @@ -0,0 +1,309 @@ +#include +#include +#include +#include +#include +#include + +kern_status_t sys_vm_controller_create(kern_handle_t *out) +{ + struct task *self = current_task(); + + if (!validate_access_w(self, out, sizeof *out)) { + return KERN_MEMORY_FAULT; + } + + struct vm_controller *ctrl = vm_controller_create(); + if (!ctrl) { + return KERN_NO_MEMORY; + } + + kern_status_t status = task_open_handle(self, &ctrl->vc_base, 0, out); + if (status != KERN_OK) { + object_unref(&ctrl->vc_base); + return status; + } + + return KERN_OK; +} + +kern_status_t sys_vm_controller_recv( + kern_handle_t ctrl_handle, + equeue_packet_page_request_t *out) +{ + struct task *self = current_task(); + + if (!validate_access_w(self, out, sizeof *out)) { + return KERN_MEMORY_FAULT; + } + + kern_status_t status = KERN_OK; + unsigned long flags; + task_lock_irqsave(self, &flags); + + struct object *ctrl_obj = NULL; + handle_flags_t handle_flags = 0; + status = task_resolve_handle( + self, + ctrl_handle, + &ctrl_obj, + &handle_flags); + if (status != KERN_OK) { + task_unlock_irqrestore(self, flags); + return status; + } + + struct vm_controller *ctrl = vm_controller_cast(ctrl_obj); + task_unlock_irqrestore(self, flags); + if (!ctrl) { + object_unref(ctrl_obj); + return KERN_INVALID_ARGUMENT; + } + + vm_controller_lock_irqsave(ctrl, &flags); + status = vm_controller_recv(ctrl, out); + vm_controller_unlock_irqrestore(ctrl, flags); + + object_unref(ctrl_obj); + + return status; +} + +kern_status_t sys_vm_controller_recv_async( + kern_handle_t ctrl_handle, + kern_handle_t eq_handle, + equeue_key_t key) +{ + struct task *self = current_task(); + + kern_status_t status = KERN_OK; + unsigned long flags; + task_lock_irqsave(self, &flags); + + struct object *ctrl_obj = NULL, *eq_obj = NULL; + handle_flags_t ctrl_flags = 0, eq_flags = 0; + status = task_resolve_handle(self, ctrl_handle, &ctrl_obj, &ctrl_flags); + if (status != KERN_OK) { + task_unlock_irqrestore(self, flags); + return status; + } + + status = task_resolve_handle(self, eq_handle, &eq_obj, &eq_flags); + if (status != KERN_OK) { + object_unref(ctrl_obj); + task_unlock_irqrestore(self, flags); + return status; + } + + struct vm_controller *ctrl = vm_controller_cast(ctrl_obj); + struct equeue *eq = equeue_cast(eq_obj); + task_unlock_irqrestore(self, flags); + + if (!ctrl || !eq) { + object_unref(ctrl_obj); + object_unref(eq_obj); + return KERN_INVALID_ARGUMENT; + } + + vm_controller_lock_irqsave(ctrl, &flags); + status = vm_controller_recv_async(ctrl, eq, key); + vm_controller_unlock_irqrestore(ctrl, flags); + + object_unref(ctrl_obj); + object_unref(eq_obj); + + return status; +} + +kern_status_t sys_vm_controller_create_object( + kern_handle_t ctrl_handle, + const char *name, + size_t name_len, + equeue_key_t key, + size_t data_len, + vm_prot_t prot, + kern_handle_t *out) +{ + struct task *self = current_task(); + + if (!validate_access_r(self, name, name_len)) { + return KERN_MEMORY_FAULT; + } + + if (!validate_access_w(self, out, sizeof *out)) { + return KERN_MEMORY_FAULT; + } + + kern_status_t status = KERN_OK; + unsigned long flags; + task_lock_irqsave(self, &flags); + + struct object *ctrl_obj = NULL; + handle_flags_t handle_flags = 0; + status = task_resolve_handle( + self, + ctrl_handle, + &ctrl_obj, + &handle_flags); + if (status != KERN_OK) { + task_unlock_irqrestore(self, flags); + return status; + } + + struct handle *out_slot = NULL; + kern_handle_t out_handle = KERN_HANDLE_INVALID; + status = handle_table_alloc_handle( + self->t_handles, + &out_slot, + &out_handle); + + struct vm_controller *ctrl = vm_controller_cast(ctrl_obj); + task_unlock_irqrestore(self, flags); + if (!ctrl) { + object_unref(ctrl_obj); + return KERN_INVALID_ARGUMENT; + } + + vm_controller_lock_irqsave(ctrl, &flags); + struct vm_object *out_vmo = NULL; + status = vm_controller_create_object( + ctrl, + name, + name_len, + key, + data_len, + prot, + &out_vmo); + vm_controller_unlock_irqrestore(ctrl, flags); + + object_unref(ctrl_obj); + + if (status != KERN_OK) { + task_lock_irqsave(self, &flags); + handle_table_free_handle(self->t_handles, out_handle); + task_unlock_irqrestore(self, flags); + return status; + } + + out_slot->h_object = &out_vmo->vo_base; + object_add_handle(&out_vmo->vo_base); + object_unref(&out_vmo->vo_base); + + *out = out_handle; + return KERN_OK; +} + +kern_status_t sys_vm_controller_detach_object( + kern_handle_t ctrl_handle, + kern_handle_t vmo_handle) +{ + struct task *self = current_task(); + + kern_status_t status = KERN_OK; + unsigned long flags; + task_lock_irqsave(self, &flags); + + struct object *ctrl_obj = NULL, *vmo_obj = NULL; + handle_flags_t ctrl_flags = 0, vmo_flags = 0; + status = task_resolve_handle(self, ctrl_handle, &ctrl_obj, &ctrl_flags); + if (status != KERN_OK) { + task_unlock_irqrestore(self, flags); + return status; + } + + status = task_resolve_handle(self, vmo_handle, &vmo_obj, &vmo_flags); + if (status != KERN_OK) { + object_unref(ctrl_obj); + task_unlock_irqrestore(self, flags); + return status; + } + + struct vm_controller *ctrl = vm_controller_cast(ctrl_obj); + struct vm_object *vmo = vm_object_cast(vmo_obj); + task_unlock_irqrestore(self, flags); + + if (!ctrl || !vmo) { + object_unref(ctrl_obj); + object_unref(vmo_obj); + return KERN_INVALID_ARGUMENT; + } + + vm_controller_lock_irqsave(ctrl, &flags); + vm_object_lock(vmo); + status = vm_controller_detach_object(ctrl, vmo); + vm_object_unlock(vmo); + vm_controller_unlock_irqrestore(ctrl, flags); + + object_unref(ctrl_obj); + object_unref(vmo_obj); + + return status; +} + +kern_status_t sys_vm_controller_supply_pages( + kern_handle_t ctrl_handle, + kern_handle_t dst_handle, + off_t dst_offset, + kern_handle_t src_handle, + off_t src_offset, + size_t count) +{ + struct task *self = current_task(); + + kern_status_t status = KERN_OK; + unsigned long flags; + task_lock_irqsave(self, &flags); + + struct object *ctrl_obj = NULL, *src_obj = NULL, *dst_obj = NULL; + handle_flags_t ctrl_flags = 0, src_flags = 0, dst_flags = 0; + status = task_resolve_handle(self, ctrl_handle, &ctrl_obj, &ctrl_flags); + if (status != KERN_OK) { + task_unlock_irqrestore(self, flags); + return status; + } + + status = task_resolve_handle(self, dst_handle, &dst_obj, &dst_flags); + if (status != KERN_OK) { + object_unref(ctrl_obj); + task_unlock_irqrestore(self, flags); + return status; + } + + status = task_resolve_handle(self, src_handle, &src_obj, &src_flags); + if (status != KERN_OK) { + object_unref(ctrl_obj); + object_unref(dst_obj); + task_unlock_irqrestore(self, flags); + return status; + } + + struct vm_controller *ctrl = vm_controller_cast(ctrl_obj); + struct vm_object *dst = vm_object_cast(dst_obj); + struct vm_object *src = vm_object_cast(src_obj); + task_unlock_irqrestore(self, flags); + + if (!ctrl || !dst || !src) { + object_unref(ctrl_obj); + object_unref(dst_obj); + object_unref(src_obj); + return KERN_INVALID_ARGUMENT; + } + + vm_controller_lock_irqsave(ctrl, &flags); + vm_object_lock_pair(src, dst); + status = vm_controller_supply_pages( + ctrl, + dst, + dst_offset, + src, + src_offset, + count); + vm_object_unlock_pair(src, dst); + vm_controller_unlock_irqrestore(ctrl, flags); + + object_unref(ctrl_obj); + object_unref(dst_obj); + object_unref(src_obj); + + return status; +} diff --git a/vm/address-space.c b/vm/address-space.c index 8850208..b63e8b5 100644 --- a/vm/address-space.c +++ b/vm/address-space.c @@ -200,13 +200,24 @@ static bool is_area_free( return false; } + if (base < cur_area->vma_base && limit > cur_area->vma_limit) { + return false; + } + if (base > cur_area->vma_limit) { cur = btree_right(cur); } else if (limit < cur_area->vma_base) { cur = btree_left(cur); } else { /* what */ - panic("unhandled case in is_area_free"); + panic("unhandled case in is_area_free. base=%zx, " + "len=%zx, " + "limit=%zx, cur_area=[%zx-%zx]", + base, + len, + limit, + cur_area->vma_base, + cur_area->vma_limit); } } @@ -247,13 +258,24 @@ static bool is_area_reserved( return true; } + if (base < cur_area->vma_base && limit > cur_area->vma_limit) { + return false; + } + if (base > cur_area->vma_limit) { cur = btree_right(cur); } else if (limit < cur_area->vma_base) { cur = btree_left(cur); } else { /* what */ - panic("unhandled case in is_area_reserved"); + panic("unhandled case in is_area_reserved. base=%zx, " + "len=%zx, " + "limit=%zx, cur_area=[%zx-%zx]", + base, + len, + limit, + cur_area->vma_base, + cur_area->vma_limit); } } @@ -335,17 +357,17 @@ static void vm_iterator_begin( off_t object_offset = base - it->it_mapping->vma_base + it->it_mapping->vma_object_offset; struct vm_page *pg = NULL; + enum vm_object_flags flags = 0; if (prot & VM_PROT_WRITE) { - pg = vm_object_alloc_page( - it->it_mapping->vma_object, - object_offset, - VM_PAGE_4K); - } else { - pg = vm_object_get_page( - it->it_mapping->vma_object, - object_offset); + flags |= VMO_ALLOCATE_MISSING_PAGE; } + pg = vm_object_get_page( + it->it_mapping->vma_object, + object_offset, + flags, + NULL); + if (!pg) { return; } @@ -405,17 +427,17 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) + it->it_mapping->vma_object_offset; struct vm_page *pg = NULL; + enum vm_object_flags flags = 0; if (it->it_prot & VM_PROT_WRITE) { - pg = vm_object_alloc_page( - it->it_mapping->vma_object, - object_offset, - VM_PAGE_4K); - } else { - pg = vm_object_get_page( - it->it_mapping->vma_object, - object_offset); + flags |= VMO_ALLOCATE_MISSING_PAGE; } + pg = vm_object_get_page( + it->it_mapping->vma_object, + object_offset, + flags, + NULL); + if (!pg) { return KERN_NO_MEMORY; } @@ -1094,6 +1116,48 @@ bool address_space_validate_access( return true; } +static kern_status_t request_missing_page( + struct address_space *region, + virt_addr_t addr, + off_t object_offset, + struct vm_object *object, + vm_prot_t prot, + enum pmap_fault_flags flags, + unsigned long *irq_flags) +{ + /* here: + * `region` is locked. + * `object` is unlocked. + * `irq_flags` must be restored when `region` is unlocked. + * the relevant page in `object` may or may not be committed. + * if it isn't, it needs to be requested. + */ + vm_object_lock(object); + address_space_unlock(region); + + struct vm_page *pg = vm_object_get_page( + object, + object_offset, + VMO_ALLOCATE_MISSING_PAGE | VMO_REQUEST_MISSING_PAGE, + irq_flags); + if (!pg) { + vm_object_unlock_irqrestore(object, *irq_flags); + return KERN_FATAL_ERROR; + } + + /* now: `region` is unlocked, and `object` is locked */ + + kern_status_t status = pmap_add( + region->s_pmap, + addr, + vm_page_get_pfn(pg), + prot, + PMAP_NORMAL); + + vm_object_unlock_irqrestore(object, *irq_flags); + return status; +} + /* this function must be called with `region` locked */ kern_status_t address_space_demand_map( struct address_space *region, @@ -1105,12 +1169,26 @@ kern_status_t address_space_demand_map( return KERN_NO_ENTRY; } + unsigned long irq_flags; + address_space_lock_irqsave(region, &irq_flags); + struct vm_area *area = get_entry(region, addr, GET_ENTRY_EXACT); - if (!area) { + if (!area || !area->vma_object) { + address_space_unlock_irqrestore(region, irq_flags); return KERN_NO_ENTRY; } off_t object_offset = addr - area->vma_base + area->vma_object_offset; + if (area->vma_object->vo_ctrl) { + return request_missing_page( + region, + addr, + object_offset, + area->vma_object, + area->vma_prot, + flags, + &irq_flags); + } #if 0 tracek("vm: tried to access vm-object %s at offset=%05llx", @@ -1118,20 +1196,25 @@ kern_status_t address_space_demand_map( object_offset); #endif - unsigned long lock_flags; - vm_object_lock_irqsave(area->vma_object, &lock_flags); - struct vm_page *pg = vm_object_alloc_page( + /* simple case: this vm-object is not attached to a controller */ + vm_object_lock(area->vma_object); + struct vm_page *pg = vm_object_get_page( area->vma_object, object_offset, - VM_PAGE_4K); - vm_object_unlock_irqrestore(area->vma_object, lock_flags); - // tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr); - return pmap_add( + VMO_ALLOCATE_MISSING_PAGE, + NULL); + // tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), + // addr); + kern_status_t status = pmap_add( region->s_pmap, addr, vm_page_get_pfn(pg), area->vma_prot, PMAP_NORMAL); + + vm_object_unlock(area->vma_object); + address_space_unlock_irqrestore(region, irq_flags); + return status; } virt_addr_t address_space_get_base_address(const struct address_space *region) diff --git a/vm/vm-controller.c b/vm/vm-controller.c index 6ceaa39..b5f3e7d 100644 --- a/vm/vm-controller.c +++ b/vm/vm-controller.c @@ -1,23 +1,112 @@ +#include #include #include #include #include +#include +#include + +#define VM_CONTROLLER_CAST(p) \ + OBJECT_C_CAST(struct vm_controller, vc_base, &vm_controller_type, p) + +BTREE_DEFINE_SIMPLE_INSERT(struct vm_object, vo_ctrl_node, vo_key, put_object) +BTREE_DEFINE_SIMPLE_GET( + struct vm_object, + equeue_key_t, + vo_ctrl_node, + vo_key, + get_object) + +static struct object_type vm_controller_type = { + .ob_name = "vm-controller", + .ob_size = sizeof(struct vm_controller), + .ob_header_offset = offsetof(struct vm_controller, vc_base), +}; kern_status_t vm_controller_type_init(void) { - return KERN_UNIMPLEMENTED; + return object_type_register(&vm_controller_type); } struct vm_controller *vm_controller_cast(struct object *obj) { - return NULL; + return VM_CONTROLLER_CAST(obj); } struct vm_controller *vm_controller_create(void) { + struct object *ctrl_object = object_create(&vm_controller_type); + if (!ctrl_object) { + return NULL; + } + + struct vm_controller *ctrl = VM_CONTROLLER_CAST(ctrl_object); + + return ctrl; +} + +static struct page_request *get_next_request(struct vm_controller *ctrl) +{ + struct btree_node *cur = btree_first(&ctrl->vc_requests); + while (cur) { + struct page_request *req + = BTREE_CONTAINER(struct page_request, req_node, cur); + spin_lock(&req->req_lock); + if (req->req_status == PAGE_REQUEST_PENDING) { + req->req_status = PAGE_REQUEST_IN_PROGRESS; + ctrl->vc_requests_waiting--; + return req; + } + + spin_unlock(&req->req_lock); + cur = btree_next(cur); + } + return NULL; } +kern_status_t vm_controller_recv( + struct vm_controller *ctrl, + equeue_packet_page_request_t *out) +{ + struct page_request *req = NULL; + + req = get_next_request(ctrl); + if (!req) { + return KERN_NO_ENTRY; + } + + if (ctrl->vc_requests_waiting == 0) { + object_clear_signal( + &ctrl->vc_base, + VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED); + } + + out->req_vmo = req->req_object->vo_key; + out->req_type = req->req_type; + out->req_offset = req->req_offset; + out->req_length = req->req_length; + + spin_unlock(&req->req_lock); + return KERN_OK; +} + +kern_status_t vm_controller_recv_async( + struct vm_controller *ctrl, + struct equeue *eq, + equeue_key_t key) +{ + if (ctrl->vc_eq) { + object_unref(&ctrl->vc_eq->eq_base); + } + + object_ref(&eq->eq_base); + ctrl->vc_eq = eq; + ctrl->vc_eq_key = key; + + return KERN_OK; +} + kern_status_t vm_controller_create_object( struct vm_controller *ctrl, const char *name, @@ -27,14 +116,45 @@ kern_status_t vm_controller_create_object( vm_prot_t prot, struct vm_object **out) { - return KERN_UNIMPLEMENTED; + struct vm_object *vmo = get_object(&ctrl->vc_objects, key); + if (vmo) { + return KERN_NAME_EXISTS; + } + + vmo = vm_object_create(name, name_len, data_len, prot); + if (!vmo) { + return KERN_NO_MEMORY; + } + + object_ref(&ctrl->vc_base); + object_ref(&vmo->vo_base); + + vmo->vo_flags |= VMO_CONTROLLER; + vmo->vo_ctrl = ctrl; + vmo->vo_key = key; + + put_object(&ctrl->vc_objects, vmo); + + *out = vmo; + return KERN_OK; } kern_status_t vm_controller_detach_object( struct vm_controller *ctrl, struct vm_object *vmo) { - return KERN_UNIMPLEMENTED; + if (vmo->vo_ctrl != ctrl) { + return KERN_INVALID_ARGUMENT; + } + + vmo->vo_ctrl = NULL; + vmo->vo_key = 0; + btree_delete(&ctrl->vc_objects, &vmo->vo_ctrl_node); + + object_unref(&ctrl->vc_base); + object_unref(&vmo->vo_base); + + return KERN_OK; } static kern_status_t try_enqueue(struct btree *tree, struct page_request *req) @@ -91,22 +211,103 @@ static void wait_for_reply( break; } - vm_controller_unlock_irqrestore(ctrl, *lock_flags); + spin_unlock_irqrestore(&req->req_lock, *lock_flags); schedule(SCHED_NORMAL); - vm_controller_lock_irqsave(ctrl, lock_flags); + spin_lock_irqsave(&req->req_lock, lock_flags); } self->tr_state = THREAD_READY; } +static void fulfill_requests( + struct vm_controller *ctrl, + struct vm_object *obj, + off_t offset, + size_t length, + kern_status_t result) +{ + off_t limit = offset + length - 1; + struct btree_node *cur = btree_first(&ctrl->vc_requests); + while (cur) { + struct page_request *req + = BTREE_CONTAINER(struct page_request, req_node, cur); + spin_lock(&req->req_lock); + bool match = false; + off_t req_base = req->req_offset; + off_t req_limit = req->req_offset + req->req_length - 1; + + if (req_base >= offset && req_base <= limit) { + match = true; + } else if (req_limit >= offset && req_limit <= limit) { + match = true; + } + + if (req->req_object != obj) { + match = false; + } + + if (match) { + req->req_status = PAGE_REQUEST_COMPLETE; + req->req_result = result; + thread_awaken(req->req_sender); + } + + spin_unlock(&req->req_lock); + cur = btree_next(cur); + } +} + +kern_status_t vm_controller_supply_pages( + struct vm_controller *ctrl, + struct vm_object *dst, + off_t dst_offset, + struct vm_object *src, + off_t src_offset, + size_t count) +{ + if (src->vo_flags & VMO_CONTROLLER) { + return KERN_INVALID_ARGUMENT; + } + + if (dst->vo_ctrl != ctrl) { + return KERN_INVALID_ARGUMENT; + } + + kern_status_t status = vm_object_transfer( + dst, + dst_offset, + src, + src_offset, + count, + NULL); + fulfill_requests(ctrl, dst, dst_offset, count, status); + + return status; +} + kern_status_t vm_controller_send_request( struct vm_controller *ctrl, - struct page_request *req) + struct page_request *req, + unsigned long *irq_flags) { fill_random(&req->req_id, sizeof req->req_id); while (!try_enqueue(&ctrl->vc_requests, req)) { req->req_id++; } + ctrl->vc_requests_waiting++; + object_assert_signal( + &ctrl->vc_base, + VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED); + + vm_controller_unlock(ctrl); + wait_for_reply(ctrl, req, irq_flags); + + spin_unlock_irqrestore(&req->req_lock, *irq_flags); + vm_controller_lock_irqsave(ctrl, irq_flags); + spin_lock(&req->req_lock); + + btree_delete(&ctrl->vc_requests, &req->req_node); + return KERN_OK; } diff --git a/vm/vm-object.c b/vm/vm-object.c index 2e07763..281be8b 100644 --- a/vm/vm-object.c +++ b/vm/vm-object.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #define VM_OBJECT_CAST(p) \ @@ -40,15 +41,16 @@ static kern_status_t object_iterator_begin( it->it_obj = obj; it->it_alloc = alloc; + enum vm_object_flags flags = 0; if (alloc) { - it->it_pg = vm_object_alloc_page(obj, 0, VM_PAGE_4K); + flags |= VMO_ALLOCATE_MISSING_PAGE; + } - if (!it->it_pg) { - return KERN_NO_MEMORY; - } - } else { - it->it_pg = vm_object_get_page(obj, 0); + it->it_pg = vm_object_get_page(obj, 0, flags, NULL); + + if (alloc && !it->it_pg) { + return KERN_NO_MEMORY; } if (it->it_pg) { @@ -83,17 +85,16 @@ static kern_status_t object_iterator_seek( return KERN_OK; } - if (it->it_alloc) { - it->it_pg = vm_object_alloc_page( - it->it_obj, - it->it_offset, - VM_PAGE_4K); + enum vm_object_flags flags = 0; - if (!it->it_pg) { - return KERN_NO_MEMORY; - } - } else { - it->it_pg = vm_object_get_page(it->it_obj, it->it_offset); + if (it->it_alloc) { + flags |= VMO_ALLOCATE_MISSING_PAGE; + } + + it->it_pg = vm_object_get_page(it->it_obj, it->it_offset, flags, NULL); + + if (it->it_alloc && !it->it_pg) { + return KERN_NO_MEMORY; } if (it->it_pg) { @@ -248,36 +249,7 @@ extern struct vm_object *vm_object_create_in_place( return vmo; } -extern struct vm_page *vm_object_get_page( - const struct vm_object *vo, - off_t offset) -{ - struct btree_node *cur = vo->vo_pages.b_root; - while (cur) { - struct vm_page *page - = BTREE_CONTAINER(struct vm_page, p_bnode, cur); - struct btree_node *next = NULL; - - off_t base = page->p_vmo_offset; - off_t limit = base + vm_page_get_size_bytes(page); - if (offset < base) { - next = btree_left(cur); - } else if (offset >= limit) { - next = btree_right(cur); - } else { - return page; - } - - cur = next; - } - - return NULL; -} - -extern struct vm_page *vm_object_alloc_page( - struct vm_object *vo, - off_t offset, - enum vm_page_order size) +static struct vm_page *alloc_page(struct vm_object *vo, off_t offset) { struct vm_page *page = NULL; struct btree_node *cur = vo->vo_pages.b_root; @@ -340,6 +312,87 @@ extern struct vm_page *vm_object_alloc_page( return NULL; } +static struct vm_page *get_page(struct vm_object *vo, off_t offset) +{ + struct btree_node *cur = vo->vo_pages.b_root; + while (cur) { + struct vm_page *page + = BTREE_CONTAINER(struct vm_page, p_bnode, cur); + struct btree_node *next = NULL; + + off_t base = page->p_vmo_offset; + off_t limit = base + vm_page_get_size_bytes(page); + if (offset < base) { + next = btree_left(cur); + } else if (offset >= limit) { + next = btree_right(cur); + } else { + return page; + } + + cur = next; + } + + return NULL; +} + +static kern_status_t request_page( + struct vm_object *vo, + off_t offset, + unsigned long *irq_flags) +{ + struct vm_controller *ctrl = vo->vo_ctrl; + struct page_request req = {0}; + req.req_status = PAGE_REQUEST_PENDING; + req.req_offset = offset; + req.req_length = vm_page_order_to_bytes(VM_PAGE_4K); + req.req_sender = current_thread(); + + object_ref(&vo->vo_base); + req.req_object = vo; + + vm_object_unlock_irqrestore(vo, *irq_flags); + vm_controller_lock_irqsave(ctrl, irq_flags); + spin_lock(&req.req_lock); + + kern_status_t status + = vm_controller_send_request(ctrl, &req, irq_flags); + + spin_unlock(&req.req_lock); + vm_controller_unlock_irqrestore(ctrl, *irq_flags); + object_unref(&vo->vo_base); + vm_object_lock_irqsave(vo, irq_flags); + + return status; +} + +struct vm_page *vm_object_get_page( + struct vm_object *vo, + off_t offset, + enum vm_object_flags flags, + unsigned long *irq_flags) +{ + if (!vo->vo_ctrl && (flags & VMO_ALLOCATE_MISSING_PAGE)) { + return alloc_page(vo, offset); + } + + struct vm_page *pg = get_page(vo, offset); + if (pg) { + return pg; + } + + if (!vo->vo_ctrl) { + return NULL; + } + + kern_status_t status = request_page(vo, offset, irq_flags); + if (status != KERN_OK) { + return NULL; + } + + return get_page(vo, offset); +} + #if 0 /* read data from a vm-object, where [offset, offset+count] is confined to * a single page */ @@ -800,7 +853,7 @@ kern_status_t vm_object_transfer( size_t moved = 0; for (size_t i = 0; i < count; i += VM_PAGE_SIZE) { struct vm_page *src_pg - = vm_object_get_page(src, src_offset + i); + = vm_object_get_page(src, src_offset + i, 0, NULL); if (!src_pg) { continue; } @@ -808,7 +861,7 @@ kern_status_t vm_object_transfer( btree_delete(&src->vo_pages, &src_pg->p_bnode); struct vm_page *dst_pg - = vm_object_get_page(src, dst_offset + i); + = vm_object_get_page(src, dst_offset + i, 0, NULL); if (dst_pg) { vm_page_free(src_pg); continue;