vm: implement demand-paging via userspace services with vm-controller

This commit is contained in:
2026-03-14 22:39:14 +00:00
parent f04c524bb5
commit 0af35c70ef
12 changed files with 826 additions and 100 deletions

View File

@@ -372,13 +372,8 @@ kern_status_t pmap_handle_fault(
return KERN_FATAL_ERROR; return KERN_FATAL_ERROR;
} }
unsigned long lock_flags; /* this must be called with `space` unlocked. */
address_space_lock_irqsave(space, &lock_flags); return address_space_demand_map(space, fault_addr, flags);
kern_status_t status
= address_space_demand_map(space, fault_addr, flags);
address_space_unlock_irqrestore(space, lock_flags);
return status;
} }
kern_status_t pmap_add( kern_status_t pmap_add(

View File

@@ -110,7 +110,9 @@ extern bool address_space_validate_access(
/* find the mapping corresponding to the given virtual address, and page-in the /* find the mapping corresponding to the given virtual address, and page-in the
* necessary vm_page to allow the memory access to succeed. if the relevant * necessary vm_page to allow the memory access to succeed. if the relevant
* vm-object page hasn't been allocated yet, it will be allocated here. */ * vm-object page hasn't been allocated yet, it will be allocated here.
* this function must be called with `region` UNLOCKED and interrupts ENABLED.
*/
extern kern_status_t address_space_demand_map( extern kern_status_t address_space_demand_map(
struct address_space *region, struct address_space *region,
virt_addr_t addr, virt_addr_t addr,

View File

@@ -170,6 +170,33 @@ extern kern_status_t sys_kern_object_wait(
kern_wait_item_t *items, kern_wait_item_t *items,
size_t nr_items); size_t nr_items);
/* create a new vm-controller and open a handle to it in the calling task.
 * the new handle is written to `out`, which must be writable by the caller. */
extern kern_status_t sys_vm_controller_create(kern_handle_t *out);
/* take the next pending page request queued on the controller `ctrl` and
 * describe it in `out`. returns KERN_NO_ENTRY if no request is pending. */
extern kern_status_t sys_vm_controller_recv(
kern_handle_t ctrl,
equeue_packet_page_request_t *out);
/* record the equeue `eq` and `key` on the controller `ctrl`, replacing any
 * equeue that was recorded previously. */
extern kern_status_t sys_vm_controller_recv_async(
kern_handle_t ctrl,
kern_handle_t eq,
equeue_key_t key);
/* create a vm-object attached to the controller `ctrl` and identified by
 * `key`, and open a handle to it in the calling task. the new handle is
 * written to `out`. */
extern kern_status_t sys_vm_controller_create_object(
kern_handle_t ctrl,
const char *name,
size_t name_len,
equeue_key_t key,
size_t data_len,
vm_prot_t prot,
kern_handle_t *out);
/* detach the vm-object `vmo` from the controller `ctrl`. fails with
 * KERN_INVALID_ARGUMENT if `vmo` is not attached to `ctrl`. */
extern kern_status_t sys_vm_controller_detach_object(
kern_handle_t ctrl,
kern_handle_t vmo);
/* transfer `count` bytes worth of pages from `src_vmo` (at `src_offset`) to
 * `dst_vmo` (at `dst_offset`), and complete the page requests on `ctrl`
 * that overlap the supplied range. `dst_vmo` must be attached to `ctrl`. */
extern kern_status_t sys_vm_controller_supply_pages(
kern_handle_t ctrl,
kern_handle_t dst_vmo,
off_t dst_offset,
kern_handle_t src_vmo,
off_t src_offset,
size_t count);
extern virt_addr_t syscall_get_function(unsigned int sysid); extern virt_addr_t syscall_get_function(unsigned int sysid);
#endif #endif

View File

@@ -18,16 +18,24 @@ enum page_request_status {
struct vm_controller { struct vm_controller {
struct object vc_base; struct object vc_base;
/* tree of struct vm_objects bound to this controller, keyed with the /* tree of struct vm_objects bound to this controller, keyed with the
* vm_key_t specified when the object(s) were created. */ * equeue_key_t specified when the object(s) were created. */
struct btree vc_objects; struct btree vc_objects;
/* tree of pending page requests */ /* tree of pending page requests */
struct btree vc_requests; struct btree vc_requests;
/* the equeue to send async page requests to */
struct equeue *vc_eq;
equeue_key_t vc_eq_key;
/* the number of page requests queued with status PAGE_REQUEST_PENDING.
* used to assert/clear VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED */
size_t vc_requests_waiting;
}; };
struct page_request { struct page_request {
uint64_t req_id; uint64_t req_id;
unsigned int req_type;
enum page_request_status req_status; enum page_request_status req_status;
kern_status_t req_result; kern_status_t req_result;
spin_lock_t req_lock;
struct vm_object *req_object; struct vm_object *req_object;
struct thread *req_sender; struct thread *req_sender;
struct btree_node req_node; struct btree_node req_node;
@@ -45,7 +53,8 @@ extern kern_status_t vm_controller_recv(
equeue_packet_page_request_t *out); equeue_packet_page_request_t *out);
extern kern_status_t vm_controller_recv_async( extern kern_status_t vm_controller_recv_async(
struct vm_controller *ctrl, struct vm_controller *ctrl,
struct equeue *eq); struct equeue *eq,
equeue_key_t key);
extern kern_status_t vm_controller_create_object( extern kern_status_t vm_controller_create_object(
struct vm_controller *ctrl, struct vm_controller *ctrl,
@@ -58,10 +67,18 @@ extern kern_status_t vm_controller_create_object(
extern kern_status_t vm_controller_detach_object( extern kern_status_t vm_controller_detach_object(
struct vm_controller *ctrl, struct vm_controller *ctrl,
struct vm_object *vmo); struct vm_object *vmo);
extern kern_status_t vm_controller_supply_pages(
struct vm_controller *ctrl,
struct vm_object *dst,
off_t dst_offset,
struct vm_object *src,
off_t src_offset,
size_t count);
extern kern_status_t vm_controller_send_request( extern kern_status_t vm_controller_send_request(
struct vm_controller *ctrl, struct vm_controller *ctrl,
struct page_request *req); struct page_request *req,
unsigned long *irq_flags);
DEFINE_OBJECT_LOCK_FUNCTION(vm_controller, vc_base) DEFINE_OBJECT_LOCK_FUNCTION(vm_controller, vc_base)

View File

@@ -6,10 +6,27 @@
#define VM_OBJECT_NAME_MAX 64 #define VM_OBJECT_NAME_MAX 64
struct vm_controller;
enum vm_object_flags { enum vm_object_flags {
/* the memory behind this vm-object wasn't allocated by us, and /* the memory behind this vm-object wasn't allocated by us, and
* therefore shouldn't be freed by us */ * therefore shouldn't be freed by us */
VMO_IN_PLACE = 0x01u, VMO_IN_PLACE = 0x01u,
/* this vm-object is/was attached to a vm-controller */
VMO_CONTROLLER = 0x02u,
/* these flags are for use with vm_object_get_page */
/**************************************************/
/* if the relevant page hasn't been allocated yet, it will be allocated
* and returned. if this flag isn't specified, NULL will be returned. */
VMO_ALLOCATE_MISSING_PAGE = 0x04u,
/* if the vm-object is attached to a vm-controller, and the relevant
* page is uncommitted, send a request to the vm-controller to provide
* the missing page. will result in the vm-object being unlocked and
* the current thread sleeping until the request is fulfilled. the
* vm-object will be re-locked before the function returns. */
VMO_REQUEST_MISSING_PAGE = 0x08u,
}; };
struct vm_object { struct vm_object {
@@ -21,8 +38,12 @@ struct vm_object {
/* queue of struct vm_region_mapping */ /* queue of struct vm_region_mapping */
struct queue vo_mappings; struct queue vo_mappings;
/* memory protection flags. mappings of this vm_object can only use struct vm_controller *vo_ctrl;
* a subset of the flags set in this mask. */ equeue_key_t vo_key;
struct btree_node vo_ctrl_node;
/* memory protection flags. mappings of this vm_object can only
* use a subset of the flags set in this mask. */
vm_prot_t vo_prot; vm_prot_t vo_prot;
/* btree of vm_pages that have been allocated to this vm_object. /* btree of vm_pages that have been allocated to this vm_object.
@@ -58,13 +79,10 @@ extern struct vm_object *vm_object_create_in_place(
vm_prot_t prot); vm_prot_t prot);
extern struct vm_page *vm_object_get_page( extern struct vm_page *vm_object_get_page(
const struct vm_object *vo,
off_t offset);
extern struct vm_page *vm_object_alloc_page(
struct vm_object *vo, struct vm_object *vo,
off_t offset, off_t offset,
enum vm_page_order size); enum vm_object_flags flags,
unsigned long *irq_flags);
extern kern_status_t vm_object_read( extern kern_status_t vm_object_read(
struct vm_object *vo, struct vm_object *vo,

View File

@@ -91,5 +91,12 @@ SYSCALL_GATE msg_reply SYS_MSG_REPLY 4
SYSCALL_GATE msg_read SYS_MSG_READ 6 SYSCALL_GATE msg_read SYS_MSG_READ 6
SYSCALL_GATE msg_write SYS_MSG_WRITE 6 SYSCALL_GATE msg_write SYS_MSG_WRITE 6
SYSCALL_GATE vm_controller_create SYS_VM_CONTROLLER_CREATE 1
SYSCALL_GATE vm_controller_recv SYS_VM_CONTROLLER_RECV 2
SYSCALL_GATE vm_controller_recv_async SYS_VM_CONTROLLER_RECV_ASYNC 3
SYSCALL_GATE vm_controller_create_object SYS_VM_CONTROLLER_CREATE_OBJECT 7
SYSCALL_GATE vm_controller_detach_object SYS_VM_CONTROLLER_DETACH_OBJECT 2
SYSCALL_GATE vm_controller_supply_pages SYS_VM_CONTROLLER_SUPPLY_PAGES 6
SYSCALL_GATE kern_object_wait SYS_KERN_OBJECT_WAIT 2 SYSCALL_GATE kern_object_wait SYS_KERN_OBJECT_WAIT 2

View File

@@ -70,11 +70,13 @@ extern kern_status_t vm_controller_recv(
equeue_packet_page_request_t *out); equeue_packet_page_request_t *out);
extern kern_status_t vm_controller_recv_async( extern kern_status_t vm_controller_recv_async(
kern_handle_t ctrl, kern_handle_t ctrl,
kern_handle_t eq); kern_handle_t eq,
equeue_key_t key);
extern kern_status_t vm_controller_create_object( extern kern_status_t vm_controller_create_object(
kern_handle_t ctrl, kern_handle_t ctrl,
const char *name, const char *name,
size_t name_len, size_t name_len,
equeue_key_t key,
size_t data_len, size_t data_len,
vm_prot_t prot, vm_prot_t prot,
kern_handle_t *out); kern_handle_t *out);

View File

@@ -35,6 +35,18 @@ static const virt_addr_t syscall_table[] = {
SYSCALL_TABLE_ENTRY(MSG_REPLY, msg_reply), SYSCALL_TABLE_ENTRY(MSG_REPLY, msg_reply),
SYSCALL_TABLE_ENTRY(MSG_READ, msg_read), SYSCALL_TABLE_ENTRY(MSG_READ, msg_read),
SYSCALL_TABLE_ENTRY(MSG_WRITE, msg_write), SYSCALL_TABLE_ENTRY(MSG_WRITE, msg_write),
SYSCALL_TABLE_ENTRY(VM_CONTROLLER_CREATE, vm_controller_create),
SYSCALL_TABLE_ENTRY(VM_CONTROLLER_RECV, vm_controller_recv),
SYSCALL_TABLE_ENTRY(VM_CONTROLLER_RECV_ASYNC, vm_controller_recv_async),
SYSCALL_TABLE_ENTRY(
VM_CONTROLLER_CREATE_OBJECT,
vm_controller_create_object),
SYSCALL_TABLE_ENTRY(
VM_CONTROLLER_DETACH_OBJECT,
vm_controller_detach_object),
SYSCALL_TABLE_ENTRY(
VM_CONTROLLER_SUPPLY_PAGES,
vm_controller_supply_pages),
SYSCALL_TABLE_ENTRY(KERN_OBJECT_WAIT, kern_object_wait), SYSCALL_TABLE_ENTRY(KERN_OBJECT_WAIT, kern_object_wait),
}; };
static const size_t syscall_table_count static const size_t syscall_table_count

309
syscall/vm-controller.c Normal file
View File

@@ -0,0 +1,309 @@
#include <kernel/equeue.h>
#include <kernel/sched.h>
#include <kernel/syscall.h>
#include <kernel/task.h>
#include <kernel/vm-controller.h>
#include <kernel/vm-object.h>
/* syscall entry point: create a vm-controller object and publish a handle to
 * it in the calling task.
 *
 * `out` receives the new handle and must be writable by the caller.
 * returns KERN_MEMORY_FAULT if `out` is not writable, KERN_NO_MEMORY if the
 * controller could not be created, or the status of task_open_handle(). */
kern_status_t sys_vm_controller_create(kern_handle_t *out)
{
	struct task *caller = current_task();

	if (!validate_access_w(caller, out, sizeof *out)) {
		return KERN_MEMORY_FAULT;
	}

	struct vm_controller *new_ctrl = vm_controller_create();
	if (!new_ctrl) {
		return KERN_NO_MEMORY;
	}

	kern_status_t result
		= task_open_handle(caller, &new_ctrl->vc_base, 0, out);
	if (result == KERN_OK) {
		return KERN_OK;
	}

	/* no handle was opened, so drop the reference we were given. */
	object_unref(&new_ctrl->vc_base);
	return result;
}
/* syscall entry point: fetch the next pending page request from the
 * vm-controller referred to by `ctrl_handle` and describe it in `out`.
 *
 * returns KERN_MEMORY_FAULT if `out` is not writable, the handle lookup
 * status if `ctrl_handle` cannot be resolved, KERN_INVALID_ARGUMENT if the
 * handle is not a vm-controller, or the status of vm_controller_recv(). */
kern_status_t sys_vm_controller_recv(
	kern_handle_t ctrl_handle,
	equeue_packet_page_request_t *out)
{
	struct task *caller = current_task();

	if (!validate_access_w(caller, out, sizeof *out)) {
		return KERN_MEMORY_FAULT;
	}

	unsigned long irq_state;
	struct object *target = NULL;
	handle_flags_t target_flags = 0;

	/* the handle table is only consulted with the task locked. */
	task_lock_irqsave(caller, &irq_state);
	kern_status_t result = task_resolve_handle(
		caller,
		ctrl_handle,
		&target,
		&target_flags);
	if (result != KERN_OK) {
		task_unlock_irqrestore(caller, irq_state);
		return result;
	}

	struct vm_controller *controller = vm_controller_cast(target);
	task_unlock_irqrestore(caller, irq_state);

	if (controller) {
		vm_controller_lock_irqsave(controller, &irq_state);
		result = vm_controller_recv(controller, out);
		vm_controller_unlock_irqrestore(controller, irq_state);
	} else {
		result = KERN_INVALID_ARGUMENT;
	}

	/* drop the reference obtained from the handle lookup. */
	object_unref(target);
	return result;
}
/* syscall entry point: register the equeue referred to by `eq_handle`, along
 * with `key`, on the vm-controller referred to by `ctrl_handle`.
 *
 * returns the handle lookup status if either handle cannot be resolved,
 * KERN_INVALID_ARGUMENT if either handle has the wrong object type, or the
 * status of vm_controller_recv_async(). */
kern_status_t sys_vm_controller_recv_async(
	kern_handle_t ctrl_handle,
	kern_handle_t eq_handle,
	equeue_key_t key)
{
	struct task *caller = current_task();
	struct object *ctrl_ref = NULL;
	struct object *eq_ref = NULL;
	handle_flags_t ctrl_hflags = 0;
	handle_flags_t eq_hflags = 0;
	unsigned long irq_state;

	task_lock_irqsave(caller, &irq_state);

	kern_status_t result = task_resolve_handle(
		caller,
		ctrl_handle,
		&ctrl_ref,
		&ctrl_hflags);
	if (result != KERN_OK) {
		task_unlock_irqrestore(caller, irq_state);
		return result;
	}

	result = task_resolve_handle(caller, eq_handle, &eq_ref, &eq_hflags);
	if (result != KERN_OK) {
		object_unref(ctrl_ref);
		task_unlock_irqrestore(caller, irq_state);
		return result;
	}

	struct vm_controller *controller = vm_controller_cast(ctrl_ref);
	struct equeue *queue = equeue_cast(eq_ref);
	task_unlock_irqrestore(caller, irq_state);

	if (controller && queue) {
		vm_controller_lock_irqsave(controller, &irq_state);
		result = vm_controller_recv_async(controller, queue, key);
		vm_controller_unlock_irqrestore(controller, irq_state);
	} else {
		result = KERN_INVALID_ARGUMENT;
	}

	/* drop the references obtained from the handle lookups. */
	object_unref(ctrl_ref);
	object_unref(eq_ref);
	return result;
}
/* syscall entry point: create a vm-object attached to the vm-controller
 * referred to by `ctrl_handle`, identified within that controller by `key`,
 * and open a handle to the new object in the calling task.
 *
 * `name`/`name_len` must be readable and `out` must be writable by the
 * caller. on success the new handle is written to `out`.
 *
 * returns KERN_MEMORY_FAULT for inaccessible user pointers, the handle
 * lookup/allocation status if either of those fails, KERN_INVALID_ARGUMENT
 * if `ctrl_handle` is not a vm-controller, or the status of
 * vm_controller_create_object(). */
kern_status_t sys_vm_controller_create_object(
	kern_handle_t ctrl_handle,
	const char *name,
	size_t name_len,
	equeue_key_t key,
	size_t data_len,
	vm_prot_t prot,
	kern_handle_t *out)
{
	struct task *self = current_task();

	if (!validate_access_r(self, name, name_len)) {
		return KERN_MEMORY_FAULT;
	}

	if (!validate_access_w(self, out, sizeof *out)) {
		return KERN_MEMORY_FAULT;
	}

	kern_status_t status = KERN_OK;
	unsigned long flags;
	task_lock_irqsave(self, &flags);

	struct object *ctrl_obj = NULL;
	handle_flags_t handle_flags = 0;
	status = task_resolve_handle(
		self,
		ctrl_handle,
		&ctrl_obj,
		&handle_flags);
	if (status != KERN_OK) {
		task_unlock_irqrestore(self, flags);
		return status;
	}

	/* check the object type before reserving a handle slot, so that a
	 * bad controller handle cannot leak a slot in the handle table. */
	struct vm_controller *ctrl = vm_controller_cast(ctrl_obj);
	if (!ctrl) {
		task_unlock_irqrestore(self, flags);
		object_unref(ctrl_obj);
		return KERN_INVALID_ARGUMENT;
	}

	struct handle *out_slot = NULL;
	kern_handle_t out_handle = KERN_HANDLE_INVALID;
	status = handle_table_alloc_handle(
		self->t_handles,
		&out_slot,
		&out_handle);
	task_unlock_irqrestore(self, flags);

	/* without a slot there is nowhere to publish the new object, and
	 * `out_slot` must not be dereferenced. */
	if (status != KERN_OK) {
		object_unref(ctrl_obj);
		return status;
	}

	vm_controller_lock_irqsave(ctrl, &flags);
	struct vm_object *out_vmo = NULL;
	status = vm_controller_create_object(
		ctrl,
		name,
		name_len,
		key,
		data_len,
		prot,
		&out_vmo);
	vm_controller_unlock_irqrestore(ctrl, flags);
	object_unref(ctrl_obj);

	if (status != KERN_OK) {
		/* give the reserved slot back to the handle table. */
		task_lock_irqsave(self, &flags);
		handle_table_free_handle(self->t_handles, out_handle);
		task_unlock_irqrestore(self, flags);
		return status;
	}

	/* NOTE(review): the slot is filled in without the task lock held;
	 * confirm that a reserved-but-empty slot cannot be observed or freed
	 * by another thread of this task in the meantime. */
	out_slot->h_object = &out_vmo->vo_base;
	object_add_handle(&out_vmo->vo_base);
	/* the handle now keeps the object alive; drop our own reference. */
	object_unref(&out_vmo->vo_base);

	*out = out_handle;
	return KERN_OK;
}
/* syscall entry point: detach the vm-object referred to by `vmo_handle` from
 * the vm-controller referred to by `ctrl_handle`.
 *
 * returns the handle lookup status if either handle cannot be resolved,
 * KERN_INVALID_ARGUMENT if either handle has the wrong object type, or the
 * status of vm_controller_detach_object(). */
kern_status_t sys_vm_controller_detach_object(
	kern_handle_t ctrl_handle,
	kern_handle_t vmo_handle)
{
	struct task *caller = current_task();
	struct object *ctrl_ref = NULL;
	struct object *vmo_ref = NULL;
	handle_flags_t ctrl_hflags = 0;
	handle_flags_t vmo_hflags = 0;
	unsigned long irq_state;

	task_lock_irqsave(caller, &irq_state);

	kern_status_t result = task_resolve_handle(
		caller,
		ctrl_handle,
		&ctrl_ref,
		&ctrl_hflags);
	if (result != KERN_OK) {
		task_unlock_irqrestore(caller, irq_state);
		return result;
	}

	result = task_resolve_handle(caller, vmo_handle, &vmo_ref, &vmo_hflags);
	if (result != KERN_OK) {
		object_unref(ctrl_ref);
		task_unlock_irqrestore(caller, irq_state);
		return result;
	}

	struct vm_controller *controller = vm_controller_cast(ctrl_ref);
	struct vm_object *target = vm_object_cast(vmo_ref);
	task_unlock_irqrestore(caller, irq_state);

	if (controller && target) {
		/* the controller is locked first, then the vm-object. */
		vm_controller_lock_irqsave(controller, &irq_state);
		vm_object_lock(target);
		result = vm_controller_detach_object(controller, target);
		vm_object_unlock(target);
		vm_controller_unlock_irqrestore(controller, irq_state);
	} else {
		result = KERN_INVALID_ARGUMENT;
	}

	/* drop the references obtained from the handle lookups. */
	object_unref(ctrl_ref);
	object_unref(vmo_ref);
	return result;
}
/* syscall entry point: move pages from the vm-object `src_handle` into the
 * controller-backed vm-object `dst_handle`, and complete the page requests
 * on the controller `ctrl_handle` that the supplied range satisfies.
 *
 * returns the handle lookup status if any handle cannot be resolved,
 * KERN_INVALID_ARGUMENT if a handle has the wrong object type or if source
 * and destination are the same object, or the status of
 * vm_controller_supply_pages(). */
kern_status_t sys_vm_controller_supply_pages(
	kern_handle_t ctrl_handle,
	kern_handle_t dst_handle,
	off_t dst_offset,
	kern_handle_t src_handle,
	off_t src_offset,
	size_t count)
{
	struct task *self = current_task();

	kern_status_t status = KERN_OK;
	unsigned long flags;
	task_lock_irqsave(self, &flags);

	struct object *ctrl_obj = NULL, *src_obj = NULL, *dst_obj = NULL;
	handle_flags_t ctrl_flags = 0, src_flags = 0, dst_flags = 0;

	status = task_resolve_handle(self, ctrl_handle, &ctrl_obj, &ctrl_flags);
	if (status != KERN_OK) {
		task_unlock_irqrestore(self, flags);
		return status;
	}

	status = task_resolve_handle(self, dst_handle, &dst_obj, &dst_flags);
	if (status != KERN_OK) {
		object_unref(ctrl_obj);
		task_unlock_irqrestore(self, flags);
		return status;
	}

	status = task_resolve_handle(self, src_handle, &src_obj, &src_flags);
	if (status != KERN_OK) {
		object_unref(ctrl_obj);
		object_unref(dst_obj);
		task_unlock_irqrestore(self, flags);
		return status;
	}

	struct vm_controller *ctrl = vm_controller_cast(ctrl_obj);
	struct vm_object *dst = vm_object_cast(dst_obj);
	struct vm_object *src = vm_object_cast(src_obj);
	task_unlock_irqrestore(self, flags);

	/* supplying an object to itself is always rejected by
	 * vm_controller_supply_pages() (the source may not be
	 * controller-backed, the destination must be). reject it here, so
	 * that the same object is never handed to vm_object_lock_pair() as
	 * both halves of the pair. */
	if (!ctrl || !dst || !src || src == dst) {
		object_unref(ctrl_obj);
		object_unref(dst_obj);
		object_unref(src_obj);
		return KERN_INVALID_ARGUMENT;
	}

	/* the controller is locked first, then both vm-objects. */
	vm_controller_lock_irqsave(ctrl, &flags);
	vm_object_lock_pair(src, dst);
	status = vm_controller_supply_pages(
		ctrl,
		dst,
		dst_offset,
		src,
		src_offset,
		count);
	vm_object_unlock_pair(src, dst);
	vm_controller_unlock_irqrestore(ctrl, flags);

	/* drop the references obtained from the handle lookups. */
	object_unref(ctrl_obj);
	object_unref(dst_obj);
	object_unref(src_obj);
	return status;
}

View File

@@ -200,13 +200,24 @@ static bool is_area_free(
return false; return false;
} }
if (base < cur_area->vma_base && limit > cur_area->vma_limit) {
return false;
}
if (base > cur_area->vma_limit) { if (base > cur_area->vma_limit) {
cur = btree_right(cur); cur = btree_right(cur);
} else if (limit < cur_area->vma_base) { } else if (limit < cur_area->vma_base) {
cur = btree_left(cur); cur = btree_left(cur);
} else { } else {
/* what */ /* what */
panic("unhandled case in is_area_free"); panic("unhandled case in is_area_free. base=%zx, "
"len=%zx, "
"limit=%zx, cur_area=[%zx-%zx]",
base,
len,
limit,
cur_area->vma_base,
cur_area->vma_limit);
} }
} }
@@ -247,13 +258,24 @@ static bool is_area_reserved(
return true; return true;
} }
if (base < cur_area->vma_base && limit > cur_area->vma_limit) {
return false;
}
if (base > cur_area->vma_limit) { if (base > cur_area->vma_limit) {
cur = btree_right(cur); cur = btree_right(cur);
} else if (limit < cur_area->vma_base) { } else if (limit < cur_area->vma_base) {
cur = btree_left(cur); cur = btree_left(cur);
} else { } else {
/* what */ /* what */
panic("unhandled case in is_area_reserved"); panic("unhandled case in is_area_reserved. base=%zx, "
"len=%zx, "
"limit=%zx, cur_area=[%zx-%zx]",
base,
len,
limit,
cur_area->vma_base,
cur_area->vma_limit);
} }
} }
@@ -335,16 +357,16 @@ static void vm_iterator_begin(
off_t object_offset = base - it->it_mapping->vma_base off_t object_offset = base - it->it_mapping->vma_base
+ it->it_mapping->vma_object_offset; + it->it_mapping->vma_object_offset;
struct vm_page *pg = NULL; struct vm_page *pg = NULL;
enum vm_object_flags flags = 0;
if (prot & VM_PROT_WRITE) { if (prot & VM_PROT_WRITE) {
pg = vm_object_alloc_page( flags |= VMO_ALLOCATE_MISSING_PAGE;
it->it_mapping->vma_object, }
object_offset,
VM_PAGE_4K);
} else {
pg = vm_object_get_page( pg = vm_object_get_page(
it->it_mapping->vma_object, it->it_mapping->vma_object,
object_offset); object_offset,
} flags,
NULL);
if (!pg) { if (!pg) {
return; return;
@@ -405,16 +427,16 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
+ it->it_mapping->vma_object_offset; + it->it_mapping->vma_object_offset;
struct vm_page *pg = NULL; struct vm_page *pg = NULL;
enum vm_object_flags flags = 0;
if (it->it_prot & VM_PROT_WRITE) { if (it->it_prot & VM_PROT_WRITE) {
pg = vm_object_alloc_page( flags |= VMO_ALLOCATE_MISSING_PAGE;
it->it_mapping->vma_object, }
object_offset,
VM_PAGE_4K);
} else {
pg = vm_object_get_page( pg = vm_object_get_page(
it->it_mapping->vma_object, it->it_mapping->vma_object,
object_offset); object_offset,
} flags,
NULL);
if (!pg) { if (!pg) {
return KERN_NO_MEMORY; return KERN_NO_MEMORY;
@@ -1094,6 +1116,48 @@ bool address_space_validate_access(
return true; return true;
} }
/* page-fault path for a mapping whose vm-object is attached to a
 * vm-controller: obtain the page at `object_offset` (requesting it from the
 * controller if it is missing) and map it at `addr` in `region`.
 *
 * on entry:
 *   `region` is locked, with the saved interrupt state in `*irq_flags`.
 *   `object` is unlocked.
 * on return:
 *   both `region` and `object` are unlocked, and the interrupt state in
 *   `*irq_flags` has been restored.
 *
 * returns KERN_FATAL_ERROR if no page could be obtained, otherwise the
 * status of pmap_add(). `flags` is currently unused. */
static kern_status_t request_missing_page(
	struct address_space *region,
	virt_addr_t addr,
	off_t object_offset,
	struct vm_object *object,
	vm_prot_t prot,
	enum pmap_fault_flags flags,
	unsigned long *irq_flags)
{
	/* hand over from the region lock to the object lock. interrupts
	 * stay disabled across the hand-over. */
	vm_object_lock(object);
	address_space_unlock(region);

	/* may unlock `object` and sleep while the controller services the
	 * request. `object` is locked again when this returns. */
	struct vm_page *page = vm_object_get_page(
		object,
		object_offset,
		VMO_ALLOCATE_MISSING_PAGE | VMO_REQUEST_MISSING_PAGE,
		irq_flags);

	kern_status_t result = KERN_FATAL_ERROR;
	if (page) {
		/* `region` is unlocked and `object` is locked here. */
		result = pmap_add(
			region->s_pmap,
			addr,
			vm_page_get_pfn(page),
			prot,
			PMAP_NORMAL);
	}

	vm_object_unlock_irqrestore(object, *irq_flags);
	return result;
}
/* this function must be called with `region` locked */ /* this function must be called with `region` locked */
kern_status_t address_space_demand_map( kern_status_t address_space_demand_map(
struct address_space *region, struct address_space *region,
@@ -1105,12 +1169,26 @@ kern_status_t address_space_demand_map(
return KERN_NO_ENTRY; return KERN_NO_ENTRY;
} }
unsigned long irq_flags;
address_space_lock_irqsave(region, &irq_flags);
struct vm_area *area = get_entry(region, addr, GET_ENTRY_EXACT); struct vm_area *area = get_entry(region, addr, GET_ENTRY_EXACT);
if (!area) { if (!area || !area->vma_object) {
address_space_unlock_irqrestore(region, irq_flags);
return KERN_NO_ENTRY; return KERN_NO_ENTRY;
} }
off_t object_offset = addr - area->vma_base + area->vma_object_offset; off_t object_offset = addr - area->vma_base + area->vma_object_offset;
if (area->vma_object->vo_ctrl) {
return request_missing_page(
region,
addr,
object_offset,
area->vma_object,
area->vma_prot,
flags,
&irq_flags);
}
#if 0 #if 0
tracek("vm: tried to access vm-object %s at offset=%05llx", tracek("vm: tried to access vm-object %s at offset=%05llx",
@@ -1118,20 +1196,25 @@ kern_status_t address_space_demand_map(
object_offset); object_offset);
#endif #endif
unsigned long lock_flags; /* simple case: this vm-object is not attached to a controller */
vm_object_lock_irqsave(area->vma_object, &lock_flags); vm_object_lock(area->vma_object);
struct vm_page *pg = vm_object_alloc_page( struct vm_page *pg = vm_object_get_page(
area->vma_object, area->vma_object,
object_offset, object_offset,
VM_PAGE_4K); VMO_ALLOCATE_MISSING_PAGE,
vm_object_unlock_irqrestore(area->vma_object, lock_flags); NULL);
// tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr); // tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg),
return pmap_add( // addr);
kern_status_t status = pmap_add(
region->s_pmap, region->s_pmap,
addr, addr,
vm_page_get_pfn(pg), vm_page_get_pfn(pg),
area->vma_prot, area->vma_prot,
PMAP_NORMAL); PMAP_NORMAL);
vm_object_unlock(area->vma_object);
address_space_unlock_irqrestore(region, irq_flags);
return status;
} }
virt_addr_t address_space_get_base_address(const struct address_space *region) virt_addr_t address_space_get_base_address(const struct address_space *region)

View File

@@ -1,21 +1,110 @@
#include <kernel/equeue.h>
#include <kernel/sched.h> #include <kernel/sched.h>
#include <kernel/thread.h> #include <kernel/thread.h>
#include <kernel/util.h> #include <kernel/util.h>
#include <kernel/vm-controller.h> #include <kernel/vm-controller.h>
#include <kernel/vm-object.h>
#include <mango/signal.h>
#define VM_CONTROLLER_CAST(p) \
OBJECT_C_CAST(struct vm_controller, vc_base, &vm_controller_type, p)
BTREE_DEFINE_SIMPLE_INSERT(struct vm_object, vo_ctrl_node, vo_key, put_object)
BTREE_DEFINE_SIMPLE_GET(
struct vm_object,
equeue_key_t,
vo_ctrl_node,
vo_key,
get_object)
static struct object_type vm_controller_type = {
.ob_name = "vm-controller",
.ob_size = sizeof(struct vm_controller),
.ob_header_offset = offsetof(struct vm_controller, vc_base),
};
kern_status_t vm_controller_type_init(void) kern_status_t vm_controller_type_init(void)
{ {
return KERN_UNIMPLEMENTED; return object_type_register(&vm_controller_type);
} }
struct vm_controller *vm_controller_cast(struct object *obj) struct vm_controller *vm_controller_cast(struct object *obj)
{ {
return NULL; return VM_CONTROLLER_CAST(obj);
} }
struct vm_controller *vm_controller_create(void) struct vm_controller *vm_controller_create(void)
{ {
struct object *ctrl_object = object_create(&vm_controller_type);
if (!ctrl_object) {
return NULL; return NULL;
}
struct vm_controller *ctrl = VM_CONTROLLER_CAST(ctrl_object);
return ctrl;
}
static struct page_request *get_next_request(struct vm_controller *ctrl)
{
struct btree_node *cur = btree_first(&ctrl->vc_requests);
while (cur) {
struct page_request *req
= BTREE_CONTAINER(struct page_request, req_node, cur);
spin_lock(&req->req_lock);
if (req->req_status == PAGE_REQUEST_PENDING) {
req->req_status = PAGE_REQUEST_IN_PROGRESS;
ctrl->vc_requests_waiting--;
return req;
}
spin_unlock(&req->req_lock);
cur = btree_next(cur);
}
return NULL;
}
/* dequeue the next pending page request from `ctrl` and describe it in
 * `out`.
 *
 * VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED is cleared once no pending requests
 * remain. returns KERN_NO_ENTRY if there was nothing to receive. */
kern_status_t vm_controller_recv(
	struct vm_controller *ctrl,
	equeue_packet_page_request_t *out)
{
	/* on success the request comes back with its `req_lock` held. */
	struct page_request *next = get_next_request(ctrl);
	if (next == NULL) {
		return KERN_NO_ENTRY;
	}

	if (!ctrl->vc_requests_waiting) {
		object_clear_signal(
			&ctrl->vc_base,
			VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED);
	}

	/* the vm-object is reported by the key it was created with. */
	out->req_vmo = next->req_object->vo_key;
	out->req_type = next->req_type;
	out->req_offset = next->req_offset;
	out->req_length = next->req_length;

	spin_unlock(&next->req_lock);
	return KERN_OK;
}
kern_status_t vm_controller_recv_async(
struct vm_controller *ctrl,
struct equeue *eq,
equeue_key_t key)
{
if (ctrl->vc_eq) {
object_unref(&ctrl->vc_eq->eq_base);
}
object_ref(&eq->eq_base);
ctrl->vc_eq = eq;
ctrl->vc_eq_key = key;
return KERN_OK;
} }
kern_status_t vm_controller_create_object( kern_status_t vm_controller_create_object(
@@ -27,14 +116,45 @@ kern_status_t vm_controller_create_object(
vm_prot_t prot, vm_prot_t prot,
struct vm_object **out) struct vm_object **out)
{ {
return KERN_UNIMPLEMENTED; struct vm_object *vmo = get_object(&ctrl->vc_objects, key);
if (vmo) {
return KERN_NAME_EXISTS;
}
vmo = vm_object_create(name, name_len, data_len, prot);
if (!vmo) {
return KERN_NO_MEMORY;
}
object_ref(&ctrl->vc_base);
object_ref(&vmo->vo_base);
vmo->vo_flags |= VMO_CONTROLLER;
vmo->vo_ctrl = ctrl;
vmo->vo_key = key;
put_object(&ctrl->vc_objects, vmo);
*out = vmo;
return KERN_OK;
} }
kern_status_t vm_controller_detach_object( kern_status_t vm_controller_detach_object(
struct vm_controller *ctrl, struct vm_controller *ctrl,
struct vm_object *vmo) struct vm_object *vmo)
{ {
return KERN_UNIMPLEMENTED; if (vmo->vo_ctrl != ctrl) {
return KERN_INVALID_ARGUMENT;
}
vmo->vo_ctrl = NULL;
vmo->vo_key = 0;
btree_delete(&ctrl->vc_objects, &vmo->vo_ctrl_node);
object_unref(&ctrl->vc_base);
object_unref(&vmo->vo_base);
return KERN_OK;
} }
static kern_status_t try_enqueue(struct btree *tree, struct page_request *req) static kern_status_t try_enqueue(struct btree *tree, struct page_request *req)
@@ -91,22 +211,103 @@ static void wait_for_reply(
break; break;
} }
vm_controller_unlock_irqrestore(ctrl, *lock_flags); spin_unlock_irqrestore(&req->req_lock, *lock_flags);
schedule(SCHED_NORMAL); schedule(SCHED_NORMAL);
vm_controller_lock_irqsave(ctrl, lock_flags); spin_lock_irqsave(&req->req_lock, lock_flags);
} }
self->tr_state = THREAD_READY; self->tr_state = THREAD_READY;
} }
/* complete every page request queued on `ctrl` that targets `obj` and whose
 * first or last byte falls inside [offset, offset + length - 1]. each
 * matching request is marked PAGE_REQUEST_COMPLETE with `result`, and the
 * thread that sent it is awakened.
 *
 * a request may be completed while it is still PAGE_REQUEST_PENDING (i.e.
 * before the controller has received it). such a request no longer counts
 * towards `vc_requests_waiting`, and
 * VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED is cleared if none remain.
 *
 * must be called with `ctrl` locked. */
static void fulfill_requests(
	struct vm_controller *ctrl,
	struct vm_object *obj,
	off_t offset,
	size_t length,
	kern_status_t result)
{
	off_t limit = offset + length - 1;
	bool dequeued_pending = false;
	struct btree_node *cur = btree_first(&ctrl->vc_requests);

	while (cur) {
		struct page_request *req
			= BTREE_CONTAINER(struct page_request, req_node, cur);
		spin_lock(&req->req_lock);

		bool match = false;
		off_t req_base = req->req_offset;
		off_t req_limit = req->req_offset + req->req_length - 1;

		if (req_base >= offset && req_base <= limit) {
			match = true;
		} else if (req_limit >= offset && req_limit <= limit) {
			match = true;
		}

		if (req->req_object != obj) {
			match = false;
		}

		if (match) {
			/* keep the count of PENDING requests accurate when a
			 * request is completed before it was received. */
			if (req->req_status == PAGE_REQUEST_PENDING
			    && ctrl->vc_requests_waiting > 0) {
				ctrl->vc_requests_waiting--;
				dequeued_pending = true;
			}

			req->req_status = PAGE_REQUEST_COMPLETE;
			req->req_result = result;
			thread_awaken(req->req_sender);
		}

		spin_unlock(&req->req_lock);
		cur = btree_next(cur);
	}

	if (dequeued_pending && ctrl->vc_requests_waiting == 0) {
		object_clear_signal(
			&ctrl->vc_base,
			VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED);
	}
}
/* move `count` bytes worth of pages from `src` (starting at `src_offset`)
 * into `dst` (starting at `dst_offset`), then complete the page requests on
 * `ctrl` that overlap the destination range, passing them the transfer
 * status.
 *
 * `src` must never have been attached to a vm-controller, and `dst` must be
 * attached to `ctrl`; otherwise KERN_INVALID_ARGUMENT is returned and
 * nothing is transferred. returns the status of vm_object_transfer(). */
kern_status_t vm_controller_supply_pages(
	struct vm_controller *ctrl,
	struct vm_object *dst,
	off_t dst_offset,
	struct vm_object *src,
	off_t src_offset,
	size_t count)
{
	if ((src->vo_flags & VMO_CONTROLLER) || dst->vo_ctrl != ctrl) {
		return KERN_INVALID_ARGUMENT;
	}

	kern_status_t transfer_status = vm_object_transfer(
		dst,
		dst_offset,
		src,
		src_offset,
		count,
		NULL);

	/* waiters are woken whether or not the transfer succeeded; they
	 * receive the transfer status as the request result. */
	fulfill_requests(ctrl, dst, dst_offset, count, transfer_status);

	return transfer_status;
}
kern_status_t vm_controller_send_request( kern_status_t vm_controller_send_request(
struct vm_controller *ctrl, struct vm_controller *ctrl,
struct page_request *req) struct page_request *req,
unsigned long *irq_flags)
{ {
fill_random(&req->req_id, sizeof req->req_id); fill_random(&req->req_id, sizeof req->req_id);
while (!try_enqueue(&ctrl->vc_requests, req)) { while (!try_enqueue(&ctrl->vc_requests, req)) {
req->req_id++; req->req_id++;
} }
ctrl->vc_requests_waiting++;
object_assert_signal(
&ctrl->vc_base,
VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED);
vm_controller_unlock(ctrl);
wait_for_reply(ctrl, req, irq_flags);
spin_unlock_irqrestore(&req->req_lock, *irq_flags);
vm_controller_lock_irqsave(ctrl, irq_flags);
spin_lock(&req->req_lock);
btree_delete(&ctrl->vc_requests, &req->req_node);
return KERN_OK; return KERN_OK;
} }

View File

@@ -1,6 +1,7 @@
#include <kernel/printk.h> #include <kernel/printk.h>
#include <kernel/sched.h> #include <kernel/sched.h>
#include <kernel/util.h> #include <kernel/util.h>
#include <kernel/vm-controller.h>
#include <kernel/vm-object.h> #include <kernel/vm-object.h>
#define VM_OBJECT_CAST(p) \ #define VM_OBJECT_CAST(p) \
@@ -40,15 +41,16 @@ static kern_status_t object_iterator_begin(
it->it_obj = obj; it->it_obj = obj;
it->it_alloc = alloc; it->it_alloc = alloc;
enum vm_object_flags flags = 0;
if (alloc) { if (alloc) {
it->it_pg = vm_object_alloc_page(obj, 0, VM_PAGE_4K); flags |= VMO_ALLOCATE_MISSING_PAGE;
if (!it->it_pg) {
return KERN_NO_MEMORY;
} }
} else {
it->it_pg = vm_object_get_page(obj, 0); it->it_pg = vm_object_get_page(obj, 0, flags, NULL);
if (alloc && !it->it_pg) {
return KERN_NO_MEMORY;
} }
if (it->it_pg) { if (it->it_pg) {
@@ -83,17 +85,16 @@ static kern_status_t object_iterator_seek(
return KERN_OK; return KERN_OK;
} }
if (it->it_alloc) { enum vm_object_flags flags = 0;
it->it_pg = vm_object_alloc_page(
it->it_obj,
it->it_offset,
VM_PAGE_4K);
if (!it->it_pg) { if (it->it_alloc) {
return KERN_NO_MEMORY; flags |= VMO_ALLOCATE_MISSING_PAGE;
} }
} else {
it->it_pg = vm_object_get_page(it->it_obj, it->it_offset); it->it_pg = vm_object_get_page(it->it_obj, it->it_offset, flags, NULL);
if (it->it_alloc && !it->it_pg) {
return KERN_NO_MEMORY;
} }
if (it->it_pg) { if (it->it_pg) {
@@ -248,36 +249,7 @@ extern struct vm_object *vm_object_create_in_place(
return vmo; return vmo;
} }
extern struct vm_page *vm_object_get_page( static struct vm_page *alloc_page(struct vm_object *vo, off_t offset)
const struct vm_object *vo,
off_t offset)
{
struct btree_node *cur = vo->vo_pages.b_root;
while (cur) {
struct vm_page *page
= BTREE_CONTAINER(struct vm_page, p_bnode, cur);
struct btree_node *next = NULL;
off_t base = page->p_vmo_offset;
off_t limit = base + vm_page_get_size_bytes(page);
if (offset < base) {
next = btree_left(cur);
} else if (offset >= limit) {
next = btree_right(cur);
} else {
return page;
}
cur = next;
}
return NULL;
}
extern struct vm_page *vm_object_alloc_page(
struct vm_object *vo,
off_t offset,
enum vm_page_order size)
{ {
struct vm_page *page = NULL; struct vm_page *page = NULL;
struct btree_node *cur = vo->vo_pages.b_root; struct btree_node *cur = vo->vo_pages.b_root;
@@ -340,6 +312,87 @@ extern struct vm_page *vm_object_alloc_page(
return NULL; return NULL;
} }
/* look up the page of `vo` that covers byte `offset`.
 *
 * walks the `vo_pages` tree, where each page covers
 * [p_vmo_offset, p_vmo_offset + page size). returns NULL if no page has been
 * added to the object for that offset. never allocates. */
static struct vm_page *get_page(struct vm_object *vo, off_t offset)
{
	struct btree_node *node = vo->vo_pages.b_root;

	while (node) {
		struct vm_page *candidate
			= BTREE_CONTAINER(struct vm_page, p_bnode, node);
		off_t first = candidate->p_vmo_offset;
		off_t end = first + vm_page_get_size_bytes(candidate);

		if (offset >= first && offset < end) {
			return candidate;
		}

		/* not covered by this page: descend towards `offset`. */
		node = (offset < first) ? btree_left(node) : btree_right(node);
	}

	return NULL;
}
/* ask the vm-controller that `vo` is attached to for the 4K page at
 * `offset`, and wait for the request to be handled.
 *
 * on entry `vo` is locked, with the saved interrupt state in `*irq_flags`.
 * `vo` is unlocked while the request is outstanding, and is locked again
 * (with `*irq_flags` updated) before returning.
 *
 * returns the status of vm_controller_send_request(). */
static kern_status_t request_page(
	struct vm_object *vo,
	off_t offset,
	unsigned long *irq_flags)
{
	struct vm_controller *controller = vo->vo_ctrl;

	/* the request lives on this stack frame for as long as it is
	 * queued on the controller. */
	struct page_request request = {
		.req_status = PAGE_REQUEST_PENDING,
		.req_offset = offset,
		.req_length = vm_page_order_to_bytes(VM_PAGE_4K),
		.req_sender = current_thread(),
		.req_object = vo,
	};

	/* keep `vo` alive while it is unlocked and referenced by the
	 * request. */
	object_ref(&vo->vo_base);

	/* swap the object lock for the controller lock, then the request
	 * lock, before handing the request over. */
	vm_object_unlock_irqrestore(vo, *irq_flags);
	vm_controller_lock_irqsave(controller, irq_flags);
	spin_lock(&request.req_lock);

	kern_status_t result
		= vm_controller_send_request(controller, &request, irq_flags);

	spin_unlock(&request.req_lock);
	vm_controller_unlock_irqrestore(controller, *irq_flags);

	object_unref(&vo->vo_base);
	vm_object_lock_irqsave(vo, irq_flags);

	return result;
}
/* return the page of `vo` that covers byte `offset`. must be called with
 * `vo` locked.
 *
 * what happens when the page is missing depends on `vo` and `flags`:
 *  - `vo` is not attached to a vm-controller:
 *      with VMO_ALLOCATE_MISSING_PAGE the page is allocated and returned,
 *      otherwise NULL is returned.
 *  - `vo` is attached to a vm-controller:
 *      with VMO_REQUEST_MISSING_PAGE (and a non-NULL `irq_flags`) the page
 *      is requested from the controller. `vo` is unlocked while the calling
 *      thread waits, and is locked again before returning; `*irq_flags`
 *      holds the saved interrupt state across that hand-over.
 *      otherwise NULL is returned. pages are never allocated locally for a
 *      controller-backed object.
 *
 * `irq_flags` may be NULL when VMO_REQUEST_MISSING_PAGE is not specified.
 * returns NULL if the page is still unavailable. */
struct vm_page *vm_object_get_page(
	struct vm_object *vo,
	off_t offset,
	enum vm_object_flags flags,
	unsigned long *irq_flags)
{
	if (!vo->vo_ctrl && (flags & VMO_ALLOCATE_MISSING_PAGE)) {
		return alloc_page(vo, offset);
	}

	struct vm_page *pg = get_page(vo, offset);
	if (pg) {
		return pg;
	}

	if (!vo->vo_ctrl) {
		return NULL;
	}

	/* requesting a page unlocks `vo` and sleeps, which is only valid
	 * when the caller opted in and supplied the interrupt state that
	 * goes with the lock. callers that pass NULL must not end up here. */
	if (!(flags & VMO_REQUEST_MISSING_PAGE) || !irq_flags) {
		return NULL;
	}

	kern_status_t status = request_page(vo, offset, irq_flags);
	if (status != KERN_OK) {
		return NULL;
	}

	/* look again: the controller should have supplied the page by now. */
	return get_page(vo, offset);
}
#if 0 #if 0
/* read data from a vm-object, where [offset, offset+count] is confined to /* read data from a vm-object, where [offset, offset+count] is confined to
* a single page */ * a single page */
@@ -800,7 +853,7 @@ kern_status_t vm_object_transfer(
size_t moved = 0; size_t moved = 0;
for (size_t i = 0; i < count; i += VM_PAGE_SIZE) { for (size_t i = 0; i < count; i += VM_PAGE_SIZE) {
struct vm_page *src_pg struct vm_page *src_pg
= vm_object_get_page(src, src_offset + i); = vm_object_get_page(src, src_offset + i, 0, NULL);
if (!src_pg) { if (!src_pg) {
continue; continue;
} }
@@ -808,7 +861,7 @@ kern_status_t vm_object_transfer(
btree_delete(&src->vo_pages, &src_pg->p_bnode); btree_delete(&src->vo_pages, &src_pg->p_bnode);
struct vm_page *dst_pg struct vm_page *dst_pg
= vm_object_get_page(src, dst_offset + i); = vm_object_get_page(src, dst_offset + i, 0, NULL);
if (dst_pg) { if (dst_pg) {
vm_page_free(src_pg); vm_page_free(src_pg);
continue; continue;