vm: implement demand-paging via userspace services with vm-controller

This commit is contained in:
2026-03-14 22:39:14 +00:00
parent f04c524bb5
commit 0af35c70ef
12 changed files with 826 additions and 100 deletions

View File

@@ -200,13 +200,24 @@ static bool is_area_free(
return false;
}
if (base < cur_area->vma_base && limit > cur_area->vma_limit) {
return false;
}
if (base > cur_area->vma_limit) {
cur = btree_right(cur);
} else if (limit < cur_area->vma_base) {
cur = btree_left(cur);
} else {
/* what */
panic("unhandled case in is_area_free");
panic("unhandled case in is_area_free. base=%zx, "
"len=%zx, "
"limit=%zx, cur_area=[%zx-%zx]",
base,
len,
limit,
cur_area->vma_base,
cur_area->vma_limit);
}
}
@@ -247,13 +258,24 @@ static bool is_area_reserved(
return true;
}
if (base < cur_area->vma_base && limit > cur_area->vma_limit) {
return false;
}
if (base > cur_area->vma_limit) {
cur = btree_right(cur);
} else if (limit < cur_area->vma_base) {
cur = btree_left(cur);
} else {
/* what */
panic("unhandled case in is_area_reserved");
panic("unhandled case in is_area_reserved. base=%zx, "
"len=%zx, "
"limit=%zx, cur_area=[%zx-%zx]",
base,
len,
limit,
cur_area->vma_base,
cur_area->vma_limit);
}
}
@@ -335,17 +357,17 @@ static void vm_iterator_begin(
off_t object_offset = base - it->it_mapping->vma_base
+ it->it_mapping->vma_object_offset;
struct vm_page *pg = NULL;
enum vm_object_flags flags = 0;
if (prot & VM_PROT_WRITE) {
pg = vm_object_alloc_page(
it->it_mapping->vma_object,
object_offset,
VM_PAGE_4K);
} else {
pg = vm_object_get_page(
it->it_mapping->vma_object,
object_offset);
flags |= VMO_ALLOCATE_MISSING_PAGE;
}
pg = vm_object_get_page(
it->it_mapping->vma_object,
object_offset,
flags,
NULL);
if (!pg) {
return;
}
@@ -405,17 +427,17 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
+ it->it_mapping->vma_object_offset;
struct vm_page *pg = NULL;
enum vm_object_flags flags = 0;
if (it->it_prot & VM_PROT_WRITE) {
pg = vm_object_alloc_page(
it->it_mapping->vma_object,
object_offset,
VM_PAGE_4K);
} else {
pg = vm_object_get_page(
it->it_mapping->vma_object,
object_offset);
flags |= VMO_ALLOCATE_MISSING_PAGE;
}
pg = vm_object_get_page(
it->it_mapping->vma_object,
object_offset,
flags,
NULL);
if (!pg) {
return KERN_NO_MEMORY;
}
@@ -1094,6 +1116,48 @@ bool address_space_validate_access(
return true;
}
/*
 * Resolve a fault on a controller-backed vm-object by asking the object's
 * userspace pager for the missing page, then mapping it into `region`.
 *
 * Lock protocol (see inline comment below): called with `region` locked and
 * `object` unlocked; always returns with BOTH unlocked.  `irq_flags` holds
 * the interrupt state saved when `region` was locked and is restored by the
 * final vm_object_unlock_irqrestore().
 *
 * NOTE(review): the `flags` parameter (pmap fault flags) is currently unused
 * in this body — presumably reserved for future fault-kind handling; confirm.
 *
 * Returns KERN_FATAL_ERROR if the page could not be obtained, otherwise the
 * status of pmap_add().
 */
static kern_status_t request_missing_page(
	struct address_space *region,
	virt_addr_t addr,
	off_t object_offset,
	struct vm_object *object,
	vm_prot_t prot,
	enum pmap_fault_flags flags,
	unsigned long *irq_flags)
{
	/* here:
	 * `region` is locked.
	 * `object` is unlocked.
	 * `irq_flags` must be restored when `region` is unlocked.
	 * the relevant page in `object` may or may not be committed.
	 * if it isn't, it needs to be requested.
	 */
	/* take the object lock BEFORE dropping the region lock so the area's
	 * object cannot disappear between the two operations */
	vm_object_lock(object);
	address_space_unlock(region);
	/* may block: VMO_REQUEST_MISSING_PAGE sends a request to the
	 * userspace controller and sleeps until it supplies the page */
	struct vm_page *pg = vm_object_get_page(
		object,
		object_offset,
		VMO_ALLOCATE_MISSING_PAGE | VMO_REQUEST_MISSING_PAGE,
		irq_flags);
	if (!pg) {
		vm_object_unlock_irqrestore(object, *irq_flags);
		return KERN_FATAL_ERROR;
	}
	/* now: `region` is unlocked, and `object` is locked */
	kern_status_t status = pmap_add(
		region->s_pmap,
		addr,
		vm_page_get_pfn(pg),
		prot,
		PMAP_NORMAL);
	vm_object_unlock_irqrestore(object, *irq_flags);
	return status;
}
/* this function must be called with `region` locked */
kern_status_t address_space_demand_map(
struct address_space *region,
@@ -1105,12 +1169,26 @@ kern_status_t address_space_demand_map(
return KERN_NO_ENTRY;
}
unsigned long irq_flags;
address_space_lock_irqsave(region, &irq_flags);
struct vm_area *area = get_entry(region, addr, GET_ENTRY_EXACT);
if (!area) {
if (!area || !area->vma_object) {
address_space_unlock_irqrestore(region, irq_flags);
return KERN_NO_ENTRY;
}
off_t object_offset = addr - area->vma_base + area->vma_object_offset;
if (area->vma_object->vo_ctrl) {
return request_missing_page(
region,
addr,
object_offset,
area->vma_object,
area->vma_prot,
flags,
&irq_flags);
}
#if 0
tracek("vm: tried to access vm-object %s at offset=%05llx",
@@ -1118,20 +1196,25 @@ kern_status_t address_space_demand_map(
object_offset);
#endif
unsigned long lock_flags;
vm_object_lock_irqsave(area->vma_object, &lock_flags);
struct vm_page *pg = vm_object_alloc_page(
/* simple case: this vm-object is not attached to a controller */
vm_object_lock(area->vma_object);
struct vm_page *pg = vm_object_get_page(
area->vma_object,
object_offset,
VM_PAGE_4K);
vm_object_unlock_irqrestore(area->vma_object, lock_flags);
// tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr);
return pmap_add(
VMO_ALLOCATE_MISSING_PAGE,
NULL);
// tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg),
// addr);
kern_status_t status = pmap_add(
region->s_pmap,
addr,
vm_page_get_pfn(pg),
area->vma_prot,
PMAP_NORMAL);
vm_object_unlock(area->vma_object);
address_space_unlock_irqrestore(region, irq_flags);
return status;
}
virt_addr_t address_space_get_base_address(const struct address_space *region)

View File

@@ -1,23 +1,112 @@
#include <kernel/equeue.h>
#include <kernel/sched.h>
#include <kernel/thread.h>
#include <kernel/util.h>
#include <kernel/vm-controller.h>
#include <kernel/vm-object.h>
#include <mango/signal.h>
/* Downcast helper: recover a vm_controller from its embedded object header,
 * type-checked against `vm_controller_type`. */
#define VM_CONTROLLER_CAST(p) \
	OBJECT_C_CAST(struct vm_controller, vc_base, &vm_controller_type, p)

/* Btree of vm-objects attached to a controller, linked through
 * `vo_ctrl_node` and keyed by `vo_key` (an equeue key chosen by userspace).
 * Generates put_object()/get_object() helpers. */
BTREE_DEFINE_SIMPLE_INSERT(struct vm_object, vo_ctrl_node, vo_key, put_object)
BTREE_DEFINE_SIMPLE_GET(
	struct vm_object,
	equeue_key_t,
	vo_ctrl_node,
	vo_key,
	get_object)

/* Object-type descriptor for vm-controllers, registered with the kernel
 * object system by vm_controller_type_init(). */
static struct object_type vm_controller_type = {
	.ob_name = "vm-controller",
	.ob_size = sizeof(struct vm_controller),
	.ob_header_offset = offsetof(struct vm_controller, vc_base),
};
/*
 * Register the vm-controller object type with the kernel object system.
 * Returns the status of object_type_register().
 *
 * Fix: removed a stale `return KERN_UNIMPLEMENTED;` that preceded the real
 * return statement and made it unreachable.
 */
kern_status_t vm_controller_type_init(void)
{
	return object_type_register(&vm_controller_type);
}
/*
 * Type-checked downcast from a generic object to a vm_controller.
 * Delegates to the VM_CONTROLLER_CAST helper (OBJECT_C_CAST).
 *
 * Fix: removed a stale `return NULL;` that preceded the real return
 * statement and made it unreachable.
 */
struct vm_controller *vm_controller_cast(struct object *obj)
{
	return VM_CONTROLLER_CAST(obj);
}
/*
 * Allocate a new vm-controller via the kernel object system.
 * Returns NULL if the underlying object allocation fails.
 */
struct vm_controller *vm_controller_create(void)
{
	struct object *base = object_create(&vm_controller_type);
	if (base == NULL) {
		return NULL;
	}
	return VM_CONTROLLER_CAST(base);
}
/*
 * Find the first PENDING page request on `ctrl`, mark it IN_PROGRESS, and
 * return it.  Returns NULL if no pending request exists.
 *
 * Lock contract: on a non-NULL return, `req->req_lock` is HELD — the caller
 * (vm_controller_recv) is responsible for releasing it after copying the
 * request out.  On the NULL path every lock taken here has been released.
 *
 * Also decrements `vc_requests_waiting` for the claimed request.
 * NOTE(review): this walks `vc_requests` without taking a controller-level
 * lock here — presumably the caller holds it; confirm against callers.
 */
static struct page_request *get_next_request(struct vm_controller *ctrl)
{
	struct btree_node *cur = btree_first(&ctrl->vc_requests);
	while (cur) {
		struct page_request *req
			= BTREE_CONTAINER(struct page_request, req_node, cur);
		spin_lock(&req->req_lock);
		if (req->req_status == PAGE_REQUEST_PENDING) {
			req->req_status = PAGE_REQUEST_IN_PROGRESS;
			ctrl->vc_requests_waiting--;
			/* returns with req_lock held */
			return req;
		}
		spin_unlock(&req->req_lock);
		cur = btree_next(cur);
	}
	return NULL;
}
/*
 * Dequeue the next pending page request from `ctrl` and copy it into the
 * userspace-visible packet `out`.  Returns KERN_NO_ENTRY when no request
 * is pending.  When the last waiting request has been claimed, the
 * REQUEST_RECEIVED signal on the controller is cleared.
 */
kern_status_t vm_controller_recv(
	struct vm_controller *ctrl,
	equeue_packet_page_request_t *out)
{
	/* get_next_request() returns with req_lock held on success */
	struct page_request *req = get_next_request(ctrl);
	if (req == NULL) {
		return KERN_NO_ENTRY;
	}
	if (ctrl->vc_requests_waiting == 0) {
		object_clear_signal(
			&ctrl->vc_base,
			VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED);
	}
	/* copy the request out while its lock pins the fields */
	out->req_type = req->req_type;
	out->req_offset = req->req_offset;
	out->req_length = req->req_length;
	out->req_vmo = req->req_object->vo_key;
	spin_unlock(&req->req_lock);
	return KERN_OK;
}
/*
 * Arrange for page requests on `ctrl` to be delivered asynchronously to the
 * event queue `eq` under `key`, replacing any previously registered queue.
 *
 * Fix: take the reference on the new queue BEFORE dropping the reference on
 * the old one.  The previous order could free `eq` out from under us when
 * the caller re-registers the already-installed queue whose only remaining
 * reference is the controller's.
 */
kern_status_t vm_controller_recv_async(
	struct vm_controller *ctrl,
	struct equeue *eq,
	equeue_key_t key)
{
	struct equeue *old = ctrl->vc_eq;
	object_ref(&eq->eq_base);
	ctrl->vc_eq = eq;
	ctrl->vc_eq_key = key;
	if (old) {
		object_unref(&old->eq_base);
	}
	return KERN_OK;
}
kern_status_t vm_controller_create_object(
struct vm_controller *ctrl,
const char *name,
@@ -27,14 +116,45 @@ kern_status_t vm_controller_create_object(
vm_prot_t prot,
struct vm_object **out)
{
return KERN_UNIMPLEMENTED;
struct vm_object *vmo = get_object(&ctrl->vc_objects, key);
if (vmo) {
return KERN_NAME_EXISTS;
}
vmo = vm_object_create(name, name_len, data_len, prot);
if (!vmo) {
return KERN_NO_MEMORY;
}
object_ref(&ctrl->vc_base);
object_ref(&vmo->vo_base);
vmo->vo_flags |= VMO_CONTROLLER;
vmo->vo_ctrl = ctrl;
vmo->vo_key = key;
put_object(&ctrl->vc_objects, vmo);
*out = vmo;
return KERN_OK;
}
/*
 * Detach `vmo` from the controller it was created under: remove it from the
 * controller's object tree and drop the cross-references taken in
 * vm_controller_create_object().  Returns KERN_INVALID_ARGUMENT if `vmo`
 * does not belong to `ctrl`.
 *
 * Fix: removed a stale `return KERN_UNIMPLEMENTED;` that preceded the real
 * body and made it unreachable.
 */
kern_status_t vm_controller_detach_object(
	struct vm_controller *ctrl,
	struct vm_object *vmo)
{
	if (vmo->vo_ctrl != ctrl) {
		return KERN_INVALID_ARGUMENT;
	}
	vmo->vo_ctrl = NULL;
	vmo->vo_key = 0;
	btree_delete(&ctrl->vc_objects, &vmo->vo_ctrl_node);
	/* drop the controller<->object references taken at attach time */
	object_unref(&ctrl->vc_base);
	object_unref(&vmo->vo_base);
	return KERN_OK;
}
static kern_status_t try_enqueue(struct btree *tree, struct page_request *req)
@@ -91,22 +211,103 @@ static void wait_for_reply(
break;
}
vm_controller_unlock_irqrestore(ctrl, *lock_flags);
spin_unlock_irqrestore(&req->req_lock, *lock_flags);
schedule(SCHED_NORMAL);
vm_controller_lock_irqsave(ctrl, lock_flags);
spin_lock_irqsave(&req->req_lock, lock_flags);
}
self->tr_state = THREAD_READY;
}
/*
 * Mark as COMPLETE, and wake the sender of, every request on `ctrl` that
 * targets `obj` and whose byte range overlaps the supplied range
 * [offset, offset + length).  `result` is propagated to each request as
 * its completion status.
 *
 * Fix: the old matching logic only tested whether either endpoint of the
 * request fell inside the supplied range, missing the case where the
 * request strictly CONTAINS the supplied range (req_base < offset and
 * req_limit > limit).  Replaced with the standard closed-interval overlap
 * test: req_base <= limit && req_limit >= offset.
 */
static void fulfill_requests(
	struct vm_controller *ctrl,
	struct vm_object *obj,
	off_t offset,
	size_t length,
	kern_status_t result)
{
	off_t limit = offset + length - 1;
	struct btree_node *cur = btree_first(&ctrl->vc_requests);
	while (cur) {
		struct page_request *req
			= BTREE_CONTAINER(struct page_request, req_node, cur);
		spin_lock(&req->req_lock);
		off_t req_base = req->req_offset;
		off_t req_limit = req->req_offset + req->req_length - 1;
		bool match = req->req_object == obj
			&& req_base <= limit
			&& req_limit >= offset;
		if (match) {
			req->req_status = PAGE_REQUEST_COMPLETE;
			req->req_result = result;
			thread_awaken(req->req_sender);
		}
		spin_unlock(&req->req_lock);
		cur = btree_next(cur);
	}
}
/*
 * Supply pages to a controller-backed vm-object: move `count` bytes worth of
 * pages from the ordinary object `src` into `dst`, then complete any pending
 * requests that the supplied range satisfies.
 *
 * Fails with KERN_INVALID_ARGUMENT if `src` is itself controller-backed or
 * if `dst` is not attached to `ctrl`.  The transfer status (even on partial
 * failure) is both propagated to the fulfilled requests and returned.
 */
kern_status_t vm_controller_supply_pages(
	struct vm_controller *ctrl,
	struct vm_object *dst,
	off_t dst_offset,
	struct vm_object *src,
	off_t src_offset,
	size_t count)
{
	bool src_is_ctrl_backed = (src->vo_flags & VMO_CONTROLLER) != 0;
	bool dst_owned_by_ctrl = dst->vo_ctrl == ctrl;
	if (src_is_ctrl_backed || !dst_owned_by_ctrl) {
		return KERN_INVALID_ARGUMENT;
	}
	kern_status_t status = vm_object_transfer(
		dst,
		dst_offset,
		src,
		src_offset,
		count,
		NULL);
	/* wake waiters even on failure so they observe the error status */
	fulfill_requests(ctrl, dst, dst_offset, count, status);
	return status;
}
kern_status_t vm_controller_send_request(
struct vm_controller *ctrl,
struct page_request *req)
struct page_request *req,
unsigned long *irq_flags)
{
fill_random(&req->req_id, sizeof req->req_id);
while (!try_enqueue(&ctrl->vc_requests, req)) {
req->req_id++;
}
ctrl->vc_requests_waiting++;
object_assert_signal(
&ctrl->vc_base,
VM_CONTROLLER_SIGNAL_REQUEST_RECEIVED);
vm_controller_unlock(ctrl);
wait_for_reply(ctrl, req, irq_flags);
spin_unlock_irqrestore(&req->req_lock, *irq_flags);
vm_controller_lock_irqsave(ctrl, irq_flags);
spin_lock(&req->req_lock);
btree_delete(&ctrl->vc_requests, &req->req_node);
return KERN_OK;
}

View File

@@ -1,6 +1,7 @@
#include <kernel/printk.h>
#include <kernel/sched.h>
#include <kernel/util.h>
#include <kernel/vm-controller.h>
#include <kernel/vm-object.h>
#define VM_OBJECT_CAST(p) \
@@ -40,15 +41,16 @@ static kern_status_t object_iterator_begin(
it->it_obj = obj;
it->it_alloc = alloc;
enum vm_object_flags flags = 0;
if (alloc) {
it->it_pg = vm_object_alloc_page(obj, 0, VM_PAGE_4K);
flags |= VMO_ALLOCATE_MISSING_PAGE;
}
if (!it->it_pg) {
return KERN_NO_MEMORY;
}
} else {
it->it_pg = vm_object_get_page(obj, 0);
it->it_pg = vm_object_get_page(obj, 0, flags, NULL);
if (alloc && !it->it_pg) {
return KERN_NO_MEMORY;
}
if (it->it_pg) {
@@ -83,17 +85,16 @@ static kern_status_t object_iterator_seek(
return KERN_OK;
}
if (it->it_alloc) {
it->it_pg = vm_object_alloc_page(
it->it_obj,
it->it_offset,
VM_PAGE_4K);
enum vm_object_flags flags = 0;
if (!it->it_pg) {
return KERN_NO_MEMORY;
}
} else {
it->it_pg = vm_object_get_page(it->it_obj, it->it_offset);
if (it->it_alloc) {
flags |= VMO_ALLOCATE_MISSING_PAGE;
}
it->it_pg = vm_object_get_page(it->it_obj, it->it_offset, flags, NULL);
if (it->it_alloc && !it->it_pg) {
return KERN_NO_MEMORY;
}
if (it->it_pg) {
@@ -248,36 +249,7 @@ extern struct vm_object *vm_object_create_in_place(
return vmo;
}
extern struct vm_page *vm_object_get_page(
const struct vm_object *vo,
off_t offset)
{
struct btree_node *cur = vo->vo_pages.b_root;
while (cur) {
struct vm_page *page
= BTREE_CONTAINER(struct vm_page, p_bnode, cur);
struct btree_node *next = NULL;
off_t base = page->p_vmo_offset;
off_t limit = base + vm_page_get_size_bytes(page);
if (offset < base) {
next = btree_left(cur);
} else if (offset >= limit) {
next = btree_right(cur);
} else {
return page;
}
cur = next;
}
return NULL;
}
extern struct vm_page *vm_object_alloc_page(
struct vm_object *vo,
off_t offset,
enum vm_page_order size)
static struct vm_page *alloc_page(struct vm_object *vo, off_t offset)
{
struct vm_page *page = NULL;
struct btree_node *cur = vo->vo_pages.b_root;
@@ -340,6 +312,87 @@ extern struct vm_page *vm_object_alloc_page(
return NULL;
}
/*
 * Look up the committed page of `vo` covering byte `offset` by descending
 * the object's page btree.  Returns NULL when no page covers the offset.
 */
static struct vm_page *get_page(struct vm_object *vo, off_t offset)
{
	struct btree_node *node = vo->vo_pages.b_root;
	while (node != NULL) {
		struct vm_page *pg
			= BTREE_CONTAINER(struct vm_page, p_bnode, node);
		off_t lo = pg->p_vmo_offset;
		off_t hi = lo + vm_page_get_size_bytes(pg);
		if (offset < lo) {
			node = btree_left(node);
			continue;
		}
		if (offset >= hi) {
			node = btree_right(node);
			continue;
		}
		/* lo <= offset < hi: this page covers the offset */
		return pg;
	}
	return NULL;
}
/*
 * Ask the userspace controller of `vo` for the (4K) page at `offset`,
 * blocking until the controller supplies it or the request fails.
 *
 * Lock protocol: entered with `vo` locked (it is unlocked below before the
 * controller lock is taken, to respect lock ordering) and returns with `vo`
 * locked again.  `irq_flags` carries the saved interrupt state across the
 * unlock/relock sequence.
 *
 * NOTE(review): `req` is stack-allocated and zero-initialized; this assumes
 * a zeroed spinlock / PAGE_REQUEST fields are valid initial state — confirm
 * against the spinlock implementation.  `req_type` is left at 0.
 */
static kern_status_t request_page(
	struct vm_object *vo,
	off_t offset,
	unsigned long *irq_flags)
{
	struct vm_controller *ctrl = vo->vo_ctrl;
	struct page_request req = {0};
	req.req_status = PAGE_REQUEST_PENDING;
	req.req_offset = offset;
	/* requests are always a single smallest page */
	req.req_length = vm_page_order_to_bytes(VM_PAGE_4K);
	req.req_sender = current_thread();
	/* pin the object while the request references it */
	object_ref(&vo->vo_base);
	req.req_object = vo;
	/* drop the object lock before taking the controller lock */
	vm_object_unlock_irqrestore(vo, *irq_flags);
	vm_controller_lock_irqsave(ctrl, irq_flags);
	spin_lock(&req.req_lock);
	kern_status_t status
		= vm_controller_send_request(ctrl, &req, irq_flags);
	spin_unlock(&req.req_lock);
	vm_controller_unlock_irqrestore(ctrl, *irq_flags);
	object_unref(&vo->vo_base);
	/* restore the entry invariant: `vo` locked */
	vm_object_lock_irqsave(vo, irq_flags);
	return status;
}
/*
 * Return the page of `vo` covering `offset`, optionally materializing it.
 *
 * - Plain object + VMO_ALLOCATE_MISSING_PAGE: get-or-allocate via
 *   alloc_page().
 * - Otherwise, look up the committed page; on a miss, a controller-backed
 *   object may block and request the page from its userspace pager, but
 *   ONLY when the caller passed VMO_REQUEST_MISSING_PAGE and supplied
 *   `irq_flags` for the lock hand-off.
 *
 * Fix: previously any miss on a controller-backed object entered
 * request_page(), ignoring VMO_REQUEST_MISSING_PAGE — callers that pass
 * `flags = 0, irq_flags = NULL` (e.g. vm_object_transfer) would have
 * dereferenced the NULL `irq_flags` inside request_page().  The request
 * path is now gated on the flag and a non-NULL `irq_flags`.
 */
struct vm_page *vm_object_get_page(
	struct vm_object *vo,
	off_t offset,
	enum vm_object_flags flags,
	unsigned long *irq_flags)
{
	if (!vo->vo_ctrl && (flags & VMO_ALLOCATE_MISSING_PAGE)) {
		return alloc_page(vo, offset);
	}
	struct vm_page *pg = get_page(vo, offset);
	if (pg) {
		return pg;
	}
	if (!vo->vo_ctrl) {
		return NULL;
	}
	if (!(flags & VMO_REQUEST_MISSING_PAGE) || !irq_flags) {
		/* caller did not opt in to (or cannot support) a blocking
		 * request to the controller */
		return NULL;
	}
	kern_status_t status = request_page(vo, offset, irq_flags);
	if (status != KERN_OK) {
		return NULL;
	}
	return get_page(vo, offset);
}
#if 0
/* read data from a vm-object, where [offset, offset+count] is confined to
* a single page */
@@ -800,7 +853,7 @@ kern_status_t vm_object_transfer(
size_t moved = 0;
for (size_t i = 0; i < count; i += VM_PAGE_SIZE) {
struct vm_page *src_pg
= vm_object_get_page(src, src_offset + i);
= vm_object_get_page(src, src_offset + i, 0, NULL);
if (!src_pg) {
continue;
}
@@ -808,7 +861,7 @@ kern_status_t vm_object_transfer(
btree_delete(&src->vo_pages, &src_pg->p_bnode);
struct vm_page *dst_pg
= vm_object_get_page(src, dst_offset + i);
= vm_object_get_page(src, dst_offset + i, 0, NULL);
if (dst_pg) {
vm_page_free(src_pg);
continue;