vm: add vm-region to manage userspace virtual memory address spaces

vm-region supports creating nested regions of virtual memory, each with its
own memory protection restrictions.
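
as an illustration, creating a nested region could look like this
(root_region and the VM_PROT_* flag names are placeholders; the API is
the one added by this commit):

    struct vm_region *stack_region;
    kern_status_t status = vm_region_create(
            root_region,               /* parent region */
            "user-stack",
            VM_REGION_ANY_MAP_ADDRESS, /* let the region pick a base */
            16 * VM_PAGE_SIZE,
            VM_PROT_READ | VM_PROT_WRITE, /* must be a subset of the
                                           * parent's protection */
            &stack_region);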

vm-objects can be mapped into a vm-region, making the underlying memory
accessible. all mappings are lazy: page tables are not updated until the
mapped memory is accessed.
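
for example, mapping a vm-object into the region created above
(stack_object stands in for a vm-object obtained elsewhere):

    virt_addr_t mapped_at;
    status = vm_region_map_object(
            stack_region,
            VM_REGION_ANY_MAP_ADDRESS,
            stack_object,
            0,                         /* offset into the object */
            16 * VM_PAGE_SIZE,
            VM_PROT_READ | VM_PROT_WRITE,
            &mapped_at);

    /* no page tables are touched here; the first access to the
     * mapping faults and vm_region_demand_map() installs it */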

commit 883b5ac9e2 (parent b8ccffd2d4)
2026-02-08 12:59:08 +00:00
3 changed files with 831 additions and 0 deletions

@@ -4,6 +4,7 @@
#include <mango/printk.h>
#include <mango/status.h>
#include <mango/vm-object.h>
#include <mango/vm-region.h>
#include <mango/vm.h>
#include <stddef.h>
#include <stdint.h>
@@ -41,6 +42,7 @@ kern_status_t vm_bootstrap(
kmalloc_init();
vm_object_type_init();
vm_region_type_init();
return KERN_OK;
}

vm/vm-region.c (new file)

@@ -0,0 +1,703 @@
#include <mango/libc/stdio.h>
#include <mango/libc/string.h> /* strncpy, memset; header path assumed */
#include <mango/object.h>
#include <mango/panic.h>
#include <mango/printk.h>
#include <mango/status.h>
#include <mango/util.h>
#include <mango/vm-object.h>
#include <mango/vm-region.h>
/* ASLR is disabled for now; with ASLR defined, free areas are picked
 * by find_free_area_random() instead of the linear search */
#undef ASLR
enum search_direction {
SEARCH_LEFT,
SEARCH_RIGHT,
};
#define VM_REGION_CAST(p) \
OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p)
static struct object_type vm_region_type = {
.ob_name = "vm-region",
.ob_size = sizeof(struct vm_region),
.ob_header_offset = offsetof(struct vm_region, vr_base),
};
static struct vm_cache mapping_cache = {
.c_name = "vm-region-mapping",
.c_obj_size = sizeof(struct vm_region_mapping),
};
struct entry_pair {
struct vm_region_entry *p_left, *p_right;
};
kern_status_t vm_region_type_init(void)
{
vm_cache_init(&mapping_cache);
return object_type_register(&vm_region_type);
}
static virt_addr_t find_free_area_linear(
struct vm_region *region,
size_t target_length);
static virt_addr_t find_free_area_random(
struct vm_region *region,
size_t target_length);
static void put_entry(struct vm_region *parent, struct vm_region_entry *child)
{
struct btree_node *cur = parent->vr_entries.b_root;
if (!cur) {
parent->vr_entries.b_root = &child->e_node;
btree_insert_fixup(&parent->vr_entries, &child->e_node);
return;
}
virt_addr_t child_base = child->e_base_address;
virt_addr_t child_limit = child_base + child->e_size - 1;
while (cur) {
struct vm_region_entry *cur_entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
struct btree_node *next = NULL;
virt_addr_t cur_base = cur_entry->e_base_address;
virt_addr_t cur_limit = cur_base + cur_entry->e_size - 1;
if (child_limit < cur_base) {
next = btree_left(cur);
} else if (child_base > cur_limit) {
next = btree_right(cur);
} else {
panic("tried to add an overlapping entry to vm-region");
}
if (next) {
cur = next;
continue;
}
if (child_limit < cur_base) {
btree_put_left(cur, &child->e_node);
} else {
btree_put_right(cur, &child->e_node);
}
btree_insert_fixup(&parent->vr_entries, &child->e_node);
break;
}
}
static struct vm_region *vm_region_from_entry(struct vm_region_entry *entry)
{
if (entry->e_type != VM_REGION_ENTRY_REGION) {
return NULL;
}
return BTREE_CONTAINER(struct vm_region, vr_entry, entry);
}
static struct vm_region_mapping *vm_region_mapping_from_entry(
struct vm_region_entry *entry)
{
if (entry->e_type != VM_REGION_ENTRY_MAPPING) {
return NULL;
}
return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry);
}
kern_status_t vm_region_create(
struct vm_region *parent,
const char *name,
virt_addr_t base,
size_t len,
enum vm_prot prot,
struct vm_region **out)
{
if (!base || !len) {
return KERN_INVALID_ARGUMENT;
}
if (len & VM_PAGE_MASK) {
len &= ~VM_PAGE_MASK;
len += VM_PAGE_SIZE;
}
if (parent) {
if ((prot & parent->vr_prot) != prot) {
/* child region protection must match or be a
* subset of parent region protection */
return KERN_INVALID_ARGUMENT;
}
if (base == VM_REGION_ANY_MAP_ADDRESS) {
#ifdef ASLR
base = find_free_area_random(parent, len);
#else
base = find_free_area_linear(parent, len);
#endif
base &= ~VM_PAGE_MASK;
if (base == 0) {
return KERN_NO_MEMORY;
}
} else if (!vm_region_is_area_free(parent, base, len)) {
return KERN_INVALID_ARGUMENT;
}
}
struct object *region_object = object_create(&vm_region_type);
if (!region_object) {
return KERN_NO_MEMORY;
}
struct vm_region *region = VM_REGION_CAST(region_object);
region->vr_prot = prot;
region->vr_entry.e_type = VM_REGION_ENTRY_REGION;
region->vr_entry.e_base_address = base;
region->vr_entry.e_size = len;
if (parent) {
region->vr_entry.e_parent = &parent->vr_entry;
region->vr_pmap = parent->vr_pmap;
put_entry(parent, &region->vr_entry);
}
if (name) {
strncpy(region->vr_name, name, sizeof region->vr_name);
region->vr_name[sizeof region->vr_name - 1] = '\0';
}
*out = region;
return KERN_OK;
}
static struct vm_region_entry *vm_region_find_entry(
struct vm_region *region,
virt_addr_t addr)
{
struct btree_node *cur = region->vr_entries.b_root;
if (!cur) {
return NULL;
}
struct vm_region_entry *result = NULL;
while (cur) {
struct vm_region_entry *child
= BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
struct btree_node *next = NULL;
virt_addr_t child_limit
= child->e_base_address + child->e_size - 1;
if (addr < child->e_base_address) {
next = btree_left(cur);
} else if (addr > child_limit) {
next = btree_right(cur);
} else {
result = child;
break;
}
cur = next;
}
return result;
}
struct vm_region *vm_region_find_child(
struct vm_region *region,
virt_addr_t addr)
{
struct vm_region_entry *result = vm_region_find_entry(region, addr);
if (!result || result->e_type != VM_REGION_ENTRY_REGION) {
return region;
}
return vm_region_from_entry(result);
}
struct vm_region *vm_region_find_child_for_area(
struct vm_region *region,
virt_addr_t base,
size_t len)
{
virt_addr_t limit = base + len - 1;
while (region) {
struct btree_node *cur = region->vr_entries.b_root;
if (!cur) {
break;
}
bool found_new_region = false;
while (cur) {
struct vm_region_entry *child = BTREE_CONTAINER(
struct vm_region_entry,
e_node,
cur);
struct btree_node *next = NULL;
virt_addr_t child_base = child->e_base_address;
virt_addr_t child_limit
= child_base + child->e_size - 1;
if (limit < child_base) {
next = btree_left(cur);
} else if (base > child_limit) {
next = btree_right(cur);
} else if (base >= child_base && limit <= child_limit) {
region = vm_region_from_entry(child);
found_new_region = true;
break;
} else {
return NULL;
}
cur = next;
}
if (!found_new_region) {
break;
}
}
return region;
}
struct vm_region_mapping *vm_region_find_mapping(
struct vm_region *region,
virt_addr_t addr)
{
struct vm_region_entry *result = vm_region_find_entry(region, addr);
if (!result) {
return NULL;
}
return vm_region_mapping_from_entry(result);
}
static struct vm_region_entry *get_random_child(struct vm_region *region)
{
enum {
STEP_LEFT = 0,
STEP_RIGHT = 1,
STEP_FINISH = 2,
} step;
struct btree_node *result = NULL;
struct btree_node *cur = region->vr_entries.b_root;
if (!cur) {
return NULL;
}
while (1) {
unsigned long r;
fill_random(&r, sizeof r);
struct btree_node *next = NULL;
step = r % 3;
switch (step) {
case STEP_LEFT:
next = btree_left(cur);
break;
case STEP_RIGHT:
next = btree_right(cur);
break;
case STEP_FINISH:
result = cur;
break;
default:
return NULL;
}
if (!next) {
result = cur;
break;
}
cur = next;
}
if (!result) {
return NULL;
}
return BTREE_CONTAINER(struct vm_region_entry, e_node, result);
}
static virt_addr_t find_free_area_linear_ex(
struct vm_region *region,
size_t target_length,
struct btree_node *start,
enum search_direction direction)
{
if (region->vr_entry.e_size < target_length) {
return 0;
}
struct btree_node *left_node = NULL, *right_node = NULL;
switch (direction) {
case SEARCH_LEFT:
right_node = start;
/* btree_prev()/btree_next() yield the in-order neighbour, which
 * bounds the adjacent free gap; a child node would not */
left_node = start ? btree_prev(start) : NULL;
break;
case SEARCH_RIGHT:
left_node = start;
right_node = start ? btree_next(start) : NULL;
break;
default:
return 0;
}
if (!left_node && !right_node) {
return 0;
}
while (1) {
/* BTREE_CONTAINER() of a NULL node is not NULL, so map missing
 * neighbours to NULL entries explicitly */
struct vm_region_entry *left = left_node
? BTREE_CONTAINER(struct vm_region_entry, e_node, left_node)
: NULL;
struct vm_region_entry *right = right_node
? BTREE_CONTAINER(struct vm_region_entry, e_node, right_node)
: NULL;
/* addresses of the first and last free bytes in the area
* respectively. */
virt_addr_t area_base, area_limit;
if (left && right) {
area_base = left->e_base_address + left->e_size;
area_limit = right->e_base_address - 1;
} else if (right) {
area_base = region->vr_entry.e_base_address;
area_limit = right->e_base_address - 1;
} else if (left) {
area_base = left->e_base_address + left->e_size;
area_limit = region->vr_entry.e_base_address
+ region->vr_entry.e_size - 1;
} else {
return 0;
}
size_t area_size = 0;
if (area_limit >= area_base) {
area_size = area_limit - area_base + 1;
}
if (area_size >= target_length) {
return area_base;
}
if (direction == SEARCH_RIGHT) {
if (!right_node) {
/* no entries further right; the tail gap was too small */
return 0;
}
left_node = right_node;
right_node = btree_next(right_node);
} else {
if (!left_node) {
/* no entries further left; the head gap was too small */
return 0;
}
right_node = left_node;
left_node = btree_prev(right_node);
}
}
return 0;
}
static virt_addr_t find_free_area_linear(
struct vm_region *region,
size_t target_length)
{
if (!region->vr_entries.b_root) {
return region->vr_entry.e_base_address;
}
return find_free_area_linear_ex(
region,
target_length,
btree_first(&region->vr_entries),
SEARCH_RIGHT);
}
static virt_addr_t random_address(
virt_addr_t area_base,
size_t area_length,
size_t target_length)
{
if (area_length < target_length) {
return 0;
}
/* unsigned offset: a signed value could make the modulo negative */
size_t random_range = area_length - target_length;
size_t offset = 0;
if (random_range == 0) {
return area_base;
}
fill_random(&offset, sizeof offset);
offset %= random_range;
return area_base + offset;
}
static virt_addr_t find_free_area_random(
struct vm_region *region,
size_t target_length)
{
unsigned int tmp = 0;
struct btree_node *node = NULL;
struct vm_region_entry *basis = get_random_child(region);
fill_random(&tmp, sizeof tmp);
enum search_direction direction = tmp % 2;
struct vm_region_entry *left = NULL, *right = NULL;
if (direction == SEARCH_LEFT) {
/* the in-order neighbour bounds the adjacent free gap */
node = basis ? btree_prev(&basis->e_node) : NULL;
right = basis;
left = node
? BTREE_CONTAINER(struct vm_region_entry, e_node, node)
: NULL;
} else {
node = basis ? btree_next(&basis->e_node) : NULL;
left = basis;
right = node
? BTREE_CONTAINER(struct vm_region_entry, e_node, node)
: NULL;
}
virt_addr_t base = region->vr_entry.e_base_address,
limit = base + region->vr_entry.e_size - 1;
if (left) {
/* first free byte after the left neighbour */
base = left->e_base_address + left->e_size;
}
if (right) {
/* last free byte before the right neighbour */
limit = right->e_base_address - 1;
}
if (limit < base) {
return 0;
}
return random_address(base, limit - base + 1, target_length);
}
kern_status_t vm_region_map_object(
struct vm_region *region,
virt_addr_t map_address,
struct vm_object *object,
off_t object_offset,
size_t length,
enum vm_prot prot,
virt_addr_t *out)
{
object_offset &= ~VM_PAGE_MASK;
if (length & VM_PAGE_MASK) {
length &= ~VM_PAGE_MASK;
length += VM_PAGE_SIZE;
}
if (!region || !object || !out) {
return KERN_INVALID_ARGUMENT;
}
if ((prot & region->vr_prot) != prot) {
return KERN_INVALID_ARGUMENT;
}
if ((prot & object->vo_prot) != prot) {
return KERN_INVALID_ARGUMENT;
}
if (!length || object_offset + length > object->vo_size) {
return KERN_INVALID_ARGUMENT;
}
if (map_address != VM_REGION_ANY_MAP_ADDRESS) {
region = vm_region_find_child_for_area(
region,
map_address,
length);
}
if (!region) {
return KERN_INVALID_ARGUMENT;
}
if (map_address == VM_REGION_ANY_MAP_ADDRESS) {
#ifdef ASLR
map_address = find_free_area_random(region, length);
#else
map_address = find_free_area_linear(region, length);
#endif
map_address &= ~VM_PAGE_MASK;
if (map_address == 0) {
return KERN_NO_MEMORY;
}
} else if (!vm_region_is_area_free(region, map_address, length)) {
return KERN_INVALID_ARGUMENT;
}
struct vm_region_mapping *mapping
= vm_cache_alloc(&mapping_cache, VM_NORMAL);
if (!mapping) {
return KERN_NO_MEMORY;
}
tracek("mapping %s at [%llx-%llx]",
object->vo_name,
map_address,
map_address + length);
mapping->m_object = object;
mapping->m_prot = prot;
mapping->m_object_offset = object_offset;
mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING;
mapping->m_entry.e_parent = &region->vr_entry;
mapping->m_entry.e_base_address = map_address;
mapping->m_entry.e_size = length;
put_entry(region, &mapping->m_entry);
queue_push_back(&object->vo_mappings, &mapping->m_object_entry);
*out = map_address;
return KERN_OK;
}
bool vm_region_is_area_free(
const struct vm_region *region,
virt_addr_t base,
size_t len)
{
/* address of the last byte in the region */
virt_addr_t region_limit
= region->vr_entry.e_base_address + region->vr_entry.e_size - 1;
if (base < region->vr_entry.e_base_address || base > region_limit) {
return false;
}
if (base + len - 1 > region_limit) {
return false;
}
virt_addr_t limit = base + len - 1;
struct btree_node *cur = region->vr_entries.b_root;
if (!cur) {
return true;
}
while (cur) {
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
struct btree_node *next = NULL;
virt_addr_t entry_limit
= entry->e_base_address + entry->e_size - 1;
if (base > entry_limit) {
next = btree_right(cur);
} else if (limit < entry->e_base_address) {
next = btree_left(cur);
} else {
return false;
}
cur = next;
}
return true;
}
kern_status_t vm_region_demand_map(
struct vm_region *region,
virt_addr_t addr,
enum pmap_fault_flags flags)
{
addr &= ~VM_PAGE_MASK;
region = vm_region_find_child(region, addr);
struct vm_region_mapping *mapping
= vm_region_find_mapping(region, addr);
if (!mapping) {
return KERN_NO_ENTRY;
}
off_t offset = addr - mapping->m_entry.e_base_address
+ mapping->m_object_offset;
tracek("vm: tried to access vm-object %s at offset=%05llx",
mapping->m_object->vo_name,
offset);
struct vm_page *pg
= vm_object_alloc_page(mapping->m_object, offset, VM_PAGE_4K);
if (!pg) {
return KERN_NO_MEMORY;
}
tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr);
return pmap_add(
region->vr_pmap,
addr,
vm_page_get_pfn(pg),
mapping->m_prot,
PMAP_NORMAL);
}
#ifdef TRACE
void vm_region_dump(struct vm_region *region, int depth)
{
char line[128] = {0};
size_t p = 0;
for (int i = 0; i < depth; i++) {
p += snprintf(line + p, sizeof line - p, " ");
}
p += snprintf(
line + p,
sizeof line - p,
"region: %s [%llx-%llx]",
region->vr_name,
region->vr_entry.e_base_address,
region->vr_entry.e_base_address + region->vr_entry.e_size);
printk("%s", line);
struct btree_node *cur = btree_first(&region->vr_entries);
while (cur) {
memset(line, 0x0, sizeof line);
p = 0;
for (int i = 0; i < depth + 1; i++) {
p += snprintf(line + p, sizeof line - p, " ");
}
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
struct vm_region *child_region = vm_region_from_entry(entry);
struct vm_region_mapping *child_mapping
= vm_region_mapping_from_entry(entry);
switch (entry->e_type) {
case VM_REGION_ENTRY_REGION:
break;
case VM_REGION_ENTRY_MAPPING:
p += snprintf(
line + p,
sizeof line - p,
"mapping: %s [%llx-%llx] -> [%llx-%llx]",
child_mapping->m_object->vo_name,
child_mapping->m_object_offset,
child_mapping->m_object_offset
+ child_mapping->m_entry.e_size,
child_mapping->m_entry.e_base_address,
child_mapping->m_entry.e_base_address
+ child_mapping->m_entry.e_size);
printk("%s", line);
break;
default:
p += snprintf(line + p, sizeof line - p, "invalid");
printk("%s", line);
break;
}
if (child_region) {
vm_region_dump(child_region, depth + 1);
}
cur = btree_next(cur);
}
}
#endif