vm: replace vm-region with address-space

address-space is a non-recursive data structure, which contains a flat list of vm_areas representing
mapped vm-objects.

userspace programs can no longer create sub-address-spaces. instead, they can reserve portions of
the address space, and use that reserved space to create mappings.
This commit is contained in:
2026-03-13 19:44:50 +00:00
parent c6b0bee827
commit c628390f4a
28 changed files with 1719 additions and 2612 deletions

View File

@@ -0,0 +1,161 @@
#ifndef KERNEL_ADDRESS_SPACE_H_
#define KERNEL_ADDRESS_SPACE_H_
#include <kernel/object.h>
#include <kernel/pmap.h>
#include <kernel/vm.h>
/* sentinel byte count equal to the largest value a size_t can hold.
 * NOTE(review): nothing in this header references it; presumably it is
 * passed as a length to mean "copy everything" -- confirm against its users. */
#define ADDRESS_SPACE_COPY_ALL ((size_t)-1)
struct address_space;
struct vm_object;
/* one contiguous range of virtual addresses, [vma_base, vma_limit], tracked
 * by a struct address_space. an area either maps part of a vm-object, or
 * (when vma_object is NULL) marks a reserved range. */
struct vm_area {
/* the vm-object mapped into this area.
 * if this is NULL, the vm_area represents an area of reserved memory.
 * it cannot be accessed, and mapping operations with MAP_ADDRESS_ANY
 * will avoid the area, but fixed address mappings in this area
 * will succeed. */
struct vm_object *vma_object;
/* used to link to vm_object->vo_mappings */
struct queue_entry vma_object_entry;
/* the memory protection flags applied to this area */
vm_prot_t vma_prot;
/* offset in bytes to the start of the object data that was mapped */
off_t vma_object_offset;
/* used to link to address_space->s_mappings.
 * NOTE(review): address_space->s_reserved is also documented as a btree of
 * struct vm_area, so reserved areas presumably link into that tree through
 * this same node -- confirm. */
struct btree_node vma_node;
/* address of the first byte in this area */
virt_addr_t vma_base;
/* address of the last byte in this area (inclusive, so the area spans
 * vma_limit - vma_base + 1 bytes) */
virt_addr_t vma_limit;
};
/* a virtual address space: its bounds, the areas mapped and reserved within
 * it, and the physical map that backs it.
 * the structure is non-recursive: it holds vm_areas directly and has no
 * child address spaces. */
struct address_space {
/* embedded base object; this is the member named when the lock helpers are
 * generated with DEFINE_OBJECT_LOCK_FUNCTION(address_space, s_base). */
struct object s_base;
/* address of the first byte in this address space */
virt_addr_t s_base_address;
/* address of the last byte in this address space (inclusive) */
virt_addr_t s_limit_address;
/* btree of struct vm_area representing mapped vm-objects.
 * sibling entries cannot overlap each other. */
struct btree s_mappings;
/* btree of struct vm_area representing reserved regions of the
 * address space.
 * reserved regions will not be automatically allocated by the kernel.
 * sibling entries cannot overlap each other.
 * overlap between s_mappings and s_reserved IS allowed. */
struct btree s_reserved;
/* the corresponding physical address space */
pmap_t s_pmap;
};
/* one-time initialisation for the address-space object type.
 * NOTE(review): purpose inferred from the name only; the implementation is
 * not visible from this header -- confirm when it must be called. */
extern kern_status_t address_space_type_init(void);
/* convert a generic object pointer into an address_space pointer.
 * NOTE(review): presumably returns NULL when `obj` is not an address space;
 * the failure behaviour is not visible from this header -- confirm. */
extern struct address_space *address_space_cast(struct object *obj);
/* create a new address space.
 * address spaces are flat: there is no parent, and no sub-address-space can
 * be created inside another one.
 * `base` and `limit` are virtual addresses delimiting the new address space.
 * NOTE(review): presumably these become s_base_address (first byte) and
 * s_limit_address (last byte, inclusive) -- confirm that `limit` is inclusive.
 * `out` receives the pointer to the new address space; presumably it is only
 * written on success -- confirm. */
extern kern_status_t address_space_create(
virt_addr_t base,
virt_addr_t limit,
struct address_space **out);
/* map a vm-object into an address space.
 * `map_address` is the virtual address at which to create the mapping.
 * passing MAP_ADDRESS_ANY asks the kernel to choose the address; such
 * mappings avoid reserved areas, whereas a fixed-address mapping placed
 * inside a reserved area will succeed.
 * `object_offset` is the offset in bytes to the start of the object data to
 * map, and `length` is the size of the mapping in bytes.
 * `prot` gives the memory protection flags for the new mapping.
 * NOTE(review): presumably `prot` must also be allowed by the vm-object being
 * mapped, as was required for vm-regions -- confirm.
 * `out` presumably receives the base address of the new mapping -- confirm. */
extern kern_status_t address_space_map(
struct address_space *space,
virt_addr_t map_address,
struct vm_object *object,
off_t object_offset,
size_t length,
vm_prot_t prot,
virt_addr_t *out);
/* remove mappings from an address space.
 * `base` is a virtual address and `length` a size in bytes.
 * NOTE(review): behaviour when the range only partially covers a mapping is
 * not visible from this header -- confirm.
 * NOTE(review): the first parameter is still named `region`, left over from
 * the vm-region API; sibling functions call it `space`. */
extern kern_status_t address_space_unmap(
struct address_space *region,
virt_addr_t base,
size_t length);
/* reserve an area of the address space. the kernel will not place any
 * new mappings in this area unless explicitly told to (i.e. by mapping at a
 * fixed address instead of using MAP_ADDRESS_ANY).
 * pass MAP_ADDRESS_ANY as `base` to have the kernel allocate a region
 * of the address space for you.
 * `length` is the size of the reservation in bytes.
 * `out` presumably receives the base address of the reserved area -- confirm. */
extern kern_status_t address_space_reserve(
struct address_space *space,
virt_addr_t base,
size_t length,
virt_addr_t *out);
/* release a previously reserved area of the address space.
 * `base` is a virtual address and `length` a size in bytes.
 * NOTE(review): whether the range must match an earlier reservation exactly,
 * and what happens to mappings created inside it, is not visible from this
 * header -- confirm. */
extern kern_status_t address_space_release(
struct address_space *space,
virt_addr_t base,
size_t length);
/* check whether an access of `len` bytes starting at virtual address `base`
 * is permitted with the protection flags in `prot`.
 * returns true if the access is allowed, false otherwise.
 * NOTE(review): presumably the caller must hold the address-space lock
 * (the syscall helper __validate_access takes it around this call) -- confirm.
 * NOTE(review): the first parameter is still named `region`, left over from
 * the vm-region API. */
extern bool address_space_validate_access(
struct address_space *region,
virt_addr_t base,
size_t len,
vm_prot_t prot);
/* find the mapping corresponding to the given virtual address, and page-in the
 * necessary vm_page to allow the memory access to succeed. if the relevant
 * vm-object page hasn't been allocated yet, it will be allocated here.
 * `addr` is the virtual address being accessed and `flags` describes the
 * fault (enum pmap_fault_flags).
 * NOTE(review): the first parameter is still named `region`, left over from
 * the vm-region API. */
extern kern_status_t address_space_demand_map(
struct address_space *region,
virt_addr_t addr,
enum pmap_fault_flags flags);
/* read data from the user-space area of an address space into a kernel-mode
 * buffer.
 * `src_ptr` is the virtual address to read from within `src_region`,
 * `count` the number of bytes requested, and `dest` the kernel buffer.
 * `nr_read` presumably receives the number of bytes actually read -- confirm.
 * NOTE(review): `src_region` keeps its vm-region-era name but is an address
 * space. */
extern kern_status_t address_space_read(
struct address_space *src_region,
virt_addr_t src_ptr,
size_t count,
void *dest,
size_t *nr_read);
/* write data to the user-space area of an address space from a kernel-mode
 * buffer.
 * `dst_ptr` is the virtual address to write to within `dst_region`,
 * `count` the number of bytes requested, and `src` the kernel buffer.
 * `nr_written` presumably receives the number of bytes actually written --
 * confirm.
 * NOTE(review): `dst_region` keeps its vm-region-era name but is an address
 * space. */
extern kern_status_t address_space_write(
struct address_space *dst_region,
virt_addr_t dst_ptr,
size_t count,
const void *src,
size_t *nr_written);
/* copy `count` bytes from virtual address `src_ptr` in `src_space` to virtual
 * address `dest_ptr` in `dest_space`.
 * NOTE(review): presumably `nr_moved` receives the number of bytes actually
 * copied, and the two spaces may be the same -- neither is visible from this
 * header; confirm. */
extern kern_status_t address_space_memmove(
struct address_space *dest_space,
virt_addr_t dest_ptr,
struct address_space *src_space,
virt_addr_t src_ptr,
size_t count,
size_t *nr_moved);
/* vectored variant of address_space_memmove: the source and destination are
 * each described by an array of kern_iovec_t (`src_iov`/`nr_src_iov` and
 * `dest_iov`/`nr_dest_iov`) rather than a single pointer.
 * NOTE(review): presumably `src_offset` and `dest_offset` are byte offsets
 * into the data described by the respective iovec arrays, `bytes_to_move`
 * caps the total transfer, and `nr_bytes_moved` receives the number of bytes
 * actually copied -- confirm against the implementation. */
extern kern_status_t address_space_memmove_v(
struct address_space *dest_space,
size_t dest_offset,
const kern_iovec_t *dest_iov,
size_t nr_dest_iov,
struct address_space *src_space,
size_t src_offset,
const kern_iovec_t *src_iov,
size_t nr_src_iov,
size_t bytes_to_move,
size_t *nr_bytes_moved);
/* debugging aid that dumps the state of an address space.
 * NOTE(review): output destination and format are not visible from this
 * header -- confirm. unlike its siblings this declaration omits `extern`
 * and still names its parameter `region`. */
void address_space_dump(struct address_space *region);
/* generate the address_space lock helpers, using the embedded s_base object.
 * the syscall code calls address_space_lock_irqsave() and
 * address_space_unlock_irqrestore(); presumably both are produced by this
 * macro (its definition is not visible here). */
DEFINE_OBJECT_LOCK_FUNCTION(address_space, s_base)
#endif

View File

@@ -37,7 +37,7 @@ extern kern_status_t channel_read_msg(
struct channel *channel,
msgid_t msg,
size_t offset,
struct vm_region *dest_region,
struct address_space *dest_region,
const kern_iovec_t *dest_iov,
size_t dest_iov_count,
size_t *nr_read);
@@ -45,7 +45,7 @@ extern kern_status_t channel_write_msg(
struct channel *channel,
msgid_t msg,
size_t offset,
struct vm_region *src_region,
struct address_space *src_region,
const kern_iovec_t *src_iov,
size_t src_iov_count,
size_t *nr_written);

View File

@@ -17,7 +17,7 @@ typedef uintptr_t handle_flags_t;
struct task;
struct object;
struct vm_region;
struct address_space;
struct handle_list;
struct handle {
@@ -57,11 +57,11 @@ extern struct handle *handle_table_get_handle(
kern_handle_t handle);
extern kern_status_t handle_table_transfer(
struct vm_region *dst_region,
struct address_space *dst_region,
struct handle_table *dst,
kern_msg_handle_t *dst_handles,
size_t dst_handles_max,
struct vm_region *src_region,
struct address_space *src_region,
struct handle_table *src,
kern_msg_handle_t *src_handles,
size_t src_handles_count);

View File

@@ -4,10 +4,12 @@
#include <mango/types.h>
#include <stddef.h>
struct address_space;
struct iovec_iterator {
/* if this is set, we are iterating over a list of iovecs stored in
* userspace, and must go through this region to retrieve the data. */
struct vm_region *it_region;
struct address_space *it_region;
const kern_iovec_t *it_vecs;
size_t it_nr_vecs;
size_t it_vec_ptr;
@@ -22,7 +24,7 @@ extern void iovec_iterator_begin(
size_t nr_vecs);
extern void iovec_iterator_begin_user(
struct iovec_iterator *it,
struct vm_region *address_space,
struct address_space *address_space,
const kern_iovec_t *vecs,
size_t nr_vecs);

View File

@@ -1,9 +1,9 @@
#ifndef KERNEL_SYSCALL_H_
#define KERNEL_SYSCALL_H_
#include <kernel/address-space.h>
#include <kernel/handle.h>
#include <kernel/task.h>
#include <kernel/vm-region.h>
#include <kernel/vm.h>
#include <mango/status.h>
#include <mango/syscall.h>
@@ -28,13 +28,13 @@ static inline bool __validate_access(
vm_prot_t flags)
{
unsigned long irq_flags;
vm_region_lock_irqsave(task->t_address_space, &irq_flags);
bool result = vm_region_validate_access(
address_space_lock_irqsave(task->t_address_space, &irq_flags);
bool result = address_space_validate_access(
task->t_address_space,
(virt_addr_t)ptr,
len,
flags | VM_PROT_USER);
vm_region_unlock_irqrestore(task->t_address_space, irq_flags);
address_space_unlock_irqrestore(task->t_address_space, irq_flags);
return result;
}
@@ -85,29 +85,19 @@ extern kern_status_t sys_vm_object_copy(
size_t count,
size_t *nr_copied);
extern kern_status_t sys_vm_region_create(
kern_handle_t parent,
const char *name,
size_t name_len,
off_t offset,
size_t region_len,
vm_prot_t prot,
kern_handle_t *out,
virt_addr_t *out_base_address);
extern kern_status_t sys_vm_region_kill(kern_handle_t region);
extern kern_status_t sys_vm_region_read(
extern kern_status_t sys_address_space_read(
kern_handle_t region,
void *dst,
off_t offset,
virt_addr_t base,
size_t count,
size_t *nr_read);
extern kern_status_t sys_vm_region_write(
extern kern_status_t sys_address_space_write(
kern_handle_t region,
const void *src,
off_t offset,
virt_addr_t base,
size_t count,
size_t *nr_read);
extern kern_status_t sys_vm_region_map_absolute(
extern kern_status_t sys_address_space_map(
kern_handle_t region,
virt_addr_t map_address,
kern_handle_t object,
@@ -115,21 +105,18 @@ extern kern_status_t sys_vm_region_map_absolute(
size_t length,
vm_prot_t prot,
virt_addr_t *out_base_address);
extern kern_status_t sys_vm_region_map_relative(
extern kern_status_t sys_address_space_unmap(
kern_handle_t region,
off_t region_offset,
kern_handle_t object,
off_t object_offset,
size_t length,
vm_prot_t prot,
virt_addr_t *out_base_address);
extern kern_status_t sys_vm_region_unmap_absolute(
kern_handle_t region,
virt_addr_t address,
virt_addr_t base,
size_t length);
extern kern_status_t sys_vm_region_unmap_relative(
extern kern_status_t sys_address_space_reserve(
kern_handle_t region,
off_t offset,
virt_addr_t base,
size_t length,
virt_addr_t *out_base_address);
extern kern_status_t sys_address_space_release(
kern_handle_t region,
virt_addr_t base,
size_t length);
extern kern_status_t sys_kern_log(const char *s);

View File

@@ -24,7 +24,7 @@ struct task {
char t_name[TASK_NAME_MAX];
pmap_t t_pmap;
struct vm_region *t_address_space;
struct address_space *t_address_space;
spin_lock_t t_handles_lock;
struct handle_table *t_handles;
struct btree b_channels;

View File

@@ -1,191 +0,0 @@
#ifndef KERNEL_VM_REGION_H_
#define KERNEL_VM_REGION_H_
#include <kernel/object.h>
#include <kernel/pmap.h>
#include <kernel/vm.h>
#define VM_REGION_NAME_MAX 64
#define VM_REGION_COPY_ALL ((size_t)-1)
struct vm_region;
struct vm_object;
enum vm_region_status {
VM_REGION_DEAD = 0,
VM_REGION_ONLINE,
};
enum vm_region_entry_type {
VM_REGION_ENTRY_NONE = 0,
VM_REGION_ENTRY_REGION,
VM_REGION_ENTRY_MAPPING,
};
struct vm_region_entry {
union {
struct btree_node e_node;
/* this entry is only used to queue vm-region objects for
* recursive cleanup */
struct queue_entry e_entry;
};
struct vm_region_entry *e_parent;
enum vm_region_entry_type e_type;
/* absolute address of this entry */
virt_addr_t e_address;
/* offset in bytes of this entry within its immediate parent. */
off_t e_offset;
/* size of the entry in bytes */
size_t e_size;
};
struct vm_region_mapping {
struct vm_region_entry m_entry;
struct vm_object *m_object;
/* used to link to vm_object->vo_mappings */
struct queue_entry m_object_entry;
vm_prot_t m_prot;
/* offset in bytes to the start of the object data that was mapped */
off_t m_object_offset;
};
struct vm_region {
struct object vr_base;
enum vm_region_status vr_status;
struct vm_region_entry vr_entry;
char vr_name[VM_REGION_NAME_MAX];
/* btree of struct vm_region_entry.
* sibling entries cannot overlap each other, and child entries must
* be entirely contained within their immediate parent entry. */
struct btree vr_entries;
/* memory protection restriction mask.
* any mapping in this region, or any of its children, cannot use
* protection flags that are not set in this mask.
* for example, if VM_PROT_EXEC is /not/ set here, no mapping
* can be created in this region or any child region with VM_PROT_EXEC
* set. */
vm_prot_t vr_prot;
/* the physical address space in which mappings in this region (and
* its children) are created */
pmap_t vr_pmap;
};
extern kern_status_t vm_region_type_init(void);
extern struct vm_region *vm_region_cast(struct object *obj);
/* create a new vm-region, optionally within a parent region.
* `offset` is the byte offset within the parent region where the new region
* should start.
* if no parent is specified, `offset` is the absolute virtual address of the
* start of the region.
* in both cases, `len` is the length of the new region in bytes. */
extern kern_status_t vm_region_create(
struct vm_region *parent,
const char *name,
size_t name_len,
off_t offset,
size_t region_len,
vm_prot_t prot,
struct vm_region **out);
/* recursively kills a given region and all of its sub-regions.
* when a region is killed, all of its mappings are unmapped, and any further
* operations on the region are denied. however, all handles and references to
* the region (and any sub-region) remain valid, and no kernel memory is
* de-allocated.
* the memory used by the vm-region object itself is de-allocated when the last
* handle/reference to the object is released.
* this function should be called with `region` locked.
*/
extern kern_status_t vm_region_kill(
struct vm_region *region,
unsigned long *lock_flags);
/* map a vm-object into a vm-region.
* [region_offset,length] must fall within exactly one region, and cannot span
* multiple sibling regions.
* if [region_offset,length] falls within a child region, the map operation
* will be transparently redirected to the relevant region.
* `prot` must be allowed both by the region into which the mapping is being
* created AND the vm-object being mapped. */
extern kern_status_t vm_region_map_object(
struct vm_region *region,
off_t region_offset,
struct vm_object *object,
off_t object_offset,
size_t length,
vm_prot_t prot,
virt_addr_t *out);
extern kern_status_t vm_region_unmap(
struct vm_region *region,
off_t region_offset,
size_t length);
extern bool vm_region_validate_access(
struct vm_region *region,
off_t offset,
size_t len,
vm_prot_t prot);
/* find the mapping corresponding to the given virtual address, and page-in the
* necessary vm_page to allow the memory access to succeed. if the relevant
* vm-object page hasn't been allocated yet, it will be allocated here. */
extern kern_status_t vm_region_demand_map(
struct vm_region *region,
virt_addr_t addr,
enum pmap_fault_flags flags);
/* get the absolute base virtual address of a region within its
* parent/ancestors. */
extern virt_addr_t vm_region_get_base_address(const struct vm_region *region);
extern void vm_region_dump(struct vm_region *region);
/* read data from the user-space area of a vm-region into a kernel-mode buffer
*/
extern kern_status_t vm_region_read_kernel(
struct vm_region *src_region,
virt_addr_t src_ptr,
size_t count,
void *dest,
size_t *nr_read);
/* write data to the user-space area of a vm-region from a kernel-mode buffer
*/
extern kern_status_t vm_region_write_kernel(
struct vm_region *dst_region,
virt_addr_t dst_ptr,
size_t count,
const void *src,
size_t *nr_written);
extern kern_status_t vm_region_memmove(
struct vm_region *dest_region,
virt_addr_t dest_ptr,
struct vm_region *src_region,
virt_addr_t src_ptr,
size_t count,
size_t *nr_moved);
extern kern_status_t vm_region_memmove_v(
struct vm_region *dest_region,
size_t dest_offset,
const kern_iovec_t *dest,
size_t nr_dest,
struct vm_region *src_region,
size_t src_offset,
const kern_iovec_t *src,
size_t nr_src,
size_t bytes_to_move,
size_t *nr_bytes_moved);
DEFINE_OBJECT_LOCK_FUNCTION(vm_region, vr_base)
#endif