diff --git a/arch/x86_64/pmap.c b/arch/x86_64/pmap.c index 3fb28e1..a16761e 100644 --- a/arch/x86_64/pmap.c +++ b/arch/x86_64/pmap.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -7,7 +8,6 @@ #include #include #include -#include #include #include @@ -363,12 +363,20 @@ kern_status_t pmap_handle_fault( } struct task *task = current_task(); - struct vm_region *space = task->t_address_space; + if (!task) { + return KERN_FATAL_ERROR; + } + + struct address_space *space = task->t_address_space; + if (!space) { + return KERN_FATAL_ERROR; + } unsigned long lock_flags; - vm_region_lock_irqsave(space, &lock_flags); - kern_status_t status = vm_region_demand_map(space, fault_addr, flags); - vm_region_unlock_irqrestore(space, lock_flags); + address_space_lock_irqsave(space, &lock_flags); + kern_status_t status + = address_space_demand_map(space, fault_addr, flags); + address_space_unlock_irqrestore(space, lock_flags); return status; } diff --git a/include/kernel/address-space.h b/include/kernel/address-space.h new file mode 100644 index 0000000..6cc81e3 --- /dev/null +++ b/include/kernel/address-space.h @@ -0,0 +1,161 @@ +#ifndef KERNEL_ADDRESS_SPACE_H_ +#define KERNEL_ADDRESS_SPACE_H_ + +#include +#include +#include + +#define ADDRESS_SPACE_COPY_ALL ((size_t)-1) + +struct address_space; +struct vm_object; + +struct vm_area { + /* the vm-object mapped into this area. + * if this is NULL, the vm_area represents an area of reserved memory. + * it cannot be accessed, and mapping operations with MAP_ADDRESS_ANY + * will avoid the area, but fixed address mappings in this area + * will succeed. 
*/ + struct vm_object *vma_object; + /* used to link to vm_object->vo_mappings */ + struct queue_entry vma_object_entry; + /* the memory protection flags applied to this area */ + vm_prot_t vma_prot; + /* offset in bytes to the start of the object data that was mapped */ + off_t vma_object_offset; + /* used to link to address_space->s_mappings */ + struct btree_node vma_node; + /* address of the first byte in this area */ + virt_addr_t vma_base; + /* address of the last byte in this area */ + virt_addr_t vma_limit; +}; + +struct address_space { + struct object s_base; + + /* address of the first byte in this address space */ + virt_addr_t s_base_address; + /* address of the last byte in this address space */ + virt_addr_t s_limit_address; + + /* btree of struct vm_area representing mapped vm-objects. + * sibling entries cannot overlap each other. */ + struct btree s_mappings; + /* btree of struct vm_area representing reserved regions of the + * address space. + * reserved regions will not be automatically allocated by the kernel. + * sibling entries cannot overlap each other. + * overlap between s_mappings and s_reserved IS allowed. */ + struct btree s_reserved; + + /* the corresponding physical address space */ + pmap_t s_pmap; +}; + +extern kern_status_t address_space_type_init(void); +extern struct address_space *address_space_cast(struct object *obj); + +/* create a new vm-region, optionally within a parent region. + * `offset` is the byte offset within the parent region where the new region + * should start. + * if no parent is specified, `offset` is the absolute virtual address of the + * start of the region. + * in both cases, `len` is the length of the new region in bytes. */ +extern kern_status_t address_space_create( + virt_addr_t base, + virt_addr_t limit, + struct address_space **out); + +/* map a vm-object into a vm-region. + * [region_offset,length] must fall within exactly one region, and cannot span + * multiple sibling regions. 
+ * if [region_offset,length] falls within a child region, the map operation + * will be transparently redirected to the relevant region. + * `prot` must be allowed both by the region into which the mapping is being + * created AND the vm-object being mapped. */ +extern kern_status_t address_space_map( + struct address_space *space, + virt_addr_t map_address, + struct vm_object *object, + off_t object_offset, + size_t length, + vm_prot_t prot, + virt_addr_t *out); +extern kern_status_t address_space_unmap( + struct address_space *region, + virt_addr_t base, + size_t length); + +/* reserve an area of the address space. the kernel will not place any + * new mappings in this area unless explicitly told to (i.e. by not using + * MAP_ADDRESS_ANY). Use MAP_ADDRESS_ANY to have the kernel allocate a region + * of the address space for you */ +extern kern_status_t address_space_reserve( + struct address_space *space, + virt_addr_t base, + size_t length, + virt_addr_t *out); +/* release a previously reserved area of the address space. */ +extern kern_status_t address_space_release( + struct address_space *space, + virt_addr_t base, + size_t length); + +extern bool address_space_validate_access( + struct address_space *region, + virt_addr_t base, + size_t len, + vm_prot_t prot); + +/* find the mapping corresponding to the given virtual address, and page-in the + * necessary vm_page to allow the memory access to succeed. if the relevant + * vm-object page hasn't been allocated yet, it will be allocated here. 
*/ +extern kern_status_t address_space_demand_map( + struct address_space *region, + virt_addr_t addr, + enum pmap_fault_flags flags); + +/* read data from the user-space area of a vm-region into a kernel-mode buffer + */ +extern kern_status_t address_space_read( + struct address_space *src_region, + virt_addr_t src_ptr, + size_t count, + void *dest, + size_t *nr_read); + +/* write data to the user-space area of a vm-region from a kernel-mode buffer + */ +extern kern_status_t address_space_write( + struct address_space *dst_region, + virt_addr_t dst_ptr, + size_t count, + const void *src, + size_t *nr_written); + +extern kern_status_t address_space_memmove( + struct address_space *dest_space, + virt_addr_t dest_ptr, + struct address_space *src_space, + virt_addr_t src_ptr, + size_t count, + size_t *nr_moved); + +extern kern_status_t address_space_memmove_v( + struct address_space *dest_space, + size_t dest_offset, + const kern_iovec_t *dest_iov, + size_t nr_dest_iov, + struct address_space *src_space, + size_t src_offset, + const kern_iovec_t *src_iov, + size_t nr_src_iov, + size_t bytes_to_move, + size_t *nr_bytes_moved); + +void address_space_dump(struct address_space *region); + +DEFINE_OBJECT_LOCK_FUNCTION(address_space, s_base) + +#endif diff --git a/include/kernel/channel.h b/include/kernel/channel.h index 3c8a38e..d504712 100644 --- a/include/kernel/channel.h +++ b/include/kernel/channel.h @@ -37,7 +37,7 @@ extern kern_status_t channel_read_msg( struct channel *channel, msgid_t msg, size_t offset, - struct vm_region *dest_region, + struct address_space *dest_region, const kern_iovec_t *dest_iov, size_t dest_iov_count, size_t *nr_read); @@ -45,7 +45,7 @@ extern kern_status_t channel_write_msg( struct channel *channel, msgid_t msg, size_t offset, - struct vm_region *src_region, + struct address_space *src_region, const kern_iovec_t *src_iov, size_t src_iov_count, size_t *nr_written); diff --git a/include/kernel/handle.h b/include/kernel/handle.h index 
91908ea..98715f4 100644 --- a/include/kernel/handle.h +++ b/include/kernel/handle.h @@ -17,7 +17,7 @@ typedef uintptr_t handle_flags_t; struct task; struct object; -struct vm_region; +struct address_space; struct handle_list; struct handle { @@ -57,11 +57,11 @@ extern struct handle *handle_table_get_handle( kern_handle_t handle); extern kern_status_t handle_table_transfer( - struct vm_region *dst_region, + struct address_space *dst_region, struct handle_table *dst, kern_msg_handle_t *dst_handles, size_t dst_handles_max, - struct vm_region *src_region, + struct address_space *src_region, struct handle_table *src, kern_msg_handle_t *src_handles, size_t src_handles_count); diff --git a/include/kernel/iovec.h b/include/kernel/iovec.h index aa1957a..065c748 100644 --- a/include/kernel/iovec.h +++ b/include/kernel/iovec.h @@ -4,10 +4,12 @@ #include #include +struct address_space; + struct iovec_iterator { /* if this is set, we are iterating over a list of iovecs stored in * userspace, and must go through this region to retrieve the data. 
*/ - struct vm_region *it_region; + struct address_space *it_region; const kern_iovec_t *it_vecs; size_t it_nr_vecs; size_t it_vec_ptr; @@ -22,7 +24,7 @@ extern void iovec_iterator_begin( size_t nr_vecs); extern void iovec_iterator_begin_user( struct iovec_iterator *it, - struct vm_region *address_space, + struct address_space *address_space, const kern_iovec_t *vecs, size_t nr_vecs); diff --git a/include/kernel/syscall.h b/include/kernel/syscall.h index c71fe74..4c059b2 100644 --- a/include/kernel/syscall.h +++ b/include/kernel/syscall.h @@ -1,9 +1,9 @@ #ifndef KERNEL_SYSCALL_H_ #define KERNEL_SYSCALL_H_ +#include #include #include -#include #include #include #include @@ -28,13 +28,13 @@ static inline bool __validate_access( vm_prot_t flags) { unsigned long irq_flags; - vm_region_lock_irqsave(task->t_address_space, &irq_flags); - bool result = vm_region_validate_access( + address_space_lock_irqsave(task->t_address_space, &irq_flags); + bool result = address_space_validate_access( task->t_address_space, (virt_addr_t)ptr, len, flags | VM_PROT_USER); - vm_region_unlock_irqrestore(task->t_address_space, irq_flags); + address_space_unlock_irqrestore(task->t_address_space, irq_flags); return result; } @@ -85,29 +85,19 @@ extern kern_status_t sys_vm_object_copy( size_t count, size_t *nr_copied); -extern kern_status_t sys_vm_region_create( - kern_handle_t parent, - const char *name, - size_t name_len, - off_t offset, - size_t region_len, - vm_prot_t prot, - kern_handle_t *out, - virt_addr_t *out_base_address); -extern kern_status_t sys_vm_region_kill(kern_handle_t region); -extern kern_status_t sys_vm_region_read( +extern kern_status_t sys_address_space_read( kern_handle_t region, void *dst, - off_t offset, + virt_addr_t base, size_t count, size_t *nr_read); -extern kern_status_t sys_vm_region_write( +extern kern_status_t sys_address_space_write( kern_handle_t region, const void *src, - off_t offset, + virt_addr_t base, size_t count, size_t *nr_read); -extern 
kern_status_t sys_vm_region_map_absolute( +extern kern_status_t sys_address_space_map( kern_handle_t region, virt_addr_t map_address, kern_handle_t object, @@ -115,21 +105,18 @@ extern kern_status_t sys_vm_region_map_absolute( size_t length, vm_prot_t prot, virt_addr_t *out_base_address); -extern kern_status_t sys_vm_region_map_relative( +extern kern_status_t sys_address_space_unmap( kern_handle_t region, - off_t region_offset, - kern_handle_t object, - off_t object_offset, - size_t length, - vm_prot_t prot, - virt_addr_t *out_base_address); -extern kern_status_t sys_vm_region_unmap_absolute( - kern_handle_t region, - virt_addr_t address, + virt_addr_t base, size_t length); -extern kern_status_t sys_vm_region_unmap_relative( +extern kern_status_t sys_address_space_reserve( kern_handle_t region, - off_t offset, + virt_addr_t base, + size_t length, + virt_addr_t *out_base_address); +extern kern_status_t sys_address_space_release( + kern_handle_t region, + virt_addr_t base, size_t length); extern kern_status_t sys_kern_log(const char *s); diff --git a/include/kernel/task.h b/include/kernel/task.h index 3b57c70..32a1e1c 100644 --- a/include/kernel/task.h +++ b/include/kernel/task.h @@ -24,7 +24,7 @@ struct task { char t_name[TASK_NAME_MAX]; pmap_t t_pmap; - struct vm_region *t_address_space; + struct address_space *t_address_space; spin_lock_t t_handles_lock; struct handle_table *t_handles; struct btree b_channels; diff --git a/include/kernel/vm-region.h b/include/kernel/vm-region.h deleted file mode 100644 index 3585337..0000000 --- a/include/kernel/vm-region.h +++ /dev/null @@ -1,191 +0,0 @@ -#ifndef KERNEL_VM_REGION_H_ -#define KERNEL_VM_REGION_H_ - -#include -#include -#include - -#define VM_REGION_NAME_MAX 64 -#define VM_REGION_COPY_ALL ((size_t)-1) - -struct vm_region; -struct vm_object; - -enum vm_region_status { - VM_REGION_DEAD = 0, - VM_REGION_ONLINE, -}; - -enum vm_region_entry_type { - VM_REGION_ENTRY_NONE = 0, - VM_REGION_ENTRY_REGION, - 
VM_REGION_ENTRY_MAPPING, -}; - -struct vm_region_entry { - union { - struct btree_node e_node; - /* this entry is only used to queue vm-region objects for - * recursive cleanup */ - struct queue_entry e_entry; - }; - struct vm_region_entry *e_parent; - enum vm_region_entry_type e_type; - /* absolute address of this entry */ - virt_addr_t e_address; - /* offset in bytes of this entry within its immediate parent. */ - off_t e_offset; - /* size of the entry in bytes */ - size_t e_size; -}; - -struct vm_region_mapping { - struct vm_region_entry m_entry; - struct vm_object *m_object; - - /* used to link to vm_object->vo_mappings */ - struct queue_entry m_object_entry; - - vm_prot_t m_prot; - /* offset in bytes to the start of the object data that was mapped */ - off_t m_object_offset; -}; - -struct vm_region { - struct object vr_base; - enum vm_region_status vr_status; - struct vm_region_entry vr_entry; - - char vr_name[VM_REGION_NAME_MAX]; - - /* btree of struct vm_region_entry. - * sibling entries cannot overlap each other, and child entries must - * be entirely contained within their immediate parent entry. */ - struct btree vr_entries; - - /* memory protection restriction mask. - * any mapping in this region, or any of its children, cannot use - * protection flags that are not set in this mask. - * for example, if VM_PROT_EXEC is /not/ set here, no mapping - * can be created in this region or any child region with VM_PROT_EXEC - * set. */ - vm_prot_t vr_prot; - - /* the physical address space in which mappings in this region (and - * its children) are created */ - pmap_t vr_pmap; -}; - -extern kern_status_t vm_region_type_init(void); -extern struct vm_region *vm_region_cast(struct object *obj); - -/* create a new vm-region, optionally within a parent region. - * `offset` is the byte offset within the parent region where the new region - * should start. - * if no parent is specified, `offset` is the absolute virtual address of the - * start of the region. 
- * in both cases, `len` is the length of the new region in bytes. */ -extern kern_status_t vm_region_create( - struct vm_region *parent, - const char *name, - size_t name_len, - off_t offset, - size_t region_len, - vm_prot_t prot, - struct vm_region **out); - -/* recursively kills a given region and all of its sub-regions. - * when a region is killed, all of its mappings are unmapped, and any further - * operations on the region are denied. however, all handles and references to - * the region (any any sub-region) remain valid, and no kernel memory is - * de-allocated. - * the memory used by the vm-region object itself is de-allocated when the last - * handle/reference to the object is released. - * this function should be called with `region` locked. - */ -extern kern_status_t vm_region_kill( - struct vm_region *region, - unsigned long *lock_flags); - -/* map a vm-object into a vm-region. - * [region_offset,length] must fall within exactly one region, and cannot span - * multiple sibling regions. - * if [region_offset,length] falls within a child region, the map operation - * will be transparently redirected to the relevant region. - * `prot` must be allowed both by the region into which the mapping is being - * created AND the vm-object being mapped. */ -extern kern_status_t vm_region_map_object( - struct vm_region *region, - off_t region_offset, - struct vm_object *object, - off_t object_offset, - size_t length, - vm_prot_t prot, - virt_addr_t *out); - -extern kern_status_t vm_region_unmap( - struct vm_region *region, - off_t region_offset, - size_t length); - -extern bool vm_region_validate_access( - struct vm_region *region, - off_t offset, - size_t len, - vm_prot_t prot); - -/* find the mapping corresponding to the given virtual address, and page-in the - * necessary vm_page to allow the memory access to succeed. if the relevant - * vm-object page hasn't been allocated yet, it will be allocated here. 
*/ -extern kern_status_t vm_region_demand_map( - struct vm_region *region, - virt_addr_t addr, - enum pmap_fault_flags flags); - -/* get the absolute base virtual address of a region within its - * parent/ancestors. */ -extern virt_addr_t vm_region_get_base_address(const struct vm_region *region); - -extern void vm_region_dump(struct vm_region *region); - -/* read data from the user-space area of a vm-region into a kernel-mode buffer - */ -extern kern_status_t vm_region_read_kernel( - struct vm_region *src_region, - virt_addr_t src_ptr, - size_t count, - void *dest, - size_t *nr_read); - -/* write data to the user-space area of a vm-region from a kernel-mode buffer - */ -extern kern_status_t vm_region_write_kernel( - struct vm_region *dst_region, - virt_addr_t dst_ptr, - size_t count, - const void *src, - size_t *nr_written); - -extern kern_status_t vm_region_memmove( - struct vm_region *dest_region, - virt_addr_t dest_ptr, - struct vm_region *src_region, - virt_addr_t src_ptr, - size_t count, - size_t *nr_moved); - -extern kern_status_t vm_region_memmove_v( - struct vm_region *dest_region, - size_t dest_offset, - const kern_iovec_t *dest, - size_t nr_dest, - struct vm_region *src_region, - size_t src_offset, - const kern_iovec_t *src, - size_t nr_src, - size_t bytes_to_move, - size_t *nr_bytes_moved); - -DEFINE_OBJECT_LOCK_FUNCTION(vm_region, vr_base) - -#endif diff --git a/init/main.c b/init/main.c index c4c9eab..6a0125b 100644 --- a/init/main.c +++ b/init/main.c @@ -112,7 +112,10 @@ void kernel_init(uintptr_t arg) struct task *bootstrap_task = task_create("bootstrap", 9); tracek("created bootstrap task (pid=%u)", bootstrap_task->t_id); - bsp_launch_async(&bsp, bootstrap_task); + status = bsp_launch_async(&bsp, bootstrap_task); + if (status != KERN_OK) { + printk("bsp launch failed with status %d", status); + } hang(); } diff --git a/kernel/bsp.c b/kernel/bsp.c index 3a4cc6d..477355e 100644 --- a/kernel/bsp.c +++ b/kernel/bsp.c @@ -1,3 +1,4 @@ +#include #include 
#include #include @@ -6,7 +7,6 @@ #include #include #include -#include #define BOOTSTRAP_STACK_SIZE 0x10000 @@ -71,101 +71,6 @@ kern_status_t bsp_load(struct bsp *bsp, const struct boot_module *mod) return KERN_OK; } -static kern_status_t map_executable_dyn( - struct bsp *bsp, - struct task *task, - virt_addr_t *entry) -{ - kern_status_t status = KERN_OK; - size_t exec_size = 0; - if (bsp->bsp_trailer.bsp_text_vaddr > bsp->bsp_trailer.bsp_data_vaddr) { - exec_size = bsp->bsp_trailer.bsp_text_vaddr - + bsp->bsp_trailer.bsp_text_size; - } else { - exec_size = bsp->bsp_trailer.bsp_data_vaddr - + bsp->bsp_trailer.bsp_data_size; - } - - struct vm_region *region; - status = vm_region_create( - task->t_address_space, - "exec", - 4, - VM_REGION_ANY_OFFSET, - exec_size, - VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXEC | VM_PROT_USER, - ®ion); - if (status != KERN_OK) { - return status; - } - - struct vm_object *data = vm_object_create( - ".data", - 5, - bsp->bsp_trailer.bsp_data_size, - VM_PROT_READ | VM_PROT_WRITE | VM_PROT_USER); - if (!data) { - return KERN_NO_MEMORY; - } - - virt_addr_t text_base = 0, data_base = 0; - - off_t text_foffset = bsp->bsp_trailer.bsp_exec_offset - + bsp->bsp_trailer.bsp_text_faddr; - off_t data_foffset = 0; - off_t text_voffset = bsp->bsp_trailer.bsp_text_vaddr; - off_t data_voffset = bsp->bsp_trailer.bsp_data_vaddr; - -#if 0 - size_t tmp = 0; - status = vm_object_copy( - data, - 0, - bsp->bsp_vmo, - bsp->bsp_trailer.bsp_data_faddr, - bsp->bsp_trailer.bsp_data_size, - &tmp); - - tracek("read %zuB of data from executable", tmp); -#endif - - tracek("text_foffset=%06llx, data_foffset=%06llx", - text_foffset, - data_foffset); - tracek("text_voffset=%08llx, data_voffset=%08llx", - text_voffset, - data_voffset); - - status = vm_region_map_object( - region, - text_voffset, - bsp->bsp_vmo, - text_foffset, - bsp->bsp_trailer.bsp_text_size, - VM_PROT_READ | VM_PROT_EXEC | VM_PROT_USER, - &text_base); - if (status != KERN_OK) { - return status; - } - - 
status = vm_region_map_object( - region, - data_voffset, - data, - data_foffset, - bsp->bsp_trailer.bsp_data_size, - VM_PROT_READ | VM_PROT_WRITE | VM_PROT_USER, - &data_base); - if (status != KERN_OK) { - return status; - } - - tracek("text_base=%08llx, data_base=%08llx", text_base, data_base); - - *entry = text_base + bsp->bsp_trailer.bsp_exec_entry; - return KERN_OK; -} - static kern_status_t map_executable_exec( struct bsp *bsp, struct task *task, @@ -189,9 +94,6 @@ static kern_status_t map_executable_exec( off_t text_voffset = bsp->bsp_trailer.bsp_text_vaddr; off_t data_voffset = bsp->bsp_trailer.bsp_data_vaddr; - text_voffset -= vm_region_get_base_address(task->t_address_space); - data_voffset -= vm_region_get_base_address(task->t_address_space); - #if 0 size_t tmp = 0; status = vm_object_copy( @@ -212,7 +114,7 @@ static kern_status_t map_executable_exec( text_voffset, data_voffset); - status = vm_region_map_object( + status = address_space_map( task->t_address_space, text_voffset, bsp->bsp_vmo, @@ -224,7 +126,7 @@ static kern_status_t map_executable_exec( return status; } - status = vm_region_map_object( + status = address_space_map( task->t_address_space, data_voffset, data, @@ -257,9 +159,9 @@ kern_status_t bsp_launch_async(struct bsp *bsp, struct task *task) return KERN_NO_ENTRY; } - status = vm_region_map_object( + status = address_space_map( task->t_address_space, - VM_REGION_ANY_OFFSET, + MAP_ADDRESS_ANY, user_stack, 0, BOOTSTRAP_STACK_SIZE, @@ -270,9 +172,9 @@ kern_status_t bsp_launch_async(struct bsp *bsp, struct task *task) return status; } - status = vm_region_map_object( + status = address_space_map( task->t_address_space, - VM_REGION_ANY_OFFSET, + MAP_ADDRESS_ANY, bsp->bsp_vmo, 0, bsp->bsp_trailer.bsp_exec_offset, @@ -288,7 +190,7 @@ kern_status_t bsp_launch_async(struct bsp *bsp, struct task *task) return status; } #ifdef TRACE - vm_region_dump(task->t_address_space); + address_space_dump(task->t_address_space); #endif sp = stack_buffer + 
BOOTSTRAP_STACK_SIZE; @@ -298,7 +200,7 @@ kern_status_t bsp_launch_async(struct bsp *bsp, struct task *task) task_open_handle(task, &task->t_base, 0, &self); task_open_handle( task, - &task->t_address_space->vr_base, + &task->t_address_space->s_base, 0, &self_address_space); diff --git a/kernel/channel.c b/kernel/channel.c index f064558..7608c27 100644 --- a/kernel/channel.c +++ b/kernel/channel.c @@ -1,10 +1,10 @@ +#include #include #include #include #include #include #include -#include #include #define CHANNEL_CAST(p) OBJECT_C_CAST(struct channel, c_base, &channel_type, p) @@ -167,13 +167,13 @@ extern kern_status_t channel_recv_msg( struct task *sender = msg->msg_sender_thread->tr_parent; struct task *receiver = self->tr_parent; - struct vm_region *src = sender->t_address_space, - *dst = receiver->t_address_space; + struct address_space *src = sender->t_address_space, + *dst = receiver->t_address_space; unsigned long f; - vm_region_lock_pair_irqsave(src, dst, &f); + address_space_lock_pair_irqsave(src, dst, &f); - kern_status_t status = vm_region_memmove_v( + kern_status_t status = address_space_memmove_v( dst, 0, out_msg->msg_data, @@ -182,7 +182,7 @@ extern kern_status_t channel_recv_msg( 0, msg->msg_req.msg_data, msg->msg_req.msg_data_count, - VM_REGION_COPY_ALL, + ADDRESS_SPACE_COPY_ALL, NULL); if (status != KERN_OK) { @@ -210,7 +210,7 @@ extern kern_status_t channel_recv_msg( &sender->t_handles_lock, &receiver->t_handles_lock, f); - vm_region_unlock_pair_irqrestore(src, dst, f); + address_space_unlock_pair_irqrestore(src, dst, f); if (status != KERN_OK) { kmsg_reply_error(msg, status, &msg_lock_flags); @@ -250,12 +250,12 @@ extern kern_status_t channel_reply_msg( /* the task that is about to send the response */ struct task *sender = self->tr_parent; - struct vm_region *src = sender->t_address_space, - *dst = receiver->t_address_space; + struct address_space *src = sender->t_address_space, + *dst = receiver->t_address_space; unsigned long f; - 
vm_region_lock_pair_irqsave(src, dst, &f); + address_space_lock_pair_irqsave(src, dst, &f); - kern_status_t status = vm_region_memmove_v( + kern_status_t status = address_space_memmove_v( dst, 0, msg->msg_resp.msg_data, @@ -264,7 +264,7 @@ extern kern_status_t channel_reply_msg( 0, reply->msg_data, reply->msg_data_count, - VM_REGION_COPY_ALL, + ADDRESS_SPACE_COPY_ALL, NULL); if (status != KERN_OK) { @@ -292,7 +292,7 @@ extern kern_status_t channel_reply_msg( &sender->t_handles_lock, &receiver->t_handles_lock, f); - vm_region_unlock_pair_irqrestore(src, dst, f); + address_space_unlock_pair_irqrestore(src, dst, f); if (status != KERN_OK) { kmsg_reply_error(msg, status, &msg_lock_flags); @@ -308,7 +308,7 @@ extern kern_status_t channel_read_msg( struct channel *channel, msgid_t id, size_t offset, - struct vm_region *dest_region, + struct address_space *dest_region, const kern_iovec_t *dest_iov, size_t dest_iov_count, size_t *nr_read) @@ -325,13 +325,13 @@ extern kern_status_t channel_read_msg( return KERN_INVALID_ARGUMENT; } - struct vm_region *src_region + struct address_space *src_region = msg->msg_sender_thread->tr_parent->t_address_space; unsigned long f; - vm_region_lock_pair_irqsave(src_region, dest_region, &f); + address_space_lock_pair_irqsave(src_region, dest_region, &f); - kern_status_t status = vm_region_memmove_v( + kern_status_t status = address_space_memmove_v( dest_region, 0, dest_iov, @@ -340,9 +340,9 @@ extern kern_status_t channel_read_msg( offset, msg->msg_req.msg_data, msg->msg_req.msg_data_count, - VM_REGION_COPY_ALL, + ADDRESS_SPACE_COPY_ALL, nr_read); - vm_region_unlock_pair_irqrestore(src_region, dest_region, f); + address_space_unlock_pair_irqrestore(src_region, dest_region, f); spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags); @@ -353,7 +353,7 @@ extern kern_status_t channel_write_msg( struct channel *channel, msgid_t id, size_t offset, - struct vm_region *src_region, + struct address_space *src_region, const kern_iovec_t *src_iov, 
size_t src_iov_count, size_t *nr_written) @@ -370,13 +370,13 @@ extern kern_status_t channel_write_msg( return KERN_INVALID_ARGUMENT; } - struct vm_region *dest_region + struct address_space *dest_region = msg->msg_sender_thread->tr_parent->t_address_space; unsigned long f; - vm_region_lock_pair_irqsave(src_region, dest_region, &f); + address_space_lock_pair_irqsave(src_region, dest_region, &f); - kern_status_t status = vm_region_memmove_v( + kern_status_t status = address_space_memmove_v( dest_region, offset, msg->msg_resp.msg_data, @@ -385,9 +385,9 @@ extern kern_status_t channel_write_msg( 0, src_iov, src_iov_count, - VM_REGION_COPY_ALL, + ADDRESS_SPACE_COPY_ALL, nr_written); - vm_region_unlock_pair_irqrestore(src_region, dest_region, f); + address_space_unlock_pair_irqrestore(src_region, dest_region, f); spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags); diff --git a/kernel/handle.c b/kernel/handle.c index b123bda..39ba13a 100644 --- a/kernel/handle.c +++ b/kernel/handle.c @@ -1,9 +1,9 @@ +#include #include #include #include #include #include -#include #include #include @@ -195,11 +195,11 @@ struct handle *handle_table_get_handle( } kern_status_t handle_table_transfer( - struct vm_region *dst_region, + struct address_space *dst_region, struct handle_table *dst, kern_msg_handle_t *dst_handles, size_t dst_handles_max, - struct vm_region *src_region, + struct address_space *src_region, struct handle_table *src, kern_msg_handle_t *src_handles, size_t src_handles_count) @@ -214,7 +214,7 @@ kern_status_t handle_table_transfer( = (virt_addr_t)src_handles + (i * sizeof src_handle); virt_addr_t dst_handle_addr = (virt_addr_t)dst_handles + (i * sizeof dst_handle); - status = vm_region_read_kernel( + status = address_space_read( src_region, src_handle_addr, sizeof src_handle, @@ -223,7 +223,7 @@ kern_status_t handle_table_transfer( if (status != KERN_OK) { src_handle.hnd_result = KERN_OK; - vm_region_write_kernel( + address_space_write( src_region, src_handle_addr, 
sizeof src_handle, @@ -244,7 +244,7 @@ kern_status_t handle_table_transfer( if (!src_entry) { status = KERN_INVALID_ARGUMENT; src_handle.hnd_result = KERN_INVALID_ARGUMENT; - vm_region_write_kernel( + address_space_write( src_region, src_handle_addr, sizeof src_handle, @@ -299,13 +299,13 @@ kern_status_t handle_table_transfer( src_handle.hnd_result = status; - vm_region_write_kernel( + address_space_write( src_region, src_handle_addr, sizeof src_handle, &src_handle, NULL); - vm_region_write_kernel( + address_space_write( dst_region, dst_handle_addr, sizeof dst_handle, @@ -317,7 +317,7 @@ kern_status_t handle_table_transfer( kern_msg_handle_t handle = {0}; virt_addr_t handle_addr = (virt_addr_t)src_handles + (i * sizeof handle); - vm_region_read_kernel( + address_space_read( src_region, handle_addr, sizeof handle, diff --git a/kernel/iovec.c b/kernel/iovec.c index 262878a..dcf14db 100644 --- a/kernel/iovec.c +++ b/kernel/iovec.c @@ -1,7 +1,7 @@ +#include #include #include #include -#include static bool read_iovec( struct iovec_iterator *it, @@ -18,7 +18,7 @@ static bool read_iovec( } size_t nr_read = 0; - kern_status_t status = vm_region_read_kernel( + kern_status_t status = address_space_read( it->it_region, (virt_addr_t)it->it_vecs + (index * sizeof(kern_iovec_t)), sizeof(kern_iovec_t), @@ -30,7 +30,7 @@ static bool read_iovec( void iovec_iterator_begin_user( struct iovec_iterator *it, - struct vm_region *region, + struct address_space *region, const kern_iovec_t *vecs, size_t nr_vecs) { diff --git a/libmango/arch/x86_64/syscall.S b/libmango/arch/x86_64/syscall.S index 9a37e4d..d9fd17c 100644 --- a/libmango/arch/x86_64/syscall.S +++ b/libmango/arch/x86_64/syscall.S @@ -68,14 +68,12 @@ SYSCALL_GATE vm_object_read SYS_VM_OBJECT_READ 5 SYSCALL_GATE vm_object_write SYS_VM_OBJECT_WRITE 5 SYSCALL_GATE vm_object_copy SYS_VM_OBJECT_COPY 6 -SYSCALL_GATE vm_region_create SYS_VM_REGION_CREATE 8 -SYSCALL_GATE vm_region_kill SYS_VM_REGION_KILL 1 -SYSCALL_GATE vm_region_read 
SYS_VM_REGION_READ 5 -SYSCALL_GATE vm_region_write SYS_VM_REGION_WRITE 5 -SYSCALL_GATE vm_region_map_absolute SYS_VM_REGION_MAP_ABSOLUTE 7 -SYSCALL_GATE vm_region_map_relative SYS_VM_REGION_MAP_RELATIVE 7 -SYSCALL_GATE vm_region_unmap_absolute SYS_VM_REGION_UNMAP_ABSOLUTE 3 -SYSCALL_GATE vm_region_unmap_relative SYS_VM_REGION_UNMAP_RELATIVE 3 +SYSCALL_GATE address_space_read SYS_ADDRESS_SPACE_READ 5 +SYSCALL_GATE address_space_write SYS_ADDRESS_SPACE_WRITE 5 +SYSCALL_GATE address_space_map SYS_ADDRESS_SPACE_MAP 7 +SYSCALL_GATE address_space_unmap SYS_ADDRESS_SPACE_UNMAP 3 +SYSCALL_GATE address_space_reserve SYS_ADDRESS_SPACE_RESERVE 4 +SYSCALL_GATE address_space_release SYS_ADDRESS_SPACE_RELEASE 3 SYSCALL_GATE kern_log SYS_KERN_LOG 1 SYSCALL_GATE kern_handle_close SYS_KERN_HANDLE_CLOSE 1 diff --git a/libmango/include-user/mango/vm.h b/libmango/include-user/mango/vm.h index ebf1d74..ad10908 100644 --- a/libmango/include-user/mango/vm.h +++ b/libmango/include-user/mango/vm.h @@ -30,29 +30,19 @@ extern kern_status_t vm_object_copy( size_t count, size_t *nr_copied); -extern kern_status_t vm_region_create( - kern_handle_t parent, - const char *name, - size_t name_len, - off_t offset, - size_t region_len, - vm_prot_t prot, - kern_handle_t *out, - virt_addr_t *out_base_address); -extern kern_status_t vm_region_kill(kern_handle_t region); -extern kern_status_t vm_region_read( +extern kern_status_t address_space_read( kern_handle_t region, void *dst, - off_t offset, + virt_addr_t base, size_t count, size_t *nr_read); -extern kern_status_t vm_region_write( +extern kern_status_t address_space_write( kern_handle_t region, const void *src, - off_t offset, + virt_addr_t base, size_t count, size_t *nr_read); -extern kern_status_t vm_region_map_absolute( +extern kern_status_t address_space_map( kern_handle_t region, virt_addr_t map_address, kern_handle_t object, @@ -60,21 +50,18 @@ extern kern_status_t vm_region_map_absolute( size_t length, vm_prot_t prot, virt_addr_t 
*out_base_address); -extern kern_status_t vm_region_map_relative( +extern kern_status_t address_space_unmap( kern_handle_t region, - off_t region_offset, - kern_handle_t object, - off_t object_offset, - size_t length, - vm_prot_t prot, - virt_addr_t *out_base_address); -extern kern_status_t vm_region_unmap_absolute( - kern_handle_t region, - virt_addr_t address, + virt_addr_t base, size_t length); -extern kern_status_t vm_region_unmap_relative( +extern kern_status_t address_space_reserve( kern_handle_t region, - off_t offset, + virt_addr_t base, + size_t length, + virt_addr_t *out_base_address); +extern kern_status_t address_space_release( + kern_handle_t region, + virt_addr_t base, size_t length); extern kern_status_t vm_controller_create(kern_handle_t *out); diff --git a/libmango/include/mango/syscall.h b/libmango/include/mango/syscall.h index 7081b71..9cec4f5 100644 --- a/libmango/include/mango/syscall.h +++ b/libmango/include/mango/syscall.h @@ -17,14 +17,12 @@ #define SYS_VM_OBJECT_READ 0x0Du #define SYS_VM_OBJECT_WRITE 0x0Eu #define SYS_VM_OBJECT_COPY 0x0Fu -#define SYS_VM_REGION_CREATE 0x10u -#define SYS_VM_REGION_KILL 0x11u -#define SYS_VM_REGION_READ 0x12u -#define SYS_VM_REGION_WRITE 0x13u -#define SYS_VM_REGION_MAP_ABSOLUTE 0x14u -#define SYS_VM_REGION_MAP_RELATIVE 0x15u -#define SYS_VM_REGION_UNMAP_ABSOLUTE 0x16u -#define SYS_VM_REGION_UNMAP_RELATIVE 0x17u +#define SYS_ADDRESS_SPACE_READ 0x12u +#define SYS_ADDRESS_SPACE_WRITE 0x13u +#define SYS_ADDRESS_SPACE_MAP 0x14u +#define SYS_ADDRESS_SPACE_UNMAP 0x15u +#define SYS_ADDRESS_SPACE_RESERVE 0x16u +#define SYS_ADDRESS_SPACE_RELEASE 0x17u #define SYS_MSG_SEND 0x18u #define SYS_MSG_RECV 0x19u #define SYS_MSG_REPLY 0x1Au diff --git a/libmango/include/mango/types.h b/libmango/include/mango/types.h index bbd55cd..f9b49ad 100644 --- a/libmango/include/mango/types.h +++ b/libmango/include/mango/types.h @@ -12,7 +12,8 @@ #define VM_PROT_NOCACHE 0x10u #define VM_PROT_MAP_SPECIFIC 0x40u -#define 
VM_REGION_ANY_OFFSET ((off_t) - 1) +#define MAP_ADDRESS_ANY ((virt_addr_t) - 1) +#define MAP_ADDRESS_INVALID ((virt_addr_t)0) #define KERN_HANDLE_INVALID ((kern_handle_t)0xFFFFFFFF) #define KERN_CFG_INVALID 0x00u diff --git a/sched/core.c b/sched/core.c index 3746677..70bbfc5 100644 --- a/sched/core.c +++ b/sched/core.c @@ -6,7 +6,6 @@ #include #include #include -#include extern kern_status_t setup_kernel_task(void); extern kern_status_t setup_idle_task(void); diff --git a/sched/task.c b/sched/task.c index 5c081ef..7e72043 100644 --- a/sched/task.c +++ b/sched/task.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -10,7 +11,6 @@ #include #include #include -#include #define TASK_CAST(p) OBJECT_C_CAST(struct task, t_base, &task_type, p) @@ -95,15 +95,6 @@ kern_status_t setup_kernel_task(void) __kernel_task->t_state = TASK_RUNNING; __kernel_task->t_pmap = get_kernel_pmap(); - vm_region_create( - NULL, - "root", - 4, - VM_KERNEL_BASE, - VM_KERNEL_LIMIT - VM_KERNEL_BASE, - VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXEC | VM_PROT_SVR, - &__kernel_task->t_address_space); - snprintf( __kernel_task->t_name, sizeof __kernel_task->t_name, @@ -195,16 +186,12 @@ struct task *task_create(const char *name, size_t name_len) task->t_id = pid_alloc(); task->t_pmap = pmap; - vm_region_create( - NULL, - "root", - 4, + address_space_create( VM_USER_BASE, - VM_USER_LIMIT - VM_USER_BASE, - VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXEC | VM_PROT_USER, + VM_USER_LIMIT, &task->t_address_space); - task->t_address_space->vr_pmap = pmap; + task->t_address_space->s_pmap = pmap; task->t_state = TASK_RUNNING; task->t_handles = handle_table_create(); diff --git a/syscall/vm-region.c b/syscall/address-space.c similarity index 51% rename from syscall/vm-region.c rename to syscall/address-space.c index 387db5b..190c78a 100644 --- a/syscall/vm-region.c +++ b/syscall/address-space.c @@ -1,134 +1,13 @@ +#include #include #include #include #include -#include -kern_status_t sys_vm_region_create( - 
kern_handle_t parent, - const char *name, - size_t name_len, - off_t offset, - size_t region_len, - vm_prot_t prot, - kern_handle_t *out, - virt_addr_t *out_base_address) -{ - struct task *self = current_task(); - - if (name_len && !validate_access_r(self, name, name_len)) { - return KERN_MEMORY_FAULT; - } - - if (!validate_access_w(self, out, sizeof *out)) { - return KERN_MEMORY_FAULT; - } - - if (!validate_access_w( - self, - out_base_address, - sizeof *out_base_address)) { - return KERN_MEMORY_FAULT; - } - - unsigned long flags; - task_lock_irqsave(self, &flags); - - struct object *obj = NULL; - handle_flags_t handle_flags = 0; - kern_status_t status - = task_resolve_handle(self, parent, &obj, &handle_flags); - if (status != KERN_OK) { - task_unlock_irqrestore(self, flags); - return status; - } - - struct vm_region *parent_region = vm_region_cast(obj); - if (!parent_region) { - task_unlock_irqrestore(self, flags); - return KERN_INVALID_ARGUMENT; - } - - struct handle *child_handle_slot = NULL; - kern_handle_t child_handle = KERN_HANDLE_INVALID; - - status = handle_table_alloc_handle( - self->t_handles, - &child_handle_slot, - &child_handle); - if (status != KERN_OK) { - task_unlock_irqrestore(self, flags); - return status; - } - - task_unlock_irqrestore(self, flags); - vm_region_lock_irqsave(parent_region, &flags); - - struct vm_region *child = NULL; - status = vm_region_create( - parent_region, - name, - name_len, - offset, - region_len, - prot, - &child); - vm_region_unlock_irqrestore(parent_region, flags); - object_unref(obj); - - if (status != KERN_OK) { - task_lock_irqsave(self, &flags); - handle_table_free_handle(self->t_handles, child_handle); - task_unlock_irqrestore(self, flags); - return status; - } - - child_handle_slot->h_object = &child->vr_base; - object_add_handle(&child->vr_base); - object_unref(&child->vr_base); - - *out = child_handle; - *out_base_address = vm_region_get_base_address(child); - - return KERN_OK; -} - -kern_status_t 
sys_vm_region_kill(kern_handle_t region_handle) -{ - struct task *self = current_task(); - - unsigned long flags; - task_lock_irqsave(self, &flags); - - struct object *obj = NULL; - handle_flags_t handle_flags = 0; - kern_status_t status - = task_resolve_handle(self, region_handle, &obj, &handle_flags); - if (status != KERN_OK) { - task_unlock_irqrestore(self, flags); - return status; - } - - struct vm_region *region = vm_region_cast(obj); - if (!region) { - task_unlock_irqrestore(self, flags); - return KERN_INVALID_ARGUMENT; - } - - task_unlock_irqrestore(self, flags); - - vm_region_lock_irqsave(region, &flags); - status = vm_region_kill(region, &flags); - vm_region_unlock_irqrestore(region, flags); - object_unref(obj); - - return status; -} - -kern_status_t sys_vm_region_read( +kern_status_t sys_address_space_read( kern_handle_t region_handle, void *dst, - off_t offset, + virt_addr_t base, size_t count, size_t *nr_read) { @@ -154,7 +33,7 @@ kern_status_t sys_vm_region_read( return status; } - struct vm_region *region = vm_region_cast(obj); + struct address_space *region = address_space_cast(obj); if (!region) { task_unlock_irqrestore(self, flags); return KERN_INVALID_ARGUMENT; @@ -162,23 +41,25 @@ kern_status_t sys_vm_region_read( task_unlock_irqrestore(self, flags); - virt_addr_t src_address = vm_region_get_base_address(region) + offset; - status = vm_region_memmove( + address_space_lock_irqsave(region, &flags); + status = address_space_memmove( self->t_address_space, (virt_addr_t)dst, region, - src_address, + base, count, nr_read); + address_space_unlock_irqrestore(region, flags); + object_unref(obj); return status; } -kern_status_t sys_vm_region_write( +kern_status_t sys_address_space_write( kern_handle_t region_handle, const void *src, - off_t offset, + virt_addr_t base, size_t count, size_t *nr_written) { @@ -205,7 +86,7 @@ kern_status_t sys_vm_region_write( return status; } - struct vm_region *region = vm_region_cast(obj); + struct address_space *region = 
address_space_cast(obj); if (!region) { task_unlock_irqrestore(self, flags); return KERN_INVALID_ARGUMENT; @@ -213,20 +94,22 @@ kern_status_t sys_vm_region_write( task_unlock_irqrestore(self, flags); - virt_addr_t dst_address = vm_region_get_base_address(region) + offset; - status = vm_region_memmove( + address_space_lock_irqsave(region, &flags); + status = address_space_memmove( region, - dst_address, + base, self->t_address_space, (virt_addr_t)src, count, nr_written); + address_space_unlock_irqrestore(region, flags); + object_unref(obj); return status; } -kern_status_t sys_vm_region_map_absolute( +kern_status_t sys_address_space_map( kern_handle_t region_handle, virt_addr_t map_address, kern_handle_t object_handle, @@ -267,7 +150,7 @@ kern_status_t sys_vm_region_map_absolute( return status; } - struct vm_region *region = vm_region_cast(region_obj); + struct address_space *region = address_space_cast(region_obj); if (!region) { task_unlock_irqrestore(self, flags); return KERN_INVALID_ARGUMENT; @@ -280,21 +163,17 @@ kern_status_t sys_vm_region_map_absolute( } task_unlock_irqrestore(self, flags); - - off_t region_offset = VM_REGION_ANY_OFFSET; - if (map_address != VM_REGION_ANY_OFFSET) { - region_offset - = map_address - vm_region_get_base_address(region); - } - - status = vm_region_map_object( + address_space_lock_irqsave(region, &flags); + /* address_space_map will take care of locking `vmo` */ + status = address_space_map( region, - region_offset, + map_address, vmo, object_offset, length, prot, out_base_address); + address_space_unlock_irqrestore(region, flags); object_unref(vmo_obj); object_unref(region_obj); @@ -302,23 +181,50 @@ kern_status_t sys_vm_region_map_absolute( return status; } -kern_status_t sys_vm_region_map_relative( +kern_status_t sys_address_space_unmap( kern_handle_t region_handle, - off_t region_offset, - kern_handle_t object_handle, - off_t object_offset, + virt_addr_t base, + size_t length) +{ + struct task *self = current_task(); + + 
kern_status_t status = KERN_OK; + unsigned long flags; + task_lock_irqsave(self, &flags); + + struct object *region_obj = NULL; + handle_flags_t region_flags = 0; + status = task_resolve_handle( + self, + region_handle, + ®ion_obj, + ®ion_flags); + if (status != KERN_OK) { + task_unlock_irqrestore(self, flags); + return status; + } + + struct address_space *region = address_space_cast(region_obj); + if (!region) { + task_unlock_irqrestore(self, flags); + return KERN_INVALID_ARGUMENT; + } + + task_unlock_irqrestore(self, flags); + + status = address_space_unmap(region, base, length); + + object_unref(region_obj); + + return status; +} + +kern_status_t sys_address_space_reserve( + kern_handle_t region_handle, + virt_addr_t map_address, size_t length, - vm_prot_t prot, virt_addr_t *out_base_address) { - tracek("vm_region_map_relative(%x, %x, %x, %x, %x, %x, %p)", - region_handle, - region_offset, - object_handle, - object_offset, - length, - prot, - out_base_address); struct task *self = current_task(); if (out_base_address @@ -333,8 +239,8 @@ kern_status_t sys_vm_region_map_relative( unsigned long flags; task_lock_irqsave(self, &flags); - struct object *region_obj = NULL, *vmo_obj = NULL; - handle_flags_t region_flags = 0, vmo_flags = 0; + struct object *region_obj = NULL; + handle_flags_t region_flags = 0; status = task_resolve_handle( self, region_handle, @@ -345,45 +251,30 @@ kern_status_t sys_vm_region_map_relative( return status; } - status = task_resolve_handle(self, object_handle, &vmo_obj, &vmo_flags); - if (status != KERN_OK) { - task_unlock_irqrestore(self, flags); - return status; - } - - struct vm_region *region = vm_region_cast(region_obj); + struct address_space *region = address_space_cast(region_obj); if (!region) { task_unlock_irqrestore(self, flags); return KERN_INVALID_ARGUMENT; } - struct vm_object *vmo = vm_object_cast(vmo_obj); - if (!vmo) { - task_unlock_irqrestore(self, flags); - return KERN_INVALID_ARGUMENT; - } - task_unlock_irqrestore(self, 
flags); - status = vm_region_map_object( + address_space_lock_irqsave(region, &flags); + status = address_space_reserve( region, - region_offset, - vmo, - object_offset, + map_address, length, - prot, out_base_address); + address_space_unlock_irqrestore(region, flags); - object_unref(vmo_obj); object_unref(region_obj); - tracek("result: %u", status); return status; } -kern_status_t sys_vm_region_unmap_absolute( +kern_status_t sys_address_space_release( kern_handle_t region_handle, - virt_addr_t address, + virt_addr_t base, size_t length) { struct task *self = current_task(); @@ -404,7 +295,7 @@ kern_status_t sys_vm_region_unmap_absolute( return status; } - struct vm_region *region = vm_region_cast(region_obj); + struct address_space *region = address_space_cast(region_obj); if (!region) { task_unlock_irqrestore(self, flags); return KERN_INVALID_ARGUMENT; @@ -412,46 +303,9 @@ kern_status_t sys_vm_region_unmap_absolute( task_unlock_irqrestore(self, flags); - off_t region_offset = address - vm_region_get_base_address(region); - status = vm_region_unmap(region, region_offset, length); - - object_unref(region_obj); - - return status; -} - -kern_status_t sys_vm_region_unmap_relative( - kern_handle_t region_handle, - off_t offset, - size_t length) -{ - struct task *self = current_task(); - - kern_status_t status = KERN_OK; - unsigned long flags; - task_lock_irqsave(self, &flags); - - struct object *region_obj = NULL; - handle_flags_t region_flags = 0; - status = task_resolve_handle( - self, - region_handle, - ®ion_obj, - ®ion_flags); - if (status != KERN_OK) { - task_unlock_irqrestore(self, flags); - return status; - } - - struct vm_region *region = vm_region_cast(region_obj); - if (!region) { - task_unlock_irqrestore(self, flags); - return KERN_INVALID_ARGUMENT; - } - - task_unlock_irqrestore(self, flags); - - status = vm_region_unmap(region, offset, length); + address_space_lock_irqsave(region, &flags); + status = address_space_unmap(region, base, length); + 
address_space_unlock_irqrestore(region, flags); object_unref(region_obj); diff --git a/syscall/config.c b/syscall/config.c index 5dac66b..d8e020b 100644 --- a/syscall/config.c +++ b/syscall/config.c @@ -1,6 +1,5 @@ #include #include -#include kern_status_t sys_kern_config_get(kern_config_key_t key, void *ptr, size_t len) { diff --git a/syscall/dispatch.c b/syscall/dispatch.c index ce523bb..515c4fa 100644 --- a/syscall/dispatch.c +++ b/syscall/dispatch.c @@ -15,14 +15,12 @@ static const virt_addr_t syscall_table[] = { SYSCALL_TABLE_ENTRY(VM_OBJECT_READ, vm_object_read), SYSCALL_TABLE_ENTRY(VM_OBJECT_WRITE, vm_object_write), SYSCALL_TABLE_ENTRY(VM_OBJECT_COPY, vm_object_copy), - SYSCALL_TABLE_ENTRY(VM_REGION_CREATE, vm_region_create), - SYSCALL_TABLE_ENTRY(VM_REGION_KILL, vm_region_kill), - SYSCALL_TABLE_ENTRY(VM_REGION_READ, vm_region_read), - SYSCALL_TABLE_ENTRY(VM_REGION_WRITE, vm_region_write), - SYSCALL_TABLE_ENTRY(VM_REGION_MAP_ABSOLUTE, vm_region_map_absolute), - SYSCALL_TABLE_ENTRY(VM_REGION_MAP_RELATIVE, vm_region_map_relative), - SYSCALL_TABLE_ENTRY(VM_REGION_UNMAP_ABSOLUTE, vm_region_unmap_absolute), - SYSCALL_TABLE_ENTRY(VM_REGION_UNMAP_RELATIVE, vm_region_unmap_relative), + SYSCALL_TABLE_ENTRY(ADDRESS_SPACE_READ, address_space_read), + SYSCALL_TABLE_ENTRY(ADDRESS_SPACE_WRITE, address_space_write), + SYSCALL_TABLE_ENTRY(ADDRESS_SPACE_MAP, address_space_map), + SYSCALL_TABLE_ENTRY(ADDRESS_SPACE_UNMAP, address_space_unmap), + SYSCALL_TABLE_ENTRY(ADDRESS_SPACE_RESERVE, address_space_reserve), + SYSCALL_TABLE_ENTRY(ADDRESS_SPACE_RELEASE, address_space_release), SYSCALL_TABLE_ENTRY(KERN_LOG, kern_log), SYSCALL_TABLE_ENTRY(KERN_HANDLE_CLOSE, kern_handle_close), SYSCALL_TABLE_ENTRY(KERN_CONFIG_GET, kern_config_get), diff --git a/syscall/msg.c b/syscall/msg.c index b32f823..6c06038 100644 --- a/syscall/msg.c +++ b/syscall/msg.c @@ -4,7 +4,6 @@ #include #include #include -#include kern_status_t sys_channel_create(unsigned int id, kern_handle_t *out) { diff --git 
a/syscall/task.c b/syscall/task.c index 53192d5..0fd072c 100644 --- a/syscall/task.c +++ b/syscall/task.c @@ -1,10 +1,10 @@ +#include #include #include #include #include #include #include -#include extern kern_status_t sys_task_exit(int status) { @@ -128,10 +128,10 @@ kern_status_t sys_task_create( task_unlock_irqrestore(parent, flags); child_handle_slot->h_object = &child->t_base; - space_handle_slot->h_object = &child->t_address_space->vr_base; + space_handle_slot->h_object = &child->t_address_space->s_base; object_add_handle(&child->t_base); - object_add_handle(&child->t_address_space->vr_base); + object_add_handle(&child->t_address_space->s_base); object_unref(parent_obj); @@ -252,8 +252,8 @@ kern_status_t sys_task_get_address_space( return KERN_INVALID_ARGUMENT; } - handle_slot->h_object = &task->t_address_space->vr_base; - object_add_handle(&task->t_address_space->vr_base); + handle_slot->h_object = &task->t_address_space->s_base; + object_add_handle(&task->t_address_space->s_base); task_unlock_irqrestore(self, flags); object_unref(task_obj); diff --git a/syscall/vm-object.c b/syscall/vm-object.c index 7f789ae..15a1d9a 100644 --- a/syscall/vm-object.c +++ b/syscall/vm-object.c @@ -3,7 +3,6 @@ #include #include #include -#include kern_status_t sys_vm_object_create( const char *name, diff --git a/vm/address-space.c b/vm/address-space.c new file mode 100644 index 0000000..8850208 --- /dev/null +++ b/vm/address-space.c @@ -0,0 +1,1341 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/*** STATIC DATA + MACROS *****************************************************/ + +#define ADDRESS_SPACE_CAST(p) \ + OBJECT_C_CAST(struct address_space, s_base, &address_space_type, p) +#define INVALID_OFFSET ((off_t) - 1) + +enum get_entry_flags { + GET_ENTRY_EXACT = 0, + GET_ENTRY_CLOSEST_LEFT, + GET_ENTRY_CLOSEST_RIGHT, +}; + +/* iterates over a range of mapped virtual memory in a region, and provides + * a moving buffer through which 
the memory can be accessed */ +struct vm_iterator { + struct address_space *it_region; + struct vm_area *it_mapping; + virt_addr_t it_base; + vm_prot_t it_prot; + void *it_buf; + size_t it_max; +}; + +/* iterates over the areas in an address space */ +struct area_iterator { + struct address_space *it_root; + struct vm_area *it_area; + virt_addr_t it_search_base, it_search_limit; + virt_addr_t it_base, it_limit; +}; + +enum search_direction { + SEARCH_LEFT, + SEARCH_RIGHT, +}; + +static kern_status_t address_space_object_destroy(struct object *obj); + +static struct object_type address_space_type = { + .ob_name = "address-space", + .ob_size = sizeof(struct address_space), + .ob_header_offset = offsetof(struct address_space, s_base), +}; + +static struct vm_cache vm_area_cache = { + .c_name = "vm-area", + .c_obj_size = sizeof(struct vm_area), +}; + +/*** INTERNAL UTILITY FUNCTION ************************************************/ + +/* this function must be called with `parent` locked */ +static void put_entry(struct btree *tree, struct vm_area *child) +{ + struct btree_node *cur = tree->b_root; + if (!cur) { + tree->b_root = &child->vma_node; + btree_insert_fixup(tree, &child->vma_node); + return; + } + + while (cur) { + struct vm_area *cur_entry + = BTREE_CONTAINER(struct vm_area, vma_node, cur); + + struct btree_node *next = NULL; + + if (child->vma_limit < cur_entry->vma_base) { + next = btree_left(cur); + } else if (child->vma_base > cur_entry->vma_limit) { + next = btree_right(cur); + } else { + panic("tried to add an overlapping entry [%zx-%zx] to " + "vm-region (overlaps [%zx-%zx])", + child->vma_base, + child->vma_limit, + cur_entry->vma_base, + cur_entry->vma_limit); + } + + if (next) { + cur = next; + continue; + } + + if (child->vma_limit < cur_entry->vma_base) { + btree_put_left(cur, &child->vma_node); + } else { + btree_put_right(cur, &child->vma_node); + } + + btree_insert_fixup(tree, &child->vma_node); + break; + } +} + +static struct vm_area 
*get_entry( + struct address_space *region, + virt_addr_t address, + enum get_entry_flags flags) +{ + /* `x` must be to the left of `y` */ +#define LEFT_DIFF(x, y) ((y) ? ((y)->vma_base - (x)) : ((size_t)-1)) + /* `x` must be to the right of `y` */ +#define RIGHT_DIFF(x, y) ((y) ? ((y)->vma_limit - (x)) : ((size_t)-1)) + + struct btree_node *cur = region->s_mappings.b_root; + if (!cur) { + return NULL; + } + + struct vm_area *result = NULL; + struct vm_area *closest_left = NULL; + struct vm_area *closest_right = NULL; + + while (cur) { + struct vm_area *child + = BTREE_CONTAINER(struct vm_area, vma_node, cur); + + struct btree_node *next = NULL; + + if (address < child->vma_base) { + next = btree_left(cur); + if (LEFT_DIFF(address, child) + < LEFT_DIFF(address, closest_left)) { + closest_left = child; + } + } else if (address > child->vma_limit) { + next = btree_right(cur); + if (RIGHT_DIFF(address, child) + < RIGHT_DIFF(address, closest_right)) { + closest_right = child; + } + } else { + result = child; + break; + } + + cur = next; + } + + if (result) { + return result; + } + + if (flags & GET_ENTRY_CLOSEST_LEFT) { + return closest_left; + } + + if (flags & GET_ENTRY_CLOSEST_RIGHT) { + return closest_right; + } + + return NULL; +#undef LEFT_DIFF +#undef RIGHT_DIFF +} + +/* does not consider reserved areas! 
*/ +static bool is_area_free( + const struct address_space *space, + virt_addr_t base, + size_t len) +{ + virt_addr_t limit = base + len - 1; + + if (base < space->s_base_address) { + return false; + } + + if (base + limit > space->s_limit_address) { + return false; + } + + struct btree_node *cur = space->s_mappings.b_root; + if (!cur) { + return true; + } + + while (cur) { + struct vm_area *cur_area + = BTREE_CONTAINER(struct vm_area, vma_node, cur); + + if (base >= cur_area->vma_base && base <= cur_area->vma_limit) { + return false; + } + + if (limit >= cur_area->vma_base + && limit <= cur_area->vma_limit) { + return false; + } + + if (base > cur_area->vma_limit) { + cur = btree_right(cur); + } else if (limit < cur_area->vma_base) { + cur = btree_left(cur); + } else { + /* what */ + panic("unhandled case in is_area_free"); + } + } + + return true; +} + +/* ONLY considers reserved areas! */ +static bool is_area_reserved( + const struct address_space *space, + virt_addr_t base, + size_t len) +{ + virt_addr_t limit = base + len - 1; + + if (base < space->s_base_address) { + return false; + } + + if (base + limit > space->s_limit_address) { + return false; + } + + struct btree_node *cur = space->s_reserved.b_root; + if (!cur) { + return false; + } + + while (cur) { + struct vm_area *cur_area + = BTREE_CONTAINER(struct vm_area, vma_node, cur); + + if (base >= cur_area->vma_base && base <= cur_area->vma_limit) { + return true; + } + + if (limit >= cur_area->vma_base + && limit <= cur_area->vma_limit) { + return true; + } + + if (base > cur_area->vma_limit) { + cur = btree_right(cur); + } else if (limit < cur_area->vma_base) { + cur = btree_left(cur); + } else { + /* what */ + panic("unhandled case in is_area_reserved"); + } + } + + return false; +} + +static virt_addr_t generate_address( + virt_addr_t lower_bound, + virt_addr_t upper_bound) +{ + virt_addr_t result = 0; + fill_random(&result, sizeof result); + + virt_addr_t mask = upper_bound; + + result += lower_bound; 
+ result &= mask; + result &= ~VM_PAGE_MASK; + + return result; +} + +static virt_addr_t find_free_area( + struct address_space *space, + size_t target_length) +{ + virt_addr_t search_limit = space->s_base_address << 16; + + int attempt = 0; + while (1) { + virt_addr_t base = generate_address( + space->s_base_address, + search_limit - 1); + bool ok = true; + + if (is_area_reserved(space, base, target_length)) { + ok = false; + } + + if (ok && !is_area_free(space, base, target_length)) { + ok = false; + } + + if (ok) { + return base; + } + + attempt++; + if (attempt >= 3) { + search_limit <<= 4; + attempt = 0; + } + } + + return 0; +} + +/* this function should be called with `region` locked */ +static void vm_iterator_begin( + struct vm_iterator *it, + struct address_space *region, + virt_addr_t base, + vm_prot_t prot) +{ + memset(it, 0x0, sizeof *it); + it->it_base = base; + it->it_region = region; + it->it_prot = prot; + + it->it_mapping = get_entry(region, base, GET_ENTRY_EXACT); + if (!it->it_mapping) { + return; + } + + if ((it->it_mapping->vma_prot & prot) != prot) { + return; + } + + off_t object_offset = base - it->it_mapping->vma_base + + it->it_mapping->vma_object_offset; + struct vm_page *pg = NULL; + if (prot & VM_PROT_WRITE) { + pg = vm_object_alloc_page( + it->it_mapping->vma_object, + object_offset, + VM_PAGE_4K); + } else { + pg = vm_object_get_page( + it->it_mapping->vma_object, + object_offset); + } + + if (!pg) { + return; + } + + void *buffer_base = vm_page_get_vaddr(pg); + phys_addr_t pg_addr = vm_page_get_paddr(pg); + size_t buffer_size = vm_page_get_size_bytes(pg); + + while (1) { + struct btree_node *next_node = btree_next(&pg->p_bnode); + struct vm_page *next + = BTREE_CONTAINER(struct vm_page, p_bnode, next_node); + if (!next) { + break; + } + + phys_addr_t next_addr = vm_page_get_paddr(next); + if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) { + break; + } + + pg = next; + pg_addr = next_addr; + buffer_size += 
vm_page_get_size_bytes(next); + } + + it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK); + it->it_max = buffer_size - (object_offset & VM_PAGE_MASK); +} + +static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) +{ + if (nr_bytes < it->it_max) { + it->it_base += nr_bytes; + it->it_buf = (char *)it->it_buf + nr_bytes; + it->it_max -= nr_bytes; + return KERN_OK; + } + + it->it_base += nr_bytes; + + struct vm_area *next_mapping + = get_entry(it->it_region, it->it_base, GET_ENTRY_EXACT); + if (!next_mapping) { + it->it_buf = NULL; + it->it_max = 0; + return KERN_MEMORY_FAULT; + } + + if ((next_mapping->vma_prot & it->it_prot) != it->it_prot) { + it->it_buf = NULL; + it->it_max = 0; + return KERN_MEMORY_FAULT; + } + + off_t object_offset = it->it_base - it->it_mapping->vma_base + + it->it_mapping->vma_object_offset; + + struct vm_page *pg = NULL; + if (it->it_prot & VM_PROT_WRITE) { + pg = vm_object_alloc_page( + it->it_mapping->vma_object, + object_offset, + VM_PAGE_4K); + } else { + pg = vm_object_get_page( + it->it_mapping->vma_object, + object_offset); + } + + if (!pg) { + return KERN_NO_MEMORY; + } + + void *buffer_base = vm_page_get_vaddr(pg); + phys_addr_t pg_addr = vm_page_get_paddr(pg); + size_t buffer_size = vm_page_get_size_bytes(pg); + + while (1) { + struct btree_node *next_node = btree_next(&pg->p_bnode); + struct vm_page *next + = BTREE_CONTAINER(struct vm_page, p_bnode, next_node); + if (!next) { + break; + } + + phys_addr_t next_addr = vm_page_get_paddr(next); + if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) { + break; + } + + pg = next; + pg_addr = next_addr; + buffer_size += vm_page_get_size_bytes(next); + } + + it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK); + it->it_max = buffer_size; + return KERN_OK; +} + +static void vm_iterator_finish(struct vm_iterator *it) +{ + memset(it, 0x0, sizeof *it); +} + +/* this function must be called with `space` locked */ +static void 
area_iterator_begin( + struct area_iterator *it, + struct address_space *space, + virt_addr_t base, + virt_addr_t limit) +{ + memset(it, 0x0, sizeof *it); + + struct vm_area *area = get_entry(space, base, GET_ENTRY_CLOSEST_RIGHT); + if (!area) { + return; + } + + if (area->vma_base > limit) { + return; + } + + it->it_search_base = base; + it->it_search_limit = limit; + it->it_root = space; + it->it_area = area; + it->it_base = area->vma_base; + it->it_limit = area->vma_base; + + if (it->it_base < base) { + it->it_base = base; + } + + if (it->it_limit > limit) { + it->it_limit = limit; + } +} + +static void area_iterator_finish(struct area_iterator *it) +{ + memset(it, 0x0, sizeof *it); +} + +static kern_status_t area_iterator_move_next(struct area_iterator *it) +{ + if (!it->it_root || !it->it_area) { + return KERN_NO_ENTRY; + } + + struct btree_node *next = btree_next(&it->it_area->vma_node); + if (!next) { + goto end; + } + + struct vm_area *area = BTREE_CONTAINER(struct vm_area, vma_node, next); + if (!area) { + goto end; + } + + if (area->vma_base > it->it_search_limit) { + goto end; + } + + it->it_area = area; + it->it_base = area->vma_base; + it->it_limit = area->vma_base; + + if (it->it_base < it->it_search_base) { + it->it_base = it->it_search_base; + } + + if (it->it_limit > it->it_search_limit) { + it->it_limit = it->it_search_limit; + } + + return KERN_OK; + +end: + memset(it, 0x0, sizeof *it); + return KERN_NO_ENTRY; +} + +static void area_iterator_erase(struct area_iterator *it) +{ +} + +/*** PUBLIC API ***************************************************************/ + +kern_status_t address_space_type_init(void) +{ + vm_cache_init(&vm_area_cache); + return object_type_register(&address_space_type); +} + +struct address_space *address_space_cast(struct object *obj) +{ + return ADDRESS_SPACE_CAST(obj); +} + +/* this function should be called with `parent` locked (if parent is + * non-NULL) + */ +kern_status_t address_space_create( + virt_addr_t base, + 
virt_addr_t limit, + struct address_space **out) +{ + if (!base || !limit || limit <= base) { + return KERN_INVALID_ARGUMENT; + } + + if ((base & VM_PAGE_MASK) || ((limit + 1) & VM_PAGE_MASK)) { + return KERN_INVALID_ARGUMENT; + } + + struct object *region_object = object_create(&address_space_type); + if (!region_object) { + return KERN_NO_MEMORY; + } + + struct address_space *space = ADDRESS_SPACE_CAST(region_object); + + space->s_base_address = base; + space->s_limit_address = limit; +#ifdef TRACE + tracek("creating address space at [%llx-%llx]", base, limit); +#endif + + *out = space; + return KERN_OK; +} + +kern_status_t address_space_map( + struct address_space *root, + virt_addr_t map_address, + struct vm_object *object, + off_t object_offset, + size_t length, + vm_prot_t prot, + virt_addr_t *out) +{ + if (object_offset & VM_PAGE_MASK) { + object_offset &= ~VM_PAGE_MASK; + } + + if (length & VM_PAGE_MASK) { + length &= ~VM_PAGE_MASK; + length += VM_PAGE_SIZE; + } + + if (map_address != MAP_ADDRESS_ANY && (map_address & VM_PAGE_MASK)) { + map_address &= ~VM_PAGE_MASK; + } + + tracek("address_space_map(%zx, %zx)", map_address, length); + + if (!root || !object) { + tracek("null pointer"); + return KERN_INVALID_ARGUMENT; + } + + if ((prot & object->vo_prot) != prot) { + tracek("protection error"); + return KERN_INVALID_ARGUMENT; + } + + if (!length || object_offset + length > object->vo_size) { + tracek("length exceeds object bounds"); + return KERN_INVALID_ARGUMENT; + } + + if (map_address == MAP_ADDRESS_ANY) { + map_address = find_free_area(root, length); + + if (map_address == MAP_ADDRESS_INVALID) { + tracek("no virtual memory available"); + return KERN_NO_MEMORY; + } + } else if (!is_area_free(root, map_address, length)) { + tracek("area already in use"); + return KERN_INVALID_ARGUMENT; + } + + struct vm_area *area = vm_cache_alloc(&vm_area_cache, VM_NORMAL); + if (!area) { + return KERN_NO_MEMORY; + } + + object_ref(&object->vo_base); + area->vma_object = 
object; + area->vma_prot = prot; + area->vma_object_offset = object_offset; + area->vma_base = map_address; + area->vma_limit = map_address + length - 1; + +#ifdef TRACE + tracek("mapping %s at [%llx-%llx]", + object->vo_name, + area->vma_base, + area->vma_base + length); +#endif + put_entry(&root->s_mappings, area); + + unsigned long lock_flags; + vm_object_lock_irqsave(object, &lock_flags); + queue_push_back(&object->vo_mappings, &area->vma_object_entry); + vm_object_unlock_irqrestore(object, lock_flags); + + if (out) { + *out = map_address; + } + + return KERN_OK; +} + +/* unmap some pages in the middle of an area, splitting it into two + * separate mappings */ +static kern_status_t split_area( + struct vm_area *mapping, + struct address_space *root, + virt_addr_t unmap_base, + virt_addr_t unmap_limit) +{ + struct vm_area *left = mapping; + struct vm_area *right = vm_cache_alloc(&vm_area_cache, VM_NORMAL); + if (!right) { + return KERN_NO_MEMORY; + } + + virt_addr_t left_base = mapping->vma_base; + virt_addr_t right_base = unmap_limit; + off_t left_object_offset = mapping->vma_object_offset; + size_t left_length = unmap_base - mapping->vma_base; + size_t right_length = mapping->vma_limit - unmap_limit; + off_t right_object_offset = mapping->vma_limit - right_length; + + tracek("mapping=[%zx-%zx]->[%zx-%zx]", + mapping->vma_base, + mapping->vma_limit, + mapping->vma_object_offset, + mapping->vma_object_offset + + (mapping->vma_limit - mapping->vma_base)); + tracek("left=[%zx-%zx]->[%zx-%zx], right=[%zx-%zx]->[%zx-%zx]", + left_base, + left_base + left_length, + left_object_offset, + left_object_offset + left_length, + right_base, + right_base + right_length, + right_object_offset, + right_object_offset + right_length); + + left->vma_object_offset = left_object_offset; + left->vma_base = left_base; + left->vma_limit = left_base + left_length - 1; + + right->vma_object = left->vma_object; + right->vma_prot = left->vma_prot; + right->vma_object_offset = 
right_object_offset;
+	right->vma_base = right_base;
+	right->vma_limit = right_base + right_length - 1;
+
+	if (!mapping->vma_object) {
+		/* a reservation (no backing object): record the right half
+		 * in the reserved tree; there are no page tables to update */
+		put_entry(&root->s_reserved, right);
+		/* just a reservation, no page tables to update */
+		return KERN_OK;
+	}
+
+	put_entry(&root->s_mappings, right);
+	/* unmap_base is page-aligned and unmap_limit is the inclusive last
+	 * byte, so i < unmap_limit still visits the final page */
+	for (size_t i = unmap_base; i < unmap_limit; i += VM_PAGE_SIZE) {
+		tracek("unmapping %zx", i);
+		pmap_remove(root->s_pmap, i);
+	}
+
+	return KERN_OK;
+}
+
+/* unmap some pages from the left-side of a mapping to somewhere in the
+ * middle. */
+static kern_status_t left_reduce_area(
+	struct vm_area *mapping,
+	struct address_space *root,
+	virt_addr_t unmap_base,
+	virt_addr_t unmap_limit)
+{
+	tracek("left reduce mapping [%zx-%zx] subtract [%zx-%zx]",
+		mapping->vma_base,
+		mapping->vma_limit,
+		unmap_base,
+		unmap_limit);
+
+	/* remove [vma_base, unmap_limit]; both bounds inclusive */
+	virt_addr_t base = mapping->vma_base;
+	virt_addr_t limit = unmap_limit;
+	size_t length = limit - base + 1;
+
+	/* shift the area's start (and its view into the object) right by
+	 * the number of bytes removed */
+	mapping->vma_base += length;
+	mapping->vma_object_offset += length;
+
+	if (!mapping->vma_object) {
+		/* just a reservation, no page tables to update */
+		tracek(" unreserving %zx-%zx (%zx bytes)",
+			base,
+			base + length,
+			length);
+		return KERN_OK;
+	}
+
+	tracek(" unmapping %zx-%zx (%zx bytes)", base, base + length, length);
+	for (size_t i = base; i < limit; i += VM_PAGE_SIZE) {
+		pmap_remove(root->s_pmap, i);
+	}
+
+	return KERN_OK;
+}
+
+/* unmap some pages from the middle of a mapping to the right-side. 
*/
+static kern_status_t right_reduce_area(
+	struct vm_area *mapping,
+	struct address_space *root,
+	virt_addr_t unmap_base,
+	virt_addr_t unmap_limit)
+{
+	/* unmap_base falls somewhere between mapping_offset and
+	 * mapping_offset+length */
+	tracek("right reduce mapping [%zx-%zx] subtract [%zx-%zx]",
+		mapping->vma_base,
+		mapping->vma_limit,
+		unmap_base,
+		unmap_limit);
+
+	/* remove [unmap_base, vma_limit]; both bounds inclusive */
+	virt_addr_t base = unmap_base;
+	virt_addr_t limit = mapping->vma_limit;
+	size_t length = limit - base + 1;
+	mapping->vma_limit -= length;
+
+	if (!mapping->vma_object) {
+		/* just a reservation, no page tables to update */
+		tracek(" unreserving %zx-%zx (%zx bytes)",
+			base,
+			base + length,
+			length);
+		return KERN_OK;
+	}
+
+	tracek(" unmapping %zx-%zx (%zx bytes)", base, base + length, length);
+	for (size_t i = base; i < limit; i += VM_PAGE_SIZE) {
+		pmap_remove(root->s_pmap, i);
+	}
+
+	return KERN_OK;
+}
+
+/* completely unmap and delete an entire mapping */
+static kern_status_t delete_area(
+	struct vm_area *mapping,
+	struct address_space *root)
+{
+	if (!mapping->vma_object) {
+		/* just a reservation, no page tables to update */
+		return KERN_OK;
+	}
+
+	tracek("delete mapping [%zx-%zx]",
+		mapping->vma_base,
+		mapping->vma_limit);
+
+	for (size_t i = mapping->vma_base; i < mapping->vma_limit;
+		i += VM_PAGE_SIZE) {
+		pmap_remove(root->s_pmap, i);
+	}
+
+	struct vm_object *object = mapping->vma_object;
+	unsigned long flags;
+	vm_object_lock_irqsave(object, &flags);
+	queue_delete(
+		&object->vo_mappings,
+		&mapping->vma_object_entry);
+	vm_object_unlock_irqrestore(object, flags);
+	/* clear the back-pointer only AFTER releasing the lock: the old
+	 * code set vma_object = NULL first and then unlocked through the
+	 * now-NULL pointer (NULL dereference) */
+	mapping->vma_object = NULL;
+	object_unref(&object->vo_base);
+
+	/* don't actually delete the mapping yet. 
that will be done by
+	 * address_space_unmap */
+
+	return KERN_OK;
+}
+
+kern_status_t address_space_unmap(
+	struct address_space *region,
+	virt_addr_t unmap_base,
+	size_t unmap_length)
+{
+	if (unmap_length == 0) {
+		return KERN_OK;
+	}
+
+	if (unmap_base & VM_PAGE_MASK) {
+		unmap_base &= ~VM_PAGE_MASK;
+	}
+
+	if (unmap_length & VM_PAGE_MASK) {
+		/* round the length UP to whole pages. the old code used
+		 * `&= VM_PAGE_MASK`, which kept only the sub-page bits and
+		 * destroyed the length (compare address_space_map, which
+		 * uses `&= ~VM_PAGE_MASK`) */
+		unmap_length &= ~VM_PAGE_MASK;
+		unmap_length += VM_PAGE_SIZE;
+	}
+
+	kern_status_t status = KERN_OK;
+	struct area_iterator it;
+	virt_addr_t unmap_limit = unmap_base + unmap_length - 1;
+	tracek("unmapping %zx-%zx", unmap_base, unmap_limit);
+
+	area_iterator_begin(&it, region, unmap_base, unmap_limit);
+	while (it.it_area) {
+		struct vm_area *area = it.it_area;
+		virt_addr_t area_base = area->vma_base;
+		virt_addr_t area_limit = area->vma_limit;
+
+		/* classify how the unmap range overlaps this area */
+		bool split
+			= (area_base > unmap_base && area_limit < unmap_limit);
+		bool delete
+			= (area_base <= unmap_base
+				&& area_limit >= unmap_limit);
+		bool left_reduce
+			= (unmap_base <= area_base && unmap_limit < area_limit);
+		bool right_reduce
+			= (unmap_base > area_base && unmap_limit >= area_limit);
+
+		if (split) {
+			status = split_area(
+				area,
+				region,
+				unmap_base,
+				unmap_limit);
+			/* NOTE(review): delete=true makes the erase below
+			 * remove the (shrunk) left half from the tree —
+			 * confirm area_iterator_erase re-inserts it, else
+			 * the left mapping is lost */
+			delete = true;
+		} else if (delete) {
+			status = delete_area(area, region);
+		} else if (left_reduce) {
+			status = left_reduce_area(
+				area,
+				region,
+				unmap_base,
+				unmap_limit);
+		} else if (right_reduce) {
+			status = right_reduce_area(
+				area,
+				region,
+				unmap_base,
+				unmap_limit);
+		} else {
+			panic("don't know what to do with this "
+				"mapping");
+		}
+
+		if (delete) {
+			area_iterator_erase(&it);
+		} else {
+			area_iterator_move_next(&it);
+		}
+
+		if (status != KERN_OK) {
+			break;
+		}
+	}
+
+	area_iterator_finish(&it);
+
+	return status;
+}
+
+kern_status_t address_space_reserve(
+	struct address_space *space,
+	virt_addr_t base,
+	size_t length,
+	virt_addr_t *out)
+{
+	if (length & VM_PAGE_MASK) {
+		length &= ~VM_PAGE_MASK;
+		length += VM_PAGE_SIZE;
+	}
+
+	if (base != 
MAP_ADDRESS_ANY && (base & VM_PAGE_MASK)) { + base &= ~VM_PAGE_MASK; + } + + if (!space) { + return KERN_INVALID_ARGUMENT; + } + + if (!length || base + length > space->s_limit_address) { + return KERN_INVALID_ARGUMENT; + } + + if (base == MAP_ADDRESS_ANY) { + base = find_free_area(space, length); + + if (base == MAP_ADDRESS_INVALID) { + return KERN_NO_MEMORY; + } + } else { + if (is_area_reserved(space, base, length)) { + /* for now, don't figure out overlapping reservations */ + return KERN_INVALID_ARGUMENT; + } + + if (!is_area_free(space, base, length)) { + return KERN_INVALID_ARGUMENT; + } + } + + struct vm_area *area = vm_cache_alloc(&vm_area_cache, VM_NORMAL); + if (!area) { + return KERN_NO_MEMORY; + } + + area->vma_base = base; + area->vma_limit = base + length - 1; + +#ifdef TRACE + tracek("reservation at [%llx-%llx]", area->vma_base, area->vma_limit); +#endif + put_entry(&space->s_reserved, area); + + if (out) { + *out = base; + } + + return KERN_OK; +} + +kern_status_t address_space_release( + struct address_space *space, + virt_addr_t release_base, + size_t release_length) +{ + if ((release_base & VM_PAGE_MASK) || (release_length & VM_PAGE_MASK)) { + return KERN_INVALID_ARGUMENT; + } + + kern_status_t status = KERN_OK; + struct area_iterator it; + virt_addr_t release_limit = release_base + release_length - 1; + tracek("unreserving %zx-%zx", release_base, release_limit); + + area_iterator_begin(&it, space, release_base, release_limit); + while (it.it_area) { + struct vm_area *area = it.it_area; + virt_addr_t area_base = area->vma_base; + virt_addr_t area_limit = area->vma_limit; + + bool split + = (area_base > release_base + && area_limit < release_limit); + bool delete + = (area_base <= release_base + && area_limit >= release_limit); + bool left_reduce + = (release_base <= area_base + && release_limit < area_limit); + bool right_reduce + = (release_base > area_base + && release_limit >= area_limit); + + if (split) { + status = split_area( + area, + 
space, + release_base, + release_limit); + delete = true; + } else if (delete) { + status = delete_area(area, space); + } else if (left_reduce) { + status = left_reduce_area( + area, + space, + release_base, + release_limit); + } else if (right_reduce) { + status = right_reduce_area( + area, + space, + release_base, + release_limit); + } else { + panic("don't know what to do with this " + "mapping"); + } + + if (delete) { + area_iterator_erase(&it); + } else { + area_iterator_move_next(&it); + } + + if (status != KERN_OK) { + break; + } + } + + area_iterator_finish(&it); + + return status; +} + +bool address_space_validate_access( + struct address_space *region, + virt_addr_t ptr, + size_t len, + vm_prot_t prot) +{ + if (len == 0) { + return true; + } + + if (ptr < region->s_base_address) { + return false; + } + + if (ptr + len > region->s_limit_address) { + return false; + } + + virt_addr_t base = ptr & ~VM_PAGE_MASK; + virt_addr_t limit = (ptr + len) - 1; + if ((limit + 1) & VM_PAGE_MASK) { + limit &= ~VM_PAGE_MASK; + limit += VM_PAGE_SIZE; + limit -= 1; + } + + /* TODO improve this to not require a per-page loop */ + for (virt_addr_t i = base; i < limit;) { + struct vm_area *area = get_entry(region, i, GET_ENTRY_EXACT); + if (!area) { + return false; + } + + if ((area->vma_prot & prot) != prot) { + return false; + } + + i = area->vma_limit; + } + + return true; +} + +/* this function must be called with `region` locked */ +kern_status_t address_space_demand_map( + struct address_space *region, + virt_addr_t addr, + enum pmap_fault_flags flags) +{ + addr &= ~VM_PAGE_MASK; + if (addr < region->s_base_address || addr > region->s_limit_address) { + return KERN_NO_ENTRY; + } + + struct vm_area *area = get_entry(region, addr, GET_ENTRY_EXACT); + if (!area) { + return KERN_NO_ENTRY; + } + + off_t object_offset = addr - area->vma_base + area->vma_object_offset; + +#if 0 + tracek("vm: tried to access vm-object %s at offset=%05llx", + area->vma_object->vo_name, + 
object_offset); +#endif + + unsigned long lock_flags; + vm_object_lock_irqsave(area->vma_object, &lock_flags); + struct vm_page *pg = vm_object_alloc_page( + area->vma_object, + object_offset, + VM_PAGE_4K); + vm_object_unlock_irqrestore(area->vma_object, lock_flags); + // tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr); + return pmap_add( + region->s_pmap, + addr, + vm_page_get_pfn(pg), + area->vma_prot, + PMAP_NORMAL); +} + +virt_addr_t address_space_get_base_address(const struct address_space *region) +{ + return region->s_base_address; +} + +kern_status_t address_space_read( + struct address_space *src_region, + virt_addr_t src_ptr, + size_t count, + void *destp, + size_t *nr_read) +{ + struct vm_iterator src; + char *dest = destp; + + vm_iterator_begin( + &src, + src_region, + src_ptr, + VM_PROT_READ | VM_PROT_USER); + + kern_status_t status = KERN_OK; + size_t r = 0; + + while (r < count && src.it_max) { + size_t remaining = count - r; + size_t to_move = MIN(src.it_max, remaining); + memmove(dest, src.it_buf, to_move); + + status = vm_iterator_seek(&src, to_move); + if (status != KERN_OK) { + break; + } + + r += to_move; + dest += to_move; + } + + vm_iterator_finish(&src); + + if (nr_read) { + *nr_read = r; + } + + return status; +} + +kern_status_t address_space_write( + struct address_space *dst_region, + virt_addr_t dst_ptr, + size_t count, + const void *srcp, + size_t *nr_written) +{ + struct vm_iterator dst; + const char *src = srcp; + + vm_iterator_begin( + &dst, + dst_region, + dst_ptr, + VM_PROT_WRITE | VM_PROT_USER); + + kern_status_t status = KERN_OK; + size_t r = 0; + + while (r < count && dst.it_max) { + size_t remaining = count - r; + size_t to_move = MIN(dst.it_max, remaining); + memmove(dst.it_buf, src, to_move); + + status = vm_iterator_seek(&dst, to_move); + if (status != KERN_OK) { + break; + } + + r += to_move; + src += to_move; + } + + vm_iterator_finish(&dst); + + if (nr_written) { + *nr_written = r; + } + + return 
status;
+}
+
+kern_status_t address_space_memmove(
+	struct address_space *dest_region,
+	virt_addr_t dest_ptr,
+	struct address_space *src_region,
+	virt_addr_t src_ptr,
+	size_t count,
+	size_t *nr_moved)
+{
+	/* cross-address-space copy: walk both spaces with vm_iterators and
+	 * move the largest chunk both can expose at each step */
+	struct vm_iterator src, dest;
+	vm_iterator_begin(
+		&src,
+		src_region,
+		src_ptr,
+		VM_PROT_READ | VM_PROT_USER);
+	vm_iterator_begin(
+		&dest,
+		dest_region,
+		dest_ptr,
+		VM_PROT_WRITE | VM_PROT_USER);
+
+	kern_status_t status = KERN_OK;
+	size_t r = 0;
+
+	while (count && src.it_max && dest.it_max) {
+		size_t to_move = MIN(MIN(src.it_max, dest.it_max), count);
+		memmove(dest.it_buf, src.it_buf, to_move);
+
+		status = vm_iterator_seek(&src, to_move);
+		if (status != KERN_OK) {
+			break;
+		}
+
+		status = vm_iterator_seek(&dest, to_move);
+		if (status != KERN_OK) {
+			break;
+		}
+
+		count -= to_move;
+		r += to_move;
+	}
+
+	/* iterators hold region locks; always release them */
+	vm_iterator_finish(&src);
+	vm_iterator_finish(&dest);
+
+	if (nr_moved) {
+		*nr_moved = r;
+	}
+
+	return status;
+}
+
+/* NOTE(review): `extern` on a definition is legal but unusual —
+ * presumably copied from the header declaration; confirm it is wanted */
+extern kern_status_t address_space_memmove_v(
+	struct address_space *dest_region,
+	size_t dest_offset,
+	const kern_iovec_t *dest_vecs,
+	size_t nr_dest_vecs,
+	struct address_space *src_region,
+	size_t src_offset,
+	const kern_iovec_t *src_vecs,
+	size_t nr_src_vecs,
+	size_t bytes_to_move,
+	size_t *nr_bytes_moved)
+{
+	struct iovec_iterator src, dest;
+	iovec_iterator_begin_user(&src, src_region, src_vecs, nr_src_vecs);
+	iovec_iterator_begin_user(&dest, dest_region, dest_vecs, nr_dest_vecs);
+
+	iovec_iterator_seek(&src, src_offset);
+	iovec_iterator_seek(&dest, dest_offset);
+
+	size_t moved = 0;
+	while (bytes_to_move && src.it_len && dest.it_len) {
+		size_t to_move
+			= MIN(MIN(src.it_len, dest.it_len), bytes_to_move);
+
+		kern_status_t status = address_space_memmove(
+			dest_region,
+			dest.it_base,
+			src_region,
+			src.it_base,
+			to_move,
+			NULL);
+		/* NOTE(review): an early return here loses `moved` — the
+		 * caller's nr_bytes_moved is never written on error; confirm
+		 * that is the intended contract */
+		if (status != KERN_OK) {
+			return status;
+		}
+
+		iovec_iterator_seek(&src, to_move);
+		iovec_iterator_seek(&dest, to_move);
+		bytes_to_move -= to_move;
+		moved += 
to_move; + } + + if (nr_bytes_moved) { + *nr_bytes_moved = moved; + } + + return KERN_OK; +} + +#ifdef TRACE +void address_space_dump(struct address_space *region) +{ + struct btree_node *cur = btree_first(®ion->s_mappings); + while (cur) { + struct vm_area *area + = BTREE_CONTAINER(struct vm_area, vma_node, cur); + + tracek("+mapping [%zx-%zx] %s", + area->vma_base, + area->vma_limit, + area->vma_object->vo_name); + + cur = btree_next(cur); + } +} +#endif diff --git a/vm/bootstrap.c b/vm/bootstrap.c index d228c84..12b4b51 100644 --- a/vm/bootstrap.c +++ b/vm/bootstrap.c @@ -1,11 +1,12 @@ -#include +#include #include #include #include -#include +#include #include -#include #include +#include +#include #include #include @@ -42,7 +43,8 @@ kern_status_t vm_bootstrap( kmalloc_init(); vm_object_type_init(); - vm_region_type_init(); + vm_controller_type_init(); + address_space_type_init(); return KERN_OK; } diff --git a/vm/vm-region.c b/vm/vm-region.c deleted file mode 100644 index 018fc27..0000000 --- a/vm/vm-region.c +++ /dev/null @@ -1,1927 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* NOTE Locking Rules - * To avoid deadlocks and crashes, the following locking rules should be - * followed: - * 1. Do NOT lock more than one region at a time IF the regions are siblings. - * 2. When locking a region and it's child(ren) or ancestors, always lock - * the parent region BEFORE the child region. - * 3. When locking a region and a vm-object mapped into that region, always - * lock the region BEFORE the vm-object. - * 3. An entry MUST be locked before any of its data can be read/written, - * including its children (if it's a region) and its e_parent pointer. - * 4. vm_region_mapping has no lock. Instead, its immediate parent region must - * be locked before any child mappings can be accessed. 
- */ - -/*** STATIC DATA + MACROS *****************************************************/ - -#undef ASLR - -#define INVALID_OFFSET ((off_t) - 1) - -#ifdef ASLR -#define region_find_free_area(region, length) \ - region_find_free_area_random(region, length) -#else -#define region_find_free_area(region, length) \ - region_find_free_area_linear(region, length) -#endif - -#define unlock_mapping_parent(p, root) \ - do { \ - struct vm_region *parent \ - = region_from_entry(p->m_entry.e_parent); \ - if (parent != root) { \ - vm_region_unlock(parent); \ - } \ - } while (0) - -/* iterates over a range of mapped virtual memory in a region, and provides - * a moving buffer through which the memory can be accessed */ -struct vm_iterator { - struct vm_region *it_region; - struct vm_region_mapping *it_mapping; - virt_addr_t it_base; - vm_prot_t it_prot; - void *it_buf; - size_t it_max; -}; - -/* iterates recursively over the entries in a region */ -struct entry_iterator { - struct vm_region *it_root; - struct vm_region_entry *it_entry; - /* depth of it_entry relative to it_root */ - unsigned int it_depth; -}; - -enum search_direction { - SEARCH_LEFT, - SEARCH_RIGHT, -}; - -#define VM_REGION_CAST(p) \ - OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p) - -static kern_status_t vm_region_object_destroy(struct object *obj); - -static kern_status_t region_object_destroy(struct object *obj, struct queue *q); -static kern_status_t region_object_destroy_recurse( - struct queue_entry *entry, - struct object **out); - -static struct object_type vm_region_type = { - .ob_name = "vm-region", - .ob_size = sizeof(struct vm_region), - .ob_header_offset = offsetof(struct vm_region, vr_base), - .ob_ops = { - .destroy = region_object_destroy, - .destroy_recurse = region_object_destroy_recurse, - }, -}; - -static struct vm_cache mapping_cache = { - .c_name = "vm-region-mapping", - .c_obj_size = sizeof(struct vm_region_mapping), -}; - -/*** INTERNAL UTILITY FUNCTION 
************************************************/ - -static struct vm_region *region_from_entry(struct vm_region_entry *entry) -{ - if (!entry || entry->e_type != VM_REGION_ENTRY_REGION) { - return NULL; - } - - return BTREE_CONTAINER(struct vm_region, vr_entry, entry); -} - -static struct vm_region_mapping *mapping_from_entry( - struct vm_region_entry *entry) -{ - if (!entry || entry->e_type != VM_REGION_ENTRY_MAPPING) { - return NULL; - } - - return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry); -} - -kern_status_t region_object_destroy(struct object *obj, struct queue *q) -{ - struct vm_region *region = VM_REGION_CAST(obj); - if (region->vr_status == VM_REGION_ONLINE) { - panic("last reference closed on an online vm-region"); - } - - struct btree_node *node = btree_first(®ion->vr_entries); - while (node) { - struct btree_node *next = btree_next(node); - btree_delete(®ion->vr_entries, node); - - struct vm_region_entry *entry - = BTREE_CONTAINER(struct vm_region_entry, e_node, node); - if (entry->e_type != VM_REGION_ENTRY_REGION) { - panic("offline vm-region still contains non-region " - "children."); - } - - queue_push_back(q, &entry->e_entry); - node = next; - } - - return KERN_OK; -} - -kern_status_t region_object_destroy_recurse( - struct queue_entry *entry, - struct object **out) -{ - struct vm_region_entry *region_entry - = BTREE_CONTAINER(struct vm_region_entry, e_entry, entry); - if (region_entry->e_type != VM_REGION_ENTRY_REGION) { - panic("offline vm-region still contains non-region " - "children."); - } - struct vm_region *region = region_from_entry(region_entry); - *out = ®ion->vr_base; - return KERN_OK; -} - -static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry) -{ - return entry->e_address; -} - -/* this function must be called with `parent` locked */ -static void region_put_entry( - struct vm_region *parent, - struct vm_region_entry *child) -{ - struct btree_node *cur = parent->vr_entries.b_root; - if (!cur) { - 
parent->vr_entries.b_root = &child->e_node; - btree_insert_fixup(&parent->vr_entries, &child->e_node); - return; - } - - off_t child_base = child->e_offset; - off_t child_limit = child_base + child->e_size - 1; - - while (cur) { - struct vm_region_entry *cur_entry - = BTREE_CONTAINER(struct vm_region_entry, e_node, cur); - - struct btree_node *next = NULL; - off_t cur_base = cur_entry->e_offset; - off_t cur_limit = cur_base + cur_entry->e_size - 1; - - if (child_limit < cur_base) { - next = btree_left(cur); - } else if (child_base > cur_limit) { - next = btree_right(cur); - } else { -#ifdef TRACE - vm_region_dump(parent); -#endif - panic("tried to add an overlapping entry [%zx-%zx] to " - "vm-region (overlaps [%zx-%zx])", - child_base, - child_limit, - cur_base, - cur_limit); - } - - if (next) { - cur = next; - continue; - } - - if (child_limit < cur_base) { - btree_put_left(cur, &child->e_node); - } else { - btree_put_right(cur, &child->e_node); - } - - btree_insert_fixup(&parent->vr_entries, &child->e_node); - break; - } -} - -/* find the child entry that covers the specified offset. - * DOES NOT search recursively! */ -static struct vm_region_entry *region_get_entry( - struct vm_region *region, - off_t offset, - size_t len) -{ - struct btree_node *cur = region->vr_entries.b_root; - if (!cur) { - return NULL; - } - - struct vm_region_entry *result = NULL; - - off_t base = offset, limit = offset + len - 1; - - while (cur) { - struct vm_region_entry *child - = BTREE_CONTAINER(struct vm_region_entry, e_node, cur); - - struct btree_node *next = NULL; - off_t child_base = child->e_offset; - off_t child_limit = child->e_offset + child->e_size - 1; - - if (limit < child_base) { - next = btree_left(cur); - } else if (base > child_limit) { - next = btree_right(cur); - } else { - result = child; - break; - } - - cur = next; - } - - return result; -} - -/* find the child region that covers the area [*offp,len]. 
searches recursively - * the value in `offp` is updated to the offset of the returned entry relative - * to its parent. - * this function should be called with `region` locked. - * the region returned by this function will also be locked. any intermediary - * regions traversed by this function will be locked temporarily, but will - * be unlocked by the time the function returns. */ -static struct vm_region *region_get_child_region_recursive( - struct vm_region *region, - off_t *offp, - size_t len) -{ - struct vm_region *root = region; - off_t offset = *offp; - if (offset >= region->vr_entry.e_size) { - return NULL; - } - - while (1) { - struct vm_region_entry *next - = region_get_entry(region, offset, len); - - struct vm_region *next_region = region_from_entry(next); - if (next_region) { - offset -= next->e_offset; - /* since `region` is locked, interrupts are already - * disabled, so don't use lock_irq() here */ - vm_region_lock(next_region); - - if (region != root) { - vm_region_unlock(region); - } - - region = next_region; - } else { - break; - } - } - - *offp = offset; - return region; -} - -/* find the vm_region_mapping that contains a given memory area. - * `offp` should be a pointer to an off_t value that contains the offset - * of the area relative to the start of `region`. this value will be updated - * to the offset of the mapping relative to its immediate parent. - * this function should be called with `region` locked. if a mapping is found, - * it will be returned with its immediate parent locked. */ -static struct vm_region_mapping *region_get_mapping_recursive( - struct vm_region *root, - off_t *offp, - size_t len) -{ - off_t offset = *offp; - struct vm_region *region - = region_get_child_region_recursive(root, &offset, len); - if (!region) { - return NULL; - } - - /* if `region` is a different region than what was originally passed to - * us, it has now been locked, and its children can be accessed. 
*/ - struct vm_region_entry *entry = region_get_entry(region, offset, len); - *offp = offset; - - if (!entry) { - if (region != root) { - vm_region_unlock(region); - } - - return NULL; - } - - /* return the mapping with the parent region still locked */ - return mapping_from_entry(entry); -} - -static off_t generate_random_address( - off_t area_base, - size_t area_length, - size_t target_length) -{ - size_t random_range = area_length - target_length; - - off_t offset = 0; - fill_random(&offset, sizeof offset); - - offset %= random_range; - return area_base + offset; -} - -static struct vm_region_entry *region_get_random_entry(struct vm_region *region) -{ - enum { - STEP_LEFT = 0, - STEP_RIGHT = 1, - STEP_FINISH = 2, - } step; - - struct btree_node *result = NULL; - struct btree_node *cur = region->vr_entries.b_root; - if (!cur) { - return NULL; - } - - while (1) { - unsigned long r; - fill_random(&r, sizeof r); - - struct btree_node *next = NULL; - - step = r % 3; - switch (step) { - case STEP_LEFT: - next = btree_left(cur); - break; - case STEP_RIGHT: - next = btree_right(cur); - break; - case STEP_FINISH: - result = cur; - break; - default: - return NULL; - } - - if (!next) { - result = cur; - break; - } - - cur = next; - } - - if (!result) { - return NULL; - } - - return BTREE_CONTAINER(struct vm_region_entry, e_node, result); -} - -static virt_addr_t region_find_free_area_ex( - struct vm_region *region, - size_t target_length, - struct btree_node *start, - enum search_direction direction, - bool random) -{ - if (region->vr_entry.e_size < target_length) { - return 0; - } - - struct btree_node *left_node = NULL, *right_node = NULL; - - switch (direction) { - case SEARCH_LEFT: - right_node = start; - left_node = start ? btree_prev(start) : NULL; - break; - case SEARCH_RIGHT: - left_node = start; - right_node = start ? 
btree_next(start) : NULL; - break; - default: - return 0; - } - - if (!left_node && !right_node) { - return 0; - } - - while (1) { - struct vm_region_entry *left = BTREE_CONTAINER( - struct vm_region_entry, - e_node, - left_node); - struct vm_region_entry *right = BTREE_CONTAINER( - struct vm_region_entry, - e_node, - right_node); - - /* addresses of the first and last free bytes in the area - * respectively. */ - off_t area_base, area_limit; - if (left && right) { - area_base = left->e_offset + left->e_size; - area_limit = right->e_offset - 1; - } else if (right) { - area_base = region->vr_entry.e_offset; - area_limit = left->e_offset - 1; - } else if (left) { - area_base = left->e_offset + left->e_size; - area_limit = region->vr_entry.e_offset - + region->vr_entry.e_size - 1; - } else { - return 0; - } - - area_base &= ~VM_PAGE_MASK; - size_t area_size = 0; - if (area_limit >= area_base) { - area_size = area_limit - area_base + 1; - } - - if (area_size >= target_length) { - if (random) { - area_base = generate_random_address( - area_base, - area_size, - target_length); - area_base &= ~VM_PAGE_MASK; - } - - return area_base; - } - - if (direction == SEARCH_RIGHT) { - left_node = right_node; - right_node = btree_next(right_node); - } else { - right_node = left_node; - left_node = btree_prev(right_node); - } - } - - return 0; -} - -static off_t region_find_free_area_linear( - struct vm_region *region, - size_t target_length) -{ - if (!region->vr_entries.b_root) { - return 0; - } - - return region_find_free_area_ex( - region, - target_length, - btree_first(®ion->vr_entries), - SEARCH_RIGHT, - false); -} - -static off_t region_find_free_area_random( - struct vm_region *region, - size_t target_length) -{ - if (!region->vr_entries.b_root) { - off_t offset = generate_random_address( - 0, - region->vr_entry.e_size, - target_length); - return offset & ~VM_PAGE_MASK; - } - - int tmp = 0; - struct vm_region_entry *basis = region_get_random_entry(region); - - 
fill_random(&tmp, sizeof tmp); - enum search_direction direction = tmp % 2; - - return region_find_free_area_ex( - region, - target_length, - &basis->e_node, - direction, - true); -} - -static bool region_is_area_free( - const struct vm_region *region, - off_t base, - size_t len) -{ - off_t limit = base + len - 1; - - if (base >= region->vr_entry.e_size) { - return false; - } - - if (limit >= region->vr_entry.e_size) { - return false; - } - - struct btree_node *cur = region->vr_entries.b_root; - if (!cur) { - return true; - } - - while (cur) { - struct vm_region_entry *entry - = BTREE_CONTAINER(struct vm_region_entry, e_node, cur); - - struct btree_node *next = NULL; - off_t entry_limit = entry->e_offset + entry->e_size - 1; - - if (base > entry_limit) { - next = btree_right(cur); - } else if (limit < entry->e_offset) { - next = btree_left(cur); - } else { - return false; - } - - cur = next; - } - - return true; -} - -static kern_status_t region_validate_allocation( - struct vm_region *parent, - vm_prot_t prot, - off_t *offp, - size_t len) -{ - off_t offset = *offp; - - if ((prot & parent->vr_prot) != prot) { - /* child region protection must match or be a - * subset of parent region protection */ - return KERN_INVALID_ARGUMENT; - } - - if (offset == VM_REGION_ANY_OFFSET) { - offset = region_find_free_area(parent, len); - *offp = offset; - return (offset == INVALID_OFFSET) ? 
KERN_NO_MEMORY : KERN_OK; - } - - offset &= ~VM_PAGE_MASK; - - if (!region_is_area_free(parent, offset, len)) { - return KERN_INVALID_ARGUMENT; - } - - *offp = offset; - return KERN_OK; -} - -/* this function should be called with `region` locked */ -static void vm_iterator_begin( - struct vm_iterator *it, - struct vm_region *region, - virt_addr_t base, - vm_prot_t prot) -{ - memset(it, 0x0, sizeof *it); - it->it_base = base; - it->it_region = region; - it->it_prot = prot; - - off_t offset = base - vm_region_get_base_address(region); - it->it_mapping = region_get_mapping_recursive(region, &offset, 1); - if (!it->it_mapping) { - return; - } - - if ((it->it_mapping->m_prot & prot) != prot) { - unlock_mapping_parent(it->it_mapping, region); - return; - } - - off_t object_offset = offset - it->it_mapping->m_entry.e_offset - + it->it_mapping->m_object_offset; - struct vm_page *pg = NULL; - if (prot & VM_PROT_WRITE) { - pg = vm_object_alloc_page( - it->it_mapping->m_object, - object_offset, - VM_PAGE_4K); - } else { - pg = vm_object_get_page( - it->it_mapping->m_object, - object_offset); - } - - if (!pg) { - unlock_mapping_parent(it->it_mapping, region); - return; - } - - void *buffer_base = vm_page_get_vaddr(pg); - phys_addr_t pg_addr = vm_page_get_paddr(pg); - size_t buffer_size = vm_page_get_size_bytes(pg); - - while (1) { - struct btree_node *next_node = btree_next(&pg->p_bnode); - struct vm_page *next - = BTREE_CONTAINER(struct vm_page, p_bnode, next_node); - if (!next) { - break; - } - - phys_addr_t next_addr = vm_page_get_paddr(next); - if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) { - break; - } - - pg = next; - pg_addr = next_addr; - buffer_size += vm_page_get_size_bytes(next); - } - - it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK); - it->it_max = buffer_size - (object_offset & VM_PAGE_MASK); -} - -static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) -{ - if (nr_bytes < it->it_max) { - it->it_base += 
nr_bytes; - it->it_buf = (char *)it->it_buf + nr_bytes; - it->it_max -= nr_bytes; - return KERN_OK; - } - - /* the parent region of it->it_mapping is locked here. if it is - * different from it->it_region, it must be unlocked */ - unlock_mapping_parent(it->it_mapping, it->it_region); - - it->it_base += nr_bytes; - off_t offset = it->it_base - vm_region_get_base_address(it->it_region); - - struct vm_region_mapping *next_mapping - = region_get_mapping_recursive(it->it_region, &offset, 1); - if (!next_mapping) { - it->it_buf = NULL; - it->it_max = 0; - return KERN_MEMORY_FAULT; - } - - /* past this point, if we encounter an error, must remember to - * unlock the parent region of next_mapping */ - - if ((next_mapping->m_prot & it->it_prot) != it->it_prot) { - it->it_buf = NULL; - it->it_max = 0; - unlock_mapping_parent(next_mapping, it->it_region); - return KERN_MEMORY_FAULT; - } - - off_t object_offset = offset - it->it_mapping->m_entry.e_offset - + it->it_mapping->m_object_offset; - struct vm_page *pg = NULL; - if (it->it_prot & VM_PROT_WRITE) { - pg = vm_object_alloc_page( - it->it_mapping->m_object, - object_offset, - VM_PAGE_4K); - } else { - pg = vm_object_get_page( - it->it_mapping->m_object, - object_offset); - } - - if (!pg) { - unlock_mapping_parent(next_mapping, it->it_region); - return KERN_NO_MEMORY; - } - - void *buffer_base = vm_page_get_vaddr(pg); - phys_addr_t pg_addr = vm_page_get_paddr(pg); - size_t buffer_size = vm_page_get_size_bytes(pg); - - while (1) { - struct btree_node *next_node = btree_next(&pg->p_bnode); - struct vm_page *next - = BTREE_CONTAINER(struct vm_page, p_bnode, next_node); - if (!next) { - break; - } - - phys_addr_t next_addr = vm_page_get_paddr(next); - if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) { - break; - } - - pg = next; - pg_addr = next_addr; - buffer_size += vm_page_get_size_bytes(next); - } - - it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK); - it->it_max = buffer_size; - return KERN_OK; -} - 
-/* this function must be called when you are finished with a - * vm_iterator, to ensure that all held locks are released. */ -static void vm_iterator_finish(struct vm_iterator *it) -{ - if (it->it_mapping) { - unlock_mapping_parent(it->it_mapping, it->it_region); - } - - memset(it, 0x0, sizeof *it); -} - -/* this function must be called with `root` locked. `root` will be the - * first entry visited by the iterator. from there, child entries are - * visited in depth-first order. */ -static void entry_iterator_begin( - struct entry_iterator *it, - struct vm_region *root) -{ - memset(it, 0x0, sizeof *it); - it->it_root = root; - it->it_entry = &root->vr_entry; -} - -/* this function must be called when you are finished with an - * entry_iterator, to ensure that all held locks are released. */ -static void entry_iterator_finish(struct entry_iterator *it) -{ - struct vm_region_entry *cur = it->it_entry; - if (!cur) { - return; - } - - struct vm_region *region = NULL; - if (cur->e_type == VM_REGION_ENTRY_MAPPING) { - region = region_from_entry(cur->e_parent); - } else { - region = region_from_entry(cur); - } - - while (region && region != it->it_root) { - struct vm_region *parent - = region_from_entry(region->vr_entry.e_parent); - vm_region_unlock(region); - region = parent; - } - - memset(it, 0x0, sizeof *it); -} - -/* move to the next entry in the traversal order. - * when this function returns: - * 1. if the visited entry is a region, it will be locked. - * 2. if the visited entry is a mapping, its parent region will be - * locked. a region will remain locked until all of its children and - * n-grand-children have been visited. once iteration is finished, only - * `it->it_root` will be locked. 
- */ -static void entry_iterator_move_next(struct entry_iterator *it) -{ - /* `region` is locked */ - struct vm_region *region = region_from_entry(it->it_entry); - bool has_children = (region && !btree_empty(®ion->vr_entries)); - - if (has_children) { - /* visit the first child */ - struct btree_node *node = btree_first(®ion->vr_entries); - struct vm_region_entry *entry - = BTREE_CONTAINER(struct vm_region_entry, e_node, node); - - if (entry->e_type == VM_REGION_ENTRY_REGION) { - struct vm_region *child_region - = region_from_entry(entry); - /* since `region` is locked, interrupts are - * already disabled, so don't use lock_irq() - * here */ - vm_region_lock(child_region); - } - - it->it_depth++; - it->it_entry = entry; - return; - } - - /* go back up until we find a right sibling. */ - struct vm_region_entry *cur = it->it_entry; - - while (1) { - struct btree_node *sibling = btree_next(&cur->e_node); - if (sibling) { - it->it_entry = BTREE_CONTAINER( - struct vm_region_entry, - e_node, - sibling); - return; - } - - if (cur == &it->it_root->vr_entry) { - it->it_entry = NULL; - return; - } - - struct vm_region_entry *parent_entry = cur->e_parent; - struct vm_region *parent = region_from_entry(parent_entry); - - if (!parent) { - it->it_entry = NULL; - return; - } - - if (cur->e_type == VM_REGION_ENTRY_REGION) { - struct vm_region *child_region = region_from_entry(cur); - if (child_region != it->it_root) { - vm_region_unlock(child_region); - } - } - - it->it_depth--; - cur = parent_entry; - } -} - -/* erase the current entry and move to the next entry in the traversal - * order. the current entry MUST be a mapping, otherwise nothing will - * happen. 
- */ -static void entry_iterator_erase(struct entry_iterator *it) -{ - /* the parent region of `mapping` is locked */ - struct vm_region_mapping *mapping = mapping_from_entry(it->it_entry); - if (!mapping) { - return; - } - - struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent); - - /* go back up until we find a right sibling. */ - struct vm_region_entry *cur = it->it_entry; - - while (1) { - struct btree_node *sibling = btree_next(&cur->e_node); - if (mapping) { - btree_delete( - &parent->vr_entries, - &mapping->m_entry.e_node); - vm_cache_free(&mapping_cache, mapping); - mapping = NULL; - } - - if (sibling) { - it->it_entry = BTREE_CONTAINER( - struct vm_region_entry, - e_node, - sibling); - return; - } - - if (cur == &it->it_root->vr_entry) { - it->it_entry = NULL; - return; - } - - struct vm_region_entry *parent_entry = cur->e_parent; - struct vm_region *parent = region_from_entry(parent_entry); - - if (!parent) { - it->it_entry = NULL; - return; - } - - if (cur->e_type == VM_REGION_ENTRY_REGION) { - struct vm_region *child_region = region_from_entry(cur); - if (child_region != it->it_root) { - vm_region_unlock(child_region); - } - } - - it->it_depth--; - cur = parent_entry; - } -} - -static void mapping_iterator_begin( - struct entry_iterator *it, - struct vm_region *root, - off_t offset, - size_t length, - off_t *offp) -{ - entry_iterator_begin(it, root); - while (it->it_entry) { - off_t base = entry_absolute_address(it->it_entry) - - root->vr_entry.e_offset; - off_t limit = base + it->it_entry->e_size - 1; - - if (it->it_entry->e_type == VM_REGION_ENTRY_MAPPING) { - if (offset >= base && offset <= limit) { - *offp = base; - return; - } - - if (offset + length >= base - && offset + length <= limit) { - *offp = base; - return; - } - } - - entry_iterator_move_next(it); - } -} - -static void mapping_iterator_finish(struct entry_iterator *it) -{ - entry_iterator_finish(it); -} - -static void mapping_iterator_move_next( - struct entry_iterator 
*it, - off_t offset, - size_t length, - off_t *offp) -{ - do { - entry_iterator_move_next(it); - } while (it->it_entry - && it->it_entry->e_type != VM_REGION_ENTRY_MAPPING); - - if (!it->it_entry) { - return; - } - - off_t base = entry_absolute_address(it->it_entry) - - it->it_root->vr_entry.e_offset; - - if (base >= offset + length) { - it->it_entry = NULL; - } else { - *offp = base; - } -} - -static void mapping_iterator_erase( - struct entry_iterator *it, - off_t offset, - size_t length, - off_t *offp) -{ - entry_iterator_erase(it); - - while (it->it_entry - && it->it_entry->e_type != VM_REGION_ENTRY_MAPPING) { - - entry_iterator_move_next(it); - } - - if (!it->it_entry) { - return; - } - - off_t base = entry_absolute_address(it->it_entry) - - it->it_root->vr_entry.e_offset; - - if (base >= offset + length) { - it->it_entry = NULL; - } else { - *offp = base; - } -} - -/*** PUBLIC API ***************************************************************/ - -kern_status_t vm_region_type_init(void) -{ - vm_cache_init(&mapping_cache); - return object_type_register(&vm_region_type); -} - -struct vm_region *vm_region_cast(struct object *obj) -{ - return VM_REGION_CAST(obj); -} - -/* this function should be called with `parent` locked (if parent is - * non-NULL) - */ -kern_status_t vm_region_create( - struct vm_region *parent, - const char *name, - size_t name_len, - off_t offset, - size_t region_len, - vm_prot_t prot, - struct vm_region **out) -{ - if (parent && parent->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - if (!offset || !region_len) { - return KERN_INVALID_ARGUMENT; - } - - if (region_len & VM_PAGE_MASK) { - region_len &= ~VM_PAGE_MASK; - region_len += VM_PAGE_SIZE; - } - - kern_status_t status = KERN_OK; - if (parent) { - status = region_validate_allocation( - parent, - prot, - &offset, - region_len); - } - - if (status != KERN_OK) { - return status; - } - - struct object *region_object = object_create(&vm_region_type); - if (!region_object) { 
- return KERN_NO_MEMORY; - } - - struct vm_region *region = VM_REGION_CAST(region_object); - - region->vr_status = VM_REGION_ONLINE; - region->vr_prot = prot; - region->vr_entry.e_type = VM_REGION_ENTRY_REGION; - region->vr_entry.e_address = offset; - region->vr_entry.e_offset = offset; - region->vr_entry.e_size = region_len; - -#ifdef TRACE - tracek("creating sub-region at [%llx-%llx]", - offset, - offset + region_len); -#endif - - if (parent) { - region->vr_entry.e_parent = &parent->vr_entry; - region->vr_entry.e_address += parent->vr_entry.e_address; - region->vr_pmap = parent->vr_pmap; - region_put_entry(parent, ®ion->vr_entry); - /* `parent` holds a reference to child `region` */ - object_ref(®ion->vr_base); - } - - if (name && name_len) { - name_len = MIN(sizeof region->vr_name - 1, name_len); - memcpy(region->vr_name, name, name_len); - region->vr_name[name_len] = '\0'; - } - - *out = region; - return KERN_OK; -} - -kern_status_t vm_region_kill( - struct vm_region *region, - unsigned long *lock_flags) -{ - if (region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - if (region->vr_entry.e_parent) { - struct vm_region *parent - = region_from_entry(region->vr_entry.e_parent); - region->vr_entry.e_parent = NULL; - - /* locks must be acquired in parent->child order. 
since - * we're going backwards here, unlock `region` before - * locking its parent */ - vm_region_unlock_irqrestore(region, *lock_flags); - vm_region_lock_irqsave(parent, lock_flags); - btree_delete(&parent->vr_entries, ®ion->vr_entry.e_node); - vm_region_unlock_irqrestore(parent, *lock_flags); - vm_region_lock_irqsave(region, lock_flags); - /* `region` lock is held, and e_parent is NULL */ - } - - struct entry_iterator it; - entry_iterator_begin(&it, region); - - while (it.it_entry) { - if (it.it_entry->e_type == VM_REGION_ENTRY_REGION) { - struct vm_region *region - = region_from_entry(it.it_entry); - region->vr_status = VM_REGION_DEAD; - entry_iterator_move_next(&it); - continue; - } - - struct vm_region_mapping *mapping - = mapping_from_entry(it.it_entry); - - virt_addr_t base = entry_absolute_address(it.it_entry); - - for (size_t i = 0; i < mapping->m_entry.e_size; - i += VM_PAGE_SIZE) { - pmap_remove(region->vr_pmap, base + i); - } - - unsigned long flags; - vm_object_lock_irqsave(mapping->m_object, &flags); - queue_delete( - &mapping->m_object->vo_mappings, - &mapping->m_object_entry); - vm_object_unlock_irqrestore(mapping->m_object, flags); - - entry_iterator_erase(&it); - } - - return KERN_OK; -} - -kern_status_t vm_region_map_object( - struct vm_region *root, - off_t region_offset, - struct vm_object *object, - off_t object_offset, - size_t length, - vm_prot_t prot, - virt_addr_t *out) -{ - object_offset &= ~VM_PAGE_MASK; - - if (region_offset != VM_REGION_ANY_OFFSET) { - off_t limit = region_offset + length; - - if (region_offset & VM_PAGE_MASK) { - region_offset &= ~VM_PAGE_MASK; - } - - if (limit & VM_PAGE_MASK) { - limit &= ~VM_PAGE_MASK; - limit += VM_PAGE_SIZE; - } - - length = limit - region_offset; - } - - if (length & VM_PAGE_MASK) { - length &= ~VM_PAGE_MASK; - length += VM_PAGE_SIZE; - } - - if (!root || !object) { - return KERN_INVALID_ARGUMENT; - } - - struct vm_region *region = root; - if (region_offset != VM_REGION_ANY_OFFSET) { - region = 
region_get_child_region_recursive( - root, - ®ion_offset, - length); - /* if `region` != `root`, it will need to be unlocked at - * the end of the function */ - } - - if (region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - if ((prot & region->vr_prot) != prot) { - return KERN_INVALID_ARGUMENT; - } - - if ((prot & object->vo_prot) != prot) { - return KERN_INVALID_ARGUMENT; - } - - if (!length || object_offset + length > object->vo_size) { - return KERN_INVALID_ARGUMENT; - } - - if (!region) { - return KERN_INVALID_ARGUMENT; - } - - if (region_offset == VM_REGION_ANY_OFFSET) { - region_offset = region_find_free_area(region, length); - - if (region_offset == INVALID_OFFSET) { - return KERN_NO_MEMORY; - } - } else if (!region_is_area_free(region, region_offset, length)) { - return KERN_INVALID_ARGUMENT; - } - - tracek("vm_region_map_object(%s, %zx, %s, %zx, %zx, %x, %p)", - region->vr_name, - region_offset, - object->vo_name, - object_offset, - length, - prot, - out); - - struct vm_region_mapping *mapping - = vm_cache_alloc(&mapping_cache, VM_NORMAL); - if (!mapping) { - return KERN_NO_MEMORY; - } - - mapping->m_object = object; - mapping->m_prot = prot; - mapping->m_object_offset = object_offset; - mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING; - mapping->m_entry.e_parent = ®ion->vr_entry; - mapping->m_entry.e_address = region->vr_entry.e_address + region_offset; - mapping->m_entry.e_offset = region_offset; - mapping->m_entry.e_size = length; - -#ifdef TRACE - virt_addr_t abs_base = entry_absolute_address(&mapping->m_entry); - tracek("mapping %s at [%llx-%llx]", - object->vo_name, - abs_base, - abs_base + length); -#endif - region_put_entry(region, &mapping->m_entry); - if (region != root) { - vm_region_unlock(region); - } - - unsigned long lock_flags; - vm_object_lock_irqsave(object, &lock_flags); - queue_push_back(&object->vo_mappings, &mapping->m_object_entry); - vm_object_unlock_irqrestore(object, lock_flags); - - if (out) { - *out = 
entry_absolute_address(&mapping->m_entry); - } - - return KERN_OK; -} - -/* unmap some pages in the middle of a mapping, splitting it into two - * separate mappings */ -static kern_status_t split_mapping( - struct vm_region_mapping *mapping, - struct vm_region *root, - off_t mapping_offset, - off_t unmap_offset, - off_t unmap_limit) -{ - tracek("split mapping [%zx-%zx] subtract [%zx-%zx]", - mapping_offset, - mapping_offset + mapping->m_entry.e_size, - unmap_offset, - unmap_limit); - - off_t mapping_limit = mapping_offset + mapping->m_entry.e_size; - - struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent); - struct vm_region_mapping *left = mapping; - struct vm_region_mapping *right - = vm_cache_alloc(&mapping_cache, VM_NORMAL); - if (!right) { - return KERN_NO_MEMORY; - } - - off_t left_offset = mapping->m_entry.e_offset; - off_t right_offset = unmap_limit - mapping_offset; - off_t left_object_offset = mapping->m_object_offset; - size_t left_length = unmap_offset - mapping_offset; - size_t right_length = mapping_limit - unmap_limit; - off_t right_object_offset = mapping->m_object_offset - + mapping->m_entry.e_size - right_length; - - tracek("mapping=[%zx-%zx]->[%zx-%zx]", - mapping_offset, - mapping_limit, - mapping->m_object_offset, - mapping->m_object_offset + mapping->m_entry.e_size); - tracek("left=[%zx-%zx]->[%zx-%zx], right=[%zx-%zx]->[%zx-%zx]", - left_offset, - left_offset + left_length, - left_object_offset, - left_object_offset + left_length, - right_offset, - right_offset + right_length, - right_object_offset, - right_object_offset + right_length); - - left->m_object_offset = left_object_offset; - left->m_entry.e_offset = left_offset; - left->m_entry.e_size = left_length; - - right->m_object = left->m_object; - right->m_prot = left->m_prot; - right->m_entry.e_type = VM_REGION_ENTRY_MAPPING; - right->m_entry.e_parent = left->m_entry.e_parent; - - right->m_object_offset = right_object_offset; - right->m_entry.e_offset = right_offset; - 
right->m_entry.e_size = right_length; - - virt_addr_t unmap_base = root->vr_entry.e_offset + unmap_offset; - size_t unmap_length = unmap_limit - unmap_offset; - - for (size_t i = 0; i < unmap_length; i += VM_PAGE_SIZE) { - tracek("unmapping %zx", unmap_base + i); - pmap_remove(root->vr_pmap, unmap_base + i); - } - - region_put_entry(parent, &right->m_entry); - - return KERN_OK; -} - -/* unmap some pages from the left-side of a mapping to somewhere in the - * middle. */ -static kern_status_t left_reduce_mapping( - struct vm_region_mapping *mapping, - struct vm_region *root, - off_t mapping_offset, - off_t unmap_offset, - off_t unmap_limit) -{ - /* unmap_limit falls somwwhere between mapping_offset and - * mapping_offset+length */ - tracek("left reduce mapping [%zx-%zx] subtract [%zx-%zx]", - mapping_offset, - mapping_offset + mapping->m_entry.e_size, - unmap_offset, - unmap_limit); - - virt_addr_t base = root->vr_entry.e_offset + mapping_offset; - off_t limit = mapping_offset + mapping->m_entry.e_size; - size_t length = mapping->m_entry.e_size - (limit - unmap_limit); - tracek(" unmapping %zx-%zx (%zx bytes)", base, base + length, length); - - for (size_t i = 0; i < length; i += VM_PAGE_SIZE) { - pmap_remove(root->vr_pmap, base + i); - } - - mapping->m_entry.e_offset += length; - mapping->m_object_offset += length; - mapping->m_entry.e_size -= length; - - return KERN_OK; -} - -/* unmap some pages from the middle of a mapping to the right-side. 
*/ -static kern_status_t right_reduce_mapping( - struct vm_region_mapping *mapping, - struct vm_region *root, - off_t mapping_offset, - off_t unmap_offset, - off_t unmap_limit) -{ - /* unmap_base falls somwwhere between mapping_offset and - * mapping_offset+length */ - tracek("right reduce mapping [%zx-%zx] subtract [%zx-%zx]", - mapping_offset, - mapping_offset + mapping->m_entry.e_size, - unmap_offset, - unmap_limit); - - virt_addr_t base = root->vr_entry.e_offset + unmap_offset; - off_t limit = mapping_offset + mapping->m_entry.e_size; - size_t length = limit - unmap_offset; - tracek(" unmapping %zx-%zx (%zx bytes)", base, base + length, length); - - for (size_t i = 0; i < length; i += VM_PAGE_SIZE) { - pmap_remove(root->vr_pmap, base + i); - } - - mapping->m_entry.e_size -= length; - - return KERN_OK; -} - -/* completely unmap and delete an entire mapping */ -static kern_status_t delete_mapping( - struct vm_region_mapping *mapping, - struct vm_region *root, - off_t mapping_offset) -{ - virt_addr_t base = root->vr_entry.e_offset + mapping_offset; - tracek("delete mapping [%zx-%zx]", - base, - base + mapping->m_entry.e_size); - - for (size_t i = 0; i < mapping->m_entry.e_size; i += VM_PAGE_SIZE) { - pmap_remove(root->vr_pmap, base + i); - } - - unsigned long flags; - vm_object_lock_irqsave(mapping->m_object, &flags); - queue_delete(&mapping->m_object->vo_mappings, &mapping->m_object_entry); - vm_object_unlock_irqrestore(mapping->m_object, flags); - - /* don't actually delete the mapping yet. 
that will be done by - * vm_region_unmap */ - - return KERN_OK; -} - -kern_status_t vm_region_unmap( - struct vm_region *region, - off_t unmap_area_offset, - size_t unmap_area_length) -{ - if (region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - kern_status_t status = KERN_OK; - struct entry_iterator it; - off_t unmap_area_limit = unmap_area_offset + unmap_area_length; - tracek("unmapping %zx-%zx", unmap_area_offset, unmap_area_limit); - off_t tmp = 0; - - mapping_iterator_begin( - &it, - region, - unmap_area_offset, - unmap_area_length, - &tmp); - while (it.it_entry) { - struct vm_region_mapping *mapping - = mapping_from_entry(it.it_entry); - off_t mapping_offset = tmp; - off_t mapping_limit = mapping_offset + it.it_entry->e_size; - - bool split - = (unmap_area_offset > mapping_offset - && unmap_area_limit < mapping_limit); - bool delete - = (unmap_area_offset <= mapping_offset - && unmap_area_limit >= mapping_limit); - bool left_reduce - = (unmap_area_offset <= mapping_offset - && unmap_area_limit < mapping_limit); - bool right_reduce - = (unmap_area_offset > mapping_offset - && unmap_area_limit >= mapping_limit); - - if (split) { - status = split_mapping( - mapping, - region, - mapping_offset, - unmap_area_offset, - unmap_area_limit); - delete = true; - } else if (delete) { - status = delete_mapping( - mapping, - region, - mapping_offset); - } else if (left_reduce) { - status = left_reduce_mapping( - mapping, - region, - mapping_offset, - unmap_area_offset, - unmap_area_limit); - } else if (right_reduce) { - status = right_reduce_mapping( - mapping, - region, - mapping_offset, - unmap_area_offset, - unmap_area_limit); - } else { - panic("don't know what to do with this " - "mapping"); - } - - if (delete) { - mapping_iterator_erase( - &it, - unmap_area_offset, - unmap_area_length, - &tmp); - } else { - mapping_iterator_move_next( - &it, - unmap_area_offset, - unmap_area_length, - &tmp); - } - - if (status != KERN_OK) { - break; - } - } - - 
mapping_iterator_finish(&it); - - return status; -} - -bool vm_region_validate_access( - struct vm_region *region, - virt_addr_t ptr, - size_t len, - vm_prot_t prot) -{ - if (region->vr_status != VM_REGION_ONLINE) { - return false; - } - - if (len == 0) { - return true; - } - - if (ptr < region->vr_entry.e_offset) { - return false; - } - - off_t offset = ptr - region->vr_entry.e_offset; - - if (len >= region->vr_entry.e_size) { - return false; - } - - if (offset + len > region->vr_entry.e_size) { - return false; - } - - offset &= ~VM_PAGE_MASK; - - /* TODO improve this to not require a per-page loop */ - for (off_t i = 0; i < len; i += VM_PAGE_SIZE) { - off_t x = offset + i; - struct vm_region_mapping *mapping - = region_get_mapping_recursive( - region, - &x, - VM_PAGE_SIZE); - if (!mapping) { - return false; - } - - bool ok = (mapping->m_prot & prot) == prot; - - struct vm_region *parent - = region_from_entry(mapping->m_entry.e_parent); - - if (parent != region) { - vm_region_unlock(parent); - } - - if (!ok) { - return false; - } - } - - return true; -} - -/* this function must be called with `region` locked */ -kern_status_t vm_region_demand_map( - struct vm_region *region, - virt_addr_t addr, - enum pmap_fault_flags flags) -{ - if (region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - addr &= ~VM_PAGE_MASK; - if (addr < region->vr_entry.e_offset - || addr > region->vr_entry.e_offset + region->vr_entry.e_size) { - return KERN_NO_ENTRY; - } - - off_t region_offset = addr - region->vr_entry.e_offset; - - struct vm_region_mapping *mapping - = region_get_mapping_recursive(region, ®ion_offset, 1); - if (!mapping) { - return KERN_NO_ENTRY; - } - - off_t object_offset = region_offset - mapping->m_entry.e_offset - + mapping->m_object_offset; - - tracek("vm: tried to access vm-object %s at offset=%05llx", - mapping->m_object->vo_name, - object_offset); - - unsigned long lock_flags; - vm_object_lock_irqsave(mapping->m_object, &lock_flags); - struct 
vm_page *pg = vm_object_alloc_page( - mapping->m_object, - object_offset, - VM_PAGE_4K); - vm_object_unlock_irqrestore(mapping->m_object, lock_flags); - tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr); - kern_status_t status = pmap_add( - region->vr_pmap, - addr, - vm_page_get_pfn(pg), - mapping->m_prot, - PMAP_NORMAL); - - struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent); - if (parent != region) { - vm_region_unlock(parent); - } - - return status; -} - -virt_addr_t vm_region_get_base_address(const struct vm_region *region) -{ - if (region->vr_status != VM_REGION_ONLINE) { - return 0; - } - - return entry_absolute_address(®ion->vr_entry); -} - -kern_status_t vm_region_read_kernel( - struct vm_region *src_region, - virt_addr_t src_ptr, - size_t count, - void *destp, - size_t *nr_read) -{ - if (src_region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - struct vm_iterator src; - char *dest = destp; - - vm_iterator_begin( - &src, - src_region, - src_ptr, - VM_PROT_READ | VM_PROT_USER); - - kern_status_t status = KERN_OK; - size_t r = 0; - - while (r < count && src.it_max) { - size_t remaining = count - r; - size_t to_move = MIN(src.it_max, remaining); - memmove(dest, src.it_buf, to_move); - - status = vm_iterator_seek(&src, to_move); - if (status != KERN_OK) { - break; - } - - r += to_move; - dest += to_move; - } - - vm_iterator_finish(&src); - - if (nr_read) { - *nr_read = r; - } - - return status; -} - -kern_status_t vm_region_write_kernel( - struct vm_region *dst_region, - virt_addr_t dst_ptr, - size_t count, - const void *srcp, - size_t *nr_written) -{ - if (dst_region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - struct vm_iterator dst; - const char *src = srcp; - - vm_iterator_begin( - &dst, - dst_region, - dst_ptr, - VM_PROT_WRITE | VM_PROT_USER); - - kern_status_t status = KERN_OK; - size_t r = 0; - - while (r < count && dst.it_max) { - size_t remaining = count - r; - size_t 
to_move = MIN(dst.it_max, remaining); - memmove(dst.it_buf, src, to_move); - - status = vm_iterator_seek(&dst, to_move); - if (status != KERN_OK) { - break; - } - - r += to_move; - src += to_move; - } - - vm_iterator_finish(&dst); - - if (nr_written) { - *nr_written = r; - } - - return status; -} - -kern_status_t vm_region_memmove( - struct vm_region *dest_region, - virt_addr_t dest_ptr, - struct vm_region *src_region, - virt_addr_t src_ptr, - size_t count, - size_t *nr_moved) -{ - if (src_region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - if (dest_region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - struct vm_iterator src, dest; - vm_iterator_begin( - &src, - src_region, - src_ptr, - VM_PROT_READ | VM_PROT_USER); - vm_iterator_begin( - &dest, - dest_region, - dest_ptr, - VM_PROT_WRITE | VM_PROT_USER); - - kern_status_t status = KERN_OK; - size_t r = 0; - - while (count && src.it_max && dest.it_max) { - size_t to_move = MIN(MIN(src.it_max, dest.it_max), count); - memmove(dest.it_buf, src.it_buf, to_move); - - status = vm_iterator_seek(&src, to_move); - if (status != KERN_OK) { - break; - } - - status = vm_iterator_seek(&dest, to_move); - if (status != KERN_OK) { - break; - } - - count -= to_move; - r += to_move; - } - - vm_iterator_finish(&src); - vm_iterator_finish(&dest); - - if (nr_moved) { - *nr_moved = r; - } - - return status; -} - -extern kern_status_t vm_region_memmove_v( - struct vm_region *dest_region, - size_t dest_offset, - const kern_iovec_t *dest_vecs, - size_t nr_dest_vecs, - struct vm_region *src_region, - size_t src_offset, - const kern_iovec_t *src_vecs, - size_t nr_src_vecs, - size_t bytes_to_move, - size_t *nr_bytes_moved) -{ - if (src_region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - if (dest_region->vr_status != VM_REGION_ONLINE) { - return KERN_BAD_STATE; - } - - struct iovec_iterator src, dest; - iovec_iterator_begin_user(&src, src_region, src_vecs, nr_src_vecs); - 
iovec_iterator_begin_user(&dest, dest_region, dest_vecs, nr_dest_vecs); - - iovec_iterator_seek(&src, src_offset); - iovec_iterator_seek(&dest, dest_offset); - - size_t moved = 0; - while (bytes_to_move && src.it_len && dest.it_len) { - size_t to_move - = MIN(MIN(src.it_len, dest.it_len), bytes_to_move); - - kern_status_t status = vm_region_memmove( - dest_region, - dest.it_base, - src_region, - src.it_base, - to_move, - NULL); - if (status != KERN_OK) { - return status; - } - - iovec_iterator_seek(&src, to_move); - iovec_iterator_seek(&dest, to_move); - bytes_to_move -= to_move; - moved += to_move; - } - - if (nr_bytes_moved) { - *nr_bytes_moved = moved; - } - - return KERN_OK; -} - -#ifdef TRACE -void vm_region_dump(struct vm_region *region) -{ - char s[128]; - size_t p = 0; - - struct entry_iterator it; - entry_iterator_begin(&it, region); - while (it.it_entry) { - p = 0; - - for (unsigned int i = 0; i < it.it_depth; i++) { - p += snprintf(s + p, sizeof s - p, " "); - } - - switch (it.it_entry->e_type) { - case VM_REGION_ENTRY_REGION: { - struct vm_region *child - = region_from_entry(it.it_entry); - p += snprintf( - s + p, - sizeof s - p, - "-region [%zx-%zx] %s", - child->vr_entry.e_offset, - child->vr_entry.e_offset - + child->vr_entry.e_size, - child->vr_name); - break; - } - case VM_REGION_ENTRY_MAPPING: { - struct vm_region_mapping *mapping - = mapping_from_entry(it.it_entry); - p += snprintf( - s + p, - sizeof s - p, - "+mapping [%zx-%zx] %s", - mapping->m_entry.e_offset, - mapping->m_entry.e_offset - + mapping->m_entry.e_size, - mapping->m_object->vo_name); - break; - default: - break; - } - } - - tracek("%s", s); - entry_iterator_move_next(&it); - } -} -#endif