Compare commits

...

8 Commits

Author SHA1 Message Date
5f0654430d syscall: add task_self, task_get_address_space, and vm_region_kill 2026-02-23 18:43:49 +00:00
fd1bc0ad5f kernel: check object refcount before performing a recursive deletion 2026-02-23 18:43:11 +00:00
b1ffdcf2bc vm: region: improve locking rules and semantics; implement region killing
the rules around acquiring locks have been strictly defined and
implemented, and general lock usage has been improved, to fix and
prevent several different issues.

a vm-region is now destroyed in two separate steps:
 1. it is "killed": all mappings are unmapped and deleted, the
    region is removed from its parent, and the region and all of
    its sub-regions are marked as "dead", preventing any
    further actions from being performed with the region.
 2. it is "destroyed": the vm-region object is de-allocated when
    the last reference/handle is closed. the references that this
    region holds to any sub-regions are also released, meaning
    these regions may be de-allocated too.
2026-02-23 18:42:47 +00:00
5690dd5b9c kernel: add support for recursive object destruction (without recursion)
this system makes it possible for an object that forms part of a tree
to be safely recursively destroyed without using recursion.
2026-02-23 18:34:12 +00:00
37ae7aeef7 kernel: implement globally-unique object ids 2026-02-23 18:32:11 +00:00
dbe117135b x86_64: implement proper user/kernel %gs base switching
the %gs base address is now always set to the current cpu block while
in kernel-mode, and is switched back to the userspace %gs base
when returning to user-mode.
2026-02-23 18:26:21 +00:00
273557fa9f x86_64: lock task address space while performing a demand page-map 2026-02-23 18:25:49 +00:00
fe107fbad3 kernel: locks: add spin lock/unlock functions that don't change interrupt state 2026-02-23 18:24:49 +00:00
22 changed files with 755 additions and 60 deletions

View File

@@ -1,5 +1,5 @@
#include <kernel/machine/hwlock.h>
#include <kernel/compiler.h> #include <kernel/compiler.h>
#include <kernel/machine/hwlock.h>
void ml_hwlock_lock(ml_hwlock_t *lck) void ml_hwlock_lock(ml_hwlock_t *lck)
{ {

View File

@@ -11,6 +11,41 @@ ml_hwlock_lock:
mov $1, %ecx mov $1, %ecx
mfence
1: mov $0, %eax
lock cmpxchg %ecx, (%rdi)
jne 1b
pop %rbp
ret
.global ml_hwlock_unlock
.type ml_hwlock_unlock, @function
/* release a hardware spin lock.
 * this variant contains no cli/sti/popf, so the interrupt flag is left
 * exactly as the caller had it. */
/* %rdi = pointer to ml_hwlock_t (int) */
ml_hwlock_unlock:
push %rbp
mov %rsp, %rbp
/* storing 0 marks the lock as free: the acquire loop's cmpxchg only
 * succeeds when it finds 0 in the lock word. */
movl $0, (%rdi)
/* full fence after the releasing store */
mfence
pop %rbp
ret
.global ml_hwlock_lock_irq
.type ml_hwlock_lock_irq, @function
/* %rdi = pointer to ml_hwlock_t (int) */
ml_hwlock_lock_irq:
push %rbp
mov %rsp, %rbp
mov $1, %ecx
cli cli
mfence mfence
@@ -21,11 +56,12 @@ ml_hwlock_lock:
pop %rbp pop %rbp
ret ret
.global ml_hwlock_unlock
.type ml_hwlock_unlock, @function .global ml_hwlock_unlock_irq
.type ml_hwlock_unlock_irq, @function
/* %rdi = pointer to ml_hwlock_t (int) */ /* %rdi = pointer to ml_hwlock_t (int) */
ml_hwlock_unlock: ml_hwlock_unlock_irq:
push %rbp push %rbp
mov %rsp, %rbp mov %rsp, %rbp
@@ -62,6 +98,7 @@ ml_hwlock_lock_irqsave:
pop %rbp pop %rbp
ret ret
.global ml_hwlock_unlock_irqrestore .global ml_hwlock_unlock_irqrestore
.type ml_hwlock_unlock_irqrestore, @function .type ml_hwlock_unlock_irqrestore, @function

View File

@@ -3,7 +3,8 @@
#include <stdint.h> #include <stdint.h>
#define MSR_GS_BASE 0xC0000101 #define MSR_GS_BASE 0xC0000101
#define MSR_KERNEL_GS_BASE 0xC0000102
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {

View File

@@ -12,6 +12,9 @@ typedef int ml_hwlock_t;
extern void ml_hwlock_lock(ml_hwlock_t *lck); extern void ml_hwlock_lock(ml_hwlock_t *lck);
extern void ml_hwlock_unlock(ml_hwlock_t *lck); extern void ml_hwlock_unlock(ml_hwlock_t *lck);
extern void ml_hwlock_lock_irq(ml_hwlock_t *lck);
extern void ml_hwlock_unlock_irq(ml_hwlock_t *lck);
extern void ml_hwlock_lock_irqsave(ml_hwlock_t *lck, unsigned long *flags); extern void ml_hwlock_lock_irqsave(ml_hwlock_t *lck, unsigned long *flags);
extern void ml_hwlock_unlock_irqrestore(ml_hwlock_t *lck, unsigned long flags); extern void ml_hwlock_unlock_irqrestore(ml_hwlock_t *lck, unsigned long flags);

View File

@@ -333,11 +333,30 @@ IRQ 223, 255
isr_common_stub: isr_common_stub:
PUSH_REGS PUSH_REGS
# When ISR occurs in Ring 3, CPU sets %ss (and other non-code selectors)
# to 0.
# A zero %ss is therefore used as the "we came from user-mode" test: in
# that case %ss is reloaded with 0x10 (presumably the kernel data
# selector -- confirm against the GDT) and swapgs switches to the kernel
# %gs base. If %ss is non-zero the swapgs is skipped.
mov %ss, %ax
cmp $0, %ax
jne isr_skipgs1
mov $0x10, %ax
mov %ax, %ss
swapgs
isr_skipgs1:
mov %rsp, %rdi mov %rsp, %rdi
call isr_dispatch call isr_dispatch
POP_REGS POP_REGS
add $16, %rsp add $16, %rsp
# NOTE(review): 32(%rsp) is presumably the saved %ss slot of the iret
# frame once the two words above have been dropped; 0x1b matches the %ss
# value that syscall_gate pushes when building a user-mode context.
# If the frame returns to user-mode, switch back to the user %gs base.
cmpq $0x1b, 32(%rsp)
jne isr_skipgs2
swapgs
isr_skipgs2:
iretq iretq
@@ -347,11 +366,31 @@ isr_common_stub:
irq_common_stub: irq_common_stub:
PUSH_REGS PUSH_REGS
# When IRQ occurs in Ring 3, CPU sets %ss (and other non-code selectors)
# to 0.
# A zero %ss is therefore used as the "we came from user-mode" test: in
# that case %ss is reloaded with 0x10 (presumably the kernel data
# selector -- confirm against the GDT) and swapgs switches to the kernel
# %gs base. If %ss is non-zero the swapgs is skipped.
mov %ss, %ax
cmp $0, %ax
jne irq_skipgs1
mov $0x10, %ax
mov %ax, %ss
swapgs
irq_skipgs1:
mov %rsp, %rdi mov %rsp, %rdi
call irq_dispatch call irq_dispatch
POP_REGS POP_REGS
add $16, %rsp add $16, %rsp
# NOTE(review): 32(%rsp) is presumably the saved %ss slot of the iret
# frame once the two words above have been dropped; 0x1b matches the %ss
# value that syscall_gate pushes when building a user-mode context.
# If the frame returns to user-mode, switch back to the user %gs base.
cmpq $0x1b, 32(%rsp)
# branch to this stub's own exit label rather than the one that lives in
# isr_common_stub, so the IRQ return path does not depend on the ISR one.
jne irq_skipgs2
swapgs
irq_skipgs2:
iretq iretq
@@ -363,12 +402,12 @@ irq_common_stub:
syscall_gate: syscall_gate:
swapgs swapgs
movq %rsp, %gs:20 # GS+20 = rsp2 in the current TSS block (user stack storage) movq %rsp, %gs:94 # GS+20 = rsp2 in the current TSS block (user stack storage)
movq %gs:4, %rsp # GS+4 = rsp0 in the current TSS block (per-thread kstack) movq %gs:78, %rsp # GS+4 = rsp0 in the current TSS block (per-thread kstack)
# start building a ml_cpu_context # start building a ml_cpu_context
pushq $0x1b pushq $0x1b
pushq %gs:20 pushq %gs:94
push %r11 push %r11
push $0x23 push $0x23
push %rcx push %rcx
@@ -380,10 +419,6 @@ syscall_gate:
mov %rsp, %rdi mov %rsp, %rdi
# switch back to user gs while in syscall_dispatch. Interrupts are enabled in syscall_dispatch,
# and if the task gets pre-empted, the incoming task will expect %gs to have its usermode value.
swapgs
call syscall_dispatch call syscall_dispatch
POP_REGS POP_REGS
@@ -394,8 +429,8 @@ syscall_gate:
pop %r11 pop %r11
add $16, %rsp add $16, %rsp
swapgs movq %gs:94, %rsp # GS+20 = rsp2 in the current TSS block
movq %gs:20, %rsp # GS+20 = rsp2 in the current TSS block
swapgs swapgs
# back to usermode # back to usermode

View File

@@ -364,7 +364,12 @@ kern_status_t pmap_handle_fault(
struct task *task = current_task(); struct task *task = current_task();
struct vm_region *space = task->t_address_space; struct vm_region *space = task->t_address_space;
return vm_region_demand_map(space, fault_addr, flags); unsigned long lock_flags;
vm_region_lock_irqsave(space, &lock_flags);
kern_status_t status = vm_region_demand_map(space, fault_addr, flags);
vm_region_unlock_irqrestore(space, lock_flags);
return status;
} }
kern_status_t pmap_add( kern_status_t pmap_add(

View File

@@ -73,4 +73,5 @@ ml_thread_switch_user:
pop %rax pop %rax
add $16, %rsp add $16, %rsp
swapgs
iretq iretq

View File

@@ -1,5 +1,3 @@
#include "arch/msr.h"
#include <arch/gdt.h> #include <arch/gdt.h>
#include <arch/tss.h> #include <arch/tss.h>
#include <kernel/libc/string.h> #include <kernel/libc/string.h>
@@ -22,9 +20,6 @@ void tss_init(struct tss *tss, struct tss_ptr *ptr)
void tss_load(struct tss *tss) void tss_load(struct tss *tss)
{ {
tss_flush(TSS_GDT_INDEX); tss_flush(TSS_GDT_INDEX);
uintptr_t kernel_gs_base_reg = 0xC0000102;
wrmsr(kernel_gs_base_reg, (uintptr_t)tss);
} }
virt_addr_t tss_get_kstack(struct tss *tss) virt_addr_t tss_get_kstack(struct tss *tss)

View File

@@ -10,13 +10,17 @@ extern "C" {
typedef __aligned(8) ml_hwlock_t spin_lock_t; typedef __aligned(8) ml_hwlock_t spin_lock_t;
#define SPIN_LOCK_INIT ML_HWLOCK_INIT #define SPIN_LOCK_INIT ML_HWLOCK_INIT
#define spin_lock(lck) ml_hwlock_lock(lck); #define spin_lock(lck) ml_hwlock_lock(lck);
#define spin_unlock(lck) ml_hwlock_unlock(lck); #define spin_unlock(lck) ml_hwlock_unlock(lck);
#define spin_lock_irq(lck) ml_hwlock_lock_irq(lck);
#define spin_unlock_irq(lck) ml_hwlock_unlock_irq(lck);
#define spin_lock_irqsave(lck, flags) ml_hwlock_lock_irqsave(lck, flags); #define spin_lock_irqsave(lck, flags) ml_hwlock_lock_irqsave(lck, flags);
#define spin_unlock_irqrestore(lck, flags) ml_hwlock_unlock_irqrestore(lck, flags); #define spin_unlock_irqrestore(lck, flags) \
ml_hwlock_unlock_irqrestore(lck, flags);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@@ -3,8 +3,8 @@
#include <kernel/flags.h> #include <kernel/flags.h>
#include <kernel/locks.h> #include <kernel/locks.h>
#include <mango/status.h>
#include <kernel/vm.h> #include <kernel/vm.h>
#include <mango/status.h>
#include <stddef.h> #include <stddef.h>
#ifdef __cplusplus #ifdef __cplusplus
@@ -52,7 +52,10 @@ enum object_type_flags {
}; };
struct object_ops { struct object_ops {
kern_status_t (*destroy)(struct object *obj); kern_status_t (*destroy)(struct object *obj, struct queue *q);
kern_status_t (*destroy_recurse)(
struct queue_entry *entry,
struct object **out);
}; };
struct object_type { struct object_type {
@@ -67,6 +70,7 @@ struct object_type {
struct object { struct object {
uint32_t ob_magic; uint32_t ob_magic;
koid_t ob_id;
struct object_type *ob_type; struct object_type *ob_type;
spin_lock_t ob_lock; spin_lock_t ob_lock;
unsigned int ob_refcount; unsigned int ob_refcount;

View File

@@ -24,6 +24,7 @@
VM_PROT_READ | VM_PROT_WRITE | VM_PROT_USER) VM_PROT_READ | VM_PROT_WRITE | VM_PROT_USER)
extern kern_status_t sys_task_exit(int status); extern kern_status_t sys_task_exit(int status);
extern kern_status_t sys_task_self(kern_handle_t *out);
extern kern_status_t sys_task_create( extern kern_status_t sys_task_create(
kern_handle_t parent_handle, kern_handle_t parent_handle,
const char *name, const char *name,
@@ -37,6 +38,9 @@ extern kern_status_t sys_task_create_thread(
uintptr_t *args, uintptr_t *args,
size_t nr_args, size_t nr_args,
kern_handle_t *out_thread); kern_handle_t *out_thread);
extern kern_status_t sys_task_get_address_space(
kern_handle_t task,
kern_handle_t *out);
extern kern_status_t sys_thread_start(kern_handle_t thread); extern kern_status_t sys_thread_start(kern_handle_t thread);
@@ -75,6 +79,7 @@ extern kern_status_t sys_vm_region_create(
vm_prot_t prot, vm_prot_t prot,
kern_handle_t *out, kern_handle_t *out,
virt_addr_t *out_base_address); virt_addr_t *out_base_address);
extern kern_status_t sys_vm_region_kill(kern_handle_t region);
extern kern_status_t sys_vm_region_read( extern kern_status_t sys_vm_region_read(
kern_handle_t region, kern_handle_t region,
void *dst, void *dst,

View File

@@ -11,6 +11,11 @@
struct vm_region; struct vm_region;
struct vm_object; struct vm_object;
/* lifecycle state of a vm-region (stored in vm_region.vr_status). */
enum vm_region_status {
/* the region has been killed: its mappings have been removed and further
 * operations on it are denied. the object itself may still be referenced. */
VM_REGION_DEAD = 0,
/* the region is live. releasing the last reference to a region in this
 * state is treated as a kernel bug (the destroy hook panics). */
VM_REGION_ONLINE,
};
enum vm_region_entry_type { enum vm_region_entry_type {
VM_REGION_ENTRY_NONE = 0, VM_REGION_ENTRY_NONE = 0,
VM_REGION_ENTRY_REGION, VM_REGION_ENTRY_REGION,
@@ -18,9 +23,16 @@ enum vm_region_entry_type {
}; };
struct vm_region_entry { struct vm_region_entry {
struct btree_node e_node; union {
struct btree_node e_node;
/* this entry is only used to queue vm-region objects for
* recursive cleanup */
struct queue_entry e_entry;
};
struct vm_region_entry *e_parent; struct vm_region_entry *e_parent;
enum vm_region_entry_type e_type; enum vm_region_entry_type e_type;
/* absolute address of this entry */
virt_addr_t e_address;
/* offset in bytes of this entry within its immediate parent. */ /* offset in bytes of this entry within its immediate parent. */
off_t e_offset; off_t e_offset;
/* size of the entry in bytes */ /* size of the entry in bytes */
@@ -31,7 +43,7 @@ struct vm_region_mapping {
struct vm_region_entry m_entry; struct vm_region_entry m_entry;
struct vm_object *m_object; struct vm_object *m_object;
/* used to link to vm_object->vo_mappings */ /* used to link to vm_object->vo_mappings */
struct queue_entry m_object_entry; struct queue_entry m_object_entry;
vm_prot_t m_prot; vm_prot_t m_prot;
@@ -41,6 +53,7 @@ struct vm_region_mapping {
struct vm_region { struct vm_region {
struct object vr_base; struct object vr_base;
enum vm_region_status vr_status;
struct vm_region_entry vr_entry; struct vm_region_entry vr_entry;
char vr_name[VM_REGION_NAME_MAX]; char vr_name[VM_REGION_NAME_MAX];
@@ -81,6 +94,19 @@ extern kern_status_t vm_region_create(
vm_prot_t prot, vm_prot_t prot,
struct vm_region **out); struct vm_region **out);
/* recursively kills a given region and all of its sub-regions.
* when a region is killed, all of its mappings are unmapped, and any further
* operations on the region are denied. however, all handles and references to
* the region (and any sub-region) remain valid, and no kernel memory is
* de-allocated.
* the memory used by the vm-region object itself is de-allocated when the last
* handle/reference to the object is released.
* this function should be called with `region` locked.
*/
extern kern_status_t vm_region_kill(
struct vm_region *region,
unsigned long *lock_flags);
/* map a vm-object into a vm-region. /* map a vm-object into a vm-region.
* [region_offset,length] must fall within exactly one region, and cannot span * [region_offset,length] must fall within exactly one region, and cannot span
* multiple sibling regions. * multiple sibling regions.

View File

@@ -7,6 +7,20 @@
static struct queue object_types; static struct queue object_types;
static spin_lock_t object_types_lock = SPIN_LOCK_INIT; static spin_lock_t object_types_lock = SPIN_LOCK_INIT;
/* allocate the next kernel object id.
 * ids are handed out from a single counter that starts at 0 and grows by one
 * per call; the counter is protected by its own spin lock, taken with
 * interrupts saved/disabled for the duration of the update. */
static koid_t koid_alloc(void)
{
	static spin_lock_t id_lock = SPIN_LOCK_INIT;
	static koid_t next_id = 0;

	unsigned long irq_flags;
	spin_lock_irqsave(&id_lock, &irq_flags);
	koid_t id = next_id++;
	spin_unlock_irqrestore(&id_lock, irq_flags);

	return id;
}
kern_status_t object_bootstrap(void) kern_status_t object_bootstrap(void)
{ {
return KERN_OK; return KERN_OK;
@@ -53,6 +67,7 @@ struct object *object_create(struct object_type *type)
struct object *obj = (struct object *)((unsigned char *)obj_buf struct object *obj = (struct object *)((unsigned char *)obj_buf
+ type->ob_header_offset); + type->ob_header_offset);
obj->ob_id = koid_alloc();
obj->ob_type = type; obj->ob_type = type;
obj->ob_lock = SPIN_LOCK_INIT; obj->ob_lock = SPIN_LOCK_INIT;
obj->ob_magic = OBJECT_MAGIC; obj->ob_magic = OBJECT_MAGIC;
@@ -68,6 +83,15 @@ struct object *object_ref(struct object *obj)
return obj; return obj;
} }
static void __cleanup(struct object *obj, struct queue *queue)
{
if (HAS_OP(obj, destroy)) {
obj->ob_type->ob_ops.destroy(obj, queue);
}
vm_cache_free(&obj->ob_type->ob_cache, obj);
}
static void object_cleanup(struct object *obj, unsigned long flags) static void object_cleanup(struct object *obj, unsigned long flags)
{ {
if (obj->ob_refcount > 0 || obj->ob_handles > 0) { if (obj->ob_refcount > 0 || obj->ob_handles > 0) {
@@ -75,11 +99,30 @@ static void object_cleanup(struct object *obj, unsigned long flags)
return; return;
} }
if (HAS_OP(obj, destroy)) { struct queue queue = QUEUE_INIT;
obj->ob_type->ob_ops.destroy(obj); __cleanup(obj, &queue);
if (!HAS_OP(obj, destroy_recurse)) {
return;
} }
vm_cache_free(&obj->ob_type->ob_cache, obj); while (!queue_empty(&queue)) {
struct queue_entry *entry = queue_pop_front(&queue);
struct object *child = NULL;
obj->ob_type->ob_ops.destroy_recurse(entry, &child);
if (!child) {
continue;
}
if (child->ob_refcount > 1) {
child->ob_refcount--;
continue;
}
if (child->ob_refcount == 0 && child->ob_handles == 0) {
__cleanup(child, &queue);
}
}
} }
void object_unref(struct object *obj) void object_unref(struct object *obj)

View File

@@ -56,8 +56,10 @@
.endm .endm
SYSCALL_GATE task_exit SYS_TASK_EXIT 1 SYSCALL_GATE task_exit SYS_TASK_EXIT 1
SYSCALL_GATE task_self SYS_TASK_SELF 0
SYSCALL_GATE task_create SYS_TASK_CREATE 5 SYSCALL_GATE task_create SYS_TASK_CREATE 5
SYSCALL_GATE task_create_thread SYS_TASK_CREATE_THREAD 6 SYSCALL_GATE task_create_thread SYS_TASK_CREATE_THREAD 6
SYSCALL_GATE task_get_address_space SYS_TASK_GET_ADDRESS_SPACE 1
SYSCALL_GATE thread_start SYS_THREAD_START 1 SYSCALL_GATE thread_start SYS_THREAD_START 1
@@ -67,6 +69,7 @@ SYSCALL_GATE vm_object_write SYS_VM_OBJECT_WRITE 5
SYSCALL_GATE vm_object_copy SYS_VM_OBJECT_COPY 6 SYSCALL_GATE vm_object_copy SYS_VM_OBJECT_COPY 6
SYSCALL_GATE vm_region_create SYS_VM_REGION_CREATE 8 SYSCALL_GATE vm_region_create SYS_VM_REGION_CREATE 8
SYSCALL_GATE vm_region_kill SYS_VM_REGION_KILL 1
SYSCALL_GATE vm_region_read SYS_VM_REGION_READ 5 SYSCALL_GATE vm_region_read SYS_VM_REGION_READ 5
SYSCALL_GATE vm_region_write SYS_VM_REGION_WRITE 5 SYSCALL_GATE vm_region_write SYS_VM_REGION_WRITE 5
SYSCALL_GATE vm_region_map_absolute SYS_VM_REGION_MAP_ABSOLUTE 7 SYSCALL_GATE vm_region_map_absolute SYS_VM_REGION_MAP_ABSOLUTE 7

View File

@@ -5,6 +5,7 @@
#include <mango/types.h> #include <mango/types.h>
extern kern_status_t task_exit(int status); extern kern_status_t task_exit(int status);
extern kern_status_t task_self(kern_handle_t *out);
extern kern_status_t task_create( extern kern_status_t task_create(
kern_handle_t parent, kern_handle_t parent,
@@ -19,6 +20,9 @@ extern kern_status_t task_create_thread(
uintptr_t *args, uintptr_t *args,
size_t nr_args, size_t nr_args,
kern_handle_t *out_thread); kern_handle_t *out_thread);
extern kern_status_t task_get_address_space(
kern_handle_t task,
kern_handle_t *out);
extern kern_status_t thread_start(kern_handle_t thread); extern kern_status_t thread_start(kern_handle_t thread);

View File

@@ -39,6 +39,7 @@ extern kern_status_t vm_region_create(
vm_prot_t prot, vm_prot_t prot,
kern_handle_t *out, kern_handle_t *out,
virt_addr_t *out_base_address); virt_addr_t *out_base_address);
extern kern_status_t vm_region_kill(kern_handle_t region);
extern kern_status_t vm_region_read( extern kern_status_t vm_region_read(
kern_handle_t region, kern_handle_t region,
void *dst, void *dst,

View File

@@ -2,14 +2,17 @@
#define MANGO_SYSCALL_H_ #define MANGO_SYSCALL_H_
#define SYS_TASK_EXIT 1 #define SYS_TASK_EXIT 1
#define SYS_TASK_SELF 31
#define SYS_TASK_CREATE 2 #define SYS_TASK_CREATE 2
#define SYS_TASK_CREATE_THREAD 3 #define SYS_TASK_CREATE_THREAD 3
#define SYS_TASK_GET_ADDRESS_SPACE 33
#define SYS_THREAD_START 30 #define SYS_THREAD_START 30
#define SYS_VM_OBJECT_CREATE 4 #define SYS_VM_OBJECT_CREATE 4
#define SYS_VM_OBJECT_READ 5 #define SYS_VM_OBJECT_READ 5
#define SYS_VM_OBJECT_WRITE 6 #define SYS_VM_OBJECT_WRITE 6
#define SYS_VM_OBJECT_COPY 29 #define SYS_VM_OBJECT_COPY 29
#define SYS_VM_REGION_CREATE 7 #define SYS_VM_REGION_CREATE 7
#define SYS_VM_REGION_KILL 34
#define SYS_VM_REGION_READ 8 #define SYS_VM_REGION_READ 8
#define SYS_VM_REGION_WRITE 9 #define SYS_VM_REGION_WRITE 9
#define SYS_VM_REGION_MAP_ABSOLUTE 10 #define SYS_VM_REGION_MAP_ABSOLUTE 10

View File

@@ -45,6 +45,7 @@ typedef uintptr_t phys_addr_t;
typedef uintptr_t virt_addr_t; typedef uintptr_t virt_addr_t;
typedef uint64_t msgid_t; typedef uint64_t msgid_t;
typedef uint64_t off_t; typedef uint64_t off_t;
typedef uint64_t koid_t;
typedef unsigned int tid_t; typedef unsigned int tid_t;
typedef uint32_t kern_handle_t; typedef uint32_t kern_handle_t;
typedef uint32_t kern_config_key_t; typedef uint32_t kern_config_key_t;

View File

@@ -6,14 +6,17 @@
static const virt_addr_t syscall_table[] = { static const virt_addr_t syscall_table[] = {
SYSCALL_TABLE_ENTRY(TASK_EXIT, task_exit), SYSCALL_TABLE_ENTRY(TASK_EXIT, task_exit),
SYSCALL_TABLE_ENTRY(TASK_SELF, task_self),
SYSCALL_TABLE_ENTRY(TASK_CREATE, task_create), SYSCALL_TABLE_ENTRY(TASK_CREATE, task_create),
SYSCALL_TABLE_ENTRY(TASK_CREATE_THREAD, task_create_thread), SYSCALL_TABLE_ENTRY(TASK_CREATE_THREAD, task_create_thread),
SYSCALL_TABLE_ENTRY(TASK_GET_ADDRESS_SPACE, task_get_address_space),
SYSCALL_TABLE_ENTRY(THREAD_START, thread_start), SYSCALL_TABLE_ENTRY(THREAD_START, thread_start),
SYSCALL_TABLE_ENTRY(VM_OBJECT_CREATE, vm_object_create), SYSCALL_TABLE_ENTRY(VM_OBJECT_CREATE, vm_object_create),
SYSCALL_TABLE_ENTRY(VM_OBJECT_READ, vm_object_read), SYSCALL_TABLE_ENTRY(VM_OBJECT_READ, vm_object_read),
SYSCALL_TABLE_ENTRY(VM_OBJECT_WRITE, vm_object_write), SYSCALL_TABLE_ENTRY(VM_OBJECT_WRITE, vm_object_write),
SYSCALL_TABLE_ENTRY(VM_OBJECT_COPY, vm_object_copy), SYSCALL_TABLE_ENTRY(VM_OBJECT_COPY, vm_object_copy),
SYSCALL_TABLE_ENTRY(VM_REGION_CREATE, vm_region_create), SYSCALL_TABLE_ENTRY(VM_REGION_CREATE, vm_region_create),
SYSCALL_TABLE_ENTRY(VM_REGION_KILL, vm_region_kill),
SYSCALL_TABLE_ENTRY(VM_REGION_READ, vm_region_read), SYSCALL_TABLE_ENTRY(VM_REGION_READ, vm_region_read),
SYSCALL_TABLE_ENTRY(VM_REGION_WRITE, vm_region_write), SYSCALL_TABLE_ENTRY(VM_REGION_WRITE, vm_region_write),
SYSCALL_TABLE_ENTRY(VM_REGION_MAP_ABSOLUTE, vm_region_map_absolute), SYSCALL_TABLE_ENTRY(VM_REGION_MAP_ABSOLUTE, vm_region_map_absolute),

View File

@@ -14,6 +14,35 @@ extern kern_status_t sys_task_exit(int status)
return KERN_UNIMPLEMENTED; return KERN_UNIMPLEMENTED;
} }
/* syscall: give the calling task a handle that refers to itself.
 * a new slot is allocated in the caller's handle table, pointed at the
 * caller's own task object, and the handle value is stored in `*out`.
 * returns KERN_MEMORY_FAULT if `out` fails the write-access check, the status
 * from handle_table_alloc_handle() if no handle could be allocated, and
 * KERN_OK otherwise. */
kern_status_t sys_task_self(kern_handle_t *out)
{
struct task *self = current_task();
/* `out` is written directly at the end of this function */
if (!validate_access_w(self, out, sizeof *out)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
/* the handle table is only touched while the task is locked */
task_lock_irqsave(self, &flags);
struct handle *handle_slot = NULL;
kern_handle_t handle;
kern_status_t status = handle_table_alloc_handle(
self->t_handles,
&handle_slot,
&handle);
task_unlock_irqrestore(self, flags);
if (status != KERN_OK) {
return status;
}
/* NOTE(review): the slot is filled in only after the task lock has been
 * dropped, so for a short window it is allocated but has no object.
 * presumably this ordering exists because object_add_handle() needs to
 * lock the very object that was just unlocked -- confirm. */
object_add_handle(&self->t_base);
handle_slot->h_object = &self->t_base;
*out = handle;
return KERN_OK;
}
kern_status_t sys_task_create( kern_status_t sys_task_create(
kern_handle_t parent_handle, kern_handle_t parent_handle,
const char *name, const char *name,
@@ -175,6 +204,57 @@ kern_status_t sys_task_create_thread(
return KERN_OK; return KERN_OK;
} }
/* syscall: obtain a handle to the address space (root vm-region) of a task.
 * `task_handle` is resolved in the caller's handle table; a new handle that
 * refers to that task's t_address_space is allocated in the caller's table
 * and stored in `*out`.
 * returns KERN_MEMORY_FAULT if `out` fails the write-access check,
 * KERN_INVALID_ARGUMENT if `task_handle` does not refer to a task, the status
 * from task_resolve_handle()/handle_table_alloc_handle() if either fails, and
 * KERN_OK otherwise. */
kern_status_t sys_task_get_address_space(
kern_handle_t task_handle,
kern_handle_t *out)
{
struct task *self = current_task();
/* `out` is written directly at the end of this function */
if (!validate_access_w(self, out, sizeof *out)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
/* the caller stays locked for the whole handle-table manipulation; every
 * exit path below unlocks it again */
task_lock_irqsave(self, &flags);
struct handle *handle_slot = NULL;
kern_handle_t handle;
struct object *task_obj = NULL;
handle_flags_t handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
task_handle,
&task_obj,
&handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
status = handle_table_alloc_handle(
self->t_handles,
&handle_slot,
&handle);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* the new handle was allocated before the type check, so it has to be
 * released again if the resolved object is not a task */
struct task *task = task_cast(task_obj);
if (!task) {
handle_table_free_handle(self->t_handles, handle);
task_unlock_irqrestore(self, flags);
return KERN_INVALID_ARGUMENT;
}
/* NOTE(review): only `self` is locked here; when `task` is a different
 * task its t_address_space is read without taking that task's lock --
 * confirm this is safe. */
handle_slot->h_object = &task->t_address_space->vr_base;
object_add_handle(&task->t_address_space->vr_base);
task_unlock_irqrestore(self, flags);
*out = handle;
return KERN_OK;
}
kern_status_t sys_thread_start(kern_handle_t thread_handle) kern_status_t sys_thread_start(kern_handle_t thread_handle)
{ {
unsigned long flags; unsigned long flags;

View File

@@ -63,6 +63,7 @@ kern_status_t sys_vm_region_create(
object_ref(obj); object_ref(obj);
task_unlock_irqrestore(self, flags); task_unlock_irqrestore(self, flags);
vm_region_lock_irqsave(parent_region, &flags);
struct vm_region *child = NULL; struct vm_region *child = NULL;
status = vm_region_create( status = vm_region_create(
@@ -73,6 +74,7 @@ kern_status_t sys_vm_region_create(
region_len, region_len,
prot, prot,
&child); &child);
vm_region_unlock_irqrestore(parent_region, flags);
object_unref(obj); object_unref(obj);
if (status != KERN_OK) { if (status != KERN_OK) {
@@ -92,6 +94,39 @@ kern_status_t sys_vm_region_create(
return KERN_OK; return KERN_OK;
} }
/* syscall: kill the vm-region referred to by `region_handle`.
 * the handle is resolved in the caller's handle table and the region is then
 * passed to vm_region_kill() with the region lock held.
 * returns the status from task_resolve_handle() if the handle cannot be
 * resolved, KERN_INVALID_ARGUMENT if it does not refer to a vm-region, and
 * otherwise the status returned by vm_region_kill(). */
kern_status_t sys_vm_region_kill(kern_handle_t region_handle)
{
struct task *self = current_task();
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *obj = NULL;
handle_flags_t handle_flags = 0;
kern_status_t status
= task_resolve_handle(self, region_handle, &obj, &handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
struct vm_region *region = vm_region_cast(obj);
if (!region) {
task_unlock_irqrestore(self, flags);
return KERN_INVALID_ARGUMENT;
}
/* take a temporary reference before the task lock is dropped; it is
 * released again once the kill has finished (presumably so the region
 * cannot be freed in between -- confirm). */
object_ref(obj);
task_unlock_irqrestore(self, flags);
/* vm_region_kill() is called with the region locked and is given a pointer
 * to the saved flags.
 * NOTE(review): presumably it may drop and re-take the lock internally;
 * confirm against its implementation. */
vm_region_lock_irqsave(region, &flags);
status = vm_region_kill(region, &flags);
vm_region_unlock_irqrestore(region, flags);
object_unref(obj);
return status;
}
kern_status_t sys_vm_region_read( kern_status_t sys_vm_region_read(
kern_handle_t region_handle, kern_handle_t region_handle,
void *dst, void *dst,

View File

@@ -8,6 +8,20 @@
#include <kernel/vm-region.h> #include <kernel/vm-region.h>
#include <mango/status.h> #include <mango/status.h>
/* NOTE Locking Rules
* To avoid deadlocks and crashes, the following locking rules should be
* followed:
* 1. Do NOT lock more than one region at a time IF the regions are siblings.
* 2. When locking a region and its child(ren) or ancestors, always lock
* the parent region BEFORE the child region.
* 3. When locking a region and a vm-object mapped into that region, always
* lock the region BEFORE the vm-object.
* 4. An entry MUST be locked before any of its data can be read/written,
* including its children (if it's a region) and its e_parent pointer.
* 5. vm_region_mapping has no lock. Instead, its immediate parent region must
* be locked before any child mappings can be accessed.
*/
/*** STATIC DATA + MACROS *****************************************************/ /*** STATIC DATA + MACROS *****************************************************/
#undef ASLR #undef ASLR
@@ -49,10 +63,21 @@ enum search_direction {
#define VM_REGION_CAST(p) \ #define VM_REGION_CAST(p) \
OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p) OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p)
static kern_status_t vm_region_object_destroy(struct object *obj);
static kern_status_t region_object_destroy(struct object *obj, struct queue *q);
static kern_status_t region_object_destroy_recurse(
struct queue_entry *entry,
struct object **out);
static struct object_type vm_region_type = { static struct object_type vm_region_type = {
.ob_name = "vm-region", .ob_name = "vm-region",
.ob_size = sizeof(struct vm_region), .ob_size = sizeof(struct vm_region),
.ob_header_offset = offsetof(struct vm_region, vr_base), .ob_header_offset = offsetof(struct vm_region, vr_base),
.ob_ops = {
.destroy = region_object_destroy,
.destroy_recurse = region_object_destroy_recurse,
},
}; };
static struct vm_cache mapping_cache = { static struct vm_cache mapping_cache = {
@@ -81,17 +106,53 @@ static struct vm_region_mapping *mapping_from_entry(
return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry); return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry);
} }
static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry) kern_status_t region_object_destroy(struct object *obj, struct queue *q)
{ {
virt_addr_t result = 0; struct vm_region *region = VM_REGION_CAST(obj);
while (entry) { if (region->vr_status == VM_REGION_ONLINE) {
result += entry->e_offset; panic("last reference closed on an online vm-region");
entry = entry->e_parent;
} }
return result; struct btree_node *node = btree_first(&region->vr_entries);
while (node) {
struct btree_node *next = btree_next(node);
btree_delete(&region->vr_entries, node);
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, node);
if (entry->e_type != VM_REGION_ENTRY_REGION) {
panic("offline vm-region still contains non-region "
"children.");
}
queue_push_back(q, &entry->e_entry);
node = next;
}
return KERN_OK;
} }
kern_status_t region_object_destroy_recurse(
struct queue_entry *entry,
struct object **out)
{
struct vm_region_entry *region_entry
= BTREE_CONTAINER(struct vm_region_entry, e_entry, entry);
if (region_entry->e_type != VM_REGION_ENTRY_REGION) {
panic("offline vm-region still contains non-region "
"children.");
}
struct vm_region *region = region_from_entry(region_entry);
*out = &region->vr_base;
return KERN_OK;
}
/* return the absolute virtual address of `entry`.
 * the address is read straight from the entry's e_address field; no parent
 * entries are visited. */
static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry)
{
return entry->e_address;
}
/* this function must be called with `parent` locked */
static void region_put_entry( static void region_put_entry(
struct vm_region *parent, struct vm_region *parent,
struct vm_region_entry *child) struct vm_region_entry *child)
@@ -119,7 +180,15 @@ static void region_put_entry(
} else if (child_base > cur_limit) { } else if (child_base > cur_limit) {
next = btree_right(cur); next = btree_right(cur);
} else { } else {
panic("tried to add an overlapping entry to vm-region"); #ifdef TRACE
vm_region_dump(parent);
#endif
panic("tried to add an overlapping entry [%zx-%zx] to "
"vm-region (overlaps [%zx-%zx])",
child_base,
child_limit,
cur_base,
cur_limit);
} }
if (next) { if (next) {
@@ -179,12 +248,17 @@ static struct vm_region_entry *region_get_entry(
/* find the child region that covers the area [*offp,len]. searches recursively /* find the child region that covers the area [*offp,len]. searches recursively
* the value in `offp` is updated to the offset of the returned entry relative * the value in `offp` is updated to the offset of the returned entry relative
* to its parent */ * to its parent.
* this function should be called with `region` locked.
* the region returned by this function will also be locked. any intermediary
* regions traversed by this function will be locked temporarily, but will
* be unlocked by the time the function returns. */
static struct vm_region *region_get_child_region_recursive( static struct vm_region *region_get_child_region_recursive(
struct vm_region *region, struct vm_region *region,
off_t *offp, off_t *offp,
size_t len) size_t len)
{ {
struct vm_region *root = region;
off_t offset = *offp; off_t offset = *offp;
if (offset >= region->vr_entry.e_size) { if (offset >= region->vr_entry.e_size) {
return NULL; return NULL;
@@ -197,6 +271,14 @@ static struct vm_region *region_get_child_region_recursive(
struct vm_region *next_region = region_from_entry(next); struct vm_region *next_region = region_from_entry(next);
if (next_region) { if (next_region) {
offset -= next->e_offset; offset -= next->e_offset;
/* since `region` is locked, interrupts are already
* disabled, so don't use lock_irq() here */
vm_region_lock(next_region);
if (region != root) {
vm_region_unlock(region);
}
region = next_region; region = next_region;
} else { } else {
break; break;
@@ -207,6 +289,12 @@ static struct vm_region *region_get_child_region_recursive(
return region; return region;
} }
/* find the vm_region_mapping that contains a given memory area.
* `offp` should be a pointer to an off_t value that contains the offset
* of the area relative to the start of `region`. this value will be updated
* to the offset of the mapping relative to its immediate parent.
* this function should be called with `region` locked. if a mapping is found,
* it will be returned with its immediate parent locked. */
static struct vm_region_mapping *region_get_mapping_recursive( static struct vm_region_mapping *region_get_mapping_recursive(
struct vm_region *region, struct vm_region *region,
off_t *offp, off_t *offp,
@@ -218,9 +306,12 @@ static struct vm_region_mapping *region_get_mapping_recursive(
return NULL; return NULL;
} }
/* if `region` is a different region than what was originally passed to
* us, it has now been locked, and its children can be accessed. */
struct vm_region_entry *entry = region_get_entry(region, offset, len); struct vm_region_entry *entry = region_get_entry(region, offset, len);
*offp = offset; *offp = offset;
/* return the mapping with the parent region still locked */
return mapping_from_entry(entry); return mapping_from_entry(entry);
} }
@@ -488,6 +579,7 @@ static kern_status_t region_validate_allocation(
return KERN_OK; return KERN_OK;
} }
/* this function should be called with `region` locked */
static void vm_iterator_begin( static void vm_iterator_begin(
struct vm_iterator *it, struct vm_iterator *it,
struct vm_region *region, struct vm_region *region,
@@ -551,6 +643,15 @@ static void vm_iterator_begin(
static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes) static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
{ {
/* unlock the immediate parent region of mapping `p`, unless that parent is
 * the iterator's own region (`it->it_region`), which is left locked.
 * `it` is taken from the enclosing scope. the argument is parenthesised and
 * the temporary has a macro-private name so that any pointer expression can
 * be passed safely. */
#define UNLOCK_MAPPING_PARENT(p)                                               \
	do {                                                                   \
		struct vm_region *unlock_parent_                               \
			= region_from_entry((p)->m_entry.e_parent);            \
		if (unlock_parent_ != it->it_region) {                         \
			vm_region_unlock(unlock_parent_);                      \
		}                                                              \
	} while (0)
if (nr_bytes < it->it_max) { if (nr_bytes < it->it_max) {
it->it_base += nr_bytes; it->it_base += nr_bytes;
it->it_buf = (char *)it->it_buf + nr_bytes; it->it_buf = (char *)it->it_buf + nr_bytes;
@@ -558,6 +659,10 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
return KERN_OK; return KERN_OK;
} }
/* the parent region of it->it_mapping is locked here. if it is
* different from it->it_region, it must be unlocked */
UNLOCK_MAPPING_PARENT(it->it_mapping);
it->it_base += nr_bytes; it->it_base += nr_bytes;
off_t offset = it->it_base - vm_region_get_base_address(it->it_region); off_t offset = it->it_base - vm_region_get_base_address(it->it_region);
@@ -569,9 +674,13 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
return KERN_MEMORY_FAULT; return KERN_MEMORY_FAULT;
} }
/* past this point, if we encounter an error, must remember to unlock
* the parent region of next_mapping */
if ((next_mapping->m_prot & it->it_prot) != it->it_prot) { if ((next_mapping->m_prot & it->it_prot) != it->it_prot) {
it->it_buf = NULL; it->it_buf = NULL;
it->it_max = 0; it->it_max = 0;
UNLOCK_MAPPING_PARENT(next_mapping);
return KERN_MEMORY_FAULT; return KERN_MEMORY_FAULT;
} }
@@ -590,6 +699,7 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
} }
if (!pg) { if (!pg) {
UNLOCK_MAPPING_PARENT(next_mapping);
return KERN_NO_MEMORY; return KERN_NO_MEMORY;
} }
@@ -620,6 +730,9 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
return KERN_OK; return KERN_OK;
} }
/* this function must be called with `root` locked. `root` will be the first
* entry visited by the iterator. from there, child entries are visited in
* depth-first order. */
static void entry_iterator_begin( static void entry_iterator_begin(
struct entry_iterator *it, struct entry_iterator *it,
struct vm_region *root) struct vm_region *root)
@@ -629,8 +742,43 @@ static void entry_iterator_begin(
it->it_entry = &root->vr_entry; it->it_entry = &root->vr_entry;
} }
/* release the locks held on behalf of an entry_iterator.
 * starting from the region associated with the current entry (the entry
 * itself if it is a region, or its parent if it is a mapping), every region
 * on the path up towards `it->it_root` is unlocked. `it->it_root` itself is
 * NOT unlocked. afterwards the iterator is zeroed.
 * if the iterator has no current entry, nothing is done (and the iterator is
 * left untouched). */
static void entry_iterator_finish(struct entry_iterator *it)
{
	struct vm_region_entry *entry = it->it_entry;
	if (entry == NULL) {
		return;
	}

	/* pick the deepest region that is locked for the current entry */
	struct vm_region *locked
		= (entry->e_type == VM_REGION_ENTRY_MAPPING)
			  ? region_from_entry(entry->e_parent)
			  : region_from_entry(entry);

	/* climb towards the root, dropping one lock per level. the parent is
	 * looked up before the child is unlocked. */
	while (locked != NULL && locked != it->it_root) {
		struct vm_region *above
			= region_from_entry(locked->vr_entry.e_parent);
		vm_region_unlock(locked);
		locked = above;
	}

	memset(it, 0x0, sizeof *it);
}
/* move to the next entry in the traversal order.
* when this function returns:
* 1. if the visited entry is a region, it will be locked.
* 2. if the visited entry is a mapping, its parent region will be locked.
* a region will remain locked until all of its children and n-grand-children
* have been visited. once iteration is finished, only `it->it_root` will be
* locked.
*/
static void entry_iterator_move_next(struct entry_iterator *it) static void entry_iterator_move_next(struct entry_iterator *it)
{ {
/* `region` is locked */
struct vm_region *region = region_from_entry(it->it_entry); struct vm_region *region = region_from_entry(it->it_entry);
bool has_children = (region && !btree_empty(&region->vr_entries)); bool has_children = (region && !btree_empty(&region->vr_entries));
@@ -639,6 +787,15 @@ static void entry_iterator_move_next(struct entry_iterator *it)
struct btree_node *node = btree_first(&region->vr_entries); struct btree_node *node = btree_first(&region->vr_entries);
struct vm_region_entry *entry struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, node); = BTREE_CONTAINER(struct vm_region_entry, e_node, node);
if (entry->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child_region
= region_from_entry(entry);
/* since `region` is locked, interrupts are already
* disabled, so don't use lock_irq() here */
vm_region_lock(child_region);
}
it->it_depth++; it->it_depth++;
it->it_entry = entry; it->it_entry = entry;
return; return;
@@ -670,6 +827,72 @@ static void entry_iterator_move_next(struct entry_iterator *it)
return; return;
} }
if (cur->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child_region = region_from_entry(cur);
if (child_region != it->it_root) {
vm_region_unlock(child_region);
}
}
it->it_depth--;
cur = parent_entry;
}
}
/* erase the current entry and move to the next entry in the traversal order.
 * the current entry MUST be a mapping, otherwise nothing will happen.
 *
 * the mapping is removed from its parent's entry tree and returned to
 * `mapping_cache`. the iterator then advances to the mapping's right sibling
 * if there is one; otherwise it climbs towards `it->it_root` looking for the
 * first ancestor with a right sibling, unlocking each non-root region it
 * leaves. `it->it_entry` is set to NULL when the traversal is exhausted.
 *
 * NOTE(review): when the next entry is a sibling *region*, it is returned
 * without being locked here, whereas the first-child path of
 * entry_iterator_move_next() locks child regions. confirm this is intended.
 */
static void entry_iterator_erase(struct entry_iterator *it)
{
	/* the parent region of `mapping` is locked */
	struct vm_region_mapping *mapping = mapping_from_entry(it->it_entry);
	if (!mapping) {
		return;
	}

	/* read everything we need out of the mapping BEFORE it is freed.
	 * after vm_cache_free() the entry embedded in the mapping must not be
	 * touched again, so the upward walk starts from the saved parent
	 * entry rather than from the (now dangling) mapping entry. */
	struct vm_region_entry *cur = mapping->m_entry.e_parent;
	struct vm_region *parent = region_from_entry(cur);
	struct btree_node *sibling = btree_next(&mapping->m_entry.e_node);

	btree_delete(&parent->vr_entries, &mapping->m_entry.e_node);
	vm_cache_free(&mapping_cache, mapping);

	if (sibling) {
		it->it_entry = BTREE_CONTAINER(
			struct vm_region_entry,
			e_node,
			sibling);
		return;
	}

	/* the mapping was the last entry of its parent: step up one level.
	 * the mapping itself held no lock, so there is nothing to unlock for
	 * this first step. */
	it->it_depth--;

	/* go back up until we find a right sibling. */
	while (1) {
		sibling = btree_next(&cur->e_node);
		if (sibling) {
			it->it_entry = BTREE_CONTAINER(
				struct vm_region_entry,
				e_node,
				sibling);
			return;
		}

		if (cur == &it->it_root->vr_entry) {
			it->it_entry = NULL;
			return;
		}

		struct vm_region_entry *parent_entry = cur->e_parent;
		if (!region_from_entry(parent_entry)) {
			it->it_entry = NULL;
			return;
		}

		/* we are done with `cur` and everything below it */
		if (cur->e_type == VM_REGION_ENTRY_REGION) {
			struct vm_region *child_region = region_from_entry(cur);
			if (child_region != it->it_root) {
				vm_region_unlock(child_region);
			}
		}

		it->it_depth--;
		cur = parent_entry;
	}
}
@@ -705,6 +928,11 @@ static void mapping_iterator_begin(
} }
} }
/* finish a mapping iteration. this forwards directly to
 * entry_iterator_finish() with the same iterator. */
static void mapping_iterator_finish(struct entry_iterator *it)
{
	entry_iterator_finish(it);
}
static void mapping_iterator_move_next( static void mapping_iterator_move_next(
struct entry_iterator *it, struct entry_iterator *it,
off_t offset, off_t offset,
@@ -730,6 +958,34 @@ static void mapping_iterator_move_next(
} }
} }
/* erase the current mapping and advance to the next mapping in the area
 * described by `offset` and `length`.
 * after the erase, any non-mapping entries are skipped. if a mapping is
 * found, its address minus the root region's `e_offset` is computed: when
 * that value lies at or beyond `offset + length` the iteration is ended
 * (`it->it_entry` becomes NULL); otherwise the value is stored in `*offp`.
 * `*offp` is not written when the iteration ends. */
static void mapping_iterator_erase(
	struct entry_iterator *it,
	off_t offset,
	size_t length,
	off_t *offp)
{
	entry_iterator_erase(it);

	/* skip forward over anything that is not a mapping */
	for (;;) {
		if (!it->it_entry) {
			return;
		}

		if (it->it_entry->e_type == VM_REGION_ENTRY_MAPPING) {
			break;
		}

		entry_iterator_move_next(it);
	}

	off_t rel = entry_absolute_address(it->it_entry)
		    - it->it_root->vr_entry.e_offset;

	if (rel < offset + length) {
		*offp = rel;
		return;
	}

	/* the next mapping starts past the end of the requested area */
	it->it_entry = NULL;
}
/*** PUBLIC API ***************************************************************/ /*** PUBLIC API ***************************************************************/
kern_status_t vm_region_type_init(void) kern_status_t vm_region_type_init(void)
@@ -743,6 +999,8 @@ struct vm_region *vm_region_cast(struct object *obj)
return VM_REGION_CAST(obj); return VM_REGION_CAST(obj);
} }
/* this function should be called with `parent` locked (if parent is non-NULL)
*/
kern_status_t vm_region_create( kern_status_t vm_region_create(
struct vm_region *parent, struct vm_region *parent,
const char *name, const char *name,
@@ -752,6 +1010,10 @@ kern_status_t vm_region_create(
vm_prot_t prot, vm_prot_t prot,
struct vm_region **out) struct vm_region **out)
{ {
if (parent && parent->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (!offset || !region_len) { if (!offset || !region_len) {
return KERN_INVALID_ARGUMENT; return KERN_INVALID_ARGUMENT;
} }
@@ -781,8 +1043,10 @@ kern_status_t vm_region_create(
struct vm_region *region = VM_REGION_CAST(region_object); struct vm_region *region = VM_REGION_CAST(region_object);
region->vr_status = VM_REGION_ONLINE;
region->vr_prot = prot; region->vr_prot = prot;
region->vr_entry.e_type = VM_REGION_ENTRY_REGION; region->vr_entry.e_type = VM_REGION_ENTRY_REGION;
region->vr_entry.e_address = offset;
region->vr_entry.e_offset = offset; region->vr_entry.e_offset = offset;
region->vr_entry.e_size = region_len; region->vr_entry.e_size = region_len;
@@ -794,8 +1058,11 @@ kern_status_t vm_region_create(
if (parent) { if (parent) {
region->vr_entry.e_parent = &parent->vr_entry; region->vr_entry.e_parent = &parent->vr_entry;
region->vr_entry.e_address += parent->vr_entry.e_address;
region->vr_pmap = parent->vr_pmap; region->vr_pmap = parent->vr_pmap;
region_put_entry(parent, &region->vr_entry); region_put_entry(parent, &region->vr_entry);
/* `parent` holds a reference to child `region` */
object_ref(&region->vr_base);
} }
if (name && name_len) { if (name && name_len) {
@@ -808,8 +1075,67 @@ kern_status_t vm_region_create(
return KERN_OK; return KERN_OK;
} }
kern_status_t vm_region_map_object( kern_status_t vm_region_kill(
struct vm_region *region, struct vm_region *region,
unsigned long *lock_flags)
{
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (region->vr_entry.e_parent) {
struct vm_region *parent
= region_from_entry(region->vr_entry.e_parent);
region->vr_entry.e_parent = NULL;
/* locks must be acquired in parent->child order. since we're
* going backwards here, unlock `region` before locking its
* parent */
vm_region_unlock_irqrestore(region, *lock_flags);
vm_region_lock_irqsave(parent, lock_flags);
btree_delete(&parent->vr_entries, &region->vr_entry.e_node);
vm_region_unlock_irqrestore(parent, *lock_flags);
vm_region_lock_irqsave(region, lock_flags);
/* `region` lock is held, and e_parent is NULL */
}
struct entry_iterator it;
entry_iterator_begin(&it, region);
while (it.it_entry) {
if (it.it_entry->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *region
= region_from_entry(it.it_entry);
region->vr_status = VM_REGION_DEAD;
entry_iterator_move_next(&it);
continue;
}
struct vm_region_mapping *mapping
= mapping_from_entry(it.it_entry);
virt_addr_t base = entry_absolute_address(it.it_entry);
for (size_t i = 0; i < mapping->m_entry.e_size;
i += VM_PAGE_SIZE) {
pmap_remove(region->vr_pmap, base + i);
}
unsigned long flags;
vm_object_lock_irqsave(mapping->m_object, &flags);
queue_delete(
&mapping->m_object->vo_mappings,
&mapping->m_object_entry);
vm_object_unlock_irqrestore(mapping->m_object, flags);
entry_iterator_erase(&it);
}
return KERN_OK;
}
kern_status_t vm_region_map_object(
struct vm_region *root,
off_t region_offset, off_t region_offset,
struct vm_object *object, struct vm_object *object,
off_t object_offset, off_t object_offset,
@@ -839,10 +1165,24 @@ kern_status_t vm_region_map_object(
length += VM_PAGE_SIZE; length += VM_PAGE_SIZE;
} }
if (!region || !object) { if (!root || !object) {
return KERN_INVALID_ARGUMENT; return KERN_INVALID_ARGUMENT;
} }
struct vm_region *region = root;
if (region_offset != VM_REGION_ANY_OFFSET) {
region = region_get_child_region_recursive(
root,
&region_offset,
length);
/* if `region` != `root`, it will need to be unlocked at the end
* of the function */
}
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if ((prot & region->vr_prot) != prot) { if ((prot & region->vr_prot) != prot) {
return KERN_INVALID_ARGUMENT; return KERN_INVALID_ARGUMENT;
} }
@@ -855,13 +1195,6 @@ kern_status_t vm_region_map_object(
return KERN_INVALID_ARGUMENT; return KERN_INVALID_ARGUMENT;
} }
if (region_offset != VM_REGION_ANY_OFFSET) {
region = region_get_child_region_recursive(
region,
&region_offset,
length);
}
if (!region) { if (!region) {
return KERN_INVALID_ARGUMENT; return KERN_INVALID_ARGUMENT;
} }
@@ -896,6 +1229,7 @@ kern_status_t vm_region_map_object(
mapping->m_object_offset = object_offset; mapping->m_object_offset = object_offset;
mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING; mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING;
mapping->m_entry.e_parent = &region->vr_entry; mapping->m_entry.e_parent = &region->vr_entry;
mapping->m_entry.e_address = region->vr_entry.e_address + region_offset;
mapping->m_entry.e_offset = region_offset; mapping->m_entry.e_offset = region_offset;
mapping->m_entry.e_size = length; mapping->m_entry.e_size = length;
@@ -907,7 +1241,14 @@ kern_status_t vm_region_map_object(
abs_base + length); abs_base + length);
#endif #endif
region_put_entry(region, &mapping->m_entry); region_put_entry(region, &mapping->m_entry);
if (region != root) {
vm_region_unlock(region);
}
unsigned long lock_flags;
vm_object_lock_irqsave(object, &lock_flags);
queue_push_back(&object->vo_mappings, &mapping->m_object_entry); queue_push_back(&object->vo_mappings, &mapping->m_object_entry);
vm_object_unlock_irqrestore(object, lock_flags);
if (out) { if (out) {
*out = entry_absolute_address(&mapping->m_entry); *out = entry_absolute_address(&mapping->m_entry);
@@ -1068,12 +1409,13 @@ static kern_status_t delete_mapping(
pmap_remove(root->vr_pmap, base + i); pmap_remove(root->vr_pmap, base + i);
} }
struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent); unsigned long flags;
vm_object_lock_irqsave(mapping->m_object, &flags);
queue_delete(&mapping->m_object->vo_mappings, &mapping->m_object_entry); queue_delete(&mapping->m_object->vo_mappings, &mapping->m_object_entry);
btree_delete(&parent->vr_entries, &mapping->m_entry.e_node); vm_object_unlock_irqrestore(mapping->m_object, flags);
vm_cache_free(&mapping_cache, mapping); /* don't actually delete the mapping yet. that will be done by
* vm_region_unmap */
return KERN_OK; return KERN_OK;
} }
@@ -1083,6 +1425,10 @@ kern_status_t vm_region_unmap(
off_t unmap_area_offset, off_t unmap_area_offset,
size_t unmap_area_length) size_t unmap_area_length)
{ {
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
kern_status_t status = KERN_OK; kern_status_t status = KERN_OK;
struct entry_iterator it; struct entry_iterator it;
off_t unmap_area_limit = unmap_area_offset + unmap_area_length; off_t unmap_area_limit = unmap_area_offset + unmap_area_length;
@@ -1101,12 +1447,6 @@ kern_status_t vm_region_unmap(
off_t mapping_offset = tmp; off_t mapping_offset = tmp;
off_t mapping_limit = mapping_offset + it.it_entry->e_size; off_t mapping_limit = mapping_offset + it.it_entry->e_size;
mapping_iterator_move_next(
&it,
unmap_area_offset,
unmap_area_length,
&tmp);
bool split bool split
= (unmap_area_offset > mapping_offset = (unmap_area_offset > mapping_offset
&& unmap_area_limit < mapping_limit); && unmap_area_limit < mapping_limit);
@@ -1127,6 +1467,7 @@ kern_status_t vm_region_unmap(
mapping_offset, mapping_offset,
unmap_area_offset, unmap_area_offset,
unmap_area_limit); unmap_area_limit);
delete = true;
} else if (delete) { } else if (delete) {
status = delete_mapping( status = delete_mapping(
mapping, mapping,
@@ -1150,11 +1491,27 @@ kern_status_t vm_region_unmap(
panic("don't know what to do with this mapping"); panic("don't know what to do with this mapping");
} }
if (delete) {
mapping_iterator_erase(
&it,
unmap_area_offset,
unmap_area_length,
&tmp);
} else {
mapping_iterator_move_next(
&it,
unmap_area_offset,
unmap_area_length,
&tmp);
}
if (status != KERN_OK) { if (status != KERN_OK) {
break; break;
} }
} }
mapping_iterator_finish(&it);
return status; return status;
} }
@@ -1164,6 +1521,10 @@ bool vm_region_validate_access(
size_t len, size_t len,
vm_prot_t prot) vm_prot_t prot)
{ {
if (region->vr_status != VM_REGION_ONLINE) {
return false;
}
if (len == 0) { if (len == 0) {
return true; return true;
} }
@@ -1199,16 +1560,27 @@ bool vm_region_validate_access(
if ((mapping->m_prot & prot) != prot) { if ((mapping->m_prot & prot) != prot) {
return false; return false;
} }
struct vm_region *parent
= region_from_entry(mapping->m_entry.e_parent);
if (parent != region) {
vm_region_unlock(parent);
}
} }
return true; return true;
} }
/* this function must be called with `region` locked */
kern_status_t vm_region_demand_map( kern_status_t vm_region_demand_map(
struct vm_region *region, struct vm_region *region,
virt_addr_t addr, virt_addr_t addr,
enum pmap_fault_flags flags) enum pmap_fault_flags flags)
{ {
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
addr &= ~VM_PAGE_MASK; addr &= ~VM_PAGE_MASK;
if (addr < region->vr_entry.e_offset if (addr < region->vr_entry.e_offset
|| addr > region->vr_entry.e_offset + region->vr_entry.e_size) { || addr > region->vr_entry.e_offset + region->vr_entry.e_size) {
@@ -1230,21 +1602,35 @@ kern_status_t vm_region_demand_map(
mapping->m_object->vo_name, mapping->m_object->vo_name,
object_offset); object_offset);
unsigned long lock_flags;
vm_object_lock_irqsave(mapping->m_object, &lock_flags);
struct vm_page *pg = vm_object_alloc_page( struct vm_page *pg = vm_object_alloc_page(
mapping->m_object, mapping->m_object,
object_offset, object_offset,
VM_PAGE_4K); VM_PAGE_4K);
vm_object_unlock_irqrestore(mapping->m_object, lock_flags);
tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr); tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr);
return pmap_add( kern_status_t status = pmap_add(
region->vr_pmap, region->vr_pmap,
addr, addr,
vm_page_get_pfn(pg), vm_page_get_pfn(pg),
mapping->m_prot, mapping->m_prot,
PMAP_NORMAL); PMAP_NORMAL);
struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);
if (parent != region) {
vm_region_unlock(parent);
}
return status;
} }
virt_addr_t vm_region_get_base_address(const struct vm_region *region) virt_addr_t vm_region_get_base_address(const struct vm_region *region)
{ {
if (region->vr_status != VM_REGION_ONLINE) {
return 0;
}
return entry_absolute_address(&region->vr_entry); return entry_absolute_address(&region->vr_entry);
} }
@@ -1255,6 +1641,10 @@ kern_status_t vm_region_read_kernel(
void *destp, void *destp,
size_t *nr_read) size_t *nr_read)
{ {
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct vm_iterator src; struct vm_iterator src;
char *dest = destp; char *dest = destp;
@@ -1296,6 +1686,14 @@ kern_status_t vm_region_memmove(
size_t count, size_t count,
size_t *nr_moved) size_t *nr_moved)
{ {
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (dest_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct vm_iterator src, dest; struct vm_iterator src, dest;
vm_iterator_begin( vm_iterator_begin(
&src, &src,
@@ -1347,6 +1745,14 @@ extern kern_status_t vm_region_memmove_v(
size_t nr_src_vecs, size_t nr_src_vecs,
size_t bytes_to_move) size_t bytes_to_move)
{ {
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (dest_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct iovec_iterator src, dest; struct iovec_iterator src, dest;
iovec_iterator_begin_user(&src, src_region, src_vecs, nr_src_vecs); iovec_iterator_begin_user(&src, src_region, src_vecs, nr_src_vecs);
iovec_iterator_begin_user(&dest, dest_region, dest_vecs, nr_dest_vecs); iovec_iterator_begin_user(&dest, dest_region, dest_vecs, nr_dest_vecs);