Compare commits

...

43 Commits

SHA1 Message Date
de520cdd2d libmango: types: add macro to define a kern_msg_handle_t 2026-03-10 19:08:49 +00:00
e84ed6057d channel: fix incorrect offset used in channel_write_msg 2026-03-10 19:08:20 +00:00
1d4cb882a8 libmango: types: add ssize_t definition 2026-03-06 20:12:32 +00:00
18b281debf kernel: bsp: add support for static bootstrap executables 2026-03-06 20:12:12 +00:00
09d292fd09 kernel: msg: include details about who sent a message 2026-03-05 21:04:02 +00:00
36c5ac7837 kernel: re-implement sending handles via port messages 2026-03-01 19:10:01 +00:00
b1bdb89ca4 vm: region: add a function to write data from a kernel buffer to a vm-region 2026-03-01 19:09:30 +00:00
f8a7a4285f syscall: msg: validate iovec array itself as well as the buffers it points to 2026-02-26 20:55:17 +00:00
f9bf4c618a syscall: log: add task id to log output 2026-02-26 20:54:14 +00:00
e4de3af00d kernel: remove support for sending kernel handles via port/channel 2026-02-26 20:53:47 +00:00
b59d0d8948 syscall: msg: locking of vm-region is now handled by channel_read_msg 2026-02-26 19:43:07 +00:00
8cc877c251 kernel: port: dequeue kmsg struct once reply is received 2026-02-26 19:42:29 +00:00
2073cad97b kernel: fix channel locking and status update issues 2026-02-26 19:42:12 +00:00
eb8758bc5e vm: region: fix some cases where regions weren't being unlocked after use. 2026-02-26 19:41:40 +00:00
1cdde0d32e kernel: add functions for safely (un)locking pairs of objects
when locking a pair of objects, the object with the lesser memory address
is always locked first. the pair is unlocked in the opposite order.
2026-02-26 19:38:49 +00:00
1c7c90ef39 kernel: channel: implement channel_read_msg and msg_read 2026-02-23 21:52:03 +00:00
11c741bd68 libmango: add nr_read output param to msg_read 2026-02-23 21:51:26 +00:00
34bd6e479c vm: region: add nr_bytes_moved output param to memmove_v 2026-02-23 21:50:35 +00:00
5f0654430d syscall: add task_self, task_get_address_space, and vm_region_kill 2026-02-23 18:43:49 +00:00
fd1bc0ad5f kernel: check object refcount before performing a recursive deletion 2026-02-23 18:43:11 +00:00
b1ffdcf2bc vm: region: improve locking rules and semantics; implement region killing
the rules around acquiring locks have been strictly defined and
implemented, and general lock usage has been improved, to fix and
prevent several different issues.

a vm-region is now destroyed in two separate steps:
 1. it is "killed": all mappings are unmapped and deleted, the
    region is removed from its parent, and the region and all of
    its sub-regions are marked as "dead", preventing any
    further actions from being performed with the region.
 2. it is "destroyed": the vm-region object is de-allocated when
    the last reference/handle is closed. the references that this
    region holds to any sub-regions are also released, meaning
    these regions may be de-allocated too.
2026-02-23 18:42:47 +00:00
5690dd5b9c kernel: add support for recursive object destruction (without recursion)
this system makes it possible for an object that forms part of a tree
to be safely recursively destroyed without using recursion.
2026-02-23 18:34:12 +00:00
37ae7aeef7 kernel: implement globally-unique object ids 2026-02-23 18:32:11 +00:00
dbe117135b x86_64: implement proper user/kernel %gs base switching
the %gs base address is now always set to the current cpu block while
in kernel-mode, and is switched back to the userspace %gs base
when returning to user-mode.
2026-02-23 18:26:21 +00:00
273557fa9f x86_64: lock task address space while performing a demand page-map 2026-02-23 18:25:49 +00:00
fe107fbad3 kernel: locks: add spin lock/unlock functions that don't change interrupt state 2026-02-23 18:24:49 +00:00
b2d04c5983 vm: object: zero-initialise pages allocated for vm-object 2026-02-21 23:19:49 +00:00
6c2ca888ee x86_64: remove kernel image post-build ELF32 patch
this patch must now be done by the wider OS build system, to avoid
interference with any bootloaders that don't support this kind of
patching (e.g. GRUB i386-pc)
2026-02-21 23:18:22 +00:00
044b3688aa vm: cache: all allocations are now zero-initialised 2026-02-21 23:18:09 +00:00
77936e3511 kernel: implement sending, receiving, and replying to messages via port/channel 2026-02-21 11:32:57 +00:00
08c78bd6e7 vm: object: add vm_object_copy syscall trace output 2026-02-21 11:30:44 +00:00
2537ca46de libmango: add macros for easily defining msg and iovec variables 2026-02-21 11:29:25 +00:00
3190035086 libmango: add temporary formatted log function 2026-02-21 11:28:58 +00:00
7f049293f4 vm: memblock: add memblock_dump to header 2026-02-21 11:27:28 +00:00
9b2c2f6b29 x86_64: start the kernel bootstrap heap above 16MiB
this will keep the memory area below 16MiB free for DMA memory allocations.
2026-02-21 11:24:36 +00:00
6e39dd45a4 sched: only disable/enable interrupts if schedule() is called from non-IRQ context 2026-02-21 11:23:43 +00:00
855440f584 vm: add trace output 2026-02-21 11:22:51 +00:00
e1e025ab6a vm: region: memmove_v() now supports iovec arrays stored in userspace 2026-02-21 11:20:09 +00:00
0680b73461 kernel: iovec: implement iterating through an iovec list stored in userspace 2026-02-21 11:17:16 +00:00
aa0933be10 vm: region: implement reading from a user-space vm-region into a kernel buffer 2026-02-21 11:16:11 +00:00
8b188a0ac4 vm: region: fix iterator using wrong buffer offset when seek exceeds current buffer size 2026-02-21 11:07:53 +00:00
ed25ee6761 vm: object: fix iterator using wrong buffer offset when seek exceeds current buffer size 2026-02-21 11:07:12 +00:00
0bae39e550 vm: zone: ensure memblock region bounds are page-aligned while creating zone blocks 2026-02-21 11:01:58 +00:00
51 changed files with 2200 additions and 515 deletions

View File

@@ -1,5 +1,5 @@
#include <kernel/machine/hwlock.h>
#include <kernel/compiler.h>
#include <kernel/machine/hwlock.h>
void ml_hwlock_lock(ml_hwlock_t *lck)
{

View File

@@ -2,9 +2,3 @@ target_compile_options(${kernel_exe_name} PRIVATE
-z max-page-size=0x1000 -m64 -mcmodel=large -mno-red-zone -mno-mmx
-mno-sse -mno-sse2 -D_64BIT -DBYTE_ORDER=1234)
target_link_libraries(${kernel_exe_name} "-z max-page-size=0x1000" "-T ${CMAKE_CURRENT_SOURCE_DIR}/arch/x86_64/layout.ld")
add_custom_command(TARGET ${kernel_exe_name} POST_BUILD
COMMAND ${BUILD_TOOLS_DIR}/e64patch $<TARGET_FILE:${kernel_exe_name}>
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "Patching kernel elf64 image"
)

View File

@@ -11,6 +11,41 @@ ml_hwlock_lock:
mov $1, %ecx
mfence
1: mov $0, %eax
lock cmpxchg %ecx, (%rdi)
jne 1b
pop %rbp
ret
.global ml_hwlock_unlock
.type ml_hwlock_unlock, @function
/* %rdi = pointer to ml_hwlock_t (int) */
ml_hwlock_unlock:
push %rbp
mov %rsp, %rbp
movl $0, (%rdi)
mfence
pop %rbp
ret
.global ml_hwlock_lock_irq
.type ml_hwlock_lock_irq, @function
/* %rdi = pointer to ml_hwlock_t (int) */
ml_hwlock_lock_irq:
push %rbp
mov %rsp, %rbp
mov $1, %ecx
cli
mfence
@@ -21,11 +56,12 @@ ml_hwlock_lock:
pop %rbp
ret
.global ml_hwlock_unlock
.type ml_hwlock_unlock, @function
.global ml_hwlock_unlock_irq
.type ml_hwlock_unlock_irq, @function
/* %rdi = pointer to ml_hwlock_t (int) */
ml_hwlock_unlock:
ml_hwlock_unlock_irq:
push %rbp
mov %rsp, %rbp
@@ -42,7 +78,7 @@ ml_hwlock_unlock:
/* %rdi = pointer to ml_hwlock_t (int)
%rsi = pointer to quadword to store rflags in */
ml_hwlock_lock_irqsave:
push %rbp
mov %rsp, %rbp
@@ -62,6 +98,7 @@ ml_hwlock_lock_irqsave:
pop %rbp
ret
.global ml_hwlock_unlock_irqrestore
.type ml_hwlock_unlock_irqrestore, @function

View File

@@ -3,7 +3,8 @@
#include <stdint.h>
#define MSR_GS_BASE 0xC0000101
#define MSR_KERNEL_GS_BASE 0xC0000102
#ifdef __cplusplus
extern "C" {

View File

@@ -12,6 +12,9 @@ typedef int ml_hwlock_t;
extern void ml_hwlock_lock(ml_hwlock_t *lck);
extern void ml_hwlock_unlock(ml_hwlock_t *lck);
extern void ml_hwlock_lock_irq(ml_hwlock_t *lck);
extern void ml_hwlock_unlock_irq(ml_hwlock_t *lck);
extern void ml_hwlock_lock_irqsave(ml_hwlock_t *lck, unsigned long *flags);
extern void ml_hwlock_unlock_irqrestore(ml_hwlock_t *lck, unsigned long flags);

View File

@@ -18,7 +18,20 @@
#include <kernel/types.h>
#include <kernel/vm.h>
#define PTR32(x) ((void *)((uintptr_t)(x)))
/* the physical address of the start of the memblock heap.
* this is an arbitrary value; the heap can start anywhere in memory.
* any reserved areas of memory (the kernel, bsp, bios data, etc) are
* automatically taken into account.
* HOWEVER, this value will dictate how much physical memory is required for
* the kernel to boot successfully.
* the value of 16MiB (0x1000000) means that all heap allocations will be
* above 16MiB, leaving the area below free for DMA operations.
* this value CAN be reduced all the way to zero to minimise the amount of
* memory required to boot, but this may leave you with no DMA memory available.
*/
#define MEMBLOCK_HEAP_START 0x1000000
static ml_cpu_block g_bootstrap_cpu = {0};
@@ -33,7 +46,7 @@ static void bootstrap_cpu_init(void)
static void early_vm_init(uintptr_t reserve_end)
{
uintptr_t alloc_start = VM_KERNEL_VOFFSET;
uintptr_t alloc_start = VM_KERNEL_VOFFSET + MEMBLOCK_HEAP_START;
/* boot code mapped 2 GiB of memory from
VM_KERNEL_VOFFSET */
uintptr_t alloc_end = VM_KERNEL_VOFFSET + 0x7fffffff;

View File

@@ -333,11 +333,30 @@ IRQ 223, 255
isr_common_stub:
PUSH_REGS
# When ISR occurs in Ring 3, CPU sets %ss (and other non-code selectors)
# to 0.
mov %ss, %ax
cmp $0, %ax
jne isr_skipgs1
mov $0x10, %ax
mov %ax, %ss
swapgs
isr_skipgs1:
mov %rsp, %rdi
call isr_dispatch
POP_REGS
add $16, %rsp
cmpq $0x1b, 32(%rsp)
jne isr_skipgs2
swapgs
isr_skipgs2:
iretq
@@ -347,11 +366,31 @@ isr_common_stub:
irq_common_stub:
PUSH_REGS
# When IRQ occurs in Ring 3, CPU sets %ss (and other non-code selectors)
# to 0.
mov %ss, %ax
cmp $0, %ax
jne irq_skipgs1
mov $0x10, %ax
mov %ax, %ss
swapgs
irq_skipgs1:
mov %rsp, %rdi
call irq_dispatch
POP_REGS
add $16, %rsp
cmpq $0x1b, 32(%rsp)
jne irq_skipgs2
swapgs
irq_skipgs2:
iretq
@@ -363,12 +402,12 @@ irq_common_stub:
syscall_gate:
swapgs
movq %rsp, %gs:20 # GS+20 = rsp2 in the current TSS block (user stack storage)
movq %gs:4, %rsp # GS+4 = rsp0 in the current TSS block (per-thread kstack)
movq %rsp, %gs:94 # GS+94 = rsp2 in the current TSS block (user stack storage)
movq %gs:78, %rsp # GS+78 = rsp0 in the current TSS block (per-thread kstack)
# start building a ml_cpu_context
pushq $0x1b
pushq %gs:20
pushq %gs:94
push %r11
push $0x23
push %rcx
@@ -380,10 +419,6 @@ syscall_gate:
mov %rsp, %rdi
# switch back to user gs while in syscall_dispatch. Interrupts are enabled in syscall_dispatch,
# and if the task gets pre-empted, the incoming task will expect %gs to have its usermode value.
swapgs
call syscall_dispatch
POP_REGS
@@ -394,8 +429,8 @@ syscall_gate:
pop %r11
add $16, %rsp
swapgs
movq %gs:20, %rsp # GS+20 = rsp2 in the current TSS block
movq %gs:94, %rsp # GS+94 = rsp2 in the current TSS block
swapgs
# back to usermode

View File

@@ -364,7 +364,12 @@ kern_status_t pmap_handle_fault(
struct task *task = current_task();
struct vm_region *space = task->t_address_space;
return vm_region_demand_map(space, fault_addr, flags);
unsigned long lock_flags;
vm_region_lock_irqsave(space, &lock_flags);
kern_status_t status = vm_region_demand_map(space, fault_addr, flags);
vm_region_unlock_irqrestore(space, lock_flags);
return status;
}
kern_status_t pmap_add(

View File

@@ -73,4 +73,5 @@ ml_thread_switch_user:
pop %rax
add $16, %rsp
swapgs
iretq

View File

@@ -1,5 +1,3 @@
#include "arch/msr.h"
#include <arch/gdt.h>
#include <arch/tss.h>
#include <kernel/libc/string.h>
@@ -22,9 +20,6 @@ void tss_init(struct tss *tss, struct tss_ptr *ptr)
void tss_load(struct tss *tss)
{
tss_flush(TSS_GDT_INDEX);
uintptr_t kernel_gs_base_reg = 0xC0000102;
wrmsr(kernel_gs_base_reg, (uintptr_t)tss);
}
virt_addr_t tss_get_kstack(struct tss *tss)

View File

@@ -1,8 +1,8 @@
#ifndef KERNEL_ARG_H_
#define KERNEL_ARG_H_
#include <mango/types.h>
#include <stdbool.h>
#include <mango/status.h>
#define CMDLINE_MAX 4096

View File

@@ -4,7 +4,7 @@
#include <kernel/object.h>
#include <kernel/sched.h>
struct kmsg;
struct msg;
struct channel {
struct object c_base;
@@ -15,37 +15,39 @@ struct channel {
};
extern kern_status_t channel_type_init(void);
extern struct channel *channel_cast(struct object *obj);
extern struct channel *channel_create(void);
extern kern_status_t channel_enqueue_msg(
struct channel *channel,
struct kmsg *msg);
struct msg *msg);
extern kern_status_t channel_recv_msg(
struct channel *channel,
struct msg *out_msg,
msgid_t *out_id,
kern_msg_t *out_msg,
unsigned long *irq_flags);
extern kern_status_t channel_reply_msg(
struct channel *channel,
msgid_t id,
const struct msg *resp,
const kern_msg_t *reply,
unsigned long *irq_flags);
extern kern_status_t channel_read_msg(
struct channel *channel,
msgid_t msg,
size_t offset,
void *buf,
size_t len,
struct vm_region *dest_region,
const kern_iovec_t *dest_iov,
size_t dest_iov_count,
size_t *nr_read);
extern kern_status_t channel_write_msg(
struct channel *channel,
msgid_t msg,
size_t offset,
const void *buf,
size_t len,
struct vm_region *src_region,
const kern_iovec_t *src_iov,
size_t src_iov_count,
size_t *nr_written);
DEFINE_OBJECT_LOCK_FUNCTION(channel, c_base)

View File

@@ -14,9 +14,10 @@
representing a serial port may allow both sending AND receiving over the
port.
*/
#include <kernel/queue.h>
#include <kernel/locks.h>
#include <kernel/queue.h>
#include <mango/status.h>
#include <mango/types.h>
#ifdef __cplusplus
extern "C" {

View File

@@ -3,6 +3,7 @@
#include <kernel/bitmap.h>
#include <mango/status.h>
#include <mango/types.h>
#include <stddef.h>
#include <stdint.h>
@@ -16,6 +17,7 @@ typedef uintptr_t handle_flags_t;
struct task;
struct object;
struct vm_region;
struct handle_list;
struct handle {
@@ -54,12 +56,14 @@ extern struct handle *handle_table_get_handle(
struct handle_table *tab,
kern_handle_t handle);
extern kern_status_t handle_list_transfer(
struct handle_table *dest,
struct handle_list *dest_list,
size_t dest_list_count,
extern kern_status_t handle_table_transfer(
struct vm_region *dst_region,
struct handle_table *dst,
kern_msg_handle_t *dst_handles,
size_t dst_handles_max,
struct vm_region *src_region,
struct handle_table *src,
const struct handle_list *src_list,
size_t src_list_count);
kern_msg_handle_t *src_handles,
size_t src_handles_count);
#endif

View File

@@ -5,7 +5,10 @@
#include <stddef.h>
struct iovec_iterator {
const struct iovec *it_vecs;
/* if this is set, we are iterating over a list of iovecs stored in
* userspace, and must go through this region to retrieve the data. */
struct vm_region *it_region;
const kern_iovec_t *it_vecs;
size_t it_nr_vecs;
size_t it_vec_ptr;
@@ -15,7 +18,12 @@ struct iovec_iterator {
extern void iovec_iterator_begin(
struct iovec_iterator *it,
const struct iovec *vecs,
const kern_iovec_t *vecs,
size_t nr_vecs);
extern void iovec_iterator_begin_user(
struct iovec_iterator *it,
struct vm_region *address_space,
const kern_iovec_t *vecs,
size_t nr_vecs);
extern void iovec_iterator_seek(struct iovec_iterator *it, size_t nr_bytes);

View File

@@ -10,13 +10,49 @@ extern "C" {
typedef __aligned(8) ml_hwlock_t spin_lock_t;
#define SPIN_LOCK_INIT ML_HWLOCK_INIT
#define spin_lock(lck) ml_hwlock_lock(lck);
#define spin_unlock(lck) ml_hwlock_unlock(lck);
#define spin_lock_irq(lck) ml_hwlock_lock_irq(lck);
#define spin_unlock_irq(lck) ml_hwlock_unlock_irq(lck);
#define spin_lock_irqsave(lck, flags) ml_hwlock_lock_irqsave(lck, flags);
#define spin_unlock_irqrestore(lck, flags) ml_hwlock_unlock_irqrestore(lck, flags);
#define spin_unlock_irqrestore(lck, flags) \
ml_hwlock_unlock_irqrestore(lck, flags);
static inline void spin_lock_pair_irqsave(
spin_lock_t *a,
spin_lock_t *b,
unsigned long *flags)
{
if (a == b) {
spin_lock_irqsave(a, flags);
} else if (a < b) {
spin_lock_irqsave(a, flags);
spin_lock(b);
} else {
spin_lock_irqsave(b, flags);
spin_lock(a);
}
}
static inline void spin_unlock_pair_irqrestore(
spin_lock_t *a,
spin_lock_t *b,
unsigned long flags)
{
if (a == b) {
spin_unlock_irqrestore(a, flags);
} else if (a < b) {
spin_unlock(b);
spin_unlock_irqrestore(a, flags);
} else {
spin_unlock(a);
spin_unlock_irqrestore(b, flags);
}
}
#ifdef __cplusplus
}
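Because both sides always take the lower-addressed lock first, two threads that lock the same pair in opposite argument orders cannot deadlock. A minimal usage sketch (caller and field names are illustrative, mirroring the handle-table transfer in channel.c later in this diff):

	unsigned long flags;
	/* safe regardless of which task is passed first */
	spin_lock_pair_irqsave(
		&sender->t_handles_lock,
		&receiver->t_handles_lock,
		&flags);
	/* ... move handles between the two tables ... */
	spin_unlock_pair_irqrestore(
		&sender->t_handles_lock,
		&receiver->t_handles_lock,
		flags);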

View File

@@ -22,8 +22,8 @@
#ifndef KERNEL_MEMBLOCK_H_
#define KERNEL_MEMBLOCK_H_
#include <limits.h>
#include <kernel/types.h>
#include <limits.h>
#include <stddef.h>
#ifdef __cplusplus
@@ -338,6 +338,8 @@ extern void __next_memory_region(
phys_addr_t start,
phys_addr_t end);
extern void memblock_dump(void);
#ifdef __cplusplus
}
#endif

View File

@@ -15,7 +15,7 @@ enum kmsg_status {
KMSG_REPLY_SENT,
};
struct kmsg {
struct msg {
spin_lock_t msg_lock;
enum kmsg_status msg_status;
struct btree_node msg_node;
@@ -23,8 +23,7 @@ struct kmsg {
kern_status_t msg_result;
struct port *msg_sender_port;
struct thread *msg_sender_thread;
const struct msg *msg_req;
struct msg *msg_resp;
kern_msg_t msg_req, msg_resp;
};
#endif

View File

@@ -3,8 +3,8 @@
#include <kernel/flags.h>
#include <kernel/locks.h>
#include <mango/status.h>
#include <kernel/vm.h>
#include <mango/status.h>
#include <stddef.h>
#ifdef __cplusplus
@@ -31,6 +31,20 @@ extern "C" {
unsigned long flags) \
{ \
object_unlock_irqrestore(&p->base, flags); \
} \
static inline void object_name##_lock_pair_irqsave( \
struct object_name *a, \
struct object_name *b, \
unsigned long *flags) \
{ \
object_lock_pair_irqsave(&a->base, &b->base, flags); \
} \
static inline void object_name##_unlock_pair_irqrestore( \
struct object_name *a, \
struct object_name *b, \
unsigned long flags) \
{ \
object_unlock_pair_irqrestore(&a->base, &b->base, flags); \
}
#define OBJECT_MAGIC 0xBADDCAFE
@@ -52,7 +66,10 @@ enum object_type_flags {
};
struct object_ops {
kern_status_t (*destroy)(struct object *obj);
kern_status_t (*destroy)(struct object *obj, struct queue *q);
kern_status_t (*destroy_recurse)(
struct queue_entry *entry,
struct object **out);
};
struct object_type {
@@ -67,6 +84,7 @@ struct object_type {
struct object {
uint32_t ob_magic;
koid_t ob_id;
struct object_type *ob_type;
spin_lock_t ob_lock;
unsigned int ob_refcount;
@@ -88,6 +106,15 @@ extern void object_unlock(struct object *obj);
extern void object_lock_irqsave(struct object *obj, unsigned long *flags);
extern void object_unlock_irqrestore(struct object *obj, unsigned long flags);
extern void object_lock_pair_irqsave(
struct object *a,
struct object *b,
unsigned long *flags);
extern void object_unlock_pair_irqrestore(
struct object *a,
struct object *b,
unsigned long flags);
#ifdef __cplusplus
}
#endif
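For a concrete instance: channel.h invokes DEFINE_OBJECT_LOCK_FUNCTION(channel, c_base), so (assuming the macro's second parameter names the embedded struct object member) the pair helper expands to roughly:

	static inline void channel_lock_pair_irqsave(
		struct channel *a,
		struct channel *b,
		unsigned long *flags)
	{
		object_lock_pair_irqsave(&a->c_base, &b->c_base, flags);
	}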

View File

@@ -29,10 +29,12 @@ extern struct port *port_cast(struct object *obj);
extern struct port *port_create(void);
extern kern_status_t port_connect(struct port *port, struct channel *remote);
extern kern_status_t port_disconnect(struct port *port);
extern kern_status_t port_send_msg(
struct port *port,
const struct msg *req,
struct msg *resp);
const kern_msg_t *msg,
kern_msg_t *out_response,
unsigned long *lock_flags);
DEFINE_OBJECT_LOCK_FUNCTION(port, p_base)

View File

@@ -85,6 +85,7 @@ struct task {
pmap_t t_pmap;
struct vm_region *t_address_space;
spin_lock_t t_handles_lock;
struct handle_table *t_handles;
struct btree b_channels;
@@ -114,7 +115,7 @@ struct thread {
virt_addr_t tr_cpu_user_sp, tr_cpu_kernel_sp;
struct runqueue *tr_rq;
struct kmsg tr_msg;
struct msg tr_msg;
struct queue_entry tr_parent_entry;
struct queue_entry tr_rqentry;

View File

@@ -2,16 +2,14 @@
#define KERNEL_SYSCALL_H_
#include <kernel/handle.h>
#include <kernel/sched.h>
#include <kernel/vm-region.h>
#include <kernel/vm.h>
#include <mango/status.h>
#include <mango/syscall.h>
#define validate_access(task, ptr, len, flags) \
vm_region_validate_access( \
task->t_address_space, \
(virt_addr_t)ptr, \
len, \
flags | VM_PROT_USER)
__validate_access(task, (const void *)ptr, len, flags)
#define validate_access_r(task, ptr, len) \
validate_access(task, ptr, len, VM_PROT_READ | VM_PROT_USER)
#define validate_access_w(task, ptr, len) \
@@ -23,7 +21,25 @@
len, \
VM_PROT_READ | VM_PROT_WRITE | VM_PROT_USER)
static inline bool __validate_access(
struct task *task,
const void *ptr,
size_t len,
vm_prot_t flags)
{
unsigned long irq_flags;
vm_region_lock_irqsave(task->t_address_space, &irq_flags);
bool result = vm_region_validate_access(
task->t_address_space,
(virt_addr_t)ptr,
len,
flags | VM_PROT_USER);
vm_region_unlock_irqrestore(task->t_address_space, irq_flags);
return result;
}
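A typical syscall-side check built from these wrappers (a sketch; user_ptr and len are illustrative):

	struct task *task = current_task();
	if (!validate_access_r(task, user_ptr, len)) {
		return KERN_INVALID_ARGUMENT;
	}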
extern kern_status_t sys_task_exit(int status);
extern kern_status_t sys_task_self(kern_handle_t *out);
extern kern_status_t sys_task_create(
kern_handle_t parent_handle,
const char *name,
@@ -37,6 +53,9 @@ extern kern_status_t sys_task_create_thread(
uintptr_t *args,
size_t nr_args,
kern_handle_t *out_thread);
extern kern_status_t sys_task_get_address_space(
kern_handle_t task,
kern_handle_t *out);
extern kern_status_t sys_thread_start(kern_handle_t thread);
@@ -75,6 +94,7 @@ extern kern_status_t sys_vm_region_create(
vm_prot_t prot,
kern_handle_t *out,
virt_addr_t *out_base_address);
extern kern_status_t sys_vm_region_kill(kern_handle_t region);
extern kern_status_t sys_vm_region_read(
kern_handle_t region,
void *dst,
@@ -123,10 +143,7 @@ extern kern_status_t sys_kern_config_set(
const void *ptr,
size_t len);
extern kern_status_t sys_channel_create(
unsigned int id,
channel_flags_t flags,
kern_handle_t *out);
extern kern_status_t sys_channel_create(unsigned int id, kern_handle_t *out);
extern kern_status_t sys_port_create(kern_handle_t *out);
extern kern_status_t sys_port_connect(
kern_handle_t port,
@@ -136,47 +153,28 @@ extern kern_status_t sys_port_disconnect(kern_handle_t port);
extern kern_status_t sys_msg_send(
kern_handle_t port,
msg_flags_t flags,
const struct msg *req,
struct msg *resp);
extern kern_status_t sys_msg_recv(
kern_handle_t channel,
msg_flags_t flags,
msgid_t *out_id,
struct msg *out_msg);
const kern_msg_t *msg,
kern_msg_t *out_reply);
extern kern_status_t sys_msg_recv(kern_handle_t channel, kern_msg_t *out_msg);
extern kern_status_t sys_msg_reply(
kern_handle_t channel,
msg_flags_t flags,
msgid_t id,
const struct msg *reply);
const kern_msg_t *msg);
extern kern_status_t sys_msg_read(
kern_handle_t channel,
kern_handle_t channel_handle,
msgid_t id,
size_t offset,
struct iovec *out,
size_t nr_out);
extern kern_status_t sys_msg_read_handles(
kern_handle_t channel,
msgid_t id,
size_t offset,
struct handle_list *out,
size_t nr_out);
const kern_iovec_t *iov,
size_t iov_count,
size_t *nr_read);
extern kern_status_t sys_msg_write(
kern_handle_t channel,
msgid_t id,
size_t offset,
const struct iovec *in,
size_t nr_in);
extern kern_status_t sys_msg_write_handles(
kern_handle_t channel,
msgid_t id,
size_t offset,
const struct handle_list *in,
size_t nr_in);
const kern_iovec_t *in,
size_t nr_in,
size_t *nr_written);
extern virt_addr_t syscall_get_function(unsigned int sysid);

View File

@@ -11,6 +11,11 @@
struct vm_region;
struct vm_object;
enum vm_region_status {
VM_REGION_DEAD = 0,
VM_REGION_ONLINE,
};
enum vm_region_entry_type {
VM_REGION_ENTRY_NONE = 0,
VM_REGION_ENTRY_REGION,
@@ -18,9 +23,16 @@ enum vm_region_entry_type {
};
struct vm_region_entry {
struct btree_node e_node;
union {
struct btree_node e_node;
/* this entry is only used to queue vm-region objects for
* recursive cleanup */
struct queue_entry e_entry;
};
struct vm_region_entry *e_parent;
enum vm_region_entry_type e_type;
/* absolute address of this entry */
virt_addr_t e_address;
/* offset in bytes of this entry within its immediate parent. */
off_t e_offset;
/* size of the entry in bytes */
@@ -31,7 +43,7 @@ struct vm_region_mapping {
struct vm_region_entry m_entry;
struct vm_object *m_object;
/* used to link to vm_object->vo_mappings */
struct queue_entry m_object_entry;
vm_prot_t m_prot;
@@ -41,6 +53,7 @@ struct vm_region_mapping {
struct vm_region {
struct object vr_base;
enum vm_region_status vr_status;
struct vm_region_entry vr_entry;
char vr_name[VM_REGION_NAME_MAX];
@@ -81,6 +94,19 @@ extern kern_status_t vm_region_create(
vm_prot_t prot,
struct vm_region **out);
/* recursively kills a given region and all of its sub-regions.
* when a region is killed, all of its mappings are unmapped, and any further
* operations on the region are denied. however, all handles and references to
the region (and any sub-region) remain valid, and no kernel memory is
* de-allocated.
* the memory used by the vm-region object itself is de-allocated when the last
* handle/reference to the object is released.
* this function should be called with `region` locked.
*/
extern kern_status_t vm_region_kill(
struct vm_region *region,
unsigned long *lock_flags);
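A minimal call sketch following the rule above (hedged: this diff doesn't show whether vm_region_kill releases or re-acquires the lock through lock_flags, so the caller-side flags handling here is an assumption):

	unsigned long flags;
	vm_region_lock_irqsave(region, &flags);
	kern_status_t status = vm_region_kill(region, &flags);
	/* region and its sub-regions are now dead; the objects themselves
	 * are freed only when the last handle/reference is released */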
/* map a vm-object into a vm-region.
* [region_offset,length] must fall within exactly one region, and cannot span
* multiple sibling regions.
@@ -122,6 +148,24 @@ extern virt_addr_t vm_region_get_base_address(const struct vm_region *region);
extern void vm_region_dump(struct vm_region *region);
/* read data from the user-space area of a vm-region into a kernel-mode buffer
*/
extern kern_status_t vm_region_read_kernel(
struct vm_region *src_region,
virt_addr_t src_ptr,
size_t count,
void *dest,
size_t *nr_read);
/* write data to the user-space area of a vm-region from a kernel-mode buffer
*/
extern kern_status_t vm_region_write_kernel(
struct vm_region *dst_region,
virt_addr_t dst_ptr,
size_t count,
const void *src,
size_t *nr_written);
extern kern_status_t vm_region_memmove(
struct vm_region *dest_region,
virt_addr_t dest_ptr,
@@ -133,13 +177,14 @@ extern kern_status_t vm_region_memmove(
extern kern_status_t vm_region_memmove_v(
struct vm_region *dest_region,
size_t dest_offset,
struct iovec *dest,
const kern_iovec_t *dest,
size_t nr_dest,
struct vm_region *src_region,
size_t src_offset,
const struct iovec *src,
const kern_iovec_t *src,
size_t nr_src,
size_t bytes_to_move);
size_t bytes_to_move,
size_t *nr_bytes_moved);
DEFINE_OBJECT_LOCK_FUNCTION(vm_region, vr_base)

View File

@@ -1,6 +1,7 @@
#include <kernel/arg.h>
#include <kernel/libc/string.h>
#include <kernel/libc/ctype.h>
#include <kernel/libc/string.h>
#include <mango/status.h>
static char g_cmdline[CMDLINE_MAX + 1] = {0};
@@ -81,7 +82,6 @@ static char *advance_to_next_arg(char *s, char *max)
return s;
}
const char *arg_value(const char *arg_name)
{
char *s = g_cmdline;

View File

@@ -69,7 +69,7 @@ kern_status_t bsp_load(struct bsp *bsp, const struct boot_module *mod)
return KERN_OK;
}
static kern_status_t map_executable(
static kern_status_t map_executable_dyn(
struct bsp *bsp,
struct task *task,
virt_addr_t *entry)
@@ -164,6 +164,82 @@ static kern_status_t map_executable(
return KERN_OK;
}
static kern_status_t map_executable_exec(
struct bsp *bsp,
struct task *task,
virt_addr_t *entry)
{
kern_status_t status = KERN_OK;
struct vm_object *data = vm_object_create(
".data",
5,
bsp->bsp_trailer.bsp_data_size,
VM_PROT_READ | VM_PROT_WRITE | VM_PROT_USER);
if (!data) {
return KERN_NO_MEMORY;
}
virt_addr_t text_base = 0, data_base = 0;
off_t text_foffset = bsp->bsp_trailer.bsp_exec_offset
+ bsp->bsp_trailer.bsp_text_faddr;
off_t data_foffset = 0;
off_t text_voffset = bsp->bsp_trailer.bsp_text_vaddr;
off_t data_voffset = bsp->bsp_trailer.bsp_data_vaddr;
text_voffset -= vm_region_get_base_address(task->t_address_space);
data_voffset -= vm_region_get_base_address(task->t_address_space);
#if 0
size_t tmp = 0;
status = vm_object_copy(
data,
0,
bsp->bsp_vmo,
bsp->bsp_trailer.bsp_data_faddr,
bsp->bsp_trailer.bsp_data_size,
&tmp);
tracek("read %zuB of data from executable", tmp);
#endif
tracek("text_foffset=%06llx, data_foffset=%06llx",
text_foffset,
data_foffset);
tracek("text_voffset=%08llx, data_voffset=%08llx",
text_voffset,
data_voffset);
status = vm_region_map_object(
task->t_address_space,
text_voffset,
bsp->bsp_vmo,
text_foffset,
bsp->bsp_trailer.bsp_text_size,
VM_PROT_READ | VM_PROT_EXEC | VM_PROT_USER,
&text_base);
if (status != KERN_OK) {
return status;
}
status = vm_region_map_object(
task->t_address_space,
data_voffset,
data,
data_foffset,
bsp->bsp_trailer.bsp_data_size,
VM_PROT_READ | VM_PROT_WRITE | VM_PROT_USER,
&data_base);
if (status != KERN_OK) {
return status;
}
tracek("text_base=%08llx, data_base=%08llx", text_base, data_base);
*entry = bsp->bsp_trailer.bsp_exec_entry;
return KERN_OK;
}
kern_status_t bsp_launch_async(struct bsp *bsp, struct task *task)
{
virt_addr_t stack_buffer, bsp_data_base;
@@ -205,7 +281,7 @@ kern_status_t bsp_launch_async(struct bsp *bsp, struct task *task)
return status;
}
status = map_executable(bsp, task, &entry);
status = map_executable_exec(bsp, task, &entry);
if (status != KERN_OK) {
return status;
}

View File

@@ -1,5 +1,6 @@
#include <kernel/channel.h>
#include <kernel/msg.h>
#include <kernel/port.h>
#include <kernel/util.h>
#include <kernel/vm-region.h>
@@ -11,13 +12,18 @@ static struct object_type channel_type = {
.ob_header_offset = offsetof(struct channel, c_base),
};
BTREE_DEFINE_SIMPLE_GET(struct kmsg, msgid_t, msg_node, msg_id, get_msg_with_id)
BTREE_DEFINE_SIMPLE_GET(struct msg, msgid_t, msg_node, msg_id, get_msg_with_id)
kern_status_t channel_type_init(void)
{
return object_type_register(&channel_type);
}
struct channel *channel_cast(struct object *obj)
{
return CHANNEL_CAST(obj);
}
extern struct channel *channel_create(void)
{
struct object *channel_object = object_create(&channel_type);
@@ -30,7 +36,7 @@ extern struct channel *channel_create(void)
return channel;
}
static bool try_enqueue(struct btree *tree, struct kmsg *msg)
static bool try_enqueue(struct btree *tree, struct msg *msg)
{
if (!tree->b_root) {
tree->b_root = &msg->msg_node;
@@ -40,8 +46,8 @@ static bool try_enqueue(struct btree *tree, struct kmsg *msg)
struct btree_node *cur = tree->b_root;
while (1) {
struct kmsg *cur_node
= BTREE_CONTAINER(struct kmsg, msg_node, cur);
struct msg *cur_node
= BTREE_CONTAINER(struct msg, msg_node, cur);
struct btree_node *next = NULL;
if (msg->msg_id > cur_node->msg_id) {
@@ -69,26 +75,33 @@ static bool try_enqueue(struct btree *tree, struct kmsg *msg)
return true;
}
static void kmsg_reply_error(struct kmsg *msg, kern_status_t status)
static void kmsg_reply_error(
struct msg *msg,
kern_status_t status,
unsigned long *lock_flags)
{
msg->msg_status = KMSG_REPLY_SENT;
msg->msg_status = status;
msg->msg_sender_port->p_status = PORT_READY;
msg->msg_result = status;
thread_awaken(msg->msg_sender_thread);
spin_unlock_irqrestore(&msg->msg_lock, *lock_flags);
}
static struct kmsg *get_next_msg(struct channel *channel)
static struct msg *get_next_msg(
struct channel *channel,
unsigned long *lock_flags)
{
unsigned long flags;
struct btree_node *cur = btree_first(&channel->c_msg);
while (cur) {
struct kmsg *msg = BTREE_CONTAINER(struct kmsg, msg_node, cur);
spin_lock_irqsave(&msg->msg_lock, &flags);
struct msg *msg = BTREE_CONTAINER(struct msg, msg_node, cur);
spin_lock_irqsave(&msg->msg_lock, lock_flags);
if (msg->msg_status == KMSG_WAIT_RECEIVE) {
msg->msg_status = KMSG_WAIT_REPLY;
msg->msg_sender_port->p_status = PORT_REPLY_BLOCKED;
return msg;
}
spin_unlock_irqrestore(&msg->msg_lock, flags);
spin_unlock_irqrestore(&msg->msg_lock, *lock_flags);
cur = btree_next(cur);
}
@@ -97,7 +110,7 @@ static struct kmsg *get_next_msg(struct channel *channel)
extern kern_status_t channel_enqueue_msg(
struct channel *channel,
struct kmsg *msg)
struct msg *msg)
{
fill_random(&msg->msg_id, sizeof msg->msg_id);
while (!try_enqueue(&channel->c_msg, msg)) {
@@ -111,18 +124,18 @@ extern kern_status_t channel_enqueue_msg(
extern kern_status_t channel_recv_msg(
struct channel *channel,
struct msg *out_msg,
msgid_t *out_id,
kern_msg_t *out_msg,
unsigned long *irq_flags)
{
struct wait_item waiter;
struct thread *self = current_thread();
struct kmsg *msg = NULL;
struct msg *msg = NULL;
unsigned long msg_lock_flags;
wait_item_init(&waiter, self);
for (;;) {
thread_wait_begin(&waiter, &channel->c_wq);
msg = get_next_msg(channel);
msg = get_next_msg(channel, &msg_lock_flags);
if (msg) {
break;
}
@@ -138,103 +151,229 @@ extern kern_status_t channel_recv_msg(
struct task *sender = msg->msg_sender_thread->tr_parent;
struct task *receiver = self->tr_parent;
struct vm_region *src = sender->t_address_space,
*dst = receiver->t_address_space;
unsigned long f;
vm_region_lock_pair_irqsave(src, dst, &f);
kern_status_t status = vm_region_memmove_v(
receiver->t_address_space,
dst,
0,
out_msg->msg_data,
out_msg->msg_data_count,
sender->t_address_space,
src,
0,
msg->msg_req->msg_data,
msg->msg_req->msg_data_count,
VM_REGION_COPY_ALL);
msg->msg_req.msg_data,
msg->msg_req.msg_data_count,
VM_REGION_COPY_ALL,
NULL);
if (status != KERN_OK) {
kmsg_reply_error(msg, status);
kmsg_reply_error(msg, status, &msg_lock_flags);
return status;
}
status = handle_list_transfer(
receiver->t_handles,
struct handle_table *src_table = sender->t_handles,
*dst_table = receiver->t_handles;
spin_lock_pair_irqsave(
&sender->t_handles_lock,
&receiver->t_handles_lock,
&f);
status = handle_table_transfer(
dst,
dst_table,
out_msg->msg_handles,
out_msg->msg_handles_count,
sender->t_handles,
msg->msg_req->msg_handles,
msg->msg_req->msg_handles_count);
src,
src_table,
msg->msg_req.msg_handles,
msg->msg_req.msg_handles_count);
spin_unlock_pair_irqrestore(
&sender->t_handles_lock,
&receiver->t_handles_lock,
f);
vm_region_unlock_pair_irqrestore(src, dst, f);
if (status != KERN_OK) {
kmsg_reply_error(msg, status);
kmsg_reply_error(msg, status, &msg_lock_flags);
return status;
}
kmsg_reply_error(msg, KERN_OK);
out_msg->msg_id = msg->msg_id;
out_msg->msg_sender = msg->msg_sender_thread->tr_parent->t_id;
out_msg->msg_endpoint = msg->msg_sender_port->p_base.ob_id;
spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags);
return KERN_OK;
}
extern kern_status_t channel_reply_msg(
struct channel *channel,
msgid_t id,
const struct msg *resp,
const kern_msg_t *reply,
unsigned long *irq_flags)
{
struct kmsg *msg = get_msg_with_id(&channel->c_msg, id);
if (!msg || msg->msg_status != KMSG_WAIT_REPLY) {
unsigned long msg_lock_flags;
struct msg *msg = get_msg_with_id(&channel->c_msg, id);
if (!msg) {
return KERN_INVALID_ARGUMENT;
}
spin_lock_irqsave(&msg->msg_lock, &msg_lock_flags);
if (msg->msg_status != KMSG_WAIT_REPLY) {
spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags);
return KERN_INVALID_ARGUMENT;
}
struct thread *self = current_thread();
struct task *sender = msg->msg_sender_thread->tr_parent;
struct task *receiver = self->tr_parent;
/* the task that is about to receive the response */
struct task *receiver = msg->msg_sender_thread->tr_parent;
/* the task that is about to send the response */
struct task *sender = self->tr_parent;
struct vm_region *src = sender->t_address_space,
*dst = receiver->t_address_space;
unsigned long f;
vm_region_lock_pair_irqsave(src, dst, &f);
kern_status_t status = vm_region_memmove_v(
receiver->t_address_space,
dst,
0,
msg->msg_resp->msg_data,
msg->msg_resp->msg_data_count,
sender->t_address_space,
msg->msg_resp.msg_data,
msg->msg_resp.msg_data_count,
src,
0,
resp->msg_data,
resp->msg_data_count,
VM_REGION_COPY_ALL);
reply->msg_data,
reply->msg_data_count,
VM_REGION_COPY_ALL,
NULL);
if (status != KERN_OK) {
kmsg_reply_error(msg, status);
kmsg_reply_error(msg, status, &msg_lock_flags);
return status;
}
status = handle_list_transfer(
receiver->t_handles,
msg->msg_resp->msg_handles,
msg->msg_resp->msg_handles_count,
sender->t_handles,
resp->msg_handles,
resp->msg_handles_count);
struct handle_table *src_table = sender->t_handles,
*dst_table = receiver->t_handles;
spin_lock_pair_irqsave(
&sender->t_handles_lock,
&receiver->t_handles_lock,
&f);
status = handle_table_transfer(
dst,
dst_table,
msg->msg_resp.msg_handles,
msg->msg_resp.msg_handles_count,
src,
src_table,
reply->msg_handles,
reply->msg_handles_count);
spin_unlock_pair_irqrestore(
&sender->t_handles_lock,
&receiver->t_handles_lock,
f);
vm_region_unlock_pair_irqrestore(src, dst, f);
if (status != KERN_OK) {
kmsg_reply_error(msg, status);
kmsg_reply_error(msg, status, &msg_lock_flags);
return status;
}
msg->msg_status = KERN_OK;
msg->msg_status = KMSG_REPLY_SENT;
kmsg_reply_error(msg, KERN_OK, &msg_lock_flags);
return KERN_UNIMPLEMENTED;
return KERN_OK;
}
extern kern_status_t channel_read_msg(
struct channel *channel,
msgid_t msg,
msgid_t id,
size_t offset,
void *buf,
size_t len,
struct vm_region *dest_region,
const kern_iovec_t *dest_iov,
size_t dest_iov_count,
size_t *nr_read)
{
return KERN_UNIMPLEMENTED;
unsigned long msg_lock_flags;
struct msg *msg = get_msg_with_id(&channel->c_msg, id);
if (!msg) {
return KERN_INVALID_ARGUMENT;
}
spin_lock_irqsave(&msg->msg_lock, &msg_lock_flags);
if (msg->msg_status != KMSG_WAIT_REPLY) {
spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags);
return KERN_INVALID_ARGUMENT;
}
struct vm_region *src_region
= msg->msg_sender_thread->tr_parent->t_address_space;
unsigned long f;
vm_region_lock_pair_irqsave(src_region, dest_region, &f);
kern_status_t status = vm_region_memmove_v(
dest_region,
0,
dest_iov,
dest_iov_count,
src_region,
offset,
msg->msg_req.msg_data,
msg->msg_req.msg_data_count,
VM_REGION_COPY_ALL,
nr_read);
vm_region_unlock_pair_irqrestore(src_region, dest_region, f);
spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags);
return status;
}
extern kern_status_t channel_write_msg(
struct channel *channel,
msgid_t msg,
msgid_t id,
size_t offset,
const void *buf,
size_t len,
struct vm_region *src_region,
const kern_iovec_t *src_iov,
size_t src_iov_count,
size_t *nr_written)
{
return KERN_UNIMPLEMENTED;
unsigned long msg_lock_flags;
struct msg *msg = get_msg_with_id(&channel->c_msg, id);
if (!msg) {
return KERN_INVALID_ARGUMENT;
}
spin_lock_irqsave(&msg->msg_lock, &msg_lock_flags);
if (msg->msg_status != KMSG_WAIT_REPLY) {
spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags);
return KERN_INVALID_ARGUMENT;
}
struct vm_region *dest_region
= msg->msg_sender_thread->tr_parent->t_address_space;
unsigned long f;
vm_region_lock_pair_irqsave(src_region, dest_region, &f);
kern_status_t status = vm_region_memmove_v(
dest_region,
offset,
msg->msg_resp.msg_data,
msg->msg_resp.msg_data_count,
src_region,
0,
src_iov,
src_iov_count,
VM_REGION_COPY_ALL,
nr_written);
vm_region_unlock_pair_irqrestore(src_region, dest_region, f);
spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags);
return status;
}

View File

@@ -3,7 +3,9 @@
#include <kernel/object.h>
#include <kernel/sched.h>
#include <kernel/util.h>
#include <kernel/vm-region.h>
#include <kernel/vm.h>
#include <mango/types.h>
/* depth=3 gives a maximum of ~66.6 million handles */
#define MAX_TABLE_DEPTH 3
@@ -192,122 +194,143 @@ struct handle *handle_table_get_handle(
return &tab->t_handles.t_handle_list[handle_index];
}
struct handle_list_iterator {
struct handle_list *it_list;
size_t it_list_count;
size_t it_list_ptr;
kern_handle_t *it_handles;
size_t it_nr_handles;
};
static void handle_list_iterator_begin(
struct handle_list_iterator *it,
struct handle_list *list,
size_t list_count)
kern_status_t handle_table_transfer(
struct vm_region *dst_region,
struct handle_table *dst,
kern_msg_handle_t *dst_handles,
size_t dst_handles_max,
struct vm_region *src_region,
struct handle_table *src,
kern_msg_handle_t *src_handles,
size_t src_handles_count)
{
memset(it, 0x0, sizeof *it);
it->it_list = list;
it->it_list_count = list_count;
kern_status_t status = KERN_OK;
size_t to_transfer = MIN(dst_handles_max, src_handles_count);
while (it->it_list_ptr < list_count) {
if (list[it->it_list_ptr].l_nr_handles > 0) {
size_t i = 0;
for (i = 0; i < to_transfer; i++) {
kern_msg_handle_t src_handle = {0}, dst_handle = {0};
virt_addr_t src_handle_addr
= (virt_addr_t)src_handles + (i * sizeof src_handle);
virt_addr_t dst_handle_addr
= (virt_addr_t)dst_handles + (i * sizeof dst_handle);
status = vm_region_read_kernel(
src_region,
src_handle_addr,
sizeof src_handle,
&src_handle,
NULL);
if (status != KERN_OK) {
src_handle.hnd_result = status;
vm_region_write_kernel(
src_region,
src_handle_addr,
sizeof src_handle,
&src_handle,
NULL);
break;
}
it->it_list_ptr++;
}
struct handle *src_entry
= handle_table_get_handle(src, src_handle.hnd_value);
struct handle *dst_entry = NULL;
kern_handle_t dst_value = KERN_HANDLE_INVALID;
if (it->it_list_ptr >= list_count) {
return;
}
it->it_handles = list[it->it_list_ptr].l_handles;
it->it_nr_handles = list[it->it_list_ptr].l_nr_handles;
}
static void handle_list_iterator_seek(
struct handle_list_iterator *it,
size_t nr_handles)
{
if (nr_handles > it->it_nr_handles) {
nr_handles = it->it_nr_handles;
}
if (nr_handles < it->it_nr_handles) {
it->it_handles += nr_handles;
it->it_nr_handles -= nr_handles;
return;
}
it->it_list_ptr++;
while (it->it_list_ptr < it->it_list_count) {
if (it->it_list[it->it_list_ptr].l_nr_handles > 0) {
if (!src_entry) {
status = KERN_INVALID_ARGUMENT;
src_handle.hnd_result = status;
vm_region_write_kernel(
src_region,
src_handle_addr,
sizeof src_handle,
&src_handle,
NULL);
break;
}
it->it_list_ptr++;
}
if (it->it_list_ptr >= it->it_list_count) {
return;
}
it->it_handles = it->it_list[it->it_list_ptr].l_handles;
it->it_nr_handles = it->it_list[it->it_list_ptr].l_nr_handles;
}
kern_status_t handle_list_transfer(
struct handle_table *dest_table,
struct handle_list *dest_list,
size_t dest_list_count,
struct handle_table *src_table,
const struct handle_list *src_list,
size_t src_list_count)
{
struct handle_list_iterator src, dest;
handle_list_iterator_begin(
&src,
(struct handle_list *)src_list,
src_list_count);
handle_list_iterator_begin(&dest, dest_list, dest_list_count);
while (src.it_nr_handles && dest.it_nr_handles) {
size_t to_copy = MIN(src.it_nr_handles, dest.it_nr_handles);
for (size_t i = 0; i < to_copy; i++) {
kern_handle_t handle_v = src.it_handles[i];
struct handle *handle
= handle_table_get_handle(src_table, handle_v);
if (!handle) {
return KERN_HANDLE_INVALID;
}
struct object *obj = object_ref(handle->h_object);
handle_flags_t flags = handle->h_flags;
handle_table_free_handle(src_table, handle_v);
struct handle *dest_slot = NULL;
kern_status_t status = handle_table_alloc_handle(
dest_table,
&dest_slot,
&handle_v);
switch (src_handle.hnd_mode) {
case KERN_MSG_HANDLE_IGNORE:
break;
case KERN_MSG_HANDLE_MOVE:
status = handle_table_alloc_handle(
dst,
&dst_entry,
&dst_value);
if (status != KERN_OK) {
return status;
break;
}
dest_slot->h_object = obj;
dest_slot->h_flags = flags;
dst_entry->h_object = src_entry->h_object;
dst_entry->h_flags = src_entry->h_flags;
object_add_handle(dst_entry->h_object);
object_add_handle(obj);
object_unref(obj);
handle_table_free_handle(src, src_handle.hnd_value);
dest.it_handles[i] = handle_v;
dst_handle.hnd_mode = src_handle.hnd_mode;
dst_handle.hnd_value = dst_value;
dst_handle.hnd_result = KERN_OK;
break;
case KERN_MSG_HANDLE_COPY:
status = handle_table_alloc_handle(
dst,
&dst_entry,
&dst_value);
if (status != KERN_OK) {
break;
}
dst_entry->h_object = src_entry->h_object;
dst_entry->h_flags = src_entry->h_flags;
object_add_handle(dst_entry->h_object);
dst_handle.hnd_mode = src_handle.hnd_mode;
dst_handle.hnd_value = dst_value;
dst_handle.hnd_result = KERN_OK;
break;
default:
status = KERN_INVALID_ARGUMENT;
break;
}
handle_list_iterator_seek(&src, to_copy);
handle_list_iterator_seek(&dest, to_copy);
src_handle.hnd_result = status;
vm_region_write_kernel(
src_region,
src_handle_addr,
sizeof src_handle,
&src_handle,
NULL);
vm_region_write_kernel(
dst_region,
dst_handle_addr,
sizeof dst_handle,
&dst_handle,
NULL);
}
return KERN_OK;
for (; i < src_handles_count; i++) {
kern_msg_handle_t handle = {0};
virt_addr_t handle_addr
= (virt_addr_t)src_handles + (i * sizeof handle);
vm_region_read_kernel(
src_region,
handle_addr,
sizeof handle,
&handle,
NULL);
if (handle.hnd_mode != KERN_MSG_HANDLE_MOVE) {
continue;
}
struct handle *src_entry
= handle_table_get_handle(src, handle.hnd_value);
if (src_entry) {
object_remove_handle(src_entry->h_object);
handle_table_free_handle(src, handle.hnd_value);
}
}
return status;
}

View File

@@ -1,10 +1,66 @@
#include <kernel/iovec.h>
#include <kernel/libc/string.h>
#include <kernel/util.h>
#include <kernel/vm-region.h>
static bool read_iovec(
struct iovec_iterator *it,
size_t index,
kern_iovec_t *out)
{
if (index >= it->it_nr_vecs) {
return false;
}
if (!it->it_region) {
memcpy(out, &it->it_vecs[index], sizeof *out);
return true;
}
size_t nr_read = 0;
kern_status_t status = vm_region_read_kernel(
it->it_region,
(virt_addr_t)it->it_vecs + (index * sizeof(kern_iovec_t)),
sizeof(kern_iovec_t),
out,
&nr_read);
return (status == KERN_OK && nr_read == sizeof(kern_iovec_t));
}
void iovec_iterator_begin_user(
struct iovec_iterator *it,
struct vm_region *region,
const kern_iovec_t *vecs,
size_t nr_vecs)
{
memset(it, 0x0, sizeof *it);
it->it_region = region;
it->it_vecs = vecs;
it->it_nr_vecs = nr_vecs;
kern_iovec_t iov;
while (it->it_vec_ptr < nr_vecs) {
read_iovec(it, it->it_vec_ptr, &iov);
if (iov.io_len > 0) {
break;
}
it->it_vec_ptr++;
}
if (it->it_vec_ptr >= nr_vecs) {
return;
}
it->it_base = iov.io_base;
it->it_len = iov.io_len;
}
void iovec_iterator_begin(
struct iovec_iterator *it,
const struct iovec *vecs,
const kern_iovec_t *vecs,
size_t nr_vecs)
{
memset(it, 0x0, sizeof *it);
@@ -20,6 +76,8 @@ void iovec_iterator_begin(
}
if (it->it_vec_ptr >= nr_vecs) {
it->it_len = 0;
it->it_base = 0;
return;
}
@@ -39,10 +97,12 @@ void iovec_iterator_seek(struct iovec_iterator *it, size_t nr_bytes)
}
nr_bytes -= to_seek;
kern_iovec_t iov;
it->it_vec_ptr++;
while (it->it_vec_ptr < it->it_nr_vecs) {
if (it->it_vecs[it->it_vec_ptr].io_len > 0) {
read_iovec(it, it->it_vec_ptr, &iov);
if (iov.io_len > 0) {
break;
}
@@ -50,10 +110,12 @@ void iovec_iterator_seek(struct iovec_iterator *it, size_t nr_bytes)
}
if (it->it_vec_ptr >= it->it_nr_vecs) {
it->it_len = 0;
it->it_base = 0;
return;
}
it->it_base = it->it_vecs[it->it_vec_ptr].io_base;
it->it_len = it->it_vecs[it->it_vec_ptr].io_len;
it->it_base = iov.io_base;
it->it_len = iov.io_len;
}
}
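A consumption sketch for the userspace-backed variant (task, user_vecs, nr_vecs, and the per-chunk copy are illustrative; it_base/it_len and iovec_iterator_seek are the fields and function shown above):

	struct iovec_iterator it;
	iovec_iterator_begin_user(&it, task->t_address_space, user_vecs, nr_vecs);
	while (it.it_len > 0 && bytes_remaining > 0) {
		size_t chunk = MIN(it.it_len, bytes_remaining);
		/* copy chunk bytes at it.it_base through the region ... */
		iovec_iterator_seek(&it, chunk); /* skips zero-length iovecs */
		bytes_remaining -= chunk;
	}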

View File

@@ -7,6 +7,20 @@
static struct queue object_types;
static spin_lock_t object_types_lock = SPIN_LOCK_INIT;
static koid_t koid_alloc(void)
{
static koid_t counter = 0;
static spin_lock_t lock = SPIN_LOCK_INIT;
unsigned long flags;
spin_lock_irqsave(&lock, &flags);
koid_t result = counter;
counter++;
spin_unlock_irqrestore(&lock, flags);
return result;
}
kern_status_t object_bootstrap(void)
{
return KERN_OK;
@@ -50,11 +64,10 @@ struct object *object_create(struct object_type *type)
return NULL;
}
memset(obj_buf, 0x00, type->ob_size);
struct object *obj = (struct object *)((unsigned char *)obj_buf
+ type->ob_header_offset);
obj->ob_id = koid_alloc();
obj->ob_type = type;
obj->ob_lock = SPIN_LOCK_INIT;
obj->ob_magic = OBJECT_MAGIC;
@@ -70,6 +83,15 @@ struct object *object_ref(struct object *obj)
return obj;
}
static void __cleanup(struct object *obj, struct queue *queue)
{
if (HAS_OP(obj, destroy)) {
obj->ob_type->ob_ops.destroy(obj, queue);
}
vm_cache_free(&obj->ob_type->ob_cache, obj);
}
static void object_cleanup(struct object *obj, unsigned long flags)
{
if (obj->ob_refcount > 0 || obj->ob_handles > 0) {
@@ -77,11 +99,30 @@ static void object_cleanup(struct object *obj, unsigned long flags)
return;
}
if (HAS_OP(obj, destroy)) {
obj->ob_type->ob_ops.destroy(obj);
struct queue queue = QUEUE_INIT;
__cleanup(obj, &queue);
if (!HAS_OP(obj, destroy_recurse)) {
return;
}
vm_cache_free(&obj->ob_type->ob_cache, obj);
while (!queue_empty(&queue)) {
struct queue_entry *entry = queue_pop_front(&queue);
struct object *child = NULL;
obj->ob_type->ob_ops.destroy_recurse(entry, &child);
if (!child) {
continue;
}
if (child->ob_refcount > 1) {
child->ob_refcount--;
continue;
}
if (child->ob_refcount == 0 && child->ob_handles == 0) {
__cleanup(child, &queue);
}
}
}
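The loop above is the standard worklist replacement for recursion: each destroy op queues its children instead of descending into them. In isolation (a generic illustration, not mango code), the same shape looks like:

	#include <stdlib.h>

	struct node { struct node *first_child, *next_sibling; };

	/* free a whole tree without recursing: children are threaded onto
	 * the worklist (reusing next_sibling) before their parent is freed */
	static void destroy_tree(struct node *root)
	{
		struct node *work = root; /* assumes root->next_sibling == NULL */
		while (work) {
			struct node *n = work;
			work = n->next_sibling;
			while (n->first_child) {
				struct node *c = n->first_child;
				n->first_child = c->next_sibling;
				c->next_sibling = work;
				work = c;
			}
			free(n);
		}
	}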
void object_unref(struct object *obj)
@@ -137,6 +178,38 @@ void object_unlock_irqrestore(struct object *obj, unsigned long flags)
spin_unlock_irqrestore(&obj->ob_lock, flags);
}
void object_lock_pair_irqsave(
struct object *a,
struct object *b,
unsigned long *flags)
{
if (a == b) {
object_lock_irqsave(a, flags);
} else if (a < b) {
object_lock_irqsave(a, flags);
object_lock(b);
} else {
object_lock_irqsave(b, flags);
object_lock(a);
}
}
void object_unlock_pair_irqrestore(
struct object *a,
struct object *b,
unsigned long flags)
{
if (a == b) {
object_unlock_irqrestore(a, flags);
} else if (a < b) {
object_unlock(b);
object_unlock_irqrestore(a, flags);
} else {
object_unlock(a);
object_unlock_irqrestore(b, flags);
}
}
void *object_data(struct object *obj)
{
return (char *)obj + sizeof *obj;

View File

@@ -20,8 +20,24 @@ struct port *port_cast(struct object *obj)
return PORT_CAST(obj);
}
static void wait_for_reply(struct port *port)
static void wait_for_reply(struct msg *msg, unsigned long *lock_flags)
{
struct wait_item waiter;
struct thread *self = current_thread();
wait_item_init(&waiter, self);
for (;;) {
self->tr_state = THREAD_SLEEPING;
if (msg->msg_status == KMSG_REPLY_SENT) {
break;
}
port_unlock_irqrestore(msg->msg_sender_port, *lock_flags);
schedule(SCHED_NORMAL);
port_lock_irqsave(msg->msg_sender_port, lock_flags);
}
self->tr_state = THREAD_READY;
}
struct port *port_create(void)
@@ -49,30 +65,47 @@ kern_status_t port_connect(struct port *port, struct channel *remote)
return KERN_OK;
}
kern_status_t port_disconnect(struct port *port)
{
if (port->p_status != PORT_READY) {
return KERN_BAD_STATE;
}
port->p_remote = NULL;
port->p_status = PORT_OFFLINE;
return KERN_OK;
}
kern_status_t port_send_msg(
struct port *port,
const struct msg *req,
struct msg *resp)
const kern_msg_t *in_msg,
kern_msg_t *out_reply,
unsigned long *lock_flags)
{
if (port->p_status != PORT_READY) {
return KERN_BAD_STATE;
}
struct thread *self = current_thread();
struct kmsg *msg = &self->tr_msg;
struct msg *msg = &self->tr_msg;
memset(msg, 0x0, sizeof *msg);
msg->msg_status = KMSG_WAIT_RECEIVE;
msg->msg_sender_thread = self;
msg->msg_sender_port = port;
msg->msg_req = req;
msg->msg_resp = resp;
memcpy(&msg->msg_req, in_msg, sizeof msg->msg_req);
memcpy(&msg->msg_resp, out_reply, sizeof msg->msg_resp);
unsigned long flags;
channel_lock_irqsave(port->p_remote, &flags);
port->p_status = PORT_SEND_BLOCKED;
channel_enqueue_msg(port->p_remote, msg);
channel_unlock_irqrestore(port->p_remote, flags);
port->p_status = PORT_SEND_BLOCKED;
wait_for_reply(msg, lock_flags);
wait_for_reply(port);
channel_lock_irqsave(port->p_remote, &flags);
btree_delete(&port->p_remote->c_msg, &msg->msg_node);
channel_unlock_irqrestore(port->p_remote, flags);
return msg->msg_result;
}

View File

@@ -1,8 +1,9 @@
#include <mango/status.h>
#include <mango/types.h>
#define ERROR_STRING_CASE(code) \
case code: \
return #code
const char *kern_status_string(kern_status_t status)
{

View File

@@ -56,8 +56,10 @@
.endm
SYSCALL_GATE task_exit SYS_TASK_EXIT 1
SYSCALL_GATE task_self SYS_TASK_SELF 0
SYSCALL_GATE task_create SYS_TASK_CREATE 5
SYSCALL_GATE task_create_thread SYS_TASK_CREATE_THREAD 6
SYSCALL_GATE task_get_address_space SYS_TASK_GET_ADDRESS_SPACE 1
SYSCALL_GATE thread_start SYS_THREAD_START 1
@@ -67,6 +69,7 @@ SYSCALL_GATE vm_object_write SYS_VM_OBJECT_WRITE 5
SYSCALL_GATE vm_object_copy SYS_VM_OBJECT_COPY 6
SYSCALL_GATE vm_region_create SYS_VM_REGION_CREATE 8
SYSCALL_GATE vm_region_kill SYS_VM_REGION_KILL 1
SYSCALL_GATE vm_region_read SYS_VM_REGION_READ 5
SYSCALL_GATE vm_region_write SYS_VM_REGION_WRITE 5
SYSCALL_GATE vm_region_map_absolute SYS_VM_REGION_MAP_ABSOLUTE 7
@@ -79,15 +82,13 @@ SYSCALL_GATE kern_handle_close SYS_KERN_HANDLE_CLOSE 1
SYSCALL_GATE kern_config_get SYS_KERN_CONFIG_GET 3
SYSCALL_GATE kern_config_set SYS_KERN_CONFIG_SET 3
SYSCALL_GATE channel_create SYS_CHANNEL_CREATE 3
SYSCALL_GATE channel_create SYS_CHANNEL_CREATE 2
SYSCALL_GATE port_create SYS_PORT_CREATE 1
SYSCALL_GATE port_connect SYS_PORT_CONNECT 3
SYSCALL_GATE port_disconnect SYS_PORT_DISCONNECT 1
SYSCALL_GATE msg_send SYS_MSG_SEND 4
SYSCALL_GATE msg_send SYS_MSG_SEND 5
SYSCALL_GATE msg_recv SYS_MSG_RECV 4
SYSCALL_GATE msg_reply SYS_MSG_REPLY 4
SYSCALL_GATE msg_read SYS_MSG_READ 5
SYSCALL_GATE msg_read_handles SYS_MSG_READ_HANDLES 5
SYSCALL_GATE msg_write SYS_MSG_WRITE 5
SYSCALL_GATE msg_write_handles SYS_MSG_WRITE_HANDLES 5
SYSCALL_GATE msg_read SYS_MSG_READ 6
SYSCALL_GATE msg_write SYS_MSG_WRITE 6

View File

@@ -2,16 +2,26 @@
#define MANGO_LOG_H_
#include <mango/status.h>
#include <mango/types.h>
#undef TRACE
extern kern_status_t kern_log(const char *s);
#define kern_logf(...) \
do { \
char __logbuf[128]; \
snprintf(__logbuf, sizeof __logbuf, __VA_ARGS__); \
kern_log(__logbuf); \
} while (0)
#ifdef TRACE
#define kern_trace(...) kern_log(__VA_ARGS__)
#define kern_tracef(...) \
do { \
char s[128]; \
snprintf(s, sizeof s, __VA_ARGS__); \
kern_log(s); \
char __logbuf[128]; \
snprintf(__logbuf, sizeof __logbuf, __VA_ARGS__); \
kern_log(__logbuf); \
} while (0)
#else
#define kern_trace(...)
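kern_logf formats into a fixed 128-byte stack buffer with snprintf semantics, so longer messages are truncated; typical use:

	kern_logf("task %u: status=%u", task_id, status);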

View File

@@ -4,10 +4,7 @@
#include <mango/status.h>
#include <mango/types.h>
extern kern_status_t channel_create(
unsigned int id,
channel_flags_t flags,
kern_handle_t *out);
extern kern_status_t channel_create(unsigned int id, kern_handle_t *out);
extern kern_status_t port_create(kern_handle_t *out);
extern kern_status_t port_connect(
kern_handle_t port,
@@ -17,46 +14,30 @@ extern kern_status_t port_disconnect(kern_handle_t port);
extern kern_status_t msg_send(
kern_handle_t port,
msg_flags_t flags,
const struct msg *req,
struct msg *resp);
const kern_msg_t *msg,
kern_msg_t *out_response);
extern kern_status_t msg_recv(
kern_handle_t channel,
msg_flags_t flags,
msgid_t *out_id,
struct msg *out_msg);
extern kern_status_t msg_recv(kern_handle_t channel, kern_msg_t *out);
extern kern_status_t msg_reply(
kern_handle_t channel,
msg_flags_t flags,
msgid_t id,
const struct msg *reply);
const kern_msg_t *response);
extern kern_status_t msg_read(
kern_handle_t channel,
msgid_t id,
size_t offset,
struct iovec *out,
size_t nr_out);
extern kern_status_t msg_read_handles(
kern_handle_t channel,
msgid_t id,
size_t offset,
struct handle_list *out,
size_t nr_out);
kern_iovec_t *out,
size_t out_count,
size_t *nr_read);
extern kern_status_t msg_write(
kern_handle_t channel,
msgid_t id,
size_t offset,
const struct iovec *in,
size_t nr_in);
extern kern_status_t msg_write_handles(
kern_handle_t channel,
msgid_t id,
size_t offset,
const struct handle_list *in,
size_t nr_in);
const kern_iovec_t *in,
size_t nr_in,
size_t *nr_written);
#endif
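A server-side sketch using these signatures together with the MSG/IOVEC helpers from mango/types.h (chan and the buffer size are illustrative; the receiver supplies the iovecs that msg_recv copies the request data into):

	char buf[128];
	kern_iovec_t iov[] = { IOVEC(buf, sizeof buf) };
	kern_msg_t req = MSG(iov, 1, NULL, 0);
	kern_status_t status = msg_recv(chan, &req);
	/* req.msg_id identifies the exchange; req.msg_sender the sending task */
	kern_msg_t reply = MSG(iov, 1, NULL, 0);
	status = msg_reply(chan, 0, req.msg_id, &reply);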

View File

@@ -5,6 +5,7 @@
#include <mango/types.h>
extern kern_status_t task_exit(int status);
extern kern_status_t task_self(kern_handle_t *out);
extern kern_status_t task_create(
kern_handle_t parent,
@@ -19,6 +20,9 @@ extern kern_status_t task_create_thread(
uintptr_t *args,
size_t nr_args,
kern_handle_t *out_thread);
extern kern_status_t task_get_address_space(
kern_handle_t task,
kern_handle_t *out);
extern kern_status_t thread_start(kern_handle_t thread);

View File

@@ -39,6 +39,7 @@ extern kern_status_t vm_region_create(
vm_prot_t prot,
kern_handle_t *out,
virt_addr_t *out_base_address);
extern kern_status_t vm_region_kill(kern_handle_t region);
extern kern_status_t vm_region_read(
kern_handle_t region,
void *dst,

View File

@@ -1,8 +1,6 @@
#ifndef MANGO_STATUS_H_
#define MANGO_STATUS_H_
typedef unsigned int kern_status_t;
#define KERN_OK (0)
#define KERN_UNIMPLEMENTED (1)
#define KERN_NAME_EXISTS (2)

View File

@@ -2,14 +2,17 @@
#define MANGO_SYSCALL_H_
#define SYS_TASK_EXIT 1
#define SYS_TASK_SELF 31
#define SYS_TASK_CREATE 2
#define SYS_TASK_CREATE_THREAD 3
#define SYS_TASK_GET_ADDRESS_SPACE 33
#define SYS_THREAD_START 30
#define SYS_VM_OBJECT_CREATE 4
#define SYS_VM_OBJECT_READ 5
#define SYS_VM_OBJECT_WRITE 6
#define SYS_VM_OBJECT_COPY 29
#define SYS_VM_REGION_CREATE 7
#define SYS_VM_REGION_KILL 34
#define SYS_VM_REGION_READ 8
#define SYS_VM_REGION_WRITE 9
#define SYS_VM_REGION_MAP_ABSOLUTE 10
@@ -24,9 +27,7 @@
#define SYS_MSG_RECV 19
#define SYS_MSG_REPLY 20
#define SYS_MSG_READ 21
#define SYS_MSG_READ_HANDLES 22
#define SYS_MSG_WRITE 23
#define SYS_MSG_WRITE_HANDLES 24
#define SYS_CHANNEL_CREATE 25
#define SYS_PORT_CREATE 26
#define SYS_PORT_CONNECT 27

View File

@@ -4,59 +4,84 @@
#include <stddef.h>
#include <stdint.h>
#define VM_PROT_READ 0x01u
#define VM_PROT_WRITE 0x02u
#define VM_PROT_EXEC 0x04u
#define VM_PROT_USER 0x08u
#define VM_PROT_SVR 0x10u
#define VM_PROT_NOCACHE 0x10u
#define VM_PROT_MAP_SPECIFIC 0x40u
/* if this flag is set, other tasks can connect to this channel using
* the port_connect_* syscalls.
* if this flag is NOT set, only threads in the task that owns the channel
* can create ports connecting to it. */
#define CHANNEL_F_ALLOW_DIRECT_CONNECTIONS 0x01u
#define VM_REGION_ANY_OFFSET ((off_t) - 1)
#define KERN_HANDLE_INVALID ((kern_handle_t)0xFFFFFFFF)
/* msg_reply: once the reply has been sent, disconnect the port that sent the
* original message */
#define MSG_F_DISCONNECT_AFTER_REPLY 0x01u
#define KERN_CFG_INVALID 0x00u
#define KERN_CFG_PAGE_SIZE 0x01u
#define VM_REGION_ANY_OFFSET ((off_t) - 1)
#define KERN_HANDLE_INVALID ((kern_handle_t)0xFFFFFFFF)
#define KERN_MSG_MAX_HANDLES 64
#define KERN_MSG_HANDLE_IGNORE 0
#define KERN_MSG_HANDLE_MOVE 1
#define KERN_MSG_HANDLE_COPY 2
#define KERN_CFG_INVALID 0x00u
#define KERN_CFG_PAGE_SIZE 0x01u
#define IOVEC(p, len) \
{ \
.io_base = (virt_addr_t)(p), \
.io_len = (len), \
}
#define MSG_HANDLE(mode, value) \
{ \
.hnd_mode = (mode), \
.hnd_value = (value), \
}
#define MSG(data, data_count, handles, handles_len) \
{ \
.msg_data = (data), \
.msg_data_count = (data_count), \
.msg_handles = (handles), \
.msg_handles_count = (handles_len), \
}
typedef uintptr_t phys_addr_t;
typedef uintptr_t virt_addr_t;
typedef uint64_t msgid_t;
typedef uint64_t off_t;
typedef uint64_t koid_t;
typedef unsigned int tid_t;
typedef unsigned int kern_status_t;
typedef uint32_t kern_handle_t;
typedef uint32_t kern_config_key_t;
typedef uint32_t vm_prot_t;
typedef uint32_t channel_flags_t;
typedef uint32_t msg_flags_t;
typedef int64_t ssize_t;
typedef unsigned int umode_t;
struct iovec {
typedef struct {
virt_addr_t io_base;
size_t io_len;
};
} kern_iovec_t;
struct handle_list {
kern_handle_t *l_handles;
size_t l_nr_handles;
};
typedef struct {
unsigned int hnd_mode;
kern_handle_t hnd_value;
kern_status_t hnd_result;
} kern_msg_handle_t;
struct msg {
struct iovec *msg_data;
typedef struct {
/* transaction id. identifies a particular request/response exchange,
* and is used when replying to a message. */
msgid_t msg_id;
/* the id of the task that sent the message. */
tid_t msg_sender;
/* the id of the port or channel used to send the message. */
koid_t msg_endpoint;
/* a list of iovecs that point to the buffers that make up the main
* message data. */
kern_iovec_t *msg_data;
size_t msg_data_count;
struct handle_list *msg_handles;
/* a list of handle entries that contain the kernel handles included
* in a message. */
kern_msg_handle_t *msg_handles;
size_t msg_handles_count;
};
} kern_msg_t;
#endif
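As a worked example, here is a minimal sketch of how a client might assemble and send a message with the macros and types above. It assumes the three-argument msg_send matching the kernel's sys_msg_send, and that mango/types.h and the syscall declarations are in scope; the port handle and the transferred handle are hypothetical.

/* hypothetical client: send "hello" plus one copied handle, then
 * block until the reply arrives in reply_buf */
static kern_status_t send_hello(kern_handle_t port, kern_handle_t h)
{
        char hello[] = "hello";
        kern_iovec_t data[] = { IOVEC(hello, sizeof hello) };
        kern_msg_handle_t handles[]
                = { MSG_HANDLE(KERN_MSG_HANDLE_COPY, h) };
        kern_msg_t msg = MSG(data, 1, handles, 1);

        char reply_buf[64];
        kern_iovec_t reply_iov[] = { IOVEC(reply_buf, sizeof reply_buf) };
        kern_msg_t reply = MSG(reply_iov, 1, NULL, 0);

        return msg_send(port, &msg, &reply);
}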

View File

@@ -91,7 +91,9 @@ void context_switch(struct thread *old, struct thread *new)
void __schedule(enum sched_mode mode)
{
ml_int_disable();
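/* assumption: when entered from the IRQ path (SCHED_IRQ), interrupts
* are already disabled and will be restored by the interrupt-return
* code, so they are only toggled here for non-IRQ callers */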
if (mode != SCHED_IRQ) {
ml_int_disable();
}
struct cpu_data *this_cpu = get_this_cpu();
struct runqueue *rq = &this_cpu->c_rq;
@@ -142,7 +144,9 @@ void __schedule(enum sched_mode mode)
context_switch(prev, next);
}
ml_int_enable();
if (mode != SCHED_IRQ) {
ml_int_enable();
}
}
void schedule(enum sched_mode mode)

View File

@@ -6,14 +6,17 @@
static const virt_addr_t syscall_table[] = {
SYSCALL_TABLE_ENTRY(TASK_EXIT, task_exit),
SYSCALL_TABLE_ENTRY(TASK_SELF, task_self),
SYSCALL_TABLE_ENTRY(TASK_CREATE, task_create),
SYSCALL_TABLE_ENTRY(TASK_CREATE_THREAD, task_create_thread),
SYSCALL_TABLE_ENTRY(TASK_GET_ADDRESS_SPACE, task_get_address_space),
SYSCALL_TABLE_ENTRY(THREAD_START, thread_start),
SYSCALL_TABLE_ENTRY(VM_OBJECT_CREATE, vm_object_create),
SYSCALL_TABLE_ENTRY(VM_OBJECT_READ, vm_object_read),
SYSCALL_TABLE_ENTRY(VM_OBJECT_WRITE, vm_object_write),
SYSCALL_TABLE_ENTRY(VM_OBJECT_COPY, vm_object_copy),
SYSCALL_TABLE_ENTRY(VM_REGION_CREATE, vm_region_create),
SYSCALL_TABLE_ENTRY(VM_REGION_KILL, vm_region_kill),
SYSCALL_TABLE_ENTRY(VM_REGION_READ, vm_region_read),
SYSCALL_TABLE_ENTRY(VM_REGION_WRITE, vm_region_write),
SYSCALL_TABLE_ENTRY(VM_REGION_MAP_ABSOLUTE, vm_region_map_absolute),
@@ -32,9 +35,7 @@ static const virt_addr_t syscall_table[] = {
SYSCALL_TABLE_ENTRY(MSG_RECV, msg_recv),
SYSCALL_TABLE_ENTRY(MSG_REPLY, msg_reply),
SYSCALL_TABLE_ENTRY(MSG_READ, msg_read),
SYSCALL_TABLE_ENTRY(MSG_READ_HANDLES, msg_read_handles),
SYSCALL_TABLE_ENTRY(MSG_WRITE, msg_write),
SYSCALL_TABLE_ENTRY(MSG_WRITE_HANDLES, msg_write_handles),
};
static const size_t syscall_table_count
= sizeof syscall_table / sizeof syscall_table[0];

View File

@@ -4,6 +4,6 @@
kern_status_t sys_kern_log(const char *s)
{
struct task *task = current_task();
printk("%s: %s", task->t_name, s);
printk("%s[%d]: %s", task->t_name, task->t_id, s);
return KERN_OK;
}

View File

@@ -5,10 +5,7 @@
#include <kernel/syscall.h>
#include <kernel/vm-region.h>
kern_status_t sys_channel_create(
unsigned int id,
channel_flags_t flags,
kern_handle_t *out)
kern_status_t sys_channel_create(unsigned int id, kern_handle_t *out)
{
struct task *self = current_task();
if (!validate_access_w(self, out, sizeof *out)) {
@@ -62,14 +59,13 @@ kern_status_t sys_port_create(kern_handle_t *out)
kern_handle_t handle;
kern_status_t status
= task_open_handle(self, &port->p_base, 0, &handle);
task_unlock_irqrestore(self, irq_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, irq_flags);
object_unref(&port->p_base);
return status;
}
task_unlock_irqrestore(self, irq_flags);
*out = handle;
return KERN_OK;
}
@@ -98,6 +94,7 @@ kern_status_t sys_port_connect(
/* add a reference to the port object to make sure it isn't deleted
* while we're using it */
object_ref(port_obj);
struct port *port = port_cast(port_obj);
task_unlock_irqrestore(self, flags);
struct task *remote_task = task_from_tid(task_id);
@@ -116,81 +113,364 @@ kern_status_t sys_port_connect(
object_ref(&remote->c_base);
task_unlock_irqrestore(remote_task, flags);
status = port_connect(port_cast(port_obj), remote);
port_lock_irqsave(port, &flags);
status = port_connect(port, remote);
port_unlock_irqrestore(port, flags);
object_unref(port_obj);
object_unref(&remote->c_base);
return status;
}
kern_status_t sys_port_disconnect(kern_handle_t port)
kern_status_t sys_port_disconnect(kern_handle_t port_handle)
{
return KERN_UNIMPLEMENTED;
unsigned long flags;
struct task *self = current_task();
task_lock_irqsave(self, &flags);
struct object *port_obj = NULL;
handle_flags_t port_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
port_handle,
&port_obj,
&port_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the port object to make sure it isn't deleted
* while we're using it */
object_ref(port_obj);
task_unlock_irqrestore(self, flags);
struct port *port = port_cast(port_obj);
if (!port) {
object_unref(port_obj);
return KERN_INVALID_ARGUMENT;
}
port_lock_irqsave(port, &flags);
status = port_disconnect(port);
port_unlock_irqrestore(port, flags);
/* drop the reference taken above only once we're done with the port */
object_unref(port_obj);
return status;
}
static bool validate_iovec(
struct task *task,
const kern_iovec_t *iov,
size_t count,
bool rw)
{
/* reject counts that would overflow the size computation below */
if (count > (size_t)-1 / sizeof(*iov)) {
return false;
}
if (!validate_access_r(task, iov, count * sizeof(*iov))) {
return false;
}
for (size_t i = 0; i < count; i++) {
bool ok = false;
const kern_iovec_t *vec = &iov[i];
if (rw) {
ok = validate_access_w(task, vec->io_base, vec->io_len);
} else {
ok = validate_access_r(task, vec->io_base, vec->io_len);
}
if (!ok) {
return false;
}
}
return true;
}
static bool validate_msg(struct task *task, const kern_msg_t *msg, bool rw)
{
if (!msg) {
return false;
}
vm_prot_t flags;
if (rw) {
flags = VM_PROT_WRITE | VM_PROT_USER;
} else {
flags = VM_PROT_READ | VM_PROT_USER;
}
if (!validate_access(task, msg, sizeof *msg, flags)) {
return false;
}
if (!validate_iovec(task, msg->msg_data, msg->msg_data_count, rw)) {
return false;
}
/* reject counts that would overflow the size computation below */
if (msg->msg_handles_count > (size_t)-1 / sizeof(*msg->msg_handles)) {
return false;
}
size_t handle_buffer_len
= msg->msg_handles_count * sizeof(*msg->msg_handles);
if (!validate_access(
task,
msg->msg_handles,
handle_buffer_len,
flags)) {
return false;
}
return true;
}
kern_status_t sys_msg_send(
kern_handle_t port,
msg_flags_t flags,
const struct msg *req,
struct msg *resp)
kern_handle_t port_handle,
const kern_msg_t *msg,
kern_msg_t *out_reply)
{
return KERN_UNIMPLEMENTED;
struct task *self = current_task();
if (!validate_msg(self, msg, false)) {
return KERN_MEMORY_FAULT;
}
if (!validate_msg(self, out_reply, true)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *port_obj = NULL;
handle_flags_t port_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
port_handle,
&port_obj,
&port_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the port object to make sure it isn't deleted
* while we're using it */
object_ref(port_obj);
task_unlock_irqrestore(self, flags);
struct port *port = port_cast(port_obj);
if (!port) {
object_unref(port_obj);
return KERN_INVALID_ARGUMENT;
}
port_lock_irqsave(port, &flags);
status = port_send_msg(port, msg, out_reply, &flags);
port_unlock_irqrestore(port, flags);
object_unref(port_obj);
return status;
}
kern_status_t sys_msg_recv(
kern_handle_t channel,
msg_flags_t flags,
msgid_t *out_id,
struct msg *out_msg)
kern_status_t sys_msg_recv(kern_handle_t channel_handle, kern_msg_t *out_msg)
{
return KERN_UNIMPLEMENTED;
struct task *self = current_task();
if (!validate_msg(self, out_msg, true)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *channel_obj = NULL;
handle_flags_t channel_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
channel_handle,
&channel_obj,
&channel_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the channel object to make sure it isn't deleted
* while we're using it */
object_ref(channel_obj);
task_unlock_irqrestore(self, flags);
struct channel *channel = channel_cast(channel_obj);
if (!channel) {
object_unref(channel_obj);
return KERN_INVALID_ARGUMENT;
}
channel_lock_irqsave(channel, &flags);
status = channel_recv_msg(channel, out_msg, &flags);
channel_unlock_irqrestore(channel, flags);
object_unref(channel_obj);
return status;
}
kern_status_t sys_msg_reply(
kern_handle_t channel,
msg_flags_t flags,
kern_handle_t channel_handle,
msgid_t id,
const struct msg *reply)
const kern_msg_t *reply)
{
return KERN_UNIMPLEMENTED;
struct task *self = current_task();
if (!validate_msg(self, reply, true)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *channel_obj = NULL;
handle_flags_t channel_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
channel_handle,
&channel_obj,
&channel_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the channel object to make sure it isn't deleted
* while we're using it */
object_ref(channel_obj);
task_unlock_irqrestore(self, flags);
struct channel *channel = channel_cast(channel_obj);
if (!channel) {
object_unref(channel_obj);
return KERN_INVALID_ARGUMENT;
}
channel_lock_irqsave(channel, &flags);
status = channel_reply_msg(channel, id, reply, &flags);
channel_unlock_irqrestore(channel, flags);
object_unref(channel_obj);
return status;
}
kern_status_t sys_msg_read(
kern_handle_t channel,
kern_handle_t channel_handle,
msgid_t id,
size_t offset,
struct iovec *out,
size_t nr_out)
const kern_iovec_t *iov,
size_t iov_count,
size_t *nr_read)
{
return KERN_UNIMPLEMENTED;
}
struct task *self = current_task();
kern_status_t sys_msg_read_handles(
kern_handle_t channel,
msgid_t id,
size_t offset,
struct handle_list *out,
size_t nr_out)
{
return KERN_UNIMPLEMENTED;
if (nr_read && !validate_access_w(self, nr_read, sizeof *nr_read)) {
return KERN_MEMORY_FAULT;
}
if (!validate_iovec(self, iov, iov_count, true)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *channel_obj = NULL;
handle_flags_t channel_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
channel_handle,
&channel_obj,
&channel_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the channel object to make sure it isn't deleted
* while we're using it */
object_ref(channel_obj);
task_unlock_irqrestore(self, flags);
struct channel *channel = channel_cast(channel_obj);
if (!channel) {
object_unref(channel_obj);
return KERN_INVALID_ARGUMENT;
}
channel_lock_irqsave(channel, &flags);
status = channel_read_msg(
channel,
id,
offset,
self->t_address_space,
iov,
iov_count,
nr_read);
channel_unlock_irqrestore(channel, flags);
object_unref(channel_obj);
return status;
}
kern_status_t sys_msg_write(
kern_handle_t channel,
kern_handle_t channel_handle,
msgid_t id,
size_t offset,
const struct iovec *in,
size_t nr_in)
const kern_iovec_t *iov,
size_t iov_count,
size_t *nr_written)
{
return KERN_UNIMPLEMENTED;
}
struct task *self = current_task();
kern_status_t sys_msg_write_handles(
kern_handle_t channel,
msgid_t id,
size_t offset,
const struct handle_list *in,
size_t nr_in)
{
return KERN_UNIMPLEMENTED;
if (nr_written
&& !validate_access_w(self, nr_written, sizeof *nr_written)) {
return KERN_MEMORY_FAULT;
}
if (!validate_iovec(self, iov, iov_count, false)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *channel_obj = NULL;
handle_flags_t channel_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
channel_handle,
&channel_obj,
&channel_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the channel object to make sure it isn't deleted
* while we're using it */
object_ref(channel_obj);
task_unlock_irqrestore(self, flags);
struct channel *channel = channel_cast(channel_obj);
if (!channel) {
object_unref(channel_obj);
return KERN_INVALID_ARGUMENT;
}
channel_lock_irqsave(channel, &flags);
status = channel_write_msg(
channel,
id,
offset,
self->t_address_space,
iov,
iov_count,
nr_written);
channel_unlock_irqrestore(channel, flags);
object_unref(channel_obj);
return status;
}
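Each syscall above repeats the same resolve/reference/unlock sequence before using the object. A hypothetical helper (not present in the source) that captures the pattern:

/* hypothetical consolidation of the pattern used by the handlers
 * above: resolve a handle under the task lock, take a reference so
 * the object outlives the lock, then drop the task lock */
static kern_status_t resolve_and_ref(kern_handle_t handle, struct object **out)
{
        unsigned long flags;
        struct task *self = current_task();
        task_lock_irqsave(self, &flags);
        struct object *obj = NULL;
        handle_flags_t handle_flags = 0;
        kern_status_t status
                = task_resolve_handle(self, handle, &obj, &handle_flags);
        if (status != KERN_OK) {
                task_unlock_irqrestore(self, flags);
                return status;
        }
        object_ref(obj);
        task_unlock_irqrestore(self, flags);
        *out = obj;
        return KERN_OK;
}

Callers would then cast with channel_cast/port_cast, object_unref on a failed cast, and object_unref again once they are done with the object.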

View File

@@ -6,14 +6,44 @@
extern kern_status_t sys_task_exit(int status)
{
struct task *self = current_task();
printk("%s[%d]: task_exit(%d)", self->t_name, self->t_id, status);
while (1) {
printk("sys_exit(%d)", status);
milli_sleep(1000);
milli_sleep(5000);
}
return KERN_UNIMPLEMENTED;
}
kern_status_t sys_task_self(kern_handle_t *out)
{
struct task *self = current_task();
if (!validate_access_w(self, out, sizeof *out)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct handle *handle_slot = NULL;
kern_handle_t handle;
kern_status_t status = handle_table_alloc_handle(
self->t_handles,
&handle_slot,
&handle);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
object_add_handle(&self->t_base);
handle_slot->h_object = &self->t_base;
task_unlock_irqrestore(self, flags);
*out = handle;
return KERN_OK;
}
kern_status_t sys_task_create(
kern_handle_t parent_handle,
const char *name,
@@ -175,6 +205,57 @@ kern_status_t sys_task_create_thread(
return KERN_OK;
}
kern_status_t sys_task_get_address_space(
kern_handle_t task_handle,
kern_handle_t *out)
{
struct task *self = current_task();
if (!validate_access_w(self, out, sizeof *out)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct handle *handle_slot = NULL;
kern_handle_t handle;
struct object *task_obj = NULL;
handle_flags_t handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
task_handle,
&task_obj,
&handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
status = handle_table_alloc_handle(
self->t_handles,
&handle_slot,
&handle);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
struct task *task = task_cast(task_obj);
if (!task) {
handle_table_free_handle(self->t_handles, handle);
task_unlock_irqrestore(self, flags);
return KERN_INVALID_ARGUMENT;
}
handle_slot->h_object = &task->t_address_space->vr_base;
object_add_handle(&task->t_address_space->vr_base);
task_unlock_irqrestore(self, flags);
*out = handle;
return KERN_OK;
}
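From userspace, the two task syscalls compose naturally; a hypothetical snippet:

/* hypothetical userspace helper: fetch a handle to our own task,
 * then a handle to its root address-space region */
static kern_status_t get_own_address_space(kern_handle_t *out)
{
        kern_handle_t task;
        kern_status_t status = task_self(&task);
        if (status != KERN_OK) {
                return status;
        }
        return task_get_address_space(task, out);
}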
kern_status_t sys_thread_start(kern_handle_t thread_handle)
{
unsigned long flags;

View File

@@ -111,6 +111,13 @@ kern_status_t sys_vm_object_copy(
size_t count,
size_t *nr_copied)
{
tracek("vm_object_copy(%x, %zx, %x, %zx, %zx, %p)",
dst,
dst_offset,
src,
src_offset,
count,
nr_copied);
struct task *self = current_task();
if (nr_copied

View File

@@ -63,6 +63,7 @@ kern_status_t sys_vm_region_create(
object_ref(obj);
task_unlock_irqrestore(self, flags);
vm_region_lock_irqsave(parent_region, &flags);
struct vm_region *child = NULL;
status = vm_region_create(
@@ -73,6 +74,7 @@ kern_status_t sys_vm_region_create(
region_len,
prot,
&child);
vm_region_unlock_irqrestore(parent_region, flags);
object_unref(obj);
if (status != KERN_OK) {
@@ -92,6 +94,39 @@ kern_status_t sys_vm_region_create(
return KERN_OK;
}
kern_status_t sys_vm_region_kill(kern_handle_t region_handle)
{
struct task *self = current_task();
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *obj = NULL;
handle_flags_t handle_flags = 0;
kern_status_t status
= task_resolve_handle(self, region_handle, &obj, &handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
struct vm_region *region = vm_region_cast(obj);
if (!region) {
task_unlock_irqrestore(self, flags);
return KERN_INVALID_ARGUMENT;
}
object_ref(obj);
task_unlock_irqrestore(self, flags);
vm_region_lock_irqsave(region, &flags);
status = vm_region_kill(region, &flags);
vm_region_unlock_irqrestore(region, flags);
object_unref(obj);
return status;
}
kern_status_t sys_vm_region_read(
kern_handle_t region_handle,
void *dst,

View File

@@ -198,6 +198,8 @@ void *vm_cache_alloc(struct vm_cache *cache, enum vm_flags flags)
}
spin_unlock_irqrestore(&cache->c_lock, irq_flags);
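/* assumption: zeroing after the lock is dropped is safe because the
* object has already been removed from the free list and is not yet
* visible to any other CPU */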
memset(p, 0x0, cache->c_obj_size);
return p;
}

View File

@@ -19,10 +19,11 @@
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
*/
#include <limits.h>
#include <kernel/libc/string.h>
#include <kernel/memblock.h>
#include <kernel/printk.h>
#include <kernel/types.h>
#include <limits.h>
#include <stdbool.h>
#define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -447,10 +448,10 @@ void __next_memory_region(
}
/* we want the area that is overlapped by both
region M (m_start - m_end) : The region defined
as system memory. region R (r_start - r_end) : The
region defined as free / outside of any reserved
regions.
- region M (m_start - m_end) : The region defined
as system memory.
- region R (r_start - r_end) : The region defined as
free / outside of any reserved regions.
*/
it->it_base = MAX(m_start, r_start);
it->it_limit = MIN(m_end, r_end);
@@ -497,3 +498,28 @@ void *memblock_phys_to_virt(phys_addr_t p)
{
return (void *)(p + memblock.m_voffset);
}
#ifdef TRACE
static void memblock_type_dump(struct memblock_type *type)
{
tracek("%s:", type->name);
for (size_t i = 0; i < type->count; i++) {
tracek(" [%zx-%zx]",
type->regions[i].base,
type->regions[i].limit);
}
}
extern void memblock_dump(void)
{
memblock_type_dump(&memblock.memory);
memblock_type_dump(&memblock.reserved);
tracek("free:");
struct memblock_iter it;
for_each_free_mem_range(&it, 0, ADDR_MAX)
{
tracek(" [%zx-%zx]", it.it_base, it.it_limit);
}
}
#endif

View File

@@ -96,8 +96,12 @@ static kern_status_t object_iterator_seek(
}
if (it->it_pg) {
it->it_buf = vm_page_get_vaddr(it->it_pg);
virt_addr_t vaddr = (virt_addr_t)vm_page_get_vaddr(it->it_pg);
vaddr += (it->it_offset & VM_PAGE_MASK);
it->it_buf = (void *)vaddr;
it->it_max = vm_page_get_size_bytes(it->it_pg);
it->it_max -= (it->it_offset & VM_PAGE_MASK);
} else {
struct btree_node *n = btree_first(&it->it_obj->vo_pages);
struct vm_page *pg
@@ -112,8 +116,9 @@ static kern_status_t object_iterator_seek(
}
it->it_buf = NULL;
it->it_max = pg ? pg->p_vmo_offset
: it->it_obj->vo_size - it->it_offset;
it->it_max
= pg ? pg->p_vmo_offset - (it->it_offset & VM_PAGE_MASK)
: it->it_obj->vo_size - it->it_offset;
}
return KERN_OK;
@@ -281,6 +286,12 @@ extern struct vm_page *vm_object_alloc_page(
return NULL;
}
void *page_buf = vm_page_get_vaddr(page);
memset(page_buf, 0x0, vm_page_get_size_bytes(page));
tracek("vm-object: [%s] alloc offset %zx -> page %zx",
vo->vo_name,
offset,
vm_page_get_paddr(page));
page->p_vmo_offset = offset;
vo->vo_pages.b_root = &page->p_bnode;
btree_insert_fixup(&vo->vo_pages, &page->p_bnode);

View File

@@ -8,6 +8,20 @@
#include <kernel/vm-region.h>
#include <mango/status.h>
/* NOTE Locking Rules
* To avoid deadlocks and crashes, the following locking rules should be
* followed:
* 1. Do NOT lock more than one region at a time IF the regions are siblings.
* 2. When locking a region and its child(ren) or ancestors, always lock
* the parent region BEFORE the child region.
* 3. When locking a region and a vm-object mapped into that region, always
* lock the region BEFORE the vm-object.
* 4. An entry MUST be locked before any of its data can be read/written,
* including its children (if it's a region) and its e_parent pointer.
* 5. vm_region_mapping has no lock. Instead, its immediate parent region must
* be locked before any child mappings can be accessed.
*/
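For illustration, rules 2 and 4 as a minimal sketch (hypothetical call site where `child` sits directly under `parent`):

vm_region_lock(parent);            /* rule 2: parent first */
vm_region_lock(child);
/* rule 4: child's entries and e_parent may be touched only now */
vm_region_unlock(child);           /* unlock in reverse order */
vm_region_unlock(parent);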
/*** STATIC DATA + MACROS *****************************************************/
#undef ASLR
@@ -22,6 +36,15 @@
region_find_free_area_linear(region, length)
#endif
#define unlock_mapping_parent(p, root) \
do { \
struct vm_region *parent \
= region_from_entry(p->m_entry.e_parent); \
if (parent != root) { \
vm_region_unlock(parent); \
} \
} while (0)
/* iterates over a range of mapped virtual memory in a region, and provides
* a moving buffer through which the memory can be accessed */
struct vm_iterator {
@@ -49,10 +72,21 @@ enum search_direction {
#define VM_REGION_CAST(p) \
OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p)
static kern_status_t vm_region_object_destroy(struct object *obj);
static kern_status_t region_object_destroy(struct object *obj, struct queue *q);
static kern_status_t region_object_destroy_recurse(
struct queue_entry *entry,
struct object **out);
static struct object_type vm_region_type = {
.ob_name = "vm-region",
.ob_size = sizeof(struct vm_region),
.ob_header_offset = offsetof(struct vm_region, vr_base),
.ob_ops = {
.destroy = region_object_destroy,
.destroy_recurse = region_object_destroy_recurse,
},
};
static struct vm_cache mapping_cache = {
@@ -81,17 +115,53 @@ static struct vm_region_mapping *mapping_from_entry(
return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry);
}
static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry)
kern_status_t region_object_destroy(struct object *obj, struct queue *q)
{
virt_addr_t result = 0;
while (entry) {
result += entry->e_offset;
entry = entry->e_parent;
struct vm_region *region = VM_REGION_CAST(obj);
if (region->vr_status == VM_REGION_ONLINE) {
panic("last reference closed on an online vm-region");
}
return result;
struct btree_node *node = btree_first(&region->vr_entries);
while (node) {
struct btree_node *next = btree_next(node);
btree_delete(&region->vr_entries, node);
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, node);
if (entry->e_type != VM_REGION_ENTRY_REGION) {
panic("offline vm-region still contains non-region "
"children.");
}
queue_push_back(q, &entry->e_entry);
node = next;
}
return KERN_OK;
}
kern_status_t region_object_destroy_recurse(
struct queue_entry *entry,
struct object **out)
{
struct vm_region_entry *region_entry
= BTREE_CONTAINER(struct vm_region_entry, e_entry, entry);
if (region_entry->e_type != VM_REGION_ENTRY_REGION) {
panic("offline vm-region still contains non-region "
"children.");
}
struct vm_region *region = region_from_entry(region_entry);
*out = &region->vr_base;
return KERN_OK;
}
static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry)
{
return entry->e_address;
}
/* this function must be called with `parent` locked */
static void region_put_entry(
struct vm_region *parent,
struct vm_region_entry *child)
@@ -119,7 +189,15 @@ static void region_put_entry(
} else if (child_base > cur_limit) {
next = btree_right(cur);
} else {
panic("tried to add an overlapping entry to vm-region");
#ifdef TRACE
vm_region_dump(parent);
#endif
panic("tried to add an overlapping entry [%zx-%zx] to "
"vm-region (overlaps [%zx-%zx])",
child_base,
child_limit,
cur_base,
cur_limit);
}
if (next) {
@@ -179,12 +257,17 @@ static struct vm_region_entry *region_get_entry(
/* find the child region that covers the area [*offp,len]. searches recursively
* the value in `offp` is updated to the offset of the returned entry relative
* to its parent */
* to its parent.
* this function should be called with `region` locked.
* the region returned by this function will also be locked. any intermediary
* regions traversed by this function will be locked temporarily, but will
* be unlocked by the time the function returns. */
static struct vm_region *region_get_child_region_recursive(
struct vm_region *region,
off_t *offp,
size_t len)
{
struct vm_region *root = region;
off_t offset = *offp;
if (offset >= region->vr_entry.e_size) {
return NULL;
@@ -197,6 +280,14 @@ static struct vm_region *region_get_child_region_recursive(
struct vm_region *next_region = region_from_entry(next);
if (next_region) {
offset -= next->e_offset;
/* since `region` is locked, interrupts are already
* disabled, so don't use lock_irq() here */
vm_region_lock(next_region);
if (region != root) {
vm_region_unlock(region);
}
region = next_region;
} else {
break;
@@ -207,20 +298,38 @@ static struct vm_region *region_get_child_region_recursive(
return region;
}
/* find the vm_region_mapping that contains a given memory area.
* `offp` should be a pointer to an off_t value that contains the offset
* of the area relative to the start of `region`. this value will be updated
* to the offset of the mapping relative to its immediate parent.
* this function should be called with `region` locked. if a mapping is found,
* it will be returned with its immediate parent locked. */
static struct vm_region_mapping *region_get_mapping_recursive(
struct vm_region *region,
struct vm_region *root,
off_t *offp,
size_t len)
{
off_t offset = *offp;
region = region_get_child_region_recursive(region, &offset, len);
struct vm_region *region
= region_get_child_region_recursive(root, &offset, len);
if (!region) {
return NULL;
}
/* if `region` is a different region than what was originally passed to
* us, it has now been locked, and its children can be accessed. */
struct vm_region_entry *entry = region_get_entry(region, offset, len);
*offp = offset;
if (!entry) {
if (region != root) {
vm_region_unlock(region);
}
return NULL;
}
/* return the mapping with the parent region still locked */
return mapping_from_entry(entry);
}
@@ -488,6 +597,7 @@ static kern_status_t region_validate_allocation(
return KERN_OK;
}
/* this function should be called with `region` locked */
static void vm_iterator_begin(
struct vm_iterator *it,
struct vm_region *region,
@@ -501,7 +611,12 @@ static void vm_iterator_begin(
off_t offset = base - vm_region_get_base_address(region);
it->it_mapping = region_get_mapping_recursive(region, &offset, 1);
if (!it->it_mapping || (it->it_mapping->m_prot & prot) != prot) {
if (!it->it_mapping) {
return;
}
if ((it->it_mapping->m_prot & prot) != prot) {
unlock_mapping_parent(it->it_mapping, region);
return;
}
@@ -520,6 +635,7 @@ static void vm_iterator_begin(
}
if (!pg) {
unlock_mapping_parent(it->it_mapping, region);
return;
}
@@ -545,8 +661,8 @@ static void vm_iterator_begin(
buffer_size += vm_page_get_size_bytes(next);
}
it->it_buf = buffer_base;
it->it_max = buffer_size;
it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK);
it->it_max = buffer_size - (object_offset & VM_PAGE_MASK);
}
static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
@@ -558,6 +674,10 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
return KERN_OK;
}
/* the parent region of it->it_mapping is locked here. if it is
* different from it->it_region, it must be unlocked */
unlock_mapping_parent(it->it_mapping, it->it_region);
it->it_base += nr_bytes;
off_t offset = it->it_base - vm_region_get_base_address(it->it_region);
@@ -569,9 +689,13 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
return KERN_MEMORY_FAULT;
}
/* past this point, if we encounter an error, we must remember to
* unlock the parent region of next_mapping */
if ((next_mapping->m_prot & it->it_prot) != it->it_prot) {
it->it_buf = NULL;
it->it_max = 0;
unlock_mapping_parent(next_mapping, it->it_region);
return KERN_MEMORY_FAULT;
}
@@ -590,6 +714,7 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
}
if (!pg) {
unlock_mapping_parent(next_mapping, it->it_region);
return KERN_NO_MEMORY;
}
@@ -615,11 +740,25 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
buffer_size += vm_page_get_size_bytes(next);
}
it->it_buf = buffer_base;
it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK);
it->it_max = buffer_size;
return KERN_OK;
}
/* this function must be called when you are finished with a
* vm_iterator, to ensure that all held locks are released. */
static void vm_iterator_finish(struct vm_iterator *it)
{
if (it->it_mapping) {
unlock_mapping_parent(it->it_mapping, it->it_region);
}
memset(it, 0x0, sizeof *it);
}
/* this function must be called with `root` locked. `root` will be the
* first entry visited by the iterator. from there, child entries are
* visited in depth-first order. */
static void entry_iterator_begin(
struct entry_iterator *it,
struct vm_region *root)
@@ -629,8 +768,43 @@ static void entry_iterator_begin(
it->it_entry = &root->vr_entry;
}
/* this function must be called when you are finished with an
* entry_iterator, to ensure that all held locks are released. */
static void entry_iterator_finish(struct entry_iterator *it)
{
struct vm_region_entry *cur = it->it_entry;
if (!cur) {
return;
}
struct vm_region *region = NULL;
if (cur->e_type == VM_REGION_ENTRY_MAPPING) {
region = region_from_entry(cur->e_parent);
} else {
region = region_from_entry(cur);
}
while (region && region != it->it_root) {
struct vm_region *parent
= region_from_entry(region->vr_entry.e_parent);
vm_region_unlock(region);
region = parent;
}
memset(it, 0x0, sizeof *it);
}
/* move to the next entry in the traversal order.
* when this function returns:
* 1. if the visited entry is a region, it will be locked.
* 2. if the visited entry is a mapping, its parent region will be locked.
* a region remains locked until all of its descendants have been visited.
* once iteration is finished, only `it->it_root` will be locked.
*/
static void entry_iterator_move_next(struct entry_iterator *it)
{
/* `region` is locked */
struct vm_region *region = region_from_entry(it->it_entry);
bool has_children = (region && !btree_empty(&region->vr_entries));
@@ -639,6 +813,16 @@ static void entry_iterator_move_next(struct entry_iterator *it)
struct btree_node *node = btree_first(&region->vr_entries);
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, node);
if (entry->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child_region
= region_from_entry(entry);
/* since `region` is locked, interrupts are
* already disabled, so don't use lock_irq()
* here */
vm_region_lock(child_region);
}
it->it_depth++;
it->it_entry = entry;
return;
@@ -670,6 +854,73 @@ static void entry_iterator_move_next(struct entry_iterator *it)
return;
}
if (cur->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child_region = region_from_entry(cur);
if (child_region != it->it_root) {
vm_region_unlock(child_region);
}
}
it->it_depth--;
cur = parent_entry;
}
}
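Taken together, the iterator functions follow a begin/visit/finish pattern; a condensed sketch (the caller must hold the lock on `root`, per the comments above):

struct entry_iterator it;
entry_iterator_begin(&it, root);
while (it.it_entry) {
        /* inspect it.it_entry here */
        entry_iterator_move_next(&it);
}
/* releases any child locks still held if iteration stopped early */
entry_iterator_finish(&it);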
/* erase the current entry and move to the next entry in the traversal
* order. the current entry MUST be a mapping, otherwise nothing will
* happen.
*/
static void entry_iterator_erase(struct entry_iterator *it)
{
/* the parent region of `mapping` is locked */
struct vm_region_mapping *mapping = mapping_from_entry(it->it_entry);
if (!mapping) {
return;
}
struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);
/* go back up until we find a right sibling. */
struct vm_region_entry *cur = it->it_entry;
while (1) {
struct btree_node *sibling = btree_next(&cur->e_node);
if (mapping) {
btree_delete(
&parent->vr_entries,
&mapping->m_entry.e_node);
vm_cache_free(&mapping_cache, mapping);
mapping = NULL;
}
if (sibling) {
it->it_entry = BTREE_CONTAINER(
struct vm_region_entry,
e_node,
sibling);
return;
}
if (cur == &it->it_root->vr_entry) {
it->it_entry = NULL;
return;
}
struct vm_region_entry *parent_entry = cur->e_parent;
struct vm_region *parent_region = region_from_entry(parent_entry);
if (!parent_region) {
it->it_entry = NULL;
return;
}
if (cur->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child_region = region_from_entry(cur);
if (child_region != it->it_root) {
vm_region_unlock(child_region);
}
}
it->it_depth--;
cur = parent_entry;
}
@@ -705,6 +956,11 @@ static void mapping_iterator_begin(
}
}
static void mapping_iterator_finish(struct entry_iterator *it)
{
entry_iterator_finish(it);
}
static void mapping_iterator_move_next(
struct entry_iterator *it,
off_t offset,
@@ -730,6 +986,34 @@ static void mapping_iterator_move_next(
}
}
static void mapping_iterator_erase(
struct entry_iterator *it,
off_t offset,
size_t length,
off_t *offp)
{
entry_iterator_erase(it);
while (it->it_entry
&& it->it_entry->e_type != VM_REGION_ENTRY_MAPPING) {
entry_iterator_move_next(it);
}
if (!it->it_entry) {
return;
}
off_t base = entry_absolute_address(it->it_entry)
- it->it_root->vr_entry.e_offset;
if (base >= offset + length) {
it->it_entry = NULL;
} else {
*offp = base;
}
}
/*** PUBLIC API ***************************************************************/
kern_status_t vm_region_type_init(void)
@@ -743,6 +1027,9 @@ struct vm_region *vm_region_cast(struct object *obj)
return VM_REGION_CAST(obj);
}
/* this function should be called with `parent` locked (if parent is
* non-NULL)
*/
kern_status_t vm_region_create(
struct vm_region *parent,
const char *name,
@@ -752,6 +1039,10 @@ kern_status_t vm_region_create(
vm_prot_t prot,
struct vm_region **out)
{
if (parent && parent->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (!offset || !region_len) {
return KERN_INVALID_ARGUMENT;
}
@@ -781,8 +1072,10 @@ kern_status_t vm_region_create(
struct vm_region *region = VM_REGION_CAST(region_object);
region->vr_status = VM_REGION_ONLINE;
region->vr_prot = prot;
region->vr_entry.e_type = VM_REGION_ENTRY_REGION;
region->vr_entry.e_address = offset;
region->vr_entry.e_offset = offset;
region->vr_entry.e_size = region_len;
@@ -794,8 +1087,11 @@ kern_status_t vm_region_create(
if (parent) {
region->vr_entry.e_parent = &parent->vr_entry;
region->vr_entry.e_address += parent->vr_entry.e_address;
region->vr_pmap = parent->vr_pmap;
region_put_entry(parent, &region->vr_entry);
/* `parent` holds a reference to child `region` */
object_ref(&region->vr_base);
}
if (name && name_len) {
@@ -808,8 +1104,67 @@ kern_status_t vm_region_create(
return KERN_OK;
}
kern_status_t vm_region_map_object(
kern_status_t vm_region_kill(
struct vm_region *region,
unsigned long *lock_flags)
{
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (region->vr_entry.e_parent) {
struct vm_region *parent
= region_from_entry(region->vr_entry.e_parent);
region->vr_entry.e_parent = NULL;
/* locks must be acquired in parent->child order. since
* we're going backwards here, unlock `region` before
* locking its parent */
vm_region_unlock_irqrestore(region, *lock_flags);
vm_region_lock_irqsave(parent, lock_flags);
btree_delete(&parent->vr_entries, &region->vr_entry.e_node);
vm_region_unlock_irqrestore(parent, *lock_flags);
vm_region_lock_irqsave(region, lock_flags);
/* `region` lock is held, and e_parent is NULL */
}
struct entry_iterator it;
entry_iterator_begin(&it, region);
while (it.it_entry) {
if (it.it_entry->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child
= region_from_entry(it.it_entry);
child->vr_status = VM_REGION_DEAD;
entry_iterator_move_next(&it);
continue;
}
struct vm_region_mapping *mapping
= mapping_from_entry(it.it_entry);
virt_addr_t base = entry_absolute_address(it.it_entry);
for (size_t i = 0; i < mapping->m_entry.e_size;
i += VM_PAGE_SIZE) {
pmap_remove(region->vr_pmap, base + i);
}
unsigned long flags;
vm_object_lock_irqsave(mapping->m_object, &flags);
queue_delete(
&mapping->m_object->vo_mappings,
&mapping->m_object_entry);
vm_object_unlock_irqrestore(mapping->m_object, flags);
entry_iterator_erase(&it);
}
return KERN_OK;
}
kern_status_t vm_region_map_object(
struct vm_region *root,
off_t region_offset,
struct vm_object *object,
off_t object_offset,
@@ -839,10 +1194,24 @@ kern_status_t vm_region_map_object(
length += VM_PAGE_SIZE;
}
if (!region || !object) {
if (!root || !object) {
return KERN_INVALID_ARGUMENT;
}
struct vm_region *region = root;
if (region_offset != VM_REGION_ANY_OFFSET) {
region = region_get_child_region_recursive(
root,
&region_offset,
length);
/* if `region` != `root`, it will need to be unlocked at
* the end of the function */
}
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if ((prot & region->vr_prot) != prot) {
return KERN_INVALID_ARGUMENT;
}
@@ -855,13 +1224,6 @@ kern_status_t vm_region_map_object(
return KERN_INVALID_ARGUMENT;
}
if (region_offset != VM_REGION_ANY_OFFSET) {
region = region_get_child_region_recursive(
region,
&region_offset,
length);
}
if (!region) {
return KERN_INVALID_ARGUMENT;
}
@@ -876,6 +1238,15 @@ kern_status_t vm_region_map_object(
return KERN_INVALID_ARGUMENT;
}
tracek("vm_region_map_object(%s, %zx, %s, %zx, %zx, %x, %p)",
region->vr_name,
region_offset,
object->vo_name,
object_offset,
length,
prot,
out);
struct vm_region_mapping *mapping
= vm_cache_alloc(&mapping_cache, VM_NORMAL);
if (!mapping) {
@@ -887,6 +1258,7 @@ kern_status_t vm_region_map_object(
mapping->m_object_offset = object_offset;
mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING;
mapping->m_entry.e_parent = &region->vr_entry;
mapping->m_entry.e_address = region->vr_entry.e_address + region_offset;
mapping->m_entry.e_offset = region_offset;
mapping->m_entry.e_size = length;
@@ -898,7 +1270,14 @@ kern_status_t vm_region_map_object(
abs_base + length);
#endif
region_put_entry(region, &mapping->m_entry);
if (region != root) {
vm_region_unlock(region);
}
unsigned long lock_flags;
vm_object_lock_irqsave(object, &lock_flags);
queue_push_back(&object->vo_mappings, &mapping->m_object_entry);
vm_object_unlock_irqrestore(object, lock_flags);
if (out) {
*out = entry_absolute_address(&mapping->m_entry);
@@ -907,8 +1286,8 @@ kern_status_t vm_region_map_object(
return KERN_OK;
}
/* unmap some pages in the middle of a mapping, splitting it into two separate
* mappings */
/* unmap some pages in the middle of a mapping, splitting it into two
* separate mappings */
static kern_status_t split_mapping(
struct vm_region_mapping *mapping,
struct vm_region *root,
@@ -1059,12 +1438,13 @@ static kern_status_t delete_mapping(
pmap_remove(root->vr_pmap, base + i);
}
struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);
unsigned long flags;
vm_object_lock_irqsave(mapping->m_object, &flags);
queue_delete(&mapping->m_object->vo_mappings, &mapping->m_object_entry);
btree_delete(&parent->vr_entries, &mapping->m_entry.e_node);
vm_object_unlock_irqrestore(mapping->m_object, flags);
vm_cache_free(&mapping_cache, mapping);
/* don't actually delete the mapping yet. that will be done by
* vm_region_unmap */
return KERN_OK;
}
@@ -1074,6 +1454,10 @@ kern_status_t vm_region_unmap(
off_t unmap_area_offset,
size_t unmap_area_length)
{
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
kern_status_t status = KERN_OK;
struct entry_iterator it;
off_t unmap_area_limit = unmap_area_offset + unmap_area_length;
@@ -1092,12 +1476,6 @@ kern_status_t vm_region_unmap(
off_t mapping_offset = tmp;
off_t mapping_limit = mapping_offset + it.it_entry->e_size;
mapping_iterator_move_next(
&it,
unmap_area_offset,
unmap_area_length,
&tmp);
bool split
= (unmap_area_offset > mapping_offset
&& unmap_area_limit < mapping_limit);
@@ -1118,6 +1496,7 @@ kern_status_t vm_region_unmap(
mapping_offset,
unmap_area_offset,
unmap_area_limit);
delete = true;
} else if (delete) {
status = delete_mapping(
mapping,
@@ -1138,7 +1517,22 @@ kern_status_t vm_region_unmap(
unmap_area_offset,
unmap_area_limit);
} else {
panic("don't know what to do with this mapping");
panic("don't know what to do with this "
"mapping");
}
if (delete) {
mapping_iterator_erase(
&it,
unmap_area_offset,
unmap_area_length,
&tmp);
} else {
mapping_iterator_move_next(
&it,
unmap_area_offset,
unmap_area_length,
&tmp);
}
if (status != KERN_OK) {
@@ -1146,6 +1540,8 @@ kern_status_t vm_region_unmap(
}
}
mapping_iterator_finish(&it);
return status;
}
@@ -1155,6 +1551,10 @@ bool vm_region_validate_access(
size_t len,
vm_prot_t prot)
{
if (region->vr_status != VM_REGION_ONLINE) {
return false;
}
if (len == 0) {
return true;
}
@@ -1187,7 +1587,16 @@ bool vm_region_validate_access(
return false;
}
if ((mapping->m_prot & prot) != prot) {
bool ok = (mapping->m_prot & prot) == prot;
struct vm_region *parent
= region_from_entry(mapping->m_entry.e_parent);
if (parent != region) {
vm_region_unlock(parent);
}
if (!ok) {
return false;
}
}
@@ -1195,11 +1604,16 @@ bool vm_region_validate_access(
return true;
}
/* this function must be called with `region` locked */
kern_status_t vm_region_demand_map(
struct vm_region *region,
virt_addr_t addr,
enum pmap_fault_flags flags)
{
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
addr &= ~VM_PAGE_MASK;
if (addr < region->vr_entry.e_offset
|| addr > region->vr_entry.e_offset + region->vr_entry.e_size) {
@@ -1221,24 +1635,130 @@ kern_status_t vm_region_demand_map(
mapping->m_object->vo_name,
object_offset);
unsigned long lock_flags;
vm_object_lock_irqsave(mapping->m_object, &lock_flags);
struct vm_page *pg = vm_object_alloc_page(
mapping->m_object,
object_offset,
VM_PAGE_4K);
vm_object_unlock_irqrestore(mapping->m_object, lock_flags);
if (!pg) {
/* allocation failed: unlock the mapping's parent (if it isn't
* `region` itself) before bailing out */
struct vm_region *parent
= region_from_entry(mapping->m_entry.e_parent);
if (parent != region) {
vm_region_unlock(parent);
}
return KERN_NO_MEMORY;
}
tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr);
return pmap_add(
kern_status_t status = pmap_add(
region->vr_pmap,
addr,
vm_page_get_pfn(pg),
mapping->m_prot,
PMAP_NORMAL);
struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);
if (parent != region) {
vm_region_unlock(parent);
}
return status;
}
virt_addr_t vm_region_get_base_address(const struct vm_region *region)
{
if (region->vr_status != VM_REGION_ONLINE) {
return 0;
}
return entry_absolute_address(&region->vr_entry);
}
kern_status_t vm_region_read_kernel(
struct vm_region *src_region,
virt_addr_t src_ptr,
size_t count,
void *destp,
size_t *nr_read)
{
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct vm_iterator src;
char *dest = destp;
vm_iterator_begin(
&src,
src_region,
src_ptr,
VM_PROT_READ | VM_PROT_USER);
kern_status_t status = KERN_OK;
size_t r = 0;
while (r < count && src.it_max) {
size_t remaining = count - r;
size_t to_move = MIN(src.it_max, remaining);
memmove(dest, src.it_buf, to_move);
status = vm_iterator_seek(&src, to_move);
if (status != KERN_OK) {
break;
}
r += to_move;
dest += to_move;
}
vm_iterator_finish(&src);
if (nr_read) {
*nr_read = r;
}
return status;
}
kern_status_t vm_region_write_kernel(
struct vm_region *dst_region,
virt_addr_t dst_ptr,
size_t count,
const void *srcp,
size_t *nr_written)
{
if (dst_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct vm_iterator dst;
const char *src = srcp;
vm_iterator_begin(
&dst,
dst_region,
dst_ptr,
VM_PROT_WRITE | VM_PROT_USER);
kern_status_t status = KERN_OK;
size_t r = 0;
while (r < count && dst.it_max) {
size_t remaining = count - r;
size_t to_move = MIN(dst.it_max, remaining);
memmove(dst.it_buf, src, to_move);
status = vm_iterator_seek(&dst, to_move);
if (status != KERN_OK) {
break;
}
r += to_move;
src += to_move;
}
vm_iterator_finish(&dst);
if (nr_written) {
*nr_written = r;
}
return status;
}
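A sketch of a typical caller (hypothetical: copying a bounded string from a user region into a kernel buffer and NUL-terminating it):

/* hypothetical kernel-side caller of vm_region_read_kernel */
static kern_status_t copy_user_string(
        struct vm_region *aspace,
        virt_addr_t user_ptr,
        char *buf,
        size_t buf_len)
{
        size_t nr = 0;
        kern_status_t status = vm_region_read_kernel(
                aspace, user_ptr, buf_len - 1, buf, &nr);
        if (status == KERN_OK) {
                buf[nr] = '\0';
        }
        return status;
}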
kern_status_t vm_region_memmove(
struct vm_region *dest_region,
virt_addr_t dest_ptr,
@@ -1247,6 +1767,14 @@ kern_status_t vm_region_memmove(
size_t count,
size_t *nr_moved)
{
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (dest_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct vm_iterator src, dest;
vm_iterator_begin(
&src,
@@ -1280,6 +1808,9 @@ kern_status_t vm_region_memmove(
r += to_move;
}
vm_iterator_finish(&src);
vm_iterator_finish(&dest);
if (nr_moved) {
*nr_moved = r;
}
@@ -1290,21 +1821,31 @@ kern_status_t vm_region_memmove(
extern kern_status_t vm_region_memmove_v(
struct vm_region *dest_region,
size_t dest_offset,
struct iovec *dest_vecs,
const kern_iovec_t *dest_vecs,
size_t nr_dest_vecs,
struct vm_region *src_region,
size_t src_offset,
const struct iovec *src_vecs,
const kern_iovec_t *src_vecs,
size_t nr_src_vecs,
size_t bytes_to_move)
size_t bytes_to_move,
size_t *nr_bytes_moved)
{
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (dest_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct iovec_iterator src, dest;
iovec_iterator_begin(&src, src_vecs, nr_src_vecs);
iovec_iterator_begin(&dest, dest_vecs, nr_dest_vecs);
iovec_iterator_begin_user(&src, src_region, src_vecs, nr_src_vecs);
iovec_iterator_begin_user(&dest, dest_region, dest_vecs, nr_dest_vecs);
iovec_iterator_seek(&src, src_offset);
iovec_iterator_seek(&dest, dest_offset);
size_t moved = 0;
while (bytes_to_move && src.it_len && dest.it_len) {
size_t to_move
= MIN(MIN(src.it_len, dest.it_len), bytes_to_move);
@@ -1323,6 +1864,11 @@ extern kern_status_t vm_region_memmove_v(
iovec_iterator_seek(&src, to_move);
iovec_iterator_seek(&dest, to_move);
bytes_to_move -= to_move;
moved += to_move;
}
if (nr_bytes_moved) {
*nr_bytes_moved = moved;
}
return KERN_OK;

137
vm/zone.c
View File

@@ -1,14 +1,19 @@
#include <kernel/locks.h>
#include <kernel/util.h>
#include <kernel/queue.h>
#include <kernel/memblock.h>
#include <kernel/types.h>
#include <kernel/vm.h>
#include <kernel/printk.h>
#include <kernel/libc/string.h>
#include <kernel/locks.h>
#include <kernel/machine/cpu.h>
#include <kernel/memblock.h>
#include <kernel/panic.h>
#include <kernel/printk.h>
#include <kernel/queue.h>
#include <kernel/types.h>
#include <kernel/util.h>
#include <kernel/vm.h>
static struct vm_page *group_pages_into_block(struct vm_zone *z, phys_addr_t base, phys_addr_t limit, int order)
static struct vm_page *group_pages_into_block(
struct vm_zone *z,
phys_addr_t base,
phys_addr_t limit,
int order)
{
struct vm_page *first_page = NULL;
for (phys_addr_t i = base; i < limit; i += VM_PAGE_SIZE) {
@@ -37,16 +42,23 @@ static struct vm_page *group_pages_into_block(struct vm_zone *z, phys_addr_t bas
return first_page;
}
static void convert_region_to_blocks(struct vm_zone *zone,
phys_addr_t base, phys_addr_t limit,
int reserved)
static void convert_region_to_blocks(
struct vm_zone *zone,
phys_addr_t base,
phys_addr_t limit,
int reserved)
{
if (base & VM_PAGE_MASK || (limit + 1) & VM_PAGE_MASK) {
panic("convert_region_to_blocks: region must be page-aligned");
}
size_t block_frames = vm_bytes_to_pages(limit - base + 1);
int reset_order = 0;
for (int order = VM_PAGE_MAX_ORDER; order >= VM_PAGE_MIN_ORDER; ) {
for (int order = VM_PAGE_MAX_ORDER; order >= VM_PAGE_MIN_ORDER;) {
size_t order_frames = vm_page_order_to_pages(order);
vm_alignment_t order_alignment = vm_page_order_to_alignment(order);
vm_alignment_t order_alignment
= vm_page_order_to_alignment(order);
if (order_frames > block_frames) {
order--;
@@ -59,11 +71,18 @@ static void convert_region_to_blocks(struct vm_zone *zone,
continue;
}
phys_addr_t block_limit = base + (order_frames * VM_PAGE_SIZE) - 1;
struct vm_page *block_page = group_pages_into_block(zone, base, block_limit, order);
phys_addr_t block_limit
= base + (order_frames * VM_PAGE_SIZE) - 1;
struct vm_page *block_page = group_pages_into_block(
zone,
base,
block_limit,
order);
if (reserved == 0) {
queue_push_back(&zone->z_free_pages[order], &block_page->p_list);
queue_push_back(
&zone->z_free_pages[order],
&block_page->p_list);
}
base = block_limit + 1;
@@ -83,10 +102,12 @@ static void convert_region_to_blocks(struct vm_zone *zone,
static size_t zone_free_bytes(struct vm_zone *z)
{
size_t free_bytes = 0;
for (enum vm_page_order i = VM_PAGE_MIN_ORDER; i <= VM_PAGE_MAX_ORDER; i++) {
for (enum vm_page_order i = VM_PAGE_MIN_ORDER; i <= VM_PAGE_MAX_ORDER;
i++) {
size_t page_bytes = vm_page_order_to_bytes(i);
size_t nr_pages = 0;
queue_foreach (struct vm_page, pg, &z->z_free_pages[i], p_list) {
queue_foreach(struct vm_page, pg, &z->z_free_pages[i], p_list)
{
free_bytes += page_bytes;
nr_pages++;
}
@@ -106,24 +127,37 @@ void vm_zone_init(struct vm_zone *z, const struct vm_zone_descriptor *zone_info)
struct memblock_iter it;
/* TODO this only creates page blocks for free memory regions, not reserved memory regions.
* this is faster for systems that have huge amounts of reserved memory, but it means
* that a call to vm_page_get() for a reserved memory region will return null
* rather than a reserved page.
/* TODO this only creates page blocks for free memory regions, not
* reserved memory regions. this is faster for systems that have huge
* amounts of reserved memory, but it means that a call to vm_page_get()
* for a reserved memory region will return null rather than a reserved
* page.
*
* vm_page_get() should probably create reserved pages on-demand for these regions. */
* vm_page_get() should probably create reserved pages on-demand for
* these regions. */
size_t nr_pages_found = 0;
for_each_free_mem_range(&it, z->z_info.zd_base, z->z_info.zd_limit) {
for_each_free_mem_range(&it, z->z_info.zd_base, z->z_info.zd_limit)
{
it.it_base &= ~VM_PAGE_MASK;
if (it.it_limit & VM_PAGE_MASK) {
it.it_limit &= ~VM_PAGE_MASK;
it.it_limit += VM_PAGE_SIZE;
}
phys_addr_t block_start = it.it_base, block_end = it.it_limit;
int this_page_reserved = 0, last_page_reserved = -1;
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
for (uintptr_t i = it.it_base; i < it.it_limit;
i += VM_PAGE_SIZE) {
struct vm_page *pg = vm_page_get(i);
if (pg) {
nr_pages_found++;
this_page_reserved = (pg->p_flags & VM_PAGE_RESERVED) ? 1 : 0;
this_page_reserved
= (pg->p_flags & VM_PAGE_RESERVED) ? 1
: 0;
} else {
this_page_reserved = 1;
}
@@ -138,22 +172,30 @@ void vm_zone_init(struct vm_zone *z, const struct vm_zone_descriptor *zone_info)
}
convert_region_to_blocks(
z,
block_start, block_end + VM_PAGE_SIZE - 1,
last_page_reserved);
z,
block_start,
block_end + VM_PAGE_SIZE - 1,
last_page_reserved);
block_start = i;
if (block_start & VM_PAGE_MASK) {
block_start &= ~VM_PAGE_MASK;
block_start += VM_PAGE_SIZE;
}
last_page_reserved = this_page_reserved;
nr_pages_found = 0;
}
if (block_start != block_end) {
/* either the entire zone is homogeneous (all free/all reserved) or the entire zone is empty. */
/* either the entire zone is homogeneous (all free/all
* reserved) or the entire zone is empty. */
if (nr_pages_found > 0) {
/* the entire zone is homogeneous :) */
convert_region_to_blocks(
z,
block_start, block_end + VM_PAGE_SIZE - 1,
this_page_reserved);
z,
block_start,
block_end + VM_PAGE_SIZE - 1,
this_page_reserved);
}
}
}
@@ -201,7 +243,10 @@ void vm_zone_init(struct vm_zone *z, const struct vm_zone_descriptor *zone_info)
char free_bytes_str[64];
data_size_to_string(free_bytes, free_bytes_str, sizeof free_bytes_str);
printk("vm: zone %u/%s: %s of memory online.", z->z_info.zd_node, z->z_info.zd_name, free_bytes_str);
printk("vm: zone %u/%s: %s of memory online.",
z->z_info.zd_node,
z->z_info.zd_name,
free_bytes_str);
}
static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
@@ -216,7 +261,8 @@ static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
return -1;
}
/* the lowest page order that is >= `order` and still has pages available */
/* the lowest page order that is >= `order` and still has pages
* available */
enum vm_page_order first_order_with_free = VM_MAX_PAGE_ORDERS;
for (enum vm_page_order i = order; i <= VM_PAGE_MAX_ORDER; i++) {
@@ -232,7 +278,8 @@ static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
}
if (first_order_with_free == order) {
/* there are free pages of the requested order, so nothing needs to be done */
/* there are free pages of the requested order, so nothing needs
* to be done */
return 0;
}
@@ -240,8 +287,10 @@ static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
take a page, split it in half, and add the sub-pages
to the next order's free list. */
for (enum vm_page_order i = first_order_with_free; i > order; i--) {
struct queue_entry *pg_entry = queue_pop_front(&z->z_free_pages[i]);
struct vm_page *pg = QUEUE_CONTAINER(struct vm_page, p_list, pg_entry);
struct queue_entry *pg_entry
= queue_pop_front(&z->z_free_pages[i]);
struct vm_page *pg
= QUEUE_CONTAINER(struct vm_page, p_list, pg_entry);
struct vm_page *a, *b;
vm_page_split(pg, &a, &b);
@@ -253,7 +302,10 @@ static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
return 0;
}
struct vm_page *vm_zone_alloc_page(struct vm_zone *z, enum vm_page_order order, enum vm_flags flags)
struct vm_page *vm_zone_alloc_page(
struct vm_zone *z,
enum vm_page_order order,
enum vm_flags flags)
{
unsigned long irq_flags;
spin_lock_irqsave(&z->z_lock, &irq_flags);
@@ -266,7 +318,8 @@ struct vm_page *vm_zone_alloc_page(struct vm_zone *z, enum vm_page_order order,
struct queue_entry *pg_entry = queue_pop_front(&z->z_free_pages[order]);
struct vm_page *pg = QUEUE_CONTAINER(struct vm_page, p_list, pg_entry);
vm_page_foreach (pg, i) {
vm_page_foreach(pg, i)
{
i->p_flags |= VM_PAGE_ALLOC;
}
@@ -289,7 +342,9 @@ void vm_zone_free_page(struct vm_zone *z, struct vm_page *pg)
break;
}
queue_delete(&z->z_free_pages[buddy->p_order - 1], &buddy->p_list);
queue_delete(
&z->z_free_pages[buddy->p_order - 1],
&buddy->p_list);
queue_delete(&z->z_free_pages[buddy->p_order - 1], &pg->p_list);
queue_push_back(&z->z_free_pages[huge->p_order], &huge->p_list);