Compare commits

...

14 Commits

SHA1 Message Date
77936e3511 kernel: implement sending, receiving, and replying to message via port/channel 2026-02-21 11:32:57 +00:00
08c78bd6e7 vm: object: add vm_object_copy syscall trace output 2026-02-21 11:30:44 +00:00
2537ca46de libmango: add macros for easily defining msg and iovec variables 2026-02-21 11:29:25 +00:00
3190035086 libmango: add temporary formatted log function 2026-02-21 11:28:58 +00:00
7f049293f4 vm: memblock: add memblock_dump to header 2026-02-21 11:27:28 +00:00
9b2c2f6b29 x86_64: start the kernel bootstrap heap above 16MiB
this will keep the memory area below 16MiB free for DMA memory allocations.
2026-02-21 11:24:36 +00:00
6e39dd45a4 sched: only disable/enable interrupts if schedule() is called from non-IRQ context 2026-02-21 11:23:43 +00:00
855440f584 vm: add trace output 2026-02-21 11:22:51 +00:00
e1e025ab6a vm: region: memmove_v() now supports iovec arrays stored in userspace 2026-02-21 11:20:09 +00:00
0680b73461 kernel: iovec: implement iterating through an iovec list stored in userspace 2026-02-21 11:17:16 +00:00
aa0933be10 vm: region: implement reading from a user-space vm-region into a kernel buffer 2026-02-21 11:16:11 +00:00
8b188a0ac4 vm: region: fix iterator using wrong buffer offset when seek exceeds current buffer size 2026-02-21 11:07:53 +00:00
ed25ee6761 vm: object: fix iterator using wrong buffer offset when seek exceeds current buffer size 2026-02-21 11:07:12 +00:00
0bae39e550 vm: zone: ensure memblock region bounds are page-aligned while creating zone blocks 2026-02-21 11:01:58 +00:00
19 changed files with 645 additions and 113 deletions

View File

@@ -20,6 +20,19 @@
#define PTR32(x) ((void *)((uintptr_t)(x)))
/* the physical address of the start of the memblock heap.
* this is an arbitrary value; the heap can start anywhere in memory.
* any reserved areas of memory (the kernel, bsp, bios data, etc) are
* automatically taken into account.
* HOWEVER, this value will dictate how much physical memory is required for
* the kernel to boot successfully.
* the value of 16MiB (0x1000000) means that all heap allocations will be
* above 16MiB, leaving the area below free for DMA operations.
* this value CAN be reduced all the way to zero to minimise the amount of
* memory required to boot, but this may leave you with no DMA memory available.
*/
#define MEMBLOCK_HEAP_START 0x1000000
static ml_cpu_block g_bootstrap_cpu = {0};
/* start and end of kernel image (physical addresses) */
@@ -33,7 +46,7 @@ static void bootstrap_cpu_init(void)
static void early_vm_init(uintptr_t reserve_end)
{
uintptr_t alloc_start = VM_KERNEL_VOFFSET;
uintptr_t alloc_start = VM_KERNEL_VOFFSET + MEMBLOCK_HEAP_START;
/* boot code mapped 2 GiB of memory from VM_KERNEL_VOFFSET */
uintptr_t alloc_end = VM_KERNEL_VOFFSET + 0x7fffffff;
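A quick sanity check of the constant (illustrative only; this assert is not part of the change): 0x1000000 is exactly 16 MiB, so every bootstrap heap allocation now lands above the legacy DMA window.

/* hypothetical compile-time check for MEMBLOCK_HEAP_START */
_Static_assert(0x1000000 == 16ul * 1024 * 1024,
        "MEMBLOCK_HEAP_START must sit at the 16 MiB boundary");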

View File

@@ -15,6 +15,7 @@ struct channel {
};
extern kern_status_t channel_type_init(void);
extern struct channel *channel_cast(struct object *obj);
extern struct channel *channel_create(void);

View File

@@ -5,6 +5,9 @@
#include <stddef.h>
struct iovec_iterator {
/* if this is set, we are iterating over a list of iovecs stored in
* userspace, and must go through this region to retrieve the data. */
struct vm_region *it_region;
const struct iovec *it_vecs;
size_t it_nr_vecs;
size_t it_vec_ptr;
@@ -17,6 +20,11 @@ extern void iovec_iterator_begin(
struct iovec_iterator *it,
const struct iovec *vecs,
size_t nr_vecs);
extern void iovec_iterator_begin_user(
struct iovec_iterator *it,
struct vm_region *address_space,
const struct iovec *vecs,
size_t nr_vecs);
extern void iovec_iterator_seek(struct iovec_iterator *it, size_t nr_bytes);
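A minimal usage sketch for the new user-space entry point, assuming the caller's address space is reachable as task->t_address_space (as channel.c does below); the variable names are illustrative:

struct iovec_iterator it;
/* each iovec element is fetched through the region with
 * vm_region_read_kernel() instead of being dereferenced directly */
iovec_iterator_begin_user(&it, task->t_address_space, user_vecs, nr_vecs);
iovec_iterator_seek(&it, offset); /* skip `offset` bytes across the list */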

View File

@@ -22,8 +22,8 @@
#ifndef KERNEL_MEMBLOCK_H_
#define KERNEL_MEMBLOCK_H_
#include <limits.h>
#include <kernel/types.h>
#include <limits.h>
#include <stddef.h>
#ifdef __cplusplus
@@ -338,6 +338,8 @@ extern void __next_memory_region(
phys_addr_t start,
phys_addr_t end);
extern void memblock_dump(void);
#ifdef __cplusplus
}
#endif

View File

@@ -23,8 +23,8 @@ struct kmsg {
kern_status_t msg_result;
struct port *msg_sender_port;
struct thread *msg_sender_thread;
const struct msg *msg_req;
struct msg *msg_resp;
struct msg msg_req;
struct msg msg_resp;
};
#endif

View File

@@ -29,10 +29,12 @@ extern struct port *port_cast(struct object *obj);
extern struct port *port_create(void);
extern kern_status_t port_connect(struct port *port, struct channel *remote);
extern kern_status_t port_disconnect(struct port *port);
extern kern_status_t port_send_msg(
struct port *port,
const struct msg *req,
struct msg *resp);
struct msg *resp,
unsigned long *lock_flags);
DEFINE_OBJECT_LOCK_FUNCTION(port, p_base)
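The extra lock_flags parameter lets port_send_msg() drop and retake the port lock while the sender sleeps. A minimal sketch of the expected calling pattern, mirroring sys_msg_send() later in this change:

unsigned long flags;
kern_status_t status;
port_lock_irqsave(port, &flags);
/* may unlock/relock internally while blocked waiting for the reply */
status = port_send_msg(port, req, resp, &flags);
port_unlock_irqrestore(port, flags);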

View File

@@ -122,6 +122,15 @@ extern virt_addr_t vm_region_get_base_address(const struct vm_region *region);
extern void vm_region_dump(struct vm_region *region);
/* read data from the user-space area of a vm-region into a kernel-mode buffer
*/
extern kern_status_t vm_region_read_kernel(
struct vm_region *src_region,
virt_addr_t src_ptr,
size_t count,
void *dest,
size_t *nr_read);
extern kern_status_t vm_region_memmove(
struct vm_region *dest_region,
virt_addr_t dest_ptr,

View File

@@ -18,6 +18,11 @@ kern_status_t channel_type_init(void)
return object_type_register(&channel_type);
}
struct channel *channel_cast(struct object *obj)
{
return CHANNEL_CAST(obj);
}
extern struct channel *channel_create(void)
{
struct object *channel_object = object_create(&channel_type);
@@ -69,26 +74,31 @@ static bool try_enqueue(struct btree *tree, struct kmsg *msg)
return true;
}
static void kmsg_reply_error(struct kmsg *msg, kern_status_t status)
static void kmsg_reply_error(
struct kmsg *msg,
kern_status_t status,
unsigned long *lock_flags)
{
msg->msg_status = KMSG_REPLY_SENT;
msg->msg_status = status;
msg->msg_result = status;
thread_awaken(msg->msg_sender_thread);
spin_unlock_irqrestore(&msg->msg_lock, *lock_flags);
}
static struct kmsg *get_next_msg(struct channel *channel)
static struct kmsg *get_next_msg(
struct channel *channel,
unsigned long *lock_flags)
{
unsigned long flags;
struct btree_node *cur = btree_first(&channel->c_msg);
while (cur) {
struct kmsg *msg = BTREE_CONTAINER(struct kmsg, msg_node, cur);
spin_lock_irqsave(&msg->msg_lock, &flags);
spin_lock_irqsave(&msg->msg_lock, lock_flags);
if (msg->msg_status == KMSG_WAIT_RECEIVE) {
msg->msg_status = KMSG_WAIT_REPLY;
return msg;
}
spin_unlock_irqrestore(&msg->msg_lock, flags);
spin_unlock_irqrestore(&msg->msg_lock, *lock_flags);
cur = btree_next(cur);
}
@@ -118,11 +128,12 @@ extern kern_status_t channel_recv_msg(
struct wait_item waiter;
struct thread *self = current_thread();
struct kmsg *msg = NULL;
unsigned long msg_lock_flags;
wait_item_init(&waiter, self);
for (;;) {
thread_wait_begin(&waiter, &channel->c_wq);
msg = get_next_msg(channel);
msg = get_next_msg(channel, &msg_lock_flags);
if (msg) {
break;
}
@@ -145,11 +156,11 @@ extern kern_status_t channel_recv_msg(
out_msg->msg_data_count,
sender->t_address_space,
0,
msg->msg_req->msg_data,
msg->msg_req->msg_data_count,
msg->msg_req.msg_data,
msg->msg_req.msg_data_count,
VM_REGION_COPY_ALL);
if (status != KERN_OK) {
kmsg_reply_error(msg, status);
kmsg_reply_error(msg, status, &msg_lock_flags);
return status;
}
@@ -158,14 +169,17 @@ extern kern_status_t channel_recv_msg(
out_msg->msg_handles,
out_msg->msg_handles_count,
sender->t_handles,
msg->msg_req->msg_handles,
msg->msg_req->msg_handles_count);
msg->msg_req.msg_handles,
msg->msg_req.msg_handles_count);
if (status != KERN_OK) {
kmsg_reply_error(msg, status);
kmsg_reply_error(msg, status, &msg_lock_flags);
return status;
}
kmsg_reply_error(msg, KERN_OK);
*out_id = msg->msg_id;
spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags);
return KERN_OK;
}
@@ -175,46 +189,54 @@ extern kern_status_t channel_reply_msg(
const struct msg *resp,
unsigned long *irq_flags)
{
unsigned long msg_lock_flags;
struct kmsg *msg = get_msg_with_id(&channel->c_msg, id);
if (!msg || msg->msg_status != KMSG_WAIT_REPLY) {
if (!msg) {
return KERN_INVALID_ARGUMENT;
}
spin_lock_irqsave(&msg->msg_lock, &msg_lock_flags);
if (msg->msg_status != KMSG_WAIT_REPLY) {
spin_unlock_irqrestore(&msg->msg_lock, msg_lock_flags);
return KERN_INVALID_ARGUMENT;
}
struct thread *self = current_thread();
struct task *sender = msg->msg_sender_thread->tr_parent;
struct task *receiver = self->tr_parent;
/* the task that is about to receive the response */
struct task *receiver = msg->msg_sender_thread->tr_parent;
/* the task that is about to send the response */
struct task *sender = self->tr_parent;
kern_status_t status = vm_region_memmove_v(
receiver->t_address_space,
0,
msg->msg_resp->msg_data,
msg->msg_resp->msg_data_count,
msg->msg_resp.msg_data,
msg->msg_resp.msg_data_count,
sender->t_address_space,
0,
resp->msg_data,
resp->msg_data_count,
VM_REGION_COPY_ALL);
if (status != KERN_OK) {
kmsg_reply_error(msg, status);
kmsg_reply_error(msg, status, &msg_lock_flags);
return status;
}
status = handle_list_transfer(
receiver->t_handles,
msg->msg_resp->msg_handles,
msg->msg_resp->msg_handles_count,
msg->msg_resp.msg_handles,
msg->msg_resp.msg_handles_count,
sender->t_handles,
resp->msg_handles,
resp->msg_handles_count);
if (status != KERN_OK) {
kmsg_reply_error(msg, status);
kmsg_reply_error(msg, status, &msg_lock_flags);
return status;
}
msg->msg_status = KERN_OK;
msg->msg_status = KMSG_REPLY_SENT;
kmsg_reply_error(msg, KERN_OK, &msg_lock_flags);
return KERN_UNIMPLEMENTED;
return KERN_OK;
}
extern kern_status_t channel_read_msg(

View File

@@ -1,6 +1,62 @@
#include <kernel/iovec.h>
#include <kernel/libc/string.h>
#include <kernel/util.h>
#include <kernel/vm-region.h>
static bool read_iovec(
struct iovec_iterator *it,
size_t index,
struct iovec *out)
{
if (index >= it->it_nr_vecs) {
return false;
}
if (!it->it_region) {
memcpy(out, &it->it_vecs[index], sizeof *out);
return true;
}
size_t nr_read = 0;
kern_status_t status = vm_region_read_kernel(
it->it_region,
(virt_addr_t)it->it_vecs + (index * sizeof(struct iovec)),
sizeof(struct iovec),
out,
&nr_read);
return (status == KERN_OK && nr_read == sizeof(struct iovec));
}
void iovec_iterator_begin_user(
struct iovec_iterator *it,
struct vm_region *region,
const struct iovec *vecs,
size_t nr_vecs)
{
memset(it, 0x0, sizeof *it);
it->it_region = region;
it->it_vecs = vecs;
it->it_nr_vecs = nr_vecs;
struct iovec iov = {0};
while (it->it_vec_ptr < nr_vecs) {
if (!read_iovec(it, it->it_vec_ptr, &iov)) {
/* unreadable list: treat the iterator as drained */
it->it_vec_ptr = nr_vecs;
break;
}
if (iov.io_len > 0) {
break;
}
it->it_vec_ptr++;
}
if (it->it_vec_ptr >= nr_vecs) {
return;
}
it->it_base = iov.io_base;
it->it_len = iov.io_len;
}
void iovec_iterator_begin(
struct iovec_iterator *it,
@@ -20,6 +76,8 @@ void iovec_iterator_begin(
}
if (it->it_vec_ptr >= nr_vecs) {
it->it_len = 0;
it->it_base = 0;
return;
}
@@ -39,10 +97,12 @@ void iovec_iterator_seek(struct iovec_iterator *it, size_t nr_bytes)
}
nr_bytes -= to_seek;
struct iovec iov;
it->it_vec_ptr++;
while (it->it_vec_ptr < it->it_nr_vecs) {
if (it->it_vecs[it->it_vec_ptr].io_len > 0) {
read_iovec(it, it->it_vec_ptr, &iov);
if (iov.io_len > 0) {
break;
}
@@ -50,10 +110,12 @@ void iovec_iterator_seek(struct iovec_iterator *it, size_t nr_bytes)
}
if (it->it_vec_ptr >= it->it_nr_vecs) {
it->it_len = 0;
it->it_base = 0;
return;
}
it->it_base = it->it_vecs[it->it_vec_ptr].io_base;
it->it_len = it->it_vecs[it->it_vec_ptr].io_len;
it->it_base = iov.io_base;
it->it_len = iov.io_len;
}
}

View File

@@ -20,8 +20,24 @@ struct port *port_cast(struct object *obj)
return PORT_CAST(obj);
}
static void wait_for_reply(struct port *port)
static void wait_for_reply(struct kmsg *msg, unsigned long *lock_flags)
{
struct wait_item waiter;
struct thread *self = current_thread();
wait_item_init(&waiter, self);
for (;;) {
self->tr_state = THREAD_SLEEPING;
if (msg->msg_status == KMSG_REPLY_SENT) {
break;
}
port_unlock_irqrestore(msg->msg_sender_port, *lock_flags);
schedule(SCHED_NORMAL);
port_lock_irqsave(msg->msg_sender_port, lock_flags);
}
self->tr_state = THREAD_READY;
}
struct port *port_create(void)
@@ -49,10 +65,22 @@ kern_status_t port_connect(struct port *port, struct channel *remote)
return KERN_OK;
}
kern_status_t port_disconnect(struct port *port)
{
if (port->p_status != PORT_READY) {
return KERN_BAD_STATE;
}
port->p_remote = NULL;
port->p_status = PORT_OFFLINE;
return KERN_OK;
}
kern_status_t port_send_msg(
struct port *port,
const struct msg *req,
struct msg *resp)
struct msg *resp,
unsigned long *lock_flags)
{
if (port->p_status != PORT_READY) {
return KERN_BAD_STATE;
@@ -60,19 +88,20 @@ kern_status_t port_send_msg(
struct thread *self = current_thread();
struct kmsg *msg = &self->tr_msg;
memset(msg, 0x0, sizeof *msg);
msg->msg_status = KMSG_WAIT_RECEIVE;
msg->msg_sender_thread = self;
msg->msg_sender_port = port;
msg->msg_req = req;
msg->msg_resp = resp;
msg->msg_req = *req;
msg->msg_resp = *resp;
unsigned long flags;
channel_lock_irqsave(port->p_remote, &flags);
port->p_status = PORT_SEND_BLOCKED;
channel_enqueue_msg(port->p_remote, msg);
channel_unlock_irqrestore(port->p_remote, flags);
port->p_status = PORT_SEND_BLOCKED;
wait_for_reply(port);
wait_for_reply(msg, lock_flags);
return msg->msg_result;
}

View File

@@ -3,8 +3,17 @@
#include <mango/status.h>
#undef TRACE
extern kern_status_t kern_log(const char *s);
#define kern_logf(...) \
do { \
char s[128]; \
snprintf(s, sizeof s, __VA_ARGS__); \
kern_log(s); \
} while (0)
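Usage is printf-style, with output truncated to the 128-byte stack buffer; an illustrative call:

kern_logf("port %p: send status %d", port, status);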
#ifdef TRACE
#define kern_trace(...) kern_log(__VA_ARGS__)
#define kern_tracef(...) \

View File

@@ -28,6 +28,19 @@
#define KERN_CFG_INVALID 0x00u
#define KERN_CFG_PAGE_SIZE 0x01u
#define IOVEC(p, len) \
{ \
.io_base = (virt_addr_t)(p), \
.io_len = (len), \
}
#define MSG(data, data_count, handles, handles_len) \
{ \
.msg_data = (data), \
.msg_data_count = (data_count), \
.msg_handles = (handles), \
.msg_handles_count = (handles_len), \
}
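A minimal sketch of the intended use (buffer and variable names are illustrative): the macros are plain initializers, so they compose on the stack.

char buf[64];
struct iovec data[] = { IOVEC(buf, sizeof buf) };
/* a request carrying one data vector and no handles */
struct msg req = MSG(data, 1, NULL, 0);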
typedef uintptr_t phys_addr_t;
typedef uintptr_t virt_addr_t;
typedef uint64_t msgid_t;

View File

@@ -91,7 +91,9 @@ void context_switch(struct thread *old, struct thread *new)
void __schedule(enum sched_mode mode)
{
if (mode != SCHED_IRQ) {
ml_int_disable();
}
struct cpu_data *this_cpu = get_this_cpu();
struct runqueue *rq = &this_cpu->c_rq;
@@ -142,7 +144,9 @@ void __schedule(enum sched_mode mode)
context_switch(prev, next);
}
if (mode != SCHED_IRQ) {
ml_int_enable();
}
}
void schedule(enum sched_mode mode)

View File

@@ -62,14 +62,13 @@ kern_status_t sys_port_create(kern_handle_t *out)
kern_handle_t handle;
kern_status_t status
= task_open_handle(self, &port->p_base, 0, &handle);
if (status != KERN_OK) {
task_unlock_irqrestore(self, irq_flags);
if (status != KERN_OK) {
object_unref(&port->p_base);
return status;
}
task_unlock_irqrestore(self, irq_flags);
*out = handle;
return KERN_OK;
}
@@ -98,6 +97,7 @@ kern_status_t sys_port_connect(
/* add a reference to the port object to make sure it isn't deleted
* while we're using it */
object_ref(port_obj);
struct port *port = port_cast(port_obj);
task_unlock_irqrestore(self, flags);
struct task *remote_task = task_from_tid(task_id);
@@ -116,43 +116,255 @@ kern_status_t sys_port_connect(
object_ref(&remote->c_base);
task_unlock_irqrestore(remote_task, flags);
status = port_connect(port_cast(port_obj), remote);
port_lock_irqsave(port, &flags);
status = port_connect(port, remote);
port_unlock_irqrestore(port, flags);
object_unref(port_obj);
object_unref(&remote->c_base);
return KERN_OK;
}
kern_status_t sys_port_disconnect(kern_handle_t port)
kern_status_t sys_port_disconnect(kern_handle_t port_handle)
{
return KERN_UNIMPLEMENTED;
unsigned long flags;
struct task *self = current_task();
task_lock_irqsave(self, &flags);
struct object *port_obj = NULL;
handle_flags_t port_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
port_handle,
&port_obj,
&port_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the port object to make sure it isn't deleted
* while we're using it */
object_ref(port_obj);
task_unlock_irqrestore(self, flags);
struct port *port = port_cast(port_obj);
if (!port) {
object_unref(port_obj);
return KERN_INVALID_ARGUMENT;
}
port_lock_irqsave(port, &flags);
status = port_disconnect(port);
port_unlock_irqrestore(port, flags);
/* drop our temporary reference only after we are done with the port */
object_unref(port_obj);
return status;
}
static bool validate_msg(struct task *task, const struct msg *msg, bool rw)
{
if (!validate_access_r(task, msg, sizeof *msg)) {
return false;
}
if (msg->msg_data_count
&& !validate_access_r(
task,
msg->msg_data,
sizeof(struct iovec) * msg->msg_data_count)) {
return false;
}
if (msg->msg_handles_count
&& !validate_access_r(
task,
msg->msg_handles,
sizeof(struct handle_list) * msg->msg_handles_count)) {
return false;
}
for (size_t i = 0; i < msg->msg_data_count; i++) {
bool ok = false;
const struct iovec *iov = &msg->msg_data[i];
if (rw) {
ok = validate_access_w(task, iov->io_base, iov->io_len);
} else {
ok = validate_access_r(task, iov->io_base, iov->io_len);
}
if (!ok) {
return false;
}
}
for (size_t i = 0; i < msg->msg_handles_count; i++) {
bool ok = false;
const struct handle_list *list = &msg->msg_handles[i];
if (rw) {
ok = validate_access_w(
task,
list->l_handles,
list->l_nr_handles * sizeof(kern_handle_t));
} else {
ok = validate_access_r(
task,
list->l_handles,
list->l_nr_handles * sizeof(kern_handle_t));
}
if (!ok) {
return false;
}
}
return true;
}
kern_status_t sys_msg_send(
kern_handle_t port,
msg_flags_t flags,
kern_handle_t port_handle,
msg_flags_t msg_flags,
const struct msg *req,
struct msg *resp)
{
return KERN_UNIMPLEMENTED;
struct task *self = current_task();
if (!validate_msg(self, req, false)) {
return KERN_MEMORY_FAULT;
}
if (!validate_msg(self, resp, true)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *port_obj = NULL;
handle_flags_t port_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
port_handle,
&port_obj,
&port_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the port object to make sure it isn't deleted
* while we're using it */
object_ref(port_obj);
task_unlock_irqrestore(self, flags);
struct port *port = port_cast(port_obj);
if (!port) {
object_unref(port_obj);
return KERN_INVALID_ARGUMENT;
}
port_lock_irqsave(port, &flags);
status = port_send_msg(port, req, resp, &flags);
port_unlock_irqrestore(port, flags);
object_unref(port_obj);
return status;
}
kern_status_t sys_msg_recv(
kern_handle_t channel,
msg_flags_t flags,
kern_handle_t channel_handle,
msg_flags_t msg_flags,
msgid_t *out_id,
struct msg *out_msg)
{
return KERN_UNIMPLEMENTED;
struct task *self = current_task();
if (!validate_access_w(self, out_id, sizeof *out_id)) {
return KERN_MEMORY_FAULT;
}
if (!validate_msg(self, out_msg, true)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *channel_obj = NULL;
handle_flags_t channel_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
channel_handle,
&channel_obj,
&channel_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the channel object to make sure it isn't deleted
* while we're using it */
object_ref(channel_obj);
task_unlock_irqrestore(self, flags);
struct channel *channel = channel_cast(channel_obj);
if (!channel) {
object_unref(channel_obj);
return KERN_INVALID_ARGUMENT;
}
channel_lock_irqsave(channel, &flags);
status = channel_recv_msg(channel, out_msg, out_id, &flags);
channel_unlock_irqrestore(channel, flags);
object_unref(channel_obj);
return status;
}
kern_status_t sys_msg_reply(
kern_handle_t channel,
msg_flags_t flags,
kern_handle_t channel_handle,
msg_flags_t msg_flags,
msgid_t id,
const struct msg *reply)
{
return KERN_UNIMPLEMENTED;
struct task *self = current_task();
if (!validate_msg(self, reply, false)) {
return KERN_MEMORY_FAULT;
}
unsigned long flags;
task_lock_irqsave(self, &flags);
struct object *channel_obj = NULL;
handle_flags_t channel_handle_flags = 0;
kern_status_t status = task_resolve_handle(
self,
channel_handle,
&channel_obj,
&channel_handle_flags);
if (status != KERN_OK) {
task_unlock_irqrestore(self, flags);
return status;
}
/* add a reference to the channel object to make sure it isn't deleted
* while we're using it */
object_ref(channel_obj);
task_unlock_irqrestore(self, flags);
struct channel *channel = channel_cast(channel_obj);
if (!channel) {
object_unref(channel_obj);
return KERN_INVALID_ARGUMENT;
}
channel_lock_irqsave(channel, &flags);
status = channel_reply_msg(channel, id, reply, &flags);
channel_unlock_irqrestore(channel, flags);
object_unref(channel_obj);
return status;
}
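Taken together, the three syscalls form the usual synchronous round trip. A sketch of the flow, assuming user-side wrappers of the same shape (the wrapper names msg_send/msg_recv/msg_reply are illustrative, not a confirmed libmango API):

struct msg req, resp, in_msg, out;
kern_status_t status;
msgid_t id;

/* client: blocks inside port_send_msg() until the reply arrives */
status = msg_send(port_handle, 0, &req, &resp);

/* server: */
status = msg_recv(channel_handle, 0, &id, &in_msg); /* msg -> KMSG_WAIT_REPLY */
/* ... act on the request ... */
status = msg_reply(channel_handle, 0, id, &out); /* wakes the blocked sender */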
kern_status_t sys_msg_read(

View File

@@ -111,6 +111,13 @@ kern_status_t sys_vm_object_copy(
size_t count,
size_t *nr_copied)
{
tracek("vm_object_copy(%x, %zx, %x, %zx, %zx, %p)",
dst,
dst_offset,
src,
src_offset,
count,
nr_copied);
struct task *self = current_task();
if (nr_copied

View File

@@ -19,10 +19,11 @@
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
*/
#include <limits.h>
#include <kernel/libc/string.h>
#include <kernel/memblock.h>
#include <kernel/printk.h>
#include <kernel/types.h>
#include <limits.h>
#include <stdbool.h>
#define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -447,10 +448,10 @@ void __next_memory_region(
}
/* we want the area that is overlapped by both
region M (m_start - m_end) : The region defined
as system memory. region R (r_start - r_end) : The
region defined as free / outside of any reserved
regions.
- region M (m_start - m_end) : The region defined
as system memory.
- region R (r_start - r_end) : The region defined as
free / outside of any reserved regions.
*/
it->it_base = MAX(m_start, r_start);
it->it_limit = MIN(m_end, r_end);
@@ -497,3 +498,28 @@ void *memblock_phys_to_virt(phys_addr_t p)
{
return (void *)(p + memblock.m_voffset);
}
#ifdef TRACE
static void memblock_type_dump(struct memblock_type *type)
{
tracek("%s:", type->name);
for (size_t i = 0; i < type->count; i++) {
tracek(" [%zx-%zx]",
type->regions[i].base,
type->regions[i].limit);
}
}
extern void memblock_dump(void)
{
memblock_type_dump(&memblock.memory);
memblock_type_dump(&memblock.reserved);
tracek("free:");
struct memblock_iter it;
for_each_free_mem_range(&it, 0, ADDR_MAX)
{
tracek(" [%zx-%zx]", it.it_base, it.it_limit);
}
}
#endif
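An illustrative call site (placement is an assumption, not part of this change):

#ifdef TRACE
memblock_dump(); /* e.g. after early memblock init, to verify the new heap start */
#endif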

View File

@@ -96,8 +96,12 @@ static kern_status_t object_iterator_seek(
}
if (it->it_pg) {
it->it_buf = vm_page_get_vaddr(it->it_pg);
virt_addr_t vaddr = (virt_addr_t)vm_page_get_vaddr(it->it_pg);
vaddr += (it->it_offset & VM_PAGE_MASK);
it->it_buf = (void *)vaddr;
it->it_max = vm_page_get_size_bytes(it->it_pg);
it->it_max -= (it->it_offset & VM_PAGE_MASK);
} else {
struct btree_node *n = btree_first(&it->it_obj->vo_pages);
struct vm_page *pg
@@ -112,7 +116,8 @@ static kern_status_t object_iterator_seek(
}
it->it_buf = NULL;
it->it_max = pg ? pg->p_vmo_offset
it->it_max
= pg ? pg->p_vmo_offset - (it->it_offset & VM_PAGE_MASK)
: it->it_obj->vo_size - it->it_offset;
}
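A worked check of the new offset math, assuming VM_PAGE_SIZE == 0x1000 (so VM_PAGE_MASK == 0xfff):

/* seek to it_offset = 0x1234 inside a mapped page:
 *   in-page offset = 0x1234 & 0xfff = 0x234
 *   it_buf         = page_vaddr + 0x234
 *   it_max         = 0x1000 - 0x234 = 0xdcc bytes left in the page
 * previously it_buf pointed at the page base, so reads after a seek
 * started from the wrong offset. */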
@@ -281,6 +286,10 @@ extern struct vm_page *vm_object_alloc_page(
return NULL;
}
tracek("vm-object: [%s] alloc offset %zx -> page %zx",
vo->vo_name,
offset,
vm_page_get_paddr(page));
page->p_vmo_offset = offset;
vo->vo_pages.b_root = &page->p_bnode;
btree_insert_fixup(&vo->vo_pages, &page->p_bnode);

View File

@@ -545,8 +545,8 @@ static void vm_iterator_begin(
buffer_size += vm_page_get_size_bytes(next);
}
it->it_buf = buffer_base;
it->it_max = buffer_size;
it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK);
it->it_max = buffer_size - (object_offset & VM_PAGE_MASK);
}
static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
@@ -615,7 +615,7 @@ static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
buffer_size += vm_page_get_size_bytes(next);
}
it->it_buf = buffer_base;
it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK);
it->it_max = buffer_size;
return KERN_OK;
}
@@ -876,6 +876,15 @@ kern_status_t vm_region_map_object(
return KERN_INVALID_ARGUMENT;
}
tracek("vm_region_map_object(%s, %zx, %s, %zx, %zx, %x, %p)",
region->vr_name,
region_offset,
object->vo_name,
object_offset,
length,
prot,
out);
struct vm_region_mapping *mapping
= vm_cache_alloc(&mapping_cache, VM_NORMAL);
if (!mapping) {
@@ -1239,6 +1248,46 @@ virt_addr_t vm_region_get_base_address(const struct vm_region *region)
return entry_absolute_address(&region->vr_entry);
}
kern_status_t vm_region_read_kernel(
struct vm_region *src_region,
virt_addr_t src_ptr,
size_t count,
void *destp,
size_t *nr_read)
{
struct vm_iterator src;
char *dest = destp;
vm_iterator_begin(
&src,
src_region,
src_ptr,
VM_PROT_READ | VM_PROT_USER);
kern_status_t status = KERN_OK;
size_t r = 0;
while (r < count && src.it_max) {
size_t remaining = count - r;
size_t to_move = MIN(src.it_max, remaining);
memmove(dest, src.it_buf, to_move);
status = vm_iterator_seek(&src, to_move);
if (status != KERN_OK) {
break;
}
r += to_move;
dest += to_move;
}
if (nr_read) {
*nr_read = r;
}
return status;
}
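read_iovec() in kernel/iovec.c is the first consumer; a minimal sketch of the call pattern (names are illustrative):

struct iovec iov;
size_t nr_read = 0;
kern_status_t st = vm_region_read_kernel(
        region,   /* the user address space to read from */
        user_ptr, /* user virtual address of the struct */
        sizeof iov,
        &iov,
        &nr_read);
if (st != KERN_OK || nr_read != sizeof iov)
        return KERN_MEMORY_FAULT; /* short read: treat as a fault */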
kern_status_t vm_region_memmove(
struct vm_region *dest_region,
virt_addr_t dest_ptr,
@@ -1299,8 +1348,8 @@ extern kern_status_t vm_region_memmove_v(
size_t bytes_to_move)
{
struct iovec_iterator src, dest;
iovec_iterator_begin(&src, src_vecs, nr_src_vecs);
iovec_iterator_begin(&dest, dest_vecs, nr_dest_vecs);
iovec_iterator_begin_user(&src, src_region, src_vecs, nr_src_vecs);
iovec_iterator_begin_user(&dest, dest_region, dest_vecs, nr_dest_vecs);
iovec_iterator_seek(&src, src_offset);
iovec_iterator_seek(&dest, dest_offset);

vm/zone.c (127 lines changed)
View File

@@ -1,14 +1,19 @@
#include <kernel/locks.h>
#include <kernel/util.h>
#include <kernel/queue.h>
#include <kernel/memblock.h>
#include <kernel/types.h>
#include <kernel/vm.h>
#include <kernel/printk.h>
#include <kernel/libc/string.h>
#include <kernel/locks.h>
#include <kernel/machine/cpu.h>
#include <kernel/memblock.h>
#include <kernel/panic.h>
#include <kernel/printk.h>
#include <kernel/queue.h>
#include <kernel/types.h>
#include <kernel/util.h>
#include <kernel/vm.h>
static struct vm_page *group_pages_into_block(struct vm_zone *z, phys_addr_t base, phys_addr_t limit, int order)
static struct vm_page *group_pages_into_block(
struct vm_zone *z,
phys_addr_t base,
phys_addr_t limit,
int order)
{
struct vm_page *first_page = NULL;
for (phys_addr_t i = base; i < limit; i += VM_PAGE_SIZE) {
@@ -37,16 +42,23 @@ static struct vm_page *group_pages_into_block(struct vm_zone *z, phys_addr_t bas
return first_page;
}
static void convert_region_to_blocks(struct vm_zone *zone,
phys_addr_t base, phys_addr_t limit,
static void convert_region_to_blocks(
struct vm_zone *zone,
phys_addr_t base,
phys_addr_t limit,
int reserved)
{
if (base & VM_PAGE_MASK || (limit + 1) & VM_PAGE_MASK) {
panic("convert_region_to_blocks: region must be page-aligned");
}
size_t block_frames = vm_bytes_to_pages(limit - base + 1);
int reset_order = 0;
for (int order = VM_PAGE_MAX_ORDER; order >= VM_PAGE_MIN_ORDER; ) {
for (int order = VM_PAGE_MAX_ORDER; order >= VM_PAGE_MIN_ORDER;) {
size_t order_frames = vm_page_order_to_pages(order);
vm_alignment_t order_alignment = vm_page_order_to_alignment(order);
vm_alignment_t order_alignment
= vm_page_order_to_alignment(order);
if (order_frames > block_frames) {
order--;
@@ -59,11 +71,18 @@ static void convert_region_to_blocks(struct vm_zone *zone,
continue;
}
phys_addr_t block_limit = base + (order_frames * VM_PAGE_SIZE) - 1;
struct vm_page *block_page = group_pages_into_block(zone, base, block_limit, order);
phys_addr_t block_limit
= base + (order_frames * VM_PAGE_SIZE) - 1;
struct vm_page *block_page = group_pages_into_block(
zone,
base,
block_limit,
order);
if (reserved == 0) {
queue_push_back(&zone->z_free_pages[order], &block_page->p_list);
queue_push_back(
&zone->z_free_pages[order],
&block_page->p_list);
}
base = block_limit + 1;
@@ -83,10 +102,12 @@ static void convert_region_to_blocks(struct vm_zone *zone,
static size_t zone_free_bytes(struct vm_zone *z)
{
size_t free_bytes = 0;
for (enum vm_page_order i = VM_PAGE_MIN_ORDER; i <= VM_PAGE_MAX_ORDER; i++) {
for (enum vm_page_order i = VM_PAGE_MIN_ORDER; i <= VM_PAGE_MAX_ORDER;
i++) {
size_t page_bytes = vm_page_order_to_bytes(i);
size_t nr_pages = 0;
queue_foreach (struct vm_page, pg, &z->z_free_pages[i], p_list) {
queue_foreach(struct vm_page, pg, &z->z_free_pages[i], p_list)
{
free_bytes += page_bytes;
nr_pages++;
}
@@ -106,24 +127,37 @@ void vm_zone_init(struct vm_zone *z, const struct vm_zone_descriptor *zone_info)
struct memblock_iter it;
/* TODO this only creates page blocks for free memory regions, not reserved memory regions.
* this is faster for systems that have huge amounts of reserved memory, but it means
* that a call to vm_page_get() for a reserved memory region will return null
* rather than a reserved page.
/* TODO this only creates page blocks for free memory regions, not
* reserved memory regions. this is faster for systems that have huge
* amounts of reserved memory, but it means that a call to vm_page_get()
* for a reserved memory region will return null rather than a reserved
* page.
*
* vm_page_get() should probably create reserved pages on-demand for these regions. */
* vm_page_get() should probably create reserved pages on-demand for
* these regions. */
size_t nr_pages_found = 0;
for_each_free_mem_range(&it, z->z_info.zd_base, z->z_info.zd_limit) {
for_each_free_mem_range(&it, z->z_info.zd_base, z->z_info.zd_limit)
{
it.it_base &= ~VM_PAGE_MASK;
if (it.it_limit & VM_PAGE_MASK) {
it.it_limit &= ~VM_PAGE_MASK;
it.it_limit += VM_PAGE_SIZE;
}
phys_addr_t block_start = it.it_base, block_end = it.it_limit;
int this_page_reserved = 0, last_page_reserved = -1;
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
for (uintptr_t i = it.it_base; i < it.it_limit;
i += VM_PAGE_SIZE) {
struct vm_page *pg = vm_page_get(i);
if (pg) {
nr_pages_found++;
this_page_reserved = (pg->p_flags & VM_PAGE_RESERVED) ? 1 : 0;
this_page_reserved
= (pg->p_flags & VM_PAGE_RESERVED) ? 1
: 0;
} else {
this_page_reserved = 1;
}
@@ -139,20 +173,28 @@ void vm_zone_init(struct vm_zone *z, const struct vm_zone_descriptor *zone_info)
convert_region_to_blocks(
z,
block_start, block_end + VM_PAGE_SIZE - 1,
block_start,
block_end + VM_PAGE_SIZE - 1,
last_page_reserved);
block_start = i;
if (block_start & VM_PAGE_MASK) {
block_start &= ~VM_PAGE_MASK;
block_start += VM_PAGE_SIZE;
}
last_page_reserved = this_page_reserved;
nr_pages_found = 0;
}
if (block_start != block_end) {
/* either the entire zone is homogeneous (all free/all reserved) or the entire zone is empty. */
/* either the entire zone is homogeneous (all free/all
* reserved) or the entire zone is empty. */
if (nr_pages_found > 0) {
/* the entire zone is homogeneous :) */
convert_region_to_blocks(
z,
block_start, block_end + VM_PAGE_SIZE - 1,
block_start,
block_end + VM_PAGE_SIZE - 1,
this_page_reserved);
}
}
@@ -201,7 +243,10 @@ void vm_zone_init(struct vm_zone *z, const struct vm_zone_descriptor *zone_info)
char free_bytes_str[64];
data_size_to_string(free_bytes, free_bytes_str, sizeof free_bytes_str);
printk("vm: zone %u/%s: %s of memory online.", z->z_info.zd_node, z->z_info.zd_name, free_bytes_str);
printk("vm: zone %u/%s: %s of memory online.",
z->z_info.zd_node,
z->z_info.zd_name,
free_bytes_str);
}
static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
@@ -216,7 +261,8 @@ static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
return -1;
}
/* the lowest page order that is >= `order` and still has pages available */
/* the lowest page order that is >= `order` and still has pages
* available */
enum vm_page_order first_order_with_free = VM_MAX_PAGE_ORDERS;
for (enum vm_page_order i = order; i <= VM_PAGE_MAX_ORDER; i++) {
@@ -232,7 +278,8 @@ static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
}
if (first_order_with_free == order) {
/* there are free pages of the requested order, so nothing needs to be done */
/* there are free pages of the requested order, so nothing needs
* to be done */
return 0;
}
@@ -240,8 +287,10 @@ static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
take a page, split it in half, and add the sub-pages
to the next order's free list. */
for (enum vm_page_order i = first_order_with_free; i > order; i--) {
struct queue_entry *pg_entry = queue_pop_front(&z->z_free_pages[i]);
struct vm_page *pg = QUEUE_CONTAINER(struct vm_page, p_list, pg_entry);
struct queue_entry *pg_entry
= queue_pop_front(&z->z_free_pages[i]);
struct vm_page *pg
= QUEUE_CONTAINER(struct vm_page, p_list, pg_entry);
struct vm_page *a, *b;
vm_page_split(pg, &a, &b);
@@ -253,7 +302,10 @@ static int replenish_free_page_list(struct vm_zone *z, enum vm_page_order order)
return 0;
}
struct vm_page *vm_zone_alloc_page(struct vm_zone *z, enum vm_page_order order, enum vm_flags flags)
struct vm_page *vm_zone_alloc_page(
struct vm_zone *z,
enum vm_page_order order,
enum vm_flags flags)
{
unsigned long irq_flags;
spin_lock_irqsave(&z->z_lock, &irq_flags);
@@ -266,7 +318,8 @@ struct vm_page *vm_zone_alloc_page(struct vm_zone *z, enum vm_page_order order,
struct queue_entry *pg_entry = queue_pop_front(&z->z_free_pages[order]);
struct vm_page *pg = QUEUE_CONTAINER(struct vm_page, p_list, pg_entry);
vm_page_foreach (pg, i) {
vm_page_foreach(pg, i)
{
i->p_flags |= VM_PAGE_ALLOC;
}
@@ -289,7 +342,9 @@ void vm_zone_free_page(struct vm_zone *z, struct vm_page *pg)
break;
}
queue_delete(&z->z_free_pages[buddy->p_order - 1], &buddy->p_list);
queue_delete(
&z->z_free_pages[buddy->p_order - 1],
&buddy->p_list);
queue_delete(&z->z_free_pages[buddy->p_order - 1], &pg->p_list);
queue_push_back(&z->z_free_pages[huge->p_order], &huge->p_list);