vm: implement copying data between different vm-regions

This commit is contained in:
2026-02-19 19:15:15 +00:00
parent 85006411bd
commit 2b7e5368c9

View File

@@ -22,6 +22,25 @@
region_find_free_area_linear(region, length)
#endif
/* Iterates over a range of mapped virtual memory in a region, and provides
 * a moving buffer through which the memory can be accessed.
 * Initialised by vm_iterator_begin() and advanced by vm_iterator_seek(). */
struct vm_iterator {
/* region whose address range is being walked */
struct vm_region *it_region;
/* mapping located for the starting address by vm_iterator_begin();
 * NULL if no mapping was found */
struct vm_region_mapping *it_mapping;
/* virtual address the iterator currently refers to; vm_iterator_seek()
 * adds the number of bytes consumed to it */
virt_addr_t it_base;
/* protection bits that every mapping visited must grant */
vm_prot_t it_prot;
/* buffer obtained from the backing page(s) through which the memory is
 * accessed; NULL when nothing is accessible */
void *it_buf;
/* number of bytes usable at it_buf; 0 when nothing is accessible */
size_t it_max;
};
/* Iterates recursively over the entries in a region.
 * Initialised by entry_iterator_begin() and advanced by
 * entry_iterator_move_next(). */
struct entry_iterator {
/* region at which the walk started */
struct vm_region *it_root;
/* entry currently being visited; set to NULL once the walk is finished */
struct vm_region_entry *it_entry;
/* depth of it_entry relative to it_root */
unsigned int it_depth;
};
enum search_direction {
SEARCH_LEFT,
SEARCH_RIGHT,
@@ -62,7 +81,7 @@ static struct vm_region_mapping *mapping_from_entry(
return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry);
}
static virt_addr_t entry_absolute_address(struct vm_region_entry *entry)
static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry)
{
virt_addr_t result = 0;
while (entry) {
@@ -285,11 +304,11 @@ static virt_addr_t region_find_free_area_ex(
switch (direction) {
case SEARCH_LEFT:
right_node = start;
left_node = start ? btree_left(start) : NULL;
left_node = start ? btree_prev(start) : NULL;
break;
case SEARCH_RIGHT:
left_node = start;
right_node = start ? btree_left(start) : NULL;
right_node = start ? btree_next(start) : NULL;
break;
default:
return 0;
@@ -361,7 +380,7 @@ static off_t region_find_free_area_linear(
size_t target_length)
{
if (!region->vr_entries.b_root) {
return region->vr_entry.e_offset;
return 0;
}
return region_find_free_area_ex(
@@ -403,16 +422,16 @@ static bool region_is_area_free(
off_t base,
size_t len)
{
if (len >= region->vr_entry.e_size) {
return false;
}
if (base + len > region->vr_entry.e_size) {
return false;
}
off_t limit = base + len - 1;
if (base >= region->vr_entry.e_size) {
return false;
}
if (limit >= region->vr_entry.e_size) {
return false;
}
struct btree_node *cur = region->vr_entries.b_root;
if (!cur) {
return true;
@@ -441,7 +460,7 @@ static bool region_is_area_free(
static kern_status_t region_validate_allocation(
struct vm_region *parent,
enum vm_prot prot,
vm_prot_t prot,
off_t *offp,
size_t len)
{
@@ -455,10 +474,13 @@ static kern_status_t region_validate_allocation(
if (offset == VM_REGION_ANY_OFFSET) {
offset = region_find_free_area(parent, len);
if (offset == 0) {
return KERN_NO_MEMORY;
*offp = offset;
return (offset == INVALID_OFFSET) ? KERN_NO_MEMORY : KERN_OK;
}
} else if (!region_is_area_free(parent, offset, len)) {
offset &= ~VM_PAGE_MASK;
if (!region_is_area_free(parent, offset, len)) {
return KERN_INVALID_ARGUMENT;
}
@@ -466,6 +488,248 @@ static kern_status_t region_validate_allocation(
return KERN_OK;
}
/* Initialise `it` to walk the memory of `region` starting at virtual address
 * `base`. Every mapping touched must grant all of the bits in `prot`.
 *
 * On return it_buf/it_max describe the first run of accessible bytes. When
 * the address is not covered by a mapping, the mapping does not grant `prot`,
 * or no backing page could be obtained, it_buf is NULL and it_max is 0.
 *
 * For writable access the backing page is allocated on demand; for read-only
 * access only an existing page is looked up. */
static void vm_iterator_begin(
	struct vm_iterator *it,
	struct vm_region *region,
	virt_addr_t base,
	vm_prot_t prot)
{
	memset(it, 0x0, sizeof *it);
	it->it_base = base;
	it->it_region = region;
	it->it_prot = prot;

	off_t offset = base - vm_region_get_base_address(region);
	it->it_mapping = region_get_mapping_recursive(region, &offset, 1);
	if (!it->it_mapping || (it->it_mapping->m_prot & prot) != prot) {
		return;
	}

	off_t object_offset = offset - it->it_mapping->m_entry.e_offset
			      + it->it_mapping->m_object_offset;

	struct vm_page *pg = NULL;
	if (prot & VM_PROT_WRITE) {
		pg = vm_object_alloc_page(
			it->it_mapping->m_object,
			object_offset,
			VM_PAGE_4K);
	} else {
		pg = vm_object_get_page(
			it->it_mapping->m_object,
			object_offset);
	}

	if (!pg) {
		return;
	}

	/* NOTE(review): the buffer starts at the page's base address; if
	 * `base` is not page-aligned the offset into the page does not appear
	 * to be applied here, and the run is not clamped to the end of the
	 * mapping -- confirm against the vm_object/vm_page API. */
	void *buffer_base = vm_page_get_vaddr(pg);
	phys_addr_t pg_addr = vm_page_get_paddr(pg);
	size_t buffer_size = vm_page_get_size_bytes(pg);

	/* extend the buffer over every following page that is physically
	 * contiguous with the previous one */
	while (1) {
		struct btree_node *next_node = btree_next(&pg->p_bnode);
		if (!next_node) {
			/* test the node itself: converting a NULL node to its
			 * container is not guaranteed to yield NULL */
			break;
		}

		struct vm_page *next
			= BTREE_CONTAINER(struct vm_page, p_bnode, next_node);

		phys_addr_t next_addr = vm_page_get_paddr(next);
		if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) {
			break;
		}

		pg = next;
		pg_addr = next_addr;
		buffer_size += vm_page_get_size_bytes(next);
	}

	it->it_buf = buffer_base;
	it->it_max = buffer_size;
}
/* Advance `it` by `nr_bytes`.
 *
 * While the new position stays inside the current buffer the window is simply
 * moved forward. Otherwise the mapping covering the new address is looked up
 * and a fresh buffer is built from its backing pages.
 *
 * Returns KERN_OK on success, KERN_MEMORY_FAULT if the new address is not
 * covered by a mapping granting it_prot, or KERN_NO_MEMORY if no backing page
 * could be obtained. On failure it_buf is NULL and it_max is 0. */
static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
{
	if (nr_bytes < it->it_max) {
		it->it_base += nr_bytes;
		it->it_buf = (char *)it->it_buf + nr_bytes;
		it->it_max -= nr_bytes;
		return KERN_OK;
	}

	it->it_base += nr_bytes;
	off_t offset = it->it_base - vm_region_get_base_address(it->it_region);

	struct vm_region_mapping *next_mapping
		= region_get_mapping_recursive(it->it_region, &offset, 1);
	if (!next_mapping
	    || (next_mapping->m_prot & it->it_prot) != it->it_prot) {
		it->it_buf = NULL;
		it->it_max = 0;
		return KERN_MEMORY_FAULT;
	}

	/* the new address may belong to a different mapping than the previous
	 * buffer did: all further lookups must use the mapping just found,
	 * not the one recorded earlier */
	it->it_mapping = next_mapping;

	off_t object_offset = offset - next_mapping->m_entry.e_offset
			      + next_mapping->m_object_offset;

	struct vm_page *pg = NULL;
	if (it->it_prot & VM_PROT_WRITE) {
		pg = vm_object_alloc_page(
			next_mapping->m_object,
			object_offset,
			VM_PAGE_4K);
	} else {
		pg = vm_object_get_page(next_mapping->m_object, object_offset);
	}

	if (!pg) {
		/* it_base has already moved on, so the old buffer no longer
		 * corresponds to it */
		it->it_buf = NULL;
		it->it_max = 0;
		return KERN_NO_MEMORY;
	}

	void *buffer_base = vm_page_get_vaddr(pg);
	phys_addr_t pg_addr = vm_page_get_paddr(pg);
	size_t buffer_size = vm_page_get_size_bytes(pg);

	/* extend the buffer over every following page that is physically
	 * contiguous with the previous one */
	while (1) {
		struct btree_node *next_node = btree_next(&pg->p_bnode);
		if (!next_node) {
			/* test the node itself: converting a NULL node to its
			 * container is not guaranteed to yield NULL */
			break;
		}

		struct vm_page *next
			= BTREE_CONTAINER(struct vm_page, p_bnode, next_node);

		phys_addr_t next_addr = vm_page_get_paddr(next);
		if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) {
			break;
		}

		pg = next;
		pg_addr = next_addr;
		buffer_size += vm_page_get_size_bytes(next);
	}

	it->it_buf = buffer_base;
	it->it_max = buffer_size;
	return KERN_OK;
}
static void entry_iterator_begin(
struct entry_iterator *it,
struct vm_region *root)
{
memset(it, 0x0, sizeof *it);
it->it_root = root;
it->it_entry = &root->vr_entry;
}
/* Advance `it` to the next entry: the first child if the current entry is a
 * region with children, otherwise the next sibling of the current entry or of
 * its closest ancestor that has one.
 *
 * The walk never leaves the subtree of it_root; it_entry is set to NULL once
 * every entry below the root has been visited. it_depth is kept in step with
 * the nesting level of it_entry. */
static void entry_iterator_move_next(struct entry_iterator *it)
{
	if (!it->it_entry) {
		/* already finished */
		return;
	}

	struct vm_region *region = region_from_entry(it->it_entry);
	bool has_children = (region && !btree_empty(&region->vr_entries));

	if (has_children) {
		/* visit the first child */
		struct btree_node *node = btree_first(&region->vr_entries);
		struct vm_region_entry *entry
			= BTREE_CONTAINER(struct vm_region_entry, e_node, node);
		it->it_depth++;
		it->it_entry = entry;
		return;
	}

	/* go back up until we find a right sibling. */
	struct vm_region_entry *cur = it->it_entry;
	while (1) {
		/* the root's own siblings are outside the walk, so stop
		 * BEFORE looking for a sibling: when the root is itself
		 * nested in another region, its entry has neighbours in the
		 * parent's tree that must not be visited */
		if (cur == &it->it_root->vr_entry) {
			it->it_entry = NULL;
			return;
		}

		struct btree_node *sibling = btree_next(&cur->e_node);
		if (sibling) {
			it->it_entry = BTREE_CONTAINER(
				struct vm_region_entry,
				e_node,
				sibling);
			return;
		}

		struct vm_region_entry *parent_entry = cur->e_parent;
		struct vm_region *parent = region_from_entry(parent_entry);
		if (!parent) {
			it->it_entry = NULL;
			return;
		}

		it->it_depth--;
		cur = parent_entry;
	}
}
/* Position `it` on the first mapping below `root` that overlaps the range
 * [offset, offset + length), both expressed relative to `root`.
 *
 * On success *offp receives the root-relative base of that mapping. If no
 * mapping overlaps the range, it->it_entry is NULL and *offp is left
 * untouched. */
static void mapping_iterator_begin(
	struct entry_iterator *it,
	struct vm_region *root,
	off_t offset,
	size_t length,
	off_t *offp)
{
	/* exclusive end of the requested range */
	off_t end = offset + (off_t)length;

	entry_iterator_begin(it, root);
	while (it->it_entry) {
		if (it->it_entry->e_type == VM_REGION_ENTRY_MAPPING) {
			off_t base = entry_absolute_address(it->it_entry)
				     - root->vr_entry.e_offset;
			off_t limit = base + it->it_entry->e_size - 1;

			/* the range begins inside this mapping */
			bool starts_inside = (offset >= base && offset <= limit);
			/* the range begins before this mapping and reaches
			 * into (or right across) it; a range that merely ends
			 * where the mapping begins does not overlap */
			bool reaches_into = (offset < base && end > base);

			if (starts_inside || reaches_into) {
				*offp = base;
				return;
			}
		}

		entry_iterator_move_next(it);
	}
}
/* Step `it` forward to the next mapping entry, skipping over any other kind
 * of entry.
 *
 * If that mapping starts at or beyond offset + length the iteration is ended
 * by clearing it->it_entry; otherwise *offp receives the base of the mapping
 * relative to the iterator's root. */
static void mapping_iterator_move_next(
	struct entry_iterator *it,
	off_t offset,
	size_t length,
	off_t *offp)
{
	for (;;) {
		entry_iterator_move_next(it);

		if (!it->it_entry) {
			/* ran out of entries */
			return;
		}

		if (it->it_entry->e_type == VM_REGION_ENTRY_MAPPING) {
			break;
		}
	}

	off_t mapping_base = entry_absolute_address(it->it_entry)
			     - it->it_root->vr_entry.e_offset;

	if (mapping_base < offset + length) {
		*offp = mapping_base;
		return;
	}

	/* this mapping lies past the range of interest */
	it->it_entry = NULL;
}
/*** PUBLIC API ***************************************************************/
kern_status_t vm_region_type_init(void)
@@ -550,17 +814,32 @@ kern_status_t vm_region_map_object(
struct vm_object *object,
off_t object_offset,
size_t length,
enum vm_prot prot,
vm_prot_t prot,
virt_addr_t *out)
{
object_offset &= ~VM_PAGE_MASK;
if (region_offset != VM_REGION_ANY_OFFSET) {
off_t limit = region_offset + length;
if (region_offset & VM_PAGE_MASK) {
region_offset &= ~VM_PAGE_MASK;
}
if (limit & VM_PAGE_MASK) {
limit &= ~VM_PAGE_MASK;
limit += VM_PAGE_SIZE;
}
length = limit - region_offset;
}
if (length & VM_PAGE_MASK) {
length &= ~VM_PAGE_MASK;
length += VM_PAGE_SIZE;
}
if (!region || !object || !out) {
if (!region || !object) {
return KERN_INVALID_ARGUMENT;
}
@@ -611,9 +890,6 @@ kern_status_t vm_region_map_object(
mapping->m_entry.e_offset = region_offset;
mapping->m_entry.e_size = length;
region_put_entry(region, &mapping->m_entry);
queue_push_back(&object->vo_mappings, &mapping->m_object_entry);
#ifdef TRACE
virt_addr_t abs_base = entry_absolute_address(&mapping->m_entry);
tracek("mapping %s at [%llx-%llx]",
@@ -621,11 +897,304 @@ kern_status_t vm_region_map_object(
abs_base,
abs_base + length);
#endif
region_put_entry(region, &mapping->m_entry);
queue_push_back(&object->vo_mappings, &mapping->m_object_entry);
if (out) {
*out = entry_absolute_address(&mapping->m_entry);
}
return KERN_OK;
}
/* Unmap some pages in the middle of a mapping, splitting it into two separate
 * mappings.
 *
 * `mapping_offset`, `unmap_offset` and `unmap_limit` are relative to `root`;
 * [unmap_offset, unmap_limit) must lie strictly inside the mapping. The
 * existing mapping is shrunk to become the left half and a newly allocated
 * mapping covering the right half is inserted into the same parent region.
 *
 * Returns KERN_NO_MEMORY if the right half could not be allocated (nothing is
 * modified in that case), KERN_OK otherwise. */
static kern_status_t split_mapping(
	struct vm_region_mapping *mapping,
	struct vm_region *root,
	off_t mapping_offset,
	off_t unmap_offset,
	off_t unmap_limit)
{
	tracek("split mapping [%zx-%zx] subtract [%zx-%zx]",
	       mapping_offset,
	       mapping_offset + mapping->m_entry.e_size,
	       unmap_offset,
	       unmap_limit);

	off_t mapping_limit = mapping_offset + mapping->m_entry.e_size;
	struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);

	struct vm_region_mapping *left = mapping;
	struct vm_region_mapping *right
		= vm_cache_alloc(&mapping_cache, VM_NORMAL);
	if (!right) {
		return KERN_NO_MEMORY;
	}

	off_t left_offset = mapping->m_entry.e_offset;
	/* e_offset is expressed in the parent's coordinates while the unmap
	 * range is relative to root, so the right half begins at the
	 * mapping's own e_offset plus the distance from the start of the
	 * mapping to the end of the hole */
	off_t right_offset = left_offset + (unmap_limit - mapping_offset);
	off_t left_object_offset = mapping->m_object_offset;
	size_t left_length = unmap_offset - mapping_offset;
	size_t right_length = mapping_limit - unmap_limit;
	off_t right_object_offset = mapping->m_object_offset
				    + mapping->m_entry.e_size - right_length;

	tracek("mapping=[%zx-%zx]->[%zx-%zx]",
	       mapping_offset,
	       mapping_limit,
	       mapping->m_object_offset,
	       mapping->m_object_offset + mapping->m_entry.e_size);
	tracek("left=[%zx-%zx]->[%zx-%zx], right=[%zx-%zx]->[%zx-%zx]",
	       left_offset,
	       left_offset + left_length,
	       left_object_offset,
	       left_object_offset + left_length,
	       right_offset,
	       right_offset + right_length,
	       right_object_offset,
	       right_object_offset + right_length);

	left->m_object_offset = left_object_offset;
	left->m_entry.e_offset = left_offset;
	left->m_entry.e_size = left_length;

	right->m_object = left->m_object;
	right->m_prot = left->m_prot;
	right->m_entry.e_type = VM_REGION_ENTRY_MAPPING;
	right->m_entry.e_parent = left->m_entry.e_parent;
	right->m_object_offset = right_object_offset;
	right->m_entry.e_offset = right_offset;
	right->m_entry.e_size = right_length;

	virt_addr_t unmap_base = root->vr_entry.e_offset + unmap_offset;
	size_t unmap_length = unmap_limit - unmap_offset;

	for (size_t i = 0; i < unmap_length; i += VM_PAGE_SIZE) {
		tracek("unmapping %zx", unmap_base + i);
		pmap_remove(root->vr_pmap, unmap_base + i);
	}

	region_put_entry(parent, &right->m_entry);
	/* every mapping is linked into its object's mapping list when it is
	 * created and unlinked again by delete_mapping(); the new half must
	 * be linked too or a later delete would unlink an entry that was
	 * never queued */
	queue_push_back(&right->m_object->vo_mappings, &right->m_object_entry);

	return KERN_OK;
}
/* Trim a mapping from its left edge: the pages between the start of the
 * mapping and `unmap_limit` are removed from the pmap and the mapping is
 * shrunk so that it begins where the unmapped range ends.
 *
 * All offsets are relative to `root`. Always returns KERN_OK. */
static kern_status_t left_reduce_mapping(
	struct vm_region_mapping *mapping,
	struct vm_region *root,
	off_t mapping_offset,
	off_t unmap_offset,
	off_t unmap_limit)
{
	/* unmap_limit falls somewhere between mapping_offset and
	 * mapping_offset+length */
	tracek("left reduce mapping [%zx-%zx] subtract [%zx-%zx]",
	       mapping_offset,
	       mapping_offset + mapping->m_entry.e_size,
	       unmap_offset,
	       unmap_limit);

	virt_addr_t first_page = root->vr_entry.e_offset + mapping_offset;
	off_t mapping_limit = mapping_offset + mapping->m_entry.e_size;
	/* bytes dropped = whole mapping minus the part surviving on the right */
	size_t removed = mapping->m_entry.e_size - (mapping_limit - unmap_limit);

	tracek(" unmapping %zx-%zx (%zx bytes)",
	       first_page,
	       first_page + removed,
	       removed);

	size_t done = 0;
	while (done < removed) {
		pmap_remove(root->vr_pmap, first_page + done);
		done += VM_PAGE_SIZE;
	}

	/* slide the start of the mapping (and its window into the object)
	 * forward past the removed bytes */
	mapping->m_entry.e_size -= removed;
	mapping->m_entry.e_offset += removed;
	mapping->m_object_offset += removed;

	return KERN_OK;
}
/* Trim a mapping from its right edge: the pages between `unmap_offset` and
 * the end of the mapping are removed from the pmap and the mapping is
 * shortened accordingly.
 *
 * All offsets are relative to `root`. Always returns KERN_OK. */
static kern_status_t right_reduce_mapping(
	struct vm_region_mapping *mapping,
	struct vm_region *root,
	off_t mapping_offset,
	off_t unmap_offset,
	off_t unmap_limit)
{
	/* unmap_offset falls somewhere between mapping_offset and
	 * mapping_offset+length */
	tracek("right reduce mapping [%zx-%zx] subtract [%zx-%zx]",
	       mapping_offset,
	       mapping_offset + mapping->m_entry.e_size,
	       unmap_offset,
	       unmap_limit);

	virt_addr_t first_page = root->vr_entry.e_offset + unmap_offset;
	off_t mapping_limit = mapping_offset + mapping->m_entry.e_size;
	size_t removed = mapping_limit - unmap_offset;

	tracek(" unmapping %zx-%zx (%zx bytes)",
	       first_page,
	       first_page + removed,
	       removed);

	size_t done = 0;
	while (done < removed) {
		pmap_remove(root->vr_pmap, first_page + done);
		done += VM_PAGE_SIZE;
	}

	/* only the tail is dropped, so the start of the mapping stays put */
	mapping->m_entry.e_size -= removed;

	return KERN_OK;
}
/* Remove every page of a mapping from the pmap, detach the mapping from its
 * object and from its parent region, and release it back to the mapping
 * cache. `mapping_offset` is relative to `root`. Always returns KERN_OK. */
static kern_status_t delete_mapping(
	struct vm_region_mapping *mapping,
	struct vm_region *root,
	off_t mapping_offset)
{
	virt_addr_t first_page = root->vr_entry.e_offset + mapping_offset;

	tracek("delete mapping [%zx-%zx]",
	       first_page,
	       first_page + mapping->m_entry.e_size);

	size_t done = 0;
	while (done < mapping->m_entry.e_size) {
		pmap_remove(root->vr_pmap, first_page + done);
		done += VM_PAGE_SIZE;
	}

	struct vm_region *owner = region_from_entry(mapping->m_entry.e_parent);

	/* unlink from the object first, then from the region tree, and only
	 * then hand the memory back */
	queue_delete(&mapping->m_object->vo_mappings, &mapping->m_object_entry);
	btree_delete(&owner->vr_entries, &mapping->m_entry.e_node);
	vm_cache_free(&mapping_cache, mapping);

	return KERN_OK;
}
/* Unmap the range [unmap_area_offset, unmap_area_offset + unmap_area_length)
 * of `region`, where the offset is relative to the region.
 *
 * Each mapping found by the mapping iterator is handled according to how the
 * range covers it: split in two, deleted outright, or trimmed on its left or
 * right side. Stops at, and returns, the first non-KERN_OK status. */
kern_status_t vm_region_unmap(
	struct vm_region *region,
	off_t unmap_area_offset,
	size_t unmap_area_length)
{
	off_t unmap_area_limit = unmap_area_offset + unmap_area_length;

	tracek("unmapping %zx-%zx", unmap_area_offset, unmap_area_limit);

	struct entry_iterator it;
	off_t cursor = 0;
	mapping_iterator_begin(
		&it,
		region,
		unmap_area_offset,
		unmap_area_length,
		&cursor);

	while (it.it_entry) {
		struct vm_region_mapping *mapping
			= mapping_from_entry(it.it_entry);
		off_t mapping_offset = cursor;
		off_t mapping_limit = mapping_offset + it.it_entry->e_size;

		/* step to the next mapping before touching this one: the
		 * handlers below may resize it or free it altogether */
		mapping_iterator_move_next(
			&it,
			unmap_area_offset,
			unmap_area_length,
			&cursor);

		/* does the unmap range begin after the mapping's start? */
		bool starts_inside = unmap_area_offset > mapping_offset;
		/* does the unmap range end before the mapping's end? */
		bool ends_inside = unmap_area_limit < mapping_limit;

		kern_status_t status = KERN_OK;
		if (starts_inside && ends_inside) {
			status = split_mapping(
				mapping,
				region,
				mapping_offset,
				unmap_area_offset,
				unmap_area_limit);
		} else if (!starts_inside && !ends_inside) {
			status = delete_mapping(mapping, region, mapping_offset);
		} else if (!starts_inside && ends_inside) {
			status = left_reduce_mapping(
				mapping,
				region,
				mapping_offset,
				unmap_area_offset,
				unmap_area_limit);
		} else if (starts_inside && !ends_inside) {
			status = right_reduce_mapping(
				mapping,
				region,
				mapping_offset,
				unmap_area_offset,
				unmap_area_limit);
		} else {
			panic("don't know what to do with this mapping");
		}

		if (status != KERN_OK) {
			return status;
		}
	}

	return KERN_OK;
}
/* Check that every byte of [ptr, ptr + len) lies inside `region` and is
 * covered by a mapping that grants all of the bits in `prot`.
 *
 * A zero-length access is always valid. Returns false as soon as any page
 * touched by the range is unmapped or lacks the requested protection. */
bool vm_region_validate_access(
	struct vm_region *region,
	virt_addr_t ptr,
	size_t len,
	vm_prot_t prot)
{
	if (len == 0) {
		return true;
	}

	if (ptr < region->vr_entry.e_offset) {
		return false;
	}

	off_t offset = ptr - region->vr_entry.e_offset;
	/* NOTE(review): this also rejects an access spanning the region
	 * exactly (len == e_size) -- kept as-is, confirm whether intended */
	if (len >= region->vr_entry.e_size) {
		return false;
	}

	/* same test as offset + len > e_size, written as a subtraction so
	 * that a huge offset cannot overflow the sum */
	if ((size_t)offset > region->vr_entry.e_size - len) {
		return false;
	}

	/* exclusive end of the access; cannot overflow after the check above */
	off_t limit = offset + len;

	/* TODO improve this to not require a per-page loop */
	/* walk from the page containing the first byte up to the page
	 * containing the last byte: an unaligned range can touch one page
	 * more than len / VM_PAGE_SIZE suggests */
	for (off_t page = offset & ~VM_PAGE_MASK; page < limit;
	     page += VM_PAGE_SIZE) {
		off_t x = page;
		struct vm_region_mapping *mapping
			= region_get_mapping_recursive(
				region,
				&x,
				VM_PAGE_SIZE);
		if (!mapping) {
			return false;
		}

		if ((mapping->m_prot & prot) != prot) {
			return false;
		}
	}

	return true;
}
kern_status_t vm_region_demand_map(
struct vm_region *region,
virt_addr_t addr,
@@ -665,68 +1234,148 @@ kern_status_t vm_region_demand_map(
PMAP_NORMAL);
}
#ifdef TRACE
void vm_region_dump(struct vm_region *region, int depth)
virt_addr_t vm_region_get_base_address(const struct vm_region *region)
{
char line[128] = {0};
return entry_absolute_address(&region->vr_entry);
}
/* Copy `count` bytes from `src_ptr` in `src_region` to `dest_ptr` in
 * `dest_region`. The source must be mapped readable and the destination
 * writable, both for user access.
 *
 * If `nr_moved` is not NULL it receives the number of bytes actually copied,
 * including on failure.
 *
 * Returns KERN_OK once all `count` bytes were copied. If the copy stops early
 * the status of the failing iterator step is returned, or KERN_MEMORY_FAULT
 * when the source or destination was inaccessible from the start. */
kern_status_t vm_region_memmove(
	struct vm_region *dest_region,
	virt_addr_t dest_ptr,
	struct vm_region *src_region,
	virt_addr_t src_ptr,
	size_t count,
	size_t *nr_moved)
{
	if (count == 0) {
		/* nothing to copy: do not touch (or fault in) any page */
		if (nr_moved) {
			*nr_moved = 0;
		}
		return KERN_OK;
	}

	struct vm_iterator src, dest;
	vm_iterator_begin(
		&src,
		src_region,
		src_ptr,
		VM_PROT_READ | VM_PROT_USER);
	vm_iterator_begin(
		&dest,
		dest_region,
		dest_ptr,
		VM_PROT_WRITE | VM_PROT_USER);

	kern_status_t status = KERN_OK;
	size_t r = 0;

	while (count && src.it_max && dest.it_max) {
		size_t to_move = MIN(MIN(src.it_max, dest.it_max), count);

		memmove(dest.it_buf, src.it_buf, to_move);

		/* these bytes have been copied whatever happens next, so
		 * account for them before trying to advance */
		count -= to_move;
		r += to_move;

		if (count == 0) {
			/* done: seeking further could step past the end of
			 * the last mapping and report a bogus fault */
			break;
		}

		status = vm_iterator_seek(&src, to_move);
		if (status != KERN_OK) {
			break;
		}

		status = vm_iterator_seek(&dest, to_move);
		if (status != KERN_OK) {
			break;
		}
	}

	if (count && status == KERN_OK) {
		/* an iterator had no accessible memory to begin with */
		status = KERN_MEMORY_FAULT;
	}

	if (nr_moved) {
		*nr_moved = r;
	}

	return status;
}
extern kern_status_t vm_region_memmove_v(
struct vm_region *dest_region,
size_t dest_offset,
struct iovec *dest_vecs,
size_t nr_dest_vecs,
struct vm_region *src_region,
size_t src_offset,
const struct iovec *src_vecs,
size_t nr_src_vecs,
size_t bytes_to_move)
{
struct iovec_iterator src, dest;
iovec_iterator_begin(&src, src_vecs, nr_src_vecs);
iovec_iterator_begin(&dest, dest_vecs, nr_dest_vecs);
iovec_iterator_seek(&src, src_offset);
iovec_iterator_seek(&dest, dest_offset);
while (bytes_to_move && src.it_len && dest.it_len) {
size_t to_move
= MIN(MIN(src.it_len, dest.it_len), bytes_to_move);
kern_status_t status = vm_region_memmove(
dest_region,
dest.it_base,
src_region,
src.it_base,
to_move,
NULL);
if (status != KERN_OK) {
return status;
}
iovec_iterator_seek(&src, to_move);
iovec_iterator_seek(&dest, to_move);
bytes_to_move -= to_move;
}
return KERN_OK;
}
#ifdef TRACE
void vm_region_dump(struct vm_region *region)
{
char s[128];
size_t p = 0;
for (int i = 0; i < depth; i++) {
p += snprintf(line + p, sizeof line - p, " ");
}
p += snprintf(
line + p,
sizeof line - p,
"region: %s [%llx-%llx]",
region->vr_name,
region->vr_entry.e_offset,
region->vr_entry.e_offset + region->vr_entry.e_size);
printk("%s", line);
struct btree_node *cur = btree_first(&region->vr_entries);
while (cur) {
memset(line, 0x0, sizeof line);
struct entry_iterator it;
entry_iterator_begin(&it, region);
while (it.it_entry) {
p = 0;
for (int i = 0; i < depth + 1; i++) {
p += snprintf(line + p, sizeof line - p, " ");
for (unsigned int i = 0; i < it.it_depth; i++) {
p += snprintf(s + p, sizeof s - p, " ");
}
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
struct vm_region *child_region = region_from_entry(entry);
struct vm_region_mapping *child_mapping
= mapping_from_entry(entry);
switch (entry->e_type) {
case VM_REGION_ENTRY_REGION:
break;
case VM_REGION_ENTRY_MAPPING:
switch (it.it_entry->e_type) {
case VM_REGION_ENTRY_REGION: {
struct vm_region *child
= region_from_entry(it.it_entry);
p += snprintf(
line + p,
sizeof line - p,
"mapping: %s p:[%llx-%llx] -> v:[%llx-%llx]",
child_mapping->m_object->vo_name,
child_mapping->m_object_offset,
child_mapping->m_object_offset
+ child_mapping->m_entry.e_size,
child_mapping->m_entry.e_offset,
child_mapping->m_entry.e_offset
+ child_mapping->m_entry.e_size);
printk("%s", line);
s + p,
sizeof s - p,
"-region [%zx-%zx] %s",
child->vr_entry.e_offset,
child->vr_entry.e_offset
+ child->vr_entry.e_size,
child->vr_name);
break;
}
case VM_REGION_ENTRY_MAPPING: {
struct vm_region_mapping *mapping
= mapping_from_entry(it.it_entry);
p += snprintf(
s + p,
sizeof s - p,
"+mapping [%zx-%zx] %s",
mapping->m_entry.e_offset,
mapping->m_entry.e_offset
+ mapping->m_entry.e_size,
mapping->m_object->vo_name);
break;
default:
p += snprintf(line + p, sizeof line - p, "invalid");
printk("%s", line);
break;
}
if (child_region) {
vm_region_dump(child_region, depth + 1);
}
cur = btree_next(cur);
tracek("%s", s);
entry_iterator_move_next(&it);
}
}
#endif