/* mango/vm/vm-region.c */
#include <kernel/iovec.h>
#include <kernel/libc/stdio.h>
#include <kernel/object.h>
#include <kernel/panic.h>
#include <kernel/printk.h>
#include <kernel/util.h>
#include <kernel/vm-object.h>
#include <kernel/vm-region.h>
#include <mango/status.h>
/* NOTE Locking Rules
* To avoid deadlocks and crashes, the following locking rules should be
* followed:
* 1. Do NOT lock more than one region at a time IF the regions are siblings.
* 2. When locking a region and its child(ren) or ancestors, always lock
* the parent region BEFORE the child region.
* 3. When locking a region and a vm-object mapped into that region, always
* lock the region BEFORE the vm-object.
* 4. An entry MUST be locked before any of its data can be read/written,
* including its children (if it's a region) and its e_parent pointer.
* 5. vm_region_mapping has no lock. Instead, its immediate parent region must
* be locked before any child mappings can be accessed.
*/
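/* A minimal sketch of the rules above (hypothetical caller; the lock helpers
* are the ones used throughout this file):
*
*	unsigned long flags, obj_flags;
*	vm_region_lock_irqsave(parent, &flags);      // rule 2: parent first
*	vm_region_lock(child);                       // IRQs already disabled
*	vm_object_lock_irqsave(obj, &obj_flags);     // rule 3: region before object
*	// ... access the entries/object ...
*	vm_object_unlock_irqrestore(obj, obj_flags);
*	vm_region_unlock(child);                     // release in reverse order
*	vm_region_unlock_irqrestore(parent, flags);
*/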
/*** STATIC DATA + MACROS *****************************************************/
#undef ASLR
#define INVALID_OFFSET ((off_t)-1)
#ifdef ASLR
#define region_find_free_area(region, length) \
region_find_free_area_random(region, length)
#else
#define region_find_free_area(region, length) \
region_find_free_area_linear(region, length)
#endif
/* iterates over a range of mapped virtual memory in a region, and provides
* a moving buffer through which the memory can be accessed */
struct vm_iterator {
struct vm_region *it_region;
struct vm_region_mapping *it_mapping;
virt_addr_t it_base;
vm_prot_t it_prot;
void *it_buf;
size_t it_max;
};
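/* Typical traversal pattern, as used by vm_region_read_kernel below (a
* sketch; `region` must be locked by the caller, and `out`/`remaining` are
* hypothetical):
*
*	struct vm_iterator it;
*	vm_iterator_begin(&it, region, addr, VM_PROT_READ);
*	while (remaining && it.it_max) {
*		size_t n = MIN(it.it_max, remaining);
*		memmove(out, it.it_buf, n);      // access memory via the window
*		if (vm_iterator_seek(&it, n) != KERN_OK)
*			break;                   // unmapped area or protection fault
*		out += n;
*		remaining -= n;
*	}
*/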
/* iterates recursively over the entries in a region */
struct entry_iterator {
struct vm_region *it_root;
struct vm_region_entry *it_entry;
/* depth of it_entry relative to it_root */
unsigned int it_depth;
};
enum search_direction {
SEARCH_LEFT,
SEARCH_RIGHT,
};
#define VM_REGION_CAST(p) \
OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p)
static kern_status_t region_object_destroy(struct object *obj, struct queue *q);
static kern_status_t region_object_destroy_recurse(
struct queue_entry *entry,
struct object **out);
static struct object_type vm_region_type = {
.ob_name = "vm-region",
.ob_size = sizeof(struct vm_region),
.ob_header_offset = offsetof(struct vm_region, vr_base),
.ob_ops = {
.destroy = region_object_destroy,
.destroy_recurse = region_object_destroy_recurse,
},
};
static struct vm_cache mapping_cache = {
.c_name = "vm-region-mapping",
.c_obj_size = sizeof(struct vm_region_mapping),
};
/*** INTERNAL UTILITY FUNCTIONS ***********************************************/
static struct vm_region *region_from_entry(struct vm_region_entry *entry)
{
if (!entry || entry->e_type != VM_REGION_ENTRY_REGION) {
return NULL;
}
return BTREE_CONTAINER(struct vm_region, vr_entry, entry);
}
static struct vm_region_mapping *mapping_from_entry(
struct vm_region_entry *entry)
{
if (!entry || entry->e_type != VM_REGION_ENTRY_MAPPING) {
return NULL;
}
return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry);
}
kern_status_t region_object_destroy(struct object *obj, struct queue *q)
{
struct vm_region *region = VM_REGION_CAST(obj);
if (region->vr_status == VM_REGION_ONLINE) {
panic("last reference closed on an online vm-region");
}
struct btree_node *node = btree_first(&region->vr_entries);
while (node) {
struct btree_node *next = btree_next(node);
btree_delete(&region->vr_entries, node);
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, node);
if (entry->e_type != VM_REGION_ENTRY_REGION) {
panic("offline vm-region still contains non-region "
"children.");
}
queue_push_back(q, &entry->e_entry);
node = next;
}
return KERN_OK;
}
kern_status_t region_object_destroy_recurse(
struct queue_entry *entry,
struct object **out)
{
struct vm_region_entry *region_entry
= BTREE_CONTAINER(struct vm_region_entry, e_entry, entry);
if (region_entry->e_type != VM_REGION_ENTRY_REGION) {
panic("offline vm-region still contains non-region "
"children.");
}
struct vm_region *region = region_from_entry(region_entry);
*out = &region->vr_base;
return KERN_OK;
}
static virt_addr_t entry_absolute_address(const struct vm_region_entry *entry)
{
return entry->e_address;
}
/* this function must be called with `parent` locked */
static void region_put_entry(
struct vm_region *parent,
struct vm_region_entry *child)
{
struct btree_node *cur = parent->vr_entries.b_root;
if (!cur) {
parent->vr_entries.b_root = &child->e_node;
btree_insert_fixup(&parent->vr_entries, &child->e_node);
return;
}
off_t child_base = child->e_offset;
off_t child_limit = child_base + child->e_size - 1;
while (cur) {
struct vm_region_entry *cur_entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
struct btree_node *next = NULL;
off_t cur_base = cur_entry->e_offset;
off_t cur_limit = cur_base + cur_entry->e_size - 1;
if (child_limit < cur_base) {
next = btree_left(cur);
} else if (child_base > cur_limit) {
next = btree_right(cur);
} else {
#ifdef TRACE
vm_region_dump(parent);
#endif
panic("tried to add an overlapping entry [%zx-%zx] to "
"vm-region (overlaps [%zx-%zx])",
child_base,
child_limit,
cur_base,
cur_limit);
}
if (next) {
cur = next;
continue;
}
if (child_limit < cur_base) {
btree_put_left(cur, &child->e_node);
} else {
btree_put_right(cur, &child->e_node);
}
btree_insert_fixup(&parent->vr_entries, &child->e_node);
break;
}
}
/* find the child entry that covers the specified offset.
* DOES NOT search recursively! */
static struct vm_region_entry *region_get_entry(
struct vm_region *region,
off_t offset,
size_t len)
{
struct btree_node *cur = region->vr_entries.b_root;
if (!cur) {
return NULL;
}
struct vm_region_entry *result = NULL;
off_t base = offset, limit = offset + len - 1;
while (cur) {
struct vm_region_entry *child
= BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
struct btree_node *next = NULL;
off_t child_base = child->e_offset;
off_t child_limit = child->e_offset + child->e_size - 1;
if (limit < child_base) {
next = btree_left(cur);
} else if (base > child_limit) {
next = btree_right(cur);
} else {
result = child;
break;
}
cur = next;
}
return result;
}
/* find the child region that covers the area [*offp, len]. searches
* recursively. the value in `offp` is updated to the offset of the returned
* entry relative to its parent.
* this function should be called with `region` locked.
* the region returned by this function will also be locked. any intermediary
* regions traversed by this function will be locked temporarily, but will
* be unlocked by the time the function returns. */
static struct vm_region *region_get_child_region_recursive(
struct vm_region *region,
off_t *offp,
size_t len)
{
struct vm_region *root = region;
off_t offset = *offp;
if (offset >= region->vr_entry.e_size) {
return NULL;
}
while (1) {
struct vm_region_entry *next
= region_get_entry(region, offset, len);
struct vm_region *next_region = region_from_entry(next);
if (next_region) {
offset -= next->e_offset;
/* since `region` is locked, interrupts are already
* disabled, so don't use lock_irq() here */
vm_region_lock(next_region);
if (region != root) {
vm_region_unlock(region);
}
region = next_region;
} else {
break;
}
}
*offp = offset;
return region;
}
/* find the vm_region_mapping that contains a given memory area.
* `offp` should be a pointer to an off_t value that contains the offset
* of the area relative to the start of `region`. this value will be updated
* to the offset of the mapping relative to its immediate parent.
* this function should be called with `region` locked. if a mapping is found,
* it will be returned with its immediate parent locked. */
static struct vm_region_mapping *region_get_mapping_recursive(
struct vm_region *region,
off_t *offp,
size_t len)
{
off_t offset = *offp;
region = region_get_child_region_recursive(region, &offset, len);
if (!region) {
return NULL;
}
/* if `region` is a different region than what was originally passed to
* us, it has now been locked, and its children can be accessed. */
struct vm_region_entry *entry = region_get_entry(region, offset, len);
*offp = offset;
/* return the mapping with the parent region still locked */
return mapping_from_entry(entry);
}
static off_t generate_random_address(
off_t area_base,
size_t area_length,
size_t target_length)
{
size_t random_range = area_length - target_length;
if (random_range == 0) {
/* the target exactly fills the area; avoid a modulo by zero */
return area_base;
}
size_t offset = 0;
fill_random(&offset, sizeof offset);
offset %= random_range;
return area_base + (off_t)offset;
}
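/* e.g. a free area starting at 0x10000 with area_length 0x40000 and a
* target_length of 0x4000 gives random_range 0x3c000, so the returned base
* falls anywhere in [0x10000, 0x4c000); callers page-align the result
* afterwards. */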
static struct vm_region_entry *region_get_random_entry(struct vm_region *region)
{
enum {
STEP_LEFT = 0,
STEP_RIGHT = 1,
STEP_FINISH = 2,
} step;
struct btree_node *result = NULL;
struct btree_node *cur = region->vr_entries.b_root;
if (!cur) {
return NULL;
}
while (1) {
unsigned long r;
fill_random(&r, sizeof r);
struct btree_node *next = NULL;
step = r % 3;
switch (step) {
case STEP_LEFT:
next = btree_left(cur);
break;
case STEP_RIGHT:
next = btree_right(cur);
break;
case STEP_FINISH:
result = cur;
break;
default:
return NULL;
}
if (!next) {
result = cur;
break;
}
cur = next;
}
if (!result) {
return NULL;
}
return BTREE_CONTAINER(struct vm_region_entry, e_node, result);
}
static off_t region_find_free_area_ex(
struct vm_region *region,
size_t target_length,
struct btree_node *start,
enum search_direction direction,
bool random)
{
if (region->vr_entry.e_size < target_length) {
return INVALID_OFFSET;
}
struct btree_node *left_node = NULL, *right_node = NULL;
switch (direction) {
case SEARCH_LEFT:
right_node = start;
left_node = start ? btree_prev(start) : NULL;
break;
case SEARCH_RIGHT:
left_node = start;
right_node = start ? btree_next(start) : NULL;
break;
default:
return INVALID_OFFSET;
}
if (!left_node && !right_node) {
return INVALID_OFFSET;
}
while (1) {
struct vm_region_entry *left = BTREE_CONTAINER(
struct vm_region_entry,
e_node,
left_node);
struct vm_region_entry *right = BTREE_CONTAINER(
struct vm_region_entry,
e_node,
right_node);
/* addresses of the first and last free bytes in the area
* respectively. */
off_t area_base, area_limit;
if (left && right) {
area_base = left->e_offset + left->e_size;
area_limit = right->e_offset - 1;
} else if (right) {
area_base = region->vr_entry.e_offset;
area_limit = right->e_offset - 1;
} else if (left) {
area_base = left->e_offset + left->e_size;
area_limit = region->vr_entry.e_offset
+ region->vr_entry.e_size - 1;
} else {
return INVALID_OFFSET;
}
area_base &= ~VM_PAGE_MASK;
size_t area_size = 0;
if (area_limit >= area_base) {
area_size = area_limit - area_base + 1;
}
if (area_size >= target_length) {
if (random) {
area_base = generate_random_address(
area_base,
area_size,
target_length);
area_base &= ~VM_PAGE_MASK;
}
return area_base;
}
if (direction == SEARCH_RIGHT) {
if (!right_node) {
/* reached the right edge of the region */
return INVALID_OFFSET;
}
left_node = right_node;
right_node = btree_next(right_node);
} else {
if (!left_node) {
/* reached the left edge of the region */
return INVALID_OFFSET;
}
right_node = left_node;
left_node = btree_prev(right_node);
}
}
return INVALID_OFFSET;
}
static off_t region_find_free_area_linear(
struct vm_region *region,
size_t target_length)
{
if (region->vr_entry.e_size < target_length) {
return INVALID_OFFSET;
}
if (!region->vr_entries.b_root) {
/* the region is empty; the area at offset 0 is free */
return 0;
}
return region_find_free_area_ex(
region,
target_length,
btree_first(&region->vr_entries),
SEARCH_RIGHT,
false);
}
static off_t region_find_free_area_random(
struct vm_region *region,
size_t target_length)
{
if (region->vr_entry.e_size < target_length) {
return INVALID_OFFSET;
}
if (!region->vr_entries.b_root) {
off_t offset = generate_random_address(
0,
region->vr_entry.e_size,
target_length);
return offset & ~VM_PAGE_MASK;
}
unsigned int tmp = 0;
struct vm_region_entry *basis = region_get_random_entry(region);
fill_random(&tmp, sizeof tmp);
enum search_direction direction = tmp % 2;
return region_find_free_area_ex(
region,
target_length,
&basis->e_node,
direction,
true);
}
static bool region_is_area_free(
const struct vm_region *region,
off_t base,
size_t len)
{
off_t limit = base + len - 1;
if (base >= region->vr_entry.e_size) {
return false;
}
if (limit >= region->vr_entry.e_size) {
return false;
}
struct btree_node *cur = region->vr_entries.b_root;
if (!cur) {
return true;
}
while (cur) {
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
struct btree_node *next = NULL;
off_t entry_limit = entry->e_offset + entry->e_size - 1;
if (base > entry_limit) {
next = btree_right(cur);
} else if (limit < entry->e_offset) {
next = btree_left(cur);
} else {
return false;
}
cur = next;
}
return true;
}
static kern_status_t region_validate_allocation(
struct vm_region *parent,
vm_prot_t prot,
off_t *offp,
size_t len)
{
off_t offset = *offp;
if ((prot & parent->vr_prot) != prot) {
/* child region protection must match or be a
* subset of parent region protection */
return KERN_INVALID_ARGUMENT;
}
if (offset == VM_REGION_ANY_OFFSET) {
offset = region_find_free_area(parent, len);
*offp = offset;
return (offset == INVALID_OFFSET) ? KERN_NO_MEMORY : KERN_OK;
}
offset &= ~VM_PAGE_MASK;
if (!region_is_area_free(parent, offset, len)) {
return KERN_INVALID_ARGUMENT;
}
*offp = offset;
return KERN_OK;
}
/* this function should be called with `region` locked */
static void vm_iterator_begin(
struct vm_iterator *it,
struct vm_region *region,
virt_addr_t base,
vm_prot_t prot)
{
memset(it, 0x0, sizeof *it);
it->it_base = base;
it->it_region = region;
it->it_prot = prot;
off_t offset = base - vm_region_get_base_address(region);
it->it_mapping = region_get_mapping_recursive(region, &offset, 1);
if (!it->it_mapping || (it->it_mapping->m_prot & prot) != prot) {
return;
}
off_t object_offset = offset - it->it_mapping->m_entry.e_offset
+ it->it_mapping->m_object_offset;
struct vm_page *pg = NULL;
if (prot & VM_PROT_WRITE) {
pg = vm_object_alloc_page(
it->it_mapping->m_object,
object_offset,
VM_PAGE_4K);
} else {
pg = vm_object_get_page(
it->it_mapping->m_object,
object_offset);
}
if (!pg) {
return;
}
void *buffer_base = vm_page_get_vaddr(pg);
phys_addr_t pg_addr = vm_page_get_paddr(pg);
size_t buffer_size = vm_page_get_size_bytes(pg);
while (1) {
struct btree_node *next_node = btree_next(&pg->p_bnode);
struct vm_page *next
= BTREE_CONTAINER(struct vm_page, p_bnode, next_node);
if (!next) {
break;
}
phys_addr_t next_addr = vm_page_get_paddr(next);
if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) {
break;
}
pg = next;
pg_addr = next_addr;
buffer_size += vm_page_get_size_bytes(next);
}
it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK);
it->it_max = buffer_size - (object_offset & VM_PAGE_MASK);
}
static kern_status_t vm_iterator_seek(struct vm_iterator *it, size_t nr_bytes)
{
#define UNLOCK_MAPPING_PARENT(p) \
do { \
struct vm_region *parent \
= region_from_entry((p)->m_entry.e_parent); \
if (parent != it->it_region) { \
vm_region_unlock(parent); \
} \
} while (0)
if (nr_bytes < it->it_max) {
it->it_base += nr_bytes;
it->it_buf = (char *)it->it_buf + nr_bytes;
it->it_max -= nr_bytes;
return KERN_OK;
}
/* the parent region of it->it_mapping is locked here. if it is
* different from it->it_region, it must be unlocked */
UNLOCK_MAPPING_PARENT(it->it_mapping);
it->it_base += nr_bytes;
off_t offset = it->it_base - vm_region_get_base_address(it->it_region);
struct vm_region_mapping *next_mapping
= region_get_mapping_recursive(it->it_region, &offset, 1);
if (!next_mapping) {
it->it_buf = NULL;
it->it_max = 0;
return KERN_MEMORY_FAULT;
}
/* past this point, if we encounter an error, must remember to unlock
* the parent region of next_mapping */
if ((next_mapping->m_prot & it->it_prot) != it->it_prot) {
it->it_buf = NULL;
it->it_max = 0;
UNLOCK_MAPPING_PARENT(next_mapping);
return KERN_MEMORY_FAULT;
}
it->it_mapping = next_mapping;
off_t object_offset = offset - next_mapping->m_entry.e_offset
+ next_mapping->m_object_offset;
struct vm_page *pg = NULL;
if (it->it_prot & VM_PROT_WRITE) {
pg = vm_object_alloc_page(
next_mapping->m_object,
object_offset,
VM_PAGE_4K);
} else {
pg = vm_object_get_page(
next_mapping->m_object,
object_offset);
}
if (!pg) {
UNLOCK_MAPPING_PARENT(next_mapping);
return KERN_NO_MEMORY;
}
void *buffer_base = vm_page_get_vaddr(pg);
phys_addr_t pg_addr = vm_page_get_paddr(pg);
size_t buffer_size = vm_page_get_size_bytes(pg);
while (1) {
struct btree_node *next_node = btree_next(&pg->p_bnode);
struct vm_page *next
= BTREE_CONTAINER(struct vm_page, p_bnode, next_node);
if (!next) {
break;
}
phys_addr_t next_addr = vm_page_get_paddr(next);
if (pg_addr + vm_page_get_size_bytes(pg) != next_addr) {
break;
}
pg = next;
pg_addr = next_addr;
buffer_size += vm_page_get_size_bytes(next);
}
it->it_buf = (char *)buffer_base + (object_offset & VM_PAGE_MASK);
it->it_max = buffer_size - (object_offset & VM_PAGE_MASK);
return KERN_OK;
}
/* this function must be called with `root` locked. `root` will be the first
* entry visited by the iterator. from there, child entries are visited in
* depth-first order. */
static void entry_iterator_begin(
struct entry_iterator *it,
struct vm_region *root)
{
memset(it, 0x0, sizeof *it);
it->it_root = root;
it->it_entry = &root->vr_entry;
}
/* this function must be called when you are finished with an entry_iterator,
* to ensure that all held locks are released. */
static void entry_iterator_finish(struct entry_iterator *it)
{
struct vm_region_entry *cur = it->it_entry;
if (!cur) {
return;
}
struct vm_region *region = NULL;
if (cur->e_type == VM_REGION_ENTRY_MAPPING) {
region = region_from_entry(cur->e_parent);
} else {
region = region_from_entry(cur);
}
while (region && region != it->it_root) {
struct vm_region *parent
= region_from_entry(region->vr_entry.e_parent);
vm_region_unlock(region);
region = parent;
}
memset(it, 0x0, sizeof *it);
}
/* move to the next entry in the traversal order.
* when this function returns:
* 1. if the visited entry is a region, it will be locked.
* 2. if the visited entry is a mapping, its parent region will be locked.
* a region will remain locked until all of its children and n-grand-children
* have been visited. once iteration is finished, only `it->it_root` will be
* locked.
*/
static void entry_iterator_move_next(struct entry_iterator *it)
{
/* `region` is locked */
struct vm_region *region = region_from_entry(it->it_entry);
bool has_children = (region && !btree_empty(&region->vr_entries));
if (has_children) {
/* visit the first child */
struct btree_node *node = btree_first(&region->vr_entries);
struct vm_region_entry *entry
= BTREE_CONTAINER(struct vm_region_entry, e_node, node);
if (entry->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child_region
= region_from_entry(entry);
/* since `region` is locked, interrupts are already
* disabled, so don't use lock_irq() here */
vm_region_lock(child_region);
}
it->it_depth++;
it->it_entry = entry;
return;
}
/* go back up until we find a right sibling. */
struct vm_region_entry *cur = it->it_entry;
while (1) {
struct btree_node *sibling = btree_next(&cur->e_node);
if (sibling) {
it->it_entry = BTREE_CONTAINER(
struct vm_region_entry,
e_node,
sibling);
return;
}
if (cur == &it->it_root->vr_entry) {
it->it_entry = NULL;
return;
}
struct vm_region_entry *parent_entry = cur->e_parent;
struct vm_region *parent = region_from_entry(parent_entry);
if (!parent) {
it->it_entry = NULL;
return;
}
if (cur->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child_region = region_from_entry(cur);
if (child_region != it->it_root) {
vm_region_unlock(child_region);
}
}
it->it_depth--;
cur = parent_entry;
}
}
/* erase the current entry and move to the next entry in the traversal order.
* the current entry MUST be a mapping, otherwise nothing will happen.
*/
static void entry_iterator_erase(struct entry_iterator *it)
{
/* the parent region of `mapping` is locked */
struct vm_region_mapping *mapping = mapping_from_entry(it->it_entry);
if (!mapping) {
return;
}
struct vm_region *mapping_parent
= region_from_entry(mapping->m_entry.e_parent);
/* go back up until we find a right sibling. */
struct vm_region_entry *cur = it->it_entry;
while (1) {
struct btree_node *sibling = btree_next(&cur->e_node);
/* read everything we still need out of `cur` before the erased
* mapping (== `cur` on the first pass) is freed below */
struct vm_region_entry *parent_entry = cur->e_parent;
bool cur_is_region = (cur->e_type == VM_REGION_ENTRY_REGION);
if (mapping) {
btree_delete(
&mapping_parent->vr_entries,
&mapping->m_entry.e_node);
vm_cache_free(&mapping_cache, mapping);
mapping = NULL;
}
if (sibling) {
it->it_entry = BTREE_CONTAINER(
struct vm_region_entry,
e_node,
sibling);
return;
}
if (cur == &it->it_root->vr_entry) {
it->it_entry = NULL;
return;
}
struct vm_region *parent = region_from_entry(parent_entry);
if (!parent) {
it->it_entry = NULL;
return;
}
if (cur_is_region) {
struct vm_region *child_region = region_from_entry(cur);
if (child_region != it->it_root) {
vm_region_unlock(child_region);
}
}
it->it_depth--;
cur = parent_entry;
}
}
static void mapping_iterator_begin(
struct entry_iterator *it,
struct vm_region *root,
off_t offset,
size_t length,
off_t *offp)
{
entry_iterator_begin(it, root);
while (it->it_entry) {
off_t base = entry_absolute_address(it->it_entry)
- root->vr_entry.e_offset;
off_t limit = base + it->it_entry->e_size - 1;
if (it->it_entry->e_type == VM_REGION_ENTRY_MAPPING) {
/* overlap test between [base, limit] and
* [offset, offset + length - 1] */
if (base < offset + (off_t)length && limit >= offset) {
*offp = base;
return;
}
}
entry_iterator_move_next(it);
}
}
static void mapping_iterator_finish(struct entry_iterator *it)
{
entry_iterator_finish(it);
}
static void mapping_iterator_move_next(
struct entry_iterator *it,
off_t offset,
size_t length,
off_t *offp)
{
do {
entry_iterator_move_next(it);
} while (it->it_entry
&& it->it_entry->e_type != VM_REGION_ENTRY_MAPPING);
if (!it->it_entry) {
return;
}
off_t base = entry_absolute_address(it->it_entry)
- it->it_root->vr_entry.e_offset;
if (base >= offset + length) {
it->it_entry = NULL;
} else {
*offp = base;
}
}
static void mapping_iterator_erase(
struct entry_iterator *it,
off_t offset,
size_t length,
off_t *offp)
{
entry_iterator_erase(it);
while (it->it_entry
&& it->it_entry->e_type != VM_REGION_ENTRY_MAPPING) {
entry_iterator_move_next(it);
}
if (!it->it_entry) {
return;
}
off_t base = entry_absolute_address(it->it_entry)
- it->it_root->vr_entry.e_offset;
if (base >= offset + length) {
it->it_entry = NULL;
} else {
*offp = base;
}
}
/*** PUBLIC API ***************************************************************/
kern_status_t vm_region_type_init(void)
{
vm_cache_init(&mapping_cache);
return object_type_register(&vm_region_type);
}
struct vm_region *vm_region_cast(struct object *obj)
{
return VM_REGION_CAST(obj);
}
/* this function should be called with `parent` locked (if parent is non-NULL)
*/
kern_status_t vm_region_create(
struct vm_region *parent,
const char *name,
size_t name_len,
off_t offset,
size_t region_len,
vm_prot_t prot,
struct vm_region **out)
{
if (parent && parent->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (!offset || !region_len) {
return KERN_INVALID_ARGUMENT;
}
if (region_len & VM_PAGE_MASK) {
region_len &= ~VM_PAGE_MASK;
region_len += VM_PAGE_SIZE;
}
kern_status_t status = KERN_OK;
if (parent) {
status = region_validate_allocation(
parent,
prot,
&offset,
region_len);
}
if (status != KERN_OK) {
return status;
}
struct object *region_object = object_create(&vm_region_type);
if (!region_object) {
return KERN_NO_MEMORY;
}
struct vm_region *region = VM_REGION_CAST(region_object);
region->vr_status = VM_REGION_ONLINE;
region->vr_prot = prot;
region->vr_entry.e_type = VM_REGION_ENTRY_REGION;
region->vr_entry.e_address = offset;
region->vr_entry.e_offset = offset;
region->vr_entry.e_size = region_len;
#ifdef TRACE
tracek("creating sub-region at [%llx-%llx]",
offset,
offset + region_len);
#endif
if (parent) {
region->vr_entry.e_parent = &parent->vr_entry;
region->vr_entry.e_address += parent->vr_entry.e_address;
region->vr_pmap = parent->vr_pmap;
region_put_entry(parent, &region->vr_entry);
/* `parent` holds a reference to child `region` */
object_ref(&region->vr_base);
}
if (name && name_len) {
name_len = MIN(sizeof region->vr_name - 1, name_len);
memcpy(region->vr_name, name, name_len);
region->vr_name[name_len] = '\0';
}
*out = region;
return KERN_OK;
}
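/* Example (hypothetical caller; error handling elided): create a 1 MiB
* read/write sub-region at any free offset inside `parent`, which the
* caller has locked:
*
*	struct vm_region *heap = NULL;
*	kern_status_t status = vm_region_create(
*		parent,
*		"heap", 4,
*		VM_REGION_ANY_OFFSET,
*		0x100000,
*		VM_PROT_READ | VM_PROT_WRITE,
*		&heap);
*/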
kern_status_t vm_region_kill(
struct vm_region *region,
unsigned long *lock_flags)
{
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (region->vr_entry.e_parent) {
struct vm_region *parent
= region_from_entry(region->vr_entry.e_parent);
region->vr_entry.e_parent = NULL;
/* locks must be acquired in parent->child order. since we're
* going backwards here, unlock `region` before locking its
* parent */
vm_region_unlock_irqrestore(region, *lock_flags);
vm_region_lock_irqsave(parent, lock_flags);
btree_delete(&parent->vr_entries, &region->vr_entry.e_node);
vm_region_unlock_irqrestore(parent, *lock_flags);
vm_region_lock_irqsave(region, lock_flags);
/* `region` lock is held, and e_parent is NULL */
}
struct entry_iterator it;
entry_iterator_begin(&it, region);
while (it.it_entry) {
if (it.it_entry->e_type == VM_REGION_ENTRY_REGION) {
struct vm_region *child_region
= region_from_entry(it.it_entry);
child_region->vr_status = VM_REGION_DEAD;
entry_iterator_move_next(&it);
continue;
}
struct vm_region_mapping *mapping
= mapping_from_entry(it.it_entry);
virt_addr_t base = entry_absolute_address(it.it_entry);
for (size_t i = 0; i < mapping->m_entry.e_size;
i += VM_PAGE_SIZE) {
pmap_remove(region->vr_pmap, base + i);
}
unsigned long flags;
vm_object_lock_irqsave(mapping->m_object, &flags);
queue_delete(
&mapping->m_object->vo_mappings,
&mapping->m_object_entry);
vm_object_unlock_irqrestore(mapping->m_object, flags);
entry_iterator_erase(&it);
}
return KERN_OK;
}
kern_status_t vm_region_map_object(
struct vm_region *root,
off_t region_offset,
struct vm_object *object,
off_t object_offset,
size_t length,
vm_prot_t prot,
virt_addr_t *out)
{
object_offset &= ~VM_PAGE_MASK;
if (region_offset != VM_REGION_ANY_OFFSET) {
off_t limit = region_offset + length;
if (region_offset & VM_PAGE_MASK) {
region_offset &= ~VM_PAGE_MASK;
}
if (limit & VM_PAGE_MASK) {
limit &= ~VM_PAGE_MASK;
limit += VM_PAGE_SIZE;
}
length = limit - region_offset;
}
if (length & VM_PAGE_MASK) {
length &= ~VM_PAGE_MASK;
length += VM_PAGE_SIZE;
}
if (!root || !object) {
return KERN_INVALID_ARGUMENT;
}
struct vm_region *region = root;
kern_status_t status = KERN_INVALID_ARGUMENT;
if (region_offset != VM_REGION_ANY_OFFSET) {
region = region_get_child_region_recursive(
root,
&region_offset,
length);
if (!region) {
return KERN_INVALID_ARGUMENT;
}
/* if `region` != `root`, it is now locked and must be unlocked
* on every exit path below */
}
if (region->vr_status != VM_REGION_ONLINE) {
status = KERN_BAD_STATE;
goto fail;
}
if ((prot & region->vr_prot) != prot) {
goto fail;
}
if ((prot & object->vo_prot) != prot) {
goto fail;
}
if (!length || object_offset + length > object->vo_size) {
goto fail;
}
if (region_offset == VM_REGION_ANY_OFFSET) {
region_offset = region_find_free_area(region, length);
if (region_offset == INVALID_OFFSET) {
status = KERN_NO_MEMORY;
goto fail;
}
} else if (!region_is_area_free(region, region_offset, length)) {
goto fail;
}
tracek("vm_region_map_object(%s, %zx, %s, %zx, %zx, %x, %p)",
region->vr_name,
region_offset,
object->vo_name,
object_offset,
length,
prot,
out);
struct vm_region_mapping *mapping
= vm_cache_alloc(&mapping_cache, VM_NORMAL);
if (!mapping) {
status = KERN_NO_MEMORY;
goto fail;
}
mapping->m_object = object;
mapping->m_prot = prot;
mapping->m_object_offset = object_offset;
mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING;
mapping->m_entry.e_parent = &region->vr_entry;
mapping->m_entry.e_address = region->vr_entry.e_address + region_offset;
mapping->m_entry.e_offset = region_offset;
mapping->m_entry.e_size = length;
#ifdef TRACE
virt_addr_t abs_base = entry_absolute_address(&mapping->m_entry);
tracek("mapping %s at [%llx-%llx]",
object->vo_name,
abs_base,
abs_base + length);
#endif
region_put_entry(region, &mapping->m_entry);
if (region != root) {
vm_region_unlock(region);
}
unsigned long lock_flags;
vm_object_lock_irqsave(object, &lock_flags);
queue_push_back(&object->vo_mappings, &mapping->m_object_entry);
vm_object_unlock_irqrestore(object, lock_flags);
if (out) {
*out = entry_absolute_address(&mapping->m_entry);
}
return KERN_OK;
fail:
if (region != root) {
vm_region_unlock(region);
}
return status;
}
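/* Example (hypothetical `obj`; error handling elided): map the first four
* pages of a vm-object read-only at any free offset in `root`, receiving
* the chosen virtual address in `va`:
*
*	virt_addr_t va = 0;
*	kern_status_t status = vm_region_map_object(
*		root,
*		VM_REGION_ANY_OFFSET,
*		obj,
*		0,
*		4 * VM_PAGE_SIZE,
*		VM_PROT_READ,
*		&va);
*/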
/* unmap some pages in the middle of a mapping, splitting it into two separate
* mappings */
static kern_status_t split_mapping(
struct vm_region_mapping *mapping,
struct vm_region *root,
off_t mapping_offset,
off_t unmap_offset,
off_t unmap_limit)
{
tracek("split mapping [%zx-%zx] subtract [%zx-%zx]",
mapping_offset,
mapping_offset + mapping->m_entry.e_size,
unmap_offset,
unmap_limit);
off_t mapping_limit = mapping_offset + mapping->m_entry.e_size;
struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);
struct vm_region_mapping *left = mapping;
struct vm_region_mapping *right
= vm_cache_alloc(&mapping_cache, VM_NORMAL);
if (!right) {
return KERN_NO_MEMORY;
}
off_t left_offset = mapping->m_entry.e_offset;
off_t right_offset = left_offset + (unmap_limit - mapping_offset);
off_t left_object_offset = mapping->m_object_offset;
size_t left_length = unmap_offset - mapping_offset;
size_t right_length = mapping_limit - unmap_limit;
off_t right_object_offset = mapping->m_object_offset
+ mapping->m_entry.e_size - right_length;
tracek("mapping=[%zx-%zx]->[%zx-%zx]",
mapping_offset,
mapping_limit,
mapping->m_object_offset,
mapping->m_object_offset + mapping->m_entry.e_size);
tracek("left=[%zx-%zx]->[%zx-%zx], right=[%zx-%zx]->[%zx-%zx]",
left_offset,
left_offset + left_length,
left_object_offset,
left_object_offset + left_length,
right_offset,
right_offset + right_length,
right_object_offset,
right_object_offset + right_length);
left->m_object_offset = left_object_offset;
left->m_entry.e_offset = left_offset;
left->m_entry.e_size = left_length;
right->m_object = left->m_object;
right->m_prot = left->m_prot;
right->m_entry.e_type = VM_REGION_ENTRY_MAPPING;
right->m_entry.e_parent = left->m_entry.e_parent;
right->m_entry.e_address = left->m_entry.e_address
+ (unmap_limit - mapping_offset);
right->m_object_offset = right_object_offset;
right->m_entry.e_offset = right_offset;
right->m_entry.e_size = right_length;
virt_addr_t unmap_base = root->vr_entry.e_offset + unmap_offset;
size_t unmap_length = unmap_limit - unmap_offset;
for (size_t i = 0; i < unmap_length; i += VM_PAGE_SIZE) {
tracek("unmapping %zx", unmap_base + i);
pmap_remove(root->vr_pmap, unmap_base + i);
}
region_put_entry(parent, &right->m_entry);
/* the new right half must appear on the object's mapping list, like any
* other mapping */
unsigned long flags;
vm_object_lock_irqsave(right->m_object, &flags);
queue_push_back(&right->m_object->vo_mappings, &right->m_object_entry);
vm_object_unlock_irqrestore(right->m_object, flags);
return KERN_OK;
}
/* unmap some pages from the left-side of a mapping to somewhere in the
* middle. */
static kern_status_t left_reduce_mapping(
struct vm_region_mapping *mapping,
struct vm_region *root,
off_t mapping_offset,
off_t unmap_offset,
off_t unmap_limit)
{
/* unmap_limit falls somewhere between mapping_offset and
* mapping_offset+length */
tracek("left reduce mapping [%zx-%zx] subtract [%zx-%zx]",
mapping_offset,
mapping_offset + mapping->m_entry.e_size,
unmap_offset,
unmap_limit);
virt_addr_t base = root->vr_entry.e_offset + mapping_offset;
off_t limit = mapping_offset + mapping->m_entry.e_size;
size_t length = mapping->m_entry.e_size - (limit - unmap_limit);
tracek(" unmapping %zx-%zx (%zx bytes)", base, base + length, length);
for (size_t i = 0; i < length; i += VM_PAGE_SIZE) {
pmap_remove(root->vr_pmap, base + i);
}
mapping->m_entry.e_offset += length;
mapping->m_entry.e_address += length;
mapping->m_object_offset += length;
mapping->m_entry.e_size -= length;
return KERN_OK;
}
/* unmap some pages from the middle of a mapping to the right-side. */
static kern_status_t right_reduce_mapping(
struct vm_region_mapping *mapping,
struct vm_region *root,
off_t mapping_offset,
off_t unmap_offset,
off_t unmap_limit)
{
/* unmap_offset falls somewhere between mapping_offset and
* mapping_offset+length */
tracek("right reduce mapping [%zx-%zx] subtract [%zx-%zx]",
mapping_offset,
mapping_offset + mapping->m_entry.e_size,
unmap_offset,
unmap_limit);
virt_addr_t base = root->vr_entry.e_offset + unmap_offset;
off_t limit = mapping_offset + mapping->m_entry.e_size;
size_t length = limit - unmap_offset;
tracek(" unmapping %zx-%zx (%zx bytes)", base, base + length, length);
for (size_t i = 0; i < length; i += VM_PAGE_SIZE) {
pmap_remove(root->vr_pmap, base + i);
}
mapping->m_entry.e_size -= length;
return KERN_OK;
}
/* completely unmap and delete an entire mapping */
static kern_status_t delete_mapping(
struct vm_region_mapping *mapping,
struct vm_region *root,
off_t mapping_offset)
{
virt_addr_t base = root->vr_entry.e_offset + mapping_offset;
tracek("delete mapping [%zx-%zx]",
base,
base + mapping->m_entry.e_size);
for (size_t i = 0; i < mapping->m_entry.e_size; i += VM_PAGE_SIZE) {
pmap_remove(root->vr_pmap, base + i);
}
unsigned long flags;
vm_object_lock_irqsave(mapping->m_object, &flags);
queue_delete(&mapping->m_object->vo_mappings, &mapping->m_object_entry);
vm_object_unlock_irqrestore(mapping->m_object, flags);
/* don't actually delete the mapping yet. that will be done by
* vm_region_unmap */
return KERN_OK;
}
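/* The four cases vm_region_unmap distinguishes for each overlapping mapping
* [m_base, m_limit) and unmap range [u_base, u_limit):
*
*	split:        m_base < u_base && u_limit < m_limit
*	              [=====XXXXX=====]    -> left + right halves remain
*	delete:       u_base <= m_base && m_limit <= u_limit
*	              XX[XXXXXXXXXXXXX]XX  -> mapping removed entirely
*	left_reduce:  u_base <= m_base && u_limit < m_limit
*	              XX[XXXXX========]    -> mapping shrinks from the left
*	right_reduce: m_base < u_base && m_limit <= u_limit
*	              [========XXXXX]XX    -> mapping shrinks from the right
*/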
kern_status_t vm_region_unmap(
struct vm_region *region,
off_t unmap_area_offset,
size_t unmap_area_length)
{
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
kern_status_t status = KERN_OK;
struct entry_iterator it;
off_t unmap_area_limit = unmap_area_offset + unmap_area_length;
tracek("unmapping %zx-%zx", unmap_area_offset, unmap_area_limit);
off_t tmp = 0;
mapping_iterator_begin(
&it,
region,
unmap_area_offset,
unmap_area_length,
&tmp);
while (it.it_entry) {
struct vm_region_mapping *mapping
= mapping_from_entry(it.it_entry);
off_t mapping_offset = tmp;
off_t mapping_limit = mapping_offset + it.it_entry->e_size;
bool split
= (unmap_area_offset > mapping_offset
&& unmap_area_limit < mapping_limit);
bool delete
= (unmap_area_offset <= mapping_offset
&& unmap_area_limit >= mapping_limit);
bool left_reduce
= (unmap_area_offset <= mapping_offset
&& unmap_area_limit < mapping_limit);
bool right_reduce
= (unmap_area_offset > mapping_offset
&& unmap_area_limit >= mapping_limit);
if (split) {
status = split_mapping(
mapping,
region,
mapping_offset,
unmap_area_offset,
unmap_area_limit);
/* both halves of a split survive; the (now reduced) left
* mapping must NOT be erased */
} else if (delete) {
status = delete_mapping(
mapping,
region,
mapping_offset);
} else if (left_reduce) {
status = left_reduce_mapping(
mapping,
region,
mapping_offset,
unmap_area_offset,
unmap_area_limit);
} else if (right_reduce) {
status = right_reduce_mapping(
mapping,
region,
mapping_offset,
unmap_area_offset,
unmap_area_limit);
} else {
panic("don't know what to do with this mapping");
}
if (delete) {
mapping_iterator_erase(
&it,
unmap_area_offset,
unmap_area_length,
&tmp);
} else {
mapping_iterator_move_next(
&it,
unmap_area_offset,
unmap_area_length,
&tmp);
}
if (status != KERN_OK) {
break;
}
}
mapping_iterator_finish(&it);
return status;
}
bool vm_region_validate_access(
struct vm_region *region,
virt_addr_t ptr,
size_t len,
vm_prot_t prot)
{
if (region->vr_status != VM_REGION_ONLINE) {
return false;
}
if (len == 0) {
return true;
}
if (ptr < region->vr_entry.e_offset) {
return false;
}
off_t offset = ptr - region->vr_entry.e_offset;
if (offset + len > region->vr_entry.e_size) {
return false;
}
offset &= ~VM_PAGE_MASK;
/* TODO improve this to not require a per-page loop */
for (off_t i = 0; i < len; i += VM_PAGE_SIZE) {
off_t x = offset + i;
struct vm_region_mapping *mapping
= region_get_mapping_recursive(
region,
&x,
VM_PAGE_SIZE);
if (!mapping) {
return false;
}
if ((mapping->m_prot & prot) != prot) {
return false;
}
struct vm_region *parent
= region_from_entry(mapping->m_entry.e_parent);
if (parent != region) {
vm_region_unlock(parent);
}
}
return true;
}
/* this function must be called with `region` locked */
kern_status_t vm_region_demand_map(
struct vm_region *region,
virt_addr_t addr,
enum pmap_fault_flags flags)
{
if (region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
addr &= ~VM_PAGE_MASK;
if (addr < region->vr_entry.e_offset
|| addr >= region->vr_entry.e_offset + region->vr_entry.e_size) {
return KERN_NO_ENTRY;
}
off_t region_offset = addr - region->vr_entry.e_offset;
struct vm_region_mapping *mapping
= region_get_mapping_recursive(region, &region_offset, 1);
if (!mapping) {
return KERN_NO_ENTRY;
}
off_t object_offset = region_offset - mapping->m_entry.e_offset
+ mapping->m_object_offset;
tracek("vm: tried to access vm-object %s at offset=%05llx",
mapping->m_object->vo_name,
object_offset);
unsigned long lock_flags;
vm_object_lock_irqsave(mapping->m_object, &lock_flags);
struct vm_page *pg = vm_object_alloc_page(
mapping->m_object,
object_offset,
VM_PAGE_4K);
vm_object_unlock_irqrestore(mapping->m_object, lock_flags);
if (!pg) {
struct vm_region *parent
= region_from_entry(mapping->m_entry.e_parent);
if (parent != region) {
vm_region_unlock(parent);
}
return KERN_NO_MEMORY;
}
tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr);
kern_status_t status = pmap_add(
region->vr_pmap,
addr,
vm_page_get_pfn(pg),
mapping->m_prot,
PMAP_NORMAL);
struct vm_region *parent = region_from_entry(mapping->m_entry.e_parent);
if (parent != region) {
vm_region_unlock(parent);
}
return status;
}
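/* Example (hypothetical page-fault path): the fault handler resolves the
* faulting address to a region, then demand-maps the page under the region
* lock:
*
*	unsigned long flags;
*	vm_region_lock_irqsave(region, &flags);
*	kern_status_t status = vm_region_demand_map(region, fault_addr, fault_flags);
*	vm_region_unlock_irqrestore(region, flags);
*	if (status != KERN_OK) {
*		// unresolved fault: signal/kill the offending task
*	}
*/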
virt_addr_t vm_region_get_base_address(const struct vm_region *region)
{
if (region->vr_status != VM_REGION_ONLINE) {
return 0;
}
return entry_absolute_address(&region->vr_entry);
}
kern_status_t vm_region_read_kernel(
struct vm_region *src_region,
virt_addr_t src_ptr,
size_t count,
void *destp,
size_t *nr_read)
{
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct vm_iterator src;
char *dest = destp;
vm_iterator_begin(
&src,
src_region,
src_ptr,
VM_PROT_READ | VM_PROT_USER);
kern_status_t status = KERN_OK;
size_t r = 0;
while (r < count && src.it_max) {
size_t remaining = count - r;
size_t to_move = MIN(src.it_max, remaining);
memmove(dest, src.it_buf, to_move);
status = vm_iterator_seek(&src, to_move);
if (status != KERN_OK) {
break;
}
r += to_move;
dest += to_move;
}
if (nr_read) {
*nr_read = r;
}
return status;
}
kern_status_t vm_region_memmove(
struct vm_region *dest_region,
virt_addr_t dest_ptr,
struct vm_region *src_region,
virt_addr_t src_ptr,
size_t count,
size_t *nr_moved)
{
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (dest_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct vm_iterator src, dest;
vm_iterator_begin(
&src,
src_region,
src_ptr,
VM_PROT_READ | VM_PROT_USER);
vm_iterator_begin(
&dest,
dest_region,
dest_ptr,
VM_PROT_WRITE | VM_PROT_USER);
kern_status_t status = KERN_OK;
size_t r = 0;
while (count && src.it_max && dest.it_max) {
size_t to_move = MIN(MIN(src.it_max, dest.it_max), count);
memmove(dest.it_buf, src.it_buf, to_move);
status = vm_iterator_seek(&src, to_move);
if (status != KERN_OK) {
break;
}
status = vm_iterator_seek(&dest, to_move);
if (status != KERN_OK) {
break;
}
count -= to_move;
r += to_move;
}
if (nr_moved) {
*nr_moved = r;
}
return status;
}
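/* Example (hypothetical regions/addresses; error handling elided): copy a
* buffer between two address spaces:
*
*	size_t moved = 0;
*	kern_status_t status = vm_region_memmove(
*		dst_region, dst_va,
*		src_region, src_va,
*		len,
*		&moved);
*	// on KERN_MEMORY_FAULT, `moved` still holds the bytes copied so far
*/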
kern_status_t vm_region_memmove_v(
struct vm_region *dest_region,
size_t dest_offset,
const struct iovec *dest_vecs,
size_t nr_dest_vecs,
struct vm_region *src_region,
size_t src_offset,
const struct iovec *src_vecs,
size_t nr_src_vecs,
size_t bytes_to_move,
size_t *nr_bytes_moved)
{
if (src_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
if (dest_region->vr_status != VM_REGION_ONLINE) {
return KERN_BAD_STATE;
}
struct iovec_iterator src, dest;
iovec_iterator_begin_user(&src, src_region, src_vecs, nr_src_vecs);
iovec_iterator_begin_user(&dest, dest_region, dest_vecs, nr_dest_vecs);
iovec_iterator_seek(&src, src_offset);
iovec_iterator_seek(&dest, dest_offset);
size_t moved = 0;
while (bytes_to_move && src.it_len && dest.it_len) {
size_t to_move
= MIN(MIN(src.it_len, dest.it_len), bytes_to_move);
kern_status_t status = vm_region_memmove(
dest_region,
dest.it_base,
src_region,
src.it_base,
to_move,
NULL);
if (status != KERN_OK) {
return status;
}
iovec_iterator_seek(&src, to_move);
iovec_iterator_seek(&dest, to_move);
bytes_to_move -= to_move;
moved += to_move;
}
if (nr_bytes_moved) {
*nr_bytes_moved = moved;
}
return KERN_OK;
}
#ifdef TRACE
void vm_region_dump(struct vm_region *region)
{
char s[128];
size_t p = 0;
struct entry_iterator it;
entry_iterator_begin(&it, region);
while (it.it_entry) {
p = 0;
for (unsigned int i = 0; i < it.it_depth; i++) {
p += snprintf(s + p, sizeof s - p, " ");
}
switch (it.it_entry->e_type) {
case VM_REGION_ENTRY_REGION: {
struct vm_region *child
= region_from_entry(it.it_entry);
p += snprintf(
s + p,
sizeof s - p,
"-region [%zx-%zx] %s",
child->vr_entry.e_offset,
child->vr_entry.e_offset
+ child->vr_entry.e_size,
child->vr_name);
break;
}
case VM_REGION_ENTRY_MAPPING: {
struct vm_region_mapping *mapping
= mapping_from_entry(it.it_entry);
p += snprintf(
s + p,
sizeof s - p,
"+mapping [%zx-%zx] %s",
mapping->m_entry.e_offset,
mapping->m_entry.e_offset
+ mapping->m_entry.e_size,
mapping->m_object->vo_name);
break;
}
default:
break;
}
tracek("%s", s);
entry_iterator_move_next(&it);
}
}
#endif