diff --git a/include/mango/vm-region.h b/include/mango/vm-region.h new file mode 100644 index 0000000..51b3baa --- /dev/null +++ b/include/mango/vm-region.h @@ -0,0 +1,126 @@ +#ifndef MANGO_VM_REGION_H_ +#define MANGO_VM_REGION_H_ + +#include +#include +#include + +#define VM_REGION_NAME_MAX 64 + +#define VM_REGION_ANY_MAP_ADDRESS ((virt_addr_t) - 1) + +struct vm_region; +struct vm_object; + +enum vm_region_entry_type { + VM_REGION_ENTRY_NONE = 0, + VM_REGION_ENTRY_REGION, + VM_REGION_ENTRY_MAPPING, +}; + +struct vm_region_entry { + struct btree_node e_node; + struct vm_region_entry *e_parent; + enum vm_region_entry_type e_type; + /* absolute virtual address of the entry */ + virt_addr_t e_base_address; + /* size of the entry in bytes */ + size_t e_size; +}; + +struct vm_region_mapping { + struct vm_region_entry m_entry; + struct vm_object *m_object; + + /* used to link to vm_object->vo_mappings */ + struct queue_entry m_object_entry; + + enum vm_prot m_prot; + /* offset in bytes to the start of the object data that was mapped */ + off_t m_object_offset; +}; + +struct vm_region { + struct object vr_base; + struct vm_region_entry vr_entry; + + char vr_name[VM_REGION_NAME_MAX]; + + /* btree of struct vm_region_entry. + * sibling entries cannot overlap each other, and child entries must + * be entirely contained within their immediate parent entry. */ + struct btree vr_entries; + + /* memory protection restriction mask. + * any mapping in this region, or any of its children, cannot use + * protection flags that are not set in this mask. + * for example, if VM_PROT_EXEC is /not/ set here, no mapping + * can be created in this region or any child region with VM_PROT_EXEC + * set. 
*/ + enum vm_prot vr_prot; + + /* the physical address space in which mappings in this region (and + * its children) are created */ + pmap_t vr_pmap; +}; + +extern kern_status_t vm_region_type_init(void); + +extern kern_status_t vm_region_create( + struct vm_region *parent, + const char *name, + virt_addr_t base, + size_t len, + enum vm_prot prot, + struct vm_region **out); + +/* find the child region that has jurisdiction over the specified virtual + * address. returns the lowest-nested region that covers the specified virtual + * address. */ +extern struct vm_region *vm_region_find_child( + struct vm_region *region, + virt_addr_t addr); + +/* find the child region that has jurisdiction over the specified virtual + * address area. returns the lowest-nested region that covers the specified + * virtual address area. the area must be fully contained within a region, with + * no partial overlaps. if an area is covered by multiple regions, or is only + * partially within a region, returns NULL. */ +extern struct vm_region *vm_region_find_child_for_area( + struct vm_region *region, + virt_addr_t addr, + size_t len); +extern struct vm_region_mapping *vm_region_find_mapping( + struct vm_region *region, + virt_addr_t addr); + +extern kern_status_t vm_region_map_object( + struct vm_region *region, + virt_addr_t map_address, + struct vm_object *object, + off_t object_offset, + size_t length, + enum vm_prot prot, + virt_addr_t *out); + +/* returns true if the memory area defined by [base, base+len] contains: + * - no child regions + * - no vm_object mappings + * if any child regions or mappings exist in the memory area, returns false. + * if the memory area exceeds the bounds of the region, returns false. 
+ */ +extern bool vm_region_is_area_free( + const struct vm_region *region, + virt_addr_t base, + size_t len); + +extern kern_status_t vm_region_demand_map( + struct vm_region *region, + virt_addr_t addr, + enum pmap_fault_flags flags); + +extern void vm_region_dump(struct vm_region *region, int depth); + +DEFINE_OBJECT_LOCK_FUNCTION(vm_region, vr_base) + +#endif diff --git a/vm/bootstrap.c b/vm/bootstrap.c index 1dcb9d9..3e7b319 100644 --- a/vm/bootstrap.c +++ b/vm/bootstrap.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ kern_status_t vm_bootstrap( kmalloc_init(); vm_object_type_init(); + vm_region_type_init(); return KERN_OK; } diff --git a/vm/vm-region.c b/vm/vm-region.c new file mode 100644 index 0000000..63fbcd2 --- /dev/null +++ b/vm/vm-region.c @@ -0,0 +1,703 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#undef ASLR + +enum search_direction { + SEARCH_LEFT, + SEARCH_RIGHT, +}; + +#define VM_REGION_CAST(p) \ + OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p) + +static struct object_type vm_region_type = { + .ob_name = "vm-region", + .ob_size = sizeof(struct vm_region), + .ob_header_offset = offsetof(struct vm_region, vr_base), +}; + +static struct vm_cache mapping_cache = { + .c_name = "vm-region-mapping", + .c_obj_size = sizeof(struct vm_region_mapping), +}; + +struct entry_pair { + struct vm_region_entry *p_left, *p_right; +}; + +kern_status_t vm_region_type_init(void) +{ + vm_cache_init(&mapping_cache); + return object_type_register(&vm_region_type); +} + +static virt_addr_t find_free_area_linear( + struct vm_region *region, + size_t target_length); +static virt_addr_t find_free_area_random( + struct vm_region *region, + size_t target_length); + +static void put_entry(struct vm_region *parent, struct vm_region_entry *child) +{ + struct btree_node *cur = parent->vr_entries.b_root; + if (!cur) { + parent->vr_entries.b_root = &child->e_node; + 
btree_insert_fixup(&parent->vr_entries, &child->e_node); + return; + } + + virt_addr_t child_base = child->e_base_address; + virt_addr_t child_limit = child_base + child->e_size - 1; + + while (cur) { + struct vm_region_entry *cur_entry + = BTREE_CONTAINER(struct vm_region_entry, e_node, cur); + + struct btree_node *next = NULL; + virt_addr_t cur_base = cur_entry->e_base_address; + virt_addr_t cur_limit = cur_base + cur_entry->e_size - 1; + + if (child_limit < cur_base) { + next = btree_left(cur); + } else if (child_base > cur_limit) { + next = btree_right(cur); + } else { + panic("tried to add an overlapping entry to vm-region"); + } + + if (next) { + cur = next; + continue; + } + + if (child_limit < cur_base) { + btree_put_left(cur, &child->e_node); + } else { + btree_put_right(cur, &child->e_node); + } + + btree_insert_fixup(&parent->vr_entries, &child->e_node); + break; + } +} + +static struct vm_region *vm_region_from_entry(struct vm_region_entry *entry) +{ + if (entry->e_type != VM_REGION_ENTRY_REGION) { + return NULL; + } + + return BTREE_CONTAINER(struct vm_region, vr_entry, entry); +} + +static struct vm_region_mapping *vm_region_mapping_from_entry( + struct vm_region_entry *entry) +{ + if (entry->e_type != VM_REGION_ENTRY_MAPPING) { + return NULL; + } + + return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry); +} + +kern_status_t vm_region_create( + struct vm_region *parent, + const char *name, + virt_addr_t base, + size_t len, + enum vm_prot prot, + struct vm_region **out) +{ + if (!base || !len) { + return KERN_INVALID_ARGUMENT; + } + + if (len & VM_PAGE_MASK) { + len &= ~VM_PAGE_MASK; + len += VM_PAGE_SIZE; + } + + if (parent) { + if ((prot & parent->vr_prot) != prot) { + /* child region protection must match or be a + * subset of parent region protection */ + return KERN_INVALID_ARGUMENT; + } + + if (base == VM_REGION_ANY_MAP_ADDRESS) { +#ifdef ASLR + base = find_free_area_random(parent, len); +#else + base =
find_free_area_linear(parent, len); +#endif + base &= ~VM_PAGE_MASK; + + if (base == 0) { + return KERN_NO_MEMORY; + } + } else if (!vm_region_is_area_free(parent, base, len)) { + return KERN_INVALID_ARGUMENT; + } + } + + struct object *region_object = object_create(&vm_region_type); + if (!region_object) { + return KERN_NO_MEMORY; + } + + struct vm_region *region = VM_REGION_CAST(region_object); + + region->vr_prot = prot; + region->vr_entry.e_type = VM_REGION_ENTRY_REGION; + region->vr_entry.e_base_address = base; + region->vr_entry.e_size = len; + + if (parent) { + region->vr_entry.e_parent = &parent->vr_entry; + region->vr_pmap = parent->vr_pmap; + put_entry(parent, &region->vr_entry); + } + + if (name) { + strncpy(region->vr_name, name, sizeof region->vr_name); + region->vr_name[sizeof region->vr_name - 1] = '\0'; + } + + *out = region; + return KERN_OK; +} + +static struct vm_region_entry *vm_region_find_entry( + struct vm_region *region, + virt_addr_t addr) +{ + struct btree_node *cur = region->vr_entries.b_root; + if (!cur) { + return NULL; + } + + struct vm_region_entry *result = NULL; + + while (cur) { + struct vm_region_entry *child + = BTREE_CONTAINER(struct vm_region_entry, e_node, cur); + + struct btree_node *next = NULL; + virt_addr_t child_limit + = child->e_base_address + child->e_size - 1; + + if (addr < child->e_base_address) { + next = btree_left(cur); + } else if (addr > child_limit) { + next = btree_right(cur); + } else { + result = child; + break; + } + + cur = next; + } + + return result; +} + +struct vm_region *vm_region_find_child( + struct vm_region *region, + virt_addr_t addr) +{ + struct vm_region_entry *result = vm_region_find_entry(region, addr); + + if (!result || result->e_type != VM_REGION_ENTRY_REGION) { + return region; + } + + return vm_region_from_entry(result); +} + +struct vm_region *vm_region_find_child_for_area( + struct vm_region *region, + virt_addr_t base, + size_t len) +{ + virt_addr_t limit = base + len - 1; + + while
(region) { + struct btree_node *cur = region->vr_entries.b_root; + if (!cur) { + break; + } + + bool found_new_region = false; + while (cur) { + struct vm_region_entry *child = BTREE_CONTAINER( + struct vm_region_entry, + e_node, + cur); + + struct btree_node *next = NULL; + virt_addr_t child_base = child->e_base_address; + virt_addr_t child_limit + = child_base + child->e_size - 1; + + if (limit < child_base) { + next = btree_left(cur); + } else if (base > child_limit) { + next = btree_right(cur); + } else if (base >= child_base && limit <= child_limit) { + region = vm_region_from_entry(child); + found_new_region = true; + break; + } else { + return NULL; + } + + cur = next; + } + + if (!found_new_region) { + break; + } + } + + return region; +} + +struct vm_region_mapping *vm_region_find_mapping( + struct vm_region *region, + virt_addr_t addr) +{ + struct vm_region_entry *result = vm_region_find_entry(region, addr); + + if (!result) { + return NULL; + } + + return vm_region_mapping_from_entry(result); +} + +static struct vm_region_entry *get_random_child(struct vm_region *region) +{ + enum { + STEP_LEFT = 0, + STEP_RIGHT = 1, + STEP_FINISH = 2, + } step; + + struct btree_node *result = NULL; + struct btree_node *cur = region->vr_entries.b_root; + if (!cur) { + return NULL; + } + + while (1) { + unsigned long r; + fill_random(&r, sizeof r); + + struct btree_node *next = NULL; + + step = r % 3; + switch (step) { + case STEP_LEFT: + next = btree_left(cur); + break; + case STEP_RIGHT: + next = btree_right(cur); + break; + case STEP_FINISH: + result = cur; + break; + default: + return NULL; + } + + if (!next) { + result = cur; + break; + } + + cur = next; + } + + if (!result) { + return NULL; + } + + return BTREE_CONTAINER(struct vm_region_entry, e_node, result); +} + +static virt_addr_t find_free_area_linear_ex( + struct vm_region *region, + size_t target_length, + struct btree_node *start, + enum search_direction direction) +{ + if (region->vr_entry.e_size < 
target_length) { + return 0; + } + + struct btree_node *left_node = NULL, *right_node = NULL; + + switch (direction) { + case SEARCH_LEFT: + right_node = start; + left_node = start ? btree_prev(start) : NULL; + break; + case SEARCH_RIGHT: + left_node = start; + right_node = start ? btree_next(start) : NULL; + break; + default: + return 0; + } + + if (!left_node && !right_node) { + return 0; + } + + while (1) { + struct vm_region_entry *left = BTREE_CONTAINER( + struct vm_region_entry, + e_node, + left_node); + struct vm_region_entry *right = BTREE_CONTAINER( + struct vm_region_entry, + e_node, + right_node); + + /* addresses of the first and last free bytes in the area + * respectively. */ + virt_addr_t area_base, area_limit; + if (left && right) { + area_base = left->e_base_address + left->e_size; + area_limit = right->e_base_address - 1; + } else if (right) { + area_base = region->vr_entry.e_base_address; + area_limit = right->e_base_address - 1; + } else if (left) { + area_base = left->e_base_address + left->e_size; + area_limit = region->vr_entry.e_base_address + + region->vr_entry.e_size - 1; + } else { + return 0; + } + + size_t area_size = 0; + if (area_limit >= area_base) { + area_size = area_limit - area_base + 1; + } + + if (area_size >= target_length) { + return area_base; + } + + if (direction == SEARCH_RIGHT) { + left_node = right_node; + right_node = btree_next(right_node); + } else { + right_node = left_node; + left_node = btree_prev(right_node); + } + } + + return 0; +} + +static virt_addr_t find_free_area_linear( + struct vm_region *region, + size_t target_length) +{ + if (!region->vr_entries.b_root) { + return region->vr_entry.e_base_address; + } + + return find_free_area_linear_ex( + region, + target_length, + btree_first(&region->vr_entries), + SEARCH_RIGHT); +} + +static virt_addr_t random_address( + virt_addr_t area_base, + size_t area_length, + size_t target_length) +{ + size_t random_range = area_length - target_length; + + off_t offset = 0; +
fill_random(&offset, sizeof offset); + + offset = random_range ? (off_t)((unsigned long long)offset % random_range) : 0; + return area_base + offset; +} + +static virt_addr_t find_free_area_random( + struct vm_region *region, + size_t target_length) +{ + int tmp = 0; + struct btree_node *node = NULL; + struct vm_region_entry *basis = get_random_child(region); + + fill_random(&tmp, sizeof tmp); + enum search_direction direction = tmp % 2; + + struct vm_region_entry *left = NULL, *right = NULL; + if (direction == SEARCH_LEFT) { + node = basis ? btree_prev(&basis->e_node) : NULL; + right = basis; + left = BTREE_CONTAINER(struct vm_region_entry, e_node, node); + } else { + node = basis ? btree_next(&basis->e_node) : NULL; + left = basis; + right = BTREE_CONTAINER(struct vm_region_entry, e_node, node); + } + + virt_addr_t base = region->vr_entry.e_base_address, + limit = base + region->vr_entry.e_size - 1; + + if (left) { + base = left->e_base_address + left->e_size; + } + + if (right) { + limit = right->e_base_address - 1; + } + + if (limit < base || limit - base + 1 < target_length) { return 0; } return random_address(base, limit - base + 1, target_length); +} + +kern_status_t vm_region_map_object( + struct vm_region *region, + virt_addr_t map_address, + struct vm_object *object, + off_t object_offset, + size_t length, + enum vm_prot prot, + virt_addr_t *out) +{ + + object_offset &= ~VM_PAGE_MASK; + + if (length & VM_PAGE_MASK) { + length &= ~VM_PAGE_MASK; + length += VM_PAGE_SIZE; + } + + if (!region || !object || !out) { + return KERN_INVALID_ARGUMENT; + } + + if ((prot & region->vr_prot) != prot) { + return KERN_INVALID_ARGUMENT; + } + + if ((prot & object->vo_prot) != prot) { + return KERN_INVALID_ARGUMENT; + } + + if (!length || object_offset + length > object->vo_size) { + return KERN_INVALID_ARGUMENT; + } + + if (map_address != VM_REGION_ANY_MAP_ADDRESS) { + region = vm_region_find_child_for_area( + region, + map_address, + length); + } + + if (!region) { + return KERN_INVALID_ARGUMENT; + } + + if (map_address == VM_REGION_ANY_MAP_ADDRESS) { +#ifdef ASLR + map_address =
find_free_area_random(region, length); +#else + map_address = find_free_area_linear(region, length); +#endif + map_address &= ~VM_PAGE_MASK; + + if (map_address == 0) { + return KERN_NO_MEMORY; + } + } else if (!vm_region_is_area_free(region, map_address, length)) { + return KERN_INVALID_ARGUMENT; + } + + struct vm_region_mapping *mapping + = vm_cache_alloc(&mapping_cache, VM_NORMAL); + if (!mapping) { + return KERN_NO_MEMORY; + } + + tracek("mapping %s at [%llx-%llx]", + object->vo_name, + map_address, + map_address + length); + mapping->m_object = object; + mapping->m_prot = prot; + mapping->m_object_offset = object_offset; + mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING; + mapping->m_entry.e_parent = ®ion->vr_entry; + mapping->m_entry.e_base_address = map_address; + mapping->m_entry.e_size = length; + + put_entry(region, &mapping->m_entry); + queue_push_back(&object->vo_mappings, &mapping->m_object_entry); + + *out = map_address; + return KERN_OK; +} + +bool vm_region_is_area_free( + const struct vm_region *region, + virt_addr_t base, + size_t len) +{ + /* address of the last byte in the region */ + virt_addr_t region_limit + = region->vr_entry.e_base_address + region->vr_entry.e_size - 1; + if (base < region->vr_entry.e_base_address || base > region_limit) { + return false; + } + + if (base + len - 1 > region_limit) { + return false; + } + + virt_addr_t limit = base + len - 1; + + struct btree_node *cur = region->vr_entries.b_root; + if (!cur) { + return true; + } + + while (cur) { + struct vm_region_entry *entry + = BTREE_CONTAINER(struct vm_region_entry, e_node, cur); + + struct btree_node *next = NULL; + virt_addr_t entry_limit + = entry->e_base_address + entry->e_size - 1; + + if (base > entry_limit) { + next = btree_right(cur); + } else if (limit < entry->e_base_address) { + next = btree_left(cur); + } else { + return false; + } + + cur = next; + } + + return true; +} + +kern_status_t vm_region_demand_map( + struct vm_region *region, + virt_addr_t 
addr, + enum pmap_fault_flags flags) +{ + addr &= ~VM_PAGE_MASK; + region = vm_region_find_child(region, addr); + + struct vm_region_mapping *mapping + = vm_region_find_mapping(region, addr); + if (!mapping) { + return KERN_NO_ENTRY; + } + + off_t offset = addr - mapping->m_entry.e_base_address + + mapping->m_object_offset; + + tracek("vm: tried to access vm-object %s at offset=%05llx", + mapping->m_object->vo_name, + offset); + + struct vm_page *pg + = vm_object_alloc_page(mapping->m_object, offset, VM_PAGE_4K); if (!pg) { return KERN_NO_MEMORY; } + tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr); + return pmap_add( + region->vr_pmap, + addr, + vm_page_get_pfn(pg), + mapping->m_prot, + PMAP_NORMAL); +} + +#ifdef TRACE +void vm_region_dump(struct vm_region *region, int depth) +{ + char line[128] = {0}; + size_t p = 0; + + for (int i = 0; i < depth; i++) { + p += snprintf(line + p, sizeof line - p, " "); + } + p += snprintf( + line + p, + sizeof line - p, + "region: %s [%llx-%llx]", + region->vr_name, + region->vr_entry.e_base_address, + region->vr_entry.e_base_address + region->vr_entry.e_size); + + printk("%s", line); + + struct btree_node *cur = btree_first(&region->vr_entries); + while (cur) { + memset(line, 0x0, sizeof line); + p = 0; + + for (int i = 0; i < depth + 1; i++) { + p += snprintf(line + p, sizeof line - p, " "); + } + + struct vm_region_entry *entry + = BTREE_CONTAINER(struct vm_region_entry, e_node, cur); + struct vm_region *child_region = vm_region_from_entry(entry); + struct vm_region_mapping *child_mapping + = vm_region_mapping_from_entry(entry); + + switch (entry->e_type) { + case VM_REGION_ENTRY_REGION: + break; + case VM_REGION_ENTRY_MAPPING: + p += snprintf( + line + p, + sizeof line - p, + "mapping: %s [%llx-%llx] -> [%llx-%llx]", + child_mapping->m_object->vo_name, + child_mapping->m_object_offset, + child_mapping->m_object_offset + + child_mapping->m_entry.e_size, + child_mapping->m_entry.e_base_address, + child_mapping->m_entry.e_base_address + +
child_mapping->m_entry.e_size); + printk("%s", line); + break; + default: + p += snprintf(line + p, sizeof line - p, "invalid"); + printk("%s", line); + break; + } + + if (child_region) { + vm_region_dump(child_region, depth + 1); + } + + cur = btree_next(cur); + } +} +#endif