/* the original header names were stripped in transit; the list below is a
 * placeholder reconstruction based on the symbols this file uses (the
 * actual header paths in the tree will differ). */
#include <kern/object.h>  /* struct object_type, object_create */
#include <kern/panic.h>   /* panic, printk, tracek */
#include <kern/queue.h>   /* queue_push_back */
#include <kern/random.h>  /* fill_random */
#include <kern/status.h>  /* kern_status_t */
#include <lib/btree.h>    /* btree primitives, BTREE_CONTAINER */
#include <lib/string.h>   /* memset, strncpy, snprintf */
#include <vm/pmap.h>      /* pmap_add */
#include <vm/vm.h>        /* struct vm_region, struct vm_object, vm_cache */

/*** STATIC DATA + MACROS *****************************************************/

#undef ASLR

#define INVALID_OFFSET ((off_t)-1)

#ifdef ASLR
#define region_find_free_area(region, length) \
        region_find_free_area_random(region, length)
#else
#define region_find_free_area(region, length) \
        region_find_free_area_linear(region, length)
#endif

enum search_direction {
        SEARCH_LEFT,
        SEARCH_RIGHT,
};

#define VM_REGION_CAST(p) \
        OBJECT_C_CAST(struct vm_region, vr_base, &vm_region_type, p)

static struct object_type vm_region_type = {
        .ob_name = "vm-region",
        .ob_size = sizeof(struct vm_region),
        .ob_header_offset = offsetof(struct vm_region, vr_base),
};

static struct vm_cache mapping_cache = {
        .c_name = "vm-region-mapping",
        .c_obj_size = sizeof(struct vm_region_mapping),
};

/*** INTERNAL UTILITY FUNCTIONS ***********************************************/

static struct vm_region *region_from_entry(struct vm_region_entry *entry)
{
        if (!entry || entry->e_type != VM_REGION_ENTRY_REGION) {
                return NULL;
        }
        return BTREE_CONTAINER(struct vm_region, vr_entry, entry);
}

static struct vm_region_mapping *mapping_from_entry(
        struct vm_region_entry *entry)
{
        if (!entry || entry->e_type != VM_REGION_ENTRY_MAPPING) {
                return NULL;
        }
        return BTREE_CONTAINER(struct vm_region_mapping, m_entry, entry);
}

static virt_addr_t entry_absolute_address(struct vm_region_entry *entry)
{
        virt_addr_t result = 0;

        while (entry) {
                result += entry->e_offset;
                entry = entry->e_parent;
        }
        return result;
}

static void region_put_entry(
        struct vm_region *parent, struct vm_region_entry *child)
{
        struct btree_node *cur = parent->vr_entries.b_root;

        if (!cur) {
                parent->vr_entries.b_root = &child->e_node;
                btree_insert_fixup(&parent->vr_entries, &child->e_node);
                return;
        }

        off_t child_base = child->e_offset;
        off_t child_limit = child_base + child->e_size - 1;

        while (cur) {
                struct vm_region_entry *cur_entry =
                        BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
                struct btree_node *next = NULL;
                off_t cur_base = cur_entry->e_offset;
                off_t cur_limit = cur_base + cur_entry->e_size - 1;

                if (child_limit < cur_base) {
                        next = btree_left(cur);
                } else if (child_base > cur_limit) {
                        next = btree_right(cur);
                } else {
                        panic("tried to add an overlapping entry to vm-region");
                }

                if (next) {
                        cur = next;
                        continue;
                }

                if (child_limit < cur_base) {
                        btree_put_left(cur, &child->e_node);
                } else {
                        btree_put_right(cur, &child->e_node);
                }
                btree_insert_fixup(&parent->vr_entries, &child->e_node);
                break;
        }
}

/* find the child entry that covers the area [offset, offset + len).
 * DOES NOT search recursively! */
static struct vm_region_entry *region_get_entry(
        struct vm_region *region, off_t offset, size_t len)
{
        struct btree_node *cur = region->vr_entries.b_root;

        if (!cur) {
                return NULL;
        }

        struct vm_region_entry *result = NULL;
        off_t base = offset, limit = offset + len - 1;

        while (cur) {
                struct vm_region_entry *child =
                        BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
                struct btree_node *next = NULL;
                off_t child_base = child->e_offset;
                off_t child_limit = child->e_offset + child->e_size - 1;

                if (limit < child_base) {
                        next = btree_left(cur);
                } else if (base > child_limit) {
                        next = btree_right(cur);
                } else {
                        result = child;
                        break;
                }
                cur = next;
        }
        return result;
}
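/*
 * a note on the layout the helpers above maintain (addresses made up):
 * every region keeps its children (sub-regions and mappings) in
 * vr_entries, a btree ordered by the non-overlapping intervals
 * [e_offset, e_offset + e_size), where e_offset is relative to the
 * parent.  the absolute address of an entry is therefore the sum of
 * e_offset along the parent chain, which is what
 * entry_absolute_address() computes:
 *
 *   root region "task"   e_offset = 0x10000000 (base virtual address)
 *     child region       e_offset = 0x00100000 (relative to "task")
 *       mapping          e_offset = 0x00002000 (relative to the child)
 *
 *   absolute base of the mapping = 0x10102000
 */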
/* find the child region that covers the area [*offp, *offp + len).
 * searches recursively.
 * the value in `offp` is rewritten as an offset relative to the returned
 * region. */
static struct vm_region *region_get_child_region_recursive(
        struct vm_region *region, off_t *offp, size_t len)
{
        off_t offset = *offp;

        if (offset >= (off_t)region->vr_entry.e_size) {
                return NULL;
        }

        while (1) {
                struct vm_region_entry *next =
                        region_get_entry(region, offset, len);
                struct vm_region *next_region = region_from_entry(next);

                if (next_region) {
                        /* descend: make the offset relative to the child */
                        offset -= next->e_offset;
                        region = next_region;
                } else {
                        break;
                }
        }

        *offp = offset;
        return region;
}

static struct vm_region_mapping *region_get_mapping_recursive(
        struct vm_region *region, off_t *offp, size_t len)
{
        off_t offset = *offp;

        region = region_get_child_region_recursive(region, &offset, len);
        if (!region) {
                return NULL;
        }

        struct vm_region_entry *entry = region_get_entry(region, offset, len);

        *offp = offset;
        return mapping_from_entry(entry);
}

static off_t generate_random_address(
        off_t area_base, size_t area_length, size_t target_length)
{
        size_t random_range = area_length - target_length;
        off_t offset = 0;

        if (!random_range) {
                /* exact fit: only one possible placement */
                return area_base;
        }

        fill_random(&offset, sizeof offset);
        /* random_range is unsigned, so the modulo result is non-negative */
        offset %= random_range;
        return area_base + offset;
}

static struct vm_region_entry *region_get_random_entry(struct vm_region *region)
{
        enum {
                STEP_LEFT = 0,
                STEP_RIGHT = 1,
                STEP_FINISH = 2,
        } step;
        struct btree_node *result = NULL;
        struct btree_node *cur = region->vr_entries.b_root;

        if (!cur) {
                return NULL;
        }

        /* random walk from the root: descend left or right with probability
         * 1/3 each, stop at the current node with probability 1/3 */
        while (1) {
                unsigned long r;
                fill_random(&r, sizeof r);

                struct btree_node *next = NULL;

                step = r % 3;
                switch (step) {
                case STEP_LEFT:
                        next = btree_left(cur);
                        break;
                case STEP_RIGHT:
                        next = btree_right(cur);
                        break;
                case STEP_FINISH:
                        result = cur;
                        break;
                default:
                        return NULL;
                }

                if (!next) {
                        result = cur;
                        break;
                }
                cur = next;
        }

        if (!result) {
                return NULL;
        }
        return BTREE_CONTAINER(struct vm_region_entry, e_node, result);
}

static off_t region_find_free_area_ex(
        struct vm_region *region,
        size_t target_length,
        struct btree_node *start,
        enum search_direction direction,
        bool random)
{
        if (region->vr_entry.e_size < target_length) {
                return INVALID_OFFSET;
        }

        struct btree_node *left_node = NULL, *right_node = NULL;

        /* seed the scan window with the gap adjacent to `start`, bounded by
         * its in-order neighbour on the far side of the search direction */
        switch (direction) {
        case SEARCH_LEFT:
                left_node = start;
                right_node = start ? btree_next(start) : NULL;
                break;
        case SEARCH_RIGHT:
                left_node = start ? btree_prev(start) : NULL;
                right_node = start;
                break;
        default:
                return INVALID_OFFSET;
        }

        if (!left_node && !right_node) {
                return INVALID_OFFSET;
        }

        while (1) {
                struct vm_region_entry *left = left_node
                        ? BTREE_CONTAINER(struct vm_region_entry, e_node,
                                          left_node)
                        : NULL;
                struct vm_region_entry *right = right_node
                        ? BTREE_CONTAINER(struct vm_region_entry, e_node,
                                          right_node)
                        : NULL;

                /* offsets of the first and last free bytes in the area,
                 * respectively.  child offsets are relative to this region,
                 * so the usable range is [0, e_size). */
                off_t area_base, area_limit;

                if (left && right) {
                        area_base = left->e_offset + left->e_size;
                        area_limit = right->e_offset - 1;
                } else if (right) {
                        /* gap before the first entry */
                        area_base = 0;
                        area_limit = right->e_offset - 1;
                } else if (left) {
                        /* gap after the last entry */
                        area_base = left->e_offset + left->e_size;
                        area_limit = region->vr_entry.e_size - 1;
                } else {
                        return INVALID_OFFSET;
                }

                /* align the base up so we never bite into the left entry */
                area_base = (area_base + VM_PAGE_MASK) & ~VM_PAGE_MASK;

                size_t area_size = 0;

                if (area_limit >= area_base) {
                        area_size = area_limit - area_base + 1;
                }

                if (area_size >= target_length) {
                        if (random) {
                                area_base = generate_random_address(
                                        area_base, area_size, target_length);
                                area_base &= ~VM_PAGE_MASK;
                        }
                        return area_base;
                }

                /* slide the window one entry in the search direction */
                if (direction == SEARCH_RIGHT) {
                        if (!right_node) {
                                return INVALID_OFFSET;
                        }
                        left_node = right_node;
                        right_node = btree_next(right_node);
                } else {
                        if (!left_node) {
                                return INVALID_OFFSET;
                        }
                        right_node = left_node;
                        left_node = btree_prev(right_node);
                }
        }
        return INVALID_OFFSET;
}
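/*
 * a sketch of the gap scan above (entry layout is made up):
 *
 *   0                                              e_size
 *   |--A--|.....hole.....|--B--|--C--|....hole....|
 *         ^              ^
 *         area_base      area_limit + 1
 *
 * the window is a pair of in-order neighbours (left = A, right = B); the
 * candidate area is the hole between them, with the region boundaries
 * standing in when either neighbour is missing.  if the hole is too
 * small, the window slides one entry in the search direction until it
 * falls off either end.  with `random` set (the ASLR build), a random
 * page-aligned base inside the winning hole is returned instead of its
 * lowest address.
 */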
static off_t region_find_free_area_linear(
        struct vm_region *region, size_t target_length)
{
        if (region->vr_entry.e_size < target_length) {
                return INVALID_OFFSET;
        }
        if (!region->vr_entries.b_root) {
                /* empty region: child offsets are region-relative, so the
                 * first free offset is simply 0 */
                return 0;
        }
        return region_find_free_area_ex(
                region, target_length, btree_first(&region->vr_entries),
                SEARCH_RIGHT, false);
}

static off_t region_find_free_area_random(
        struct vm_region *region, size_t target_length)
{
        if (region->vr_entry.e_size < target_length) {
                return INVALID_OFFSET;
        }
        if (!region->vr_entries.b_root) {
                off_t offset = generate_random_address(
                        0, region->vr_entry.e_size, target_length);
                return offset & ~VM_PAGE_MASK;
        }

        unsigned tmp = 0;
        struct vm_region_entry *basis = region_get_random_entry(region);

        fill_random(&tmp, sizeof tmp);

        enum search_direction direction =
                (tmp % 2) ? SEARCH_RIGHT : SEARCH_LEFT;

        return region_find_free_area_ex(
                region, target_length, &basis->e_node, direction, true);
}

static bool region_is_area_free(
        const struct vm_region *region, off_t base, size_t len)
{
        if (!len || base < 0) {
                return false;
        }
        if (base + (off_t)len > (off_t)region->vr_entry.e_size) {
                return false;
        }

        off_t limit = base + len - 1;
        struct btree_node *cur = region->vr_entries.b_root;

        if (!cur) {
                return true;
        }

        while (cur) {
                struct vm_region_entry *entry =
                        BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
                struct btree_node *next = NULL;
                off_t entry_limit = entry->e_offset + entry->e_size - 1;

                if (base > entry_limit) {
                        next = btree_right(cur);
                } else if (limit < entry->e_offset) {
                        next = btree_left(cur);
                } else {
                        /* overlaps this entry */
                        return false;
                }
                cur = next;
        }
        return true;
}

static kern_status_t region_validate_allocation(
        struct vm_region *parent, enum vm_prot prot, off_t *offp, size_t len)
{
        off_t offset = *offp;

        if ((prot & parent->vr_prot) != prot) {
                /* child region protection must match or be a subset of the
                 * parent region protection */
                return KERN_INVALID_ARGUMENT;
        }

        if (offset == VM_REGION_ANY_OFFSET) {
                offset = region_find_free_area(parent, len);
                if (offset == INVALID_OFFSET) {
                        return KERN_NO_MEMORY;
                }
        } else if (!region_is_area_free(parent, offset, len)) {
                return KERN_INVALID_ARGUMENT;
        }

        *offp = offset;
        return KERN_OK;
}

/*** PUBLIC API ***************************************************************/

kern_status_t vm_region_type_init(void)
{
        vm_cache_init(&mapping_cache);
        return object_type_register(&vm_region_type);
}

kern_status_t vm_region_create(
        struct vm_region *parent,
        const char *name,
        off_t offset,
        size_t len,
        enum vm_prot prot,
        struct vm_region **out)
{
        if (!out || !len) {
                return KERN_INVALID_ARGUMENT;
        }

        /* round the length up to a whole number of pages */
        if (len & VM_PAGE_MASK) {
                len &= ~VM_PAGE_MASK;
                len += VM_PAGE_SIZE;
        }

        kern_status_t status = KERN_OK;

        if (parent) {
                status = region_validate_allocation(parent, prot, &offset, len);
        }
        if (status != KERN_OK) {
                return status;
        }

        struct object *region_object = object_create(&vm_region_type);

        if (!region_object) {
                return KERN_NO_MEMORY;
        }

        struct vm_region *region = VM_REGION_CAST(region_object);

        region->vr_prot = prot;
        region->vr_entry.e_type = VM_REGION_ENTRY_REGION;
        region->vr_entry.e_offset = offset;
        region->vr_entry.e_size = len;

        if (parent) {
                region->vr_entry.e_parent = &parent->vr_entry;
                region->vr_pmap = parent->vr_pmap;
                region_put_entry(parent, &region->vr_entry);
        }

        if (name) {
                strncpy(region->vr_name, name, sizeof region->vr_name);
                region->vr_name[sizeof region->vr_name - 1] = '\0';
        }

        *out = region;
        return KERN_OK;
}
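/*
 * usage sketch for the two placement paths of vm_region_create() (the
 * parent region and protection flag names are illustrative, not taken
 * from real callers):
 *
 *   struct vm_region *heap;
 *   kern_status_t status = vm_region_create(
 *           task_region,           // parent; NULL creates a root region
 *           "heap",
 *           VM_REGION_ANY_OFFSET,  // let region_find_free_area() place it
 *           16 * VM_PAGE_SIZE,
 *           VM_PROT_READ | VM_PROT_WRITE,
 *           &heap);
 *
 * passing an explicit offset instead of VM_REGION_ANY_OFFSET takes the
 * region_is_area_free() path and fails with KERN_INVALID_ARGUMENT if the
 * requested range overlaps an existing entry.
 */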
kern_status_t vm_region_map_object(
        struct vm_region *region,
        off_t region_offset,
        struct vm_object *object,
        off_t object_offset,
        size_t length,
        enum vm_prot prot,
        virt_addr_t *out)
{
        if (!region || !object || !out) {
                return KERN_INVALID_ARGUMENT;
        }

        /* align the object offset down and the length up to page bounds */
        object_offset &= ~VM_PAGE_MASK;
        if (length & VM_PAGE_MASK) {
                length &= ~VM_PAGE_MASK;
                length += VM_PAGE_SIZE;
        }

        if ((prot & region->vr_prot) != prot) {
                return KERN_INVALID_ARGUMENT;
        }
        if ((prot & object->vo_prot) != prot) {
                return KERN_INVALID_ARGUMENT;
        }
        if (!length || object_offset + length > object->vo_size) {
                return KERN_INVALID_ARGUMENT;
        }

        if (region_offset != VM_REGION_ANY_OFFSET) {
                /* descend to the innermost region covering the target area */
                region = region_get_child_region_recursive(
                        region, &region_offset, length);
        }
        if (!region) {
                return KERN_INVALID_ARGUMENT;
        }

        if (region_offset == VM_REGION_ANY_OFFSET) {
                region_offset = region_find_free_area(region, length);
                if (region_offset == INVALID_OFFSET) {
                        return KERN_NO_MEMORY;
                }
        } else if (!region_is_area_free(region, region_offset, length)) {
                return KERN_INVALID_ARGUMENT;
        }

        struct vm_region_mapping *mapping =
                vm_cache_alloc(&mapping_cache, VM_NORMAL);

        if (!mapping) {
                return KERN_NO_MEMORY;
        }

        mapping->m_object = object;
        mapping->m_prot = prot;
        mapping->m_object_offset = object_offset;
        mapping->m_entry.e_type = VM_REGION_ENTRY_MAPPING;
        mapping->m_entry.e_parent = &region->vr_entry;
        mapping->m_entry.e_offset = region_offset;
        mapping->m_entry.e_size = length;

        region_put_entry(region, &mapping->m_entry);
        queue_push_back(&object->vo_mappings, &mapping->m_object_entry);

#ifdef TRACE
        virt_addr_t abs_base = entry_absolute_address(&mapping->m_entry);

        tracek("mapping %s at [%llx-%llx]", object->vo_name, abs_base,
               abs_base + length);
#endif

        *out = entry_absolute_address(&mapping->m_entry);
        return KERN_OK;
}

kern_status_t vm_region_demand_map(
        struct vm_region *region, virt_addr_t addr, enum pmap_fault_flags flags)
{
        addr &= ~VM_PAGE_MASK;

        if (addr < region->vr_entry.e_offset ||
            addr >= region->vr_entry.e_offset + region->vr_entry.e_size) {
                return KERN_NO_ENTRY;
        }

        off_t region_offset = addr - region->vr_entry.e_offset;
        struct vm_region_mapping *mapping =
                region_get_mapping_recursive(region, &region_offset, 1);

        if (!mapping) {
                return KERN_NO_ENTRY;
        }

        off_t object_offset = region_offset - mapping->m_entry.e_offset +
                              mapping->m_object_offset;

        tracek("vm: tried to access vm-object %s at offset=%05llx",
               mapping->m_object->vo_name, object_offset);

        struct vm_page *pg = vm_object_alloc_page(
                mapping->m_object, object_offset, VM_PAGE_4K);

        if (!pg) {
                return KERN_NO_MEMORY;
        }

        tracek("vm: mapping %07llx -> %10llx", vm_page_get_paddr(pg), addr);

        return pmap_add(
                region->vr_pmap, addr, vm_page_get_pfn(pg), mapping->m_prot,
                PMAP_NORMAL);
}
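/*
 * demand-map flow, end to end (a sketch; the fault entry point named here
 * is hypothetical):
 *
 *   page_fault(addr)
 *     -> vm_region_demand_map(root_region, addr, flags)
 *          -> region_get_mapping_recursive()  // addr -> leaf mapping,
 *                                             //   region-relative offset
 *          -> vm_object_alloc_page()          // object-relative offset
 *          -> pmap_add()                      // wire the pfn at addr
 */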
#ifdef TRACE
void vm_region_dump(struct vm_region *region, int depth)
{
        char line[128] = {0};
        size_t p = 0;

        for (int i = 0; i < depth; i++) {
                p += snprintf(line + p, sizeof line - p, " ");
        }
        p += snprintf(
                line + p, sizeof line - p, "region: %s [%llx-%llx]",
                region->vr_name, region->vr_entry.e_offset,
                region->vr_entry.e_offset + region->vr_entry.e_size);
        printk("%s", line);

        struct btree_node *cur = btree_first(&region->vr_entries);

        while (cur) {
                memset(line, 0x0, sizeof line);
                p = 0;
                for (int i = 0; i < depth + 1; i++) {
                        p += snprintf(line + p, sizeof line - p, " ");
                }

                struct vm_region_entry *entry =
                        BTREE_CONTAINER(struct vm_region_entry, e_node, cur);
                struct vm_region *child_region = region_from_entry(entry);
                struct vm_region_mapping *child_mapping =
                        mapping_from_entry(entry);

                switch (entry->e_type) {
                case VM_REGION_ENTRY_REGION:
                        /* the recursive call below prints the header */
                        break;
                case VM_REGION_ENTRY_MAPPING:
                        p += snprintf(
                                line + p, sizeof line - p,
                                "mapping: %s p:[%llx-%llx] -> v:[%llx-%llx]",
                                child_mapping->m_object->vo_name,
                                child_mapping->m_object_offset,
                                child_mapping->m_object_offset +
                                        child_mapping->m_entry.e_size,
                                child_mapping->m_entry.e_offset,
                                child_mapping->m_entry.e_offset +
                                        child_mapping->m_entry.e_size);
                        printk("%s", line);
                        break;
                default:
                        p += snprintf(line + p, sizeof line - p, "invalid");
                        printk("%s", line);
                        break;
                }

                if (child_region) {
                        vm_region_dump(child_region, depth + 1);
                }
                cur = btree_next(cur);
        }
}
#endif
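/*
 * sample vm_region_dump() output on a TRACE build (all names and
 * addresses are illustrative, one indent space per nesting level):
 *
 *   region: task [10000000-20000000]
 *    region: heap [100000-110000]
 *     mapping: anon p:[0-4000] -> v:[0-4000]
 */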