sandbox: moved all sources to main kernel tree

This commit is contained in:
2023-02-03 20:43:38 +00:00
parent e714d619ba
commit 40f83922da
18 changed files with 0 additions and 9 deletions

36
vm/bootstrap.c Normal file
View File

@@ -0,0 +1,36 @@
#include <socks/status.h>
#include <limits.h>
#include <socks/vm.h>
#include <socks/memblock.h>
#include <stddef.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
/* Per-node page data; one vm_pg_data_t is stored for each NUMA node. */
static vm_pg_data_t *node_data = NULL;

/*
 * Bring up the VM subsystem.
 *
 * Allocates the per-node data from memblock, builds the global page array,
 * then initialises one zone per entry in `zones`.
 *
 * zones:    array of zone descriptors; each zd_id selects the slot in
 *           pg_zones that the zone is stored in.
 * nr_zones: number of entries in `zones`.
 *
 * Always returns KERN_OK.
 *
 * NOTE(review): the node data is not cleared here, so any zone slot that no
 * descriptor refers to keeps whatever memblock_alloc() handed back -- confirm
 * that callers describe every zone.
 */
kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones)
{
    /* only UMA systems are handled for now, so there is a single node */
    int nr_nodes = 1;

    node_data = memblock_alloc(sizeof(vm_pg_data_t) * nr_nodes);
    vm_page_init_array();

    size_t idx = 0;
    while (idx < nr_zones) {
        const vm_zone_descriptor_t *zd = &zones[idx];
        vm_zone_init(&node_data->pg_zones[zd->zd_id], zd);
        idx++;
    }

    return KERN_OK;
}
/*
 * Look up the page data belonging to a NUMA node.
 * Only node 0 exists at present; every other id yields NULL.
 */
vm_pg_data_t *vm_pg_data_get(vm_node_id_t node)
{
    return (node == 0) ? node_data : NULL;
}

217
vm/cache.c Normal file
View File

@@ -0,0 +1,217 @@
#include <socks/queue.h>
#include <stdlib.h>
#include <assert.h>
#include <socks/vm.h>
/* Sentinel freelist index: marks the end of a slab's chain of free slots,
   and is what s_free holds once a slab has no free slot left. */
#define FREELIST_END ((unsigned int)-1)
/* Statically-defined cache from which vm_cache_create() allocates the
   vm_cache_t descriptors it hands out. It is initialised lazily, the
   first time vm_cache_create() runs. */
static vm_cache_t cache_cache = { .c_name = "vm_cache", .c_obj_size = sizeof(vm_cache_t) };
/*
 * Create a new object cache.
 *
 * name:  name stored in the cache (the pointer is kept, not copied).
 * objsz: size in bytes of the objects the cache will hand out.
 * flags: initial cache flags; vm_cache_init() may add VM_CACHE_OFFSLAB.
 *
 * Returns the new cache, or NULL if no memory was available for its
 * descriptor.
 */
vm_cache_t *vm_cache_create(const char *name, size_t objsz, vm_cache_flags_t flags)
{
    if (!VM_CACHE_INITIALISED(&cache_cache)) {
        vm_cache_init(&cache_cache);
    }

    vm_cache_t *new_cache = vm_cache_alloc(&cache_cache, 0);
    if (!new_cache) {
        return NULL;
    }

    /* objects returned by vm_cache_alloc() are not zeroed. assigning a
       compound literal clears every field that is not named here (lock,
       slab queues, ...) so that the descriptor starts out in the same state
       as the statically-defined caches do. */
    *new_cache = (vm_cache_t){
        .c_name = name,
        .c_obj_size = objsz,
        .c_flags = flags,
    };

    vm_cache_init(new_cache);
    return new_cache;
}
/*
 * Compute the slab geometry of a cache from its c_obj_size and c_flags.
 *
 * Every slab is one VM_PAGE_16K block. Caches whose objects are 512 bytes
 * or larger get VM_CACHE_OFFSLAB set. The object stride is the object size
 * rounded up to a multiple of 16; each object slot also costs one
 * unsigned int freelist entry. On-slab caches additionally lose
 * sizeof(vm_slab_t) bytes of every slab to the header.
 *
 * Also resets the three slab queues, so this must not be called on a cache
 * that already owns slabs.
 */
void vm_cache_init(vm_cache_t *cache)
{
    cache->c_slabs_full = QUEUE_INIT;
    cache->c_slabs_partial = QUEUE_INIT;
    cache->c_slabs_empty = QUEUE_INIT;

    cache->c_page_order = VM_PAGE_16K;
    if (cache->c_obj_size >= 512) {
        cache->c_flags |= VM_CACHE_OFFSLAB;
    }

    /* round the object size up to the next 16-byte boundary */
    size_t stride = cache->c_obj_size;
    if (stride & 0xF) {
        stride = (stride & ~0xF) + 0x10;
    }
    cache->c_stride = stride;

    size_t usable = vm_page_order_to_bytes(cache->c_page_order);
    if (!(cache->c_flags & VM_CACHE_OFFSLAB)) {
        /* the slab header lives at the start of the slab itself */
        usable -= sizeof(vm_slab_t);
    }

    /* each object slot needs its own freelist entry */
    size_t cost_per_slot = stride + sizeof(unsigned int);
    cache->c_obj_count = usable / cost_per_slot;

    cache->c_hdr_size = sizeof(vm_slab_t) + (sizeof(unsigned int) * cache->c_obj_count);
}
/* Tear down a cache.
   Not implemented yet: the function body is empty, so calling it
   currently releases nothing and leaves `cache` untouched. */
void vm_cache_destroy(vm_cache_t *cache)
{
/* TODO */
}
/*
 * Allocate and initialise a fresh, completely free slab for `cache`.
 *
 * The slab's objects live in a newly allocated page block of order
 * cache->c_page_order. The slab header (with its freelist) is either placed
 * at the start of that block, or kmalloc()ed separately when the cache has
 * VM_CACHE_OFFSLAB set. Every page frame of the block is pointed back at
 * the header through p_slab.
 *
 * Returns the slab header, or NULL if the cache cannot hold any object per
 * slab or if memory could not be allocated.
 */
static vm_slab_t *alloc_slab(vm_cache_t *cache, vm_flags_t flags)
{
    if (cache->c_obj_count == 0) {
        /* the objects are too large for a slab; without this check the
           freelist terminator below would be written at index -1. */
        return NULL;
    }

    vm_page_t *slab_page = vm_page_alloc(cache->c_page_order, flags);
    if (!slab_page) {
        return NULL;
    }

    vm_slab_t *slab_hdr = NULL;
    void *slab_data = vm_page_get_vaddr(slab_page);

    if (cache->c_flags & VM_CACHE_OFFSLAB) {
        /* NOTE the circular dependency here:

           kmalloc -> vm_cache_alloc -> alloc_slab -> kmalloc

           since this call path is only used for caches with
           VM_CACHE_OFFSLAB set, we avoid the circular dependency
           by ensuring the small size-N (where N < 512) caches
           (which don't use that flag) are initialised before
           attempting to allocate from an offslab cache. */
        slab_hdr = kmalloc(cache->c_hdr_size, flags);
        if (!slab_hdr) {
            /* don't leak the page block we just took */
            vm_page_free(slab_page);
            return NULL;
        }

        slab_hdr->s_objects = slab_data;
    } else {
        slab_hdr = slab_data;
        slab_hdr->s_objects = (void *)((char *)slab_data + cache->c_hdr_size);
    }

    slab_hdr->s_cache = cache;
    slab_hdr->s_list = QUEUE_ENTRY_INIT;
    slab_hdr->s_obj_allocated = 0;
    slab_hdr->s_free = 0;

    /* chain every slot to its successor; the last slot ends the chain */
    unsigned int last_slot = cache->c_obj_count - 1;
    for (unsigned int i = 0; i < last_slot; i++) {
        slab_hdr->s_freelist[i] = i + 1;
    }
    slab_hdr->s_freelist[last_slot] = FREELIST_END;

    vm_page_foreach (slab_page, i) {
        i->p_slab = slab_hdr;
    }

    return slab_hdr;
}
/* Release a slab. Not implemented yet: the body is empty, so neither the
   slab's page block nor an off-slab header is ever given back. */
static void destroy_slab(vm_slab_t *slab)
{
}
/*
 * Take the first slot off the slab's freelist.
 * Returns the slot index, or FREELIST_END if the slab has no free slot
 * (in which case the slab is left unchanged).
 */
static unsigned int slab_allocate_slot(vm_slab_t *slab)
{
    unsigned int head = slab->s_free;

    if (head != FREELIST_END) {
        /* unlink the head: its freelist entry names the next free slot */
        slab->s_free = slab->s_freelist[head];
        slab->s_obj_allocated++;
    }

    return head;
}
/*
 * Put `slot` back at the head of the slab's freelist and drop the
 * allocated-object count by one. No check is made that the slot was
 * actually in use.
 */
static void slab_free_slot(vm_slab_t *slab, unsigned int slot)
{
    slab->s_freelist[slot] = slab->s_free;
    slab->s_free = slot;
    slab->s_obj_allocated--;
}
/* Translate a slot index into the address of that slot's object:
   slots are laid out c_stride bytes apart, starting at s_objects. */
static void *slot_to_pointer(vm_slab_t *slab, unsigned int slot)
{
    char *first_object = (char *)slab->s_objects;
    return (void *)(first_object + (slot * slab->s_cache->c_stride));
}
/* Translate an object address back into its slot index (the inverse of
   slot_to_pointer()). `p` is not checked to lie inside the slab. */
static unsigned int pointer_to_slot(vm_slab_t *slab, void *p)
{
    uintptr_t first_object = (uintptr_t)slab->s_objects;
    size_t offset = (uintptr_t)p - first_object;
    return offset / slab->s_cache->c_stride;
}
/*
 * Allocate one object from `cache`.
 *
 * A slab is chosen in this order: a partially-used slab, then an empty
 * slab, then a newly created one (using `flags` for the allocation). After
 * the object has been taken, the slab is filed on the full or the partial
 * list depending on whether it has free slots left.
 *
 * Runs with cache->c_lock held and interrupts saved/restored around it.
 * Returns the object, or NULL if no slab could be obtained.
 */
void *vm_cache_alloc(vm_cache_t *cache, vm_flags_t flags)
{
    unsigned long irq_flags;
    void *object = NULL;
    vm_slab_t *slab = NULL;

    spin_lock_irqsave(&cache->c_lock, &irq_flags);

    if (!queue_empty(&cache->c_slabs_partial)) {
        /* use up partially-full slabs before touching a fresh one */
        queue_entry_t *entry = queue_pop_front(&cache->c_slabs_partial);
        assert(entry);
        slab = QUEUE_CONTAINER(vm_slab_t, s_list, entry);
    } else if (!queue_empty(&cache->c_slabs_empty)) {
        queue_entry_t *entry = queue_pop_front(&cache->c_slabs_empty);
        assert(entry);
        slab = QUEUE_CONTAINER(vm_slab_t, s_list, entry);
    } else {
        /* no slab has room left, so make a new one */
        slab = alloc_slab(cache, flags);
    }

    if (slab) {
        unsigned int slot = slab_allocate_slot(slab);
        object = slot_to_pointer(slab, slot);

        /* s_free hits FREELIST_END once the last slot has been handed out */
        if (slab->s_free == FREELIST_END) {
            queue_push_back(&cache->c_slabs_full, &slab->s_list);
        } else {
            queue_push_back(&cache->c_slabs_partial, &slab->s_list);
        }
    }

    spin_unlock_irqrestore(&cache->c_lock, irq_flags);
    return object;
}
/*
 * Return object `p` to `cache`.
 *
 * The owning slab is found through the page frame that backs `p`. The call
 * is silently ignored when `p` is NULL, when `p` is not backed by a slab
 * page, or when that slab belongs to a different cache. Afterwards the slab
 * is re-filed on the empty or partial list as appropriate.
 *
 * Runs with cache->c_lock held and interrupts saved/restored around it.
 */
void vm_cache_free(vm_cache_t *cache, void *p)
{
    if (!p) {
        /* like free(NULL): nothing to do. vm_virt_to_phys() would
           otherwise be handed an address outside the mapped range. */
        return;
    }

    unsigned long irq_flags;
    spin_lock_irqsave(&cache->c_lock, &irq_flags);

    phys_addr_t phys = vm_virt_to_phys(p);
    vm_page_t *pg = vm_page_get(phys);
    vm_slab_t *slab = pg ? pg->p_slab : NULL;

    if (slab && slab->s_cache == cache) {
        /* a slab with no free slot sits on the full list, any other slab
           that has an object allocated sits on the partial list. */
        if (slab->s_free == FREELIST_END) {
            queue_delete(&cache->c_slabs_full, &slab->s_list);
        } else {
            queue_delete(&cache->c_slabs_partial, &slab->s_list);
        }

        unsigned int slot = pointer_to_slot(slab, p);
        slab_free_slot(slab, slot);

        if (slab->s_obj_allocated == 0) {
            queue_push_back(&cache->c_slabs_empty, &slab->s_list);
        } else {
            queue_push_back(&cache->c_slabs_partial, &slab->s_list);
        }
    }

    spin_unlock_irqrestore(&cache->c_lock, irq_flags);
}

73
vm/kmalloc.c Normal file
View File

@@ -0,0 +1,73 @@
#include <socks/vm.h>
#include <string.h>
/* Static initialiser for one general-purpose cache serving objects of
   `s` bytes. The cache is named "size-<s>" by stringifying the argument. */
#define SIZE_N_CACHE(s) \
{ .c_name = "size-" # s, .c_obj_size = s, .c_page_order = VM_PAGE_16K }
/* reserve space for the size-N caches: */
/* The table MUST stay sorted by ascending object size: kmalloc() walks it
   front to back and uses the first cache that is large enough.
   Requests above the last entry (4096 bytes) cannot be served.
   NOTE(review): 388 is the only size that is not a multiple of 16 --
   possibly meant to be 384; confirm before changing, as it alters the
   cache name. */
static vm_cache_t size_n_caches[] = {
SIZE_N_CACHE(16),
SIZE_N_CACHE(32),
SIZE_N_CACHE(48),
SIZE_N_CACHE(64),
SIZE_N_CACHE(96),
SIZE_N_CACHE(128),
SIZE_N_CACHE(160),
SIZE_N_CACHE(256),
SIZE_N_CACHE(388),
SIZE_N_CACHE(512),
SIZE_N_CACHE(576),
SIZE_N_CACHE(768),
SIZE_N_CACHE(1024),
SIZE_N_CACHE(1664),
SIZE_N_CACHE(2048),
SIZE_N_CACHE(3072),
SIZE_N_CACHE(4096),
};
/* number of entries in size_n_caches */
static const size_t nr_size_n_caches = sizeof size_n_caches / sizeof size_n_caches[0];
/*
 * Allocate `count` bytes of kernel memory.
 *
 * The request is served from the smallest size-N cache whose object size
 * is at least `count`; that cache is initialised on first use.
 *
 * Returns NULL when `count` is zero, when `count` is larger than the
 * biggest size-N cache, or when the cache cannot supply an object.
 */
void *kmalloc(size_t count, vm_flags_t flags)
{
    if (count == 0) {
        return NULL;
    }

    for (size_t i = 0; i < nr_size_n_caches; i++) {
        vm_cache_t *candidate = &size_n_caches[i];

        if (candidate->c_obj_size < count) {
            /* too small, try the next size up */
            continue;
        }

        if (!VM_CACHE_INITIALISED(candidate)) {
            vm_cache_init(candidate);
        }

        return vm_cache_alloc(candidate, flags);
    }

    /* nothing is big enough for this request */
    return NULL;
}
/*
 * Same as kmalloc(), but the first `count` bytes of the returned buffer
 * are cleared to zero. Returns NULL whenever kmalloc() does.
 */
void *kzalloc(size_t count, vm_flags_t flags)
{
    void *buf = kmalloc(count, flags);
    if (!buf) {
        return NULL;
    }

    memset(buf, 0x0, count);
    return buf;
}
/*
 * Free memory obtained from kmalloc()/kzalloc().
 *
 * The owning cache is found through the page frame backing `p`.
 * Passing NULL is allowed and does nothing; pointers that are not backed
 * by a slab page are ignored as well.
 */
void kfree(void *p)
{
    if (!p) {
        /* kmalloc() returns NULL for empty or failed requests, so callers
           must be able to pass that straight back. without this check the
           NULL pointer would reach vm_virt_to_phys() and trip its assert. */
        return;
    }

    phys_addr_t phys = vm_virt_to_phys(p);
    vm_page_t *pg = vm_page_get(phys);
    if (!pg || !pg->p_slab) {
        return;
    }

    vm_cache_free(pg->p_slab->s_cache, p);
}

399
vm/memblock.c Normal file
View File

@@ -0,0 +1,399 @@
/*
The Clear BSD License
Copyright (c) 2023 Max Wash
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted (subject to the limitations in the disclaimer
below) provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
*/
#include "socks/types.h"
#include <stdio.h>
#include <stdbool.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <socks/memblock.h>
/* smaller / larger of two values.
   NOTE: the arguments may be evaluated more than once. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/* pack two 32-bit indices into one 64-bit iterator position:
   `a` goes in the low half, `b` in the high half. */
#define ITER(a, b) ((uint64_t)(a) | ((uint64_t)(b) << 32))
/* iterator position that marks the end of an iteration */
#define ITER_END ULLONG_MAX
/* extract the low (A) / high (B) index from a position built with ITER() */
#define IDX_A(idx) ((idx) & 0xFFFFFFFF)
#define IDX_B(idx) (((idx) >> 32) & 0xFFFFFFFF)
/* the maximum possible value for a pointer type.
   Note that any pointers returned by the memblock API will still
   be bounded by the defined memory regions, and not by this constant. */
#define ADDR_MAX (~(uintptr_t)0)
/* statically-allocated storage that the two region lists start out with;
   memblock_double_capacity() moves a list to a larger array when needed. */
static memblock_region_t init_memory_regions[MEMBLOCK_INIT_MEMORY_REGION_COUNT];
static memblock_region_t init_reserved_regions[MEMBLOCK_INIT_RESERVED_REGION_COUNT];
static phys_addr_t do_alloc(size_t size);
/* the global allocator state: a list of known memory regions and a list of
   reserved regions, both initially empty. */
memblock_t memblock = {
.memory.regions = init_memory_regions,
.memory.count = 0,
.memory.max = MEMBLOCK_INIT_MEMORY_REGION_COUNT,
.memory.name = "memory",
.reserved.regions = init_reserved_regions,
.reserved.count = 0,
.reserved.max = MEMBLOCK_INIT_RESERVED_REGION_COUNT,
.reserved.name = "reserved",
};
/*
 * Move the region list of `type` into a newly allocated array with twice
 * the capacity. The previous array is not released.
 */
static void memblock_double_capacity(memblock_type_t *type)
{
    size_t doubled = type->max * 2;

    /* do_alloc() records the new buffer in memblock.reserved. when `type`
       IS the reserved list, that adds an entry to the old array, so
       type->count must only be read after this call. */
    phys_addr_t buffer_phys = do_alloc(doubled * sizeof(memblock_region_t));
    void *buffer = (void *)(buffer_phys + memblock.m_voffset);

    memcpy(buffer, type->regions, type->count * sizeof(memblock_region_t));

    type->max = doubled;
    type->regions = buffer;
}
/*
 * Insert a copy of `to_add` into the region list of `type`, keeping the
 * list ordered: the new entry goes in front of the first region whose base
 * is at or beyond the new region's limit (or at the end of the list).
 *
 * Returns 0 on success, -1 if the list has no room for another entry.
 */
static int memblock_insert_region(memblock_type_t *type, memblock_region_t *to_add)
{
    if (type->count >= type->max) {
        /* shifting the tail up by one would write past the array */
        return -1;
    }

    unsigned int pos = 0;
    for (pos = 0; pos < type->count; pos++) {
        const memblock_region_t *cur = &type->regions[pos];

        if (cur->base >= to_add->limit) {
            break;
        }
    }

    /* open a gap at `pos` by moving everything from there up one slot */
    memblock_region_t *slot = &type->regions[pos];
    unsigned int nr_to_move = type->count - pos;
    memmove(slot + 1, slot, nr_to_move * sizeof *slot);

    *slot = *to_add;
    type->count++;
    return 0;
}
/*
 * Remove entry `i` from the region list of `type`, closing the gap by
 * moving the following entries down one slot.
 *
 * Returns 0 on success, -1 if `i` is not a valid index.
 */
static int memblock_remove_region(memblock_type_t *type, unsigned int i)
{
    if (i >= type->count) {
        return -1;
    }

    memblock_region_t *dst = &type->regions[i];
    memblock_region_t *src = dst + 1;

    /* only the entries AFTER i move: that is count - i - 1 of them.
       moving count - i entries would read one element past the last
       valid region (and past the array itself when the list is full). */
    unsigned int nr_to_move = type->count - i - 1;
    memmove(dst, src, nr_to_move * sizeof *src);

    type->count--;
    return 0;
}
/*
 * Record the parameters memblock allocates with.
 *
 * alloc_start / alloc_end: bounds (as virtual addresses; do_alloc()
 *                          subtracts voffset from them) of the window that
 *                          allocations are taken from.
 * voffset:                 offset added to a physical address to obtain
 *                          the pointer handed back to callers.
 *
 * Always returns 0.
 */
int memblock_init(uintptr_t alloc_start, uintptr_t alloc_end, uintptr_t voffset)
{
    memblock.m_voffset = voffset;
    memblock.m_alloc_start = alloc_start;
    memblock.m_alloc_end = alloc_end;

    return 0;
}
/*
 * Add the range [base, base + size - 1] with the given status to the
 * region list of `type`.
 *
 * Regions of the same status that overlap or directly adjoin the new range
 * are folded into it, so the list never holds two touching regions of one
 * status. Regions of a different status may adjoin the new range but must
 * not overlap it.
 *
 * Returns 0 on success (including when size is 0, which is a no-op), or -1
 * if the range overlaps a region with a different status or the list is
 * full.
 */
int memblock_add_range(memblock_type_t *type, uintptr_t base, size_t size, memblock_region_status_t status)
{
    if (size == 0) {
        return 0;
    }

    memblock_region_t new_region = { .base = base, .limit = base + size - 1, .status = status };

    /* two regions with different statuses CANNOT intersect. check this
       first, before the list is modified in any way. */
    for (unsigned int i = 0; i < type->count; i++) {
        const memblock_region_t *cur = &type->regions[i];

        bool overlaps = new_region.base <= cur->limit && new_region.limit >= cur->base;
        if (overlaps && cur->status != new_region.status) {
            return -1;
        }
    }

    /* absorb every same-status region that overlaps or touches the new
       one: grow the new region to the union of the two and drop the old
       entry. this covers identical, contained, containing, partially
       overlapping and adjacent regions alike. */
    unsigned int i = 0;
    while (i < type->count) {
        const memblock_region_t *cur = &type->regions[i];

        /* "apart" means at least one byte lies between the two regions.
           written as subtractions so that limits at the very top of the
           address space cannot wrap around. */
        bool cur_is_below = cur->limit < new_region.base && new_region.base - cur->limit > 1;
        bool cur_is_above = cur->base > new_region.limit && cur->base - new_region.limit > 1;

        if (cur_is_below || cur_is_above || cur->status != new_region.status) {
            i++;
            continue;
        }

        if (cur->base < new_region.base) {
            new_region.base = cur->base;
        }

        if (cur->limit > new_region.limit) {
            new_region.limit = cur->limit;
        }

        /* removal shifts the next region into slot i, so i stays put */
        memblock_remove_region(type, i);
    }

    return memblock_insert_region(type, &new_region);
}
int memblock_add(uintptr_t base, size_t size)
{
if (memblock.memory.count >= memblock.memory.max - 2) {
if (memblock.reserved.count >= memblock.reserved.max - 2) {
memblock_double_capacity(&memblock.reserved);
}
memblock_double_capacity(&memblock.memory);
}
return memblock_add_range(&memblock.memory, base, size, MEMBLOCK_MEMORY);
}
int memblock_reserve(uintptr_t base, size_t size)
{
if (memblock.reserved.count >= memblock.reserved.max - 2) {
memblock_double_capacity(&memblock.reserved);
}
return memblock_add_range(&memblock.reserved, base, size, MEMBLOCK_RESERVED);
}
/*
 * Carve `size` bytes out of the first free range inside the allocation
 * window that is large enough, and record them in memblock.reserved with
 * status MEMBLOCK_ALLOC. The returned address is 16-byte aligned.
 *
 * Returns the physical base address of the allocation, or 0 if it could
 * not be recorded as reserved. Aborts if no free range is large enough.
 */
static phys_addr_t do_alloc(size_t size)
{
    phys_addr_t allocated_base = ADDR_MAX;

    phys_addr_t region_start = memblock.m_alloc_start - memblock.m_voffset;
    phys_addr_t region_end = memblock.m_alloc_end - memblock.m_voffset;

    memblock_iter_t it;
    for_each_free_mem_range (&it, region_start, region_end) {
        /* first 16-byte aligned address at or after the start of the range */
        uintptr_t candidate = it.it_base;
        if (candidate & 0xF) {
            candidate = (candidate & ~(uintptr_t)0xF) + 0x10;
        }

        if (candidate < it.it_base || candidate > it.it_limit) {
            /* aligning moved the start beyond the end of this range (or
               wrapped around). computing a size from it would underflow
               and make a tiny range look enormous. */
            continue;
        }

        /* bytes available from candidate to it_limit inclusive, compared
           without the +1 so that a range ending at the top of the address
           space cannot overflow. */
        if (size == 0 || it.it_limit - candidate >= size - 1) {
            allocated_base = candidate;
            break;
        }
    }

    if (allocated_base == ADDR_MAX) {
        fprintf(stderr, "memblock: cannot allocate %zu byte buffer!\n", size);
        abort();
    }

    int status = memblock_add_range(&memblock.reserved, allocated_base, size, MEMBLOCK_ALLOC);
    if (status != 0) {
        return 0;
    }

    return allocated_base;
}
void *memblock_alloc(size_t size)
{
if (memblock.reserved.count >= memblock.reserved.max - 2) {
memblock_double_capacity(&memblock.reserved);
}
return (void *)(do_alloc(size) + memblock.m_voffset);
}
phys_addr_t memblock_alloc_phys(size_t size)
{
if (memblock.reserved.count >= memblock.reserved.max - 2) {
memblock_double_capacity(&memblock.reserved);
}
return do_alloc(size);
}
/* Release memory obtained from memblock_alloc().
   Not implemented: both arguments are ignored, nothing is released, and
   0 is always returned. */
int memblock_free(void *p, size_t size)
{
return 0;
}
/* Release memory obtained from memblock_alloc_phys().
   Not implemented: both arguments are ignored, nothing is released, and
   0 is always returned. */
int memblock_free_phys(phys_addr_t addr, size_t size)
{
return 0;
}
/* Advance iterator `it` to the next region.

   The current position is read from, and the next one stored to, it->__idx:
   the index into type_a's list in the low 32 bits and the index into
   type_b's list in the high 32 bits. it->__idx is set to ITER_END once
   there is nothing left to report.

   With type_b == NULL, each region of type_a is reported in turn with its
   own base, limit and status. `start` and `end` are NOT applied in this
   mode.

   With type_b != NULL, the function reports the parts of type_a's regions
   that lie in the gaps between type_b's regions (including the gap before
   the first and after the last one), clamped to [start, end]. These are
   always reported with status MEMBLOCK_MEMORY.

   Both lists are walked in index order, so they are expected to be sorted
   by address. */
void __next_memory_region(memblock_iter_t *it, memblock_type_t *type_a, memblock_type_t *type_b, uintptr_t start, uintptr_t end)
{
unsigned int idx_a = IDX_A(it->__idx);
unsigned int idx_b = IDX_B(it->__idx);
for (; idx_a < type_a->count; idx_a++) {
memblock_region_t *m = &type_a->regions[idx_a];
uintptr_t m_start = m->base;
uintptr_t m_end = m->limit;
if (!type_b) {
/* single-list mode: report the region as it is and step to the next one */
it->it_base = m->base;
it->it_limit = m->limit;
it->it_status = m->status;
it->__idx = ITER(idx_a + 1, idx_b);
return;
}
if (m_end < start) {
/* we haven't reached the requested memory range yet */
continue;
}
if (m_start > end) {
/* we have gone past the requested memory range and can now stop */
break;
}
/* note the `+ 1`: index type_b->count stands for the gap that follows the last type_b region */
for (; idx_b < type_b->count + 1; idx_b++) {
memblock_region_t *r = &type_b->regions[idx_b];
/* r_start and r_end delimit the region of memory between the current and previous reserved regions.
if we have gone past the last reserved region, these variables delimit the range between the end
of the last reserved region and the end of memory. */
uintptr_t r_start = idx_b > 0 ? r[-1].limit + 1 : 0;
uintptr_t r_end;
if (idx_b < type_b->count) {
r_end = r->base;
/* we decrement r_end to get the address of the last byte of the free region.
if r_end is already zero, there is a reserved region starting at address 0x0.
as long as r_end == r_start == 0x00000, we will skip this region. */
if (r_end) {
r_end--;
}
} else {
/* this maximum value will be clamped to the bounds of memblock.memory
before being returned to the caller */
r_end = ADDR_MAX;
}
if (r_start >= r_end) {
/* this free region has a length of zero, move to the next one */
continue;
}
if (r_start >= m_end) {
/* we've gone past the end of the current memory region, and need to go to the next one */
break;
}
/* we've already gone past this free memory region. move to the next one */
if (m_start >= r_end) {
continue;
}
/* we want the area that is overlapped by both
region M (m_start - m_end) : The region defined as system memory.
region R (r_start - r_end) : The region defined as free / outside of any reserved regions.
*/
it->it_base = MAX(m_start, r_start);
it->it_limit = MIN(m_end, r_end);
/* further limit the region to the intersection between the region itself and the
specified iteration bounds */
it->it_base = MAX(it->it_base, start);
it->it_limit = MIN(it->it_limit, end);
if (it->it_limit <= it->it_base) {
/* this region is not part of the specified bounds, skip it. */
continue;
}
it->it_status = MEMBLOCK_MEMORY;
/* whichever region is smaller, increment the pointer for that type, so we can
compare the larger region with the next region of the incremented type. */
if (m_end <= r_end) {
idx_a++;
} else {
idx_b++;
}
/* store the position for the next iteration */
it->__idx = ITER(idx_a, idx_b);
return;
}
}
/* ULLONG_MAX signals the end of the iteration */
it->__idx = ITER_END;
}

296
vm/page.c Normal file
View File

@@ -0,0 +1,296 @@
#include <socks/types.h>
#include <socks/memblock.h>
#include <socks/vm.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
/* array of pages, one entry per page frame from physical address 0 up to
   the highest address reported by memblock. allocated and filled in by
   vm_page_init_array(). */
static vm_page_t *page_array = NULL;
/* number of pages stored in page_array */
static size_t page_array_count = 0;
/* Pre-calculated page order -> size conversion table.
   Indexed by page order; each entry is the size in bytes of one block
   of that order. */
static size_t page_order_bytes[] = {
[VM_PAGE_4K] = 0x1000,
[VM_PAGE_8K] = 0x2000,
[VM_PAGE_16K] = 0x4000,
[VM_PAGE_32K] = 0x8000,
[VM_PAGE_64K] = 0x10000,
[VM_PAGE_128K] = 0x20000,
[VM_PAGE_256K] = 0x40000,
[VM_PAGE_512K] = 0x80000,
[VM_PAGE_1M] = 0x100000,
[VM_PAGE_2M] = 0x200000,
[VM_PAGE_4M] = 0x400000,
[VM_PAGE_8M] = 0x800000,
[VM_PAGE_16M] = 0x1000000,
[VM_PAGE_32M] = 0x2000000,
[VM_PAGE_64M] = 0x4000000,
[VM_PAGE_128M] = 0x8000000,
#if 0
/* vm can support pages of this size, but
vm_page_t only has 4 bits with which to store
the page order, which cannot accommodate these
larger order numbers */
[VM_PAGE_256M] = 0x10000000,
[VM_PAGE_512M] = 0x20000000,
[VM_PAGE_1G] = 0x40000000,
#endif
};
/* temporary */
/* base pointer and length of the window used to translate between
   physical addresses and pointers: physical address X is accessed at
   tmp_vaddr_base + X. set through tmp_set_vaddr_base(). */
static void *tmp_vaddr_base = NULL;
static size_t tmp_vaddr_len = 0;
/*
 * Set the window used for physical <-> virtual address translation:
 * `p` is the pointer that corresponds to physical address 0, and `len` is
 * the number of bytes that can be reached through it.
 */
void tmp_set_vaddr_base(void *p, size_t len)
{
    tmp_vaddr_len = len;
    tmp_vaddr_base = p;
}
phys_addr_t vm_virt_to_phys(void *p)
{
phys_addr_t x = (phys_addr_t)p - (phys_addr_t)tmp_vaddr_base;
assert(x < tmp_vaddr_len);
return x;
}
/*
 * Allocate and initialise the global page array.
 *
 * The array gets one zeroed vm_page_t for every page frame from physical
 * address 0 up to the highest address of any memblock memory region. Each
 * frame that is touched by a memblock reserved region -- even partially --
 * is then flagged VM_PAGE_RESERVED. Since the array itself is allocated
 * from memblock before that pass, its own frames are flagged as well.
 */
void vm_page_init_array(void)
{
    size_t pmem_size = 0;

    memblock_iter_t it;
    for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
        if (pmem_size < it.it_limit + 1) {
            pmem_size = it.it_limit + 1;
        }
    }

    size_t nr_pages = pmem_size / VM_PAGE_SIZE;
    if (pmem_size % VM_PAGE_SIZE) {
        nr_pages++;
    }

    page_array = memblock_alloc(sizeof(vm_page_t) * nr_pages);
    page_array_count = nr_pages;
    /* report nr_pages, so that a trailing partial page is counted too */
    printf("page_array covers 0x%zx bytes, %zu page frames\n", pmem_size, nr_pages);
    printf("page_array is %zu bytes long\n", sizeof(vm_page_t) * nr_pages);

    memset(page_array, 0x0, sizeof *page_array * nr_pages);

    size_t nr_reserved = 0;
    for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) {
        /* walk frame numbers rather than addresses: a region need not
           start or end on a page boundary, and every frame it touches
           must be reserved, including the one holding its last byte. */
        size_t first_pfn = it.it_base / VM_PAGE_SIZE;
        size_t last_pfn = it.it_limit / VM_PAGE_SIZE;

        /* reserved regions may lie beyond the end of RAM; those frames
           have no entry in the array. */
        for (size_t pfn = first_pfn; pfn <= last_pfn && pfn < nr_pages; pfn++) {
            if (page_array[pfn].p_flags & VM_PAGE_RESERVED) {
                /* already counted through a neighbouring region */
                continue;
            }

            page_array[pfn].p_flags |= VM_PAGE_RESERVED;
            nr_reserved++;
        }
    }

    printf("%zu reserved page frames\n", nr_reserved);
}
/*
 * Find the page structure of the frame that contains physical address
 * `addr`. Returns NULL if the address lies beyond the page array.
 */
vm_page_t *vm_page_get(phys_addr_t addr)
{
    size_t pfn = addr / VM_PAGE_SIZE;

    if (pfn >= page_array_count) {
        return NULL;
    }

    return &page_array[pfn];
}
/* Physical address of the first byte of the frame described by `pg`. */
phys_addr_t vm_page_get_paddr(vm_page_t *pg)
{
    size_t pfn = vm_page_get_pfn(pg);
    return pfn * VM_PAGE_SIZE;
}
/* Pointer through which the frame described by `pg` can be accessed:
   the translation window's base plus the frame's physical offset. */
void *vm_page_get_vaddr(vm_page_t *pg)
{
    char *window_base = (char *)tmp_vaddr_base;
    return (void *)(window_base + (vm_page_get_pfn(pg) * VM_PAGE_SIZE));
}
/* Page frame number of `pg`, i.e. its index within the page array. */
size_t vm_page_get_pfn(vm_page_t *pg)
{
    uintptr_t byte_offset = (uintptr_t)pg - (uintptr_t)page_array;
    return byte_offset / sizeof *pg;
}
/* Size in bytes of a block of the given order, or 0 if `order` is
   outside the range 0..VM_PAGE_MAX_ORDER. */
size_t vm_page_order_to_bytes(vm_page_order_t order)
{
    if (order >= 0 && order <= VM_PAGE_MAX_ORDER) {
        return page_order_bytes[order];
    }

    return 0;
}
/* Number of page frames in a block of the given order, or 0 if `order`
   is outside the range 0..VM_PAGE_MAX_ORDER. */
phys_addr_t vm_page_order_to_pages(vm_page_order_t order)
{
    if (order >= 0 && order <= VM_PAGE_MAX_ORDER) {
        return page_order_bytes[order] >> VM_PAGE_SHIFT;
    }

    return 0;
}
/* Alignment mask for blocks of the given order: all bits set except the
   low bits that address bytes within one such block. Returns 0 if `order`
   is outside the range 0..VM_PAGE_MAX_ORDER. */
vm_alignment_t vm_page_order_to_alignment(vm_page_order_t order)
{
    if (order >= 0 && order <= VM_PAGE_MAX_ORDER) {
        return ~(page_order_bytes[order] - 1);
    }

    return 0;
}
/*
 * Number of page frames needed to hold `bytes` bytes, i.e. `bytes` divided
 * by the page size and rounded up.
 */
size_t vm_bytes_to_pages(size_t bytes)
{
    /* count the whole pages first and add one for a partial page.
       rounding `bytes` itself up before shifting would wrap around to 0
       for values within one page of SIZE_MAX. */
    size_t pages = bytes >> VM_PAGE_SHIFT;

    if (bytes & (VM_PAGE_SIZE - 1)) {
        pages++;
    }

    return pages;
}
/*
 * Find the zone a page belongs to, using the node and zone ids stored in
 * the page. Returns NULL if the node does not exist or the zone id is out
 * of range.
 */
vm_zone_t *vm_page_get_zone(vm_page_t *pg)
{
    vm_pg_data_t *node = vm_pg_data_get(pg->p_node);

    if (!node || pg->p_zone >= VM_MAX_ZONES) {
        return NULL;
    }

    return &node->pg_zones[pg->p_zone];
}
/*
 * Allocate a block of 2^order page frames.
 *
 * The search starts in VM_ZONE_HIGHMEM, or in VM_ZONE_DMA when `flags`
 * contains VM_GET_DMA, and falls back through the lower-numbered zones
 * down to VM_ZONE_MIN until one of them can satisfy the request.
 *
 * Returns the head page of the block, or NULL if no zone has memory
 * available or the VM has not been bootstrapped yet.
 */
vm_page_t *vm_page_alloc(vm_page_order_t order, vm_flags_t flags)
{
    /* TODO prefer nodes closer to us */
    vm_pg_data_t *node = vm_pg_data_get(0);
    if (!node) {
        /* vm_bootstrap() has not run, so there are no zones to search */
        return NULL;
    }

    vm_zone_id_t zone_id = VM_ZONE_HIGHMEM;
    if (flags & VM_GET_DMA) {
        zone_id = VM_ZONE_DMA;
    }

    while (1) {
        vm_zone_t *z = &node->pg_zones[zone_id];

        vm_page_t *pg = vm_zone_alloc_page(z, order, flags);
        if (pg) {
            return pg;
        }

        /* test before decrementing so the id never drops below the minimum */
        if (zone_id == VM_ZONE_MIN) {
            break;
        }

        zone_id--;
    }

    return NULL;
}
/*
 * Give a page block back to the zone it came from. Does nothing if the
 * page's zone cannot be determined.
 */
void vm_page_free(vm_page_t *pg)
{
    vm_zone_t *zone = vm_page_get_zone(pg);

    if (zone) {
        vm_zone_free_page(zone, pg);
    }
}
/*
 * Split the block headed by `pg` into two halves of the next lower order.
 *
 * Every frame of the block has its order decremented. The two new heads
 * (`pg` and its buddy at the new order) are marked HEAD|HUGE, unless the
 * halves are of the minimum order, in which case both flags are cleared.
 *
 * On success the heads are stored through `a` (lower half) and `b` (upper
 * half) and 0 is returned. Returns -1, storing nothing, if the block is
 * already of the minimum order.
 */
int vm_page_split(vm_page_t *pg, vm_page_t **a, vm_page_t **b)
{
    if (pg->p_order == VM_PAGE_MIN_ORDER) {
        return -1;
    }

    /* NOTE that vm_page_foreach cannot be used in here: it relies on the
       very flags being rewritten to find the end of a block. count the
       frames while p_order still holds the old order. */
    size_t remaining = vm_page_order_to_pages(pg->p_order);
    for (vm_page_t *frame = pg; remaining > 0; frame++, remaining--) {
        frame->p_order--;
    }

    /* looked up AFTER the order was lowered: this is the upper half */
    vm_page_t *upper = vm_page_get_buddy(pg);

    if (pg->p_order != VM_PAGE_MIN_ORDER) {
        pg->p_flags |= VM_PAGE_HEAD | VM_PAGE_HUGE;
        upper->p_flags |= VM_PAGE_HEAD | VM_PAGE_HUGE;
    } else {
        pg->p_flags &= ~(VM_PAGE_HUGE | VM_PAGE_HEAD);
        upper->p_flags &= ~(VM_PAGE_HUGE | VM_PAGE_HEAD);
    }

    *a = pg;
    *b = upper;
    return 0;
}
/*
 * Merge two buddy blocks into one block of the next higher order.
 *
 * The merge only happens if `a` and `b` have the same order (below the
 * maximum), are each other's buddy, and agree in their ALLOC and RESERVED
 * flags. All frames of the merged block get the new order and the HUGE
 * flag; only the first frame keeps HEAD.
 *
 * Returns the head of the merged block, or NULL if the blocks cannot be
 * merged (including when either pointer is NULL).
 */
vm_page_t *vm_page_merge(vm_page_t *a, vm_page_t *b)
{
    if (!a || !b) {
        /* callers pass the result of vm_page_get_buddy() straight in, and
           that is NULL when the buddy lies outside the page array. */
        return NULL;
    }

    if (a->p_order != b->p_order) {
        return NULL;
    }

    if (a->p_order == VM_PAGE_MAX_ORDER) {
        return NULL;
    }

    if (vm_page_get_buddy(a) != b) {
        return NULL;
    }

    if ((a->p_flags & (VM_PAGE_ALLOC | VM_PAGE_RESERVED)) != (b->p_flags & (VM_PAGE_ALLOC | VM_PAGE_RESERVED))) {
        return NULL;
    }

    /* make sure that a comes before b */
    if (a > b) {
        vm_page_t *tmp = a;
        a = b;
        b = tmp;
    }

    a->p_order++;

    /* NOTE that we cannot use vm_page_foreach here,
       as we are modifying the flags that vm_page_foreach
       uses to determine where a given page block ends */
    size_t nr_frames = vm_page_order_to_pages(a->p_order);
    for (size_t i = 0; i < nr_frames; i++) {
        a[i].p_flags &= ~VM_PAGE_HEAD;
        a[i].p_flags |= VM_PAGE_HUGE;
        a[i].p_order = a->p_order;
    }

    a->p_flags |= VM_PAGE_HEAD;

    return a;
}
/*
 * Find the buddy of the block headed by `pg`: the block of the same order
 * whose physical address differs only in the bit equal to the block size.
 * Returns NULL if that address lies outside the page array.
 */
vm_page_t *vm_page_get_buddy(vm_page_t *pg)
{
    phys_addr_t block_addr = vm_page_get_paddr(pg);
    phys_addr_t buddy_addr = block_addr ^ vm_page_order_to_bytes(pg->p_order);

    return vm_page_get(buddy_addr);
}
/*
 * Step from one frame of a block to the next frame of the same block.
 *
 * Returns the following frame, or NULL if `pg` is the last frame of its
 * block: that is, if the following frame starts a new block (HEAD set),
 * is not part of a multi-frame block at all (HUGE clear), or does not
 * exist because `pg` is the last entry of the page array.
 */
vm_page_t *vm_page_get_next_tail(vm_page_t *pg)
{
    vm_page_t *next = pg + 1;

    if (next >= page_array + page_array_count) {
        /* a block that ends with the last frame of RAM has no successor;
           inspecting it would read past the end of the array. */
        return NULL;
    }

    if (next->p_flags & VM_PAGE_HEAD || !(next->p_flags & VM_PAGE_HUGE)) {
        return NULL;
    }

    return next;
}

231
vm/zone.c Normal file
View File

@@ -0,0 +1,231 @@
#include <socks/locks.h>
#include <socks/queue.h>
#include <socks/types.h>
#include <socks/vm.h>
#include <string.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>
#include <stdlib.h>
/*
 * Stamp the frames covering [base, limit) as one block of the given order
 * that belongs to zone `z`.
 *
 * Each frame gets the order plus the zone's node and zone ids; frames of
 * blocks above the minimum order are flagged HUGE, and the first frame is
 * flagged HEAD.
 *
 * Returns the first frame, or NULL if the range is empty.
 */
static vm_page_t *group_pages_into_block(vm_zone_t *z, phys_addr_t base, phys_addr_t limit, int order)
{
    vm_page_t *head = NULL;

    for (phys_addr_t addr = base; addr < limit; addr += VM_PAGE_SIZE) {
        vm_page_t *frame = vm_page_get(addr);

        if (order != VM_PAGE_MIN_ORDER) {
            frame->p_flags |= VM_PAGE_HUGE;
        }

        if (addr == base) {
            frame->p_flags |= VM_PAGE_HEAD;
            head = frame;
        }

        frame->p_zone = z->z_info.zd_id;
        frame->p_node = z->z_info.zd_node;
        frame->p_order = order;
    }

    return head;
}
/*
 * Carve the physical range [base, limit] (limit inclusive) into page
 * blocks for `zone`.
 *
 * Blocks are made as large as the remaining size and the alignment of the
 * current address allow. Unless `reserved` is non-zero, each block is put
 * on the zone's free list for its order; reserved blocks are only stamped
 * with their order and zone.
 */
static void convert_region_to_blocks(vm_zone_t *zone,
        phys_addr_t base, phys_addr_t limit,
        int reserved)
{
    size_t block_frames = vm_bytes_to_pages(limit - base + 1);
    printf("adding region %08zx-%08zx (%zu frames) to zone %s\n",
            base, limit, block_frames, zone->z_info.zd_name);
    int reset_order = 0;

    for (int order = VM_PAGE_MAX_ORDER; order >= VM_PAGE_MIN_ORDER; ) {
        size_t order_frames = vm_page_order_to_pages(order);
        vm_alignment_t order_alignment = vm_page_order_to_alignment(order);

        if (order_frames > block_frames) {
            /* a block of this order no longer fits into what is left */
            order--;
            continue;
        }

        if (!VM_CHECK_ALIGN(base, order_alignment)) {
            /* the address is not aligned for this order. settle for a
               smaller block now, but try the large orders again once
               that block has moved the address along. */
            reset_order = 1;
            order--;
            continue;
        }

        printf("%s: %zu %s pages at %08" PRIxPTR "\n",
                zone->z_info.zd_name,
                order_frames,
                reserved == 1 ? "reserved" : "free",
                base);

        phys_addr_t block_limit = base + (order_frames * VM_PAGE_SIZE) - 1;
        vm_page_t *block_page = group_pages_into_block(zone, base, block_limit, order);

        if (reserved == 0) {
            queue_push_back(&zone->z_free_pages[order], &block_page->p_list);
        }

        base = block_limit + 1;
        block_frames -= order_frames;

        if (reset_order) {
            order = VM_PAGE_MAX_ORDER;
            reset_order = 0;
        }

        if (base > limit + 1) {
            printf("too many pages created! %zx > %zx\n", base, limit);
            abort();
        }

        if (block_frames == 0) {
            /* the whole region has been handed out. `limit` is inclusive,
               so comparing base against it directly never matches and the
               loop would spin through every remaining order first. */
            break;
        }
    }
}
/*
 * Initialise zone `z` from the descriptor `zone_info`.
 *
 * The frames from zd_base up to zd_limit are walked and grouped into runs
 * of consecutive frames that are either all reserved or all free; each run
 * is then turned into page blocks. The walk stops early at the first
 * address that has no entry in the page array.
 *
 * If even zd_base has no page-array entry, the zone is left empty (but
 * zeroed, with a usable lock) so that allocating from it fails cleanly.
 */
void vm_zone_init(vm_zone_t *z, const vm_zone_descriptor_t *zone_info)
{
    /* clear the zone before anything else: callers do not zero it for us,
       and the allocator may still look at zones that turn out to be absent. */
    memset(z, 0x0, sizeof *z);
    z->z_lock = SPIN_LOCK_INIT;

    if (!vm_page_get(zone_info->zd_base)) {
        return;
    }

    printf("initialising zone %s (%08zx-%08zx)\n",
            zone_info->zd_name, zone_info->zd_base, zone_info->zd_limit);
    memcpy(&z->z_info, zone_info, sizeof *zone_info);

    unsigned long flags;
    spin_lock_irqsave(&z->z_lock, &flags);

    /* address of the first frame of the run currently being collected */
    phys_addr_t run_start = zone_info->zd_base;
    /* 1 or 0 once a run has been started, -1 before the first frame */
    int run_reserved = -1;

    uintptr_t addr;
    for (addr = zone_info->zd_base; addr < zone_info->zd_limit; addr += VM_PAGE_SIZE) {
        vm_page_t *pg = vm_page_get(addr);
        if (!pg) {
            break;
        }

        int this_page_reserved = (pg->p_flags & VM_PAGE_RESERVED) ? 1 : 0;

        if (run_reserved == -1) {
            run_reserved = this_page_reserved;
        }

        if (this_page_reserved == run_reserved) {
            continue;
        }

        /* the frame at `addr` starts a new run, so the previous run ends
           on the byte just before it -- however short that run was. */
        convert_region_to_blocks(z, run_start, addr - 1, run_reserved);

        run_start = addr;
        run_reserved = this_page_reserved;
    }

    /* flush the final run. `addr` is one frame beyond its last frame; a
       run that is only a single frame long must not be dropped. */
    if (run_reserved != -1 && addr > run_start) {
        convert_region_to_blocks(z, run_start, addr - 1, run_reserved);
    }

    spin_unlock_irqrestore(&z->z_lock, flags);
}
/*
 * Make sure the zone's free list for `order` has at least one block on
 * it, splitting a larger free block down if necessary.
 *
 * Returns 0 if a block of the requested order is available afterwards,
 * -1 if neither that list nor any larger one has a block to offer.
 */
static int replenish_free_page_list(vm_zone_t *z, vm_page_order_t order)
{
    if (!queue_empty(&z->z_free_pages[order])) {
        /* nothing to do, blocks of this order are on hand */
        return 0;
    }

    if (order == VM_PAGE_MAX_ORDER) {
        /* there is no larger order that could be split */
        return -1;
    }

    /* find the smallest order >= `order` that has a free block.
       VM_MAX_PAGE_ORDERS serves as the "none found" marker. */
    vm_page_order_t donor = VM_MAX_PAGE_ORDERS;
    vm_page_order_t probe = order;
    while (probe <= VM_PAGE_MAX_ORDER) {
        if (!queue_empty(&z->z_free_pages[probe])) {
            donor = probe;
            break;
        }

        probe++;
    }

    if (donor == VM_MAX_PAGE_ORDERS) {
        /* every list from `order` upwards is empty */
        return -1;
    }

    if (donor == order) {
        /* the requested order has blocks after all */
        return 0;
    }

    /* work downwards from the donor order: take one block, halve it, and
       put both halves on the list one order below. the next round then
       finds one of those halves at the front of that list. */
    vm_page_order_t level = donor;
    while (level > order) {
        queue_entry_t *entry = queue_pop_front(&z->z_free_pages[level]);
        vm_page_t *block = QUEUE_CONTAINER(vm_page_t, p_list, entry);

        vm_page_t *a, *b;
        vm_page_split(block, &a, &b);

        queue_push_back(&z->z_free_pages[level - 1], &a->p_list);
        queue_push_back(&z->z_free_pages[level - 1], &b->p_list);

        level--;
    }

    return 0;
}
/*
 * Allocate one block of the given order from zone `z`.
 *
 * Larger free blocks are split as needed. Every frame of the block that
 * is handed out gets flagged VM_PAGE_ALLOC.
 *
 * Runs with z->z_lock held and interrupts saved/restored around it.
 * Returns the head page of the block, or NULL if the zone has no block of
 * this order or larger.
 */
vm_page_t *vm_zone_alloc_page(vm_zone_t *z, vm_page_order_t order, vm_flags_t flags)
{
    unsigned long irq_flags;
    vm_page_t *block = NULL;

    spin_lock_irqsave(&z->z_lock, &irq_flags);

    if (replenish_free_page_list(z, order) == 0) {
        queue_entry_t *entry = queue_pop_front(&z->z_free_pages[order]);
        block = QUEUE_CONTAINER(vm_page_t, p_list, entry);

        vm_page_foreach (block, i) {
            i->p_flags |= VM_PAGE_ALLOC;
        }
    }

    spin_unlock_irqrestore(&z->z_lock, irq_flags);
    return block;
}
/*
 * Return the block headed by `pg` to zone `z`.
 *
 * The block is put on the free list for its order and then merged with
 * its buddy, repeatedly, for as long as the buddy is a mergeable block of
 * the same order.
 *
 * Runs with z->z_lock held and interrupts saved/restored around it.
 */
void vm_zone_free_page(vm_zone_t *z, vm_page_t *pg)
{
    unsigned long irq_flags;
    spin_lock_irqsave(&z->z_lock, &irq_flags);

    /* allocation flags EVERY frame of the block, so clear every frame
       again. a stale flag on a tail frame would resurface once that frame
       becomes the head of a split-off block, and the flag comparison in
       vm_page_merge() would then refuse to merge it although it is free. */
    pg->p_flags &= ~VM_PAGE_ALLOC;
    size_t nr_frames = vm_page_order_to_pages(pg->p_order);
    for (size_t i = 1; i < nr_frames; i++) {
        pg[i].p_flags &= ~VM_PAGE_ALLOC;
    }

    queue_push_back(&z->z_free_pages[pg->p_order], &pg->p_list);

    while (1) {
        vm_page_t *buddy = vm_page_get_buddy(pg);
        if (!buddy) {
            /* the buddy would lie outside the page array */
            break;
        }

        /* remember the order now; a successful merge raises it */
        vm_page_order_t order = pg->p_order;

        vm_page_t *huge = vm_page_merge(pg, buddy);
        if (!huge) {
            break;
        }

        /* both halves leave the list of the old order, and the merged
           block joins the list one order up. */
        queue_delete(&z->z_free_pages[order], &buddy->p_list);
        queue_delete(&z->z_free_pages[order], &pg->p_list);

        queue_push_back(&z->z_free_pages[huge->p_order], &huge->p_list);
        pg = huge;
    }

    spin_unlock_irqrestore(&z->z_lock, irq_flags);
}