sandbox: moved all sources to main kernel tree

2023-02-03 20:43:38 +00:00
parent e714d619ba
commit 40f83922da
18 changed files with 0 additions and 9 deletions


@@ -1,36 +0,0 @@
#include <socks/status.h>
#include <limits.h>
#include <socks/vm.h>
#include <socks/memblock.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
/* One vm_pg_data_t per NUMA node. */
static vm_pg_data_t *node_data = NULL;
kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones)
{
int numa_count = 1;
/* we're only worrying about UMA systems for now */
node_data = memblock_alloc(sizeof(vm_pg_data_t) * numa_count);
vm_page_init_array();
for (size_t i = 0; i < nr_zones; i++) {
vm_zone_init(&node_data->pg_zones[zones[i].zd_id], &zones[i]);
}
return KERN_OK;
}
vm_pg_data_t *vm_pg_data_get(vm_node_id_t node)
{
if (node == 0) {
return node_data;
}
return NULL;
}
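/* Illustrative only: an early-boot caller might describe physical memory
with zone descriptors and hand them to vm_bootstrap as sketched below.
The base/limit values are invented for the sketch, not real platform numbers. */
#if 0
static const vm_zone_descriptor_t boot_zones[] = {
{ .zd_id = VM_ZONE_DMA, .zd_node = 0, .zd_name = "dma", .zd_base = 0x0, .zd_limit = 0xFFFFFF },
{ .zd_id = VM_ZONE_NORMAL, .zd_node = 0, .zd_name = "normal", .zd_base = 0x1000000, .zd_limit = 0x7FFFFFF },
};
/* ...then, from early bring-up code: */
kern_status_t status = vm_bootstrap(boot_zones, sizeof boot_zones / sizeof boot_zones[0]);
#endif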


@@ -1,217 +0,0 @@
#include <socks/queue.h>
#include <stdlib.h>
#include <assert.h>
#include <socks/vm.h>
#define FREELIST_END ((unsigned int)-1)
static vm_cache_t cache_cache = { .c_name = "vm_cache", .c_obj_size = sizeof(vm_cache_t) };
vm_cache_t *vm_cache_create(const char *name, size_t objsz, vm_cache_flags_t flags)
{
if (!VM_CACHE_INITIALISED(&cache_cache)) {
vm_cache_init(&cache_cache);
}
vm_cache_t *new_cache = vm_cache_alloc(&cache_cache, 0);
if (!new_cache) {
return NULL;
}
new_cache->c_name = name;
new_cache->c_obj_size = objsz;
new_cache->c_flags = flags;
vm_cache_init(new_cache);
return new_cache;
}
void vm_cache_init(vm_cache_t *cache)
{
cache->c_page_order = VM_PAGE_16K;
if (cache->c_obj_size >= 512) {
cache->c_flags |= VM_CACHE_OFFSLAB;
}
size_t available = vm_page_order_to_bytes(cache->c_page_order);
size_t space_per_item = cache->c_obj_size;
/* align to 16-byte boundary */
if (space_per_item & 0xF) {
space_per_item &= ~0xF;
space_per_item += 0x10;
}
cache->c_stride = space_per_item;
if (!(cache->c_flags & VM_CACHE_OFFSLAB)) {
available -= sizeof(vm_slab_t);
}
/* one entry in the freelist per object slot */
space_per_item += sizeof(unsigned int);
cache->c_obj_count = available / space_per_item;
cache->c_slabs_full = QUEUE_INIT;
cache->c_slabs_partial = QUEUE_INIT;
cache->c_slabs_empty = QUEUE_INIT;
cache->c_hdr_size = sizeof(vm_slab_t) + (sizeof(unsigned int) * cache->c_obj_count);
}
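/* Worked example of the geometry above (illustrative; assumes
sizeof(vm_slab_t) == 40 and sizeof(unsigned int) == 4 on the target).
A 64-byte object cache stays on-slab (64 < 512), on a 16K slab:
c_stride = 64 (already 16-byte aligned)
available = 16384 - 40 = 16344
space_per_item = 64 + 4 = 68 (object slot plus one freelist entry)
c_obj_count = 16344 / 68 = 240
c_hdr_size = 40 + 240 * 4 = 1000
sanity check: 1000 + 240 * 64 = 16360 <= 16384 */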
void vm_cache_destroy(vm_cache_t *cache)
{
/* TODO */
}
static vm_slab_t *alloc_slab(vm_cache_t *cache, vm_flags_t flags)
{
vm_page_t *slab_page = vm_page_alloc(cache->c_page_order, flags);
if (!slab_page) {
return NULL;
}
vm_slab_t *slab_hdr = NULL;
void *slab_data = vm_page_get_vaddr(slab_page);
if (cache->c_flags & VM_CACHE_OFFSLAB) {
/* NOTE the circular dependency here:
kmalloc -> vm_cache_alloc -> alloc_slab -> kmalloc
since this call path is only used for caches with
VM_CACHE_OFFSLAB set, we avoid the circular dependency
by ensuring the small size-N (where N < 512) caches
(which don't use that flag) are initialised before
attempting to allocate from an offslab cache. */
slab_hdr = kmalloc(cache->c_hdr_size, flags);
if (!slab_hdr) {
vm_page_free(slab_page);
return NULL;
}
slab_hdr->s_objects = slab_data;
} else {
slab_hdr = slab_data;
slab_hdr->s_objects = (void *)((char *)slab_data + cache->c_hdr_size);
}
slab_hdr->s_cache = cache;
slab_hdr->s_list = QUEUE_ENTRY_INIT;
slab_hdr->s_obj_allocated = 0;
slab_hdr->s_free = 0;
for (unsigned int i = 0; i < cache->c_obj_count; i++) {
slab_hdr->s_freelist[i] = i + 1;
}
slab_hdr->s_freelist[cache->c_obj_count - 1] = FREELIST_END;
vm_page_foreach (slab_page, i) {
i->p_slab = slab_hdr;
}
return slab_hdr;
}
static void destroy_slab(vm_slab_t *slab)
{
/* TODO */
}
static unsigned int slab_allocate_slot(vm_slab_t *slab)
{
if (slab->s_free == FREELIST_END) {
return FREELIST_END;
}
unsigned int slot = slab->s_free;
slab->s_free = slab->s_freelist[slab->s_free];
slab->s_obj_allocated++;
return slot;
}
static void slab_free_slot(vm_slab_t *slab, unsigned int slot)
{
unsigned int next = slab->s_free;
slab->s_free = slot;
slab->s_freelist[slot] = next;
slab->s_obj_allocated--;
}
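/* Illustrative trace of the freelist with c_obj_count == 4:
fresh slab: s_free = 0, s_freelist = { 1, 2, 3, FREELIST_END }
allocate -> slot 0, s_free = 1
allocate -> slot 1, s_free = 2
free slot 0 -> s_freelist[0] = 2, s_free = 0
allocate -> slot 0 again (LIFO reuse), s_free = 2 */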
static void *slot_to_pointer(vm_slab_t *slab, unsigned int slot)
{
return (void *)((char *)slab->s_objects + (slot * slab->s_cache->c_stride));
}
static unsigned int pointer_to_slot(vm_slab_t *slab, void *p)
{
size_t offset = (uintptr_t)p - (uintptr_t)slab->s_objects;
return offset / slab->s_cache->c_stride;
}
void *vm_cache_alloc(vm_cache_t *cache, vm_flags_t flags)
{
unsigned long irq_flags;
spin_lock_irqsave(&cache->c_lock, &irq_flags);
vm_slab_t *slab = NULL;
if (!queue_empty(&cache->c_slabs_partial)) {
/* prefer using up partially-full slabs before taking a fresh one */
queue_entry_t *slab_entry = queue_pop_front(&cache->c_slabs_partial);
assert(slab_entry);
slab = QUEUE_CONTAINER(vm_slab_t, s_list, slab_entry);
} else if (!queue_empty(&cache->c_slabs_empty)) {
queue_entry_t *slab_entry = queue_pop_front(&cache->c_slabs_empty);
assert(slab_entry);
slab = QUEUE_CONTAINER(vm_slab_t, s_list, slab_entry);
} else {
/* we've run out of slabs. create a new one */
slab = alloc_slab(cache, flags);
}
if (!slab) {
spin_unlock_irqrestore(&cache->c_lock, irq_flags);
return NULL;
}
unsigned int slot = slab_allocate_slot(slab);
void *p = slot_to_pointer(slab, slot);
if (slab->s_free == FREELIST_END) {
queue_push_back(&cache->c_slabs_full, &slab->s_list);
} else {
queue_push_back(&cache->c_slabs_partial, &slab->s_list);
}
spin_unlock_irqrestore(&cache->c_lock, irq_flags);
return p;
}
void vm_cache_free(vm_cache_t *cache, void *p)
{
if (!p) {
return;
}
unsigned long irq_flags;
spin_lock_irqsave(&cache->c_lock, &irq_flags);
phys_addr_t phys = vm_virt_to_phys(p);
vm_page_t *pg = vm_page_get(phys);
if (!pg || !pg->p_slab) {
spin_unlock_irqrestore(&cache->c_lock, irq_flags);
return;
}
vm_slab_t *slab = pg->p_slab;
if (slab->s_cache != cache) {
spin_unlock_irqrestore(&cache->c_lock, irq_flags);
return;
}
if (slab->s_free == FREELIST_END) {
queue_delete(&cache->c_slabs_full, &slab->s_list);
} else {
queue_delete(&cache->c_slabs_partial, &slab->s_list);
}
unsigned int slot = pointer_to_slot(slab, p);
slab_free_slot(slab, slot);
if (slab->s_obj_allocated == 0) {
queue_push_back(&cache->c_slabs_empty, &slab->s_list);
} else {
queue_push_back(&cache->c_slabs_partial, &slab->s_list);
}
spin_unlock_irqrestore(&cache->c_lock, irq_flags);
}
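/* Illustrative usage; `foo_t` is a hypothetical object type. */
#if 0
vm_cache_t *foo_cache = vm_cache_create("foo", sizeof(foo_t), 0);
foo_t *f = vm_cache_alloc(foo_cache, 0);
/* ...use f... */
vm_cache_free(foo_cache, f);
#endif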


@@ -1,241 +0,0 @@
#ifndef SOCKS_VM_H_
#define SOCKS_VM_H_
#include <stddef.h>
#include <socks/types.h>
#include <socks/status.h>
#include <socks/queue.h>
#include <socks/locks.h>
/* maximum number of NUMA nodes */
#define VM_MAX_NODES 64
/* maximum number of memory zones per node */
#define VM_MAX_ZONES (VM_ZONE_MAX + 1)
/* maximum number of supported page orders */
#define VM_MAX_PAGE_ORDERS (VM_PAGE_MAX_ORDER + 1)
#define VM_CHECK_ALIGN(p, mask) ((((p) & (mask)) == (p)) ? 1 : 0)
#define VM_PAGE_SIZE 0x1000
#define VM_PAGE_SHIFT 12
#define VM_CACHE_INITIALISED(c) ((c)->c_obj_count != 0)
#define VM_PAGE_IS_FREE(pg) (((pg)->p_flags & (VM_PAGE_RESERVED | VM_PAGE_ALLOC)) == 0)
#define vm_page_foreach(pg, i) \
for (vm_page_t *i = (pg); i; i = vm_page_get_next_tail(i))
typedef phys_addr_t vm_alignment_t;
typedef unsigned int vm_node_id_t;
typedef struct vm_object {
unsigned int reserved;
} vm_object_t;
typedef enum vm_flags {
VM_GET_DMA = 0x01u,
} vm_flags_t;
typedef enum vm_zone_id {
/* NOTE that these are used as indices into the node_zones array in vm/zone.c
they need to be contiguous, and must start at 0! */
VM_ZONE_DMA = 0u,
VM_ZONE_NORMAL = 1u,
VM_ZONE_HIGHMEM = 2u,
VM_ZONE_MIN = VM_ZONE_DMA,
VM_ZONE_MAX = VM_ZONE_HIGHMEM,
} vm_zone_id_t;
typedef enum vm_page_order {
VM_PAGE_4K = 0u,
VM_PAGE_8K,
VM_PAGE_16K,
VM_PAGE_32K,
VM_PAGE_64K,
VM_PAGE_128K,
VM_PAGE_256K,
VM_PAGE_512K,
VM_PAGE_1M,
VM_PAGE_2M,
VM_PAGE_4M,
VM_PAGE_8M,
VM_PAGE_16M,
VM_PAGE_32M,
VM_PAGE_64M,
VM_PAGE_128M,
#if 0
/* vm_page_t only has 4 bits to store the page order.
the maximum order that can be stored in 4 bits is 15 (VM_PAGE_128M);
to use any of the page orders listed here, this field
will have to be expanded. */
VM_PAGE_256M,
VM_PAGE_512M,
VM_PAGE_1G,
#endif
VM_PAGE_MIN_ORDER = VM_PAGE_4K,
VM_PAGE_MAX_ORDER = VM_PAGE_8M,
} vm_page_order_t;
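/* Note: each order doubles the block size, so
vm_page_order_to_bytes(order) == VM_PAGE_SIZE << order,
e.g. VM_PAGE_16K -> 0x1000 << 2 == 0x4000. */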
typedef enum vm_page_flags {
/* page is reserved (probably by a call to memblock_reserve()) and cannot be
returned by any allocation function */
VM_PAGE_RESERVED = 0x01u,
/* page has been allocated by a zone's buddy allocator, and is in-use */
VM_PAGE_ALLOC = 0x02u,
/* page is the first page of a huge-page */
VM_PAGE_HEAD = 0x04u,
/* page is part of a huge-page */
VM_PAGE_HUGE = 0x08u,
} vm_page_flags_t;
typedef enum vm_memory_region_status {
VM_REGION_FREE = 0x01u,
VM_REGION_RESERVED = 0x02u,
} vm_memory_region_status_t;
typedef enum vm_cache_flags {
VM_CACHE_OFFSLAB = 0x01u,
VM_CACHE_DMA = 0x02u
} vm_cache_flags_t;
typedef struct vm_zone_descriptor {
vm_zone_id_t zd_id;
vm_node_id_t zd_node;
const char zd_name[32];
phys_addr_t zd_base;
phys_addr_t zd_limit;
} vm_zone_descriptor_t;
typedef struct vm_zone {
vm_zone_descriptor_t z_info;
spin_lock_t z_lock;
queue_t z_free_pages[VM_MAX_PAGE_ORDERS];
unsigned long z_size;
} vm_zone_t;
typedef struct vm_pg_data {
vm_zone_t pg_zones[VM_MAX_ZONES];
} vm_pg_data_t;
typedef struct vm_region {
vm_memory_region_status_t r_status;
phys_addr_t r_base;
phys_addr_t r_limit;
} vm_region_t;
typedef struct vm_cache {
const char *c_name;
vm_cache_flags_t c_flags;
queue_entry_t c_list;
queue_t c_slabs_full;
queue_t c_slabs_partial;
queue_t c_slabs_empty;
spin_lock_t c_lock;
/* number of objects that can be stored in a single slab */
unsigned int c_obj_count;
/* the size of each object kept in the cache */
unsigned int c_obj_size;
/* combined size of vm_slab_t and the freelist */
unsigned int c_hdr_size;
/* offset from one object to the next in a slab.
this may be different from c_obj_size as
we enforce a 16-byte alignment on allocated objects */
unsigned int c_stride;
/* size of page used for slabs */
unsigned int c_page_order;
} vm_cache_t;
typedef struct vm_slab {
vm_cache_t *s_cache;
/* queue entry for vm_cache_t.c_slabs_* */
queue_entry_t s_list;
/* pointer to the first object slot. */
void *s_objects;
/* the number of objects allocated on the slab. */
unsigned int s_obj_allocated;
/* the index of the next free object.
if s_free is equal to FREELIST_END (defined in vm/cache.c),
there are no free slots left in the slab. */
unsigned int s_free;
/* list of free object slots.
when allocating:
- s_free should be set to the value of s_freelist[s_free]
when freeing:
- s_free should be set to the index of the object being freed.
- s_freelist[s_free] should be set to the previous value of s_free.
*/
unsigned int s_freelist[];
} vm_slab_t;
typedef struct vm_page {
/* order of the page block that this page belongs to */
uint16_t p_order : 4;
/* the id of the NUMA node that this page belongs to */
uint16_t p_node : 6;
/* the id of the memory zone that this page belongs to */
uint16_t p_zone : 3;
/* some unused bits */
uint16_t p_reserved : 3;
/* vm_page_flags_t bitfields. */
uint32_t p_flags;
/* multi-purpose list.
the owner of the page can decide what to do with this.
some examples:
- the buddy allocator uses this to maintain its per-zone free-page lists.
*/
queue_entry_t p_list;
/* owner-specific data */
union {
struct {
vm_slab_t *p_slab;
};
};
} __attribute__((aligned(2 * sizeof(unsigned long)))) vm_page_t;
extern kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones);
extern vm_pg_data_t *vm_pg_data_get(vm_node_id_t node);
extern phys_addr_t vm_virt_to_phys(void *p);
extern void vm_page_init_array(void);
extern vm_page_t *vm_page_get(phys_addr_t addr);
extern phys_addr_t vm_page_get_paddr(vm_page_t *pg);
extern vm_zone_t *vm_page_get_zone(vm_page_t *pg);
extern void *vm_page_get_vaddr(vm_page_t *pg);
extern size_t vm_page_get_pfn(vm_page_t *pg);
extern size_t vm_page_order_to_bytes(vm_page_order_t order);
extern size_t vm_page_order_to_pages(vm_page_order_t order);
extern vm_alignment_t vm_page_order_to_alignment(vm_page_order_t order);
extern vm_page_t *vm_page_alloc(vm_page_order_t order, vm_flags_t flags);
extern void vm_page_free(vm_page_t *pg);
extern int vm_page_split(vm_page_t *pg, vm_page_t **a, vm_page_t **b);
extern vm_page_t *vm_page_merge(vm_page_t *a, vm_page_t *b);
extern vm_page_t *vm_page_get_buddy(vm_page_t *pg);
extern vm_page_t *vm_page_get_next_tail(vm_page_t *pg);
extern size_t vm_bytes_to_pages(size_t bytes);
extern void vm_zone_init(vm_zone_t *z, const vm_zone_descriptor_t *zone_info);
extern vm_page_t *vm_zone_alloc_page(vm_zone_t *z, vm_page_order_t order, vm_flags_t flags);
extern void vm_zone_free_page(vm_zone_t *z, vm_page_t *pg);
extern vm_cache_t *vm_cache_create(const char *name, size_t objsz, vm_cache_flags_t flags);
extern void vm_cache_init(vm_cache_t *cache);
extern void vm_cache_destroy(vm_cache_t *cache);
extern void *vm_cache_alloc(vm_cache_t *cache, vm_flags_t flags);
extern void vm_cache_free(vm_cache_t *cache, void *p);
extern void *kmalloc(size_t count, vm_flags_t flags);
extern void *kzalloc(size_t count, vm_flags_t flags);
extern void kfree(void *p);
#endif


@@ -1,73 +0,0 @@
#include <socks/vm.h>
#include <string.h>
#define SIZE_N_CACHE(s) \
{ .c_name = "size-" # s, .c_obj_size = s, .c_page_order = VM_PAGE_16K }
/* reserve space for the size-N caches: */
static vm_cache_t size_n_caches[] = {
SIZE_N_CACHE(16),
SIZE_N_CACHE(32),
SIZE_N_CACHE(48),
SIZE_N_CACHE(64),
SIZE_N_CACHE(96),
SIZE_N_CACHE(128),
SIZE_N_CACHE(160),
SIZE_N_CACHE(256),
SIZE_N_CACHE(388),
SIZE_N_CACHE(512),
SIZE_N_CACHE(576),
SIZE_N_CACHE(768),
SIZE_N_CACHE(1024),
SIZE_N_CACHE(1664),
SIZE_N_CACHE(2048),
SIZE_N_CACHE(3072),
SIZE_N_CACHE(4096),
};
static const size_t nr_size_n_caches = sizeof size_n_caches / sizeof size_n_caches[0];
void *kmalloc(size_t count, vm_flags_t flags)
{
if (!count) {
return NULL;
}
vm_cache_t *best_fit = NULL;
for (size_t i = 0; i < nr_size_n_caches; i++) {
if (size_n_caches[i].c_obj_size >= count) {
best_fit = &size_n_caches[i];
break;
}
}
if (!best_fit) {
return NULL;
}
if (!VM_CACHE_INITIALISED(best_fit)) {
vm_cache_init(best_fit);
}
return vm_cache_alloc(best_fit, flags);
}
void *kzalloc(size_t count, vm_flags_t flags)
{
void *p = kmalloc(count, flags);
if (p) {
memset(p, 0x0, count);
}
return p;
}
void kfree(void *p)
{
phys_addr_t phys = vm_virt_to_phys(p);
vm_page_t *pg = vm_page_get(phys);
if (!pg || !pg->p_slab) {
return;
}
vm_cache_free(pg->p_slab->s_cache, p);
}
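/* Illustrative: a 100-byte request is served from the size-128 cache,
the first size-N cache with c_obj_size >= 100. */
#if 0
void *buf = kzalloc(100, 0);
/* ...buf points to 100 zeroed bytes (backed by a 128-byte slot)... */
kfree(buf);
#endif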


@@ -1,296 +0,0 @@
#include <socks/types.h>
#include <socks/memblock.h>
#include <socks/vm.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
/* array of pages, one for each physical page frame present in RAM */
static vm_page_t *page_array = NULL;
/* number of pages stored in page_array */
static size_t page_array_count = 0;
/* Pre-calculated page order -> size conversion table */
static size_t page_order_bytes[] = {
[VM_PAGE_4K] = 0x1000,
[VM_PAGE_8K] = 0x2000,
[VM_PAGE_16K] = 0x4000,
[VM_PAGE_32K] = 0x8000,
[VM_PAGE_64K] = 0x10000,
[VM_PAGE_128K] = 0x20000,
[VM_PAGE_256K] = 0x40000,
[VM_PAGE_512K] = 0x80000,
[VM_PAGE_1M] = 0x100000,
[VM_PAGE_2M] = 0x200000,
[VM_PAGE_4M] = 0x400000,
[VM_PAGE_8M] = 0x800000,
[VM_PAGE_16M] = 0x1000000,
[VM_PAGE_32M] = 0x2000000,
[VM_PAGE_64M] = 0x4000000,
[VM_PAGE_128M] = 0x8000000,
#if 0
/* vm can support pages of this size, but
vm_page_t only has 4 bits with which to store
the page order, which cannot accommodate these
larger order numbers */
[VM_PAGE_256M] = 0x10000000,
[VM_PAGE_512M] = 0x20000000,
[VM_PAGE_1G] = 0x40000000,
#endif
};
/* temporary */
static void *tmp_vaddr_base = NULL;
static size_t tmp_vaddr_len = 0;
void tmp_set_vaddr_base(void *p, size_t len)
{
tmp_vaddr_base = p;
tmp_vaddr_len = len;
}
phys_addr_t vm_virt_to_phys(void *p)
{
phys_addr_t x = (phys_addr_t)p - (phys_addr_t)tmp_vaddr_base;
assert(x < tmp_vaddr_len);
return x;
}
void vm_page_init_array(void)
{
size_t pmem_size = 0;
memblock_iter_t it;
for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
if (pmem_size < it.it_limit + 1) {
pmem_size = it.it_limit + 1;
}
}
size_t nr_pages = pmem_size / VM_PAGE_SIZE;
if (pmem_size % VM_PAGE_SIZE) {
nr_pages++;
}
page_array = memblock_alloc(sizeof(vm_page_t) * nr_pages);
page_array_count = nr_pages;
printf("page_array covers 0x%zx bytes, %zu page frames\n", pmem_size, pmem_size / VM_PAGE_SIZE);
printf("page_array is %zu bytes long\n", sizeof(vm_page_t) * nr_pages);
for (size_t i = 0; i < nr_pages; i++) {
memset(&page_array[i], 0x0, sizeof page_array[i]);
}
size_t nr_reserved = 0;
for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) {
for (uintptr_t i = it.it_base; i <= it.it_limit; i += VM_PAGE_SIZE) {
size_t pfn = i / VM_PAGE_SIZE;
page_array[pfn].p_flags |= VM_PAGE_RESERVED;
nr_reserved++;
}
}
printf("%zu reserved page frames\n", nr_reserved);
}
vm_page_t *vm_page_get(phys_addr_t addr)
{
size_t pfn = addr / VM_PAGE_SIZE;
return pfn < page_array_count ? &page_array[pfn] : NULL;
}
phys_addr_t vm_page_get_paddr(vm_page_t *pg)
{
return vm_page_get_pfn(pg) * VM_PAGE_SIZE;
}
void *vm_page_get_vaddr(vm_page_t *pg)
{
return (void *)((char *)tmp_vaddr_base + (vm_page_get_pfn(pg) * VM_PAGE_SIZE));
}
size_t vm_page_get_pfn(vm_page_t *pg)
{
return ((uintptr_t)pg - (uintptr_t)page_array) / sizeof *pg;
}
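/* Illustrative: with VM_PAGE_SIZE == 0x1000, the frame at physical 0x5000
has pfn 5, so vm_page_get(0x5432) == &page_array[5] and
vm_page_get_paddr(&page_array[5]) == 0x5000. */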
size_t vm_page_order_to_bytes(vm_page_order_t order)
{
if (order < 0 || order > VM_PAGE_MAX_ORDER) {
return 0;
}
return page_order_bytes[order];
}
size_t vm_page_order_to_pages(vm_page_order_t order)
{
if (order < 0 || order > VM_PAGE_MAX_ORDER) {
return 0;
}
return page_order_bytes[order] >> VM_PAGE_SHIFT;
}
vm_alignment_t vm_page_order_to_alignment(vm_page_order_t order)
{
if (order < 0 || order > VM_PAGE_MAX_ORDER) {
return 0;
}
return ~(page_order_bytes[order] - 1);
}
size_t vm_bytes_to_pages(size_t bytes)
{
if (bytes & (VM_PAGE_SIZE-1)) {
bytes &= ~(VM_PAGE_SIZE-1);
bytes += VM_PAGE_SIZE;
}
bytes >>= VM_PAGE_SHIFT;
return bytes;
}
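/* Illustrative: vm_bytes_to_pages rounds up to whole frames, e.g.
0x1801 bytes -> rounded to 0x2000 -> 2 pages; 0x1000 bytes -> 1 page. */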
vm_zone_t *vm_page_get_zone(vm_page_t *pg)
{
vm_pg_data_t *node = vm_pg_data_get(pg->p_node);
if (!node) {
return NULL;
}
if (pg->p_zone >= VM_MAX_ZONES) {
return NULL;
}
return &node->pg_zones[pg->p_zone];
}
vm_page_t *vm_page_alloc(vm_page_order_t order, vm_flags_t flags)
{
/* TODO prefer nodes closer to us */
vm_pg_data_t *node = vm_pg_data_get(0);
if (!node) {
return NULL;
}
vm_zone_id_t zone_id = VM_ZONE_HIGHMEM;
if (flags & VM_GET_DMA) {
zone_id = VM_ZONE_DMA;
}
while (1) {
vm_zone_t *z = &node->pg_zones[zone_id];
vm_page_t *pg = vm_zone_alloc_page(z, order, flags);
if (pg) {
return pg;
}
if (zone_id == VM_ZONE_MIN) {
break;
}
zone_id--;
}
return NULL;
}
void vm_page_free(vm_page_t *pg)
{
vm_zone_t *z = vm_page_get_zone(pg);
if (!z) {
return;
}
vm_zone_free_page(z, pg);
}
int vm_page_split(vm_page_t *pg, vm_page_t **a, vm_page_t **b)
{
if (pg->p_order == VM_PAGE_MIN_ORDER) {
return -1;
}
/* NOTE that we cannot use vm_page_foreach here,
as we are modifying the flags that vm_page_foreach
uses to determine where a given page block ends */
size_t nr_frames = vm_page_order_to_pages(pg->p_order);
for (size_t i = 0; i < nr_frames; i++) {
pg[i].p_order--;
}
vm_page_t *buddy = vm_page_get_buddy(pg);
if (pg->p_order == VM_PAGE_MIN_ORDER) {
pg->p_flags &= ~(VM_PAGE_HUGE | VM_PAGE_HEAD);
buddy->p_flags &= ~(VM_PAGE_HUGE | VM_PAGE_HEAD);
} else {
pg->p_flags |= VM_PAGE_HEAD | VM_PAGE_HUGE;
buddy->p_flags |= VM_PAGE_HEAD | VM_PAGE_HUGE;
}
*a = pg;
*b = buddy;
return 0;
}
vm_page_t *vm_page_merge(vm_page_t *a, vm_page_t *b)
{
if (a->p_order != b->p_order) {
return NULL;
}
if (a->p_order == VM_PAGE_MAX_ORDER) {
return NULL;
}
if (vm_page_get_buddy(a) != b) {
return NULL;
}
if ((a->p_flags & (VM_PAGE_ALLOC | VM_PAGE_RESERVED)) != (b->p_flags & (VM_PAGE_ALLOC | VM_PAGE_RESERVED))) {
return NULL;
}
/* make sure that a comes before b */
if (a > b) {
vm_page_t *tmp = a;
a = b;
b = tmp;
}
a->p_order++;
/* NOTE that we cannot use vm_page_foreach here,
as we are modifying the flags that vm_page_foreach
uses to determine where a given page block ends */
size_t nr_frames = vm_page_order_to_pages(a->p_order);
for (size_t i = 0; i < nr_frames; i++) {
a[i].p_flags &= ~VM_PAGE_HEAD;
a[i].p_flags |= VM_PAGE_HUGE;
a[i].p_order = a->p_order;
}
a->p_flags |= VM_PAGE_HEAD;
return a;
}
vm_page_t *vm_page_get_buddy(vm_page_t *pg)
{
phys_addr_t paddr = vm_page_get_paddr(pg);
paddr = paddr ^ vm_page_order_to_bytes(pg->p_order);
return vm_page_get(paddr);
}
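/* Illustrative: the buddy of a block is found by flipping the bit of its
own size in the physical address, e.g. for an 8K block (0x2000 bytes) at
0x4000, the buddy is at 0x4000 ^ 0x2000 == 0x6000, and vice versa. */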
vm_page_t *vm_page_get_next_tail(vm_page_t *pg)
{
if (vm_page_get_pfn(pg) + 1 >= page_array_count) {
return NULL;
}
vm_page_t *next = pg + 1;
if ((next->p_flags & VM_PAGE_HEAD) || !(next->p_flags & VM_PAGE_HUGE)) {
return NULL;
}
return next;
}


@@ -1,231 +0,0 @@
#include <socks/locks.h>
#include <socks/queue.h>
#include <socks/types.h>
#include <socks/vm.h>
#include <string.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>
#include <stdlib.h>
static vm_page_t *group_pages_into_block(vm_zone_t *z, phys_addr_t base, phys_addr_t limit, int order)
{
vm_page_t *first_page = NULL;
for (phys_addr_t i = base; i < limit; i += VM_PAGE_SIZE) {
vm_page_t *pg = vm_page_get(i);
if (order != VM_PAGE_MIN_ORDER) {
pg->p_flags |= VM_PAGE_HUGE;
}
if (i == base) {
pg->p_flags |= VM_PAGE_HEAD;
first_page = pg;
}
pg->p_order = order;
pg->p_node = z->z_info.zd_node;
pg->p_zone = z->z_info.zd_id;
}
return first_page;
}
static void convert_region_to_blocks(vm_zone_t *zone,
phys_addr_t base, phys_addr_t limit,
int reserved)
{
size_t block_frames = vm_bytes_to_pages(limit - base + 1);
printf("adding region %08zx-%08zx (%zu frames) to zone %s\n",
base, limit, block_frames, zone->z_info.zd_name);
int reset_order = 0;
for (int order = VM_PAGE_MAX_ORDER; order >= VM_PAGE_MIN_ORDER; ) {
size_t order_frames = vm_page_order_to_pages(order);
vm_alignment_t order_alignment = vm_page_order_to_alignment(order);
if (order_frames > block_frames) {
order--;
continue;
}
if (!VM_CHECK_ALIGN(base, order_alignment)) {
reset_order = 1;
order--;
continue;
}
printf("%s: %zu %s pages at %08" PRIxPTR "\n",
zone->z_info.zd_name,
order_frames,
reserved == 1 ? "reserved" : "free",
base);
phys_addr_t block_limit = base + (order_frames * VM_PAGE_SIZE) - 1;
vm_page_t *block_page = group_pages_into_block(zone, base, block_limit, order);
if (reserved == 0) {
queue_push_back(&zone->z_free_pages[order], &block_page->p_list);
}
base = block_limit + 1;
block_frames -= order_frames;
if (reset_order) {
order = VM_PAGE_MAX_ORDER;
reset_order = 0;
}
if (base > limit + 1) {
printf("too many pages created! %zx > %zx\n", base, limit + 1);
abort();
}
if (base == limit + 1) {
break;
}
}
}
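/* Illustrative walk-through: for a free region 0x0-0x9fff (10 frames),
the largest aligned order that fits is 32K (8 frames), giving a block at
0x0-0x7fff; the remaining 2 frames at 0x8000 become one 8K block, so the
region is carved into one 32K block and one 8K block. */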
void vm_zone_init(vm_zone_t *z, const vm_zone_descriptor_t *zone_info)
{
if (!vm_page_get(zone_info->zd_base)) {
return;
}
printf("initialising zone %s (%08zx-%08zx)\n",
zone_info->zd_name, zone_info->zd_base, zone_info->zd_limit);
memset(z, 0x0, sizeof *z);
memcpy(&z->z_info, zone_info, sizeof *zone_info);
z->z_lock = SPIN_LOCK_INIT;
unsigned long flags;
spin_lock_irqsave(&z->z_lock, &flags);
phys_addr_t block_start = zone_info->zd_base, block_end = zone_info->zd_limit;
int this_page_reserved = 0, last_page_reserved = -1;
for (uintptr_t i = zone_info->zd_base; i < zone_info->zd_limit; i += VM_PAGE_SIZE) {
vm_page_t *pg = vm_page_get(i);
if (!pg) {
break;
}
this_page_reserved = (pg->p_flags & VM_PAGE_RESERVED) ? 1 : 0;
if (last_page_reserved == -1) {
last_page_reserved = this_page_reserved;
}
if (this_page_reserved == last_page_reserved) {
block_end = i;
continue;
}
convert_region_to_blocks(z, block_start, block_end + VM_PAGE_SIZE - 1, last_page_reserved);
block_start = i;
block_end = i;
last_page_reserved = this_page_reserved;
}
/* flush the final run of pages, if the loop visited any */
if (last_page_reserved != -1) {
convert_region_to_blocks(z, block_start, block_end + VM_PAGE_SIZE - 1, last_page_reserved);
}
spin_unlock_irqrestore(&z->z_lock, flags);
}
static int replenish_free_page_list(vm_zone_t *z, vm_page_order_t order)
{
if (!queue_empty(&z->z_free_pages[order])) {
/* we already have pages available. */
return 0;
}
if (order == VM_PAGE_MAX_ORDER) {
/* there are no larger pages to split, so just give up. */
return -1;
}
/* the lowest page order that is >= `order` and still has pages available */
vm_page_order_t first_order_with_free = VM_MAX_PAGE_ORDERS;
for (vm_page_order_t i = order; i <= VM_PAGE_MAX_ORDER; i++) {
if (!queue_empty(&z->z_free_pages[i])) {
first_order_with_free = i;
break;
}
}
if (first_order_with_free == VM_MAX_PAGE_ORDERS) {
/* there are no pages available to split */
return -1;
}
if (first_order_with_free == order) {
/* there are free pages of the requested order, so nothing needs to be done */
return 0;
}
/* starting from the first page list with free pages,
take a page, split it in half, and add the sub-pages
to the next order's free list. */
for (vm_page_order_t i = first_order_with_free; i > order; i--) {
queue_entry_t *pg_entry = queue_pop_front(&z->z_free_pages[i]);
vm_page_t *pg = QUEUE_CONTAINER(vm_page_t, p_list, pg_entry);
vm_page_t *a, *b;
vm_page_split(pg, &a, &b);
queue_push_back(&z->z_free_pages[i - 1], &a->p_list);
queue_push_back(&z->z_free_pages[i - 1], &b->p_list);
}
return 0;
}
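/* Illustrative: if a 4K page is requested and only one 16K block is free,
the 16K block is split into two 8K blocks, then one of those into two 4K
pages; afterwards the zone holds one free 8K block and two free 4K pages,
one of which satisfies the request. */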
vm_page_t *vm_zone_alloc_page(vm_zone_t *z, vm_page_order_t order, vm_flags_t flags)
{
unsigned long irq_flags;
spin_lock_irqsave(&z->z_lock, &irq_flags);
int result = replenish_free_page_list(z, order);
if (result != 0) {
spin_unlock_irqrestore(&z->z_lock, irq_flags);
return NULL;
}
queue_entry_t *pg_entry = queue_pop_front(&z->z_free_pages[order]);
vm_page_t *pg = QUEUE_CONTAINER(vm_page_t, p_list, pg_entry);
vm_page_foreach (pg, i) {
i->p_flags |= VM_PAGE_ALLOC;
}
spin_unlock_irqrestore(&z->z_lock, irq_flags);
return pg;
}
void vm_zone_free_page(vm_zone_t *z, vm_page_t *pg)
{
unsigned long irq_flags;
spin_lock_irqsave(&z->z_lock, &irq_flags);
pg->p_flags &= ~VM_PAGE_ALLOC;
queue_push_back(&z->z_free_pages[pg->p_order], &pg->p_list);
while (1) {
vm_page_t *buddy = vm_page_get_buddy(pg);
vm_page_t *huge = vm_page_merge(pg, buddy);
if (!huge) {
break;
}
queue_delete(&z->z_free_pages[buddy->p_order - 1], &buddy->p_list);
queue_delete(&z->z_free_pages[buddy->p_order - 1], &pg->p_list);
queue_push_back(&z->z_free_pages[huge->p_order], &huge->p_list);
pg = huge;
}
spin_unlock_irqrestore(&z->z_lock, irq_flags);
}
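/* Illustrative: freeing the 4K page at 0x1000 while 0x0 is already free
merges them into an 8K block at 0x0; if the 8K buddy at 0x2000 is also
free, the loop keeps merging upwards into a 16K block, and so on until a
buddy is unavailable or VM_PAGE_MAX_ORDER is reached. */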