vm: implement a sparse memory model
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
#include <socks/vm.h>
|
||||
#include <socks/memblock.h>
|
||||
#include <socks/printk.h>
|
||||
#include <socks/machine/cpu.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
@@ -18,8 +19,10 @@ kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones)
|
||||
node_data = memblock_alloc(sizeof(vm_pg_data_t) * numa_count, 8);
|
||||
printk("vm: initialising %u node%s", numa_count, numa_count > 1 ? "s" : "");
|
||||
|
||||
vm_set_memory_model(VM_MODEL_FLAT);
|
||||
vm_flat_init();
|
||||
/* TODO select which memory model to use automatically, and add
|
||||
a kernel boot parameter to override the choice */
|
||||
vm_set_memory_model(VM_MODEL_SPARSE);
|
||||
vm_sparse_init();
|
||||
|
||||
for (size_t i = 0; i < nr_zones; i++) {
|
||||
vm_zone_init(&node_data->pg_zones[zones[i].zd_id], &zones[i]);
|
||||
|
||||
19
vm/flat.c
19
vm/flat.c
@@ -1,3 +1,20 @@
|
||||
/* ### The flat memory model ###
|
||||
|
||||
under this memory model, the system memory is represented by
|
||||
a single contiguous array of vm_pages. this array spans from
|
||||
physical address up to the last available byte, as provided by
|
||||
memblock. any extra reserved regions after the last available
|
||||
byte will not be included to save memory.
|
||||
|
||||
this memory model is good for systems with a smaller amount of
|
||||
physical memory that is mostly contiguous with few holes or
|
||||
reserved regions. it is simpler and has less overhead.
|
||||
|
||||
for systems with a large amount of memory, or with large
|
||||
amounts of reserved memory (especially those whose reserved
|
||||
memory outstrips free memory), the sparse memory model may
|
||||
be a better choice.
|
||||
*/
|
||||
#include <socks/vm.h>
|
||||
#include <socks/memblock.h>
|
||||
#include <socks/printk.h>
|
||||
@@ -8,7 +25,7 @@ static vm_page_t *page_array = NULL;
|
||||
/* number of pages stored in page_array */
|
||||
static size_t page_array_count = 0;
|
||||
|
||||
void vm_flat_init()
|
||||
void vm_flat_init(void)
|
||||
{
|
||||
printk("vm: using flat memory model");
|
||||
size_t pmem_size = 0;
|
||||
|
||||
233
vm/sparse.c
Normal file
233
vm/sparse.c
Normal file
@@ -0,0 +1,233 @@
|
||||
/* ### The sparse memory model ###
|
||||
|
||||
under this memory model, the system memory is represented by
|
||||
a set of sectors. each sector has the same, fixed, power-of-2
|
||||
size, and has its own array of vm_pages. unlike the flat memory
|
||||
model, this is an array of vm_page POINTERS, allowing vm_pages
|
||||
to be allocated on demand.
|
||||
|
||||
under this memory model, only memory frames that are usable by
|
||||
the kernel will have an associated vm_page. the array of pointers
|
||||
adds some overhead, effectively adding an extra pointer's worth
|
||||
of memory to the size of vm_page, but this is mitigated by
|
||||
fewer vm_pages being allocated.
|
||||
|
||||
on top of this, any sector that ONLY contains reserved memory
|
||||
can forego allocating their vm_page pointer array entirely,
|
||||
saving even more memory.
|
||||
|
||||
this memory model is good for systems with large amounts of
|
||||
memory, or those with less memory but a high percentage of
|
||||
reserved memory. if this is not the case, the memory savings
|
||||
of the sparse memory model may be outweighed by the extra
|
||||
overhead, and the flat memory model may be a better choice.
|
||||
*/
|
||||
#include <socks/vm.h>
|
||||
#include <socks/printk.h>
|
||||
#include <socks/memblock.h>
|
||||
#include <socks/util.h>
|
||||
#include <socks/machine/cpu.h>
|
||||
|
||||
static vm_sector_t *sector_array = NULL;
|
||||
static size_t sector_array_count = 0;
|
||||
|
||||
/* translate a physical address into the sector that contains it and the
   index of its page frame within that sector.

   addr:      the physical address to look up. it does not have to be
              page-aligned; the offset within the page is masked off.
   sector_id: if non-NULL, receives the index of the sector in sector_array.
   index:     if non-NULL, receives the index of the page frame, counted
              from the first frame of the sector.

   returns the address of the sector_array entry for the computed sector.

   no bounds checking is done here: sector_array must already be allocated
   (its first entry is read to find the sector size), and the caller must
   make sure the computed sector number is below sector_array_count before
   dereferencing the returned pointer. */
static vm_sector_t *phys_addr_to_sector_and_index(phys_addr_t addr, size_t *sector_id, size_t *index)
{
	/* all sectors have the same size */
	size_t step = vm_page_order_to_bytes(sector_array[0].s_size);
	addr &= ~VM_PAGE_MASK;
	size_t sector = div64_pow2(addr, step);

	/* turn the address into a frame number, then make it relative to
	   the first frame of the sector it falls in */
	addr >>= VM_PAGE_SHIFT;
	addr -= ((sector * step) >> VM_PAGE_SHIFT);

	if (sector_id) {
		*sector_id = sector;
	}

	if (index) {
		*index = addr;
	}

	return &sector_array[sector];
}
|
||||
|
||||
/* return the vm_page that describes the frame containing `addr`,
   allocating the page array of the owning sector first if it does not
   exist yet.

   a freshly allocated page array is fully initialised: every entry is
   zeroed, tagged with its sector number and marked VM_PAGE_RESERVED, so
   frames that the caller never visits (holes inside a sector) do not
   hold garbage. the caller is expected to clear the flags of the
   pages that are actually free.

   returns NULL if the page array could not be allocated.

   no bounds checking is done on `addr`; it must fall inside a sector
   covered by sector_array.

   NOTE(review): this runs from vm_sparse_init during bootstrap; confirm
   that kmalloc is usable that early. */
static vm_page_t *get_or_create_page(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	vm_sector_t *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		size_t nr_pages = vm_page_order_to_pages(sector->s_size);

		/* s_pages is indexed as an array of vm_page structures (not
		   pointers), so it must be sized by the element type */
		sector->s_pages = kmalloc(nr_pages * sizeof *sector->s_pages, 0);
		if (!sector->s_pages) {
			printk("vm: failed to allocate page array for sector %zu", sector_number);
			return NULL;
		}

		printk("allocated page array for sector %zu", sector_number);

		for (size_t i = 0; i < nr_pages; i++) {
			memset(&sector->s_pages[i], 0x0, sizeof sector->s_pages[i]);
			sector->s_pages[i].p_sector = sector_number;
			sector->s_pages[i].p_flags = VM_PAGE_RESERVED;
		}
	}

	sector->s_pages[page_number].p_sector = sector_number;
	return &sector->s_pages[page_number];
}
|
||||
|
||||
static vm_page_order_t find_minimum_sector_size(size_t pmem_size)
|
||||
{
|
||||
for (vm_page_order_t i = VM_PAGE_4K; i < VM_PAGE_64G; i++) {
|
||||
size_t order_bytes = vm_page_order_to_bytes(i);
|
||||
if (order_bytes * VM_MAX_SECTORS >= pmem_size) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO panic here, once panic() is implemented. */
|
||||
return VM_PAGE_64G;
|
||||
}
|
||||
|
||||
/* this function is called to calculate the optimal sector size for the system,
|
||||
taking in to account factors like the total system memory and how much memory
|
||||
is reserved vs free.
|
||||
|
||||
this function uses some heuristics and thresholds that are untested and
|
||||
are in need of improvement to ensure that sparse works well on a wide
|
||||
range of systems. */
|
||||
static void calculate_sector_size_and_count(size_t pmem_size, size_t reserved_size, unsigned int *out_sector_count, vm_page_order_t *out_sector_size)
|
||||
{
|
||||
/* we can support up to VM_MAX_SECTORS memory sectors.
|
||||
the minimum sector size is what ever is required
|
||||
to cover all of physical memory in the maximum number of sectors */
|
||||
vm_page_order_t sector_size = find_minimum_sector_size(pmem_size);
|
||||
|
||||
if (sector_size <= VM_PAGE_2M) {
|
||||
/* override really small sector sizes with something
|
||||
more reasonable, to avoid excessive overhead on
|
||||
low-memory systems */
|
||||
sector_size = VM_PAGE_2M;
|
||||
}
|
||||
|
||||
size_t free_size = pmem_size - reserved_size;
|
||||
/* the absolute difference between the amount of free memory and
|
||||
the amount of reserved memory. */
|
||||
size_t memdiff = absdiff64(free_size, reserved_size);
|
||||
|
||||
if (free_size > reserved_size && sector_size < VM_PAGE_256M) {
|
||||
/* if there is more free memory than reserved, we can choose
|
||||
a bigger sector size, as we don't have to worry as much
|
||||
about wasting memory allocating vm_pages for reserved frames.
|
||||
|
||||
we only do this bump if the sector size is below a certain
|
||||
threshold. */
|
||||
sector_size++;
|
||||
|
||||
/* if the difference is particularly big, increase the sector size
|
||||
even further */
|
||||
if (memdiff >= 0x1000000) {
|
||||
sector_size++;
|
||||
}
|
||||
}
|
||||
|
||||
/* round pmem_size up to the next multiple of sector_bytes.
|
||||
this works because sector_bytes is guaranteed to be a
|
||||
power of 2. */
|
||||
size_t sector_bytes = vm_page_order_to_bytes(sector_size);
|
||||
|
||||
if (pmem_size & (sector_bytes - 1)) {
|
||||
pmem_size &= ~(sector_bytes - 1);
|
||||
pmem_size += sector_bytes;
|
||||
}
|
||||
|
||||
size_t sector_count = div64_pow2(pmem_size, sector_bytes);
|
||||
|
||||
*out_sector_count = sector_count;
|
||||
*out_sector_size = sector_size;
|
||||
}
|
||||
|
||||
void vm_sparse_init(void)
|
||||
{
|
||||
printk("vm: using sparse memory model");
|
||||
|
||||
size_t pmem_size = 0, reserved_size = 0;
|
||||
|
||||
memblock_iter_t it;
|
||||
for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
|
||||
if (pmem_size < it.it_limit + 1) {
|
||||
pmem_size = it.it_limit + 1;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) {
|
||||
reserved_size += it.it_limit - it.it_base + 1;
|
||||
}
|
||||
|
||||
vm_page_order_t sector_size;
|
||||
size_t sector_bytes = 0;
|
||||
unsigned int nr_sectors = 0;
|
||||
calculate_sector_size_and_count(pmem_size, reserved_size, &nr_sectors, §or_size);
|
||||
sector_bytes = vm_page_order_to_bytes(sector_size);
|
||||
|
||||
char sector_size_str[64];
|
||||
data_size_to_string(sector_bytes, sector_size_str, sizeof sector_size_str);
|
||||
|
||||
sector_array = memblock_alloc(sizeof(vm_sector_t) * nr_sectors, 8);
|
||||
sector_array_count = nr_sectors;
|
||||
|
||||
for (unsigned int i = 0; i < nr_sectors; i++) {
|
||||
sector_array[i].s_size = sector_size;
|
||||
sector_array[i].s_first_pfn = (i * sector_bytes) >> VM_PAGE_SHIFT;
|
||||
}
|
||||
|
||||
size_t s, i;
|
||||
phys_addr_to_sector_and_index(0x3f00000, &s, &i);
|
||||
|
||||
for_each_free_mem_range(&it, 0x0, UINTPTR_MAX) {
|
||||
if (it.it_base & VM_PAGE_MASK) {
|
||||
it.it_base &= ~VM_PAGE_MASK;
|
||||
it.it_base += VM_PAGE_SIZE;
|
||||
}
|
||||
|
||||
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
||||
vm_page_t *pg = get_or_create_page(i);
|
||||
memset(pg, 0x0, sizeof *pg);
|
||||
}
|
||||
}
|
||||
|
||||
for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) {
|
||||
if (it.it_base & VM_PAGE_MASK) {
|
||||
it.it_base &= ~VM_PAGE_MASK;
|
||||
it.it_base += VM_PAGE_SIZE;
|
||||
}
|
||||
|
||||
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
||||
vm_page_t *pg = vm_page_get(i);
|
||||
|
||||
if (!pg) {
|
||||
/* if the page doesn't exist, it is part of a sector
|
||||
that only contains reserved pages. a NULL page
|
||||
is implicitly treated as reserved */
|
||||
continue;
|
||||
}
|
||||
|
||||
memset(pg, 0x0, sizeof *pg);
|
||||
pg->p_flags = VM_PAGE_RESERVED;
|
||||
}
|
||||
}
|
||||
|
||||
printk("vm: initialised %zu sectors of size %s", nr_sectors, sector_size_str);
|
||||
}
|
||||
|
||||
/* look up the vm_page that describes the frame containing `addr`.

   returns NULL if:
    - the sparse model has not been initialised,
    - `addr` lies beyond the last sector, or
    - the sector containing `addr` has no page array (it only contains
      reserved memory).
   callers treat a NULL page as reserved.

   unlike get_or_create_page, this never allocates. */
vm_page_t *vm_page_get_sparse(phys_addr_t addr)
{
	/* the lookup helper reads sector_array[0], so bail out early if
	   there is nothing to read */
	if (!sector_array || sector_array_count == 0) {
		return NULL;
	}

	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	if (sector_number >= sector_array_count) {
		return NULL;
	}

	vm_sector_t *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		return NULL;
	}

	return &sector->s_pages[page_number];
}
|
||||
|
||||
/* convert a vm_page back into its page frame number.

   the owning sector is found through pg->p_sector; the pfn is the first
   pfn of that sector plus the position of `pg` inside the sector's page
   array.

   `pg` must be a page that lives in the page array of the sector named
   by its p_sector field; no validation is performed. */
size_t vm_page_get_pfn_sparse(vm_page_t *pg)
{
	vm_sector_t *sector = &sector_array[pg->p_sector];

	/* byte offset of pg inside the page array, divided by the size of
	   one vm_page, gives its index within the sector */
	return sector->s_first_pfn + (((uintptr_t)pg - (uintptr_t)sector->s_pages) / sizeof *pg);
}
|
||||
Reference in New Issue
Block a user