vm: implement a sparse memory model

This commit is contained in:
2023-02-08 17:13:01 +00:00
parent 6690572bf3
commit a7d4166c89
4 changed files with 285 additions and 9 deletions

View File

@@ -14,6 +14,8 @@
#define VM_MAX_ZONES (VM_ZONE_MAX + 1)
/* maximum number of supported page orders */
#define VM_MAX_PAGE_ORDERS (VM_PAGE_MAX_ORDER + 1)
/* maximum number of sparse memory sectors */
#define VM_MAX_SECTORS 1024
#define VM_CHECK_ALIGN(p, mask) ((((p) & (mask)) == (p)) ? 1 : 0)
@@ -189,15 +191,16 @@ typedef struct vm_slab {
typedef struct vm_page {
/* order of the page block that this page belongs to */
uint16_t p_order : 4;
uint32_t p_order : 4;
/* the id of the NUMA node that this page belongs to */
uint16_t p_node : 6;
uint32_t p_node : 6;
/* the id of the memory zone that this page belongs to */
uint16_t p_zone : 3;
/* some unused bits */
uint16_t p_reserved : 3;
uint32_t p_zone : 3;
/* the id of the sparse memory sector that this page belongs to */
uint32_t p_sector : 11;
/* some unused bits */
uint32_t p_reserved : 8;
uint32_t p_flags;
/* multi-purpose list.
@@ -216,6 +219,21 @@ typedef struct vm_page {
} __attribute__((aligned(2 * sizeof(unsigned long)))) vm_page_t;
/* represents a sector of memory, containing its own array of vm_pages.
this struct is used under the sparse memory model, instead of the
global vm_page array */
typedef struct vm_sector {
/* sector size, stored as a page order (the byte size it denotes
is a power of 2). all sectors in the system have the same size. */
vm_page_order_t s_size;
/* PFN of the first page contained in s_pages.
to find the PFN of any page contained within s_pages,
simply add its offset within the array to s_first_pfn */
size_t s_first_pfn;
/* array of pages contained in this sector. allocated on demand;
NULL while the sector contains only reserved memory */
vm_page_t *s_pages;
} vm_sector_t;
extern kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones);
extern vm_model_t vm_memory_model(void);
extern void vm_set_memory_model(vm_model_t model);
@@ -264,4 +282,9 @@ extern void vm_flat_init(void);
extern vm_page_t *vm_page_get_flat(phys_addr_t addr);
extern size_t vm_page_get_pfn_flat(vm_page_t *pg);
/* Sparse memory model functions */
extern void vm_sparse_init(void);
extern vm_page_t *vm_page_get_sparse(phys_addr_t addr);
extern size_t vm_page_get_pfn_sparse(vm_page_t *pg);
#endif

View File

@@ -3,6 +3,7 @@
#include <socks/vm.h>
#include <socks/memblock.h>
#include <socks/printk.h>
#include <socks/machine/cpu.h>
#include <stddef.h>
#include <limits.h>
#include <stdint.h>
@@ -18,8 +19,10 @@ kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones)
node_data = memblock_alloc(sizeof(vm_pg_data_t) * numa_count, 8);
printk("vm: initialising %u node%s", numa_count, numa_count > 1 ? "s" : "");
vm_set_memory_model(VM_MODEL_FLAT);
vm_flat_init();
/* TODO select which memory model to use automatically, and add
a kernel boot parameter to override the choice */
vm_set_memory_model(VM_MODEL_SPARSE);
vm_sparse_init();
for (size_t i = 0; i < nr_zones; i++) {
vm_zone_init(&node_data->pg_zones[zones[i].zd_id], &zones[i]);

View File

@@ -1,3 +1,20 @@
/* ### The flat memory model ###
under this memory model, the system memory is represented by
a single contiguous array of vm_pages. this array spans from
physical address zero up to the last available byte, as provided by
memblock. any extra reserved regions after the last available
byte will not be included to save memory.
this memory model is good for systems with a smaller amount of
physical memory that is mostly contiguous with few holes or
reserved regions. it is simpler and has less overhead.
for systems with a large amount of memory, or with large
amounts of reserved memory (especially those whose reserved
memory outstrips free memory), the sparse memory model may
be a better choice.
*/
#include <socks/vm.h>
#include <socks/memblock.h>
#include <socks/printk.h>
@@ -8,7 +25,7 @@ static vm_page_t *page_array = NULL;
/* number of pages stored in page_array */
static size_t page_array_count = 0;
void vm_flat_init()
void vm_flat_init(void)
{
printk("vm: using flat memory model");
size_t pmem_size = 0;

233
vm/sparse.c Normal file
View File

@@ -0,0 +1,233 @@
/* ### The sparse memory model ###
under this memory model, the system memory is represented by
a set of sectors. each sector has the same, fixed, power-of-2
size, and has its own array of vm_pages. unlike the flat memory
model, each sector's vm_page array is allocated on demand, only
once the sector is known to contain usable memory.
under this memory model, only memory frames that are usable by
the kernel will have an associated vm_page. the per-sector
bookkeeping adds some overhead, but this is mitigated by
fewer vm_pages being allocated.
on top of this, any sector that ONLY contains reserved memory
can forego allocating their vm_page pointer array entirely,
saving even more memory.
this memory model is good for systems with large amounts of
memory, or those with less memory but a high percentage of
reserved memory. if this is not the case, the memory savings
of the sparse memory model may be outweighed by the extra
overhead, and the flat memory model may be a better choice.
*/
#include <socks/vm.h>
#include <socks/printk.h>
#include <socks/memblock.h>
#include <socks/util.h>
#include <socks/machine/cpu.h>
static vm_sector_t *sector_array = NULL;
static size_t sector_array_count = 0;
/* map a physical address to its sector and to the page's index within
   that sector's page array. sector_id and index may each be NULL if
   the caller does not need that output.
   NOTE(review): no bounds check against sector_array_count — callers
   must pass addresses covered by the sector array. */
static vm_sector_t *phys_addr_to_sector_and_index(phys_addr_t addr, size_t *sector_id, size_t *index)
{
	/* every sector has the same size, so sector 0 is representative */
	size_t step = vm_page_order_to_bytes(sector_array[0].s_size);
	phys_addr_t aligned = addr & ~VM_PAGE_MASK;
	size_t sector = div64_pow2(aligned, step);
	/* index = page frame number relative to the sector's first PFN */
	size_t pfn = aligned >> VM_PAGE_SHIFT;
	size_t sector_first_pfn = (sector * step) >> VM_PAGE_SHIFT;
	if (sector_id) {
		*sector_id = sector;
	}
	if (index) {
		*index = pfn - sector_first_pfn;
	}
	return &sector_array[sector];
}
/* return the vm_page for the given physical address, lazily allocating
   the owning sector's page array on first use.
   NOTE: the caller must only pass addresses covered by the sector
   array; no bounds checking is performed here. */
static vm_page_t *get_or_create_page(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);
	vm_sector_t *sector = &sector_array[sector_number];
	if (!sector->s_pages) {
		size_t nr_pages = vm_page_order_to_pages(sector->s_size);
		/* BUG FIX: the array elements are vm_page_t structs, not
		   pointers — sizing the allocation with sizeof(vm_page_t *)
		   under-allocated the array and overflowed the heap */
		sector->s_pages = kmalloc(nr_pages * sizeof(vm_page_t), 0);
		/* TODO handle allocation failure once panic() is implemented */
		/* zero every entry and tag it with its sector id up front, so
		   pages looked up later without being explicitly created (e.g.
		   reserved pages inside a partially-free sector) never carry
		   stale bits or a wrong p_sector */
		memset(sector->s_pages, 0x0, nr_pages * sizeof(vm_page_t));
		for (size_t p = 0; p < nr_pages; p++) {
			sector->s_pages[p].p_sector = sector_number;
		}
		/* %zu: sector_number is a size_t (was %u) */
		printk("allocated page array for sector %zu", sector_number);
	}
	return &sector->s_pages[page_number];
}
/* find the smallest page order whose size, multiplied by the maximum
   number of sectors, is large enough to cover all of physical memory */
static vm_page_order_t find_minimum_sector_size(size_t pmem_size)
{
	vm_page_order_t order = VM_PAGE_4K;
	while (order < VM_PAGE_64G) {
		if (vm_page_order_to_bytes(order) * VM_MAX_SECTORS >= pmem_size) {
			return order;
		}
		order++;
	}
	/* even the largest order cannot cover pmem_size in
	   VM_MAX_SECTORS sectors.
	   TODO panic here, once panic() is implemented. */
	return VM_PAGE_64G;
}
/* choose the sector size and sector count for this system, weighing
   the total amount of physical memory against how much of it is
   reserved vs free.
   the thresholds used here are untested heuristics and are in need
   of improvement to ensure that sparse works well on a wide range
   of systems. */
static void calculate_sector_size_and_count(size_t pmem_size, size_t reserved_size, unsigned int *out_sector_count, vm_page_order_t *out_sector_size)
{
	/* start from the smallest order that covers all of physical
	   memory within VM_MAX_SECTORS sectors, but never go below 2M —
	   tiny sectors cause excessive overhead on low-memory systems */
	vm_page_order_t order = find_minimum_sector_size(pmem_size);
	if (order <= VM_PAGE_2M) {
		order = VM_PAGE_2M;
	}

	size_t free_size = pmem_size - reserved_size;
	/* absolute difference between free and reserved memory */
	size_t memdiff = absdiff64(free_size, reserved_size);

	/* when free memory outweighs reserved memory, wasting vm_pages
	   on reserved frames matters less, so prefer a larger sector.
	   the bump only applies below a threshold, and a particularly
	   large imbalance earns one further bump. */
	if (free_size > reserved_size && order < VM_PAGE_256M) {
		order++;
		if (memdiff >= 0x1000000) {
			order++;
		}
	}

	/* round pmem_size up to a whole number of sectors; the mask
	   arithmetic is valid because sector sizes are powers of 2 */
	size_t bytes = vm_page_order_to_bytes(order);
	size_t covered = pmem_size;
	if (covered & (bytes - 1)) {
		covered = (covered & ~(bytes - 1)) + bytes;
	}

	*out_sector_count = div64_pow2(covered, bytes);
	*out_sector_size = order;
}
/* initialise the sparse memory model: size the sector array from the
   memblock-provided memory map, then create vm_pages for all free
   memory and mark reserved pages. */
void vm_sparse_init(void)
{
	printk("vm: using sparse memory model");

	/* pmem_size: one byte past the highest usable physical address.
	   reserved_size: total bytes of reserved memory. */
	size_t pmem_size = 0, reserved_size = 0;
	memblock_iter_t it;
	for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
		if (pmem_size < it.it_limit + 1) {
			pmem_size = it.it_limit + 1;
		}
	}
	for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) {
		reserved_size += it.it_limit - it.it_base + 1;
	}

	vm_page_order_t sector_size;
	size_t sector_bytes = 0;
	unsigned int nr_sectors = 0;
	calculate_sector_size_and_count(pmem_size, reserved_size, &nr_sectors, &sector_size);
	sector_bytes = vm_page_order_to_bytes(sector_size);

	char sector_size_str[64];
	data_size_to_string(sector_bytes, sector_size_str, sizeof sector_size_str);

	/* TODO check for allocation failure once panic() is implemented */
	sector_array = memblock_alloc(sizeof(vm_sector_t) * nr_sectors, 8);
	sector_array_count = nr_sectors;
	for (unsigned int i = 0; i < nr_sectors; i++) {
		sector_array[i].s_size = sector_size;
		sector_array[i].s_first_pfn = (i * sector_bytes) >> VM_PAGE_SHIFT;
		/* BUG FIX: s_pages was never initialised; get_or_create_page
		   tests it for NULL, so it must not be left as garbage */
		sector_array[i].s_pages = NULL;
	}

	/* create a vm_page for every page of free memory */
	for_each_free_mem_range (&it, 0x0, UINTPTR_MAX) {
		/* round the base up to the next page boundary: a partial
		   leading page is not usable */
		if (it.it_base & VM_PAGE_MASK) {
			it.it_base &= ~VM_PAGE_MASK;
			it.it_base += VM_PAGE_SIZE;
		}
		for (uintptr_t addr = it.it_base; addr < it.it_limit; addr += VM_PAGE_SIZE) {
			size_t sector_number;
			phys_addr_to_sector_and_index(addr, &sector_number, NULL);
			vm_page_t *pg = get_or_create_page(addr);
			memset(pg, 0x0, sizeof *pg);
			/* BUG FIX: the memset above wipes the bitfields, so the
			   sector id (which vm_page_get_pfn_sparse relies on)
			   must be restored afterwards */
			pg->p_sector = sector_number;
		}
	}

	/* mark every reserved page that has a vm_page as reserved */
	for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) {
		/* NOTE(review): rounding the base UP leaves a partially
		   reserved leading page unmarked — confirm memblock never
		   reports such a page as free */
		if (it.it_base & VM_PAGE_MASK) {
			it.it_base &= ~VM_PAGE_MASK;
			it.it_base += VM_PAGE_SIZE;
		}
		for (uintptr_t addr = it.it_base; addr < it.it_limit; addr += VM_PAGE_SIZE) {
			vm_page_t *pg = vm_page_get(addr);
			if (!pg) {
				/* if the page doesn't exist, it is part of a sector
				   that only contains reserved pages. a NULL page
				   is implicitly treated as reserved */
				continue;
			}
			size_t sector_number;
			phys_addr_to_sector_and_index(addr, &sector_number, NULL);
			memset(pg, 0x0, sizeof *pg);
			/* as above: re-tag the page with its sector id after
			   zeroing it */
			pg->p_sector = sector_number;
			pg->p_flags = VM_PAGE_RESERVED;
		}
	}

	/* BUG FIX: removed a stray debug lookup of 0x3f00000 that was
	   left in here, and %zu -> %u since nr_sectors is unsigned int */
	printk("vm: initialised %u sectors of size %s", nr_sectors, sector_size_str);
}
/* look up the vm_page for a physical address, or NULL if the owning
   sector has no page array (i.e. it contains only reserved memory) */
vm_page_t *vm_page_get_sparse(phys_addr_t addr)
{
	size_t sector_number, page_number;
	vm_sector_t *sector = phys_addr_to_sector_and_index(addr, &sector_number, &page_number);
	return sector->s_pages ? &sector->s_pages[page_number] : NULL;
}
/* compute the page frame number of a vm_page: the owning sector's base
   PFN plus the page's index within that sector's page array */
size_t vm_page_get_pfn_sparse(vm_page_t *pg)
{
	vm_sector_t *sector = &sector_array[pg->p_sector];
	ptrdiff_t index = pg - sector->s_pages;
	return sector->s_first_pfn + (size_t)index;
}