vm: implement a sparse memory model
This commit is contained in:
@@ -14,6 +14,8 @@
|
||||
/* maximum number of supported memory zones */
#define VM_MAX_ZONES (VM_ZONE_MAX + 1)

/* maximum number of supported page orders */
#define VM_MAX_PAGE_ORDERS (VM_PAGE_MAX_ORDER + 1)

/* maximum number of sparse memory sectors */
#define VM_MAX_SECTORS 1024

/* evaluates to 1 when p has no bits set outside mask, else 0.
   NOTE(review): this tests (p & mask) == p ("p fits within mask"),
   not the usual alignment test (p & mask) == 0 -- confirm intent. */
#define VM_CHECK_ALIGN(p, mask) ((((p) & (mask)) == (p)) ? 1 : 0)
|
||||
|
||||
@@ -189,15 +191,16 @@ typedef struct vm_slab {
|
||||
|
||||
typedef struct vm_page {
|
||||
/* order of the page block that this page belongs too */
|
||||
uint16_t p_order : 4;
|
||||
uint32_t p_order : 4;
|
||||
/* the id of the NUMA node that this page belongs to */
|
||||
uint16_t p_node : 6;
|
||||
uint32_t p_node : 6;
|
||||
/* the id of the memory zone that this page belongs to */
|
||||
uint16_t p_zone : 3;
|
||||
/* some unused bits */
|
||||
uint16_t p_reserved : 3;
|
||||
|
||||
uint32_t p_zone : 3;
|
||||
/* vm_page_flags_t bitfields. */
|
||||
uint32_t p_sector : 11;
|
||||
/* some unused bits */
|
||||
uint32_t p_reserved : 8;
|
||||
|
||||
uint32_t p_flags;
|
||||
|
||||
/* multi-purpose list.
|
||||
@@ -216,6 +219,21 @@ typedef struct vm_page {
|
||||
|
||||
} __attribute__((aligned(2 * sizeof(unsigned long)))) vm_page_t;
|
||||
|
||||
/* represents a sector of memory, containing its own array of vm_pages.
   this struct is used under the sparse memory model, instead of the
   global vm_page array */
typedef struct vm_sector {
	/* sector size. this must be a power of 2.
	   all sectors in the system have the same size. */
	vm_page_order_t s_size;

	/* PFN of the first page contained in s_pages.
	   to find the PFN of any page contained within s_pages,
	   simply add its offset within the array to s_first_pfn */
	size_t s_first_pfn;

	/* array of pages contained in this sector.
	   allocated on demand; expected to start out NULL for sectors
	   that have never been touched (assumes zeroed allocation of
	   the sector array -- TODO confirm) */
	vm_page_t *s_pages;
} vm_sector_t;
|
||||
|
||||
extern kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones);
|
||||
extern vm_model_t vm_memory_model(void);
|
||||
extern void vm_set_memory_model(vm_model_t model);
|
||||
@@ -264,4 +282,9 @@ extern void vm_flat_init(void);
|
||||
extern vm_page_t *vm_page_get_flat(phys_addr_t addr);
|
||||
extern size_t vm_page_get_pfn_flat(vm_page_t *pg);
|
||||
|
||||
/* Sparse memory model functions */
|
||||
extern void vm_sparse_init(void);
|
||||
extern vm_page_t *vm_page_get_sparse(phys_addr_t addr);
|
||||
extern size_t vm_page_get_pfn_sparse(vm_page_t *pg);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <socks/vm.h>
|
||||
#include <socks/memblock.h>
|
||||
#include <socks/printk.h>
|
||||
#include <socks/machine/cpu.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
@@ -18,8 +19,10 @@ kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones)
|
||||
node_data = memblock_alloc(sizeof(vm_pg_data_t) * numa_count, 8);
|
||||
printk("vm: initialising %u node%s", numa_count, numa_count > 1 ? "s" : "");
|
||||
|
||||
vm_set_memory_model(VM_MODEL_FLAT);
|
||||
vm_flat_init();
|
||||
/* TODO select which memory model to use automatically, and add
|
||||
a kernel boot parameter to override the choice */
|
||||
vm_set_memory_model(VM_MODEL_SPARSE);
|
||||
vm_sparse_init();
|
||||
|
||||
for (size_t i = 0; i < nr_zones; i++) {
|
||||
vm_zone_init(&node_data->pg_zones[zones[i].zd_id], &zones[i]);
|
||||
|
||||
19
vm/flat.c
19
vm/flat.c
@@ -1,3 +1,20 @@
|
||||
/* ### The flat memory model ###
|
||||
|
||||
under this memory model, the system memory is represented by
|
||||
a single contiguous array of vm_pages. this array spans from
|
||||
physical address zero up to the last available byte, as provided by
|
||||
memblock. any extra reserved regions after the last available
|
||||
byte will not be included to save memory.
|
||||
|
||||
this memory model is good for systems with a smaller amount of
|
||||
physical memory that is mostly contiguous with few holes or
|
||||
reserved regions. it is simpler and has less overhead.
|
||||
|
||||
for systems with a large amount of memory, or with large
|
||||
amounts of reserved memory (especially those whose reserved
|
||||
memory outstrips free memory), the sparse memory model may
|
||||
be a better choice.
|
||||
*/
|
||||
#include <socks/vm.h>
|
||||
#include <socks/memblock.h>
|
||||
#include <socks/printk.h>
|
||||
@@ -8,7 +25,7 @@ static vm_page_t *page_array = NULL;
|
||||
/* number of pages stored in page_array */
|
||||
static size_t page_array_count = 0;
|
||||
|
||||
void vm_flat_init()
|
||||
void vm_flat_init(void)
|
||||
{
|
||||
printk("vm: using flat memory model");
|
||||
size_t pmem_size = 0;
|
||||
|
||||
233
vm/sparse.c
Normal file
233
vm/sparse.c
Normal file
@@ -0,0 +1,233 @@
|
||||
/* ### The sparse memory model ###
|
||||
|
||||
under this memory model, the system memory is represented by
a set of sectors. each sector has the same, fixed, power-of-2
size, and has its own array of vm_pages. unlike the flat memory
model's single global array, each sector's vm_page array is
allocated on demand, the first time a page in that sector is
needed.

under this memory model, only memory frames that are usable by
the kernel will have an associated vm_page. the per-sector
bookkeeping adds some overhead, but this is mitigated by
fewer vm_pages being allocated.
||||
|
||||
on top of this, any sector that ONLY contains reserved memory
|
||||
can forego allocating their vm_page pointer array entirely,
|
||||
saving even more memory.
|
||||
|
||||
this memory model is good for systems with large amounts of
|
||||
memory, or those with less memory but a high percentage of
|
||||
reserved memory. if this is not the case, the memory savings
|
||||
of the sparse memory model may be outweighed by the extra
|
||||
overhead, and the flat memory model may be a better choice.
|
||||
*/
|
||||
#include <socks/vm.h>
|
||||
#include <socks/printk.h>
|
||||
#include <socks/memblock.h>
|
||||
#include <socks/util.h>
|
||||
#include <socks/machine/cpu.h>
|
||||
|
||||
static vm_sector_t *sector_array = NULL;
|
||||
static size_t sector_array_count = 0;
|
||||
|
||||
/* translate a physical address into the sector that contains it.
   optionally returns the sector's index via sector_id, and the page
   index within that sector's s_pages array via index. */
static vm_sector_t *phys_addr_to_sector_and_index(phys_addr_t addr, size_t *sector_id, size_t *index)
{
	/* all sectors have the same size, so sector 0 is representative */
	size_t step = vm_page_order_to_bytes(sector_array[0].s_size);

	/* align the address down to a page boundary */
	addr &= ~VM_PAGE_MASK;
	size_t sector = div64_pow2(addr, step);

	/* page index within the sector: the page offset from address 0,
	   minus the number of pages in all preceding sectors */
	addr >>= VM_PAGE_SHIFT;
	addr -= ((sector * step) >> VM_PAGE_SHIFT);

	if (sector_id) {
		*sector_id = sector;
	}

	if (index) {
		*index = addr;
	}

	/* was garbled to "§or_array" by an HTML-entity mangled scrape */
	return &sector_array[sector];
}
|
||||
|
||||
/* look up the vm_page for addr, lazily allocating the containing
   sector's page array on first use. returns a pointer to the page
   with its p_sector field set. */
static vm_page_t *get_or_create_page(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	vm_sector_t *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		/* %zu: sector_number is a size_t */
		printk("allocated page array for sector %zu", sector_number);
		/* s_pages holds vm_page_t elements, not pointers: size the
		   allocation by the element type. the previous
		   sizeof(vm_page_t *) under-allocated and overflowed on
		   indexing.
		   NOTE(review): kmalloc result is not checked -- TODO handle
		   allocation failure once a policy exists. */
		sector->s_pages = kmalloc(vm_page_order_to_pages(sector->s_size) * sizeof *sector->s_pages, 0);
	}

	sector->s_pages[page_number].p_sector = sector_number;
	return &sector->s_pages[page_number];
}
|
||||
|
||||
static vm_page_order_t find_minimum_sector_size(size_t pmem_size)
|
||||
{
|
||||
for (vm_page_order_t i = VM_PAGE_4K; i < VM_PAGE_64G; i++) {
|
||||
size_t order_bytes = vm_page_order_to_bytes(i);
|
||||
if (order_bytes * VM_MAX_SECTORS >= pmem_size) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO panic here, once panic() is implemented. */
|
||||
return VM_PAGE_64G;
|
||||
}
|
||||
|
||||
/* this function is called to calculate the optimal sector size for the system,
|
||||
taking in to account factors like the total system memory and how much memory
|
||||
is reserved vs free.
|
||||
|
||||
this function uses some heuristics and thresholds that are untested and
|
||||
are in need of improvement to ensure that sparse works well on a wide
|
||||
range of systems. */
|
||||
/* choose the sector size and sector count for this system, weighing
   the total amount of physical memory against how much of it is
   reserved vs free.

   the heuristics and thresholds below are untested and in need of
   improvement to ensure that sparse works well on a wide range of
   systems. */
static void calculate_sector_size_and_count(size_t pmem_size, size_t reserved_size, unsigned int *out_sector_count, vm_page_order_t *out_sector_size)
{
	/* the floor is whatever size is required to cover all of physical
	   memory within the VM_MAX_SECTORS limit */
	vm_page_order_t order = find_minimum_sector_size(pmem_size);

	/* clamp tiny sector sizes to something more reasonable so that
	   low-memory systems don't pay excessive per-sector overhead */
	if (order <= VM_PAGE_2M) {
		order = VM_PAGE_2M;
	}

	size_t free_size = pmem_size - reserved_size;
	/* absolute difference between free and reserved memory */
	size_t imbalance = absdiff64(free_size, reserved_size);

	if (free_size > reserved_size && order < VM_PAGE_256M) {
		/* free memory dominates: wasting a few vm_pages on reserved
		   frames is cheap, so prefer bigger sectors. only bump while
		   the size is below a threshold. */
		order++;

		/* a particularly large imbalance earns one more bump */
		if (imbalance >= 0x1000000) {
			order++;
		}
	}

	/* round pmem_size up to a whole number of sectors. valid because
	   the sector size is guaranteed to be a power of 2. */
	size_t bytes_per_sector = vm_page_order_to_bytes(order);
	pmem_size = (pmem_size + bytes_per_sector - 1) & ~(bytes_per_sector - 1);

	*out_sector_count = div64_pow2(pmem_size, bytes_per_sector);
	*out_sector_size = order;
}
|
||||
|
||||
void vm_sparse_init(void)
|
||||
{
|
||||
printk("vm: using sparse memory model");
|
||||
|
||||
size_t pmem_size = 0, reserved_size = 0;
|
||||
|
||||
memblock_iter_t it;
|
||||
for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
|
||||
if (pmem_size < it.it_limit + 1) {
|
||||
pmem_size = it.it_limit + 1;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) {
|
||||
reserved_size += it.it_limit - it.it_base + 1;
|
||||
}
|
||||
|
||||
vm_page_order_t sector_size;
|
||||
size_t sector_bytes = 0;
|
||||
unsigned int nr_sectors = 0;
|
||||
calculate_sector_size_and_count(pmem_size, reserved_size, &nr_sectors, §or_size);
|
||||
sector_bytes = vm_page_order_to_bytes(sector_size);
|
||||
|
||||
char sector_size_str[64];
|
||||
data_size_to_string(sector_bytes, sector_size_str, sizeof sector_size_str);
|
||||
|
||||
sector_array = memblock_alloc(sizeof(vm_sector_t) * nr_sectors, 8);
|
||||
sector_array_count = nr_sectors;
|
||||
|
||||
for (unsigned int i = 0; i < nr_sectors; i++) {
|
||||
sector_array[i].s_size = sector_size;
|
||||
sector_array[i].s_first_pfn = (i * sector_bytes) >> VM_PAGE_SHIFT;
|
||||
}
|
||||
|
||||
size_t s, i;
|
||||
phys_addr_to_sector_and_index(0x3f00000, &s, &i);
|
||||
|
||||
for_each_free_mem_range(&it, 0x0, UINTPTR_MAX) {
|
||||
if (it.it_base & VM_PAGE_MASK) {
|
||||
it.it_base &= ~VM_PAGE_MASK;
|
||||
it.it_base += VM_PAGE_SIZE;
|
||||
}
|
||||
|
||||
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
||||
vm_page_t *pg = get_or_create_page(i);
|
||||
memset(pg, 0x0, sizeof *pg);
|
||||
}
|
||||
}
|
||||
|
||||
for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) {
|
||||
if (it.it_base & VM_PAGE_MASK) {
|
||||
it.it_base &= ~VM_PAGE_MASK;
|
||||
it.it_base += VM_PAGE_SIZE;
|
||||
}
|
||||
|
||||
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
||||
vm_page_t *pg = vm_page_get(i);
|
||||
|
||||
if (!pg) {
|
||||
/* if the page doesn't exist, it is part of a sector
|
||||
that only contains reserved pages. a NULL page
|
||||
is implicitly treated as reserved */
|
||||
continue;
|
||||
}
|
||||
|
||||
memset(pg, 0x0, sizeof *pg);
|
||||
pg->p_flags = VM_PAGE_RESERVED;
|
||||
}
|
||||
}
|
||||
|
||||
printk("vm: initialised %zu sectors of size %s", nr_sectors, sector_size_str);
|
||||
}
|
||||
|
||||
/* sparse-model page lookup: return the vm_page for addr, or NULL if
   the address is outside the covered range or its sector has no page
   array (i.e. the frame is implicitly reserved). */
vm_page_t *vm_page_get_sparse(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	/* guard against addresses beyond the last sector, which would
	   otherwise index past the end of sector_array */
	if (sector_number >= sector_array_count) {
		return NULL;
	}

	vm_sector_t *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		return NULL;
	}

	return &sector->s_pages[page_number];
}
|
||||
|
||||
/* sparse-model PFN lookup: pg's offset within its sector's page array
   is its page index, which added to the sector's first PFN gives the
   absolute PFN. relies on pg->p_sector being valid. */
size_t vm_page_get_pfn_sparse(vm_page_t *pg)
{
	vm_sector_t *sector = &sector_array[pg->p_sector];
	/* pointer difference between same-array elements is already the
	   element index -- no manual byte arithmetic needed */
	return sector->s_first_pfn + (size_t)(pg - sector->s_pages);
}
|
||||
Reference in New Issue
Block a user