vm: implement a sparse memory model
This commit is contained in:
@@ -14,6 +14,8 @@
|
|||||||
#define VM_MAX_ZONES (VM_ZONE_MAX + 1)
|
#define VM_MAX_ZONES (VM_ZONE_MAX + 1)
|
||||||
/* maximum number of supported page orders */
|
/* maximum number of supported page orders */
|
||||||
#define VM_MAX_PAGE_ORDERS (VM_PAGE_MAX_ORDER + 1)
|
#define VM_MAX_PAGE_ORDERS (VM_PAGE_MAX_ORDER + 1)
|
||||||
|
/* maximum number of sparse memory sectors */
|
||||||
|
#define VM_MAX_SECTORS 1024
|
||||||
|
|
||||||
#define VM_CHECK_ALIGN(p, mask) ((((p) & (mask)) == (p)) ? 1 : 0)
|
#define VM_CHECK_ALIGN(p, mask) ((((p) & (mask)) == (p)) ? 1 : 0)
|
||||||
|
|
||||||
@@ -189,15 +191,16 @@ typedef struct vm_slab {
|
|||||||
|
|
||||||
typedef struct vm_page {
|
typedef struct vm_page {
|
||||||
/* order of the page block that this page belongs too */
|
/* order of the page block that this page belongs too */
|
||||||
uint16_t p_order : 4;
|
uint32_t p_order : 4;
|
||||||
/* the id of the NUMA node that this page belongs to */
|
/* the id of the NUMA node that this page belongs to */
|
||||||
uint16_t p_node : 6;
|
uint32_t p_node : 6;
|
||||||
/* the id of the memory zone that this page belongs to */
|
/* the id of the memory zone that this page belongs to */
|
||||||
uint16_t p_zone : 3;
|
uint32_t p_zone : 3;
|
||||||
/* some unused bits */
|
|
||||||
uint16_t p_reserved : 3;
|
|
||||||
|
|
||||||
/* vm_page_flags_t bitfields. */
|
/* vm_page_flags_t bitfields. */
|
||||||
|
uint32_t p_sector : 11;
|
||||||
|
/* some unused bits */
|
||||||
|
uint32_t p_reserved : 8;
|
||||||
|
|
||||||
uint32_t p_flags;
|
uint32_t p_flags;
|
||||||
|
|
||||||
/* multi-purpose list.
|
/* multi-purpose list.
|
||||||
@@ -216,6 +219,21 @@ typedef struct vm_page {
|
|||||||
|
|
||||||
} __attribute__((aligned(2 * sizeof(unsigned long)))) vm_page_t;
|
} __attribute__((aligned(2 * sizeof(unsigned long)))) vm_page_t;
|
||||||
|
|
||||||
|
/* represents a sector of memory, containing its own array of vm_pages.
|
||||||
|
this struct is used under the sparse memory model, instead of the
|
||||||
|
global vm_page array */
|
||||||
|
typedef struct vm_sector {
|
||||||
|
/* sector size. this must be a power of 2.
|
||||||
|
all sectors in the system have the same size. */
|
||||||
|
vm_page_order_t s_size;
|
||||||
|
/* PFN of the first page contained in s_pages.
|
||||||
|
to find the PFN of any page contained within s_pages,
|
||||||
|
simply add its offset within the array to s_first_pfn */
|
||||||
|
size_t s_first_pfn;
|
||||||
|
/* array of pages contained in this sector */
|
||||||
|
vm_page_t *s_pages;
|
||||||
|
} vm_sector_t;
|
||||||
|
|
||||||
extern kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones);
|
extern kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones);
|
||||||
extern vm_model_t vm_memory_model(void);
|
extern vm_model_t vm_memory_model(void);
|
||||||
extern void vm_set_memory_model(vm_model_t model);
|
extern void vm_set_memory_model(vm_model_t model);
|
||||||
@@ -264,4 +282,9 @@ extern void vm_flat_init(void);
|
|||||||
extern vm_page_t *vm_page_get_flat(phys_addr_t addr);
|
extern vm_page_t *vm_page_get_flat(phys_addr_t addr);
|
||||||
extern size_t vm_page_get_pfn_flat(vm_page_t *pg);
|
extern size_t vm_page_get_pfn_flat(vm_page_t *pg);
|
||||||
|
|
||||||
|
/* Sparse memory model functions */
|
||||||
|
extern void vm_sparse_init(void);
|
||||||
|
extern vm_page_t *vm_page_get_sparse(phys_addr_t addr);
|
||||||
|
extern size_t vm_page_get_pfn_sparse(vm_page_t *pg);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
#include <socks/vm.h>
|
#include <socks/vm.h>
|
||||||
#include <socks/memblock.h>
|
#include <socks/memblock.h>
|
||||||
#include <socks/printk.h>
|
#include <socks/printk.h>
|
||||||
|
#include <socks/machine/cpu.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
@@ -18,8 +19,10 @@ kern_status_t vm_bootstrap(const vm_zone_descriptor_t *zones, size_t nr_zones)
|
|||||||
node_data = memblock_alloc(sizeof(vm_pg_data_t) * numa_count, 8);
|
node_data = memblock_alloc(sizeof(vm_pg_data_t) * numa_count, 8);
|
||||||
printk("vm: initialising %u node%s", numa_count, numa_count > 1 ? "s" : "");
|
printk("vm: initialising %u node%s", numa_count, numa_count > 1 ? "s" : "");
|
||||||
|
|
||||||
vm_set_memory_model(VM_MODEL_FLAT);
|
/* TODO select which memory model to use automatically, and add
|
||||||
vm_flat_init();
|
a kernel boot parameter to override the choice */
|
||||||
|
vm_set_memory_model(VM_MODEL_SPARSE);
|
||||||
|
vm_sparse_init();
|
||||||
|
|
||||||
for (size_t i = 0; i < nr_zones; i++) {
|
for (size_t i = 0; i < nr_zones; i++) {
|
||||||
vm_zone_init(&node_data->pg_zones[zones[i].zd_id], &zones[i]);
|
vm_zone_init(&node_data->pg_zones[zones[i].zd_id], &zones[i]);
|
||||||
|
|||||||
19
vm/flat.c
19
vm/flat.c
@@ -1,3 +1,20 @@
|
|||||||
|
/* ### The flat memory model ###
|
||||||
|
|
||||||
|
under this memory model, the system memory is represented by
|
||||||
|
a single contiguous array of vm_pages. this array spans from
|
||||||
|
physical address up to the last available byte, as provided by
|
||||||
|
memblock. any extra reserved regions after the last available
|
||||||
|
byte will not be included to save memory.
|
||||||
|
|
||||||
|
this memory model is good for systems with a smaller amount of
|
||||||
|
physical memory that is mostly contiguous with few holes or
|
||||||
|
reserved regions. it is simpler and has less overhead.
|
||||||
|
|
||||||
|
for systems with a large amount of memory, or with large
|
||||||
|
amounts of reserved memory (especially those whose reserved
|
||||||
|
memory outstripts free memory), the sparse memory model may
|
||||||
|
be a better choice.
|
||||||
|
*/
|
||||||
#include <socks/vm.h>
|
#include <socks/vm.h>
|
||||||
#include <socks/memblock.h>
|
#include <socks/memblock.h>
|
||||||
#include <socks/printk.h>
|
#include <socks/printk.h>
|
||||||
@@ -8,7 +25,7 @@ static vm_page_t *page_array = NULL;
|
|||||||
/* number of pages stored in page_array */
|
/* number of pages stored in page_array */
|
||||||
static size_t page_array_count = 0;
|
static size_t page_array_count = 0;
|
||||||
|
|
||||||
void vm_flat_init()
|
void vm_flat_init(void)
|
||||||
{
|
{
|
||||||
printk("vm: using flat memory model");
|
printk("vm: using flat memory model");
|
||||||
size_t pmem_size = 0;
|
size_t pmem_size = 0;
|
||||||
|
|||||||
233
vm/sparse.c
Normal file
233
vm/sparse.c
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
/* ### The sparse memory model ###
|
||||||
|
|
||||||
|
under this memory model, the system memory is represented by
|
||||||
|
a set of sectors. each sector has the same, fixed, power-of-2
|
||||||
|
size, and has its own array of vm_pages. unlike the flat memory
|
||||||
|
model, this is an array of vm_page POINTERS, allowing vm_pages
|
||||||
|
to be allocated on demand.
|
||||||
|
|
||||||
|
under this memory model, only memory frames that are usable by
|
||||||
|
the kernel will have an associated vm_page. the array of pointers
|
||||||
|
adds some overhead, effectively adding an extra pointer's worth
|
||||||
|
of memory to the size of vm_page, but this is mitigated by
|
||||||
|
fewer vm_pages being allocated.
|
||||||
|
|
||||||
|
on top of this, any sector that ONLY contains reserved memory
|
||||||
|
can forego allocating their vm_page pointer array entirely,
|
||||||
|
saving even more memory.
|
||||||
|
|
||||||
|
this memory model is good for systems with large amounts of
|
||||||
|
memory, or those will less memory but a high percentage of
|
||||||
|
reserved memory. if this is not the case, the memory savings
|
||||||
|
of the sparse memory model may be outweighed by the extra
|
||||||
|
overhead, and the flat memory model may be a better choice.
|
||||||
|
*/
|
||||||
|
#include <socks/vm.h>
|
||||||
|
#include <socks/printk.h>
|
||||||
|
#include <socks/memblock.h>
|
||||||
|
#include <socks/util.h>
|
||||||
|
#include <socks/machine/cpu.h>
|
||||||
|
|
||||||
|
static vm_sector_t *sector_array = NULL;
|
||||||
|
static size_t sector_array_count = 0;
|
||||||
|
|
||||||
|
/* Resolve a physical address to its containing sector.
   Optionally reports the sector number via sector_id and the
   page index within that sector via index (either may be NULL).
   Returns a pointer to the sector in sector_array. */
static vm_sector_t *phys_addr_to_sector_and_index(phys_addr_t addr, size_t *sector_id, size_t *index)
{
	/* every sector has the same size, so sector 0 is representative */
	size_t sector_bytes = vm_page_order_to_bytes(sector_array[0].s_size);

	phys_addr_t aligned = addr & ~VM_PAGE_MASK;
	size_t sector_no = div64_pow2(aligned, sector_bytes);

	/* page index = page offset of addr minus the page offset of the
	   sector's first page */
	size_t page_index = (aligned >> VM_PAGE_SHIFT)
	                  - ((sector_no * sector_bytes) >> VM_PAGE_SHIFT);

	if (sector_id)
		*sector_id = sector_no;

	if (index)
		*index = page_index;

	return &sector_array[sector_no];
}
|
||||||
|
|
||||||
|
/* Look up the vm_page describing a physical address, lazily allocating
   the owning sector's page array on first use. Stores the sector id in
   the page so vm_page_get_pfn_sparse() can find its way back.

   Fixes vs previous version:
   - the array was sized with sizeof(vm_page_t *) per entry, but s_pages
     holds vm_page_t structures, not pointers, so it was under-allocated
   - the fresh array is now zeroed, so pages never touched by the init
     loops don't contain heap garbage
   - printk format matches the size_t argument (%zu, not %u) */
static vm_page_t *get_or_create_page(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	vm_sector_t *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		size_t nr_pages = vm_page_order_to_pages(sector->s_size);

		printk("allocated page array for sector %zu", sector_number);
		sector->s_pages = kmalloc(nr_pages * sizeof(vm_page_t), 0);

		if (!sector->s_pages) {
			/* TODO panic here, once panic() is implemented;
			   NULL is consistently treated as "no page" */
			return NULL;
		}

		memset(sector->s_pages, 0x0, nr_pages * sizeof(vm_page_t));
	}

	sector->s_pages[page_number].p_sector = sector_number;
	return &sector->s_pages[page_number];
}
|
||||||
|
|
||||||
|
static vm_page_order_t find_minimum_sector_size(size_t pmem_size)
|
||||||
|
{
|
||||||
|
for (vm_page_order_t i = VM_PAGE_4K; i < VM_PAGE_64G; i++) {
|
||||||
|
size_t order_bytes = vm_page_order_to_bytes(i);
|
||||||
|
if (order_bytes * VM_MAX_SECTORS >= pmem_size) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO panic here, once panic() is implemented. */
|
||||||
|
return VM_PAGE_64G;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* this function is called to calculate the optimal sector size for the system,
|
||||||
|
taking in to account factors like the total system memory and how much memory
|
||||||
|
is reserved vs free.
|
||||||
|
|
||||||
|
this function uses some heuristics and thresholds that are untested and
|
||||||
|
are in need of improvement to ensure that sparse works well on a wide
|
||||||
|
range of systems. */
|
||||||
|
static void calculate_sector_size_and_count(size_t pmem_size, size_t reserved_size, unsigned int *out_sector_count, vm_page_order_t *out_sector_size)
|
||||||
|
{
|
||||||
|
/* we can support up to VM_MAX_SECTORS memory sectors.
|
||||||
|
the minimum sector size is what ever is required
|
||||||
|
to cover all of physical memory in the maximum number of sectors */
|
||||||
|
vm_page_order_t sector_size = find_minimum_sector_size(pmem_size);
|
||||||
|
|
||||||
|
if (sector_size <= VM_PAGE_2M) {
|
||||||
|
/* override really small sector sizes with something
|
||||||
|
more reasonable, to avoid excessive overhead on
|
||||||
|
low-memory systems */
|
||||||
|
sector_size = VM_PAGE_2M;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t free_size = pmem_size - reserved_size;
|
||||||
|
/* the absolute difference between the amount of free memory and
|
||||||
|
the amount of reserved memory. */
|
||||||
|
size_t memdiff = absdiff64(free_size, reserved_size);
|
||||||
|
|
||||||
|
if (free_size > reserved_size && sector_size < VM_PAGE_256M) {
|
||||||
|
/* if there is more free memory than reserved, we can choose
|
||||||
|
a bigger sector size, as we don't have to worry as much
|
||||||
|
about wasting memory allocating vm_pages for reserved frames.
|
||||||
|
|
||||||
|
we only do this bump if the sector size is below a certain
|
||||||
|
threshold. */
|
||||||
|
sector_size++;
|
||||||
|
|
||||||
|
/* if the difference is particularly big, increase the sector size
|
||||||
|
even further */
|
||||||
|
if (memdiff >= 0x1000000) {
|
||||||
|
sector_size++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* round pmem_size up to the next multiple of sector_bytes.
|
||||||
|
this works because sector_bytes is guaranteed to be a
|
||||||
|
power of 2. */
|
||||||
|
size_t sector_bytes = vm_page_order_to_bytes(sector_size);
|
||||||
|
|
||||||
|
if (pmem_size & (sector_bytes - 1)) {
|
||||||
|
pmem_size &= ~(sector_bytes - 1);
|
||||||
|
pmem_size += sector_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t sector_count = div64_pow2(pmem_size, sector_bytes);
|
||||||
|
|
||||||
|
*out_sector_count = sector_count;
|
||||||
|
*out_sector_size = sector_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
void vm_sparse_init(void)
|
||||||
|
{
|
||||||
|
printk("vm: using sparse memory model");
|
||||||
|
|
||||||
|
size_t pmem_size = 0, reserved_size = 0;
|
||||||
|
|
||||||
|
memblock_iter_t it;
|
||||||
|
for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
|
||||||
|
if (pmem_size < it.it_limit + 1) {
|
||||||
|
pmem_size = it.it_limit + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) {
|
||||||
|
reserved_size += it.it_limit - it.it_base + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
vm_page_order_t sector_size;
|
||||||
|
size_t sector_bytes = 0;
|
||||||
|
unsigned int nr_sectors = 0;
|
||||||
|
calculate_sector_size_and_count(pmem_size, reserved_size, &nr_sectors, §or_size);
|
||||||
|
sector_bytes = vm_page_order_to_bytes(sector_size);
|
||||||
|
|
||||||
|
char sector_size_str[64];
|
||||||
|
data_size_to_string(sector_bytes, sector_size_str, sizeof sector_size_str);
|
||||||
|
|
||||||
|
sector_array = memblock_alloc(sizeof(vm_sector_t) * nr_sectors, 8);
|
||||||
|
sector_array_count = nr_sectors;
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < nr_sectors; i++) {
|
||||||
|
sector_array[i].s_size = sector_size;
|
||||||
|
sector_array[i].s_first_pfn = (i * sector_bytes) >> VM_PAGE_SHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t s, i;
|
||||||
|
phys_addr_to_sector_and_index(0x3f00000, &s, &i);
|
||||||
|
|
||||||
|
for_each_free_mem_range(&it, 0x0, UINTPTR_MAX) {
|
||||||
|
if (it.it_base & VM_PAGE_MASK) {
|
||||||
|
it.it_base &= ~VM_PAGE_MASK;
|
||||||
|
it.it_base += VM_PAGE_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
||||||
|
vm_page_t *pg = get_or_create_page(i);
|
||||||
|
memset(pg, 0x0, sizeof *pg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) {
|
||||||
|
if (it.it_base & VM_PAGE_MASK) {
|
||||||
|
it.it_base &= ~VM_PAGE_MASK;
|
||||||
|
it.it_base += VM_PAGE_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
||||||
|
vm_page_t *pg = vm_page_get(i);
|
||||||
|
|
||||||
|
if (!pg) {
|
||||||
|
/* if the page doesn't exist, it is part of a sector
|
||||||
|
that only contains reserved pages. a NULL page
|
||||||
|
is implicitly treated as reserved */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(pg, 0x0, sizeof *pg);
|
||||||
|
pg->p_flags = VM_PAGE_RESERVED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printk("vm: initialised %zu sectors of size %s", nr_sectors, sector_size_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return the vm_page describing the given physical address, or NULL if
   none exists — either the address is beyond the last sector, or the
   owning sector never had its page array allocated (such frames are
   implicitly treated as reserved).

   Fix vs previous version: out-of-range addresses used to index past
   the end of sector_array; they now return NULL. */
vm_page_t *vm_page_get_sparse(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	/* addresses past the end of physical memory have no sector */
	if (sector_number >= sector_array_count) {
		return NULL;
	}

	vm_sector_t *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		return NULL;
	}

	return &sector->s_pages[page_number];
}
|
||||||
|
|
||||||
|
/* Recover the PFN of a page by locating it within its owning sector:
   the sector's first PFN plus the page's index in s_pages. */
size_t vm_page_get_pfn_sparse(vm_page_t *pg)
{
	vm_sector_t *sector = &sector_array[pg->p_sector];
	ptrdiff_t idx = pg - sector->s_pages;

	return sector->s_first_pfn + (size_t)idx;
}
|
||||||
Reference in New Issue
Block a user