Files
mango/vm/sparse.c
Max Wash 8803c23f08 vm: improve memory usage under sparse with a high reserved memory ratio
previously, sparse would attempt to create a smaller number of larger sectors on systems with lots of reserved memory, often causing an out-of-memory condition. the reserved memory ratio calculation now compares reserved memory to free memory, rather than to the address of the last byte in physical memory. this improved heuristic means sparse is now better at choosing an appropriate sector size, allowing sparse to operate on systems with high amounts of reserved memory.
2023-12-24 09:39:28 +00:00

250 lines
7.3 KiB
C

/* ### The sparse memory model ###
under this memory model, the system memory is represented by
a set of sectors. each sector has the same, fixed, power-of-2
size, and has its own array of vm_pages. unlike the flat memory
model, this is an array of vm_page POINTERS, allowing vm_pages
to be allocated on demand.
under this memory model, only memory frames that are usable by
the kernel will have an associated vm_page. the array of pointers
adds some overhead, effectively adding an extra pointer's worth
of memory to the size of vm_page, but this is mitigated by
fewer vm_pages being allocated.
on top of this, any sector that ONLY contains reserved memory
can forego allocating its vm_page pointer array entirely,
saving even more memory.
this memory model is good for systems with large amounts of
memory, or those with less memory but a high percentage of
reserved memory. if this is not the case, the memory savings
of the sparse memory model may be outweighed by the extra
overhead, and the flat memory model may be a better choice.
*/
#include <socks/vm.h>
#include <socks/printk.h>
#include <socks/panic.h>
#include <socks/memblock.h>
#include <socks/util.h>
#include <socks/machine/cpu.h>
/* the system-wide table of memory sectors; allocated once in vm_sparse_init */
static struct vm_sector *sector_array = NULL;
/* number of entries in sector_array */
static size_t sector_array_count = 0;
/* map a physical address to its sector and the page index within that
   sector. sector_id and index are optional outputs (NULL to ignore).
   returns a pointer to the sector. assumes sector_array is initialised
   and addr falls within the covered range. */
static struct vm_sector *phys_addr_to_sector_and_index(phys_addr_t addr, size_t *sector_id, size_t *index)
{
	/* every sector shares one fixed power-of-2 size, so sector 0 is
	   representative of them all */
	size_t sector_bytes = vm_page_order_to_bytes(sector_array[0].s_size);
	phys_addr_t frame = addr & ~VM_PAGE_MASK;
	size_t sector = div64_pow2(frame, sector_bytes);
	/* page index = pages into physical memory minus pages before
	   this sector's start */
	size_t page = (frame >> VM_PAGE_SHIFT) - ((sector * sector_bytes) >> VM_PAGE_SHIFT);
	if (sector_id) {
		*sector_id = sector;
	}
	if (index) {
		*index = page;
	}
	return &sector_array[sector];
}
/* return the vm_page for addr, allocating the owning sector's page
   array on first touch. newly created pages start out flagged
   VM_PAGE_RESERVED until a caller clears the flag. panics on OOM. */
static struct vm_page *get_or_create_page(phys_addr_t addr)
{
	size_t sector_no, page_no;
	struct vm_sector *sector =
		phys_addr_to_sector_and_index(addr, &sector_no, &page_no);
	if (!sector->s_pages) {
		/* lazy allocation: reserved-only sectors never reach here,
		   which is where the sparse model's memory savings come from */
		size_t count = vm_page_order_to_pages(sector->s_size);
		sector->s_pages = kzalloc(count * sizeof(struct vm_page), 0);
		if (!sector->s_pages) {
			panic("out of memory!");
		}
		for (size_t n = 0; n < count; n++) {
			sector->s_pages[n].p_flags = VM_PAGE_RESERVED;
		}
	}
	struct vm_page *pg = &sector->s_pages[page_no];
	pg->p_sector = sector_no;
	return pg;
}
/* find the smallest sector size (as a vm_page_order) such that
   VM_MAX_SECTORS sectors of that size cover all physical memory up to
   pmem_end. panics if even the largest supported order is too small. */
static enum vm_page_order find_minimum_sector_size(phys_addr_t pmem_end)
{
	/* NOTE(review): bound made inclusive — the original `i < VM_PAGE_64G`
	   never considered the largest order, panicking on systems that
	   would have fit in VM_MAX_SECTORS sectors of 64G. */
	for (enum vm_page_order i = VM_PAGE_4K; i <= VM_PAGE_64G; i++) {
		size_t order_bytes = vm_page_order_to_bytes(i);
		if (order_bytes * VM_MAX_SECTORS >= pmem_end) {
			return i;
		}
	}
	panic("cannot find suitable sector size for memory map.");
}
/* choose the sector size and sector count for this system, based on
   the physical memory extent and the balance of free vs reserved
   memory.
   the heuristics and thresholds here are untested and in need of
   improvement to ensure sparse works well on a wide range of systems. */
static void calculate_sector_size_and_count(
	phys_addr_t pmem_end, size_t reserved_size, size_t free_size,
	unsigned int *out_sector_count, enum vm_page_order *out_sector_size)
{
	/* start from the smallest order that lets VM_MAX_SECTORS sectors
	   span all of physical memory */
	enum vm_page_order order = find_minimum_sector_size(pmem_end);
	/* clamp tiny orders up to 2M: very small sectors carry too much
	   bookkeeping overhead on low-memory systems */
	if (order <= VM_PAGE_2M) {
		order = VM_PAGE_2M;
	}
	/* absolute difference between free and reserved memory */
	size_t memdiff = absdiff64(free_size, reserved_size);
	if (free_size > reserved_size && order < VM_PAGE_256M) {
		/* with more free memory than reserved, bigger sectors waste
		   less on vm_pages for reserved frames, so grow the order.
		   only done while below the 256M threshold. */
		order++;
		/* a particularly large surplus justifies one more step */
		if (memdiff >= 0x1000000) {
			order++;
		}
	}
	/* round pmem_end up to a whole number of sectors; the sector size
	   is a power of 2, so the remainder comes from a simple mask */
	size_t sector_bytes = vm_page_order_to_bytes(order);
	size_t remainder = pmem_end & (sector_bytes - 1);
	if (remainder != 0) {
		pmem_end += sector_bytes - remainder;
	}
	*out_sector_count = div64_pow2(pmem_end, sector_bytes);
	*out_sector_size = order;
}
/* initialise the sparse memory model: survey the memblock map, choose
   a sector geometry, allocate the sector table, then create vm_pages
   for every whole free frame and flag reserved frames. sectors that
   contain only reserved memory never allocate a page array. */
void vm_sparse_init(void)
{
	size_t pmem_limit = 0, reserved_size = 0, free_size = 0;
	struct memblock_iter it;
	/* pmem_limit: one past the highest physical byte of any range
	   (it_limit is inclusive, hence the +1s below) */
	for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
		if (pmem_limit < it.it_limit + 1) {
			pmem_limit = it.it_limit + 1;
		}
	}
	for_each_free_mem_range (&it, 0x0, UINTPTR_MAX) {
		free_size += it.it_limit - it.it_base + 1;
	}
	for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) {
		reserved_size += it.it_limit - it.it_base + 1;
	}
	enum vm_page_order sector_size;
	size_t sector_bytes = 0;
	unsigned int nr_sectors = 0;
	calculate_sector_size_and_count(
		pmem_limit, reserved_size, free_size,
		&nr_sectors, &sector_size);
	sector_bytes = vm_page_order_to_bytes(sector_size);
	char sector_size_str[64];
	data_size_to_string(sector_bytes, sector_size_str, sizeof sector_size_str);
	sector_array = kzalloc(sizeof(struct vm_sector) * nr_sectors, 0);
	if (!sector_array) {
		/* fix: this allocation was unchecked; the loop below would
		   dereference NULL. consistent with get_or_create_page. */
		panic("out of memory!");
	}
	sector_array_count = nr_sectors;
	for (unsigned int i = 0; i < nr_sectors; i++) {
		sector_array[i].s_size = sector_size;
		sector_array[i].s_first_pfn = (i * sector_bytes) >> VM_PAGE_SHIFT;
	}
	/* mark every whole free page usable; a partial leading page is
	   rounded up to the next page boundary and skipped */
	for_each_free_mem_range(&it, 0x0, UINTPTR_MAX) {
		if (it.it_base & VM_PAGE_MASK) {
			it.it_base &= ~VM_PAGE_MASK;
			it.it_base += VM_PAGE_SIZE;
		}
		for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
			struct vm_page *pg = get_or_create_page(i);
			pg->p_flags = 0;
		}
	}
	for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) {
		if (it.it_base & VM_PAGE_MASK) {
			it.it_base &= ~VM_PAGE_MASK;
			it.it_base += VM_PAGE_SIZE;
		}
		for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
			struct vm_page *pg = vm_page_get(i);
			if (!pg) {
				/* if the page doesn't exist, it is part of a sector
				   that only contains reserved pages. a NULL page
				   is implicitly treated as reserved */
				continue;
			}
			pg->p_flags = VM_PAGE_RESERVED;
		}
	}
	/* fix: nr_sectors is unsigned int, so %u, not %zu (UB on LP64) */
	printk("vm: [sparse] initialised %u sectors of size %s", nr_sectors, sector_size_str);
}
/* look up the vm_page for a physical address, or NULL if the address
   is out of range, lies in a reserved-only sector (no page array), or
   the sparse model has not been initialised. */
struct vm_page *vm_page_get_sparse(phys_addr_t addr)
{
	/* fix: guard before calling phys_addr_to_sector_and_index — it
	   reads sector_array[0], which doesn't exist before vm_sparse_init */
	if (!sector_array || sector_array_count == 0) {
		return NULL;
	}
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);
	if (sector_number >= sector_array_count) {
		return NULL;
	}
	struct vm_sector *sector = &sector_array[sector_number];
	if (!sector->s_pages || page_number >= vm_page_order_to_pages(sector->s_size)) {
		return NULL;
	}
	return &sector->s_pages[page_number];
}
/* translate a vm_page back to its physical frame number: the owning
   sector's first pfn plus the page's index within the sector's array.
   pg must belong to a sector's s_pages array. */
size_t vm_page_get_pfn_sparse(struct vm_page *pg)
{
	struct vm_sector *sector = &sector_array[pg->p_sector];
	ptrdiff_t index = pg - sector->s_pages;
	return sector->s_first_pfn + (size_t)index;
}