Files
mango/vm/sparse.c

332 lines
8.7 KiB
C
Raw Normal View History

2023-02-08 17:13:01 +00:00
/* ### The sparse memory model ###
under this memory model, the system memory is represented by
a set of sectors. each sector has the same, fixed, power-of-2
size, and has its own array of vm_pages. unlike the flat memory
model, this is an array of vm_page POINTERS, allowing vm_pages
to be allocated on demand.
under this memory model, only memory frames that are usable by
the kernel will have an associated vm_page. the array of pointers
adds some overhead, effectively adding an extra pointer's worth
of memory to the size of vm_page, but this is mitigated by
fewer vm_pages being allocated.
on top of this, any sector that ONLY contains reserved memory
can forgo allocating its vm_page pointer array entirely,
saving even more memory.
this memory model is good for systems with large amounts of
memory, or those with less memory but a high percentage of
reserved memory. if this is not the case, the memory savings
of the sparse memory model may be outweighed by the extra
overhead, and the flat memory model may be a better choice.
*/
2024-11-02 11:31:51 +00:00
#include <mango/arg.h>
2026-02-08 12:17:27 +00:00
#include <mango/machine/cpu.h>
2024-11-02 11:31:51 +00:00
#include <mango/memblock.h>
2026-02-08 12:17:27 +00:00
#include <mango/panic.h>
#include <mango/printk.h>
2024-11-02 11:31:51 +00:00
#include <mango/util.h>
2026-02-08 12:17:27 +00:00
#include <mango/vm.h>
2023-02-08 17:13:01 +00:00
/* array of sectors covering physical memory; allocated and sized by
   vm_sparse_init(), NULL until then */
static struct vm_sector *sector_array = NULL;
/* number of entries in sector_array */
static size_t sector_array_count = 0;
/* how much of the physical address space the sector array must cover:
   FREE sizes it to reach the last free frame only, ALL extends it to
   the last frame of any kind (free or reserved) — see vm_sparse_init() */
enum sector_coverage_mode {
	SECTOR_COVERAGE_FREE,
	SECTOR_COVERAGE_ALL,
};
/* Resolve the sector coverage mode from the "vm.sector-coverage-mode"
   boot argument. An absent argument selects SECTOR_COVERAGE_FREE; an
   unrecognised value is reported and also falls back to FREE. */
static enum sector_coverage_mode get_sector_coverage_mode(void)
{
	const char *value = arg_value("vm.sector-coverage-mode");

	if (value) {
		if (strcmp(value, "all") == 0) {
			return SECTOR_COVERAGE_ALL;
		}
		if (strcmp(value, "free") != 0) {
			/* unknown value: warn, then use the default */
			printk("vm: [sparse] ignoring unknown sector coverage mode '%s', using "
			       "FREE",
			       value);
		}
	}
	return SECTOR_COVERAGE_FREE;
}
2026-02-08 12:17:27 +00:00
/* Translate a physical address into the sector covering it and the
   page index inside that sector.
   @addr:      physical address to translate (rounded down to a page).
   @sector_id: out, optional — index of the sector in sector_array.
   @index:     out, optional — page index within the sector.
   Returns a pointer to the sector entry.
   NOTE(review): assumes sector_array is non-NULL and non-empty —
   calling this before vm_sparse_init() dereferences sector_array[0].
   No bounds check is performed on the computed sector number; callers
   that may pass out-of-range addresses (see vm_page_get_sparse) must
   validate *sector_id against sector_array_count themselves. */
static struct vm_sector *phys_addr_to_sector_and_index(
	phys_addr_t addr,
	size_t *sector_id,
	size_t *index)
{
	/* all sectors have the same size */
	size_t step = vm_page_order_to_bytes(sector_array[0].s_size);
	addr &= ~VM_PAGE_MASK;
	/* step is a power of two, so this division is exact and cheap */
	size_t sector = div64_pow2(addr, step);
	/* page index = (addr - sector base) in pages */
	addr >>= VM_PAGE_SHIFT;
	addr -= ((sector * step) >> VM_PAGE_SHIFT);
	if (sector_id) {
		*sector_id = sector;
	}
	if (index) {
		*index = addr;
	}
	return &sector_array[sector];
}
/* Return the vm_page for the given physical address, allocating the
   owning sector's page array on first use. Newly created pages start
   out reserved; the caller clears the flag for frames that are free.
   Panics on allocation failure. */
static struct vm_page *get_or_create_page(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);
	struct vm_sector *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		size_t nr_pages = vm_page_order_to_pages(sector->s_size);
		sector->s_pages = kzalloc(nr_pages * sizeof(struct vm_page), 0);
		if (!sector->s_pages) {
			panic("out of memory!");
		}
		for (size_t i = 0; i < nr_pages; i++) {
			sector->s_pages[i].p_flags = VM_PAGE_RESERVED;
			/* stamp the owning sector on EVERY page up front.
			   previously only the requested page got p_sector
			   set, so pages never returned from this function
			   (e.g. reserved frames in a partially-free
			   sector) kept p_sector == 0 from kzalloc(),
			   making vm_page_get_pfn_sparse() compute a pfn
			   relative to the wrong sector. */
			sector->s_pages[i].p_sector = sector_number;
		}
	}
	return &sector->s_pages[page_number];
}
/* Find the smallest page order whose byte size, times VM_MAX_SECTORS,
   covers physical memory up to @pmem_end (a byte address, not a pfn).
   Panics if no order is large enough.
   NOTE(review): the loop bound excludes VM_PAGE_64G itself — confirm
   whether 64G is a valid sector size or a sentinel. */
static enum vm_page_order find_minimum_sector_size(phys_addr_t pmem_end)
{
	/* minimum number of bytes each sector must cover, rounded up;
	   comparing against this avoids the overflow that
	   `order_bytes * VM_MAX_SECTORS >= pmem_end` could hit for
	   large orders. */
	phys_addr_t min_bytes
		= (pmem_end + VM_MAX_SECTORS - 1) / VM_MAX_SECTORS;
	for (enum vm_page_order i = VM_PAGE_4K; i < VM_PAGE_64G; i++) {
		size_t order_bytes = vm_page_order_to_bytes(i);
		if (order_bytes >= min_bytes) {
			return i;
		}
	}
	panic("cannot find suitable sector size for memory map.");
}
/* Calculate the optimal sector size and resulting sector count for the
   system, taking into account factors like the total system memory and
   how much is reserved vs free.
   @limit_pfn:        one past the highest pfn the sectors must cover.
   @reserved_size:    total bytes of reserved memory.
   @free_size:        total bytes of free memory.
   @out_sector_count: out — number of sectors to allocate.
   @out_sector_size:  out — chosen per-sector page order.
   This function uses some heuristics and thresholds that are untested
   and in need of improvement to ensure that sparse works well on a wide
   range of systems. */
static void calculate_sector_size_and_count(
	size_t last_reserved_pfn,
	size_t last_free_pfn,
	size_t limit_pfn,
	size_t reserved_size,
	size_t free_size,
	unsigned int *out_sector_count,
	enum vm_page_order *out_sector_size)
{
	/* not consumed by the current heuristics; kept for interface
	   stability and future tuning — silence -Wunused-parameter */
	(void)last_reserved_pfn;
	(void)last_free_pfn;

	/* we can support up to VM_MAX_SECTORS memory sectors.
	   the minimum sector size is whatever is required
	   to cover all of physical memory in the maximum number of sectors */
	phys_addr_t pmem_end = limit_pfn * VM_PAGE_SIZE;
	enum vm_page_order sector_size = find_minimum_sector_size(pmem_end);

	if (sector_size <= VM_PAGE_2M) {
		/* override really small sector sizes with something
		   more reasonable, to avoid excessive overhead on
		   low-memory systems */
		sector_size = VM_PAGE_2M;
	}
	/* the absolute difference between the amount of free memory and
	   the amount of reserved memory. */
	size_t memdiff = absdiff64(free_size, reserved_size);
	if (free_size > reserved_size && sector_size < VM_PAGE_256M) {
		/* if there is more free memory than reserved, we can choose
		   a bigger sector size, as we don't have to worry as much
		   about wasting memory allocating vm_pages for reserved frames.
		   we only do this bump if the sector size is below a certain
		   threshold. */
		sector_size++;
		/* if the difference is particularly big (>= 16 MiB),
		   increase the sector size even further */
		if (memdiff >= 0x1000000) {
			sector_size++;
		}
	}
	/* round pmem_end up to the next multiple of sector_bytes.
	   this works because sector_bytes is guaranteed to be a
	   power of 2. */
	size_t sector_bytes = vm_page_order_to_bytes(sector_size);
	if (pmem_end & (sector_bytes - 1)) {
		pmem_end &= ~(sector_bytes - 1);
		pmem_end += sector_bytes;
	}
	size_t sector_count = div64_pow2(pmem_end, sector_bytes);
	/* NOTE(review): size_t narrowed to unsigned int here — safe as
	   long as VM_MAX_SECTORS fits in unsigned int; confirm. */
	*out_sector_count = sector_count;
	*out_sector_size = sector_size;
}
void vm_sparse_init(void)
{
size_t pmem_limit = 0, reserved_size = 0, free_size = 0;
size_t last_reserved_pfn = 0, last_free_pfn = 0;
2023-02-08 17:13:01 +00:00
struct memblock_iter it;
2026-02-08 12:17:27 +00:00
for_each_mem_range(&it, 0x0, UINTPTR_MAX)
{
if (pmem_limit < it.it_limit + 1) {
pmem_limit = it.it_limit + 1;
2023-02-08 17:13:01 +00:00
}
}
2026-02-08 12:17:27 +00:00
for_each_free_mem_range(&it, 0x0, UINTPTR_MAX)
{
free_size += it.it_limit - it.it_base + 1;
size_t last_pfn = it.it_limit / VM_PAGE_SIZE;
if (last_pfn > last_free_pfn) {
last_free_pfn = last_pfn;
}
}
2026-02-08 12:17:27 +00:00
for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX)
{
2023-02-08 17:13:01 +00:00
reserved_size += it.it_limit - it.it_base + 1;
size_t last_pfn = it.it_limit / VM_PAGE_SIZE;
if (last_pfn > last_reserved_pfn) {
last_reserved_pfn = last_pfn;
}
2023-02-08 17:13:01 +00:00
}
enum sector_coverage_mode mode = get_sector_coverage_mode();
phys_addr_t pmem_end = 0;
2026-02-08 12:17:27 +00:00
enum vm_page_order sector_size
= find_minimum_sector_size(last_free_pfn);
if (mode == SECTOR_COVERAGE_FREE) {
pmem_end = last_free_pfn * VM_PAGE_SIZE;
} else {
pmem_end = MAX(last_free_pfn, last_reserved_pfn) * VM_PAGE_SIZE;
}
printk("vm: last_pfn=0x%lx", pmem_end / VM_PAGE_SIZE);
2023-02-08 17:13:01 +00:00
size_t sector_bytes = 0;
unsigned int nr_sectors = 0;
calculate_sector_size_and_count(
2026-02-08 12:17:27 +00:00
last_reserved_pfn,
last_free_pfn,
pmem_end / VM_PAGE_SIZE,
reserved_size,
free_size,
&nr_sectors,
&sector_size);
2023-02-08 17:13:01 +00:00
sector_bytes = vm_page_order_to_bytes(sector_size);
char sector_size_str[64];
2026-02-08 12:17:27 +00:00
data_size_to_string(
sector_bytes,
sector_size_str,
sizeof sector_size_str);
2023-02-08 17:13:01 +00:00
sector_array = kzalloc(sizeof(struct vm_sector) * nr_sectors, 0);
2023-02-08 17:13:01 +00:00
sector_array_count = nr_sectors;
for (unsigned int i = 0; i < nr_sectors; i++) {
sector_array[i].s_size = sector_size;
2026-02-08 12:17:27 +00:00
sector_array[i].s_first_pfn
= (i * sector_bytes) >> VM_PAGE_SHIFT;
2023-02-08 17:13:01 +00:00
}
size_t s, i;
phys_addr_to_sector_and_index(0x3f00000, &s, &i);
2026-02-08 12:17:27 +00:00
for_each_free_mem_range(&it, 0x0, pmem_end)
{
2023-02-08 17:13:01 +00:00
if (it.it_base & VM_PAGE_MASK) {
it.it_base &= ~VM_PAGE_MASK;
it.it_base += VM_PAGE_SIZE;
}
2026-02-08 12:17:27 +00:00
for (phys_addr_t i = it.it_base; i < it.it_limit;
i += VM_PAGE_SIZE) {
struct vm_page *pg = get_or_create_page(i);
pg->p_flags = 0;
2023-02-08 17:13:01 +00:00
}
}
2026-02-08 12:17:27 +00:00
for_each_reserved_mem_range(&it, 0x0, pmem_end)
{
2023-02-08 17:13:01 +00:00
if (it.it_base & VM_PAGE_MASK) {
it.it_base &= ~VM_PAGE_MASK;
it.it_base += VM_PAGE_SIZE;
}
2026-02-08 12:17:27 +00:00
for (phys_addr_t i = it.it_base; i < it.it_limit;
i += VM_PAGE_SIZE) {
struct vm_page *pg = vm_page_get(i);
2023-02-08 17:13:01 +00:00
if (!pg) {
2026-02-08 12:17:27 +00:00
/* if the page doesn't exist, it is part of a
sector that only contains reserved pages. a
NULL page is implicitly treated as reserved
*/
2023-02-08 17:13:01 +00:00
continue;
}
pg->p_flags = VM_PAGE_RESERVED;
}
}
2026-02-08 12:17:27 +00:00
printk("vm: [sparse] initialised %zu sectors of size %s",
nr_sectors,
sector_size_str);
2023-02-08 17:13:01 +00:00
}
/* Look up the vm_page for a physical address.
   Returns NULL when the address is outside the covered range, when its
   sector has no page array (reserved-only sector), or when the sparse
   model has not been initialised yet. */
struct vm_page *vm_page_get_sparse(phys_addr_t addr)
{
	/* guard against calls before vm_sparse_init(): the helper below
	   reads sector_array[0].s_size and would dereference NULL */
	if (!sector_array_count) {
		return NULL;
	}
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);
	if (sector_number >= sector_array_count) {
		return NULL;
	}
	struct vm_sector *sector = &sector_array[sector_number];
	if (!sector->s_pages
	    || page_number >= vm_page_order_to_pages(sector->s_size)) {
		return NULL;
	}
	return &sector->s_pages[page_number];
}
size_t vm_page_get_pfn_sparse(struct vm_page *pg)
2023-02-08 17:13:01 +00:00
{
struct vm_sector *sector = &sector_array[pg->p_sector];
2026-02-08 12:17:27 +00:00
return sector->s_first_pfn
+ (((uintptr_t)pg - (uintptr_t)sector->s_pages) / sizeof *pg);
2023-02-08 17:13:01 +00:00
}