by default, the sector map created under the sparse model now only extends to the last non-reserved page frame; any reserved page frames afterwards are ignored.
301 lines
8.5 KiB
C
301 lines
8.5 KiB
C
/* ### The sparse memory model ###
|
|
|
|
under this memory model, the system memory is represented by
|
|
a set of sectors. each sector has the same, fixed, power-of-2
|
|
size, and has its own array of vm_pages. unlike the flat memory
|
|
model, this is an array of vm_page POINTERS, allowing vm_pages
|
|
to be allocated on demand.
|
|
|
|
under this memory model, only memory frames that are usable by
|
|
the kernel will have an associated vm_page. the array of pointers
|
|
adds some overhead, effectively adding an extra pointer's worth
|
|
of memory to the size of vm_page, but this is mitigated by
|
|
fewer vm_pages being allocated.
|
|
|
|
on top of this, any sector that ONLY contains reserved memory
|
|
can forego allocating its vm_page pointer array entirely,
|
|
saving even more memory.
|
|
|
|
this memory model is good for systems with large amounts of
|
|
memory, or those with less memory but a high percentage of
|
|
reserved memory. if this is not the case, the memory savings
|
|
of the sparse memory model may be outweighed by the extra
|
|
overhead, and the flat memory model may be a better choice.
|
|
*/
|
|
#include <socks/vm.h>
|
|
#include <socks/arg.h>
|
|
#include <socks/printk.h>
|
|
#include <socks/panic.h>
|
|
#include <socks/memblock.h>
|
|
#include <socks/util.h>
|
|
#include <socks/machine/cpu.h>
|
|
|
|
/* The global sector map: one entry per fixed-size sector, allocated and
   sized once in vm_sparse_init(). sector_array_count holds the number
   of entries, used for bounds checks in vm_page_get_sparse(). */
static struct vm_sector *sector_array = NULL;
static size_t sector_array_count = 0;
|
|
|
|
/* How far the sector map extends, selected by the boot argument
   "vm.sector-coverage-mode" (see get_sector_coverage_mode()). */
enum sector_coverage_mode {
	SECTOR_COVERAGE_FREE,	/* cover up to the last free (non-reserved) frame */
	SECTOR_COVERAGE_ALL,	/* cover every frame, reserved ones included */
};
|
|
|
|
static enum sector_coverage_mode get_sector_coverage_mode(void)
|
|
{
|
|
const char *arg = arg_value("vm.sector-coverage-mode");
|
|
if (!arg) {
|
|
return SECTOR_COVERAGE_FREE;
|
|
}
|
|
|
|
if (!strcmp(arg, "free")) {
|
|
return SECTOR_COVERAGE_FREE;
|
|
}
|
|
|
|
if (!strcmp(arg, "all")) {
|
|
return SECTOR_COVERAGE_ALL;
|
|
}
|
|
|
|
printk("vm: [sparse] ignoring unknown sector coverage mode '%s', using FREE", arg);
|
|
return SECTOR_COVERAGE_FREE;
|
|
}
|
|
|
|
/* Translate a physical address into the sector covering it.
   On return, *sector_id (if non-NULL) receives the sector number and
   *index (if non-NULL) receives the page index within that sector.
   NOTE: no bounds check against sector_array_count is performed here;
   callers that can receive out-of-range addresses must validate the
   sector number themselves (as vm_page_get_sparse() does). */
static struct vm_sector *phys_addr_to_sector_and_index(phys_addr_t addr, size_t *sector_id, size_t *index)
{
	/* all sectors have the same size, so sector 0 is representative */
	size_t step = vm_page_order_to_bytes(sector_array[0].s_size);

	/* truncate to a page boundary before locating the sector */
	addr &= ~VM_PAGE_MASK;
	size_t sector = div64_pow2(addr, step);

	/* page index within the sector = pfn of addr minus the pfn at
	   which the sector starts */
	addr >>= VM_PAGE_SHIFT;
	addr -= ((sector * step) >> VM_PAGE_SHIFT);

	if (sector_id) {
		*sector_id = sector;
	}

	if (index) {
		*index = addr;
	}

	/* fixed: "&sector_array" had been mangled to "§or_array"
	   (HTML entity &sect;) */
	return &sector_array[sector];
}
|
|
|
|
/* Return the vm_page for a physical address, lazily allocating the
   owning sector's page array on first touch. Newly created pages
   default to VM_PAGE_RESERVED; callers clear the flag for free frames.
   Panics on allocation failure (this runs during early VM bring-up). */
static struct vm_page *get_or_create_page(phys_addr_t addr)
{
	size_t sector_number, page_number;
	/* fixed: "&sector_number" had been mangled to "§or_number" */
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	struct vm_sector *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		size_t nr_pages = vm_page_order_to_pages(sector->s_size);
		/* sizeof *ptr rather than sizeof(type), so the allocation
		   stays correct if the element type ever changes */
		sector->s_pages = kzalloc(nr_pages * sizeof *sector->s_pages, 0);

		if (!sector->s_pages) {
			panic("out of memory!");
		}

		/* every page starts out reserved; vm_sparse_init() clears
		   the flag for frames known to be free */
		for (size_t i = 0; i < nr_pages; i++) {
			sector->s_pages[i].p_flags = VM_PAGE_RESERVED;
		}
	}

	/* record the owning sector so vm_page_get_pfn_sparse() can
	   recover the pfn later */
	sector->s_pages[page_number].p_sector = sector_number;
	return &sector->s_pages[page_number];
}
|
|
|
|
/* Find the smallest page order whose sector size allows VM_MAX_SECTORS
   sectors to span [0, pmem_end) bytes. Panics if no supported order is
   large enough. */
static enum vm_page_order find_minimum_sector_size(phys_addr_t pmem_end)
{
	/* smallest per-sector byte size that still covers pmem_end with
	   VM_MAX_SECTORS sectors: ceil(pmem_end / VM_MAX_SECTORS).
	   comparing against this instead of computing
	   "order_bytes * VM_MAX_SECTORS" avoids the multiplication
	   overflowing at large orders and falsely failing the check. */
	size_t needed = pmem_end / VM_MAX_SECTORS;
	if (pmem_end % VM_MAX_SECTORS) {
		needed++;
	}

	for (enum vm_page_order i = VM_PAGE_4K; i < VM_PAGE_64G; i++) {
		if (vm_page_order_to_bytes(i) >= needed) {
			return i;
		}
	}

	/* NOTE(review): the loop stops *before* VM_PAGE_64G, so the
	   largest order is never a candidate — confirm that exclusion
	   is intentional. */
	panic("cannot find suitable sector size for memory map.");
}
|
|
|
|
/* this function is called to calculate the optimal sector size for the system,
|
|
taking in to account factors like the total system memory and how much memory
|
|
is reserved vs free.
|
|
|
|
this function uses some heuristics and thresholds that are untested and
|
|
are in need of improvement to ensure that sparse works well on a wide
|
|
range of systems. */
|
|
static void calculate_sector_size_and_count(
|
|
size_t last_reserved_pfn, size_t last_free_pfn,
|
|
size_t reserved_size, size_t free_size,
|
|
unsigned int *out_sector_count, enum vm_page_order *out_sector_size)
|
|
{
|
|
/* we can support up to VM_MAX_SECTORS memory sectors.
|
|
the minimum sector size is what ever is required
|
|
to cover all of physical memory in the maximum number of sectors */
|
|
|
|
enum sector_coverage_mode mode = get_sector_coverage_mode();
|
|
phys_addr_t pmem_end = 0;
|
|
|
|
enum vm_page_order sector_size = find_minimum_sector_size(last_free_pfn);
|
|
if (mode == SECTOR_COVERAGE_FREE) {
|
|
pmem_end = last_free_pfn * VM_PAGE_SIZE;
|
|
} else {
|
|
pmem_end = MAX(last_free_pfn, last_reserved_pfn) * VM_PAGE_SIZE;
|
|
}
|
|
|
|
sector_size = find_minimum_sector_size(pmem_end);
|
|
|
|
if (sector_size <= VM_PAGE_2M) {
|
|
/* override really small sector sizes with something
|
|
more reasonable, to avoid excessive overhead on
|
|
low-memory systems */
|
|
sector_size = VM_PAGE_2M;
|
|
}
|
|
|
|
/* the absolute difference between the amount of free memory and
|
|
the amount of reserved memory. */
|
|
size_t memdiff = absdiff64(free_size, reserved_size);
|
|
|
|
if (free_size > reserved_size && sector_size < VM_PAGE_256M) {
|
|
/* if there is more free memory than reserved, we can choose
|
|
a bigger sector size, as we don't have to worry as much
|
|
about wasting memory allocating vm_pages for reserved frames.
|
|
|
|
we only do this bump if the sector size is below a certain
|
|
threshold. */
|
|
sector_size++;
|
|
|
|
/* if the difference is particularly big, increase the sector size
|
|
even further */
|
|
if (memdiff >= 0x1000000) {
|
|
sector_size++;
|
|
}
|
|
}
|
|
|
|
/* round pmem_size up to the next multiple of sector_bytes.
|
|
this works because sector_bytes is guaranteed to be a
|
|
power of 2. */
|
|
size_t sector_bytes = vm_page_order_to_bytes(sector_size);
|
|
|
|
if (pmem_end & (sector_bytes - 1)) {
|
|
pmem_end &= ~(sector_bytes - 1);
|
|
pmem_end += sector_bytes;
|
|
}
|
|
|
|
size_t sector_count = div64_pow2(pmem_end, sector_bytes);
|
|
|
|
*out_sector_count = sector_count;
|
|
*out_sector_size = sector_size;
|
|
}
|
|
|
|
void vm_sparse_init(void)
|
|
{
|
|
size_t pmem_limit = 0, reserved_size = 0, free_size = 0;
|
|
size_t last_reserved_pfn = 0, last_free_pfn = 0;
|
|
|
|
struct memblock_iter it;
|
|
for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
|
|
if (pmem_limit < it.it_limit + 1) {
|
|
pmem_limit = it.it_limit + 1;
|
|
}
|
|
}
|
|
|
|
for_each_free_mem_range (&it, 0x0, UINTPTR_MAX) {
|
|
free_size += it.it_limit - it.it_base + 1;
|
|
|
|
size_t last_pfn = it.it_limit / VM_PAGE_SIZE;
|
|
|
|
if (last_pfn > last_free_pfn) {
|
|
last_free_pfn = last_pfn;
|
|
}
|
|
}
|
|
|
|
for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) {
|
|
reserved_size += it.it_limit - it.it_base + 1;
|
|
|
|
size_t last_pfn = it.it_limit / VM_PAGE_SIZE;
|
|
|
|
if (last_pfn > last_reserved_pfn) {
|
|
last_reserved_pfn = last_pfn;
|
|
}
|
|
}
|
|
|
|
enum vm_page_order sector_size;
|
|
size_t sector_bytes = 0;
|
|
unsigned int nr_sectors = 0;
|
|
calculate_sector_size_and_count(
|
|
last_reserved_pfn, last_free_pfn,
|
|
reserved_size, free_size,
|
|
&nr_sectors, §or_size);
|
|
sector_bytes = vm_page_order_to_bytes(sector_size);
|
|
|
|
char sector_size_str[64];
|
|
data_size_to_string(sector_bytes, sector_size_str, sizeof sector_size_str);
|
|
|
|
sector_array = kzalloc(sizeof(struct vm_sector) * nr_sectors, 0);
|
|
sector_array_count = nr_sectors;
|
|
|
|
for (unsigned int i = 0; i < nr_sectors; i++) {
|
|
sector_array[i].s_size = sector_size;
|
|
sector_array[i].s_first_pfn = (i * sector_bytes) >> VM_PAGE_SHIFT;
|
|
}
|
|
|
|
size_t s, i;
|
|
phys_addr_to_sector_and_index(0x3f00000, &s, &i);
|
|
|
|
for_each_free_mem_range(&it, 0x0, UINTPTR_MAX) {
|
|
if (it.it_base & VM_PAGE_MASK) {
|
|
it.it_base &= ~VM_PAGE_MASK;
|
|
it.it_base += VM_PAGE_SIZE;
|
|
}
|
|
|
|
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
|
struct vm_page *pg = get_or_create_page(i);
|
|
pg->p_flags = 0;
|
|
}
|
|
}
|
|
|
|
for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) {
|
|
if (it.it_base & VM_PAGE_MASK) {
|
|
it.it_base &= ~VM_PAGE_MASK;
|
|
it.it_base += VM_PAGE_SIZE;
|
|
}
|
|
|
|
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
|
struct vm_page *pg = vm_page_get(i);
|
|
|
|
if (!pg) {
|
|
/* if the page doesn't exist, it is part of a sector
|
|
that only contains reserved pages. a NULL page
|
|
is implicitly treated as reserved */
|
|
continue;
|
|
}
|
|
|
|
pg->p_flags = VM_PAGE_RESERVED;
|
|
}
|
|
}
|
|
|
|
printk("vm: [sparse] initialised %zu sectors of size %s", nr_sectors, sector_size_str);
|
|
}
|
|
|
|
/* Look up the vm_page for a physical address. Returns NULL when the
   address lies beyond the sector map, or when it falls inside a sector
   whose page array was never allocated (an all-reserved sector) —
   callers treat NULL as an implicitly reserved frame. */
struct vm_page *vm_page_get_sparse(phys_addr_t addr)
{
	size_t sector_number, page_number;
	/* fixed: "&sector_number" had been mangled to "§or_number" */
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	if (sector_number >= sector_array_count) {
		return NULL;
	}

	struct vm_sector *sector = &sector_array[sector_number];

	if (!sector->s_pages || page_number >= vm_page_order_to_pages(sector->s_size)) {
		return NULL;
	}

	return &sector->s_pages[page_number];
}
|
|
|
|
size_t vm_page_get_pfn_sparse(struct vm_page *pg)
|
|
{
|
|
struct vm_sector *sector = §or_array[pg->p_sector];
|
|
return sector->s_first_pfn + (((uintptr_t)pg - (uintptr_t)sector->s_pages) / sizeof *pg);
|
|
}
|