Previously, sparse would attempt to create a smaller number of larger sectors on systems with lots of reserved memory, often causing an out-of-memory condition. The reserved-memory ratio calculation now compares reserved memory against free memory, rather than against the address of the last byte in physical memory. This improved heuristic means sparse is better at choosing an appropriate sector size, allowing it to operate on systems with high amounts of reserved memory.
250 lines
7.3 KiB
C
250 lines
7.3 KiB
C
/* ### The sparse memory model ###
|
|
|
|
under this memory model, the system memory is represented by
|
|
a set of sectors. each sector has the same, fixed, power-of-2
|
|
size, and has its own array of vm_pages. unlike the flat memory
|
|
model, this is an array of vm_page POINTERS, allowing vm_pages
|
|
to be allocated on demand.
|
|
|
|
under this memory model, only memory frames that are usable by
|
|
the kernel will have an associated vm_page. the array of pointers
|
|
adds some overhead, effectively adding an extra pointer's worth
|
|
of memory to the size of vm_page, but this is mitigated by
|
|
fewer vm_pages being allocated.
|
|
|
|
on top of this, any sector that ONLY contains reserved memory
|
|
can forego allocating their vm_page pointer array entirely,
|
|
saving even more memory.
|
|
|
|
this memory model is good for systems with large amounts of
|
|
memory, or those with less memory but a high percentage of
|
|
reserved memory. if this is not the case, the memory savings
|
|
of the sparse memory model may be outweighed by the extra
|
|
overhead, and the flat memory model may be a better choice.
|
|
*/
|
|
#include <socks/vm.h>
|
|
#include <socks/printk.h>
|
|
#include <socks/panic.h>
|
|
#include <socks/memblock.h>
|
|
#include <socks/util.h>
|
|
#include <socks/machine/cpu.h>
|
|
|
|
/* backbone of the sparse model: one vm_sector per fixed-size slice of
   physical memory; allocated and sized in vm_sparse_init() */
static struct vm_sector *sector_array = NULL;
/* number of entries in sector_array */
static size_t sector_array_count = 0;
|
|
|
|
/* Translate a physical address into the sector that covers it and the
   page index within that sector.

   @addr      physical address (any offset within a page is accepted).
   @sector_id if non-NULL, receives the index of the covering sector.
   @index     if non-NULL, receives the page index within that sector.

   Returns a pointer to the covering sector.

   NOTE(review): no bounds check is performed here; callers such as
   vm_page_get_sparse() must validate the sector index against
   sector_array_count before dereferencing. */
static struct vm_sector *phys_addr_to_sector_and_index(phys_addr_t addr, size_t *sector_id, size_t *index)
{
	/* all sectors have the same size */
	size_t step = vm_page_order_to_bytes(sector_array[0].s_size);

	/* round the address down to its page base */
	addr &= ~VM_PAGE_MASK;
	size_t sector = div64_pow2(addr, step);

	/* page number relative to the start of the owning sector */
	addr >>= VM_PAGE_SHIFT;
	addr -= ((sector * step) >> VM_PAGE_SHIFT);

	if (sector_id) {
		*sector_id = sector;
	}

	if (index) {
		*index = addr;
	}

	return &sector_array[sector];
}
|
|
|
|
/* Return the vm_page for @addr, allocating the owning sector's page
   array on first use.

   Pages in a freshly allocated array all start flagged VM_PAGE_RESERVED;
   vm_sparse_init() later clears the flag on frames that are actually
   free. Panics if the page array cannot be allocated. */
static struct vm_page *get_or_create_page(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);

	struct vm_sector *sector = &sector_array[sector_number];

	if (!sector->s_pages) {
		/* lazily allocate the per-sector vm_page array */
		size_t nr_pages = vm_page_order_to_pages(sector->s_size);
		sector->s_pages = kzalloc(nr_pages * sizeof(struct vm_page), 0);

		if (!sector->s_pages) {
			panic("out of memory!");
		}

		/* every page starts reserved until proven free */
		for (size_t i = 0; i < nr_pages; i++) {
			sector->s_pages[i].p_flags = VM_PAGE_RESERVED;
		}
	}

	/* record the owning sector so vm_page_get_pfn_sparse() can
	   recover the page frame number later */
	sector->s_pages[page_number].p_sector = sector_number;
	return &sector->s_pages[page_number];
}
|
|
|
|
/* Find the smallest page order whose sector size allows VM_MAX_SECTORS
   sectors to span all of physical memory (up to @pmem_end).
   Panics if no order below VM_PAGE_64G can cover the memory map. */
static enum vm_page_order find_minimum_sector_size(phys_addr_t pmem_end)
{
	enum vm_page_order order = VM_PAGE_4K;

	while (order < VM_PAGE_64G) {
		size_t bytes = vm_page_order_to_bytes(order);

		if (bytes * VM_MAX_SECTORS >= pmem_end) {
			return order;
		}

		order++;
	}

	panic("cannot find suitable sector size for memory map.");
}
|
|
|
|
/* this function is called to calculate the optimal sector size for the system,
|
|
taking in to account factors like the total system memory and how much memory
|
|
is reserved vs free.
|
|
|
|
this function uses some heuristics and thresholds that are untested and
|
|
are in need of improvement to ensure that sparse works well on a wide
|
|
range of systems. */
|
|
static void calculate_sector_size_and_count(
|
|
phys_addr_t pmem_end, size_t reserved_size, size_t free_size,
|
|
unsigned int *out_sector_count, enum vm_page_order *out_sector_size)
|
|
{
|
|
/* we can support up to VM_MAX_SECTORS memory sectors.
|
|
the minimum sector size is what ever is required
|
|
to cover all of physical memory in the maximum number of sectors */
|
|
enum vm_page_order sector_size = find_minimum_sector_size(pmem_end);
|
|
|
|
if (sector_size <= VM_PAGE_2M) {
|
|
/* override really small sector sizes with something
|
|
more reasonable, to avoid excessive overhead on
|
|
low-memory systems */
|
|
sector_size = VM_PAGE_2M;
|
|
}
|
|
|
|
/* the absolute difference between the amount of free memory and
|
|
the amount of reserved memory. */
|
|
size_t memdiff = absdiff64(free_size, reserved_size);
|
|
|
|
if (free_size > reserved_size && sector_size < VM_PAGE_256M) {
|
|
/* if there is more free memory than reserved, we can choose
|
|
a bigger sector size, as we don't have to worry as much
|
|
about wasting memory allocating vm_pages for reserved frames.
|
|
|
|
we only do this bump if the sector size is below a certain
|
|
threshold. */
|
|
sector_size++;
|
|
|
|
/* if the difference is particularly big, increase the sector size
|
|
even further */
|
|
if (memdiff >= 0x1000000) {
|
|
sector_size++;
|
|
}
|
|
}
|
|
|
|
/* round pmem_size up to the next multiple of sector_bytes.
|
|
this works because sector_bytes is guaranteed to be a
|
|
power of 2. */
|
|
size_t sector_bytes = vm_page_order_to_bytes(sector_size);
|
|
|
|
if (pmem_end & (sector_bytes - 1)) {
|
|
pmem_end &= ~(sector_bytes - 1);
|
|
pmem_end += sector_bytes;
|
|
}
|
|
|
|
size_t sector_count = div64_pow2(pmem_end, sector_bytes);
|
|
|
|
*out_sector_count = sector_count;
|
|
*out_sector_size = sector_size;
|
|
}
|
|
|
|
void vm_sparse_init(void)
|
|
{
|
|
size_t pmem_limit = 0, reserved_size = 0, free_size = 0;
|
|
|
|
struct memblock_iter it;
|
|
for_each_mem_range (&it, 0x0, UINTPTR_MAX) {
|
|
if (pmem_limit < it.it_limit + 1) {
|
|
pmem_limit = it.it_limit + 1;
|
|
}
|
|
}
|
|
|
|
for_each_free_mem_range (&it, 0x0, UINTPTR_MAX) {
|
|
free_size += it.it_limit - it.it_base + 1;
|
|
}
|
|
|
|
for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) {
|
|
reserved_size += it.it_limit - it.it_base + 1;
|
|
}
|
|
|
|
enum vm_page_order sector_size;
|
|
size_t sector_bytes = 0;
|
|
unsigned int nr_sectors = 0;
|
|
calculate_sector_size_and_count(
|
|
pmem_limit, reserved_size, free_size,
|
|
&nr_sectors, §or_size);
|
|
sector_bytes = vm_page_order_to_bytes(sector_size);
|
|
|
|
char sector_size_str[64];
|
|
data_size_to_string(sector_bytes, sector_size_str, sizeof sector_size_str);
|
|
|
|
sector_array = kzalloc(sizeof(struct vm_sector) * nr_sectors, 0);
|
|
sector_array_count = nr_sectors;
|
|
|
|
for (unsigned int i = 0; i < nr_sectors; i++) {
|
|
sector_array[i].s_size = sector_size;
|
|
sector_array[i].s_first_pfn = (i * sector_bytes) >> VM_PAGE_SHIFT;
|
|
}
|
|
|
|
size_t s, i;
|
|
phys_addr_to_sector_and_index(0x3f00000, &s, &i);
|
|
|
|
for_each_free_mem_range(&it, 0x0, UINTPTR_MAX) {
|
|
if (it.it_base & VM_PAGE_MASK) {
|
|
it.it_base &= ~VM_PAGE_MASK;
|
|
it.it_base += VM_PAGE_SIZE;
|
|
}
|
|
|
|
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
|
struct vm_page *pg = get_or_create_page(i);
|
|
pg->p_flags = 0;
|
|
}
|
|
}
|
|
|
|
for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) {
|
|
if (it.it_base & VM_PAGE_MASK) {
|
|
it.it_base &= ~VM_PAGE_MASK;
|
|
it.it_base += VM_PAGE_SIZE;
|
|
}
|
|
|
|
for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) {
|
|
struct vm_page *pg = vm_page_get(i);
|
|
|
|
if (!pg) {
|
|
/* if the page doesn't exist, it is part of a sector
|
|
that only contains reserved pages. a NULL page
|
|
is implicitly treated as reserved */
|
|
continue;
|
|
}
|
|
|
|
pg->p_flags = VM_PAGE_RESERVED;
|
|
}
|
|
}
|
|
|
|
printk("vm: [sparse] initialised %zu sectors of size %s", nr_sectors, sector_size_str);
|
|
}
|
|
|
|
/* Look up the vm_page for physical address @addr.

   Returns NULL when the address lies beyond the managed range, or when
   its sector never allocated a page array (i.e. the sector contains
   only reserved memory). */
struct vm_page *vm_page_get_sparse(phys_addr_t addr)
{
	size_t sector_number, page_number;
	phys_addr_to_sector_and_index(addr, &sector_number, &page_number);
	if (sector_number >= sector_array_count) {
		return NULL;
	}

	struct vm_sector *sector = &sector_array[sector_number];

	if (!sector->s_pages || page_number >= vm_page_order_to_pages(sector->s_size)) {
		return NULL;
	}

	return &sector->s_pages[page_number];
}
|
|
|
|
size_t vm_page_get_pfn_sparse(struct vm_page *pg)
|
|
{
|
|
struct vm_sector *sector = §or_array[pg->p_sector];
|
|
return sector->s_first_pfn + (((uintptr_t)pg - (uintptr_t)sector->s_pages) / sizeof *pg);
|
|
}
|