/* ### The sparse memory model ###
 *
 * Under this memory model, system memory is represented by a set of sectors.
 * Each sector has the same fixed, power-of-2 size, and has its own array of
 * vm_pages. Unlike the flat memory model, this is an array of vm_page
 * POINTERS, allowing vm_pages to be allocated on demand. Under this memory
 * model, only memory frames that are usable by the kernel will have an
 * associated vm_page. The array of pointers adds some overhead, effectively
 * adding an extra pointer's worth of memory to the size of each vm_page, but
 * this is mitigated by fewer vm_pages being allocated. On top of this, any
 * sector that ONLY contains reserved memory can forego allocating its
 * vm_page pointer array entirely, saving even more memory.
 *
 * This memory model is a good fit for systems with large amounts of memory,
 * or those with less memory but a high percentage of reserved memory. If
 * neither is the case, the memory savings of the sparse memory model may be
 * outweighed by the extra overhead, and the flat memory model may be a
 * better choice.
*/ #include #include #include #include #include static vm_sector_t *sector_array = NULL; static size_t sector_array_count = 0; static vm_sector_t *phys_addr_to_sector_and_index(phys_addr_t addr, size_t *sector_id, size_t *index) { /* all sectors have the same size */ size_t step = vm_page_order_to_bytes(sector_array[0].s_size); addr &= ~VM_PAGE_MASK; size_t sector = div64_pow2(addr, step); addr >>= VM_PAGE_SHIFT; addr -= ((sector * step) >> VM_PAGE_SHIFT); if (sector_id) { *sector_id = sector; } if (index) { *index = addr; } return §or_array[sector]; } static vm_page_t *get_or_create_page(phys_addr_t addr) { size_t sector_number, page_number; phys_addr_to_sector_and_index(addr, §or_number, &page_number); vm_sector_t *sector = §or_array[sector_number]; if (!sector->s_pages) { sector->s_pages = kzalloc(vm_page_order_to_pages(sector->s_size) * sizeof(vm_page_t), 0); } sector->s_pages[page_number].p_sector = sector_number; return §or->s_pages[page_number]; } static vm_page_order_t find_minimum_sector_size(size_t pmem_size) { for (vm_page_order_t i = VM_PAGE_4K; i < VM_PAGE_64G; i++) { size_t order_bytes = vm_page_order_to_bytes(i); if (order_bytes * VM_MAX_SECTORS >= pmem_size) { return i; } } /* TODO panic here, once panic() is implemented. */ return VM_PAGE_64G; } /* this function is called to calculate the optimal sector size for the system, taking in to account factors like the total system memory and how much memory is reserved vs free. this function uses some heuristics and thresholds that are untested and are in need of improvement to ensure that sparse works well on a wide range of systems. */ static void calculate_sector_size_and_count(size_t pmem_size, size_t reserved_size, unsigned int *out_sector_count, vm_page_order_t *out_sector_size) { /* we can support up to VM_MAX_SECTORS memory sectors. 
the minimum sector size is what ever is required to cover all of physical memory in the maximum number of sectors */ vm_page_order_t sector_size = find_minimum_sector_size(pmem_size); if (sector_size <= VM_PAGE_2M) { /* override really small sector sizes with something more reasonable, to avoid excessive overhead on low-memory systems */ sector_size = VM_PAGE_2M; } size_t free_size = pmem_size - reserved_size; /* the absolute difference between the amount of free memory and the amount of reserved memory. */ size_t memdiff = absdiff64(free_size, reserved_size); if (free_size > reserved_size && sector_size < VM_PAGE_256M) { /* if there is more free memory than reserved, we can choose a bigger sector size, as we don't have to worry as much about wasting memory allocating vm_pages for reserved frames. we only do this bump if the sector size is below a certain threshold. */ sector_size++; /* if the difference is particularly big, increase the sector size even further */ if (memdiff >= 0x1000000) { sector_size++; } } /* round pmem_size up to the next multiple of sector_bytes. this works because sector_bytes is guaranteed to be a power of 2. 
*/ size_t sector_bytes = vm_page_order_to_bytes(sector_size); if (pmem_size & (sector_bytes - 1)) { pmem_size &= ~(sector_bytes - 1); pmem_size += sector_bytes; } size_t sector_count = div64_pow2(pmem_size, sector_bytes); *out_sector_count = sector_count; *out_sector_size = sector_size; } void vm_sparse_init(void) { printk("vm: using sparse memory model"); size_t pmem_size = 0, reserved_size = 0; memblock_iter_t it; for_each_mem_range (&it, 0x0, UINTPTR_MAX) { if (pmem_size < it.it_limit + 1) { pmem_size = it.it_limit + 1; } } for_each_reserved_mem_range (&it, 0x0, UINTPTR_MAX) { reserved_size += it.it_limit - it.it_base + 1; } vm_page_order_t sector_size; size_t sector_bytes = 0; unsigned int nr_sectors = 0; calculate_sector_size_and_count(pmem_size, reserved_size, &nr_sectors, §or_size); sector_bytes = vm_page_order_to_bytes(sector_size); char sector_size_str[64]; data_size_to_string(sector_bytes, sector_size_str, sizeof sector_size_str); sector_array = kzalloc(sizeof(vm_sector_t) * nr_sectors, 0); sector_array_count = nr_sectors; for (unsigned int i = 0; i < nr_sectors; i++) { sector_array[i].s_size = sector_size; sector_array[i].s_first_pfn = (i * sector_bytes) >> VM_PAGE_SHIFT; } size_t s, i; phys_addr_to_sector_and_index(0x3f00000, &s, &i); for_each_free_mem_range(&it, 0x0, UINTPTR_MAX) { if (it.it_base & VM_PAGE_MASK) { it.it_base &= ~VM_PAGE_MASK; it.it_base += VM_PAGE_SIZE; } for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) { vm_page_t *pg = get_or_create_page(i); pg->p_flags = 0; } } for_each_reserved_mem_range(&it, 0x0, UINTPTR_MAX) { if (it.it_base & VM_PAGE_MASK) { it.it_base &= ~VM_PAGE_MASK; it.it_base += VM_PAGE_SIZE; } for (uintptr_t i = it.it_base; i < it.it_limit; i += VM_PAGE_SIZE) { vm_page_t *pg = vm_page_get(i); if (!pg) { /* if the page doesn't exist, it is part of a sector that only contains reserved pages. 
a NULL page is implicitly treated as reserved */ continue; } pg->p_flags = VM_PAGE_RESERVED; } } printk("vm: initialised %zu sectors of size %s", nr_sectors, sector_size_str); } vm_page_t *vm_page_get_sparse(phys_addr_t addr) { size_t sector_number, page_number; phys_addr_to_sector_and_index(addr, §or_number, &page_number); if (sector_number >= sector_array_count) { return NULL; } vm_sector_t *sector = §or_array[sector_number]; if (!sector->s_pages || page_number >= vm_page_order_to_pages(sector->s_size)) { return NULL; } return §or->s_pages[page_number]; } size_t vm_page_get_pfn_sparse(vm_page_t *pg) { vm_sector_t *sector = §or_array[pg->p_sector]; return sector->s_first_pfn + (((uintptr_t)pg - (uintptr_t)sector->s_pages) / sizeof *pg); }