Review notes (free text ahead of the first "diff --git" header; not part of any hunk).

Purpose: add an x86_64 physical-map (pmap) layer.
  - arch/paging.h:    PTE flag bits, the four page-table level structs, page_size_t.
  - pmap.c:           do_pmap_add() walks PML4T -> PDPT -> PDIR -> PTAB, allocating
                      missing levels with kzalloc(), and writes a 4KiB/2MiB/1GiB entry.
                      pmap_bootstrap() maps 2GiB at VM_KERNEL_VOFFSET with huge pages
                      and calls pmap_switch(). The public pmap_* entry points are stubs.
  - pmap_ctrl.S:      pmap_switch (loads %cr3), gigabyte_pages (CPUID 0x80000001, EDX
                      mask 0x4000000).
  - status.h / vm.h:  KERN_INVALID_ARGUMENT, KERN_UNSUPPORTED, vm_prot_t.

Changes made in review (all in place, hunk line counts unchanged):
  1. make_pte(): a mapping without VM_PROT_EXEC now gets PTE_NX. It previously got
     PTE_RW, which made every non-executable page writable and never set NX.
  2. delete_pdir(): the table address is now masked with ~VM_PAGE_MASK, the same mask
     delete_ptab() and ENTRY_TO_PTR() use. The old ~0x1FFFFF mask dropped 21 bits and
     produced a wrong address for any table that is not 2MiB-aligned.
  3. Comment typo "ampping" -> "mapping".

NOTE(review), not changed here:
  - The operands of the bare "#include" lines are missing in the copy under review and
    are left as found; restore them before applying.
  - Indentation and alignment whitespace was reconstructed; context and "-" lines in
    init.c, status.h and vm.h may need adjusting to match the tree.
  - The blob id on the pmap.c "index" line predates the fixes above.
  - kzalloc() results in alloc_pmap() and do_pmap_add() are used without a NULL check.
  - PTE_NX is bit 63; presumably the boot code enables EFER.NXE - confirm.

diff --git a/arch/x86_64/include/arch/paging.h b/arch/x86_64/include/arch/paging.h
new file mode 100644
index 0000000..7335e31
--- /dev/null
+++ b/arch/x86_64/include/arch/paging.h
@@ -0,0 +1,54 @@
+#ifndef ARCH_PAGING_H_
+#define ARCH_PAGING_H_
+
+#include
+#include
+
+#define PTE_PRESENT 0x01ULL
+#define PTE_RW 0x02ULL
+#define PTE_USR 0x04ULL
+#define PTE_WRITETHROUGH 0x08ULL
+#define PTE_NOCACHE 0x10ULL
+#define PTE_PAGESIZE 0x80ULL
+#define PTE_NX 0x8000000000000000ULL
+
+typedef phys_addr_t pml4t_ptr_t;
+typedef uint64_t pte_t;
+
+typedef struct pml4t {
+	phys_addr_t p_entries[512];
+} __packed pml4t_t;
+
+typedef struct pdpt {
+	union {
+		/* 4KiB and 2MiB pages */
+		phys_addr_t p_entries[512];
+		/* 1GiB pages */
+		pte_t p_pages[512];
+	};
+} __packed pdpt_t;
+
+typedef struct pdir {
+	union {
+		/* 4KiB pages */
+		phys_addr_t p_entries[512];
+		/* 2MiB pages */
+		pte_t p_pages[512];
+	};
+} __packed pdir_t;
+
+typedef struct ptab {
+	pte_t p_pages[512];
+} __packed ptab_t;
+
+typedef enum page_size {
+	PS_4K,
+	PS_2M,
+	PS_1G,
+} page_size_t;
+
+/* returns 1 if gigabyte pages are supported by the CPU, 0 otherwise.
+   defined in pmap_ctrl.S */
+extern int gigabyte_pages(void);
+
+#endif
diff --git a/arch/x86_64/include/socks/machine/pmap.h b/arch/x86_64/include/socks/machine/pmap.h
new file mode 100644
index 0000000..85a1829
--- /dev/null
+++ b/arch/x86_64/include/socks/machine/pmap.h
@@ -0,0 +1,9 @@
+#ifndef SOCKS_X86_64_PMAP_H_
+#define SOCKS_X86_64_PMAP_H_
+
+#include
+
+typedef pml4t_ptr_t ml_pmap_t;
+typedef uint64_t ml_pfn_t;
+
+#endif
diff --git a/arch/x86_64/init.c b/arch/x86_64/init.c
index 00ce61a..ebdb374 100644
--- a/arch/x86_64/init.c
+++ b/arch/x86_64/init.c
@@ -1,4 +1,5 @@
-#include "socks/types.h"
+#include
+#include
 #include
 #include
 #include
@@ -47,5 +48,7 @@ int ml_init(uintptr_t arg)
 
 	e820_scan(PTR32(mb->mmap_addr), mb->mmap_length);
 
+	pmap_bootstrap();
+
 	return 0;
 }
diff --git a/arch/x86_64/pmap.c b/arch/x86_64/pmap.c
new file mode 100644
index 0000000..e8c379f
--- /dev/null
+++ b/arch/x86_64/pmap.c
@@ -0,0 +1,280 @@
+#include "socks/types.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* some helpful datasize constants */
+#define C_1GiB 0x40000000ULL
+#define C_2GiB (2 * C_1GiB)
+
+#define BAD_INDEX ((unsigned int)-1)
+#define PTR_TO_ENTRY(x) (((x) & ~VM_PAGE_MASK) | PTE_PRESENT | PTE_RW)
+#define ENTRY_TO_PTR(x) ((x) & ~VM_PAGE_MASK)
+
+#define PFN(x) ((x) >> VM_PAGE_SHIFT)
+
+static int can_use_gbpages = 0;
+static pmap_t kernel_pmap;
+
+static size_t ps_size(page_size_t ps)
+{
+	switch (ps) {
+	case PS_4K:
+		return 0x1000;
+	case PS_2M:
+		return 0x200000;
+	case PS_1G:
+		return 0x40000000;
+	default:
+		return 0;
+	}
+}
+
+static pmap_t alloc_pmap(pml4t_t **pt)
+{
+	pml4t_t *p = kzalloc(sizeof *p, 0);
+	*pt = p;
+	return vm_virt_to_phys(p);
+}
+
+static pte_t make_pte(pfn_t pfn, vm_prot_t prot, page_size_t size)
+{
+	pte_t v = pfn;
+
+	switch (size) {
+	case PS_1G:
+		/* pfn_t is in terms of 4KiB pages, convert to 1GiB page frame number */
+		pfn >>= 18;
+		v = (pfn & 0x3FFFFF) << 30;
+		break;
+	case PS_2M:
+		/* pfn_t is in terms of 4KiB pages, convert to 2MiB page frame number */
+		pfn >>= 9;
+		v = (pfn & 0x7FFFFFFF) << 21;
+		break;
+	case PS_4K:
+		v = (pfn & 0xFFFFFFFFFF) << 12;
+		break;
+	default:
+		return 0;
+	}
+
+	v |= PTE_PRESENT;
+
+	if (size != PS_4K) {
+		v |= PTE_PAGESIZE;
+	}
+
+	if (prot & VM_PROT_WRITE) {
+		v |= PTE_RW;
+	}
+
+	if (!(prot & VM_PROT_EXEC)) {
+		v |= PTE_NX; /* NOTE(review): bit 63 presumably needs EFER.NXE enabled - confirm */
+	}
+
+	if ((prot & VM_PROT_USER) && !(prot & VM_PROT_SVR)) {
+		v |= PTE_USR;
+	}
+
+	return v;
+}
+
+static void delete_ptab(phys_addr_t pt)
+{
+	if (pt & PTE_PAGESIZE) {
+		/* this entry points to a hugepage, nothing to delete */
+		return;
+	}
+
+	pt &= ~VM_PAGE_MASK;
+	if (!pt) {
+		return;
+	}
+
+	ptab_t *ptab = vm_phys_to_virt(pt);
+	kfree(ptab);
+}
+
+static void delete_pdir(phys_addr_t pd)
+{
+	if (pd & PTE_PAGESIZE) {
+		/* this entry points to a hugepage, nothing to delete */
+		return;
+	}
+
+	pd &= ~VM_PAGE_MASK; /* same mask as delete_ptab(): keep the table address, drop the low flag bits */
+
+	pdir_t *pdir = vm_phys_to_virt(pd);
+	for (int i = 0; i < 512; i++) {
+		if (pdir->p_pages[i] & PTE_PAGESIZE) {
+			/* this is a hugepage, there is nothing to delete */
+			continue;
+		}
+
+		delete_ptab(pdir->p_entries[i]);
+	}
+
+	kfree(pdir);
+}
+
+static kern_status_t do_pmap_add(pmap_t pmap, void *p, pfn_t pfn, vm_prot_t prot, page_size_t size)
+{
+	uintptr_t pv = (uintptr_t)p;
+	unsigned int
+		pml4t_index = BAD_INDEX,
+		pdpt_index = BAD_INDEX,
+		pd_index = BAD_INDEX,
+		pt_index = BAD_INDEX;
+
+	switch (size) {
+	case PS_4K:
+		pml4t_index = (pv >> 39) & 0x1FF;
+		pdpt_index = (pv >> 30) & 0x1FF;
+		pd_index = (pv >> 21) & 0x1FF;
+		pt_index = (pv >> 12) & 0x1FF;
+		break;
+	case PS_2M:
+		pml4t_index = (pv >> 39) & 0x1FF;
+		pdpt_index = (pv >> 30) & 0x1FF;
+		pd_index = (pv >> 21) & 0x1FF;
+		break;
+	case PS_1G:
+		if (!can_use_gbpages) {
+			return KERN_UNSUPPORTED;
+		}
+		pml4t_index = (pv >> 39) & 0x1FF;
+		pdpt_index = (pv >> 30) & 0x1FF;
+		break;
+	default:
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	/* 1. get PML4T (mandatory) */
+	pml4t_t *pml4t = vm_phys_to_virt(ENTRY_TO_PTR(pmap));
+	if (!pml4t) {
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	/* 2. traverse PML4T, get PDPT (mandatory) */
+	pdpt_t *pdpt = NULL;
+	if (!pml4t->p_entries[pml4t_index]) {
+		pdpt = kzalloc(sizeof *pdpt, 0);
+		pml4t->p_entries[pml4t_index] = PTR_TO_ENTRY(vm_virt_to_phys(pdpt));
+	} else {
+		pdpt = vm_phys_to_virt(ENTRY_TO_PTR(pml4t->p_entries[pml4t_index]));
+	}
+
+	/* if we're mapping a 1GiB page, we stop here */
+	if (size == PS_1G) {
+		if (pdpt->p_entries[pdpt_index] != 0) {
+			/* this slot points to a ptab, delete it.
+			   if this slot points to a hugepage, this does nothing */
+			delete_pdir(pdpt->p_entries[pdpt_index]);
+		}
+
+		pdpt->p_pages[pdpt_index] = make_pte(pfn, prot, size);
+
+		return KERN_OK;
+	}
+
+
+	/* 3. traverse PDPT, get PDIR (optional, 4K and 2M only) */
+	pdir_t *pdir = NULL;
+	if (!pdpt->p_entries[pdpt_index] || pdpt->p_pages[pdpt_index] & PTE_PAGESIZE) {
+		/* entry is null, or points to a hugepage */
+		pdir = kzalloc(sizeof *pdir, 0);
+		pdpt->p_entries[pdpt_index] = PTR_TO_ENTRY(vm_virt_to_phys(pdir));
+	} else {
+		pdir = vm_phys_to_virt(ENTRY_TO_PTR(pdpt->p_entries[pdpt_index]));
+	}
+
+	/* if we're mapping a 2MiB page, we stop here */
+	if (size == PS_2M) {
+		if (pdir->p_entries[pd_index] != 0) {
+			/* this slot points to a ptab, delete it.
+			   if this slot points to a hugepage, this does nothing */
+			delete_ptab(pdir->p_entries[pd_index]);
+		}
+
+		pdir->p_pages[pd_index] = make_pte(pfn, prot, size);
+		return KERN_OK;
+	}
+
+	/* 4. traverse PDIR, get PTAB (optional, 4K only) */
+	ptab_t *ptab = NULL;
+	if (!pdir->p_entries[pd_index] || pdir->p_pages[pd_index] & PTE_PAGESIZE) {
+		/* entry is null, or points to a hugepage */
+		ptab = kzalloc(sizeof *ptab, 0);
+		pdir->p_entries[pd_index] = PTR_TO_ENTRY(vm_virt_to_phys(ptab));
+	} else {
+		ptab = vm_phys_to_virt(ENTRY_TO_PTR(pdir->p_entries[pd_index]));
+	}
+
+	ptab->p_pages[pt_index] = make_pte(pfn, prot, size);
+	return KERN_OK;
+}
+
+void pmap_bootstrap(void)
+{
+	can_use_gbpages = gigabyte_pages();
+	printk("pmap: gigabyte pages %sabled", can_use_gbpages == 1 ? "en" : "dis");
+
+	page_size_t hugepage = PS_2M;
+	if (can_use_gbpages) {
+		hugepage = PS_1G;
+	}
+	size_t hugepage_sz = ps_size(hugepage);
+
+	pml4t_t *kernel_pml4t;
+	kernel_pmap = alloc_pmap(&kernel_pml4t);
+
+	//do_pmap_add(kernel_pmap, NULL, 0, VM_PROT_READ, PS_2M);
+
+	/* map 2GiB at the end of the address space to cover
+	   the kernel and memblock-allocated data */
+	uintptr_t vbase = VM_KERNEL_VOFFSET;
+	for (size_t i = 0; i < C_2GiB; i += hugepage_sz) {
+		do_pmap_add(kernel_pmap,
+			(void *)(vbase + i),
+			PFN(i),
+			VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXEC | VM_PROT_SVR,
+			hugepage);
+	}
+
+	pmap_switch(kernel_pmap);
+	printk("pmap: kernel pmap initialised");
+}
+
+pmap_t pmap_create(void)
+{
+	return 0;
+}
+
+void pmap_destroy(pmap_t pmap)
+{
+
+}
+
+kern_status_t pmap_add(pmap_t pmap, void *p, pfn_t pfn, vm_prot_t prot, pmap_flags_t flags)
+{
+	return KERN_OK;
+}
+
+kern_status_t pmap_add_block(pmap_t pmap, void *p, pfn_t pfn, size_t len, vm_prot_t prot, pmap_flags_t flags)
+{
+	return KERN_OK;
+}
+
+kern_status_t pmap_remove(pmap_t pmap, void *p)
+{
+	return KERN_OK;
+}
+
+kern_status_t pmap_remove_range(pmap_t pmap, void *p, size_t len)
+{
+	return KERN_OK;
+}
diff --git a/arch/x86_64/pmap_ctrl.S b/arch/x86_64/pmap_ctrl.S
new file mode 100644
index 0000000..28c9b67
--- /dev/null
+++ b/arch/x86_64/pmap_ctrl.S
@@ -0,0 +1,31 @@
+
+	.global pmap_switch
+	.type pmap_switch, @function
+
+pmap_switch:
+	mov %rdi, %cr3
+	ret
+
+	.global gigabyte_pages
+	.type gigabyte_pages, @function
+
+gigabyte_pages:
+	push %rbp
+	mov %rsp, %rbp
+
+	push %rbx
+
+	mov $0x80000001, %eax
+	cpuid
+	andl $0x4000000, %edx
+
+	jz 2f
+
+1:	mov $0x1, %rax
+	jmp 3f
+
+2:	mov $0x0, %rax
+
+3:	pop %rbx
+	pop %rbp
+	ret
diff --git a/include/socks/pmap.h b/include/socks/pmap.h
new file mode 100644
index 0000000..6216ed6
--- /dev/null
+++ b/include/socks/pmap.h
@@ -0,0 +1,32 @@
+#ifndef SOCKS_PMAP_H_
+#define SOCKS_PMAP_H_
+
+/* all the functions declared in this file are defined in arch/xyz/pmap.c */
+
+#include
+#include
+#include
+#include
+
+#define PFN(x) ((x) >> VM_PAGE_SHIFT)
+
+typedef ml_pmap_t pmap_t;
+typedef ml_pfn_t pfn_t;
+
+typedef enum pmap_flags {
+	PMAP_HUGEPAGE = 0x01u,
+} pmap_flags_t;
+
+extern void pmap_bootstrap(void);
+
+extern pmap_t pmap_create(void);
+extern void pmap_destroy(pmap_t pmap);
+extern void pmap_switch(pmap_t pmap);
+
+extern kern_status_t pmap_add(pmap_t pmap, void *p, pfn_t pfn, vm_prot_t prot, pmap_flags_t flags);
+extern kern_status_t pmap_add_block(pmap_t pmap, void *p, pfn_t pfn, size_t len, vm_prot_t prot, pmap_flags_t flags);
+
+extern kern_status_t pmap_remove(pmap_t pmap, void *p);
+extern kern_status_t pmap_remove_range(pmap_t pmap, void *p, size_t len);
+
+#endif
diff --git a/include/socks/status.h b/include/socks/status.h
index f4c3274..7aeeb21 100644
--- a/include/socks/status.h
+++ b/include/socks/status.h
@@ -3,8 +3,10 @@
 
 typedef unsigned int kern_status_t;
 
-#define KERN_OK             (0)
-#define KERN_UNIMPLEMENTED  (1)
-#define KERN_NAME_EXISTS    (2)
+#define KERN_OK                (0)
+#define KERN_UNIMPLEMENTED     (1)
+#define KERN_NAME_EXISTS       (2)
+#define KERN_INVALID_ARGUMENT  (3)
+#define KERN_UNSUPPORTED       (4)
 
 #endif
diff --git a/include/socks/vm.h b/include/socks/vm.h
index 2036f2a..a748f95 100644
--- a/include/socks/vm.h
+++ b/include/socks/vm.h
@@ -30,6 +30,14 @@ typedef struct vm_object {
 	unsigned int reserved;
 } vm_object_t;
 
+typedef enum vm_prot {
+	VM_PROT_READ = 0x01u,
+	VM_PROT_WRITE = 0x02u,
+	VM_PROT_EXEC = 0x04u,
+	VM_PROT_USER = 0x08u,
+	VM_PROT_SVR = 0x10u,
+} vm_prot_t;
+
 typedef enum vm_flags {
 	VM_GET_DMA = 0x01u,
 } vm_flags_t;