From 4677c881e15f24e003b9d8d7550b789d2472ecc8 Mon Sep 17 00:00:00 2001 From: Max Wash Date: Mon, 1 May 2023 18:13:44 +0100 Subject: [PATCH] x86_64: acpi: bring up other APs in long mode --- arch/x86_64/acpi/ap_trampoline.S | 67 +++++++++---- arch/x86_64/acpi/apic.cpp | 49 ++------- arch/x86_64/acpi/local_apic.cpp | 49 +++++++++ arch/x86_64/acpi/{smp.c => smp.cpp} | 100 ++++++++++++------- arch/x86_64/e820.c | 5 +- arch/x86_64/include/arch/acpi/local_apic.hpp | 45 ++++++--- 6 files changed, 201 insertions(+), 114 deletions(-) rename arch/x86_64/acpi/{smp.c => smp.cpp} (50%) diff --git a/arch/x86_64/acpi/ap_trampoline.S b/arch/x86_64/acpi/ap_trampoline.S index 4fb9ac7..4c8519a 100644 --- a/arch/x86_64/acpi/ap_trampoline.S +++ b/arch/x86_64/acpi/ap_trampoline.S @@ -20,18 +20,21 @@ acpi_bsp_lapic_id: .extern __ap_stack_top .extern __bsp_done + .extern __this_ap_ok + .extern __all_ap_ok .extern ap_startup .type ap_startup, @function - .global ap_trampoline_code + .global ap_trampoline # this code will be relocated to 0x8000, sets up environment for calling a C function .code16 -ap_trampoline_code: + .align 0x100 +ap_trampoline: cli cld - ljmp $0, $0x8040 + ljmp $0, $0x8080 .align 16 _L8010_GDT_table: .long 0, 0 @@ -43,18 +46,26 @@ _L8030_GDT_value: .long 0x8010 .long 0, 0 .align 64 -_L8040: +_L8040_GDT64_table: + .long 0, 0 + .long 0x0000ffff, 0x002f9a00 # kernel code + .long 0x0000ffff, 0x002f9200 # kernel data +_L8058_GDT64_value: + .word _L8058_GDT64_value - _L8040_GDT64_table - 1 + .quad 0x8040 + + .align 64 +_L8080: xorw %ax, %ax movw %ax, %ds lgdtl 0x8030 movl %cr0, %eax orl $1, %eax movl %eax, %cr0 - ljmp $8, $0x8060 - + ljmp $8, $0x80a0 .align 32 .code32 -_L8060: +_L80A0: movw $16, %ax movw %ax, %ds movw %ax, %ss @@ -63,20 +74,34 @@ _L8060: cpuid shrl $24, %ebx movl %ebx, %edi - - movl $0x9000, %eax + + movl %cr4, %eax + orl $0xb0, %eax + movl %eax, %cr4 + + movl $0x8ff8, %eax movl (%eax), %eax mov %eax, %cr3 + + movl $0xC0000080, %ecx + rdmsr + # enable long mode and the 'syscall/sysret' instructions + orl $0x00000101, %eax + wrmsr + + movl $0x8058, %eax + lgdt (%eax) + + movl %cr0, %ecx + orl $0x8000002a, %ecx + movl %ecx, %cr0 + + ljmpl $0x08, $0x8100 + + .code64 + .align 64 +_L8100: + mov (__ap_stack_top), %rsp + movabsq $(ap_trampoline_exit), %rax + callq *%rax hlt - # set up 32k stack, one for each core. It is important that all core must have its own stack - #shll $15, %ebx - #movl __ap_stack_top, %esp - #subl %ebx, %esp - #pushl %edi - # spinlock, wait for the BSP to finish -#1: pause - #cmpb $0, __bsp_done - #jz 1b - #lock incb aprunning - # jump into C code (should never return) - #ljmp $8, $ap_startup diff --git a/arch/x86_64/acpi/apic.cpp b/arch/x86_64/acpi/apic.cpp index 139a290..a12328b 100644 --- a/arch/x86_64/acpi/apic.cpp +++ b/arch/x86_64/acpi/apic.cpp @@ -21,7 +21,6 @@ static unsigned int bsp_id = (unsigned int)-1; using namespace arch::acpi; -static uint32_t *lapic_base; static struct queue io_apics; extern "C" { @@ -46,45 +45,13 @@ static void disable_8259(void) outportb(PIC2_DATA, 0xFF); } -static void *find_lapic(struct acpi_madt *madt) +kern_status_t local_apic_enable(struct acpi_madt *madt) { - phys_addr_t local_apic = madt->m_lapic_ptr; - - unsigned char *p = (unsigned char *)madt + sizeof *madt; - unsigned char *madt_end = (unsigned char *)madt + madt->m_base.s_length; - - while (p < madt_end) { - struct acpi_madt_record *rec = (struct acpi_madt_record *)p; - struct lapic_override_record *lapic; - - switch (rec->r_type) { - case ACPI_MADT_LAPIC_OVERRIDE: - lapic = (struct lapic_override_record *)(rec + 1); - return vm_phys_to_virt(lapic->l_lapic_ptr); - default: - break; - } - - p += rec->r_length; - } - - return vm_phys_to_virt(local_apic); -} - - -kern_status_t local_apic_enable(void) -{ - struct acpi_madt *madt = (struct acpi_madt *)acpi_find_sdt(ACPI_SIG_MADT); - if (!madt) { - return KERN_UNSUPPORTED; - } - apic_set_base(apic_get_base()); - lapic_base = (uint32_t *)find_lapic(madt); - local_apic lapic(lapic_base); + local_apic::find(madt, local_apic::get()); - lapic.write(0xF0, 0x1FF); - lapic.ack(); + local_apic::get().write(0xF0, 0x1FF); + local_apic::get().ack(); return KERN_OK; } @@ -136,7 +103,8 @@ static struct irq_hook lapic_clock_irq_hook = { void local_apic_config_timer(void) { - local_apic lapic(lapic_base); + local_apic& lapic = local_apic::get(); + lapic.write(local_apic::TIMER_DIV, 0x3); lapic.write(local_apic::TIMER_INITCOUNT, (uint32_t)-1); clock_wait(10); @@ -223,7 +191,7 @@ kern_status_t apic_init(void) init_all_ioapic(madt); } - local_apic_enable(); + local_apic_enable(madt); disable_8259(); apic_enabled = 1; @@ -243,8 +211,7 @@ kern_status_t apic_init(void) void irq_ack(unsigned int vec) { if (apic_enabled) { - local_apic lapic(lapic_base); - lapic.ack(); + local_apic::get().ack(); } else { if (vec >= 40) { outportb(0xA0, 0x20); diff --git a/arch/x86_64/acpi/local_apic.cpp b/arch/x86_64/acpi/local_apic.cpp index 87ede23..5f551af 100644 --- a/arch/x86_64/acpi/local_apic.cpp +++ b/arch/x86_64/acpi/local_apic.cpp @@ -1,14 +1,44 @@ #include +#include #include #include #define LAPIC_REG_EOI 0xB0 namespace arch::acpi { +static local_apic lapic; + local_apic::local_apic(uint32_t *base) : base_(base) { } +kern_status_t local_apic::find(struct acpi_madt *madt, local_apic& out) +{ + phys_addr_t local_apic = madt->m_lapic_ptr; + + unsigned char *p = (unsigned char *)madt + sizeof *madt; + unsigned char *madt_end = (unsigned char *)madt + madt->m_base.s_length; + + while (p < madt_end) { + struct acpi_madt_record *rec = (struct acpi_madt_record *)p; + struct lapic_override_record *lapic; + + switch (rec->r_type) { + case ACPI_MADT_LAPIC_OVERRIDE: + lapic = (struct lapic_override_record *)(rec + 1); + out.base_ = (uint32_t *)vm_phys_to_virt(lapic->l_lapic_ptr); + return KERN_OK; + default: + break; + } + + p += rec->r_length; + } + + out.base_ = (uint32_t *)vm_phys_to_virt(local_apic); + return KERN_OK; +} + uint32_t local_apic::read(uint32_t reg) { return read_once(base_ + (reg >> 2)); @@ -23,4 +53,23 @@ void local_apic::ack() { write(LAPIC_REG_EOI, 0); } + +void local_apic::send_ipi(unsigned int dest, unsigned int data) +{ + uint32_t dest_val = (read(IPI_DEST) & 0x00FFFFFF) | (dest << 24); + write(IPI_STATUS, 0); + write(IPI_DEST, dest_val); + + uint32_t icr_val = (read(IPI_ICR) & 0xFFF00000) | data; + write(IPI_ICR, icr_val); + + do { + __asm__ __volatile__("pause" : : : "memory"); + } while (read(IPI_ICR) & (1 << 12)); +} + +local_apic& local_apic::get(void) +{ + return lapic; +} } diff --git a/arch/x86_64/acpi/smp.c b/arch/x86_64/acpi/smp.cpp similarity index 50% rename from arch/x86_64/acpi/smp.c rename to arch/x86_64/acpi/smp.cpp index cb54229..c80b043 100644 --- a/arch/x86_64/acpi/smp.c +++ b/arch/x86_64/acpi/smp.cpp @@ -1,47 +1,72 @@ #include +#include #include +#include #include #include -extern uint8_t acpi_bsp_lapic_id(void); -extern char ap_trampoline[]; +using namespace arch::acpi; + +extern "C" uint8_t acpi_bsp_lapic_id(void); +extern "C" char ap_trampoline[]; + +volatile uintptr_t __ap_stack_top = 0; +volatile uint8_t __this_ap_ok = 0; +volatile uint8_t __all_ap_ok = 0; /* -static int __used send_ipi(void *lapic, unsigned int target_id, uint32_t payload) +static int __used send_ipi(void *lapic, unsigned int target_id, uint32_t +payload) { - uintptr_t lapic_ptr = (uintptr_t)lapic; + uintptr_t lapic_ptr = (uintptr_t)lapic; - *((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_STATUS_REG)) = 0; - *((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_DEST_REG)) = (*((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_DEST_REG)) & 0x00ffffff) | (target_id << 24); - *((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_ICR_REG)) = (*((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_ICR_REG)) & 0xfff00000) | payload; - do { __asm__ __volatile__ ("pause" : : : "memory"); }while(*((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_ICR_REG)) & (1 << 12)); + *((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_STATUS_REG)) = 0; + *((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_DEST_REG)) = (*((volatile +uint32_t*)(lapic_ptr + LAPIC_IPI_DEST_REG)) & 0x00ffffff) | (target_id << 24); + *((volatile uint32_t*)(lapic_ptr + LAPIC_IPI_ICR_REG)) = (*((volatile +uint32_t*)(lapic_ptr + LAPIC_IPI_ICR_REG)) & 0xfff00000) | payload; do { __asm__ +__volatile__ ("pause" : : : "memory"); }while(*((volatile uint32_t*)(lapic_ptr + +LAPIC_IPI_ICR_REG)) & (1 << 12)); - return 0; -} - -static int __used init_ap(struct acpi_madt_record *rec, void *bsp_lapic, uint8_t bsp_id) -{ - struct lapic_record *lapic = (struct lapic_record *)(rec + 1); - if (!(lapic->l_flags & 0x1) && !(lapic->l_flags & 0x2)) { - return -1; - } - - if (lapic->l_apic_id == bsp_id) { - printk("acpi: core %u online [BSP]", lapic->l_apic_id); - return 0; - } - - send_ipi(bsp_lapic, lapic-> l_apic_id, 0xC500); - send_ipi(bsp_lapic, lapic-> l_apic_id, 0x8500); - send_ipi(bsp_lapic, lapic-> l_apic_id, 0x4600 | (AP_TRAMPOLINE_PADDR >> VM_PAGE_SHIFT)); - - printk("acpi: core %u online [AP]", lapic->l_apic_id); - return 0; + return 0; } */ -/* -kern_status_t acpi_smp_init(void) +extern "C" void ap_trampoline_exit(void) +{ + printk("OK!"); + __this_ap_ok = 1; +} + +static int init_ap(struct acpi_madt_record *rec, local_apic& lapic, uint8_t bsp_id) +{ + struct lapic_record *ap_lapic = (struct lapic_record *)(rec + 1); + if (!(ap_lapic->l_flags & 0x1) && !(ap_lapic->l_flags & 0x2)) { + return -1; + } + + if (ap_lapic->l_apic_id == bsp_id) { + printk("acpi: core %u online [BSP]", ap_lapic->l_apic_id); + return 0; + } + + struct vm_page *ap_stack = vm_page_alloc(VM_PAGE_4K, VM_NORMAL); + __ap_stack_top = (uintptr_t)vm_page_get_vaddr(ap_stack) + VM_PAGE_SIZE; + + lapic.send_ipi(ap_lapic->l_apic_id, 0xC500); + lapic.send_ipi(ap_lapic->l_apic_id, 0x8500); + milli_sleep(10); // wait 10 msec + lapic.send_ipi(ap_lapic->l_apic_id, 0x4600 | (AP_TRAMPOLINE_PADDR >> VM_PAGE_SHIFT)); + + do { + ml_cpu_relax(); + } while (__this_ap_ok == 0); + + printk("acpi: core %u online [AP]", ap_lapic->l_apic_id); + return 0; +} + +kern_status_t bring_all_ap_online(void) { struct acpi_madt *madt = (struct acpi_madt *)acpi_find_sdt(ACPI_SIG_MADT); if (!madt) { @@ -49,10 +74,16 @@ kern_status_t acpi_smp_init(void) } uint8_t bsp_id = acpi_bsp_lapic_id(); - void *bsp_lapic = find_lapic(madt); + void *ap_trampoline_dest = vm_phys_to_virt(AP_TRAMPOLINE_PADDR); memcpy(ap_trampoline_dest, ap_trampoline, VM_PAGE_SIZE); + pmap_t kernel_pmap = get_kernel_pmap(); + uint32_t *pmap_ptr = (uint32_t *)vm_phys_to_virt(0x8ff8); + *pmap_ptr = kernel_pmap; + + pmap_add(kernel_pmap, (void *)0x8000, 8, (enum vm_prot)(VM_PROT_READ | VM_PROT_EXEC), PMAP_NORMAL); + unsigned char *p = (unsigned char *)madt + sizeof *madt; unsigned char *madt_end = (unsigned char *)madt + madt->m_base.s_length; @@ -63,7 +94,7 @@ kern_status_t acpi_smp_init(void) switch (rec->r_type) { case ACPI_MADT_LAPIC: - if (init_ap(rec, bsp_lapic, bsp_id) == 0) { + if (init_ap(rec, local_apic::get(), bsp_id) == 0) { nr_processors++; } break; @@ -77,7 +108,6 @@ kern_status_t acpi_smp_init(void) printk("acpi: found %u logical cores", nr_processors); return KERN_OK; } -*/ static void no_smp_config(void) { @@ -138,5 +168,5 @@ kern_status_t acpi_scan_cpu_topology(void) kern_status_t smp_init(void) { - return KERN_OK; + return bring_all_ap_online(); } diff --git a/arch/x86_64/e820.c b/arch/x86_64/e820.c index 5b5b88b..40a89fa 100644 --- a/arch/x86_64/e820.c +++ b/arch/x86_64/e820.c @@ -44,10 +44,13 @@ void e820_scan(multiboot_memory_map_t *mmap, size_t len) memblock_reserve(entry->addr, entry->len); mem_reserved += entry->len; } - + mem_total += entry->len; } + /* reserve a page at 0x8000 for use by the ACPI AP trampoline code */ + memblock_reserve(0x8000, 0x1000); + char str_mem_total[64], str_mem_reserved[64]; data_size_to_string(mem_total, str_mem_total, sizeof str_mem_total); data_size_to_string(mem_reserved, str_mem_reserved, sizeof str_mem_reserved); diff --git a/arch/x86_64/include/arch/acpi/local_apic.hpp b/arch/x86_64/include/arch/acpi/local_apic.hpp index cdc8694..358bca3 100644 --- a/arch/x86_64/include/arch/acpi/local_apic.hpp +++ b/arch/x86_64/include/arch/acpi/local_apic.hpp @@ -1,30 +1,43 @@ #ifndef ARCH_ACPI_LOCAL_APIC_HPP_ #define ARCH_ACPI_LOCAL_APIC_HPP_ +#include #include #define APIC_LVT_INT_MASKED 0x10000 #define APIC_LVT_TIMER_MODE_PERIODIC 0x20000 +struct acpi_madt; + namespace arch::acpi { - class local_apic { - uint32_t *base_ = nullptr; +class local_apic { + uint32_t *base_ = nullptr; - public: - enum { - EOI = 0xB0, - LVT_TIMER = 0x320, - TIMER_INITCOUNT = 0x380, - TIMER_CURCOUNT = 0x390, - TIMER_DIV = 0x3E0, - }; - - local_apic(uint32_t *base); - - uint32_t read(uint32_t reg); - void write(uint32_t reg, uint32_t val); - void ack(); +public: + enum { + EOI = 0xB0, + IPI_STATUS = 0x280, + IPI_ICR = 0x300, + IPI_DEST = 0x310, + LVT_TIMER = 0x320, + TIMER_INITCOUNT = 0x380, + TIMER_CURCOUNT = 0x390, + TIMER_DIV = 0x3E0, }; + + local_apic(uint32_t *base = nullptr); + + static kern_status_t find(struct acpi_madt *madt, local_apic& out); + static local_apic& get(void); + + uint32_t read(uint32_t reg); + void write(uint32_t reg, uint32_t val); + void ack(); + + uint32_t *ptr() const { return base_; } + + void send_ipi(unsigned int dest, unsigned int data); +}; } #endif