Diffstat (limited to 'Kernel/Arch')
28 files changed, 3789 insertions, 3461 deletions
diff --git a/Kernel/Arch/i386/CPU.cpp b/Kernel/Arch/i386/CPU.cpp deleted file mode 100644 index 7d13a29ec3..0000000000 --- a/Kernel/Arch/i386/CPU.cpp +++ /dev/null @@ -1,2464 +0,0 @@ -/* - * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include <AK/Assertions.h> -#include <AK/ScopeGuard.h> -#include <AK/String.h> -#include <AK/StringBuilder.h> -#include <AK/Types.h> -#include <Kernel/Arch/x86/CPU.h> -#include <Kernel/Arch/x86/ISRStubs.h> -#include <Kernel/Arch/x86/ProcessorInfo.h> -#include <Kernel/Arch/x86/SafeMem.h> -#include <Kernel/Assertions.h> -#include <Kernel/Debug.h> -#include <Kernel/IO.h> -#include <Kernel/Interrupts/APIC.h> -#include <Kernel/Interrupts/GenericInterruptHandler.h> -#include <Kernel/Interrupts/SharedIRQHandler.h> -#include <Kernel/Interrupts/SpuriousInterruptHandler.h> -#include <Kernel/Interrupts/UnhandledInterruptHandler.h> -#include <Kernel/KSyms.h> -#include <Kernel/Panic.h> -#include <Kernel/PerformanceManager.h> -#include <Kernel/Process.h> -#include <Kernel/Random.h> -#include <Kernel/Thread.h> -#include <Kernel/VM/MemoryManager.h> -#include <Kernel/VM/PageDirectory.h> -#include <Kernel/VM/ProcessPagingScope.h> -#include <LibC/mallocdefs.h> - -extern FlatPtr start_of_unmap_after_init; -extern FlatPtr end_of_unmap_after_init; -extern FlatPtr start_of_ro_after_init; -extern FlatPtr end_of_ro_after_init; - -namespace Kernel { - -READONLY_AFTER_INIT static DescriptorTablePointer s_idtr; -READONLY_AFTER_INIT static IDTEntry s_idt[256]; - -static GenericInterruptHandler* s_interrupt_handler[GENERIC_INTERRUPT_HANDLERS_COUNT]; - -static EntropySource s_entropy_source_interrupts { EntropySource::Static::Interrupts }; - -// The compiler can't see the calls to these functions inside assembly. -// Declare them, to avoid dead code warnings. 
-extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used)); -extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used)); -extern "C" u32 do_init_context(Thread* thread, u32 flags) __attribute__((used)); -extern "C" void exit_kernel_thread(void); -extern "C" void pre_init_finished(void) __attribute__((used)); -extern "C" void post_init_finished(void) __attribute__((used)); -extern "C" void handle_interrupt(TrapFrame*) __attribute__((used)); - -// clang-format off - -#if ARCH(I386) -#define EH_ENTRY(ec, title) \ - extern "C" void title##_asm_entry(); \ - extern "C" void title##_handler(TrapFrame*) __attribute__((used)); \ - asm( \ - ".globl " #title "_asm_entry\n" \ - "" #title "_asm_entry: \n" \ - " pusha\n" \ - " pushl %ds\n" \ - " pushl %es\n" \ - " pushl %fs\n" \ - " pushl %gs\n" \ - " pushl %ss\n" \ - " mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" \ - " mov %ax, %ds\n" \ - " mov %ax, %es\n" \ - " mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" \ - " mov %ax, %fs\n" \ - " pushl %esp \n" /* set TrapFrame::regs */ \ - " subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" \ - " pushl %esp \n" \ - " cld\n" \ - " call enter_trap_no_irq \n" \ - " call " #title "_handler\n" \ - " jmp common_trap_exit \n"); - -#define EH_ENTRY_NO_CODE(ec, title) \ - extern "C" void title##_asm_entry(); \ - extern "C" void title##_handler(TrapFrame*) __attribute__((used)); \ - asm( \ - ".globl " #title "_asm_entry\n" \ - "" #title "_asm_entry: \n" \ - " pushl $0x0\n" \ - " pusha\n" \ - " pushl %ds\n" \ - " pushl %es\n" \ - " pushl %fs\n" \ - " pushl %gs\n" \ - " pushl %ss\n" \ - " mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" \ - " mov %ax, %ds\n" \ - " mov %ax, %es\n" \ - " mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" \ - " mov %ax, %fs\n" \ - " pushl %esp \n" /* set TrapFrame::regs */ \ - " subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" \ - " pushl %esp \n" \ - " cld\n" \ - " call enter_trap_no_irq \n" \ - " call " #title "_handler\n" \ - " jmp common_trap_exit \n"); - -#elif ARCH(X86_64) -#define EH_ENTRY(ec, title) \ - extern "C" void title##_asm_entry(); \ - extern "C" void title##_handler(TrapFrame*); \ - asm( \ - ".globl " #title "_asm_entry\n" \ - "" #title "_asm_entry: \n" \ - " cli;hlt;\n" \ -); - -#define EH_ENTRY_NO_CODE(ec, title) \ - extern "C" void title##_handler(TrapFrame*); \ - extern "C" void title##_asm_entry(); \ -asm( \ - ".globl " #title "_asm_entry\n" \ - "" #title "_asm_entry: \n" \ - " cli;hlt;\n" \ -); -#endif - -// clang-format on - -static void dump(const RegisterState& regs) -{ - u16 ss; - u32 esp; - - if (!(regs.cs & 3)) { - ss = regs.ss; - esp = regs.esp; - } else { - ss = regs.userspace_ss; - esp = regs.userspace_esp; - } - - dbgln("Exception code: {:04x} (isr: {:04x})", regs.exception_code, regs.isr_number); - dbgln(" pc={:04x}:{:08x} eflags={:08x}", (u16)regs.cs, regs.eip, regs.eflags); - dbgln(" stack={:04x}:{:08x}", ss, esp); - dbgln(" ds={:04x} es={:04x} fs={:04x} gs={:04x}", (u16)regs.ds, (u16)regs.es, (u16)regs.fs, (u16)regs.gs); - dbgln(" eax={:08x} ebx={:08x} ecx={:08x} edx={:08x}", regs.eax, regs.ebx, regs.ecx, regs.edx); - dbgln(" ebp={:08x} esp={:08x} esi={:08x} edi={:08x}", regs.ebp, regs.esp, regs.esi, regs.edi); - dbgln(" cr0={:08x} cr2={:08x} cr3={:08x} cr4={:08x}", read_cr0(), read_cr2(), read_cr3(), read_cr4()); -} - -void handle_crash(RegisterState& regs, const char* description, int signal, bool out_of_memory) -{ - auto process = Process::current(); 
- if (!process) { - PANIC("{} with !current", description); - } - - // If a process crashed while inspecting another process, - // make sure we switch back to the right page tables. - MM.enter_process_paging_scope(*process); - - dmesgln("CRASH: CPU #{} {} in ring {}", Processor::id(), description, (regs.cs & 3)); - dump(regs); - - if (!(regs.cs & 3)) { - PANIC("Crash in ring 0"); - } - - process->crash(signal, regs.eip, out_of_memory); -} - -EH_ENTRY_NO_CODE(6, illegal_instruction); -void illegal_instruction_handler(TrapFrame* trap) -{ - clac(); - handle_crash(*trap->regs, "Illegal instruction", SIGILL); -} - -EH_ENTRY_NO_CODE(0, divide_error); -void divide_error_handler(TrapFrame* trap) -{ - clac(); - handle_crash(*trap->regs, "Divide error", SIGFPE); -} - -EH_ENTRY(13, general_protection_fault); -void general_protection_fault_handler(TrapFrame* trap) -{ - clac(); - handle_crash(*trap->regs, "General protection fault", SIGSEGV); -} - -// 7: FPU not available exception -EH_ENTRY_NO_CODE(7, fpu_exception); -void fpu_exception_handler(TrapFrame*) -{ - // Just clear the TS flag. We've already restored the FPU state eagerly. - // FIXME: It would be nice if we didn't have to do this at all. - asm volatile("clts"); -} - -// 14: Page Fault -EH_ENTRY(14, page_fault); -void page_fault_handler(TrapFrame* trap) -{ - clac(); - - auto& regs = *trap->regs; - auto fault_address = read_cr2(); - - if constexpr (PAGE_FAULT_DEBUG) { - u32 fault_page_directory = read_cr3(); - dbgln("CPU #{} ring {} {} page fault in PD={:#x}, {}{} {}", - Processor::is_initialized() ? Processor::id() : 0, - regs.cs & 3, - regs.exception_code & 1 ? "PV" : "NP", - fault_page_directory, - regs.exception_code & 8 ? "reserved-bit " : "", - regs.exception_code & 2 ? "write" : "read", - VirtualAddress(fault_address)); - - dump(regs); - } - - bool faulted_in_kernel = !(regs.cs & 3); - - if (faulted_in_kernel && Processor::current().in_irq()) { - // If we're faulting in an IRQ handler, first check if we failed - // due to safe_memcpy, safe_strnlen, or safe_memset. If we did, - // gracefully continue immediately. 
Because we're in an IRQ handler - // we can't really try to resolve the page fault in a meaningful - // way, so we need to do this before calling into - // MemoryManager::handle_page_fault, which would just bail and - // request a crash - if (handle_safe_access_fault(regs, fault_address)) - return; - } - - auto current_thread = Thread::current(); - - if (current_thread) { - current_thread->set_handling_page_fault(true); - PerformanceManager::add_page_fault_event(*current_thread, regs); - } - - ScopeGuard guard = [current_thread] { - if (current_thread) - current_thread->set_handling_page_fault(false); - }; - - if (!faulted_in_kernel && !MM.validate_user_stack(current_thread->process(), VirtualAddress(regs.userspace_esp))) { - dbgln("Invalid stack pointer: {}", VirtualAddress(regs.userspace_esp)); - handle_crash(regs, "Bad stack on page fault", SIGSTKFLT); - } - - if (fault_address >= (FlatPtr)&start_of_ro_after_init && fault_address < (FlatPtr)&end_of_ro_after_init) { - dump(regs); - PANIC("Attempt to write into READONLY_AFTER_INIT section"); - } - - if (fault_address >= (FlatPtr)&start_of_unmap_after_init && fault_address < (FlatPtr)&end_of_unmap_after_init) { - dump(regs); - PANIC("Attempt to access UNMAP_AFTER_INIT section"); - } - - PageFault fault { regs.exception_code, VirtualAddress { fault_address } }; - auto response = MM.handle_page_fault(fault); - - if (response == PageFaultResponse::ShouldCrash || response == PageFaultResponse::OutOfMemory) { - if (faulted_in_kernel && handle_safe_access_fault(regs, fault_address)) { - // If this would be a ring0 (kernel) fault and the fault was triggered by - // safe_memcpy, safe_strnlen, or safe_memset then we resume execution at - // the appropriate _fault label rather than crashing - return; - } - - if (response != PageFaultResponse::OutOfMemory && current_thread) { - if (current_thread->has_signal_handler(SIGSEGV)) { - current_thread->send_urgent_signal_to_self(SIGSEGV); - return; - } - } - - dbgln("Unrecoverable page fault, {}{}{} address {}", - regs.exception_code & PageFaultFlags::ReservedBitViolation ? "reserved bit violation / " : "", - regs.exception_code & PageFaultFlags::InstructionFetch ? "instruction fetch / " : "", - regs.exception_code & PageFaultFlags::Write ? 
"write to" : "read from", - VirtualAddress(fault_address)); - u32 malloc_scrub_pattern = explode_byte(MALLOC_SCRUB_BYTE); - u32 free_scrub_pattern = explode_byte(FREE_SCRUB_BYTE); - u32 kmalloc_scrub_pattern = explode_byte(KMALLOC_SCRUB_BYTE); - u32 kfree_scrub_pattern = explode_byte(KFREE_SCRUB_BYTE); - u32 slab_alloc_scrub_pattern = explode_byte(SLAB_ALLOC_SCRUB_BYTE); - u32 slab_dealloc_scrub_pattern = explode_byte(SLAB_DEALLOC_SCRUB_BYTE); - if ((fault_address & 0xffff0000) == (malloc_scrub_pattern & 0xffff0000)) { - dbgln("Note: Address {} looks like it may be uninitialized malloc() memory", VirtualAddress(fault_address)); - } else if ((fault_address & 0xffff0000) == (free_scrub_pattern & 0xffff0000)) { - dbgln("Note: Address {} looks like it may be recently free()'d memory", VirtualAddress(fault_address)); - } else if ((fault_address & 0xffff0000) == (kmalloc_scrub_pattern & 0xffff0000)) { - dbgln("Note: Address {} looks like it may be uninitialized kmalloc() memory", VirtualAddress(fault_address)); - } else if ((fault_address & 0xffff0000) == (kfree_scrub_pattern & 0xffff0000)) { - dbgln("Note: Address {} looks like it may be recently kfree()'d memory", VirtualAddress(fault_address)); - } else if ((fault_address & 0xffff0000) == (slab_alloc_scrub_pattern & 0xffff0000)) { - dbgln("Note: Address {} looks like it may be uninitialized slab_alloc() memory", VirtualAddress(fault_address)); - } else if ((fault_address & 0xffff0000) == (slab_dealloc_scrub_pattern & 0xffff0000)) { - dbgln("Note: Address {} looks like it may be recently slab_dealloc()'d memory", VirtualAddress(fault_address)); - } else if (fault_address < 4096) { - dbgln("Note: Address {} looks like a possible nullptr dereference", VirtualAddress(fault_address)); - } - - if (current_thread) { - auto& current_process = current_thread->process(); - if (current_process.is_user_process()) { - current_process.set_coredump_metadata("fault_address", String::formatted("{:p}", fault_address)); - current_process.set_coredump_metadata("fault_type", fault.type() == PageFault::Type::PageNotPresent ? "NotPresent" : "ProtectionViolation"); - String fault_access; - if (fault.is_instruction_fetch()) - fault_access = "Execute"; - else - fault_access = fault.access() == PageFault::Access::Read ? 
"Read" : "Write"; - current_process.set_coredump_metadata("fault_access", fault_access); - } - } - - handle_crash(regs, "Page Fault", SIGSEGV, response == PageFaultResponse::OutOfMemory); - } else if (response == PageFaultResponse::Continue) { - dbgln_if(PAGE_FAULT_DEBUG, "Continuing after resolved page fault"); - } else { - VERIFY_NOT_REACHED(); - } -} - -EH_ENTRY_NO_CODE(1, debug); -void debug_handler(TrapFrame* trap) -{ - clac(); - auto& regs = *trap->regs; - auto current_thread = Thread::current(); - auto& process = current_thread->process(); - if ((regs.cs & 3) == 0) { - PANIC("Debug exception in ring 0"); - } - constexpr u8 REASON_SINGLESTEP = 14; - auto debug_status = read_dr6(); - auto should_trap_mask = (1 << REASON_SINGLESTEP) | 0b1111; - if ((debug_status & should_trap_mask) == 0) - return; - if (auto tracer = process.tracer()) { - tracer->set_regs(regs); - } - current_thread->send_urgent_signal_to_self(SIGTRAP); - write_dr6(debug_status & ~(should_trap_mask)); -} - -EH_ENTRY_NO_CODE(3, breakpoint); -void breakpoint_handler(TrapFrame* trap) -{ - clac(); - auto& regs = *trap->regs; - auto current_thread = Thread::current(); - auto& process = current_thread->process(); - if ((regs.cs & 3) == 0) { - PANIC("Breakpoint trap in ring 0"); - } - if (auto tracer = process.tracer()) { - tracer->set_regs(regs); - } - current_thread->send_urgent_signal_to_self(SIGTRAP); -} - -#define EH(i, msg) \ - static void _exception##i() \ - { \ - dbgln("{}", msg); \ - PANIC("cr0={:08x} cr2={:08x} cr3={:08x} cr4={:08x}", read_cr0(), read_cr2(), read_cr3(), read_cr4()); \ - } - -EH(2, "Unknown error") -EH(4, "Overflow") -EH(5, "Bounds check") -EH(8, "Double fault") -EH(9, "Coprocessor segment overrun") -EH(10, "Invalid TSS") -EH(11, "Segment not present") -EH(12, "Stack exception") -EH(15, "Unknown error") -EH(16, "Coprocessor error") - -const DescriptorTablePointer& get_idtr() -{ - return s_idtr; -} - -static void unimp_trap() -{ - PANIC("Unhandled IRQ"); -} - -GenericInterruptHandler& get_interrupt_handler(u8 interrupt_number) -{ - auto*& handler_slot = s_interrupt_handler[interrupt_number]; - VERIFY(handler_slot != nullptr); - return *handler_slot; -} - -static void revert_to_unused_handler(u8 interrupt_number) -{ - auto handler = new UnhandledInterruptHandler(interrupt_number); - handler->register_interrupt_handler(); -} - -void register_generic_interrupt_handler(u8 interrupt_number, GenericInterruptHandler& handler) -{ - VERIFY(interrupt_number < GENERIC_INTERRUPT_HANDLERS_COUNT); - auto*& handler_slot = s_interrupt_handler[interrupt_number]; - if (handler_slot != nullptr) { - if (handler_slot->type() == HandlerType::UnhandledInterruptHandler) { - if (handler_slot) { - auto* unhandled_handler = static_cast<UnhandledInterruptHandler*>(handler_slot); - unhandled_handler->unregister_interrupt_handler(); - delete unhandled_handler; - } - handler_slot = &handler; - return; - } - if (handler_slot->is_shared_handler() && !handler_slot->is_sharing_with_others()) { - VERIFY(handler_slot->type() == HandlerType::SharedIRQHandler); - static_cast<SharedIRQHandler*>(handler_slot)->register_handler(handler); - return; - } - if (!handler_slot->is_shared_handler()) { - if (handler_slot->type() == HandlerType::SpuriousInterruptHandler) { - static_cast<SpuriousInterruptHandler*>(handler_slot)->register_handler(handler); - return; - } - VERIFY(handler_slot->type() == HandlerType::IRQHandler); - auto& previous_handler = *handler_slot; - handler_slot = nullptr; - SharedIRQHandler::initialize(interrupt_number); - 
VERIFY(handler_slot); - static_cast<SharedIRQHandler*>(handler_slot)->register_handler(previous_handler); - static_cast<SharedIRQHandler*>(handler_slot)->register_handler(handler); - return; - } - VERIFY_NOT_REACHED(); - } else { - handler_slot = &handler; - } -} - -void unregister_generic_interrupt_handler(u8 interrupt_number, GenericInterruptHandler& handler) -{ - auto*& handler_slot = s_interrupt_handler[interrupt_number]; - VERIFY(handler_slot != nullptr); - if (handler_slot->type() == HandlerType::UnhandledInterruptHandler) { - dbgln("Trying to unregister unused handler (?)"); - return; - } - if (handler_slot->is_shared_handler() && !handler_slot->is_sharing_with_others()) { - VERIFY(handler_slot->type() == HandlerType::SharedIRQHandler); - auto* shared_handler = static_cast<SharedIRQHandler*>(handler_slot); - shared_handler->unregister_handler(handler); - if (!shared_handler->sharing_devices_count()) { - handler_slot = nullptr; - revert_to_unused_handler(interrupt_number); - } - return; - } - if (!handler_slot->is_shared_handler()) { - VERIFY(handler_slot->type() == HandlerType::IRQHandler); - handler_slot = nullptr; - revert_to_unused_handler(interrupt_number); - return; - } - VERIFY_NOT_REACHED(); -} - -UNMAP_AFTER_INIT void register_interrupt_handler(u8 index, void (*handler)()) -{ - // FIXME: Why is that with selector 8? - // FIXME: Is the Gate Type really required to be an Interrupt - // FIXME: What's up with that storage segment 0? - s_idt[index] = IDTEntry((FlatPtr)handler, 8, IDTEntryType::InterruptGate32, 0, 0); -} - -UNMAP_AFTER_INIT void register_user_callable_interrupt_handler(u8 index, void (*handler)()) -{ - // FIXME: Why is that with selector 8? - // FIXME: Is the Gate Type really required to be a Trap - // FIXME: What's up with that storage segment 0? 
- s_idt[index] = IDTEntry((FlatPtr)handler, 8, IDTEntryType::TrapGate32, 0, 3); -} - -UNMAP_AFTER_INIT void flush_idt() -{ - asm("lidt %0" ::"m"(s_idtr)); -} - -UNMAP_AFTER_INIT static void idt_init() -{ - s_idtr.address = s_idt; - s_idtr.limit = 256 * 8 - 1; - - register_interrupt_handler(0x00, divide_error_asm_entry); - register_user_callable_interrupt_handler(0x01, debug_asm_entry); - register_interrupt_handler(0x02, _exception2); - register_user_callable_interrupt_handler(0x03, breakpoint_asm_entry); - register_interrupt_handler(0x04, _exception4); - register_interrupt_handler(0x05, _exception5); - register_interrupt_handler(0x06, illegal_instruction_asm_entry); - register_interrupt_handler(0x07, fpu_exception_asm_entry); - register_interrupt_handler(0x08, _exception8); - register_interrupt_handler(0x09, _exception9); - register_interrupt_handler(0x0a, _exception10); - register_interrupt_handler(0x0b, _exception11); - register_interrupt_handler(0x0c, _exception12); - register_interrupt_handler(0x0d, general_protection_fault_asm_entry); - register_interrupt_handler(0x0e, page_fault_asm_entry); - register_interrupt_handler(0x0f, _exception15); - register_interrupt_handler(0x10, _exception16); - - for (u8 i = 0x11; i < 0x50; i++) - register_interrupt_handler(i, unimp_trap); - - dbgln("Initializing unhandled interrupt handlers"); - register_interrupt_handler(0x50, interrupt_80_asm_entry); - register_interrupt_handler(0x51, interrupt_81_asm_entry); - register_interrupt_handler(0x52, interrupt_82_asm_entry); - register_interrupt_handler(0x53, interrupt_83_asm_entry); - register_interrupt_handler(0x54, interrupt_84_asm_entry); - register_interrupt_handler(0x55, interrupt_85_asm_entry); - register_interrupt_handler(0x56, interrupt_86_asm_entry); - register_interrupt_handler(0x57, interrupt_87_asm_entry); - register_interrupt_handler(0x58, interrupt_88_asm_entry); - register_interrupt_handler(0x59, interrupt_89_asm_entry); - register_interrupt_handler(0x5a, interrupt_90_asm_entry); - register_interrupt_handler(0x5b, interrupt_91_asm_entry); - register_interrupt_handler(0x5c, interrupt_92_asm_entry); - register_interrupt_handler(0x5d, interrupt_93_asm_entry); - register_interrupt_handler(0x5e, interrupt_94_asm_entry); - register_interrupt_handler(0x5f, interrupt_95_asm_entry); - register_interrupt_handler(0x60, interrupt_96_asm_entry); - register_interrupt_handler(0x61, interrupt_97_asm_entry); - register_interrupt_handler(0x62, interrupt_98_asm_entry); - register_interrupt_handler(0x63, interrupt_99_asm_entry); - register_interrupt_handler(0x64, interrupt_100_asm_entry); - register_interrupt_handler(0x65, interrupt_101_asm_entry); - register_interrupt_handler(0x66, interrupt_102_asm_entry); - register_interrupt_handler(0x67, interrupt_103_asm_entry); - register_interrupt_handler(0x68, interrupt_104_asm_entry); - register_interrupt_handler(0x69, interrupt_105_asm_entry); - register_interrupt_handler(0x6a, interrupt_106_asm_entry); - register_interrupt_handler(0x6b, interrupt_107_asm_entry); - register_interrupt_handler(0x6c, interrupt_108_asm_entry); - register_interrupt_handler(0x6d, interrupt_109_asm_entry); - register_interrupt_handler(0x6e, interrupt_110_asm_entry); - register_interrupt_handler(0x6f, interrupt_111_asm_entry); - register_interrupt_handler(0x70, interrupt_112_asm_entry); - register_interrupt_handler(0x71, interrupt_113_asm_entry); - register_interrupt_handler(0x72, interrupt_114_asm_entry); - register_interrupt_handler(0x73, interrupt_115_asm_entry); - 
register_interrupt_handler(0x74, interrupt_116_asm_entry); - register_interrupt_handler(0x75, interrupt_117_asm_entry); - register_interrupt_handler(0x76, interrupt_118_asm_entry); - register_interrupt_handler(0x77, interrupt_119_asm_entry); - register_interrupt_handler(0x78, interrupt_120_asm_entry); - register_interrupt_handler(0x79, interrupt_121_asm_entry); - register_interrupt_handler(0x7a, interrupt_122_asm_entry); - register_interrupt_handler(0x7b, interrupt_123_asm_entry); - register_interrupt_handler(0x7c, interrupt_124_asm_entry); - register_interrupt_handler(0x7d, interrupt_125_asm_entry); - register_interrupt_handler(0x7e, interrupt_126_asm_entry); - register_interrupt_handler(0x7f, interrupt_127_asm_entry); - register_interrupt_handler(0x80, interrupt_128_asm_entry); - register_interrupt_handler(0x81, interrupt_129_asm_entry); - register_interrupt_handler(0x82, interrupt_130_asm_entry); - register_interrupt_handler(0x83, interrupt_131_asm_entry); - register_interrupt_handler(0x84, interrupt_132_asm_entry); - register_interrupt_handler(0x85, interrupt_133_asm_entry); - register_interrupt_handler(0x86, interrupt_134_asm_entry); - register_interrupt_handler(0x87, interrupt_135_asm_entry); - register_interrupt_handler(0x88, interrupt_136_asm_entry); - register_interrupt_handler(0x89, interrupt_137_asm_entry); - register_interrupt_handler(0x8a, interrupt_138_asm_entry); - register_interrupt_handler(0x8b, interrupt_139_asm_entry); - register_interrupt_handler(0x8c, interrupt_140_asm_entry); - register_interrupt_handler(0x8d, interrupt_141_asm_entry); - register_interrupt_handler(0x8e, interrupt_142_asm_entry); - register_interrupt_handler(0x8f, interrupt_143_asm_entry); - register_interrupt_handler(0x90, interrupt_144_asm_entry); - register_interrupt_handler(0x91, interrupt_145_asm_entry); - register_interrupt_handler(0x92, interrupt_146_asm_entry); - register_interrupt_handler(0x93, interrupt_147_asm_entry); - register_interrupt_handler(0x94, interrupt_148_asm_entry); - register_interrupt_handler(0x95, interrupt_149_asm_entry); - register_interrupt_handler(0x96, interrupt_150_asm_entry); - register_interrupt_handler(0x97, interrupt_151_asm_entry); - register_interrupt_handler(0x98, interrupt_152_asm_entry); - register_interrupt_handler(0x99, interrupt_153_asm_entry); - register_interrupt_handler(0x9a, interrupt_154_asm_entry); - register_interrupt_handler(0x9b, interrupt_155_asm_entry); - register_interrupt_handler(0x9c, interrupt_156_asm_entry); - register_interrupt_handler(0x9d, interrupt_157_asm_entry); - register_interrupt_handler(0x9e, interrupt_158_asm_entry); - register_interrupt_handler(0x9f, interrupt_159_asm_entry); - register_interrupt_handler(0xa0, interrupt_160_asm_entry); - register_interrupt_handler(0xa1, interrupt_161_asm_entry); - register_interrupt_handler(0xa2, interrupt_162_asm_entry); - register_interrupt_handler(0xa3, interrupt_163_asm_entry); - register_interrupt_handler(0xa4, interrupt_164_asm_entry); - register_interrupt_handler(0xa5, interrupt_165_asm_entry); - register_interrupt_handler(0xa6, interrupt_166_asm_entry); - register_interrupt_handler(0xa7, interrupt_167_asm_entry); - register_interrupt_handler(0xa8, interrupt_168_asm_entry); - register_interrupt_handler(0xa9, interrupt_169_asm_entry); - register_interrupt_handler(0xaa, interrupt_170_asm_entry); - register_interrupt_handler(0xab, interrupt_171_asm_entry); - register_interrupt_handler(0xac, interrupt_172_asm_entry); - register_interrupt_handler(0xad, interrupt_173_asm_entry); - 
register_interrupt_handler(0xae, interrupt_174_asm_entry); - register_interrupt_handler(0xaf, interrupt_175_asm_entry); - register_interrupt_handler(0xb0, interrupt_176_asm_entry); - register_interrupt_handler(0xb1, interrupt_177_asm_entry); - register_interrupt_handler(0xb2, interrupt_178_asm_entry); - register_interrupt_handler(0xb3, interrupt_179_asm_entry); - register_interrupt_handler(0xb4, interrupt_180_asm_entry); - register_interrupt_handler(0xb5, interrupt_181_asm_entry); - register_interrupt_handler(0xb6, interrupt_182_asm_entry); - register_interrupt_handler(0xb7, interrupt_183_asm_entry); - register_interrupt_handler(0xb8, interrupt_184_asm_entry); - register_interrupt_handler(0xb9, interrupt_185_asm_entry); - register_interrupt_handler(0xba, interrupt_186_asm_entry); - register_interrupt_handler(0xbb, interrupt_187_asm_entry); - register_interrupt_handler(0xbc, interrupt_188_asm_entry); - register_interrupt_handler(0xbd, interrupt_189_asm_entry); - register_interrupt_handler(0xbe, interrupt_190_asm_entry); - register_interrupt_handler(0xbf, interrupt_191_asm_entry); - register_interrupt_handler(0xc0, interrupt_192_asm_entry); - register_interrupt_handler(0xc1, interrupt_193_asm_entry); - register_interrupt_handler(0xc2, interrupt_194_asm_entry); - register_interrupt_handler(0xc3, interrupt_195_asm_entry); - register_interrupt_handler(0xc4, interrupt_196_asm_entry); - register_interrupt_handler(0xc5, interrupt_197_asm_entry); - register_interrupt_handler(0xc6, interrupt_198_asm_entry); - register_interrupt_handler(0xc7, interrupt_199_asm_entry); - register_interrupt_handler(0xc8, interrupt_200_asm_entry); - register_interrupt_handler(0xc9, interrupt_201_asm_entry); - register_interrupt_handler(0xca, interrupt_202_asm_entry); - register_interrupt_handler(0xcb, interrupt_203_asm_entry); - register_interrupt_handler(0xcc, interrupt_204_asm_entry); - register_interrupt_handler(0xcd, interrupt_205_asm_entry); - register_interrupt_handler(0xce, interrupt_206_asm_entry); - register_interrupt_handler(0xcf, interrupt_207_asm_entry); - register_interrupt_handler(0xd0, interrupt_208_asm_entry); - register_interrupt_handler(0xd1, interrupt_209_asm_entry); - register_interrupt_handler(0xd2, interrupt_210_asm_entry); - register_interrupt_handler(0xd3, interrupt_211_asm_entry); - register_interrupt_handler(0xd4, interrupt_212_asm_entry); - register_interrupt_handler(0xd5, interrupt_213_asm_entry); - register_interrupt_handler(0xd6, interrupt_214_asm_entry); - register_interrupt_handler(0xd7, interrupt_215_asm_entry); - register_interrupt_handler(0xd8, interrupt_216_asm_entry); - register_interrupt_handler(0xd9, interrupt_217_asm_entry); - register_interrupt_handler(0xda, interrupt_218_asm_entry); - register_interrupt_handler(0xdb, interrupt_219_asm_entry); - register_interrupt_handler(0xdc, interrupt_220_asm_entry); - register_interrupt_handler(0xdd, interrupt_221_asm_entry); - register_interrupt_handler(0xde, interrupt_222_asm_entry); - register_interrupt_handler(0xdf, interrupt_223_asm_entry); - register_interrupt_handler(0xe0, interrupt_224_asm_entry); - register_interrupt_handler(0xe1, interrupt_225_asm_entry); - register_interrupt_handler(0xe2, interrupt_226_asm_entry); - register_interrupt_handler(0xe3, interrupt_227_asm_entry); - register_interrupt_handler(0xe4, interrupt_228_asm_entry); - register_interrupt_handler(0xe5, interrupt_229_asm_entry); - register_interrupt_handler(0xe6, interrupt_230_asm_entry); - register_interrupt_handler(0xe7, interrupt_231_asm_entry); - 
register_interrupt_handler(0xe8, interrupt_232_asm_entry); - register_interrupt_handler(0xe9, interrupt_233_asm_entry); - register_interrupt_handler(0xea, interrupt_234_asm_entry); - register_interrupt_handler(0xeb, interrupt_235_asm_entry); - register_interrupt_handler(0xec, interrupt_236_asm_entry); - register_interrupt_handler(0xed, interrupt_237_asm_entry); - register_interrupt_handler(0xee, interrupt_238_asm_entry); - register_interrupt_handler(0xef, interrupt_239_asm_entry); - register_interrupt_handler(0xf0, interrupt_240_asm_entry); - register_interrupt_handler(0xf1, interrupt_241_asm_entry); - register_interrupt_handler(0xf2, interrupt_242_asm_entry); - register_interrupt_handler(0xf3, interrupt_243_asm_entry); - register_interrupt_handler(0xf4, interrupt_244_asm_entry); - register_interrupt_handler(0xf5, interrupt_245_asm_entry); - register_interrupt_handler(0xf6, interrupt_246_asm_entry); - register_interrupt_handler(0xf7, interrupt_247_asm_entry); - register_interrupt_handler(0xf8, interrupt_248_asm_entry); - register_interrupt_handler(0xf9, interrupt_249_asm_entry); - register_interrupt_handler(0xfa, interrupt_250_asm_entry); - register_interrupt_handler(0xfb, interrupt_251_asm_entry); - register_interrupt_handler(0xfc, interrupt_252_asm_entry); - register_interrupt_handler(0xfd, interrupt_253_asm_entry); - register_interrupt_handler(0xfe, interrupt_254_asm_entry); - register_interrupt_handler(0xff, interrupt_255_asm_entry); - - for (u8 i = 0; i < GENERIC_INTERRUPT_HANDLERS_COUNT; ++i) { - auto* handler = new UnhandledInterruptHandler(i); - handler->register_interrupt_handler(); - } - - flush_idt(); -} - -void load_task_register(u16 selector) -{ - asm("ltr %0" ::"r"(selector)); -} - -void handle_interrupt(TrapFrame* trap) -{ - clac(); - auto& regs = *trap->regs; - VERIFY(regs.isr_number >= IRQ_VECTOR_BASE && regs.isr_number <= (IRQ_VECTOR_BASE + GENERIC_INTERRUPT_HANDLERS_COUNT)); - u8 irq = (u8)(regs.isr_number - 0x50); - s_entropy_source_interrupts.add_random_event(irq); - auto* handler = s_interrupt_handler[irq]; - VERIFY(handler); - handler->increment_invoking_counter(); - handler->handle_interrupt(regs); - handler->eoi(); -} - -void enter_trap_no_irq(TrapFrame* trap) -{ - InterruptDisabler disable; - Processor::current().enter_trap(*trap, false); -} - -void enter_trap(TrapFrame* trap) -{ - InterruptDisabler disable; - Processor::current().enter_trap(*trap, true); -} - -void exit_trap(TrapFrame* trap) -{ - InterruptDisabler disable; - return Processor::current().exit_trap(*trap); -} - -UNMAP_AFTER_INIT void write_cr0(FlatPtr value) -{ -#if ARCH(I386) - asm volatile("mov %%eax, %%cr0" ::"a"(value)); -#else - asm volatile("mov %%rax, %%cr0" ::"a"(value)); -#endif -} - -UNMAP_AFTER_INIT void write_cr4(FlatPtr value) -{ -#if ARCH(I386) - asm volatile("mov %%eax, %%cr4" ::"a"(value)); -#else - asm volatile("mov %%rax, %%cr4" ::"a"(value)); -#endif -} - -UNMAP_AFTER_INIT static void sse_init() -{ - write_cr0((read_cr0() & 0xfffffffbu) | 0x2); - write_cr4(read_cr4() | 0x600); -} - -FlatPtr read_cr0() -{ - FlatPtr cr0; -#if ARCH(I386) - asm("mov %%cr0, %%eax" - : "=a"(cr0)); -#else - asm("mov %%cr0, %%rax" - : "=a"(cr0)); -#endif - return cr0; -} - -FlatPtr read_cr2() -{ - FlatPtr cr2; -#if ARCH(I386) - asm("mov %%cr2, %%eax" - : "=a"(cr2)); -#else - asm("mov %%cr2, %%rax" - : "=a"(cr2)); -#endif - return cr2; -} - -FlatPtr read_cr3() -{ - FlatPtr cr3; -#if ARCH(I386) - asm("mov %%cr3, %%eax" - : "=a"(cr3)); -#else - asm("mov %%cr3, %%rax" - : "=a"(cr3)); -#endif - return cr3; -} 
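For clarity, sse_init() above enables SSE using raw bit masks; the following is a minimal annotated sketch of the same CR0/CR4 updates with the bits named. The constant names follow the standard x86 register layout and are illustrative only; they are not defined in the original file, which uses the read/write helpers shown above.

    // Illustrative sketch: names the bits behind sse_init()'s magic constants.
    // CR0.EM (bit 2) must be clear and CR0.MP (bit 1) set so x87/SSE instructions run natively;
    // CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10) advertise OS support for FXSAVE/FXRSTOR
    // and unmasked SIMD floating-point exceptions.
    static constexpr FlatPtr CR0_MP = 1u << 1;
    static constexpr FlatPtr CR0_EM = 1u << 2;
    static constexpr FlatPtr CR4_OSFXSR = 1u << 9;
    static constexpr FlatPtr CR4_OSXMMEXCPT = 1u << 10;

    static void sse_init_annotated()
    {
        // Relies on the read_cr0()/write_cr0()/read_cr4()/write_cr4() helpers defined in this file.
        write_cr0((read_cr0() & ~CR0_EM) | CR0_MP);          // same effect as (cr0 & 0xfffffffb) | 0x2
        write_cr4(read_cr4() | CR4_OSFXSR | CR4_OSXMMEXCPT); // same effect as cr4 | 0x600
    }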
- -void write_cr3(FlatPtr cr3) -{ - // NOTE: If you're here from a GPF crash, it's very likely that a PDPT entry is incorrect, not this! -#if ARCH(I386) - asm volatile("mov %%eax, %%cr3" ::"a"(cr3) - : "memory"); -#else - asm volatile("mov %%rax, %%cr3" ::"a"(cr3) - : "memory"); -#endif -} - -FlatPtr read_cr4() -{ - FlatPtr cr4; -#if ARCH(I386) - asm("mov %%cr4, %%eax" - : "=a"(cr4)); -#else - asm("mov %%cr4, %%rax" - : "=a"(cr4)); -#endif - return cr4; -} - -void read_debug_registers_into(DebugRegisterState& state) -{ - state.dr0 = read_dr0(); - state.dr1 = read_dr1(); - state.dr2 = read_dr2(); - state.dr3 = read_dr3(); - state.dr6 = read_dr6(); - state.dr7 = read_dr7(); -} - -void write_debug_registers_from(const DebugRegisterState& state) -{ - write_dr0(state.dr0); - write_dr1(state.dr1); - write_dr2(state.dr2); - write_dr3(state.dr3); - write_dr6(state.dr6); - write_dr7(state.dr7); -} - -void clear_debug_registers() -{ - write_dr0(0); - write_dr1(0); - write_dr2(0); - write_dr3(0); - write_dr7(1 << 10); // Bit 10 is reserved and must be set to 1. -} - -#if ARCH(I386) -# define DEFINE_DEBUG_REGISTER(index) \ - FlatPtr read_dr##index() \ - { \ - FlatPtr value; \ - asm("mov %%dr" #index ", %%eax" \ - : "=a"(value)); \ - return value; \ - } \ - void write_dr##index(FlatPtr value) \ - { \ - asm volatile("mov %%eax, %%dr" #index ::"a"(value)); \ - } -#else -# define DEFINE_DEBUG_REGISTER(index) \ - FlatPtr read_dr##index() \ - { \ - FlatPtr value; \ - asm("mov %%dr" #index ", %%rax" \ - : "=a"(value)); \ - return value; \ - } \ - void write_dr##index(FlatPtr value) \ - { \ - asm volatile("mov %%rax, %%dr" #index ::"a"(value)); \ - } -#endif - -DEFINE_DEBUG_REGISTER(0); -DEFINE_DEBUG_REGISTER(1); -DEFINE_DEBUG_REGISTER(2); -DEFINE_DEBUG_REGISTER(3); -DEFINE_DEBUG_REGISTER(6); -DEFINE_DEBUG_REGISTER(7); - -#define XCR_XFEATURE_ENABLED_MASK 0 - -UNMAP_AFTER_INIT u64 read_xcr0() -{ - u32 eax, edx; - asm volatile("xgetbv" - : "=a"(eax), "=d"(edx) - : "c"(XCR_XFEATURE_ENABLED_MASK)); - return eax + ((u64)edx << 32); -} - -UNMAP_AFTER_INIT void write_xcr0(u64 value) -{ - u32 eax = value; - u32 edx = value >> 32; - asm volatile("xsetbv" ::"a"(eax), "d"(edx), "c"(XCR_XFEATURE_ENABLED_MASK)); -} - -READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state; - -READONLY_AFTER_INIT static ProcessorContainer s_processors {}; -READONLY_AFTER_INIT volatile u32 Processor::g_total_processors; -static volatile bool s_smp_enabled; - -ProcessorContainer& Processor::processors() -{ - return s_processors; -} - -Processor& Processor::by_id(u32 cpu) -{ - // s_processors does not need to be protected by a lock of any kind. - // It is populated early in the boot process, and the BSP is waiting - // for all APs to finish, after which this array never gets modified - // again, so it's safe to not protect access to it here - auto& procs = processors(); - VERIFY(procs[cpu] != nullptr); - VERIFY(procs.size() > cpu); - return *procs[cpu]; -} - -[[noreturn]] static inline void halt_this() -{ - for (;;) { - asm volatile("cli; hlt"); - } -} - -UNMAP_AFTER_INIT void Processor::cpu_detect() -{ - // NOTE: This is called during Processor::early_initialize, we cannot - // safely log at this point because we don't have kmalloc - // initialized yet! 
- auto set_feature = - [&](CPUFeature f) { - m_features = static_cast<CPUFeature>(static_cast<u32>(m_features) | static_cast<u32>(f)); - }; - m_features = static_cast<CPUFeature>(0); - - CPUID processor_info(0x1); - if (processor_info.edx() & (1 << 4)) - set_feature(CPUFeature::TSC); - if (processor_info.edx() & (1 << 6)) - set_feature(CPUFeature::PAE); - if (processor_info.edx() & (1 << 13)) - set_feature(CPUFeature::PGE); - if (processor_info.edx() & (1 << 23)) - set_feature(CPUFeature::MMX); - if (processor_info.edx() & (1 << 24)) - set_feature(CPUFeature::FXSR); - if (processor_info.edx() & (1 << 25)) - set_feature(CPUFeature::SSE); - if (processor_info.edx() & (1 << 26)) - set_feature(CPUFeature::SSE2); - if (processor_info.ecx() & (1 << 0)) - set_feature(CPUFeature::SSE3); - if (processor_info.ecx() & (1 << 9)) - set_feature(CPUFeature::SSSE3); - if (processor_info.ecx() & (1 << 19)) - set_feature(CPUFeature::SSE4_1); - if (processor_info.ecx() & (1 << 20)) - set_feature(CPUFeature::SSE4_2); - if (processor_info.ecx() & (1 << 26)) - set_feature(CPUFeature::XSAVE); - if (processor_info.ecx() & (1 << 28)) - set_feature(CPUFeature::AVX); - if (processor_info.ecx() & (1 << 30)) - set_feature(CPUFeature::RDRAND); - if (processor_info.edx() & (1 << 11)) { - u32 stepping = processor_info.eax() & 0xf; - u32 model = (processor_info.eax() >> 4) & 0xf; - u32 family = (processor_info.eax() >> 8) & 0xf; - if (!(family == 6 && model < 3 && stepping < 3)) - set_feature(CPUFeature::SEP); - if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe)) - set_feature(CPUFeature::CONSTANT_TSC); - } - - u32 max_extended_leaf = CPUID(0x80000000).eax(); - - if (max_extended_leaf >= 0x80000001) { - CPUID extended_processor_info(0x80000001); - if (extended_processor_info.edx() & (1 << 20)) - set_feature(CPUFeature::NX); - if (extended_processor_info.edx() & (1 << 27)) - set_feature(CPUFeature::RDTSCP); - if (extended_processor_info.edx() & (1 << 11)) { - // Only available in 64 bit mode - set_feature(CPUFeature::SYSCALL); - } - } - - if (max_extended_leaf >= 0x80000007) { - CPUID cpuid(0x80000007); - if (cpuid.edx() & (1 << 8)) { - set_feature(CPUFeature::CONSTANT_TSC); - set_feature(CPUFeature::NONSTOP_TSC); - } - } - - if (max_extended_leaf >= 0x80000008) { - // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor. - CPUID cpuid(0x80000008); - m_physical_address_bit_width = cpuid.eax() & 0xff; - } else { - // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise. - m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32; - } - - CPUID extended_features(0x7); - if (extended_features.ebx() & (1 << 20)) - set_feature(CPUFeature::SMAP); - if (extended_features.ebx() & (1 << 7)) - set_feature(CPUFeature::SMEP); - if (extended_features.ecx() & (1 << 2)) - set_feature(CPUFeature::UMIP); - if (extended_features.ebx() & (1 << 18)) - set_feature(CPUFeature::RDSEED); -} - -UNMAP_AFTER_INIT void Processor::cpu_setup() -{ - // NOTE: This is called during Processor::early_initialize, we cannot - // safely log at this point because we don't have kmalloc - // initialized yet! - cpu_detect(); - - if (has_feature(CPUFeature::SSE)) { - // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR. - // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it. 
- VERIFY(has_feature(CPUFeature::FXSR)); - sse_init(); - } - - write_cr0(read_cr0() | 0x00010000); - - if (has_feature(CPUFeature::PGE)) { - // Turn on CR4.PGE so the CPU will respect the G bit in page tables. - write_cr4(read_cr4() | 0x80); - } - - if (has_feature(CPUFeature::NX)) { - // Turn on IA32_EFER.NXE - asm volatile( - "movl $0xc0000080, %ecx\n" - "rdmsr\n" - "orl $0x800, %eax\n" - "wrmsr\n"); - } - - if (has_feature(CPUFeature::SMEP)) { - // Turn on CR4.SMEP - write_cr4(read_cr4() | 0x100000); - } - - if (has_feature(CPUFeature::SMAP)) { - // Turn on CR4.SMAP - write_cr4(read_cr4() | 0x200000); - } - - if (has_feature(CPUFeature::UMIP)) { - write_cr4(read_cr4() | 0x800); - } - - if (has_feature(CPUFeature::TSC)) { - write_cr4(read_cr4() | 0x4); - } - - if (has_feature(CPUFeature::XSAVE)) { - // Turn on CR4.OSXSAVE - write_cr4(read_cr4() | 0x40000); - - // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1." - // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves. - write_xcr0(0x1); - - if (has_feature(CPUFeature::AVX)) { - // Turn on SSE, AVX and x87 flags - write_xcr0(read_xcr0() | 0x7); - } - } -} - -String Processor::features_string() const -{ - StringBuilder builder; - auto feature_to_str = - [](CPUFeature f) -> const char* { - switch (f) { - case CPUFeature::NX: - return "nx"; - case CPUFeature::PAE: - return "pae"; - case CPUFeature::PGE: - return "pge"; - case CPUFeature::RDRAND: - return "rdrand"; - case CPUFeature::RDSEED: - return "rdseed"; - case CPUFeature::SMAP: - return "smap"; - case CPUFeature::SMEP: - return "smep"; - case CPUFeature::SSE: - return "sse"; - case CPUFeature::TSC: - return "tsc"; - case CPUFeature::RDTSCP: - return "rdtscp"; - case CPUFeature::CONSTANT_TSC: - return "constant_tsc"; - case CPUFeature::NONSTOP_TSC: - return "nonstop_tsc"; - case CPUFeature::UMIP: - return "umip"; - case CPUFeature::SEP: - return "sep"; - case CPUFeature::SYSCALL: - return "syscall"; - case CPUFeature::MMX: - return "mmx"; - case CPUFeature::FXSR: - return "fxsr"; - case CPUFeature::SSE2: - return "sse2"; - case CPUFeature::SSE3: - return "sse3"; - case CPUFeature::SSSE3: - return "ssse3"; - case CPUFeature::SSE4_1: - return "sse4.1"; - case CPUFeature::SSE4_2: - return "sse4.2"; - case CPUFeature::XSAVE: - return "xsave"; - case CPUFeature::AVX: - return "avx"; - // no default statement here intentionally so that we get - // a warning if a new feature is forgotten to be added here - } - // Shouldn't ever happen - return "???"; - }; - bool first = true; - for (u32 flag = 1; flag != 0; flag <<= 1) { - if ((static_cast<u32>(m_features) & flag) != 0) { - if (first) - first = false; - else - builder.append(' '); - auto str = feature_to_str(static_cast<CPUFeature>(flag)); - builder.append(str, strlen(str)); - } - } - return builder.build(); -} - -String Processor::platform_string() const -{ - return "i386"; -} - -UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu) -{ - m_self = this; - - m_cpu = cpu; - m_in_irq = 0; - m_in_critical = 0; - - m_invoke_scheduler_async = false; - m_scheduler_initialized = false; - - m_message_queue = nullptr; - m_idle_thread = nullptr; - m_current_thread = nullptr; - m_scheduler_data = nullptr; - m_mm_data = nullptr; - m_info = nullptr; - - m_halt_requested = false; - if (cpu == 0) { - s_smp_enabled = false; - atomic_store(&g_total_processors, 1u, AK::MemoryOrder::memory_order_release); - } else { - 
atomic_fetch_add(&g_total_processors, 1u, AK::MemoryOrder::memory_order_acq_rel); - } - - deferred_call_pool_init(); - - cpu_setup(); - gdt_init(); - - VERIFY(is_initialized()); // sanity check - VERIFY(&current() == this); // sanity check -} - -UNMAP_AFTER_INIT void Processor::initialize(u32 cpu) -{ - VERIFY(m_self == this); - VERIFY(&current() == this); // sanity check - - dmesgln("CPU[{}]: Supported features: {}", id(), features_string()); - if (!has_feature(CPUFeature::RDRAND)) - dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", id()); - dmesgln("CPU[{}]: Physical address bit width: {}", id(), m_physical_address_bit_width); - - if (cpu == 0) - idt_init(); - else - flush_idt(); - - if (cpu == 0) { - VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0); - asm volatile("fninit"); - if (has_feature(CPUFeature::FXSR)) - asm volatile("fxsave %0" - : "=m"(s_clean_fpu_state)); - else - asm volatile("fnsave %0" - : "=m"(s_clean_fpu_state)); - } - - m_info = new ProcessorInfo(*this); - - { - // We need to prevent races between APs starting up at the same time - VERIFY(cpu < s_processors.size()); - s_processors[cpu] = this; - } -} - -void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high) -{ - u16 i = (selector & 0xfffc) >> 3; - u32 prev_gdt_length = m_gdt_length; - - if (i > m_gdt_length) { - m_gdt_length = i + 1; - VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0])); - m_gdtr.limit = (m_gdt_length + 1) * 8 - 1; - } - m_gdt[i].low = low; - m_gdt[i].high = high; - - // clear selectors we may have skipped - while (i < prev_gdt_length) { - m_gdt[i].low = 0; - m_gdt[i].high = 0; - i++; - } -} - -void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor) -{ - write_raw_gdt_entry(selector, descriptor.low, descriptor.high); -} - -Descriptor& Processor::get_gdt_entry(u16 selector) -{ - u16 i = (selector & 0xfffc) >> 3; - return *(Descriptor*)(&m_gdt[i]); -} - -void Processor::flush_gdt() -{ - m_gdtr.address = m_gdt; - m_gdtr.limit = (m_gdt_length * 8) - 1; - asm volatile("lgdt %0" ::"m"(m_gdtr) - : "memory"); -} - -const DescriptorTablePointer& Processor::get_gdtr() -{ - return m_gdtr; -} - -Vector<FlatPtr> Processor::capture_stack_trace(Thread& thread, size_t max_frames) -{ - FlatPtr frame_ptr = 0, eip = 0; - Vector<FlatPtr, 32> stack_trace; - - auto walk_stack = [&](FlatPtr stack_ptr) { - static constexpr size_t max_stack_frames = 4096; - stack_trace.append(eip); - size_t count = 1; - while (stack_ptr && stack_trace.size() < max_stack_frames) { - FlatPtr retaddr; - - count++; - if (max_frames != 0 && count > max_frames) - break; - - if (is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) { - if (!copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]) || !retaddr) - break; - stack_trace.append(retaddr); - if (!copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr)) - break; - } else { - void* fault_at; - if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr) - break; - stack_trace.append(retaddr); - if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at)) - break; - } - } - }; - auto capture_current_thread = [&]() { - frame_ptr = (FlatPtr)__builtin_frame_address(0); - eip = (FlatPtr)__builtin_return_address(0); - - walk_stack(frame_ptr); - }; - - // Since the thread may be running on another processor, there - // is a chance a context switch may happen while we're trying - // to get it. It also won't be entirely accurate and merely - // reflect the status at the last context switch. 
- ScopedSpinLock lock(g_scheduler_lock); - if (&thread == Processor::current_thread()) { - VERIFY(thread.state() == Thread::Running); - // Leave the scheduler lock. If we trigger page faults we may - // need to be preempted. Since this is our own thread it won't - // cause any problems as the stack won't change below this frame. - lock.unlock(); - capture_current_thread(); - } else if (thread.is_active()) { - VERIFY(thread.cpu() != Processor::id()); - // If this is the case, the thread is currently running - // on another processor. We can't trust the kernel stack as - // it may be changing at any time. We need to probably send - // an IPI to that processor, have it walk the stack and wait - // until it returns the data back to us - auto& proc = Processor::current(); - smp_unicast( - thread.cpu(), - [&]() { - dbgln("CPU[{}] getting stack for cpu #{}", Processor::id(), proc.get_id()); - ProcessPagingScope paging_scope(thread.process()); - VERIFY(&Processor::current() != &proc); - VERIFY(&thread == Processor::current_thread()); - // NOTE: Because the other processor is still holding the - // scheduler lock while waiting for this callback to finish, - // the current thread on the target processor cannot change - - // TODO: What to do about page faults here? We might deadlock - // because the other processor is still holding the - // scheduler lock... - capture_current_thread(); - }, - false); - } else { - switch (thread.state()) { - case Thread::Running: - VERIFY_NOT_REACHED(); // should have been handled above - case Thread::Runnable: - case Thread::Stopped: - case Thread::Blocked: - case Thread::Dying: - case Thread::Dead: { - // We need to retrieve ebp from what was last pushed to the kernel - // stack. Before switching out of that thread, it switch_context - // pushed the callee-saved registers, and the last of them happens - // to be ebp. 
- ProcessPagingScope paging_scope(thread.process()); - auto& tss = thread.tss(); - u32* stack_top = reinterpret_cast<u32*>(tss.esp); - if (is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) { - if (!copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0])) - frame_ptr = 0; - } else { - void* fault_at; - if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at)) - frame_ptr = 0; - } - eip = tss.eip; - // TODO: We need to leave the scheduler lock here, but we also - // need to prevent the target thread from being run while - // we walk the stack - lock.unlock(); - walk_stack(frame_ptr); - break; - } - default: - dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string()); - break; - } - } - return stack_trace; -} - -extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) -{ - VERIFY(from_thread == to_thread || from_thread->state() != Thread::Running); - VERIFY(to_thread->state() == Thread::Running); - - bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR); - Processor::set_current_thread(*to_thread); - - auto& from_tss = from_thread->tss(); - auto& to_tss = to_thread->tss(); - - if (has_fxsr) - asm volatile("fxsave %0" - : "=m"(from_thread->fpu_state())); - else - asm volatile("fnsave %0" - : "=m"(from_thread->fpu_state())); - - from_tss.fs = get_fs(); - from_tss.gs = get_gs(); - set_fs(to_tss.fs); - set_gs(to_tss.gs); - - if (from_thread->process().is_traced()) - read_debug_registers_into(from_thread->debug_register_state()); - - if (to_thread->process().is_traced()) { - write_debug_registers_from(to_thread->debug_register_state()); - } else { - clear_debug_registers(); - } - - auto& processor = Processor::current(); - auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS); - tls_descriptor.set_base(to_thread->thread_specific_data()); - tls_descriptor.set_limit(to_thread->thread_specific_region_size()); - - if (from_tss.cr3 != to_tss.cr3) - write_cr3(to_tss.cr3); - - to_thread->set_cpu(processor.get_id()); - processor.restore_in_critical(to_thread->saved_critical()); - - if (has_fxsr) - asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state())); - else - asm volatile("frstor %0" ::"m"(to_thread->fpu_state())); - - // TODO: ioperm? -} - -#define ENTER_THREAD_CONTEXT_ARGS_SIZE (2 * 4) // to_thread, from_thread - -void Processor::switch_context(Thread*& from_thread, Thread*& to_thread) -{ - VERIFY(!in_irq()); - VERIFY(m_in_critical == 1); - VERIFY(is_kernel_mode()); - - dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context --> switching out of: {} {}", VirtualAddress(from_thread), *from_thread); - from_thread->save_critical(m_in_critical); - -#if ARCH(I386) - // clang-format off - // Switch to new thread context, passing from_thread and to_thread - // through to the new context using registers edx and eax - asm volatile( - // NOTE: changing how much we push to the stack affects - // SWITCH_CONTEXT_TO_STACK_SIZE and thread_context_first_enter()! 
- "pushfl \n" - "pushl %%ebx \n" - "pushl %%esi \n" - "pushl %%edi \n" - "pushl %%ebp \n" - "movl %%esp, %[from_esp] \n" - "movl $1f, %[from_eip] \n" - "movl %[to_esp0], %%ebx \n" - "movl %%ebx, %[tss_esp0] \n" - "movl %[to_esp], %%esp \n" - "pushl %[to_thread] \n" - "pushl %[from_thread] \n" - "pushl %[to_eip] \n" - "cld \n" - "jmp enter_thread_context \n" - "1: \n" - "popl %%edx \n" - "popl %%eax \n" - "popl %%ebp \n" - "popl %%edi \n" - "popl %%esi \n" - "popl %%ebx \n" - "popfl \n" - : [from_esp] "=m" (from_thread->tss().esp), - [from_eip] "=m" (from_thread->tss().eip), - [tss_esp0] "=m" (m_tss.esp0), - "=d" (from_thread), // needed so that from_thread retains the correct value - "=a" (to_thread) // needed so that to_thread retains the correct value - : [to_esp] "g" (to_thread->tss().esp), - [to_esp0] "g" (to_thread->tss().esp0), - [to_eip] "c" (to_thread->tss().eip), - [from_thread] "d" (from_thread), - [to_thread] "a" (to_thread) - : "memory" - ); - // clang-format on -#else - PANIC("Context switching not implemented."); -#endif - - dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {}", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread); - - Processor::current().restore_in_critical(to_thread->saved_critical()); -} - -extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap) -{ - VERIFY(!are_interrupts_enabled()); - VERIFY(is_kernel_mode()); - - dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread); - - VERIFY(to_thread == Thread::current()); - - Scheduler::enter_current(*from_thread, true); - - // Since we got here and don't have Scheduler::context_switch in the - // call stack (because this is the first time we switched into this - // context), we need to notify the scheduler so that it can release - // the scheduler lock. We don't want to enable interrupts at this point - // as we're still in the middle of a context switch. Doing so could - // trigger a context switch within a context switch, leading to a crash. 
- Scheduler::leave_on_first_switch(trap->regs->eflags & ~0x200); -} - -extern "C" void thread_context_first_enter(void); - -// clang-format off -asm( -// enter_thread_context returns to here first time a thread is executing -".globl thread_context_first_enter \n" -"thread_context_first_enter: \n" -// switch_context will have pushed from_thread and to_thread to our new -// stack prior to thread_context_first_enter() being called, and the -// pointer to TrapFrame was the top of the stack before that -" movl 8(%esp), %ebx \n" // save pointer to TrapFrame -" cld \n" -" call context_first_init \n" -" addl $" __STRINGIFY(ENTER_THREAD_CONTEXT_ARGS_SIZE) ", %esp \n" -" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame -" jmp common_trap_exit \n" -); -// clang-format on - -void exit_kernel_thread(void) -{ - Thread::current()->exit(); -} - -u32 Processor::init_context(Thread& thread, bool leave_crit) -{ - VERIFY(is_kernel_mode()); - VERIFY(g_scheduler_lock.is_locked()); - if (leave_crit) { - // Leave the critical section we set up in in Process::exec, - // but because we still have the scheduler lock we should end up with 1 - m_in_critical--; // leave it without triggering anything or restoring flags - VERIFY(in_critical() == 1); - } - - u32 kernel_stack_top = thread.kernel_stack_top(); - - // Add a random offset between 0-256 (16-byte aligned) - kernel_stack_top -= round_up_to_power_of_two(get_fast_random<u8>(), 16); - - u32 stack_top = kernel_stack_top; - - // TODO: handle NT? - VERIFY((cpu_flags() & 0x24000) == 0); // Assume !(NT | VM) - - auto& tss = thread.tss(); - bool return_to_user = (tss.cs & 3) != 0; - - // make room for an interrupt frame - if (!return_to_user) { - // userspace_esp and userspace_ss are not popped off by iret - // unless we're switching back to user mode - stack_top -= sizeof(RegisterState) - 2 * sizeof(u32); - - // For kernel threads we'll push the thread function argument - // which should be in tss.esp and exit_kernel_thread as return - // address. 
- stack_top -= 2 * sizeof(u32); - *reinterpret_cast<u32*>(kernel_stack_top - 2 * sizeof(u32)) = tss.esp; - *reinterpret_cast<u32*>(kernel_stack_top - 3 * sizeof(u32)) = FlatPtr(&exit_kernel_thread); - } else { - stack_top -= sizeof(RegisterState); - } - - // we want to end up 16-byte aligned, %esp + 4 should be aligned - stack_top -= sizeof(u32); - *reinterpret_cast<u32*>(kernel_stack_top - sizeof(u32)) = 0; - - // set up the stack so that after returning from thread_context_first_enter() - // we will end up either in kernel mode or user mode, depending on how the thread is set up - // However, the first step is to always start in kernel mode with thread_context_first_enter - RegisterState& iretframe = *reinterpret_cast<RegisterState*>(stack_top); - iretframe.ss = tss.ss; - iretframe.gs = tss.gs; - iretframe.fs = tss.fs; - iretframe.es = tss.es; - iretframe.ds = tss.ds; - iretframe.edi = tss.edi; - iretframe.esi = tss.esi; - iretframe.ebp = tss.ebp; - iretframe.esp = 0; - iretframe.ebx = tss.ebx; - iretframe.edx = tss.edx; - iretframe.ecx = tss.ecx; - iretframe.eax = tss.eax; - iretframe.eflags = tss.eflags; - iretframe.eip = tss.eip; - iretframe.cs = tss.cs; - if (return_to_user) { - iretframe.userspace_esp = tss.esp; - iretframe.userspace_ss = tss.ss; - } - - // make space for a trap frame - stack_top -= sizeof(TrapFrame); - TrapFrame& trap = *reinterpret_cast<TrapFrame*>(stack_top); - trap.regs = &iretframe; - trap.prev_irq_level = 0; - trap.next_trap = nullptr; - - stack_top -= sizeof(u32); // pointer to TrapFrame - *reinterpret_cast<u32*>(stack_top) = stack_top + 4; - - if constexpr (CONTEXT_SWITCH_DEBUG) { - if (return_to_user) { - dbgln("init_context {} ({}) set up to execute at eip={}:{}, esp={}, stack_top={}, user_top={}:{}", - thread, - VirtualAddress(&thread), - iretframe.cs, tss.eip, - VirtualAddress(tss.esp), - VirtualAddress(stack_top), - iretframe.userspace_ss, - iretframe.userspace_esp); - } else { - dbgln("init_context {} ({}) set up to execute at eip={}:{}, esp={}, stack_top={}", - thread, - VirtualAddress(&thread), - iretframe.cs, tss.eip, - VirtualAddress(tss.esp), - VirtualAddress(stack_top)); - } - } - - // make switch_context() always first return to thread_context_first_enter() - // in kernel mode, so set up these values so that we end up popping iretframe - // off the stack right after the context switch completed, at which point - // control is transferred to what iretframe is pointing to. 
- tss.eip = FlatPtr(&thread_context_first_enter); - tss.esp0 = kernel_stack_top; - tss.esp = stack_top; - tss.cs = GDT_SELECTOR_CODE0; - tss.ds = GDT_SELECTOR_DATA0; - tss.es = GDT_SELECTOR_DATA0; - tss.gs = GDT_SELECTOR_DATA0; - tss.ss = GDT_SELECTOR_DATA0; - tss.fs = GDT_SELECTOR_PROC; - return stack_top; -} - -extern "C" u32 do_init_context(Thread* thread, u32 flags) -{ - VERIFY_INTERRUPTS_DISABLED(); - thread->tss().eflags = flags; - return Processor::current().init_context(*thread, true); -} - -extern "C" void do_assume_context(Thread* thread, u32 flags); - -#if ARCH(I386) -// clang-format off -asm( -".global do_assume_context \n" -"do_assume_context: \n" -" movl 4(%esp), %ebx \n" -" movl 8(%esp), %esi \n" -// We're going to call Processor::init_context, so just make sure -// we have enough stack space so we don't stomp over it -" subl $(" __STRINGIFY(4 + REGISTER_STATE_SIZE + TRAP_FRAME_SIZE + 4) "), %esp \n" -" pushl %esi \n" -" pushl %ebx \n" -" cld \n" -" call do_init_context \n" -" addl $8, %esp \n" -" movl %eax, %esp \n" // move stack pointer to what Processor::init_context set up for us -" pushl %ebx \n" // push to_thread -" pushl %ebx \n" // push from_thread -" pushl $thread_context_first_enter \n" // should be same as tss.eip -" jmp enter_thread_context \n" -); -// clang-format on -#endif - -void Processor::assume_context(Thread& thread, FlatPtr flags) -{ - dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread); - - VERIFY_INTERRUPTS_DISABLED(); - Scheduler::prepare_after_exec(); - // in_critical() should be 2 here. The critical section in Process::exec - // and then the scheduler lock - VERIFY(Processor::current().in_critical() == 2); -#if ARCH(I386) - do_assume_context(&thread, flags); -#elif ARCH(X86_64) - (void)flags; - TODO(); -#endif - VERIFY_NOT_REACHED(); -} - -extern "C" UNMAP_AFTER_INIT void pre_init_finished(void) -{ - VERIFY(g_scheduler_lock.own_lock()); - - // Because init_finished() will wait on the other APs, we need - // to release the scheduler lock so that the other APs can also get - // to this point - - // The target flags will get restored upon leaving the trap - u32 prev_flags = cpu_flags(); - Scheduler::leave_on_first_switch(prev_flags); -} - -extern "C" UNMAP_AFTER_INIT void post_init_finished(void) -{ - // We need to re-acquire the scheduler lock before a context switch - // transfers control into the idle loop, which needs the lock held - Scheduler::prepare_for_idle_loop(); -} - -UNMAP_AFTER_INIT void Processor::initialize_context_switching(Thread& initial_thread) -{ - VERIFY(initial_thread.process().is_kernel_process()); - - auto& tss = initial_thread.tss(); - m_tss = tss; - m_tss.esp0 = tss.esp0; - m_tss.ss0 = GDT_SELECTOR_DATA0; - // user mode needs to be able to switch to kernel mode: - m_tss.cs = m_tss.ds = m_tss.es = m_tss.gs = m_tss.ss = GDT_SELECTOR_CODE0 | 3; - m_tss.fs = GDT_SELECTOR_PROC | 3; - - m_scheduler_initialized = true; - -#if ARCH(I386) - // clang-format off - asm volatile( - "movl %[new_esp], %%esp \n" // switch to new stack - "pushl %[from_to_thread] \n" // to_thread - "pushl %[from_to_thread] \n" // from_thread - "pushl $" __STRINGIFY(GDT_SELECTOR_CODE0) " \n" - "pushl %[new_eip] \n" // save the entry eip to the stack - "movl %%esp, %%ebx \n" - "addl $20, %%ebx \n" // calculate pointer to TrapFrame - "pushl %%ebx \n" - "cld \n" - "pushl %[cpu] \n" // push argument for init_finished before register is clobbered - "call pre_init_finished \n" - "call init_finished \n" - "addl $4, %%esp 
\n" - "call post_init_finished \n" - "call enter_trap_no_irq \n" - "addl $4, %%esp \n" - "lret \n" - :: [new_esp] "g" (tss.esp), - [new_eip] "a" (tss.eip), - [from_to_thread] "b" (&initial_thread), - [cpu] "c" (id()) - ); - // clang-format on -#endif - - VERIFY_NOT_REACHED(); -} - -void Processor::enter_trap(TrapFrame& trap, bool raise_irq) -{ - VERIFY_INTERRUPTS_DISABLED(); - VERIFY(&Processor::current() == this); - trap.prev_irq_level = m_in_irq; - if (raise_irq) - m_in_irq++; - auto* current_thread = Processor::current_thread(); - if (current_thread) { - auto& current_trap = current_thread->current_trap(); - trap.next_trap = current_trap; - current_trap = &trap; - // The cs register of this trap tells us where we will return back to - current_thread->set_previous_mode(((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode); - } else { - trap.next_trap = nullptr; - } -} - -void Processor::exit_trap(TrapFrame& trap) -{ - VERIFY_INTERRUPTS_DISABLED(); - VERIFY(&Processor::current() == this); - VERIFY(m_in_irq >= trap.prev_irq_level); - m_in_irq = trap.prev_irq_level; - - smp_process_pending_messages(); - - if (!m_in_irq && !m_in_critical) - check_invoke_scheduler(); - - auto* current_thread = Processor::current_thread(); - if (current_thread) { - auto& current_trap = current_thread->current_trap(); - current_trap = trap.next_trap; - if (current_trap) { - VERIFY(current_trap->regs); - // If we have another higher level trap then we probably returned - // from an interrupt or irq handler. The cs register of the - // new/higher level trap tells us what the mode prior to it was - current_thread->set_previous_mode(((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode); - } else { - // If we don't have a higher level trap then we're back in user mode. - // Unless we're a kernel process, in which case we're always in kernel mode - current_thread->set_previous_mode(current_thread->process().is_kernel_process() ? Thread::PreviousMode::KernelMode : Thread::PreviousMode::UserMode); - } - } -} - -void Processor::check_invoke_scheduler() -{ - VERIFY(!m_in_irq); - VERIFY(!m_in_critical); - if (m_invoke_scheduler_async && m_scheduler_initialized) { - m_invoke_scheduler_async = false; - Scheduler::invoke_async(); - } -} - -void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count) -{ - auto ptr = vaddr.as_ptr(); - while (page_count > 0) { - // clang-format off - asm volatile("invlpg %0" - : - : "m"(*ptr) - : "memory"); - // clang-format on - ptr += PAGE_SIZE; - page_count--; - } -} - -void Processor::flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count) -{ - if (s_smp_enabled && (!is_user_address(vaddr) || Process::current()->thread_count() > 1)) - smp_broadcast_flush_tlb(page_directory, vaddr, page_count); - else - flush_tlb_local(vaddr, page_count); -} - -static volatile ProcessorMessage* s_message_pool; - -void Processor::smp_return_to_pool(ProcessorMessage& msg) -{ - ProcessorMessage* next = nullptr; - do { - msg.next = next; - } while (!atomic_compare_exchange_strong(&s_message_pool, next, &msg, AK::MemoryOrder::memory_order_acq_rel)); -} - -ProcessorMessage& Processor::smp_get_from_pool() -{ - ProcessorMessage* msg; - - // The assumption is that messages are never removed from the pool! 
- for (;;) { - msg = atomic_load(&s_message_pool, AK::MemoryOrder::memory_order_consume); - if (!msg) { - if (!Processor::current().smp_process_pending_messages()) { - // TODO: pause for a bit? - } - continue; - } - // If another processor were to use this message in the meanwhile, - // "msg" is still valid (because it never gets freed). We'd detect - // this because the expected value "msg" and pool would - // no longer match, and the compare_exchange will fail. But accessing - // "msg->next" is always safe here. - if (atomic_compare_exchange_strong(&s_message_pool, msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) { - // We successfully "popped" this available message - break; - } - } - - VERIFY(msg != nullptr); - return *msg; -} - -Atomic<u32> Processor::s_idle_cpu_mask { 0 }; - -u32 Processor::smp_wake_n_idle_processors(u32 wake_count) -{ - VERIFY(Processor::current().in_critical()); - VERIFY(wake_count > 0); - if (!s_smp_enabled) - return 0; - - // Wake at most N - 1 processors - if (wake_count >= Processor::count()) { - wake_count = Processor::count() - 1; - VERIFY(wake_count > 0); - } - - u32 current_id = Processor::current().id(); - - u32 did_wake_count = 0; - auto& apic = APIC::the(); - while (did_wake_count < wake_count) { - // Try to get a set of idle CPUs and flip them to busy - u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id); - u32 idle_count = __builtin_popcountl(idle_mask); - if (idle_count == 0) - break; // No (more) idle processor available - - u32 found_mask = 0; - for (u32 i = 0; i < idle_count; i++) { - u32 cpu = __builtin_ffsl(idle_mask) - 1; - idle_mask &= ~(1u << cpu); - found_mask |= 1u << cpu; - } - - idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask; - if (idle_mask == 0) - continue; // All of them were flipped to busy, try again - idle_count = __builtin_popcountl(idle_mask); - for (u32 i = 0; i < idle_count; i++) { - u32 cpu = __builtin_ffsl(idle_mask) - 1; - idle_mask &= ~(1u << cpu); - - // Send an IPI to that CPU to wake it up. There is a possibility - // someone else woke it up as well, or that it woke up due to - // a timer interrupt. But we tried hard to avoid this... - apic.send_ipi(cpu); - did_wake_count++; - } - } - return did_wake_count; -} - -UNMAP_AFTER_INIT void Processor::smp_enable() -{ - size_t msg_pool_size = Processor::count() * 100u; - size_t msg_entries_cnt = Processor::count(); - - auto msgs = new ProcessorMessage[msg_pool_size]; - auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt]; - size_t msg_entry_i = 0; - for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) { - auto& msg = msgs[i]; - msg.next = i < msg_pool_size - 1 ? 
&msgs[i + 1] : nullptr; - msg.per_proc_entries = &msg_entries[msg_entry_i]; - for (size_t k = 0; k < msg_entries_cnt; k++) - msg_entries[msg_entry_i + k].msg = &msg; - } - - atomic_store(&s_message_pool, &msgs[0], AK::MemoryOrder::memory_order_release); - - // Start sending IPI messages - s_smp_enabled = true; -} - -void Processor::smp_cleanup_message(ProcessorMessage& msg) -{ - switch (msg.type) { - case ProcessorMessage::Callback: - msg.callback_value().~Function(); - break; - default: - break; - } -} - -bool Processor::smp_process_pending_messages() -{ - bool did_process = false; - u32 prev_flags; - enter_critical(prev_flags); - - if (auto pending_msgs = atomic_exchange(&m_message_queue, nullptr, AK::MemoryOrder::memory_order_acq_rel)) { - // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first - auto reverse_list = - [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* { - ProcessorMessageEntry* rev_list = nullptr; - while (list) { - auto next = list->next; - list->next = rev_list; - rev_list = list; - list = next; - } - return rev_list; - }; - - pending_msgs = reverse_list(pending_msgs); - - // now process in the right order - ProcessorMessageEntry* next_msg; - for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) { - next_msg = cur_msg->next; - auto msg = cur_msg->msg; - - dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", id(), VirtualAddress(msg)); - - switch (msg->type) { - case ProcessorMessage::Callback: - msg->invoke_callback(); - break; - case ProcessorMessage::FlushTlb: - if (is_user_address(VirtualAddress(msg->flush_tlb.ptr))) { - // We assume that we don't cross into kernel land! - VERIFY(is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE)); - if (read_cr3() != msg->flush_tlb.page_directory->cr3()) { - // This processor isn't using this page directory right now, we can ignore this request - dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr)); - break; - } - } - flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count); - break; - } - - bool is_async = msg->async; // Need to cache this value *before* dropping the ref count! - auto prev_refs = atomic_fetch_sub(&msg->refs, 1u, AK::MemoryOrder::memory_order_acq_rel); - VERIFY(prev_refs != 0); - if (prev_refs == 1) { - // All processors handled this. If this is an async message, - // we need to clean it up and return it to the pool - if (is_async) { - smp_cleanup_message(*msg); - smp_return_to_pool(*msg); - } - } - - if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) - halt_this(); - } - did_process = true; - } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) { - halt_this(); - } - - leave_critical(prev_flags); - return did_process; -} - -bool Processor::smp_queue_message(ProcessorMessage& msg) -{ - // Note that it's quite possible that the other processor may pop - // the queue at any given time. We rely on the fact that the messages - // are pooled and never get freed! 
- auto& msg_entry = msg.per_proc_entries[id()]; - VERIFY(msg_entry.msg == &msg); - ProcessorMessageEntry* next = nullptr; - do { - msg_entry.next = next; - } while (!atomic_compare_exchange_strong(&m_message_queue, next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel)); - return next == nullptr; -} - -void Processor::smp_broadcast_message(ProcessorMessage& msg) -{ - auto& cur_proc = Processor::current(); - - dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), count(), VirtualAddress(&cur_proc)); - - atomic_store(&msg.refs, count() - 1, AK::MemoryOrder::memory_order_release); - VERIFY(msg.refs > 0); - bool need_broadcast = false; - for_each( - [&](Processor& proc) { - if (&proc != &cur_proc) { - if (proc.smp_queue_message(msg)) - need_broadcast = true; - } - }); - - // Now trigger an IPI on all other APs (unless all targets already had messages queued) - if (need_broadcast) - APIC::the().broadcast_ipi(); -} - -void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg) -{ - auto& cur_proc = Processor::current(); - VERIFY(!msg.async); - // If synchronous then we must cleanup and return the message back - // to the pool. Otherwise, the last processor to complete it will return it - while (atomic_load(&msg.refs, AK::MemoryOrder::memory_order_consume) != 0) { - // TODO: pause for a bit? - - // We need to process any messages that may have been sent to - // us while we're waiting. This also checks if another processor - // may have requested us to halt. - cur_proc.smp_process_pending_messages(); - } - - smp_cleanup_message(msg); - smp_return_to_pool(msg); -} - -void Processor::smp_broadcast(Function<void()> callback, bool async) -{ - auto& msg = smp_get_from_pool(); - msg.async = async; - msg.type = ProcessorMessage::Callback; - new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback)); - smp_broadcast_message(msg); - if (!async) - smp_broadcast_wait_sync(msg); -} - -void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async) -{ - auto& cur_proc = Processor::current(); - VERIFY(cpu != cur_proc.get_id()); - auto& target_proc = processors()[cpu]; - msg.async = async; - - dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_proc)); - - atomic_store(&msg.refs, 1u, AK::MemoryOrder::memory_order_release); - if (target_proc->smp_queue_message(msg)) { - APIC::the().send_ipi(cpu); - } - - if (!async) { - // If synchronous then we must cleanup and return the message back - // to the pool. Otherwise, the last processor to complete it will return it - while (atomic_load(&msg.refs, AK::MemoryOrder::memory_order_consume) != 0) { - // TODO: pause for a bit? - - // We need to process any messages that may have been sent to - // us while we're waiting. This also checks if another processor - // may have requested us to halt. 
- cur_proc.smp_process_pending_messages(); - } - - smp_cleanup_message(msg); - smp_return_to_pool(msg); - } -} - -void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async) -{ - auto& msg = smp_get_from_pool(); - msg.type = ProcessorMessage::Callback; - new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback)); - smp_unicast_message(cpu, msg, async); -} - -void Processor::smp_broadcast_flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count) -{ - auto& msg = smp_get_from_pool(); - msg.async = false; - msg.type = ProcessorMessage::FlushTlb; - msg.flush_tlb.page_directory = page_directory; - msg.flush_tlb.ptr = vaddr.as_ptr(); - msg.flush_tlb.page_count = page_count; - smp_broadcast_message(msg); - // While the other processors handle this request, we'll flush ours - flush_tlb_local(vaddr, page_count); - // Now wait until everybody is done as well - smp_broadcast_wait_sync(msg); -} - -void Processor::smp_broadcast_halt() -{ - // We don't want to use a message, because this could have been triggered - // by being out of memory and we might not be able to get a message - for_each( - [&](Processor& proc) { - proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release); - }); - - // Now trigger an IPI on all other APs - APIC::the().broadcast_ipi(); -} - -void Processor::Processor::halt() -{ - if (s_smp_enabled) - smp_broadcast_halt(); - - halt_this(); -} - -UNMAP_AFTER_INIT void Processor::deferred_call_pool_init() -{ - size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]); - for (size_t i = 0; i < pool_count; i++) { - auto& entry = m_deferred_call_pool[i]; - entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr; - new (entry.handler_storage) DeferredCallEntry::HandlerFunction; - entry.was_allocated = false; - } - m_pending_deferred_calls = nullptr; - m_free_deferred_call_pool_entry = &m_deferred_call_pool[0]; -} - -void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry) -{ - VERIFY(m_in_critical); - VERIFY(!entry->was_allocated); - - entry->handler_value() = {}; - - entry->next = m_free_deferred_call_pool_entry; - m_free_deferred_call_pool_entry = entry; -} - -DeferredCallEntry* Processor::deferred_call_get_free() -{ - VERIFY(m_in_critical); - - if (m_free_deferred_call_pool_entry) { - // Fast path, we have an entry in our pool - auto* entry = m_free_deferred_call_pool_entry; - m_free_deferred_call_pool_entry = entry->next; - VERIFY(!entry->was_allocated); - return entry; - } - - auto* entry = new DeferredCallEntry; - new (entry->handler_storage) DeferredCallEntry::HandlerFunction; - entry->was_allocated = true; - return entry; -} - -void Processor::deferred_call_execute_pending() -{ - VERIFY(m_in_critical); - - if (!m_pending_deferred_calls) - return; - auto* pending_list = m_pending_deferred_calls; - m_pending_deferred_calls = nullptr; - - // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first - auto reverse_list = - [](DeferredCallEntry* list) -> DeferredCallEntry* { - DeferredCallEntry* rev_list = nullptr; - while (list) { - auto next = list->next; - list->next = rev_list; - rev_list = list; - list = next; - } - return rev_list; - }; - pending_list = reverse_list(pending_list); - - do { - pending_list->invoke_handler(); - - // Return the entry back to the pool, or free it - auto* next = pending_list->next; - if (pending_list->was_allocated) { - pending_list->handler_value().~Function(); - delete 
pending_list; - } else - deferred_call_return_to_pool(pending_list); - pending_list = next; - } while (pending_list); -} - -void Processor::deferred_call_queue_entry(DeferredCallEntry* entry) -{ - VERIFY(m_in_critical); - entry->next = m_pending_deferred_calls; - m_pending_deferred_calls = entry; -} - -void Processor::deferred_call_queue(Function<void()> callback) -{ - // NOTE: If we are called outside of a critical section and outside - // of an irq handler, the function will be executed before we return! - ScopedCritical critical; - auto& cur_proc = Processor::current(); - - auto* entry = cur_proc.deferred_call_get_free(); - entry->handler_value() = move(callback); - - cur_proc.deferred_call_queue_entry(entry); -} - -UNMAP_AFTER_INIT void Processor::gdt_init() -{ - m_gdt_length = 0; - m_gdtr.address = nullptr; - m_gdtr.limit = 0; - - write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000); - write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0 - write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0 - write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3 - write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3 - - Descriptor tls_descriptor {}; - tls_descriptor.low = tls_descriptor.high = 0; - tls_descriptor.dpl = 3; - tls_descriptor.segment_present = 1; - tls_descriptor.granularity = 0; - tls_descriptor.operation_size64 = 0; - tls_descriptor.operation_size32 = 1; - tls_descriptor.descriptor_type = 1; - tls_descriptor.type = 2; - write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3 - - Descriptor fs_descriptor {}; - fs_descriptor.set_base(VirtualAddress { this }); - fs_descriptor.set_limit(sizeof(Processor)); - fs_descriptor.dpl = 0; - fs_descriptor.segment_present = 1; - fs_descriptor.granularity = 0; - fs_descriptor.operation_size64 = 0; - fs_descriptor.operation_size32 = 1; - fs_descriptor.descriptor_type = 1; - fs_descriptor.type = 2; - write_gdt_entry(GDT_SELECTOR_PROC, fs_descriptor); // fs0 - - Descriptor tss_descriptor {}; - tss_descriptor.set_base(VirtualAddress { &m_tss }); - tss_descriptor.set_limit(sizeof(TSS32)); - tss_descriptor.dpl = 0; - tss_descriptor.segment_present = 1; - tss_descriptor.granularity = 0; - tss_descriptor.operation_size64 = 0; - tss_descriptor.operation_size32 = 1; - tss_descriptor.descriptor_type = 0; - tss_descriptor.type = 9; - write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss - - flush_gdt(); - load_task_register(GDT_SELECTOR_TSS); - - asm volatile( - "mov %%ax, %%ds\n" - "mov %%ax, %%es\n" - "mov %%ax, %%gs\n" - "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0) - : "memory"); - set_fs(GDT_SELECTOR_PROC); - -#if ARCH(I386) - // Make sure CS points to the kernel code descriptor. 
- // clang-format off - asm volatile( - "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n" - "sanity:\n"); - // clang-format on -#endif -} - -void copy_kernel_registers_into_ptrace_registers(PtraceRegisters& ptrace_regs, const RegisterState& kernel_regs) -{ - ptrace_regs.eax = kernel_regs.eax, - ptrace_regs.ecx = kernel_regs.ecx, - ptrace_regs.edx = kernel_regs.edx, - ptrace_regs.ebx = kernel_regs.ebx, - ptrace_regs.esp = kernel_regs.userspace_esp, - ptrace_regs.ebp = kernel_regs.ebp, - ptrace_regs.esi = kernel_regs.esi, - ptrace_regs.edi = kernel_regs.edi, - ptrace_regs.eip = kernel_regs.eip, - ptrace_regs.eflags = kernel_regs.eflags, - ptrace_regs.cs = 0; - ptrace_regs.ss = 0; - ptrace_regs.ds = 0; - ptrace_regs.es = 0; - ptrace_regs.fs = 0; - ptrace_regs.gs = 0; -} - -void copy_ptrace_registers_into_kernel_registers(RegisterState& kernel_regs, const PtraceRegisters& ptrace_regs) -{ - kernel_regs.eax = ptrace_regs.eax; - kernel_regs.ecx = ptrace_regs.ecx; - kernel_regs.edx = ptrace_regs.edx; - kernel_regs.ebx = ptrace_regs.ebx; - kernel_regs.esp = ptrace_regs.esp; - kernel_regs.ebp = ptrace_regs.ebp; - kernel_regs.esi = ptrace_regs.esi; - kernel_regs.edi = ptrace_regs.edi; - kernel_regs.eip = ptrace_regs.eip; - kernel_regs.eflags = (kernel_regs.eflags & ~safe_eflags_mask) | (ptrace_regs.eflags & safe_eflags_mask); -} -} - -void __assertion_failed(const char* msg, const char* file, unsigned line, const char* func) -{ - asm volatile("cli"); - critical_dmesgln("ASSERTION FAILED: {}", msg); - critical_dmesgln("{}:{} in {}", file, line, func); - - abort(); -} - -[[noreturn]] void abort() -{ - // Switch back to the current process's page tables if there are any. - // Otherwise stack walking will be a disaster. - auto process = Process::current(); - if (process) - MM.enter_process_paging_scope(*process); - - Kernel::dump_backtrace(); - Processor::halt(); - - abort(); -} - -[[noreturn]] void _abort() -{ - asm volatile("ud2"); - __builtin_unreachable(); -} - -NonMaskableInterruptDisabler::NonMaskableInterruptDisabler() -{ - IO::out8(0x70, IO::in8(0x70) | 0x80); -} - -NonMaskableInterruptDisabler::~NonMaskableInterruptDisabler() -{ - IO::out8(0x70, IO::in8(0x70) & 0x7F); -} diff --git a/Kernel/Arch/x86/ASM_wrapper.h b/Kernel/Arch/x86/ASM_wrapper.h new file mode 100644 index 0000000000..9962247402 --- /dev/null +++ b/Kernel/Arch/x86/ASM_wrapper.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Types.h> + +namespace Kernel { + +ALWAYS_INLINE void cli() +{ + asm volatile("cli" :: + : "memory"); +} +ALWAYS_INLINE void sti() +{ + asm volatile("sti" :: + : "memory"); +} +ALWAYS_INLINE FlatPtr cpu_flags() +{ + FlatPtr flags; + asm volatile( + "pushf\n" + "pop %0\n" + : "=rm"(flags)::"memory"); + return flags; +} + +ALWAYS_INLINE void set_fs(u16 segment) +{ + asm volatile( + "mov %%ax, %%fs" ::"a"(segment) + : "memory"); +} + +ALWAYS_INLINE void set_gs(u16 segment) +{ + asm volatile( + "mov %%ax, %%gs" ::"a"(segment) + : "memory"); +} + +ALWAYS_INLINE u16 get_fs() +{ + u16 fs; + asm("mov %%fs, %%eax" + : "=a"(fs)); + return fs; +} + +ALWAYS_INLINE u16 get_gs() +{ + u16 gs; + asm("mov %%gs, %%eax" + : "=a"(gs)); + return gs; +} + +ALWAYS_INLINE u32 read_fs_u32(u32 offset) +{ + u32 val; + asm volatile( + "movl %%fs:%a[off], %k[val]" + : [val] "=r"(val) + : [off] "ir"(offset)); + return val; +} + +ALWAYS_INLINE FlatPtr read_fs_ptr(u32 offset) +{ + return read_fs_u32(offset); +} + 
+ALWAYS_INLINE void write_fs_u32(u32 offset, u32 val) +{ + asm volatile( + "movl %k[val], %%fs:%a[off]" ::[off] "ir"(offset), [val] "ir"(val) + : "memory"); +} + +ALWAYS_INLINE bool are_interrupts_enabled() +{ + return cpu_flags() & 0x200; +} + +FlatPtr read_cr0(); +FlatPtr read_cr2(); +FlatPtr read_cr3(); +FlatPtr read_cr4(); +u64 read_xcr0(); + +void write_cr0(FlatPtr); +void write_cr3(FlatPtr); +void write_cr4(FlatPtr); +void write_xcr0(u64); + +void flush_idt(); + +ALWAYS_INLINE void load_task_register(u16 selector) +{ + asm("ltr %0" ::"r"(selector)); +} + +FlatPtr read_dr0(); +void write_dr0(FlatPtr); +FlatPtr read_dr1(); +void write_dr1(FlatPtr); +FlatPtr read_dr2(); +void write_dr2(FlatPtr); +FlatPtr read_dr3(); +void write_dr3(FlatPtr); +FlatPtr read_dr6(); +void write_dr6(FlatPtr); +FlatPtr read_dr7(); +void write_dr7(FlatPtr); + +ALWAYS_INLINE static bool is_kernel_mode() +{ + u16 cs; + asm volatile( + "mov %%cs, %[cs] \n" + : [cs] "=g"(cs)); + return (cs & 3) == 0; +} + +ALWAYS_INLINE void read_tsc(u32& lsw, u32& msw) +{ + asm volatile("rdtsc" + : "=d"(msw), "=a"(lsw)); +} + +ALWAYS_INLINE u64 read_tsc() +{ + u32 lsw; + u32 msw; + read_tsc(lsw, msw); + return ((u64)msw << 32) | lsw; +} + +void stac(); +void clac(); + +[[noreturn]] ALWAYS_INLINE void halt_this() +{ + for (;;) { + asm volatile("cli; hlt"); + } +} + +} diff --git a/Kernel/Arch/x86/CPU.h b/Kernel/Arch/x86/CPU.h index c1baf2478c..08c1bfac52 100644 --- a/Kernel/Arch/x86/CPU.h +++ b/Kernel/Arch/x86/CPU.h @@ -7,31 +7,23 @@ #pragma once #include <AK/Atomic.h> -#include <AK/Badge.h> -#include <AK/Concepts.h> -#include <AK/Function.h> -#include <AK/Noncopyable.h> #include <AK/Vector.h> #include <Kernel/Arch/x86/DescriptorTable.h> -#include <Kernel/Arch/x86/TSS.h> - -#include <Kernel/PhysicalAddress.h> -#include <Kernel/VirtualAddress.h> -#include <LibC/sys/arch/i386/regs.h> #define READONLY_AFTER_INIT __attribute__((section(".ro_after_init"))) #define UNMAP_AFTER_INIT NEVER_INLINE __attribute__((section(".unmap_after_init"))) #define PAGE_SIZE 4096 +/* Map IRQ0-15 @ ISR 0x50-0x5F */ +#define IRQ_VECTOR_BASE 0x50 #define GENERIC_INTERRUPT_HANDLERS_COUNT (256 - IRQ_VECTOR_BASE) #define PAGE_MASK ((FlatPtr)0xfffff000u) namespace Kernel { -class MemoryManager; -class PageDirectory; -class PageTableEntry; +class RegisterState; +class GenericInterruptHandler; static constexpr u32 safe_eflags_mask = 0xdff; static constexpr u32 iopl_mask = 3u << 12; @@ -41,142 +33,6 @@ inline u32 get_iopl_from_eflags(u32 eflags) return (eflags & iopl_mask) >> 12; } -class PageDirectoryEntry { -public: - const PageTableEntry* page_table_base() const { return reinterpret_cast<PageTableEntry*>(m_raw & 0xfffff000u); } - PageTableEntry* page_table_base() { return reinterpret_cast<PageTableEntry*>(m_raw & 0xfffff000u); } - void set_page_table_base(u32 value) - { - m_raw &= 0x8000000000000fffULL; - m_raw |= value & 0xfffff000; - } - - bool is_null() const { return m_raw == 0; } - void clear() { m_raw = 0; } - - u64 raw() const { return m_raw; } - void copy_from(Badge<PageDirectory>, const PageDirectoryEntry& other) { m_raw = other.m_raw; } - - enum Flags { - Present = 1 << 0, - ReadWrite = 1 << 1, - UserSupervisor = 1 << 2, - WriteThrough = 1 << 3, - CacheDisabled = 1 << 4, - Huge = 1 << 7, - Global = 1 << 8, - NoExecute = 0x8000000000000000ULL, - }; - - bool is_present() const { return raw() & Present; } - void set_present(bool b) { set_bit(Present, b); } - - bool is_user_allowed() const { return raw() & UserSupervisor; } - void set_user_allowed(bool b) 
{ set_bit(UserSupervisor, b); } - - bool is_huge() const { return raw() & Huge; } - void set_huge(bool b) { set_bit(Huge, b); } - - bool is_writable() const { return raw() & ReadWrite; } - void set_writable(bool b) { set_bit(ReadWrite, b); } - - bool is_write_through() const { return raw() & WriteThrough; } - void set_write_through(bool b) { set_bit(WriteThrough, b); } - - bool is_cache_disabled() const { return raw() & CacheDisabled; } - void set_cache_disabled(bool b) { set_bit(CacheDisabled, b); } - - bool is_global() const { return raw() & Global; } - void set_global(bool b) { set_bit(Global, b); } - - bool is_execute_disabled() const { return raw() & NoExecute; } - void set_execute_disabled(bool b) { set_bit(NoExecute, b); } - - void set_bit(u64 bit, bool value) - { - if (value) - m_raw |= bit; - else - m_raw &= ~bit; - } - -private: - u64 m_raw; -}; - -class PageTableEntry { -public: - void* physical_page_base() { return reinterpret_cast<void*>(m_raw & 0xfffff000u); } - void set_physical_page_base(u32 value) - { - m_raw &= 0x8000000000000fffULL; - m_raw |= value & 0xfffff000; - } - - u64 raw() const { return (u32)m_raw; } - - enum Flags { - Present = 1 << 0, - ReadWrite = 1 << 1, - UserSupervisor = 1 << 2, - WriteThrough = 1 << 3, - CacheDisabled = 1 << 4, - Global = 1 << 8, - NoExecute = 0x8000000000000000ULL, - }; - - bool is_present() const { return raw() & Present; } - void set_present(bool b) { set_bit(Present, b); } - - bool is_user_allowed() const { return raw() & UserSupervisor; } - void set_user_allowed(bool b) { set_bit(UserSupervisor, b); } - - bool is_writable() const { return raw() & ReadWrite; } - void set_writable(bool b) { set_bit(ReadWrite, b); } - - bool is_write_through() const { return raw() & WriteThrough; } - void set_write_through(bool b) { set_bit(WriteThrough, b); } - - bool is_cache_disabled() const { return raw() & CacheDisabled; } - void set_cache_disabled(bool b) { set_bit(CacheDisabled, b); } - - bool is_global() const { return raw() & Global; } - void set_global(bool b) { set_bit(Global, b); } - - bool is_execute_disabled() const { return raw() & NoExecute; } - void set_execute_disabled(bool b) { set_bit(NoExecute, b); } - - bool is_null() const { return m_raw == 0; } - void clear() { m_raw = 0; } - - void set_bit(u64 bit, bool value) - { - if (value) - m_raw |= bit; - else - m_raw &= ~bit; - } - -private: - u64 m_raw; -}; - -static_assert(sizeof(PageDirectoryEntry) == 8); -static_assert(sizeof(PageTableEntry) == 8); - -class PageDirectoryPointerTable { -public: - PageDirectoryEntry* directory(size_t index) - { - return (PageDirectoryEntry*)(raw[index] & ~0xfffu); - } - - u64 raw[4]; -}; - -class GenericInterruptHandler; -struct RegisterState; - template<typename T> void read_possibly_unaligned_data(u8* where, T& data) { @@ -223,13 +79,7 @@ void write_possibly_unaligned_data(u8* where, T data) const DescriptorTablePointer& get_gdtr(); const DescriptorTablePointer& get_idtr(); -void register_interrupt_handler(u8 number, void (*handler)()); -void register_user_callable_interrupt_handler(u8 number, void (*handler)()); -GenericInterruptHandler& get_interrupt_handler(u8 interrupt_number); -void register_generic_interrupt_handler(u8 number, GenericInterruptHandler&); -void unregister_generic_interrupt_handler(u8 number, GenericInterruptHandler&); -void flush_idt(); -void load_task_register(u16 selector); + [[noreturn]] void handle_crash(RegisterState&, const char* description, int signal, bool out_of_memory = false); #define LSW(x) ((u32)(x)&0xFFFF) @@ 
-237,205 +87,6 @@ void load_task_register(u16 selector); #define LSB(x) ((x)&0xFF) #define MSB(x) (((x) >> 8) & 0xFF) -#define cli() asm volatile("cli" :: \ - : "memory") -#define sti() asm volatile("sti" :: \ - : "memory") - -inline FlatPtr cpu_flags() -{ - FlatPtr flags; - asm volatile( - "pushf\n" - "pop %0\n" - : "=rm"(flags)::"memory"); - return flags; -} - -inline void set_fs(u16 segment) -{ - asm volatile( - "mov %%ax, %%fs" ::"a"(segment) - : "memory"); -} - -inline void set_gs(u16 segment) -{ - asm volatile( - "mov %%ax, %%gs" ::"a"(segment) - : "memory"); -} - -inline u16 get_fs() -{ - u16 fs; - asm("mov %%fs, %%eax" - : "=a"(fs)); - return fs; -} - -inline u16 get_gs() -{ - u16 gs; - asm("mov %%gs, %%eax" - : "=a"(gs)); - return gs; -} - -inline u32 read_fs_u32(u32 offset) -{ - u32 val; - asm volatile( - "movl %%fs:%a[off], %k[val]" - : [val] "=r"(val) - : [off] "ir"(offset)); - return val; -} - -inline FlatPtr read_fs_ptr(u32 offset) -{ - return read_fs_u32(offset); -} - -inline void write_fs_u32(u32 offset, u32 val) -{ - asm volatile( - "movl %k[val], %%fs:%a[off]" ::[off] "ir"(offset), [val] "ir"(val) - : "memory"); -} - -inline bool are_interrupts_enabled() -{ - return cpu_flags() & 0x200; -} - -class InterruptDisabler { -public: - InterruptDisabler() - { - m_flags = cpu_flags(); - cli(); - } - - ~InterruptDisabler() - { - if (m_flags & 0x200) - sti(); - } - -private: - u32 m_flags; -}; - -class NonMaskableInterruptDisabler { -public: - NonMaskableInterruptDisabler(); - ~NonMaskableInterruptDisabler(); -}; - -/* Map IRQ0-15 @ ISR 0x50-0x5F */ -#define IRQ_VECTOR_BASE 0x50 - -struct PageFaultFlags { - enum Flags { - NotPresent = 0x00, - ProtectionViolation = 0x01, - Read = 0x00, - Write = 0x02, - UserMode = 0x04, - SupervisorMode = 0x00, - ReservedBitViolation = 0x08, - InstructionFetch = 0x10, - }; -}; - -class PageFault { -public: - PageFault(u16 code, VirtualAddress vaddr) - : m_code(code) - , m_vaddr(vaddr) - { - } - - enum class Type { - PageNotPresent = PageFaultFlags::NotPresent, - ProtectionViolation = PageFaultFlags::ProtectionViolation, - }; - - enum class Access { - Read = PageFaultFlags::Read, - Write = PageFaultFlags::Write, - }; - - VirtualAddress vaddr() const { return m_vaddr; } - u16 code() const { return m_code; } - - Type type() const { return (Type)(m_code & 1); } - Access access() const { return (Access)(m_code & 2); } - - bool is_not_present() const { return (m_code & 1) == PageFaultFlags::NotPresent; } - bool is_protection_violation() const { return (m_code & 1) == PageFaultFlags::ProtectionViolation; } - bool is_read() const { return (m_code & 2) == PageFaultFlags::Read; } - bool is_write() const { return (m_code & 2) == PageFaultFlags::Write; } - bool is_user() const { return (m_code & 4) == PageFaultFlags::UserMode; } - bool is_supervisor() const { return (m_code & 4) == PageFaultFlags::SupervisorMode; } - bool is_instruction_fetch() const { return (m_code & 16) == PageFaultFlags::InstructionFetch; } - -private: - u16 m_code; - VirtualAddress m_vaddr; -}; - -struct [[gnu::packed]] RegisterState { - FlatPtr ss; - FlatPtr gs; - FlatPtr fs; - FlatPtr es; - FlatPtr ds; - FlatPtr edi; - FlatPtr esi; - FlatPtr ebp; - FlatPtr esp; - FlatPtr ebx; - FlatPtr edx; - FlatPtr ecx; - FlatPtr eax; - u16 exception_code; - u16 isr_number; -#if ARCH(X86_64) - u32 padding; -#endif - FlatPtr eip; - FlatPtr cs; - FlatPtr eflags; - FlatPtr userspace_esp; - FlatPtr userspace_ss; -}; - -struct [[gnu::packed]] DebugRegisterState { - FlatPtr dr0; - FlatPtr dr1; - FlatPtr dr2; 
- FlatPtr dr3; - FlatPtr dr6; - FlatPtr dr7; -}; - -#if ARCH(I386) -# define REGISTER_STATE_SIZE (19 * 4) -#else -# define REGISTER_STATE_SIZE (19 * 8) -#endif -static_assert(REGISTER_STATE_SIZE == sizeof(RegisterState)); - -void copy_kernel_registers_into_ptrace_registers(PtraceRegisters&, const RegisterState&); -void copy_ptrace_registers_into_kernel_registers(RegisterState&, const PtraceRegisters&); - -struct [[gnu::aligned(16)]] FPUState -{ - u8 buffer[512]; -}; - constexpr FlatPtr page_base_of(FlatPtr address) { return address & PAGE_MASK; @@ -456,643 +107,4 @@ inline FlatPtr offset_in_page(const void* address) return offset_in_page((FlatPtr)address); } -FlatPtr read_cr0(); -FlatPtr read_cr2(); -FlatPtr read_cr3(); -FlatPtr read_cr4(); -u64 read_xcr0(); - -void write_cr0(FlatPtr); -void write_cr3(FlatPtr); -void write_cr4(FlatPtr); -void write_xcr0(u64); - -void read_debug_registers_into(DebugRegisterState&); -void write_debug_registers_from(const DebugRegisterState&); -void clear_debug_registers(); -FlatPtr read_dr0(); -void write_dr0(FlatPtr); -FlatPtr read_dr1(); -void write_dr1(FlatPtr); -FlatPtr read_dr2(); -void write_dr2(FlatPtr); -FlatPtr read_dr3(); -void write_dr3(FlatPtr); -FlatPtr read_dr6(); -void write_dr6(FlatPtr); -FlatPtr read_dr7(); -void write_dr7(FlatPtr); - -static inline bool is_kernel_mode() -{ - u16 cs; - asm volatile( - "mov %%cs, %[cs] \n" - : [cs] "=g"(cs)); - return (cs & 3) == 0; -} - -class CPUID { -public: - explicit CPUID(u32 function) { asm volatile("cpuid" - : "=a"(m_eax), "=b"(m_ebx), "=c"(m_ecx), "=d"(m_edx) - : "a"(function), "c"(0)); } - u32 eax() const { return m_eax; } - u32 ebx() const { return m_ebx; } - u32 ecx() const { return m_ecx; } - u32 edx() const { return m_edx; } - -private: - u32 m_eax { 0xffffffff }; - u32 m_ebx { 0xffffffff }; - u32 m_ecx { 0xffffffff }; - u32 m_edx { 0xffffffff }; -}; - -inline void read_tsc(u32& lsw, u32& msw) -{ - asm volatile("rdtsc" - : "=d"(msw), "=a"(lsw)); -} - -inline u64 read_tsc() -{ - u32 lsw; - u32 msw; - read_tsc(lsw, msw); - return ((u64)msw << 32) | lsw; -} - -// FIXME: This can't hold every CPU feature as-is. 
-enum class CPUFeature : u32 { - NX = (1 << 0), - PAE = (1 << 1), - PGE = (1 << 2), - RDRAND = (1 << 3), - RDSEED = (1 << 4), - SMAP = (1 << 5), - SMEP = (1 << 6), - SSE = (1 << 7), - TSC = (1 << 8), - RDTSCP = (1 << 9), - CONSTANT_TSC = (1 << 10), - NONSTOP_TSC = (1 << 11), - UMIP = (1 << 12), - SEP = (1 << 13), - SYSCALL = (1 << 14), - MMX = (1 << 15), - SSE2 = (1 << 16), - SSE3 = (1 << 17), - SSSE3 = (1 << 18), - SSE4_1 = (1 << 19), - SSE4_2 = (1 << 20), - XSAVE = (1 << 21), - AVX = (1 << 22), - FXSR = (1 << 23), -}; - -class Thread; -struct TrapFrame; - -#define GDT_SELECTOR_CODE0 0x08 -#define GDT_SELECTOR_DATA0 0x10 -#define GDT_SELECTOR_CODE3 0x18 -#define GDT_SELECTOR_DATA3 0x20 -#define GDT_SELECTOR_TLS 0x28 -#define GDT_SELECTOR_PROC 0x30 -#define GDT_SELECTOR_TSS 0x38 - -// SYSENTER makes certain assumptions on how the GDT is structured: -static_assert(GDT_SELECTOR_CODE0 + 8 == GDT_SELECTOR_DATA0); // SS0 = CS0 + 8 - -// SYSEXIT makes certain assumptions on how the GDT is structured: -static_assert(GDT_SELECTOR_CODE0 + 16 == GDT_SELECTOR_CODE3); // CS3 = CS0 + 16 -static_assert(GDT_SELECTOR_CODE0 + 24 == GDT_SELECTOR_DATA3); // SS3 = CS0 + 32 - -class ProcessorInfo; -class SchedulerPerProcessorData; -struct MemoryManagerData; -struct ProcessorMessageEntry; - -struct ProcessorMessage { - using CallbackFunction = Function<void()>; - - enum Type { - FlushTlb, - Callback, - }; - Type type; - volatile u32 refs; // atomic - union { - ProcessorMessage* next; // only valid while in the pool - alignas(CallbackFunction) u8 callback_storage[sizeof(CallbackFunction)]; - struct { - const PageDirectory* page_directory; - u8* ptr; - size_t page_count; - } flush_tlb; - }; - - volatile bool async; - - ProcessorMessageEntry* per_proc_entries; - - CallbackFunction& callback_value() - { - return *bit_cast<CallbackFunction*>(&callback_storage); - } - - void invoke_callback() - { - VERIFY(type == Type::Callback); - callback_value()(); - } -}; - -struct ProcessorMessageEntry { - ProcessorMessageEntry* next; - ProcessorMessage* msg; -}; - -struct DeferredCallEntry { - using HandlerFunction = Function<void()>; - - DeferredCallEntry* next; - alignas(HandlerFunction) u8 handler_storage[sizeof(HandlerFunction)]; - bool was_allocated; - - HandlerFunction& handler_value() - { - return *bit_cast<HandlerFunction*>(&handler_storage); - } - - void invoke_handler() - { - handler_value()(); - } -}; - -class Processor; -// Note: We only support processors at most at the moment, -// so allocate 8 slots of inline capacity in the container. 
-using ProcessorContainer = Array<Processor*, 8>; - -class Processor { - friend class ProcessorInfo; - - AK_MAKE_NONCOPYABLE(Processor); - AK_MAKE_NONMOVABLE(Processor); - - Processor* m_self; - - DescriptorTablePointer m_gdtr; - Descriptor m_gdt[256]; - u32 m_gdt_length; - - u32 m_cpu; - u32 m_in_irq; - Atomic<u32, AK::MemoryOrder::memory_order_relaxed> m_in_critical; - static Atomic<u32> s_idle_cpu_mask; - - TSS m_tss; - static FPUState s_clean_fpu_state; - CPUFeature m_features; - static volatile u32 g_total_processors; // atomic - u8 m_physical_address_bit_width; - - ProcessorInfo* m_info; - MemoryManagerData* m_mm_data; - SchedulerPerProcessorData* m_scheduler_data; - Thread* m_current_thread; - Thread* m_idle_thread; - - volatile ProcessorMessageEntry* m_message_queue; // atomic, LIFO - - bool m_invoke_scheduler_async; - bool m_scheduler_initialized; - Atomic<bool> m_halt_requested; - - DeferredCallEntry* m_pending_deferred_calls; // in reverse order - DeferredCallEntry* m_free_deferred_call_pool_entry; - DeferredCallEntry m_deferred_call_pool[5]; - - void gdt_init(); - void write_raw_gdt_entry(u16 selector, u32 low, u32 high); - void write_gdt_entry(u16 selector, Descriptor& descriptor); - static ProcessorContainer& processors(); - - static void smp_return_to_pool(ProcessorMessage& msg); - static ProcessorMessage& smp_get_from_pool(); - static void smp_cleanup_message(ProcessorMessage& msg); - bool smp_queue_message(ProcessorMessage& msg); - static void smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async); - static void smp_broadcast_message(ProcessorMessage& msg); - static void smp_broadcast_wait_sync(ProcessorMessage& msg); - static void smp_broadcast_halt(); - - void deferred_call_pool_init(); - void deferred_call_execute_pending(); - DeferredCallEntry* deferred_call_get_free(); - void deferred_call_return_to_pool(DeferredCallEntry*); - void deferred_call_queue_entry(DeferredCallEntry*); - - void cpu_detect(); - void cpu_setup(); - - String features_string() const; - -public: - Processor() = default; - - void early_initialize(u32 cpu); - void initialize(u32 cpu); - - void idle_begin() - { - s_idle_cpu_mask.fetch_or(1u << m_cpu, AK::MemoryOrder::memory_order_relaxed); - } - - void idle_end() - { - s_idle_cpu_mask.fetch_and(~(1u << m_cpu), AK::MemoryOrder::memory_order_relaxed); - } - - static u32 count() - { - // NOTE: because this value never changes once all APs are booted, - // we don't really need to do an atomic_load() on this variable - return g_total_processors; - } - - ALWAYS_INLINE static void wait_check() - { - Processor::current().smp_process_pending_messages(); - // TODO: pause - } - - [[noreturn]] static void halt(); - - static void flush_entire_tlb_local() - { - write_cr3(read_cr3()); - } - - static void flush_tlb_local(VirtualAddress vaddr, size_t page_count); - static void flush_tlb(const PageDirectory*, VirtualAddress, size_t); - - Descriptor& get_gdt_entry(u16 selector); - void flush_gdt(); - const DescriptorTablePointer& get_gdtr(); - - static Processor& by_id(u32 cpu); - - static size_t processor_count() { return processors().size(); } - - template<IteratorFunction<Processor&> Callback> - static inline IterationDecision for_each(Callback callback) - { - auto& procs = processors(); - size_t count = procs.size(); - for (size_t i = 0; i < count; i++) { - if (callback(*procs[i]) == IterationDecision::Break) - return IterationDecision::Break; - } - return IterationDecision::Continue; - } - - template<VoidFunction<Processor&> Callback> - static inline 
IterationDecision for_each(Callback callback) - { - auto& procs = processors(); - size_t count = procs.size(); - for (size_t i = 0; i < count; i++) { - if (procs[i] != nullptr) - callback(*procs[i]); - } - return IterationDecision::Continue; - } - - ALWAYS_INLINE u8 physical_address_bit_width() const { return m_physical_address_bit_width; } - - ALWAYS_INLINE ProcessorInfo& info() { return *m_info; } - - ALWAYS_INLINE static Processor& current() - { - return *(Processor*)read_fs_ptr(__builtin_offsetof(Processor, m_self)); - } - - ALWAYS_INLINE static bool is_initialized() - { - return get_fs() == GDT_SELECTOR_PROC && read_fs_u32(__builtin_offsetof(Processor, m_self)) != 0; - } - - ALWAYS_INLINE void set_scheduler_data(SchedulerPerProcessorData& scheduler_data) - { - m_scheduler_data = &scheduler_data; - } - - ALWAYS_INLINE SchedulerPerProcessorData& get_scheduler_data() const - { - return *m_scheduler_data; - } - - ALWAYS_INLINE void set_mm_data(MemoryManagerData& mm_data) - { - m_mm_data = &mm_data; - } - - ALWAYS_INLINE MemoryManagerData& get_mm_data() const - { - return *m_mm_data; - } - - ALWAYS_INLINE void set_idle_thread(Thread& idle_thread) - { - m_idle_thread = &idle_thread; - } - - ALWAYS_INLINE static Thread* current_thread() - { - // If we were to use Processor::current here, we'd have to - // disable interrupts to prevent a race where we may get pre-empted - // right after getting the Processor structure and then get moved - // to another processor, which would lead us to get the wrong thread. - // To avoid having to disable interrupts, we can just read the field - // directly in an atomic fashion, similar to Processor::current. - return (Thread*)read_fs_ptr(__builtin_offsetof(Processor, m_current_thread)); - } - - ALWAYS_INLINE static void set_current_thread(Thread& current_thread) - { - // See comment in Processor::current_thread - write_fs_u32(__builtin_offsetof(Processor, m_current_thread), FlatPtr(&current_thread)); - } - - ALWAYS_INLINE static Thread* idle_thread() - { - // See comment in Processor::current_thread - return (Thread*)read_fs_u32(__builtin_offsetof(Processor, m_idle_thread)); - } - - ALWAYS_INLINE u32 get_id() const - { - // NOTE: This variant should only be used when iterating over all - // Processor instances, or when it's guaranteed that the thread - // cannot move to another processor in between calling Processor::current - // and Processor::get_id, or if this fact is not important. - // All other cases should use Processor::id instead!
- return m_cpu; - } - - ALWAYS_INLINE static u32 id() - { - // See comment in Processor::current_thread - return read_fs_ptr(__builtin_offsetof(Processor, m_cpu)); - } - - ALWAYS_INLINE static bool is_bootstrap_processor() - { - return Processor::id() == 0; - } - - ALWAYS_INLINE u32 raise_irq() - { - return m_in_irq++; - } - - ALWAYS_INLINE void restore_irq(u32 prev_irq) - { - VERIFY(prev_irq <= m_in_irq); - if (!prev_irq) { - u32 prev_critical = 0; - if (m_in_critical.compare_exchange_strong(prev_critical, 1)) { - m_in_irq = prev_irq; - deferred_call_execute_pending(); - auto prev_raised = m_in_critical.exchange(prev_critical); - VERIFY(prev_raised == prev_critical + 1); - check_invoke_scheduler(); - } else if (prev_critical == 0) { - check_invoke_scheduler(); - } - } else { - m_in_irq = prev_irq; - } - } - - ALWAYS_INLINE u32& in_irq() - { - return m_in_irq; - } - - ALWAYS_INLINE void restore_in_critical(u32 critical) - { - m_in_critical = critical; - } - - ALWAYS_INLINE void enter_critical(u32& prev_flags) - { - prev_flags = cpu_flags(); - cli(); - m_in_critical++; - } - - ALWAYS_INLINE void leave_critical(u32 prev_flags) - { - cli(); // Need to prevent IRQs from interrupting us here! - VERIFY(m_in_critical > 0); - if (m_in_critical == 1) { - if (!m_in_irq) { - deferred_call_execute_pending(); - VERIFY(m_in_critical == 1); - } - m_in_critical--; - if (!m_in_irq) - check_invoke_scheduler(); - } else { - m_in_critical--; - } - if (prev_flags & 0x200) - sti(); - else - cli(); - } - - ALWAYS_INLINE u32 clear_critical(u32& prev_flags, bool enable_interrupts) - { - prev_flags = cpu_flags(); - u32 prev_crit = m_in_critical.exchange(0, AK::MemoryOrder::memory_order_acquire); - if (!m_in_irq) - check_invoke_scheduler(); - if (enable_interrupts) - sti(); - return prev_crit; - } - - ALWAYS_INLINE void restore_critical(u32 prev_crit, u32 prev_flags) - { - m_in_critical.store(prev_crit, AK::MemoryOrder::memory_order_release); - VERIFY(!prev_crit || !(prev_flags & 0x200)); - if (prev_flags & 0x200) - sti(); - else - cli(); - } - - ALWAYS_INLINE u32 in_critical() { return m_in_critical.load(); } - - ALWAYS_INLINE const FPUState& clean_fpu_state() const - { - return s_clean_fpu_state; - } - - static void smp_enable(); - bool smp_process_pending_messages(); - - static void smp_broadcast(Function<void()>, bool async); - static void smp_unicast(u32 cpu, Function<void()>, bool async); - static void smp_broadcast_flush_tlb(const PageDirectory*, VirtualAddress, size_t); - static u32 smp_wake_n_idle_processors(u32 wake_count); - - static void deferred_call_queue(Function<void()> callback); - - ALWAYS_INLINE bool has_feature(CPUFeature f) const - { - return (static_cast<u32>(m_features) & static_cast<u32>(f)) != 0; - } - - void check_invoke_scheduler(); - void invoke_scheduler_async() { m_invoke_scheduler_async = true; } - - void enter_trap(TrapFrame& trap, bool raise_irq); - - void exit_trap(TrapFrame& trap); - - [[noreturn]] void initialize_context_switching(Thread& initial_thread); - NEVER_INLINE void switch_context(Thread*& from_thread, Thread*& to_thread); - [[noreturn]] static void assume_context(Thread& thread, FlatPtr flags); - u32 init_context(Thread& thread, bool leave_crit); - static Vector<FlatPtr> capture_stack_trace(Thread& thread, size_t max_frames = 0); - - String platform_string() const; -}; - -class ScopedCritical { - AK_MAKE_NONCOPYABLE(ScopedCritical); - -public: - ScopedCritical() - { - enter(); - } - - ~ScopedCritical() - { - if (m_valid) - leave(); - } - - ScopedCritical(ScopedCritical&& 
from) - : m_prev_flags(exchange(from.m_prev_flags, 0)) - , m_valid(exchange(from.m_valid, false)) - { - } - - ScopedCritical& operator=(ScopedCritical&& from) - { - if (&from != this) { - m_prev_flags = exchange(from.m_prev_flags, 0); - m_valid = exchange(from.m_valid, false); - } - return *this; - } - - void leave() - { - VERIFY(m_valid); - m_valid = false; - Processor::current().leave_critical(m_prev_flags); - } - - void enter() - { - VERIFY(!m_valid); - m_valid = true; - Processor::current().enter_critical(m_prev_flags); - } - -private: - u32 m_prev_flags { 0 }; - bool m_valid { false }; -}; - -struct TrapFrame { - u32 prev_irq_level; - TrapFrame* next_trap; - RegisterState* regs; // must be last - - TrapFrame() = delete; - TrapFrame(const TrapFrame&) = delete; - TrapFrame(TrapFrame&&) = delete; - TrapFrame& operator=(const TrapFrame&) = delete; - TrapFrame& operator=(TrapFrame&&) = delete; -}; - -#if ARCH(I386) -# define TRAP_FRAME_SIZE (3 * 4) -#else -# define TRAP_FRAME_SIZE (3 * 8) -#endif - -static_assert(TRAP_FRAME_SIZE == sizeof(TrapFrame)); - -extern "C" void enter_trap_no_irq(TrapFrame*) __attribute__((used)); -extern "C" void enter_trap(TrapFrame*) __attribute__((used)); -extern "C" void exit_trap(TrapFrame*) __attribute__((used)); - -class MSR { - uint32_t m_msr; - -public: - static bool have() - { - CPUID id(1); - return (id.edx() & (1 << 5)) != 0; - } - - MSR(const MSR&) = delete; - MSR& operator=(const MSR&) = delete; - - MSR(uint32_t msr) - : m_msr(msr) - { - } - - void get(u32& low, u32& high) - { - asm volatile("rdmsr" - : "=a"(low), "=d"(high) - : "c"(m_msr)); - } - - void set(u32 low, u32 high) - { - asm volatile("wrmsr" ::"a"(low), "d"(high), "c"(m_msr)); - } -}; - -ALWAYS_INLINE void stac() -{ - if (!Processor::current().has_feature(CPUFeature::SMAP)) - return; - asm volatile("stac" :: - : "cc"); -} - -ALWAYS_INLINE void clac() -{ - if (!Processor::current().has_feature(CPUFeature::SMAP)) - return; - asm volatile("clac" :: - : "cc"); -} } diff --git a/Kernel/Arch/x86/CPUID.h b/Kernel/Arch/x86/CPUID.h new file mode 100644 index 0000000000..5b094012aa --- /dev/null +++ b/Kernel/Arch/x86/CPUID.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Types.h> + +namespace Kernel { + +class CPUID { +public: + explicit CPUID(u32 function) { asm volatile("cpuid" + : "=a"(m_eax), "=b"(m_ebx), "=c"(m_ecx), "=d"(m_edx) + : "a"(function), "c"(0)); } + u32 eax() const { return m_eax; } + u32 ebx() const { return m_ebx; } + u32 ecx() const { return m_ecx; } + u32 edx() const { return m_edx; } + +private: + u32 m_eax { 0xffffffff }; + u32 m_ebx { 0xffffffff }; + u32 m_ecx { 0xffffffff }; + u32 m_edx { 0xffffffff }; +}; + +// FIXME: This can't hold every CPU feature as-is. 
+enum class CPUFeature : u32 { + NX = (1 << 0), + PAE = (1 << 1), + PGE = (1 << 2), + RDRAND = (1 << 3), + RDSEED = (1 << 4), + SMAP = (1 << 5), + SMEP = (1 << 6), + SSE = (1 << 7), + TSC = (1 << 8), + RDTSCP = (1 << 9), + CONSTANT_TSC = (1 << 10), + NONSTOP_TSC = (1 << 11), + UMIP = (1 << 12), + SEP = (1 << 13), + SYSCALL = (1 << 14), + MMX = (1 << 15), + SSE2 = (1 << 16), + SSE3 = (1 << 17), + SSSE3 = (1 << 18), + SSE4_1 = (1 << 19), + SSE4_2 = (1 << 20), + XSAVE = (1 << 21), + AVX = (1 << 22), + FXSR = (1 << 23), +}; + +} diff --git a/Kernel/Arch/x86/InterruptDisabler.h b/Kernel/Arch/x86/InterruptDisabler.h new file mode 100644 index 0000000000..13a6af81f9 --- /dev/null +++ b/Kernel/Arch/x86/InterruptDisabler.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Types.h> + +#include <Kernel/IO.h> + +#include <Kernel/Arch/x86/ASM_wrapper.h> + +namespace Kernel { + +class InterruptDisabler { +public: + InterruptDisabler() + { + m_flags = cpu_flags(); + cli(); + } + + ~InterruptDisabler() + { + if (m_flags & 0x200) + sti(); + } + +private: + u32 m_flags; +}; + +class NonMaskableInterruptDisabler { +public: + NonMaskableInterruptDisabler() + { + IO::out8(0x70, IO::in8(0x70) | 0x80); + } + + ~NonMaskableInterruptDisabler() + { + IO::out8(0x70, IO::in8(0x70) & 0x7F); + } +}; + +} diff --git a/Kernel/Arch/x86/Interrupts.h b/Kernel/Arch/x86/Interrupts.h index 63a666f33d..1607d60ab5 100644 --- a/Kernel/Arch/x86/Interrupts.h +++ b/Kernel/Arch/x86/Interrupts.h @@ -8,6 +8,7 @@ #pragma once #include <AK/Types.h> +#include <Kernel/Interrupts/GenericInterruptHandler.h> namespace Kernel { @@ -33,4 +34,6 @@ GenericInterruptHandler& get_interrupt_handler(u8 interrupt_number); void register_generic_interrupt_handler(u8 number, GenericInterruptHandler&); void unregister_generic_interrupt_handler(u8 number, GenericInterruptHandler&); +void idt_init(); + } diff --git a/Kernel/Arch/x86/MSR.h b/Kernel/Arch/x86/MSR.h new file mode 100644 index 0000000000..462390467e --- /dev/null +++ b/Kernel/Arch/x86/MSR.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Types.h> + +#include <Kernel/Arch/x86/CPUID.h> + +namespace Kernel { + +class MSR { + uint32_t m_msr; + +public: + static bool have() + { + CPUID id(1); + return (id.edx() & (1 << 5)) != 0; + } + + MSR(const MSR&) = delete; + MSR& operator=(const MSR&) = delete; + + MSR(uint32_t msr) + : m_msr(msr) + { + } + + void get(u32& low, u32& high) + { + asm volatile("rdmsr" + : "=a"(low), "=d"(high) + : "c"(m_msr)); + } + + void set(u32 low, u32 high) + { + asm volatile("wrmsr" ::"a"(low), "d"(high), "c"(m_msr)); + } +}; + +} diff --git a/Kernel/Arch/x86/PageFault.h b/Kernel/Arch/x86/PageFault.h new file mode 100644 index 0000000000..57f38184de --- /dev/null +++ b/Kernel/Arch/x86/PageFault.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Types.h> +#include <Kernel/VirtualAddress.h> + +namespace Kernel { + +struct PageFaultFlags { + enum Flags { + NotPresent = 0x00, + ProtectionViolation = 0x01, + Read = 0x00, + Write = 0x02, + UserMode = 0x04, + SupervisorMode = 0x00, + ReservedBitViolation = 0x08, + InstructionFetch = 0x10, + }; +}; + +class PageFault { +public: + PageFault(u16 code, VirtualAddress vaddr) + : 
m_code(code) + , m_vaddr(vaddr) + { + } + + enum class Type { + PageNotPresent = PageFaultFlags::NotPresent, + ProtectionViolation = PageFaultFlags::ProtectionViolation, + }; + + enum class Access { + Read = PageFaultFlags::Read, + Write = PageFaultFlags::Write, + }; + + VirtualAddress vaddr() const { return m_vaddr; } + u16 code() const { return m_code; } + + Type type() const { return (Type)(m_code & 1); } + Access access() const { return (Access)(m_code & 2); } + + bool is_not_present() const { return (m_code & 1) == PageFaultFlags::NotPresent; } + bool is_protection_violation() const { return (m_code & 1) == PageFaultFlags::ProtectionViolation; } + bool is_read() const { return (m_code & 2) == PageFaultFlags::Read; } + bool is_write() const { return (m_code & 2) == PageFaultFlags::Write; } + bool is_user() const { return (m_code & 4) == PageFaultFlags::UserMode; } + bool is_supervisor() const { return (m_code & 4) == PageFaultFlags::SupervisorMode; } + bool is_instruction_fetch() const { return (m_code & 16) == PageFaultFlags::InstructionFetch; } + +private: + u16 m_code; + VirtualAddress m_vaddr; +}; + +} diff --git a/Kernel/Arch/x86/Processor.h b/Kernel/Arch/x86/Processor.h new file mode 100644 index 0000000000..200c5831ab --- /dev/null +++ b/Kernel/Arch/x86/Processor.h @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Array.h> +#include <AK/Concepts.h> +#include <AK/Function.h> +#include <AK/Types.h> + +#include <Kernel/Arch/x86/ASM_wrapper.h> +#include <Kernel/Arch/x86/CPUID.h> +#include <Kernel/Arch/x86/DescriptorTable.h> +#include <Kernel/Arch/x86/PageDirectory.h> +#include <Kernel/Arch/x86/TSS.h> + +namespace Kernel { + +class Thread; +class SchedulerPerProcessorData; +struct MemoryManagerData; +struct ProcessorMessageEntry; + +class TrapFrame; +class ProcessorInfo; + +struct [[gnu::aligned(16)]] FPUState +{ + u8 buffer[512]; +}; + +struct ProcessorMessage { + using CallbackFunction = Function<void()>; + + enum Type { + FlushTlb, + Callback, + }; + Type type; + volatile u32 refs; // atomic + union { + ProcessorMessage* next; // only valid while in the pool + alignas(CallbackFunction) u8 callback_storage[sizeof(CallbackFunction)]; + struct { + const PageDirectory* page_directory; + u8* ptr; + size_t page_count; + } flush_tlb; + }; + + volatile bool async; + + ProcessorMessageEntry* per_proc_entries; + + CallbackFunction& callback_value() + { + return *bit_cast<CallbackFunction*>(&callback_storage); + } + + void invoke_callback() + { + VERIFY(type == Type::Callback); + callback_value()(); + } +}; + +struct ProcessorMessageEntry { + ProcessorMessageEntry* next; + ProcessorMessage* msg; +}; + +struct DeferredCallEntry { + using HandlerFunction = Function<void()>; + + DeferredCallEntry* next; + alignas(HandlerFunction) u8 handler_storage[sizeof(HandlerFunction)]; + bool was_allocated; + + HandlerFunction& handler_value() + { + return *bit_cast<HandlerFunction*>(&handler_storage); + } + + void invoke_handler() + { + handler_value()(); + } +}; + +class Processor; +// Note: We only support processors at most at the moment, +// so allocate 8 slots of inline capacity in the container. 
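// Illustrative sketch (not part of this change; the function name is made up and placement
// new is assumed to be available): DeferredCallEntry above keeps its Function<void()> in raw,
// suitably aligned byte storage rather than as a plain member, so entries can sit in a simple
// free pool. Installing a handler would look roughly like this:
void install_deferred_handler(DeferredCallEntry& entry, Function<void()> callback)
{
    // Construct the Function in-place inside handler_storage; handler_value() later
    // bit_casts the storage back to it and invoke_handler() calls it.
    new (entry.handler_storage) DeferredCallEntry::HandlerFunction(move(callback));
}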
+using ProcessorContainer = Array<Processor*, 8>; + +class Processor { + friend class ProcessorInfo; + + AK_MAKE_NONCOPYABLE(Processor); + AK_MAKE_NONMOVABLE(Processor); + + Processor* m_self; + + DescriptorTablePointer m_gdtr; + Descriptor m_gdt[256]; + u32 m_gdt_length; + + u32 m_cpu; + u32 m_in_irq; + Atomic<u32, AK::MemoryOrder::memory_order_relaxed> m_in_critical; + static Atomic<u32> s_idle_cpu_mask; + + TSS m_tss; + static FPUState s_clean_fpu_state; + CPUFeature m_features; + static volatile u32 g_total_processors; // atomic + u8 m_physical_address_bit_width; + + ProcessorInfo* m_info; + MemoryManagerData* m_mm_data; + SchedulerPerProcessorData* m_scheduler_data; + Thread* m_current_thread; + Thread* m_idle_thread; + + volatile ProcessorMessageEntry* m_message_queue; // atomic, LIFO + + bool m_invoke_scheduler_async; + bool m_scheduler_initialized; + Atomic<bool> m_halt_requested; + + DeferredCallEntry* m_pending_deferred_calls; // in reverse order + DeferredCallEntry* m_free_deferred_call_pool_entry; + DeferredCallEntry m_deferred_call_pool[5]; + + void gdt_init(); + void write_raw_gdt_entry(u16 selector, u32 low, u32 high); + void write_gdt_entry(u16 selector, Descriptor& descriptor); + static ProcessorContainer& processors(); + + static void smp_return_to_pool(ProcessorMessage& msg); + static ProcessorMessage& smp_get_from_pool(); + static void smp_cleanup_message(ProcessorMessage& msg); + bool smp_queue_message(ProcessorMessage& msg); + static void smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async); + static void smp_broadcast_message(ProcessorMessage& msg); + static void smp_broadcast_wait_sync(ProcessorMessage& msg); + static void smp_broadcast_halt(); + + void deferred_call_pool_init(); + void deferred_call_execute_pending(); + DeferredCallEntry* deferred_call_get_free(); + void deferred_call_return_to_pool(DeferredCallEntry*); + void deferred_call_queue_entry(DeferredCallEntry*); + + void cpu_detect(); + void cpu_setup(); + + String features_string() const; + +public: + Processor() = default; + + void early_initialize(u32 cpu); + void initialize(u32 cpu); + + void idle_begin() + { + s_idle_cpu_mask.fetch_or(1u << m_cpu, AK::MemoryOrder::memory_order_relaxed); + } + + void idle_end() + { + s_idle_cpu_mask.fetch_and(~(1u << m_cpu), AK::MemoryOrder::memory_order_relaxed); + } + + static u32 count() + { + // NOTE: because this value never changes once all APs are booted, + // we don't really need to do an atomic_load() on this variable + return g_total_processors; + } + + ALWAYS_INLINE static void wait_check() + { + Processor::current().smp_process_pending_messages(); + // TODO: pause + } + + [[noreturn]] static void halt(); + + static void flush_entire_tlb_local() + { + write_cr3(read_cr3()); + } + + static void flush_tlb_local(VirtualAddress vaddr, size_t page_count); + static void flush_tlb(const PageDirectory*, VirtualAddress, size_t); + + Descriptor& get_gdt_entry(u16 selector); + void flush_gdt(); + const DescriptorTablePointer& get_gdtr(); + + static Processor& by_id(u32 cpu); + + static size_t processor_count() { return processors().size(); } + + template<IteratorFunction<Processor&> Callback> + static inline IterationDecision for_each(Callback callback) + { + auto& procs = processors(); + size_t count = procs.size(); + for (size_t i = 0; i < count; i++) { + if (callback(*procs[i]) == IterationDecision::Break) + return IterationDecision::Break; + } + return IterationDecision::Continue; + } + + template<VoidFunction<Processor&> Callback> + static inline 
IterationDecision for_each(Callback callback) + { + auto& procs = processors(); + size_t count = procs.size(); + for (size_t i = 0; i < count; i++) { + if (procs[i] != nullptr) + callback(*procs[i]); + } + return IterationDecision::Continue; + } + + ALWAYS_INLINE u8 physical_address_bit_width() const { return m_physical_address_bit_width; } + + ALWAYS_INLINE ProcessorInfo& info() { return *m_info; } + + ALWAYS_INLINE static Processor& current() + { + return *(Processor*)read_fs_ptr(__builtin_offsetof(Processor, m_self)); + } + + ALWAYS_INLINE static bool is_initialized() + { + return get_fs() == GDT_SELECTOR_PROC && read_fs_u32(__builtin_offsetof(Processor, m_self)) != 0; + } + + ALWAYS_INLINE void set_scheduler_data(SchedulerPerProcessorData& scheduler_data) + { + m_scheduler_data = &scheduler_data; + } + + ALWAYS_INLINE SchedulerPerProcessorData& get_scheduler_data() const + { + return *m_scheduler_data; + } + + ALWAYS_INLINE void set_mm_data(MemoryManagerData& mm_data) + { + m_mm_data = &mm_data; + } + + ALWAYS_INLINE MemoryManagerData& get_mm_data() const + { + return *m_mm_data; + } + + ALWAYS_INLINE void set_idle_thread(Thread& idle_thread) + { + m_idle_thread = &idle_thread; + } + + ALWAYS_INLINE static Thread* current_thread() + { + // If we were to use Processor::current here, we'd have to + // disable interrupts to prevent a race where we may get pre-empted + // right after getting the Processor structure and then get moved + // to another processor, which would lead us to get the wrong thread. + // To avoid having to disable interrupts, we can just read the field + // directly in an atomic fashion, similar to Processor::current. + return (Thread*)read_fs_ptr(__builtin_offsetof(Processor, m_current_thread)); + } + + ALWAYS_INLINE static void set_current_thread(Thread& current_thread) + { + // See comment in Processor::current_thread + write_fs_u32(__builtin_offsetof(Processor, m_current_thread), FlatPtr(¤t_thread)); + } + + ALWAYS_INLINE static Thread* idle_thread() + { + // See comment in Processor::current_thread + return (Thread*)read_fs_u32(__builtin_offsetof(Processor, m_idle_thread)); + } + + ALWAYS_INLINE u32 get_id() const + { + // NOTE: This variant should only be used when iterating over all + // Processor instances, or when it's guaranteed that the thread + // cannot move to another processor in between calling Processor::current + // and Processor::get_id, or if this fact is not important. + // All other cases should use Processor::id instead! 
+ return m_cpu; + } + + ALWAYS_INLINE static u32 id() + { + // See comment in Processor::current_thread + return read_fs_ptr(__builtin_offsetof(Processor, m_cpu)); + } + + ALWAYS_INLINE static bool is_bootstrap_processor() + { + return Processor::id() == 0; + } + + ALWAYS_INLINE u32 raise_irq() + { + return m_in_irq++; + } + + ALWAYS_INLINE void restore_irq(u32 prev_irq) + { + VERIFY(prev_irq <= m_in_irq); + if (!prev_irq) { + u32 prev_critical = 0; + if (m_in_critical.compare_exchange_strong(prev_critical, 1)) { + m_in_irq = prev_irq; + deferred_call_execute_pending(); + auto prev_raised = m_in_critical.exchange(prev_critical); + VERIFY(prev_raised == prev_critical + 1); + check_invoke_scheduler(); + } else if (prev_critical == 0) { + check_invoke_scheduler(); + } + } else { + m_in_irq = prev_irq; + } + } + + ALWAYS_INLINE u32& in_irq() + { + return m_in_irq; + } + + ALWAYS_INLINE void restore_in_critical(u32 critical) + { + m_in_critical = critical; + } + + ALWAYS_INLINE void enter_critical(u32& prev_flags) + { + prev_flags = cpu_flags(); + cli(); + m_in_critical++; + } + + ALWAYS_INLINE void leave_critical(u32 prev_flags) + { + cli(); // Need to prevent IRQs from interrupting us here! + VERIFY(m_in_critical > 0); + if (m_in_critical == 1) { + if (!m_in_irq) { + deferred_call_execute_pending(); + VERIFY(m_in_critical == 1); + } + m_in_critical--; + if (!m_in_irq) + check_invoke_scheduler(); + } else { + m_in_critical--; + } + if (prev_flags & 0x200) + sti(); + else + cli(); + } + + ALWAYS_INLINE u32 clear_critical(u32& prev_flags, bool enable_interrupts) + { + prev_flags = cpu_flags(); + u32 prev_crit = m_in_critical.exchange(0, AK::MemoryOrder::memory_order_acquire); + if (!m_in_irq) + check_invoke_scheduler(); + if (enable_interrupts) + sti(); + return prev_crit; + } + + ALWAYS_INLINE void restore_critical(u32 prev_crit, u32 prev_flags) + { + m_in_critical.store(prev_crit, AK::MemoryOrder::memory_order_release); + VERIFY(!prev_crit || !(prev_flags & 0x200)); + if (prev_flags & 0x200) + sti(); + else + cli(); + } + + ALWAYS_INLINE u32 in_critical() { return m_in_critical.load(); } + + ALWAYS_INLINE const FPUState& clean_fpu_state() const + { + return s_clean_fpu_state; + } + + static void smp_enable(); + bool smp_process_pending_messages(); + + static void smp_broadcast(Function<void()>, bool async); + static void smp_unicast(u32 cpu, Function<void()>, bool async); + static void smp_broadcast_flush_tlb(const PageDirectory*, VirtualAddress, size_t); + static u32 smp_wake_n_idle_processors(u32 wake_count); + + static void deferred_call_queue(Function<void()> callback); + + ALWAYS_INLINE bool has_feature(CPUFeature f) const + { + return (static_cast<u32>(m_features) & static_cast<u32>(f)) != 0; + } + + void check_invoke_scheduler(); + void invoke_scheduler_async() { m_invoke_scheduler_async = true; } + + void enter_trap(TrapFrame& trap, bool raise_irq); + + void exit_trap(TrapFrame& trap); + + [[noreturn]] void initialize_context_switching(Thread& initial_thread); + NEVER_INLINE void switch_context(Thread*& from_thread, Thread*& to_thread); + [[noreturn]] static void assume_context(Thread& thread, FlatPtr flags); + u32 init_context(Thread& thread, bool leave_crit); + static Vector<FlatPtr> capture_stack_trace(Thread& thread, size_t max_frames = 0); + + String platform_string() const; +}; + +} diff --git a/Kernel/Arch/x86/ProcessorInfo.h b/Kernel/Arch/x86/ProcessorInfo.h index b3bcaeb756..8a55980522 100644 --- a/Kernel/Arch/x86/ProcessorInfo.h +++ b/Kernel/Arch/x86/ProcessorInfo.h @@ -7,6 
+7,7 @@ #pragma once #include <AK/String.h> +#include <AK/Types.h> namespace Kernel { diff --git a/Kernel/Arch/x86/RegisterState.h b/Kernel/Arch/x86/RegisterState.h new file mode 100644 index 0000000000..0f0d2f3a5f --- /dev/null +++ b/Kernel/Arch/x86/RegisterState.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Types.h> +#include <LibC/sys/arch/i386/regs.h> + +#include <Kernel/Arch/x86/ASM_wrapper.h> +#include <Kernel/Arch/x86/CPU.h> + +namespace Kernel { + +struct [[gnu::packed]] RegisterState { + FlatPtr ss; + FlatPtr gs; + FlatPtr fs; + FlatPtr es; + FlatPtr ds; + FlatPtr edi; + FlatPtr esi; + FlatPtr ebp; + FlatPtr esp; + FlatPtr ebx; + FlatPtr edx; + FlatPtr ecx; + FlatPtr eax; + u16 exception_code; + u16 isr_number; +#if ARCH(X86_64) + u32 padding; +#endif + FlatPtr eip; + FlatPtr cs; + FlatPtr eflags; + FlatPtr userspace_esp; + FlatPtr userspace_ss; +}; + +#if ARCH(I386) +# define REGISTER_STATE_SIZE (19 * 4) +#else +# define REGISTER_STATE_SIZE (19 * 8) +#endif +static_assert(REGISTER_STATE_SIZE == sizeof(RegisterState)); + +inline void copy_kernel_registers_into_ptrace_registers(PtraceRegisters& ptrace_regs, const RegisterState& kernel_regs) +{ + ptrace_regs.eax = kernel_regs.eax, + ptrace_regs.ecx = kernel_regs.ecx, + ptrace_regs.edx = kernel_regs.edx, + ptrace_regs.ebx = kernel_regs.ebx, + ptrace_regs.esp = kernel_regs.userspace_esp, + ptrace_regs.ebp = kernel_regs.ebp, + ptrace_regs.esi = kernel_regs.esi, + ptrace_regs.edi = kernel_regs.edi, + ptrace_regs.eip = kernel_regs.eip, + ptrace_regs.eflags = kernel_regs.eflags, + ptrace_regs.cs = 0; + ptrace_regs.ss = 0; + ptrace_regs.ds = 0; + ptrace_regs.es = 0; + ptrace_regs.fs = 0; + ptrace_regs.gs = 0; +} + +inline void copy_ptrace_registers_into_kernel_registers(RegisterState& kernel_regs, const PtraceRegisters& ptrace_regs) +{ + kernel_regs.eax = ptrace_regs.eax; + kernel_regs.ecx = ptrace_regs.ecx; + kernel_regs.edx = ptrace_regs.edx; + kernel_regs.ebx = ptrace_regs.ebx; + kernel_regs.esp = ptrace_regs.esp; + kernel_regs.ebp = ptrace_regs.ebp; + kernel_regs.esi = ptrace_regs.esi; + kernel_regs.edi = ptrace_regs.edi; + kernel_regs.eip = ptrace_regs.eip; + kernel_regs.eflags = (kernel_regs.eflags & ~safe_eflags_mask) | (ptrace_regs.eflags & safe_eflags_mask); +} + +struct [[gnu::packed]] DebugRegisterState { + FlatPtr dr0; + FlatPtr dr1; + FlatPtr dr2; + FlatPtr dr3; + FlatPtr dr6; + FlatPtr dr7; +}; + +inline void read_debug_registers_into(DebugRegisterState& state) +{ + state.dr0 = read_dr0(); + state.dr1 = read_dr1(); + state.dr2 = read_dr2(); + state.dr3 = read_dr3(); + state.dr6 = read_dr6(); + state.dr7 = read_dr7(); +} + +inline void write_debug_registers_from(const DebugRegisterState& state) +{ + write_dr0(state.dr0); + write_dr1(state.dr1); + write_dr2(state.dr2); + write_dr3(state.dr3); + write_dr6(state.dr6); + write_dr7(state.dr7); +} + +inline void clear_debug_registers() +{ + write_dr0(0); + write_dr1(0); + write_dr2(0); + write_dr3(0); + write_dr7(1 << 10); // Bit 10 is reserved and must be set to 1. 
+} + +} diff --git a/Kernel/Arch/x86/ScopedCritical.h b/Kernel/Arch/x86/ScopedCritical.h new file mode 100644 index 0000000000..e6be3829a0 --- /dev/null +++ b/Kernel/Arch/x86/ScopedCritical.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Types.h> + +#include <Kernel/Arch/x86/Processor.h> + +namespace Kernel { + +class ScopedCritical { + AK_MAKE_NONCOPYABLE(ScopedCritical); + +public: + ScopedCritical() + { + enter(); + } + + ~ScopedCritical() + { + if (m_valid) + leave(); + } + + ScopedCritical(ScopedCritical&& from) + : m_prev_flags(exchange(from.m_prev_flags, 0)) + , m_valid(exchange(from.m_valid, false)) + { + } + + ScopedCritical& operator=(ScopedCritical&& from) + { + if (&from != this) { + m_prev_flags = exchange(from.m_prev_flags, 0); + m_valid = exchange(from.m_valid, false); + } + return *this; + } + + void leave() + { + VERIFY(m_valid); + m_valid = false; + Processor::current().leave_critical(m_prev_flags); + } + + void enter() + { + VERIFY(!m_valid); + m_valid = true; + Processor::current().enter_critical(m_prev_flags); + } + +private: + u32 m_prev_flags { 0 }; + bool m_valid { false }; +}; + +} diff --git a/Kernel/Arch/x86/SmapDisabler.h b/Kernel/Arch/x86/SmapDisabler.h index 83dd6db7c3..8dcbac6475 100644 --- a/Kernel/Arch/x86/SmapDisabler.h +++ b/Kernel/Arch/x86/SmapDisabler.h @@ -6,7 +6,7 @@ #pragma once -#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/ASM_wrapper.h> namespace Kernel { diff --git a/Kernel/Arch/x86/TrapFrame.h b/Kernel/Arch/x86/TrapFrame.h new file mode 100644 index 0000000000..bf46e58f8c --- /dev/null +++ b/Kernel/Arch/x86/TrapFrame.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Types.h> + +#include <Kernel/Arch/x86/InterruptDisabler.h> +#include <Kernel/Arch/x86/Processor.h> +#include <Kernel/Arch/x86/RegisterState.h> + +namespace Kernel { + +struct TrapFrame { + u32 prev_irq_level; + TrapFrame* next_trap; + RegisterState* regs; // must be last + + TrapFrame() = delete; + TrapFrame(const TrapFrame&) = delete; + TrapFrame(TrapFrame&&) = delete; + TrapFrame& operator=(const TrapFrame&) = delete; + TrapFrame& operator=(TrapFrame&&) = delete; +}; + +#if ARCH(I386) +# define TRAP_FRAME_SIZE (3 * 4) +#else +# define TRAP_FRAME_SIZE (3 * 8) +#endif + +static_assert(TRAP_FRAME_SIZE == sizeof(TrapFrame)); + +extern "C" void enter_trap_no_irq(TrapFrame* trap); +extern "C" void enter_trap(TrapFrame*) __attribute__((used)); +extern "C" void exit_trap(TrapFrame*) __attribute__((used)); + +} diff --git a/Kernel/Arch/x86/common/ASM_wrapper.cpp b/Kernel/Arch/x86/common/ASM_wrapper.cpp new file mode 100644 index 0000000000..787f291225 --- /dev/null +++ b/Kernel/Arch/x86/common/ASM_wrapper.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/Types.h> + +#include <Kernel/Arch/x86/ASM_wrapper.h> +#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/Processor.h> + +namespace Kernel { + +#define XCR_XFEATURE_ENABLED_MASK 0 + +UNMAP_AFTER_INIT u64 read_xcr0() +{ + u32 eax, edx; + asm volatile("xgetbv" + : "=a"(eax), "=d"(edx) + : "c"(XCR_XFEATURE_ENABLED_MASK)); + return eax + ((u64)edx << 32); +} + +UNMAP_AFTER_INIT void write_xcr0(u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + asm volatile("xsetbv" 
::"a"(eax), "d"(edx), "c"(XCR_XFEATURE_ENABLED_MASK)); +} + +void stac() +{ + if (!Processor::current().has_feature(CPUFeature::SMAP)) + return; + asm volatile("stac" :: + : "cc"); +} + +void clac() +{ + if (!Processor::current().has_feature(CPUFeature::SMAP)) + return; + asm volatile("clac" :: + : "cc"); +} + +} diff --git a/Kernel/Arch/x86/common/Interrupts.cpp b/Kernel/Arch/x86/common/Interrupts.cpp new file mode 100644 index 0000000000..bcd5e5e663 --- /dev/null +++ b/Kernel/Arch/x86/common/Interrupts.cpp @@ -0,0 +1,737 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/Format.h> +#include <AK/Types.h> + +#include <Kernel/Interrupts/GenericInterruptHandler.h> +#include <Kernel/Interrupts/SharedIRQHandler.h> +#include <Kernel/Interrupts/SpuriousInterruptHandler.h> +#include <Kernel/Interrupts/UnhandledInterruptHandler.h> +#include <Kernel/Panic.h> +#include <Kernel/PerformanceManager.h> +#include <Kernel/Process.h> +#include <Kernel/Random.h> +#include <Kernel/Thread.h> + +#include <LibC/mallocdefs.h> + +#include <Kernel/Arch/x86/ISRStubs.h> +#include <Kernel/Arch/x86/Processor.h> +#include <Kernel/Arch/x86/RegisterState.h> +#include <Kernel/Arch/x86/TrapFrame.h> + +extern FlatPtr start_of_unmap_after_init; +extern FlatPtr end_of_unmap_after_init; +extern FlatPtr start_of_ro_after_init; +extern FlatPtr end_of_ro_after_init; + +namespace Kernel { + +READONLY_AFTER_INIT static DescriptorTablePointer s_idtr; +READONLY_AFTER_INIT static IDTEntry s_idt[256]; + +static GenericInterruptHandler* s_interrupt_handler[GENERIC_INTERRUPT_HANDLERS_COUNT]; + +static EntropySource s_entropy_source_interrupts { EntropySource::Static::Interrupts }; + +// clang-format off + +#if ARCH(I386) +#define EH_ENTRY(ec, title) \ + extern "C" void title##_asm_entry(); \ + extern "C" void title##_handler(TrapFrame*) __attribute__((used)); \ + asm( \ + ".globl " #title "_asm_entry\n" \ + "" #title "_asm_entry: \n" \ + " pusha\n" \ + " pushl %ds\n" \ + " pushl %es\n" \ + " pushl %fs\n" \ + " pushl %gs\n" \ + " pushl %ss\n" \ + " mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" \ + " mov %ax, %ds\n" \ + " mov %ax, %es\n" \ + " mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" \ + " mov %ax, %fs\n" \ + " pushl %esp \n" /* set TrapFrame::regs */ \ + " subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" \ + " pushl %esp \n" \ + " cld\n" \ + " call enter_trap_no_irq \n" \ + " call " #title "_handler\n" \ + " jmp common_trap_exit \n"); + +#define EH_ENTRY_NO_CODE(ec, title) \ + extern "C" void title##_asm_entry(); \ + extern "C" void title##_handler(TrapFrame*) __attribute__((used)); \ + asm( \ + ".globl " #title "_asm_entry\n" \ + "" #title "_asm_entry: \n" \ + " pushl $0x0\n" \ + " pusha\n" \ + " pushl %ds\n" \ + " pushl %es\n" \ + " pushl %fs\n" \ + " pushl %gs\n" \ + " pushl %ss\n" \ + " mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" \ + " mov %ax, %ds\n" \ + " mov %ax, %es\n" \ + " mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" \ + " mov %ax, %fs\n" \ + " pushl %esp \n" /* set TrapFrame::regs */ \ + " subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" \ + " pushl %esp \n" \ + " cld\n" \ + " call enter_trap_no_irq \n" \ + " call " #title "_handler\n" \ + " jmp common_trap_exit \n"); + +#elif ARCH(X86_64) +#define EH_ENTRY(ec, title) \ + extern "C" void title##_asm_entry(); \ + extern "C" void title##_handler(TrapFrame*); \ + asm( \ + ".globl " #title "_asm_entry\n" \ + "" #title "_asm_entry: \n" \ + " cli;hlt;\n" \ +); + +#define 
EH_ENTRY_NO_CODE(ec, title) \ + extern "C" void title##_handler(TrapFrame*); \ + extern "C" void title##_asm_entry(); \ +asm( \ + ".globl " #title "_asm_entry\n" \ + "" #title "_asm_entry: \n" \ + " cli;hlt;\n" \ +); +#endif + +// clang-format on + +static void dump(const RegisterState& regs) +{ + u16 ss; + u32 esp; + + if (!(regs.cs & 3)) { + ss = regs.ss; + esp = regs.esp; + } else { + ss = regs.userspace_ss; + esp = regs.userspace_esp; + } + + dbgln("Exception code: {:04x} (isr: {:04x})", regs.exception_code, regs.isr_number); + dbgln(" pc={:04x}:{:08x} eflags={:08x}", (u16)regs.cs, regs.eip, regs.eflags); + dbgln(" stack={:04x}:{:08x}", ss, esp); + dbgln(" ds={:04x} es={:04x} fs={:04x} gs={:04x}", (u16)regs.ds, (u16)regs.es, (u16)regs.fs, (u16)regs.gs); + dbgln(" eax={:08x} ebx={:08x} ecx={:08x} edx={:08x}", regs.eax, regs.ebx, regs.ecx, regs.edx); + dbgln(" ebp={:08x} esp={:08x} esi={:08x} edi={:08x}", regs.ebp, regs.esp, regs.esi, regs.edi); + dbgln(" cr0={:08x} cr2={:08x} cr3={:08x} cr4={:08x}", read_cr0(), read_cr2(), read_cr3(), read_cr4()); +} + +void handle_crash(RegisterState& regs, const char* description, int signal, bool out_of_memory) +{ + auto process = Process::current(); + if (!process) { + PANIC("{} with !current", description); + } + + // If a process crashed while inspecting another process, + // make sure we switch back to the right page tables. + MM.enter_process_paging_scope(*process); + + dmesgln("CRASH: CPU #{} {} in ring {}", Processor::id(), description, (regs.cs & 3)); + dump(regs); + + if (!(regs.cs & 3)) { + PANIC("Crash in ring 0"); + } + + process->crash(signal, regs.eip, out_of_memory); +} + +EH_ENTRY_NO_CODE(6, illegal_instruction); +void illegal_instruction_handler(TrapFrame* trap) +{ + clac(); + handle_crash(*trap->regs, "Illegal instruction", SIGILL); +} + +EH_ENTRY_NO_CODE(0, divide_error); +void divide_error_handler(TrapFrame* trap) +{ + clac(); + handle_crash(*trap->regs, "Divide error", SIGFPE); +} + +EH_ENTRY(13, general_protection_fault); +void general_protection_fault_handler(TrapFrame* trap) +{ + clac(); + handle_crash(*trap->regs, "General protection fault", SIGSEGV); +} + +// 7: FPU not available exception +EH_ENTRY_NO_CODE(7, fpu_exception); +void fpu_exception_handler(TrapFrame*) +{ + // Just clear the TS flag. We've already restored the FPU state eagerly. + // FIXME: It would be nice if we didn't have to do this at all. + asm volatile("clts"); +} + +// 14: Page Fault +EH_ENTRY(14, page_fault); +void page_fault_handler(TrapFrame* trap) +{ + clac(); + + auto& regs = *trap->regs; + auto fault_address = read_cr2(); + + if constexpr (PAGE_FAULT_DEBUG) { + u32 fault_page_directory = read_cr3(); + dbgln("CPU #{} ring {} {} page fault in PD={:#x}, {}{} {}", + Processor::is_initialized() ? Processor::id() : 0, + regs.cs & 3, + regs.exception_code & 1 ? "PV" : "NP", + fault_page_directory, + regs.exception_code & 8 ? "reserved-bit " : "", + regs.exception_code & 2 ? "write" : "read", + VirtualAddress(fault_address)); + + dump(regs); + } + + bool faulted_in_kernel = !(regs.cs & 3); + + if (faulted_in_kernel && Processor::current().in_irq()) { + // If we're faulting in an IRQ handler, first check if we failed + // due to safe_memcpy, safe_strnlen, or safe_memset. If we did, + // gracefully continue immediately. 
Because we're in an IRQ handler + // we can't really try to resolve the page fault in a meaningful + // way, so we need to do this before calling into + // MemoryManager::handle_page_fault, which would just bail and + // request a crash + if (handle_safe_access_fault(regs, fault_address)) + return; + } + + auto current_thread = Thread::current(); + + if (current_thread) { + current_thread->set_handling_page_fault(true); + PerformanceManager::add_page_fault_event(*current_thread, regs); + } + + ScopeGuard guard = [current_thread] { + if (current_thread) + current_thread->set_handling_page_fault(false); + }; + + if (!faulted_in_kernel && !MM.validate_user_stack(current_thread->process(), VirtualAddress(regs.userspace_esp))) { + dbgln("Invalid stack pointer: {}", VirtualAddress(regs.userspace_esp)); + handle_crash(regs, "Bad stack on page fault", SIGSTKFLT); + } + + if (fault_address >= (FlatPtr)&start_of_ro_after_init && fault_address < (FlatPtr)&end_of_ro_after_init) { + dump(regs); + PANIC("Attempt to write into READONLY_AFTER_INIT section"); + } + + if (fault_address >= (FlatPtr)&start_of_unmap_after_init && fault_address < (FlatPtr)&end_of_unmap_after_init) { + dump(regs); + PANIC("Attempt to access UNMAP_AFTER_INIT section"); + } + + PageFault fault { regs.exception_code, VirtualAddress { fault_address } }; + auto response = MM.handle_page_fault(fault); + + if (response == PageFaultResponse::ShouldCrash || response == PageFaultResponse::OutOfMemory) { + if (faulted_in_kernel && handle_safe_access_fault(regs, fault_address)) { + // If this would be a ring0 (kernel) fault and the fault was triggered by + // safe_memcpy, safe_strnlen, or safe_memset then we resume execution at + // the appropriate _fault label rather than crashing + return; + } + + if (response != PageFaultResponse::OutOfMemory && current_thread) { + if (current_thread->has_signal_handler(SIGSEGV)) { + current_thread->send_urgent_signal_to_self(SIGSEGV); + return; + } + } + + dbgln("Unrecoverable page fault, {}{}{} address {}", + regs.exception_code & PageFaultFlags::ReservedBitViolation ? "reserved bit violation / " : "", + regs.exception_code & PageFaultFlags::InstructionFetch ? "instruction fetch / " : "", + regs.exception_code & PageFaultFlags::Write ? 
"write to" : "read from", + VirtualAddress(fault_address)); + u32 malloc_scrub_pattern = explode_byte(MALLOC_SCRUB_BYTE); + u32 free_scrub_pattern = explode_byte(FREE_SCRUB_BYTE); + u32 kmalloc_scrub_pattern = explode_byte(KMALLOC_SCRUB_BYTE); + u32 kfree_scrub_pattern = explode_byte(KFREE_SCRUB_BYTE); + u32 slab_alloc_scrub_pattern = explode_byte(SLAB_ALLOC_SCRUB_BYTE); + u32 slab_dealloc_scrub_pattern = explode_byte(SLAB_DEALLOC_SCRUB_BYTE); + if ((fault_address & 0xffff0000) == (malloc_scrub_pattern & 0xffff0000)) { + dbgln("Note: Address {} looks like it may be uninitialized malloc() memory", VirtualAddress(fault_address)); + } else if ((fault_address & 0xffff0000) == (free_scrub_pattern & 0xffff0000)) { + dbgln("Note: Address {} looks like it may be recently free()'d memory", VirtualAddress(fault_address)); + } else if ((fault_address & 0xffff0000) == (kmalloc_scrub_pattern & 0xffff0000)) { + dbgln("Note: Address {} looks like it may be uninitialized kmalloc() memory", VirtualAddress(fault_address)); + } else if ((fault_address & 0xffff0000) == (kfree_scrub_pattern & 0xffff0000)) { + dbgln("Note: Address {} looks like it may be recently kfree()'d memory", VirtualAddress(fault_address)); + } else if ((fault_address & 0xffff0000) == (slab_alloc_scrub_pattern & 0xffff0000)) { + dbgln("Note: Address {} looks like it may be uninitialized slab_alloc() memory", VirtualAddress(fault_address)); + } else if ((fault_address & 0xffff0000) == (slab_dealloc_scrub_pattern & 0xffff0000)) { + dbgln("Note: Address {} looks like it may be recently slab_dealloc()'d memory", VirtualAddress(fault_address)); + } else if (fault_address < 4096) { + dbgln("Note: Address {} looks like a possible nullptr dereference", VirtualAddress(fault_address)); + } + + if (current_thread) { + auto& current_process = current_thread->process(); + if (current_process.is_user_process()) { + current_process.set_coredump_metadata("fault_address", String::formatted("{:p}", fault_address)); + current_process.set_coredump_metadata("fault_type", fault.type() == PageFault::Type::PageNotPresent ? "NotPresent" : "ProtectionViolation"); + String fault_access; + if (fault.is_instruction_fetch()) + fault_access = "Execute"; + else + fault_access = fault.access() == PageFault::Access::Read ? 
"Read" : "Write"; + current_process.set_coredump_metadata("fault_access", fault_access); + } + } + + handle_crash(regs, "Page Fault", SIGSEGV, response == PageFaultResponse::OutOfMemory); + } else if (response == PageFaultResponse::Continue) { + dbgln_if(PAGE_FAULT_DEBUG, "Continuing after resolved page fault"); + } else { + VERIFY_NOT_REACHED(); + } +} + +EH_ENTRY_NO_CODE(1, debug); +void debug_handler(TrapFrame* trap) +{ + clac(); + auto& regs = *trap->regs; + auto current_thread = Thread::current(); + auto& process = current_thread->process(); + if ((regs.cs & 3) == 0) { + PANIC("Debug exception in ring 0"); + } + constexpr u8 REASON_SINGLESTEP = 14; + auto debug_status = read_dr6(); + auto should_trap_mask = (1 << REASON_SINGLESTEP) | 0b1111; + if ((debug_status & should_trap_mask) == 0) + return; + if (auto tracer = process.tracer()) { + tracer->set_regs(regs); + } + current_thread->send_urgent_signal_to_self(SIGTRAP); + write_dr6(debug_status & ~(should_trap_mask)); +} + +EH_ENTRY_NO_CODE(3, breakpoint); +void breakpoint_handler(TrapFrame* trap) +{ + clac(); + auto& regs = *trap->regs; + auto current_thread = Thread::current(); + auto& process = current_thread->process(); + if ((regs.cs & 3) == 0) { + PANIC("Breakpoint trap in ring 0"); + } + if (auto tracer = process.tracer()) { + tracer->set_regs(regs); + } + current_thread->send_urgent_signal_to_self(SIGTRAP); +} + +#define EH(i, msg) \ + static void _exception##i() \ + { \ + dbgln("{}", msg); \ + PANIC("cr0={:08x} cr2={:08x} cr3={:08x} cr4={:08x}", read_cr0(), read_cr2(), read_cr3(), read_cr4()); \ + } + +EH(2, "Unknown error") +EH(4, "Overflow") +EH(5, "Bounds check") +EH(8, "Double fault") +EH(9, "Coprocessor segment overrun") +EH(10, "Invalid TSS") +EH(11, "Segment not present") +EH(12, "Stack exception") +EH(15, "Unknown error") +EH(16, "Coprocessor error") + +extern "C" void pre_init_finished(void) __attribute__((used)); +extern "C" void post_init_finished(void) __attribute__((used)); +extern "C" void handle_interrupt(TrapFrame*) __attribute__((used)); + +extern "C" UNMAP_AFTER_INIT void pre_init_finished(void) +{ + VERIFY(g_scheduler_lock.own_lock()); + + // Because init_finished() will wait on the other APs, we need + // to release the scheduler lock so that the other APs can also get + // to this point + + // The target flags will get restored upon leaving the trap + u32 prev_flags = cpu_flags(); + Scheduler::leave_on_first_switch(prev_flags); +} + +extern "C" UNMAP_AFTER_INIT void post_init_finished(void) +{ + // We need to re-acquire the scheduler lock before a context switch + // transfers control into the idle loop, which needs the lock held + Scheduler::prepare_for_idle_loop(); +} + +void handle_interrupt(TrapFrame* trap) +{ + clac(); + auto& regs = *trap->regs; + VERIFY(regs.isr_number >= IRQ_VECTOR_BASE && regs.isr_number <= (IRQ_VECTOR_BASE + GENERIC_INTERRUPT_HANDLERS_COUNT)); + u8 irq = (u8)(regs.isr_number - 0x50); + s_entropy_source_interrupts.add_random_event(irq); + auto* handler = s_interrupt_handler[irq]; + VERIFY(handler); + handler->increment_invoking_counter(); + handler->handle_interrupt(regs); + handler->eoi(); +} + +const DescriptorTablePointer& get_idtr() +{ + return s_idtr; +} + +static void unimp_trap() +{ + PANIC("Unhandled IRQ"); +} + +GenericInterruptHandler& get_interrupt_handler(u8 interrupt_number) +{ + auto*& handler_slot = s_interrupt_handler[interrupt_number]; + VERIFY(handler_slot != nullptr); + return *handler_slot; +} + +static void revert_to_unused_handler(u8 interrupt_number) +{ + 
auto handler = new UnhandledInterruptHandler(interrupt_number); + handler->register_interrupt_handler(); +} + +void register_generic_interrupt_handler(u8 interrupt_number, GenericInterruptHandler& handler) +{ + VERIFY(interrupt_number < GENERIC_INTERRUPT_HANDLERS_COUNT); + auto*& handler_slot = s_interrupt_handler[interrupt_number]; + if (handler_slot != nullptr) { + if (handler_slot->type() == HandlerType::UnhandledInterruptHandler) { + if (handler_slot) { + auto* unhandled_handler = static_cast<UnhandledInterruptHandler*>(handler_slot); + unhandled_handler->unregister_interrupt_handler(); + delete unhandled_handler; + } + handler_slot = &handler; + return; + } + if (handler_slot->is_shared_handler() && !handler_slot->is_sharing_with_others()) { + VERIFY(handler_slot->type() == HandlerType::SharedIRQHandler); + static_cast<SharedIRQHandler*>(handler_slot)->register_handler(handler); + return; + } + if (!handler_slot->is_shared_handler()) { + if (handler_slot->type() == HandlerType::SpuriousInterruptHandler) { + static_cast<SpuriousInterruptHandler*>(handler_slot)->register_handler(handler); + return; + } + VERIFY(handler_slot->type() == HandlerType::IRQHandler); + auto& previous_handler = *handler_slot; + handler_slot = nullptr; + SharedIRQHandler::initialize(interrupt_number); + VERIFY(handler_slot); + static_cast<SharedIRQHandler*>(handler_slot)->register_handler(previous_handler); + static_cast<SharedIRQHandler*>(handler_slot)->register_handler(handler); + return; + } + VERIFY_NOT_REACHED(); + } else { + handler_slot = &handler; + } +} + +void unregister_generic_interrupt_handler(u8 interrupt_number, GenericInterruptHandler& handler) +{ + auto*& handler_slot = s_interrupt_handler[interrupt_number]; + VERIFY(handler_slot != nullptr); + if (handler_slot->type() == HandlerType::UnhandledInterruptHandler) { + dbgln("Trying to unregister unused handler (?)"); + return; + } + if (handler_slot->is_shared_handler() && !handler_slot->is_sharing_with_others()) { + VERIFY(handler_slot->type() == HandlerType::SharedIRQHandler); + auto* shared_handler = static_cast<SharedIRQHandler*>(handler_slot); + shared_handler->unregister_handler(handler); + if (!shared_handler->sharing_devices_count()) { + handler_slot = nullptr; + revert_to_unused_handler(interrupt_number); + } + return; + } + if (!handler_slot->is_shared_handler()) { + VERIFY(handler_slot->type() == HandlerType::IRQHandler); + handler_slot = nullptr; + revert_to_unused_handler(interrupt_number); + return; + } + VERIFY_NOT_REACHED(); +} + +UNMAP_AFTER_INIT void register_interrupt_handler(u8 index, void (*handler)()) +{ + // FIXME: Why is that with selector 8? + // FIXME: Is the Gate Type really required to be an Interrupt + // FIXME: What's up with that storage segment 0? + s_idt[index] = IDTEntry((FlatPtr)handler, 8, IDTEntryType::InterruptGate32, 0, 0); +} + +UNMAP_AFTER_INIT void register_user_callable_interrupt_handler(u8 index, void (*handler)()) +{ + // FIXME: Why is that with selector 8? + // FIXME: Is the Gate Type really required to be a Trap + // FIXME: What's up with that storage segment 0? 
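    // Aside (not from this commit; best-effort notes on the FIXMEs above): 8 is the kernel
    // code segment selector (GDT_SELECTOR_CODE0 == 0x08), which the CPU loads into CS when
    // the gate fires; the "storage segment" bit has to be 0 for interrupt and trap gates;
    // and the last argument is the gate's descriptor privilege level, 3 here so that ring 3
    // code may invoke the vector (as idt_init() below does for the debug and breakpoint
    // entries).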
+ s_idt[index] = IDTEntry((FlatPtr)handler, 8, IDTEntryType::TrapGate32, 0, 3); +} + +UNMAP_AFTER_INIT void flush_idt() +{ + asm("lidt %0" ::"m"(s_idtr)); +} + +UNMAP_AFTER_INIT void idt_init() +{ + s_idtr.address = s_idt; + s_idtr.limit = 256 * 8 - 1; + + register_interrupt_handler(0x00, divide_error_asm_entry); + register_user_callable_interrupt_handler(0x01, debug_asm_entry); + register_interrupt_handler(0x02, _exception2); + register_user_callable_interrupt_handler(0x03, breakpoint_asm_entry); + register_interrupt_handler(0x04, _exception4); + register_interrupt_handler(0x05, _exception5); + register_interrupt_handler(0x06, illegal_instruction_asm_entry); + register_interrupt_handler(0x07, fpu_exception_asm_entry); + register_interrupt_handler(0x08, _exception8); + register_interrupt_handler(0x09, _exception9); + register_interrupt_handler(0x0a, _exception10); + register_interrupt_handler(0x0b, _exception11); + register_interrupt_handler(0x0c, _exception12); + register_interrupt_handler(0x0d, general_protection_fault_asm_entry); + register_interrupt_handler(0x0e, page_fault_asm_entry); + register_interrupt_handler(0x0f, _exception15); + register_interrupt_handler(0x10, _exception16); + + for (u8 i = 0x11; i < 0x50; i++) + register_interrupt_handler(i, unimp_trap); + + dbgln("Initializing unhandled interrupt handlers"); + register_interrupt_handler(0x50, interrupt_80_asm_entry); + register_interrupt_handler(0x51, interrupt_81_asm_entry); + register_interrupt_handler(0x52, interrupt_82_asm_entry); + register_interrupt_handler(0x53, interrupt_83_asm_entry); + register_interrupt_handler(0x54, interrupt_84_asm_entry); + register_interrupt_handler(0x55, interrupt_85_asm_entry); + register_interrupt_handler(0x56, interrupt_86_asm_entry); + register_interrupt_handler(0x57, interrupt_87_asm_entry); + register_interrupt_handler(0x58, interrupt_88_asm_entry); + register_interrupt_handler(0x59, interrupt_89_asm_entry); + register_interrupt_handler(0x5a, interrupt_90_asm_entry); + register_interrupt_handler(0x5b, interrupt_91_asm_entry); + register_interrupt_handler(0x5c, interrupt_92_asm_entry); + register_interrupt_handler(0x5d, interrupt_93_asm_entry); + register_interrupt_handler(0x5e, interrupt_94_asm_entry); + register_interrupt_handler(0x5f, interrupt_95_asm_entry); + register_interrupt_handler(0x60, interrupt_96_asm_entry); + register_interrupt_handler(0x61, interrupt_97_asm_entry); + register_interrupt_handler(0x62, interrupt_98_asm_entry); + register_interrupt_handler(0x63, interrupt_99_asm_entry); + register_interrupt_handler(0x64, interrupt_100_asm_entry); + register_interrupt_handler(0x65, interrupt_101_asm_entry); + register_interrupt_handler(0x66, interrupt_102_asm_entry); + register_interrupt_handler(0x67, interrupt_103_asm_entry); + register_interrupt_handler(0x68, interrupt_104_asm_entry); + register_interrupt_handler(0x69, interrupt_105_asm_entry); + register_interrupt_handler(0x6a, interrupt_106_asm_entry); + register_interrupt_handler(0x6b, interrupt_107_asm_entry); + register_interrupt_handler(0x6c, interrupt_108_asm_entry); + register_interrupt_handler(0x6d, interrupt_109_asm_entry); + register_interrupt_handler(0x6e, interrupt_110_asm_entry); + register_interrupt_handler(0x6f, interrupt_111_asm_entry); + register_interrupt_handler(0x70, interrupt_112_asm_entry); + register_interrupt_handler(0x71, interrupt_113_asm_entry); + register_interrupt_handler(0x72, interrupt_114_asm_entry); + register_interrupt_handler(0x73, interrupt_115_asm_entry); + 
register_interrupt_handler(0x74, interrupt_116_asm_entry); + register_interrupt_handler(0x75, interrupt_117_asm_entry); + register_interrupt_handler(0x76, interrupt_118_asm_entry); + register_interrupt_handler(0x77, interrupt_119_asm_entry); + register_interrupt_handler(0x78, interrupt_120_asm_entry); + register_interrupt_handler(0x79, interrupt_121_asm_entry); + register_interrupt_handler(0x7a, interrupt_122_asm_entry); + register_interrupt_handler(0x7b, interrupt_123_asm_entry); + register_interrupt_handler(0x7c, interrupt_124_asm_entry); + register_interrupt_handler(0x7d, interrupt_125_asm_entry); + register_interrupt_handler(0x7e, interrupt_126_asm_entry); + register_interrupt_handler(0x7f, interrupt_127_asm_entry); + register_interrupt_handler(0x80, interrupt_128_asm_entry); + register_interrupt_handler(0x81, interrupt_129_asm_entry); + register_interrupt_handler(0x82, interrupt_130_asm_entry); + register_interrupt_handler(0x83, interrupt_131_asm_entry); + register_interrupt_handler(0x84, interrupt_132_asm_entry); + register_interrupt_handler(0x85, interrupt_133_asm_entry); + register_interrupt_handler(0x86, interrupt_134_asm_entry); + register_interrupt_handler(0x87, interrupt_135_asm_entry); + register_interrupt_handler(0x88, interrupt_136_asm_entry); + register_interrupt_handler(0x89, interrupt_137_asm_entry); + register_interrupt_handler(0x8a, interrupt_138_asm_entry); + register_interrupt_handler(0x8b, interrupt_139_asm_entry); + register_interrupt_handler(0x8c, interrupt_140_asm_entry); + register_interrupt_handler(0x8d, interrupt_141_asm_entry); + register_interrupt_handler(0x8e, interrupt_142_asm_entry); + register_interrupt_handler(0x8f, interrupt_143_asm_entry); + register_interrupt_handler(0x90, interrupt_144_asm_entry); + register_interrupt_handler(0x91, interrupt_145_asm_entry); + register_interrupt_handler(0x92, interrupt_146_asm_entry); + register_interrupt_handler(0x93, interrupt_147_asm_entry); + register_interrupt_handler(0x94, interrupt_148_asm_entry); + register_interrupt_handler(0x95, interrupt_149_asm_entry); + register_interrupt_handler(0x96, interrupt_150_asm_entry); + register_interrupt_handler(0x97, interrupt_151_asm_entry); + register_interrupt_handler(0x98, interrupt_152_asm_entry); + register_interrupt_handler(0x99, interrupt_153_asm_entry); + register_interrupt_handler(0x9a, interrupt_154_asm_entry); + register_interrupt_handler(0x9b, interrupt_155_asm_entry); + register_interrupt_handler(0x9c, interrupt_156_asm_entry); + register_interrupt_handler(0x9d, interrupt_157_asm_entry); + register_interrupt_handler(0x9e, interrupt_158_asm_entry); + register_interrupt_handler(0x9f, interrupt_159_asm_entry); + register_interrupt_handler(0xa0, interrupt_160_asm_entry); + register_interrupt_handler(0xa1, interrupt_161_asm_entry); + register_interrupt_handler(0xa2, interrupt_162_asm_entry); + register_interrupt_handler(0xa3, interrupt_163_asm_entry); + register_interrupt_handler(0xa4, interrupt_164_asm_entry); + register_interrupt_handler(0xa5, interrupt_165_asm_entry); + register_interrupt_handler(0xa6, interrupt_166_asm_entry); + register_interrupt_handler(0xa7, interrupt_167_asm_entry); + register_interrupt_handler(0xa8, interrupt_168_asm_entry); + register_interrupt_handler(0xa9, interrupt_169_asm_entry); + register_interrupt_handler(0xaa, interrupt_170_asm_entry); + register_interrupt_handler(0xab, interrupt_171_asm_entry); + register_interrupt_handler(0xac, interrupt_172_asm_entry); + register_interrupt_handler(0xad, interrupt_173_asm_entry); + 
register_interrupt_handler(0xae, interrupt_174_asm_entry); + register_interrupt_handler(0xaf, interrupt_175_asm_entry); + register_interrupt_handler(0xb0, interrupt_176_asm_entry); + register_interrupt_handler(0xb1, interrupt_177_asm_entry); + register_interrupt_handler(0xb2, interrupt_178_asm_entry); + register_interrupt_handler(0xb3, interrupt_179_asm_entry); + register_interrupt_handler(0xb4, interrupt_180_asm_entry); + register_interrupt_handler(0xb5, interrupt_181_asm_entry); + register_interrupt_handler(0xb6, interrupt_182_asm_entry); + register_interrupt_handler(0xb7, interrupt_183_asm_entry); + register_interrupt_handler(0xb8, interrupt_184_asm_entry); + register_interrupt_handler(0xb9, interrupt_185_asm_entry); + register_interrupt_handler(0xba, interrupt_186_asm_entry); + register_interrupt_handler(0xbb, interrupt_187_asm_entry); + register_interrupt_handler(0xbc, interrupt_188_asm_entry); + register_interrupt_handler(0xbd, interrupt_189_asm_entry); + register_interrupt_handler(0xbe, interrupt_190_asm_entry); + register_interrupt_handler(0xbf, interrupt_191_asm_entry); + register_interrupt_handler(0xc0, interrupt_192_asm_entry); + register_interrupt_handler(0xc1, interrupt_193_asm_entry); + register_interrupt_handler(0xc2, interrupt_194_asm_entry); + register_interrupt_handler(0xc3, interrupt_195_asm_entry); + register_interrupt_handler(0xc4, interrupt_196_asm_entry); + register_interrupt_handler(0xc5, interrupt_197_asm_entry); + register_interrupt_handler(0xc6, interrupt_198_asm_entry); + register_interrupt_handler(0xc7, interrupt_199_asm_entry); + register_interrupt_handler(0xc8, interrupt_200_asm_entry); + register_interrupt_handler(0xc9, interrupt_201_asm_entry); + register_interrupt_handler(0xca, interrupt_202_asm_entry); + register_interrupt_handler(0xcb, interrupt_203_asm_entry); + register_interrupt_handler(0xcc, interrupt_204_asm_entry); + register_interrupt_handler(0xcd, interrupt_205_asm_entry); + register_interrupt_handler(0xce, interrupt_206_asm_entry); + register_interrupt_handler(0xcf, interrupt_207_asm_entry); + register_interrupt_handler(0xd0, interrupt_208_asm_entry); + register_interrupt_handler(0xd1, interrupt_209_asm_entry); + register_interrupt_handler(0xd2, interrupt_210_asm_entry); + register_interrupt_handler(0xd3, interrupt_211_asm_entry); + register_interrupt_handler(0xd4, interrupt_212_asm_entry); + register_interrupt_handler(0xd5, interrupt_213_asm_entry); + register_interrupt_handler(0xd6, interrupt_214_asm_entry); + register_interrupt_handler(0xd7, interrupt_215_asm_entry); + register_interrupt_handler(0xd8, interrupt_216_asm_entry); + register_interrupt_handler(0xd9, interrupt_217_asm_entry); + register_interrupt_handler(0xda, interrupt_218_asm_entry); + register_interrupt_handler(0xdb, interrupt_219_asm_entry); + register_interrupt_handler(0xdc, interrupt_220_asm_entry); + register_interrupt_handler(0xdd, interrupt_221_asm_entry); + register_interrupt_handler(0xde, interrupt_222_asm_entry); + register_interrupt_handler(0xdf, interrupt_223_asm_entry); + register_interrupt_handler(0xe0, interrupt_224_asm_entry); + register_interrupt_handler(0xe1, interrupt_225_asm_entry); + register_interrupt_handler(0xe2, interrupt_226_asm_entry); + register_interrupt_handler(0xe3, interrupt_227_asm_entry); + register_interrupt_handler(0xe4, interrupt_228_asm_entry); + register_interrupt_handler(0xe5, interrupt_229_asm_entry); + register_interrupt_handler(0xe6, interrupt_230_asm_entry); + register_interrupt_handler(0xe7, interrupt_231_asm_entry); + 
register_interrupt_handler(0xe8, interrupt_232_asm_entry); + register_interrupt_handler(0xe9, interrupt_233_asm_entry); + register_interrupt_handler(0xea, interrupt_234_asm_entry); + register_interrupt_handler(0xeb, interrupt_235_asm_entry); + register_interrupt_handler(0xec, interrupt_236_asm_entry); + register_interrupt_handler(0xed, interrupt_237_asm_entry); + register_interrupt_handler(0xee, interrupt_238_asm_entry); + register_interrupt_handler(0xef, interrupt_239_asm_entry); + register_interrupt_handler(0xf0, interrupt_240_asm_entry); + register_interrupt_handler(0xf1, interrupt_241_asm_entry); + register_interrupt_handler(0xf2, interrupt_242_asm_entry); + register_interrupt_handler(0xf3, interrupt_243_asm_entry); + register_interrupt_handler(0xf4, interrupt_244_asm_entry); + register_interrupt_handler(0xf5, interrupt_245_asm_entry); + register_interrupt_handler(0xf6, interrupt_246_asm_entry); + register_interrupt_handler(0xf7, interrupt_247_asm_entry); + register_interrupt_handler(0xf8, interrupt_248_asm_entry); + register_interrupt_handler(0xf9, interrupt_249_asm_entry); + register_interrupt_handler(0xfa, interrupt_250_asm_entry); + register_interrupt_handler(0xfb, interrupt_251_asm_entry); + register_interrupt_handler(0xfc, interrupt_252_asm_entry); + register_interrupt_handler(0xfd, interrupt_253_asm_entry); + register_interrupt_handler(0xfe, interrupt_254_asm_entry); + register_interrupt_handler(0xff, interrupt_255_asm_entry); + + for (u8 i = 0; i < GENERIC_INTERRUPT_HANDLERS_COUNT; ++i) { + auto* handler = new UnhandledInterruptHandler(i); + handler->register_interrupt_handler(); + } + + flush_idt(); +} + +} diff --git a/Kernel/Arch/x86/common/Processor.cpp b/Kernel/Arch/x86/common/Processor.cpp new file mode 100644 index 0000000000..17d798cc20 --- /dev/null +++ b/Kernel/Arch/x86/common/Processor.cpp @@ -0,0 +1,1110 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/Format.h> +#include <AK/StdLibExtras.h> +#include <AK/String.h> +#include <AK/Types.h> + +#include <Kernel/Interrupts/APIC.h> +#include <Kernel/Process.h> +#include <Kernel/Random.h> +#include <Kernel/StdLib.h> +#include <Kernel/Thread.h> +#include <Kernel/VM/ProcessPagingScope.h> + +#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/CPUID.h> +#include <Kernel/Arch/x86/Interrupts.h> +#include <Kernel/Arch/x86/Processor.h> +#include <Kernel/Arch/x86/ProcessorInfo.h> +#include <Kernel/Arch/x86/SafeMem.h> +#include <Kernel/Arch/x86/ScopedCritical.h> +#include <Kernel/Arch/x86/TrapFrame.h> + +namespace Kernel { + +READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state; + +READONLY_AFTER_INIT static ProcessorContainer s_processors {}; +READONLY_AFTER_INIT volatile u32 Processor::g_total_processors; +static volatile bool s_smp_enabled; + +static volatile ProcessorMessage* s_message_pool; +Atomic<u32> Processor::s_idle_cpu_mask { 0 }; + +extern "C" void thread_context_first_enter(void); +extern "C" void exit_kernel_thread(void); + +UNMAP_AFTER_INIT static void sse_init() +{ + write_cr0((read_cr0() & 0xfffffffbu) | 0x2); + write_cr4(read_cr4() | 0x600); +} + +void exit_kernel_thread(void) +{ + Thread::current()->exit(); +} + +UNMAP_AFTER_INIT void Processor::cpu_detect() +{ + // NOTE: This is called during Processor::early_initialize, we cannot + // safely log at this point because we don't have kmalloc + // initialized yet! 
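    // Aside (not part of the original change): the bit positions tested below follow the
    // CPUID layout from the Intel SDM. Leaf 1 reports the base feature flags in EDX/ECX,
    // leaf 7 the structured extended features (SMEP, SMAP, UMIP, RDSEED), and the 0x8000000x
    // extended leaves report NX, RDTSCP, SYSCALL, the invariant-TSC bit and the supported
    // physical address width.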
+ auto set_feature = + [&](CPUFeature f) { + m_features = static_cast<CPUFeature>(static_cast<u32>(m_features) | static_cast<u32>(f)); + }; + m_features = static_cast<CPUFeature>(0); + + CPUID processor_info(0x1); + if (processor_info.edx() & (1 << 4)) + set_feature(CPUFeature::TSC); + if (processor_info.edx() & (1 << 6)) + set_feature(CPUFeature::PAE); + if (processor_info.edx() & (1 << 13)) + set_feature(CPUFeature::PGE); + if (processor_info.edx() & (1 << 23)) + set_feature(CPUFeature::MMX); + if (processor_info.edx() & (1 << 24)) + set_feature(CPUFeature::FXSR); + if (processor_info.edx() & (1 << 25)) + set_feature(CPUFeature::SSE); + if (processor_info.edx() & (1 << 26)) + set_feature(CPUFeature::SSE2); + if (processor_info.ecx() & (1 << 0)) + set_feature(CPUFeature::SSE3); + if (processor_info.ecx() & (1 << 9)) + set_feature(CPUFeature::SSSE3); + if (processor_info.ecx() & (1 << 19)) + set_feature(CPUFeature::SSE4_1); + if (processor_info.ecx() & (1 << 20)) + set_feature(CPUFeature::SSE4_2); + if (processor_info.ecx() & (1 << 26)) + set_feature(CPUFeature::XSAVE); + if (processor_info.ecx() & (1 << 28)) + set_feature(CPUFeature::AVX); + if (processor_info.ecx() & (1 << 30)) + set_feature(CPUFeature::RDRAND); + if (processor_info.edx() & (1 << 11)) { + u32 stepping = processor_info.eax() & 0xf; + u32 model = (processor_info.eax() >> 4) & 0xf; + u32 family = (processor_info.eax() >> 8) & 0xf; + if (!(family == 6 && model < 3 && stepping < 3)) + set_feature(CPUFeature::SEP); + if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe)) + set_feature(CPUFeature::CONSTANT_TSC); + } + + u32 max_extended_leaf = CPUID(0x80000000).eax(); + + if (max_extended_leaf >= 0x80000001) { + CPUID extended_processor_info(0x80000001); + if (extended_processor_info.edx() & (1 << 20)) + set_feature(CPUFeature::NX); + if (extended_processor_info.edx() & (1 << 27)) + set_feature(CPUFeature::RDTSCP); + if (extended_processor_info.edx() & (1 << 11)) { + // Only available in 64 bit mode + set_feature(CPUFeature::SYSCALL); + } + } + + if (max_extended_leaf >= 0x80000007) { + CPUID cpuid(0x80000007); + if (cpuid.edx() & (1 << 8)) { + set_feature(CPUFeature::CONSTANT_TSC); + set_feature(CPUFeature::NONSTOP_TSC); + } + } + + if (max_extended_leaf >= 0x80000008) { + // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor. + CPUID cpuid(0x80000008); + m_physical_address_bit_width = cpuid.eax() & 0xff; + } else { + // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise. + m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32; + } + + CPUID extended_features(0x7); + if (extended_features.ebx() & (1 << 20)) + set_feature(CPUFeature::SMAP); + if (extended_features.ebx() & (1 << 7)) + set_feature(CPUFeature::SMEP); + if (extended_features.ecx() & (1 << 2)) + set_feature(CPUFeature::UMIP); + if (extended_features.ebx() & (1 << 18)) + set_feature(CPUFeature::RDSEED); +} + +UNMAP_AFTER_INIT void Processor::cpu_setup() +{ + // NOTE: This is called during Processor::early_initialize, we cannot + // safely log at this point because we don't have kmalloc + // initialized yet! + cpu_detect(); + + if (has_feature(CPUFeature::SSE)) { + // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR. + // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it. 
+ VERIFY(has_feature(CPUFeature::FXSR)); + sse_init(); + } + + write_cr0(read_cr0() | 0x00010000); + + if (has_feature(CPUFeature::PGE)) { + // Turn on CR4.PGE so the CPU will respect the G bit in page tables. + write_cr4(read_cr4() | 0x80); + } + + if (has_feature(CPUFeature::NX)) { + // Turn on IA32_EFER.NXE + asm volatile( + "movl $0xc0000080, %ecx\n" + "rdmsr\n" + "orl $0x800, %eax\n" + "wrmsr\n"); + } + + if (has_feature(CPUFeature::SMEP)) { + // Turn on CR4.SMEP + write_cr4(read_cr4() | 0x100000); + } + + if (has_feature(CPUFeature::SMAP)) { + // Turn on CR4.SMAP + write_cr4(read_cr4() | 0x200000); + } + + if (has_feature(CPUFeature::UMIP)) { + write_cr4(read_cr4() | 0x800); + } + + if (has_feature(CPUFeature::TSC)) { + write_cr4(read_cr4() | 0x4); + } + + if (has_feature(CPUFeature::XSAVE)) { + // Turn on CR4.OSXSAVE + write_cr4(read_cr4() | 0x40000); + + // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1." + // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves. + write_xcr0(0x1); + + if (has_feature(CPUFeature::AVX)) { + // Turn on SSE, AVX and x87 flags + write_xcr0(read_xcr0() | 0x7); + } + } +} + +String Processor::features_string() const +{ + StringBuilder builder; + auto feature_to_str = + [](CPUFeature f) -> const char* { + switch (f) { + case CPUFeature::NX: + return "nx"; + case CPUFeature::PAE: + return "pae"; + case CPUFeature::PGE: + return "pge"; + case CPUFeature::RDRAND: + return "rdrand"; + case CPUFeature::RDSEED: + return "rdseed"; + case CPUFeature::SMAP: + return "smap"; + case CPUFeature::SMEP: + return "smep"; + case CPUFeature::SSE: + return "sse"; + case CPUFeature::TSC: + return "tsc"; + case CPUFeature::RDTSCP: + return "rdtscp"; + case CPUFeature::CONSTANT_TSC: + return "constant_tsc"; + case CPUFeature::NONSTOP_TSC: + return "nonstop_tsc"; + case CPUFeature::UMIP: + return "umip"; + case CPUFeature::SEP: + return "sep"; + case CPUFeature::SYSCALL: + return "syscall"; + case CPUFeature::MMX: + return "mmx"; + case CPUFeature::FXSR: + return "fxsr"; + case CPUFeature::SSE2: + return "sse2"; + case CPUFeature::SSE3: + return "sse3"; + case CPUFeature::SSSE3: + return "ssse3"; + case CPUFeature::SSE4_1: + return "sse4.1"; + case CPUFeature::SSE4_2: + return "sse4.2"; + case CPUFeature::XSAVE: + return "xsave"; + case CPUFeature::AVX: + return "avx"; + // no default statement here intentionally so that we get + // a warning if a new feature is forgotten to be added here + } + // Shouldn't ever happen + return "???"; + }; + bool first = true; + for (u32 flag = 1; flag != 0; flag <<= 1) { + if ((static_cast<u32>(m_features) & flag) != 0) { + if (first) + first = false; + else + builder.append(' '); + auto str = feature_to_str(static_cast<CPUFeature>(flag)); + builder.append(str, strlen(str)); + } + } + return builder.build(); +} + +UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu) +{ + m_self = this; + + m_cpu = cpu; + m_in_irq = 0; + m_in_critical = 0; + + m_invoke_scheduler_async = false; + m_scheduler_initialized = false; + + m_message_queue = nullptr; + m_idle_thread = nullptr; + m_current_thread = nullptr; + m_scheduler_data = nullptr; + m_mm_data = nullptr; + m_info = nullptr; + + m_halt_requested = false; + if (cpu == 0) { + s_smp_enabled = false; + atomic_store(&g_total_processors, 1u, AK::MemoryOrder::memory_order_release); + } else { + atomic_fetch_add(&g_total_processors, 1u, AK::MemoryOrder::memory_order_acq_rel); + } + + 
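    // Editorial note, not from this commit: the calls below finish the per-CPU bring-up.
    // They prime the deferred-call pool, let cpu_setup() detect and enable CPU features,
    // and let gdt_init() build and load this processor's own GDT; after that,
    // Processor::current() can find m_self through the fs-based per-processor segment,
    // which is what the two sanity checks at the end of this function verify.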
deferred_call_pool_init(); + + cpu_setup(); + gdt_init(); + + VERIFY(is_initialized()); // sanity check + VERIFY(&current() == this); // sanity check +} + +UNMAP_AFTER_INIT void Processor::initialize(u32 cpu) +{ + VERIFY(m_self == this); + VERIFY(&current() == this); // sanity check + + dmesgln("CPU[{}]: Supported features: {}", id(), features_string()); + if (!has_feature(CPUFeature::RDRAND)) + dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", id()); + dmesgln("CPU[{}]: Physical address bit width: {}", id(), m_physical_address_bit_width); + + if (cpu == 0) + idt_init(); + else + flush_idt(); + + if (cpu == 0) { + VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0); + asm volatile("fninit"); + if (has_feature(CPUFeature::FXSR)) + asm volatile("fxsave %0" + : "=m"(s_clean_fpu_state)); + else + asm volatile("fnsave %0" + : "=m"(s_clean_fpu_state)); + } + + m_info = new ProcessorInfo(*this); + + { + // We need to prevent races between APs starting up at the same time + VERIFY(cpu < s_processors.size()); + s_processors[cpu] = this; + } +} + +void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high) +{ + u16 i = (selector & 0xfffc) >> 3; + u32 prev_gdt_length = m_gdt_length; + + if (i > m_gdt_length) { + m_gdt_length = i + 1; + VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0])); + m_gdtr.limit = (m_gdt_length + 1) * 8 - 1; + } + m_gdt[i].low = low; + m_gdt[i].high = high; + + // clear selectors we may have skipped + while (i < prev_gdt_length) { + m_gdt[i].low = 0; + m_gdt[i].high = 0; + i++; + } +} + +void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor) +{ + write_raw_gdt_entry(selector, descriptor.low, descriptor.high); +} + +Descriptor& Processor::get_gdt_entry(u16 selector) +{ + u16 i = (selector & 0xfffc) >> 3; + return *(Descriptor*)(&m_gdt[i]); +} + +void Processor::flush_gdt() +{ + m_gdtr.address = m_gdt; + m_gdtr.limit = (m_gdt_length * 8) - 1; + asm volatile("lgdt %0" ::"m"(m_gdtr) + : "memory"); +} + +const DescriptorTablePointer& Processor::get_gdtr() +{ + return m_gdtr; +} + +Vector<FlatPtr> Processor::capture_stack_trace(Thread& thread, size_t max_frames) +{ + FlatPtr frame_ptr = 0, eip = 0; + Vector<FlatPtr, 32> stack_trace; + + auto walk_stack = [&](FlatPtr stack_ptr) { + static constexpr size_t max_stack_frames = 4096; + stack_trace.append(eip); + size_t count = 1; + while (stack_ptr && stack_trace.size() < max_stack_frames) { + FlatPtr retaddr; + + count++; + if (max_frames != 0 && count > max_frames) + break; + + if (is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) { + if (!copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]) || !retaddr) + break; + stack_trace.append(retaddr); + if (!copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr)) + break; + } else { + void* fault_at; + if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr) + break; + stack_trace.append(retaddr); + if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at)) + break; + } + } + }; + auto capture_current_thread = [&]() { + frame_ptr = (FlatPtr)__builtin_frame_address(0); + eip = (FlatPtr)__builtin_return_address(0); + + walk_stack(frame_ptr); + }; + + // Since the thread may be running on another processor, there + // is a chance a context switch may happen while we're trying + // to get it. It also won't be entirely accurate and merely + // reflect the status at the last context switch.
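Editor's note (illustrative, not part of the patch): the walk_stack lambda above is a conventional frame-pointer walk; a standard x86 prologue leaves the caller's frame pointer at [ebp] and the return address at [ebp + 4]. A simplified free-standing sketch of the same idea (the kernel version additionally bounds the walk and goes through copy_from_user()/safe_memcpy() so a bad pointer cannot fault the kernel):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Follow the saved frame-pointer chain starting at 'frame_pointer' and
    // collect return addresses, stopping at a null link or after max_frames.
    std::vector<uintptr_t> walk_frame_pointers(uintptr_t frame_pointer, size_t max_frames)
    {
        std::vector<uintptr_t> return_addresses;
        while (frame_pointer && return_addresses.size() < max_frames) {
            auto const* frame = reinterpret_cast<uintptr_t const*>(frame_pointer);
            uintptr_t return_address = frame[1]; // pushed by the 'call' into this frame
            if (!return_address)
                break;
            return_addresses.push_back(return_address);
            frame_pointer = frame[0]; // caller's saved frame pointer
        }
        return return_addresses;
    }

For the current thread this could be seeded with __builtin_frame_address(0), exactly as capture_current_thread() does above.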
+ ScopedSpinLock lock(g_scheduler_lock); + if (&thread == Processor::current_thread()) { + VERIFY(thread.state() == Thread::Running); + // Leave the scheduler lock. If we trigger page faults we may + // need to be preempted. Since this is our own thread it won't + // cause any problems as the stack won't change below this frame. + lock.unlock(); + capture_current_thread(); + } else if (thread.is_active()) { + VERIFY(thread.cpu() != Processor::id()); + // If this is the case, the thread is currently running + // on another processor. We can't trust the kernel stack as + // it may be changing at any time. We need to probably send + // an IPI to that processor, have it walk the stack and wait + // until it returns the data back to us + auto& proc = Processor::current(); + smp_unicast( + thread.cpu(), + [&]() { + dbgln("CPU[{}] getting stack for cpu #{}", Processor::id(), proc.get_id()); + ProcessPagingScope paging_scope(thread.process()); + VERIFY(&Processor::current() != &proc); + VERIFY(&thread == Processor::current_thread()); + // NOTE: Because the other processor is still holding the + // scheduler lock while waiting for this callback to finish, + // the current thread on the target processor cannot change + + // TODO: What to do about page faults here? We might deadlock + // because the other processor is still holding the + // scheduler lock... + capture_current_thread(); + }, + false); + } else { + switch (thread.state()) { + case Thread::Running: + VERIFY_NOT_REACHED(); // should have been handled above + case Thread::Runnable: + case Thread::Stopped: + case Thread::Blocked: + case Thread::Dying: + case Thread::Dead: { + // We need to retrieve ebp from what was last pushed to the kernel + // stack. Before switching out of that thread, it switch_context + // pushed the callee-saved registers, and the last of them happens + // to be ebp. + ProcessPagingScope paging_scope(thread.process()); + auto& tss = thread.tss(); + u32* stack_top = reinterpret_cast<u32*>(tss.esp); + if (is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) { + if (!copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0])) + frame_ptr = 0; + } else { + void* fault_at; + if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at)) + frame_ptr = 0; + } + eip = tss.eip; + // TODO: We need to leave the scheduler lock here, but we also + // need to prevent the target thread from being run while + // we walk the stack + lock.unlock(); + walk_stack(frame_ptr); + break; + } + default: + dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string()); + break; + } + } + return stack_trace; +} + +ProcessorContainer& Processor::processors() +{ + return s_processors; +} + +Processor& Processor::by_id(u32 cpu) +{ + // s_processors does not need to be protected by a lock of any kind. 
+ // It is populated early in the boot process, and the BSP is waiting + // for all APs to finish, after which this array never gets modified + // again, so it's safe to not protect access to it here + auto& procs = processors(); + VERIFY(procs[cpu] != nullptr); + VERIFY(procs.size() > cpu); + return *procs[cpu]; +} + +void Processor::enter_trap(TrapFrame& trap, bool raise_irq) +{ + VERIFY_INTERRUPTS_DISABLED(); + VERIFY(&Processor::current() == this); + trap.prev_irq_level = m_in_irq; + if (raise_irq) + m_in_irq++; + auto* current_thread = Processor::current_thread(); + if (current_thread) { + auto& current_trap = current_thread->current_trap(); + trap.next_trap = current_trap; + current_trap = &trap; + // The cs register of this trap tells us where we will return back to + current_thread->set_previous_mode(((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode); + } else { + trap.next_trap = nullptr; + } +} + +void Processor::exit_trap(TrapFrame& trap) +{ + VERIFY_INTERRUPTS_DISABLED(); + VERIFY(&Processor::current() == this); + VERIFY(m_in_irq >= trap.prev_irq_level); + m_in_irq = trap.prev_irq_level; + + smp_process_pending_messages(); + + if (!m_in_irq && !m_in_critical) + check_invoke_scheduler(); + + auto* current_thread = Processor::current_thread(); + if (current_thread) { + auto& current_trap = current_thread->current_trap(); + current_trap = trap.next_trap; + if (current_trap) { + VERIFY(current_trap->regs); + // If we have another higher level trap then we probably returned + // from an interrupt or irq handler. The cs register of the + // new/higher level trap tells us what the mode prior to it was + current_thread->set_previous_mode(((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode); + } else { + // If we don't have a higher level trap then we're back in user mode. + // Unless we're a kernel process, in which case we're always in kernel mode + current_thread->set_previous_mode(current_thread->process().is_kernel_process() ? Thread::PreviousMode::KernelMode : Thread::PreviousMode::UserMode); + } + } +} + +void Processor::check_invoke_scheduler() +{ + VERIFY(!m_in_irq); + VERIFY(!m_in_critical); + if (m_invoke_scheduler_async && m_scheduler_initialized) { + m_invoke_scheduler_async = false; + Scheduler::invoke_async(); + } +} + +void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count) +{ + auto ptr = vaddr.as_ptr(); + while (page_count > 0) { + // clang-format off + asm volatile("invlpg %0" + : + : "m"(*ptr) + : "memory"); + // clang-format on + ptr += PAGE_SIZE; + page_count--; + } +} + +void Processor::flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count) +{ + if (s_smp_enabled && (!is_user_address(vaddr) || Process::current()->thread_count() > 1)) + smp_broadcast_flush_tlb(page_directory, vaddr, page_count); + else + flush_tlb_local(vaddr, page_count); +} + +void Processor::smp_return_to_pool(ProcessorMessage& msg) +{ + ProcessorMessage* next = nullptr; + do { + msg.next = next; + } while (!atomic_compare_exchange_strong(&s_message_pool, next, &msg, AK::MemoryOrder::memory_order_acq_rel)); +} + +ProcessorMessage& Processor::smp_get_from_pool() +{ + ProcessorMessage* msg; + + // The assumption is that messages are never removed from the pool! + for (;;) { + msg = atomic_load(&s_message_pool, AK::MemoryOrder::memory_order_consume); + if (!msg) { + if (!Processor::current().smp_process_pending_messages()) { + // TODO: pause for a bit? 
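Editor's note (illustrative, not part of the patch): smp_return_to_pool() and smp_get_from_pool() above implement a lock-free Treiber stack; dereferencing the popped pointer is acceptable only because pool entries are never deallocated, as the comment just below explains. A minimal standalone sketch of the same push/pop pattern using std::atomic:

    #include <atomic>

    struct PoolNode {
        PoolNode* next { nullptr };
    };

    std::atomic<PoolNode*> s_pool_head { nullptr };

    // Push: point the node at the current head, then CAS the head to the node.
    void pool_push(PoolNode& node)
    {
        PoolNode* head = s_pool_head.load(std::memory_order_relaxed);
        do {
            node.next = head;
        } while (!s_pool_head.compare_exchange_weak(head, &node, std::memory_order_acq_rel));
    }

    // Pop: swing the head to head->next. Reading head->next from a stale head
    // cannot fault, because nodes live forever (they are pooled, never freed).
    PoolNode* pool_pop()
    {
        PoolNode* head = s_pool_head.load(std::memory_order_acquire);
        while (head && !s_pool_head.compare_exchange_weak(head, head->next, std::memory_order_acq_rel))
            ;
        return head;
    }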
+ } + continue; + } + // If another processor were to use this message in the meanwhile, + // "msg" is still valid (because it never gets freed). We'd detect + // this because the expected value "msg" and pool would + // no longer match, and the compare_exchange will fail. But accessing + // "msg->next" is always safe here. + if (atomic_compare_exchange_strong(&s_message_pool, msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) { + // We successfully "popped" this available message + break; + } + } + + VERIFY(msg != nullptr); + return *msg; +} + +u32 Processor::smp_wake_n_idle_processors(u32 wake_count) +{ + VERIFY(Processor::current().in_critical()); + VERIFY(wake_count > 0); + if (!s_smp_enabled) + return 0; + + // Wake at most N - 1 processors + if (wake_count >= Processor::count()) { + wake_count = Processor::count() - 1; + VERIFY(wake_count > 0); + } + + u32 current_id = Processor::current().id(); + + u32 did_wake_count = 0; + auto& apic = APIC::the(); + while (did_wake_count < wake_count) { + // Try to get a set of idle CPUs and flip them to busy + u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id); + u32 idle_count = __builtin_popcountl(idle_mask); + if (idle_count == 0) + break; // No (more) idle processor available + + u32 found_mask = 0; + for (u32 i = 0; i < idle_count; i++) { + u32 cpu = __builtin_ffsl(idle_mask) - 1; + idle_mask &= ~(1u << cpu); + found_mask |= 1u << cpu; + } + + idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask; + if (idle_mask == 0) + continue; // All of them were flipped to busy, try again + idle_count = __builtin_popcountl(idle_mask); + for (u32 i = 0; i < idle_count; i++) { + u32 cpu = __builtin_ffsl(idle_mask) - 1; + idle_mask &= ~(1u << cpu); + + // Send an IPI to that CPU to wake it up. There is a possibility + // someone else woke it up as well, or that it woke up due to + // a timer interrupt. But we tried hard to avoid this... + apic.send_ipi(cpu); + did_wake_count++; + } + } + return did_wake_count; +} + +UNMAP_AFTER_INIT void Processor::smp_enable() +{ + size_t msg_pool_size = Processor::count() * 100u; + size_t msg_entries_cnt = Processor::count(); + + auto msgs = new ProcessorMessage[msg_pool_size]; + auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt]; + size_t msg_entry_i = 0; + for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) { + auto& msg = msgs[i]; + msg.next = i < msg_pool_size - 1 ? 
&msgs[i + 1] : nullptr; + msg.per_proc_entries = &msg_entries[msg_entry_i]; + for (size_t k = 0; k < msg_entries_cnt; k++) + msg_entries[msg_entry_i + k].msg = &msg; + } + + atomic_store(&s_message_pool, &msgs[0], AK::MemoryOrder::memory_order_release); + + // Start sending IPI messages + s_smp_enabled = true; +} + +void Processor::smp_cleanup_message(ProcessorMessage& msg) +{ + switch (msg.type) { + case ProcessorMessage::Callback: + msg.callback_value().~Function(); + break; + default: + break; + } +} + +bool Processor::smp_process_pending_messages() +{ + bool did_process = false; + u32 prev_flags; + enter_critical(prev_flags); + + if (auto pending_msgs = atomic_exchange(&m_message_queue, nullptr, AK::MemoryOrder::memory_order_acq_rel)) { + // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first + auto reverse_list = + [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* { + ProcessorMessageEntry* rev_list = nullptr; + while (list) { + auto next = list->next; + list->next = rev_list; + rev_list = list; + list = next; + } + return rev_list; + }; + + pending_msgs = reverse_list(pending_msgs); + + // now process in the right order + ProcessorMessageEntry* next_msg; + for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) { + next_msg = cur_msg->next; + auto msg = cur_msg->msg; + + dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", id(), VirtualAddress(msg)); + + switch (msg->type) { + case ProcessorMessage::Callback: + msg->invoke_callback(); + break; + case ProcessorMessage::FlushTlb: + if (is_user_address(VirtualAddress(msg->flush_tlb.ptr))) { + // We assume that we don't cross into kernel land! + VERIFY(is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE)); + if (read_cr3() != msg->flush_tlb.page_directory->cr3()) { + // This processor isn't using this page directory right now, we can ignore this request + dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr)); + break; + } + } + flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count); + break; + } + + bool is_async = msg->async; // Need to cache this value *before* dropping the ref count! + auto prev_refs = atomic_fetch_sub(&msg->refs, 1u, AK::MemoryOrder::memory_order_acq_rel); + VERIFY(prev_refs != 0); + if (prev_refs == 1) { + // All processors handled this. If this is an async message, + // we need to clean it up and return it to the pool + if (is_async) { + smp_cleanup_message(*msg); + smp_return_to_pool(*msg); + } + } + + if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) + halt_this(); + } + did_process = true; + } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) { + halt_this(); + } + + leave_critical(prev_flags); + return did_process; +} + +bool Processor::smp_queue_message(ProcessorMessage& msg) +{ + // Note that it's quite possible that the other processor may pop + // the queue at any given time. We rely on the fact that the messages + // are pooled and never get freed! 
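Editor's note (illustrative, not part of the patch): the reverse_list lambda in smp_process_pending_messages() above (and its twin in deferred_call_execute_pending() further down) is the textbook in-place reversal of a singly linked list, used because entries are pushed in LIFO order but should be processed in submission order:

    struct Entry {
        Entry* next { nullptr };
    };

    // Reverse the list in place and return the new head.
    Entry* reverse_list(Entry* list)
    {
        Entry* reversed = nullptr;
        while (list) {
            Entry* next = list->next; // remember the rest of the list
            list->next = reversed;    // hook the current entry onto the reversed prefix
            reversed = list;
            list = next;
        }
        return reversed;
    }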
+ auto& msg_entry = msg.per_proc_entries[id()]; + VERIFY(msg_entry.msg == &msg); + ProcessorMessageEntry* next = nullptr; + do { + msg_entry.next = next; + } while (!atomic_compare_exchange_strong(&m_message_queue, next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel)); + return next == nullptr; +} + +void Processor::smp_broadcast_message(ProcessorMessage& msg) +{ + auto& cur_proc = Processor::current(); + + dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), count(), VirtualAddress(&cur_proc)); + + atomic_store(&msg.refs, count() - 1, AK::MemoryOrder::memory_order_release); + VERIFY(msg.refs > 0); + bool need_broadcast = false; + for_each( + [&](Processor& proc) { + if (&proc != &cur_proc) { + if (proc.smp_queue_message(msg)) + need_broadcast = true; + } + }); + + // Now trigger an IPI on all other APs (unless all targets already had messages queued) + if (need_broadcast) + APIC::the().broadcast_ipi(); +} + +void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg) +{ + auto& cur_proc = Processor::current(); + VERIFY(!msg.async); + // If synchronous then we must cleanup and return the message back + // to the pool. Otherwise, the last processor to complete it will return it + while (atomic_load(&msg.refs, AK::MemoryOrder::memory_order_consume) != 0) { + // TODO: pause for a bit? + + // We need to process any messages that may have been sent to + // us while we're waiting. This also checks if another processor + // may have requested us to halt. + cur_proc.smp_process_pending_messages(); + } + + smp_cleanup_message(msg); + smp_return_to_pool(msg); +} + +void Processor::smp_broadcast(Function<void()> callback, bool async) +{ + auto& msg = smp_get_from_pool(); + msg.async = async; + msg.type = ProcessorMessage::Callback; + new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback)); + smp_broadcast_message(msg); + if (!async) + smp_broadcast_wait_sync(msg); +} + +void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async) +{ + auto& cur_proc = Processor::current(); + VERIFY(cpu != cur_proc.get_id()); + auto& target_proc = processors()[cpu]; + msg.async = async; + + dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_proc)); + + atomic_store(&msg.refs, 1u, AK::MemoryOrder::memory_order_release); + if (target_proc->smp_queue_message(msg)) { + APIC::the().send_ipi(cpu); + } + + if (!async) { + // If synchronous then we must cleanup and return the message back + // to the pool. Otherwise, the last processor to complete it will return it + while (atomic_load(&msg.refs, AK::MemoryOrder::memory_order_consume) != 0) { + // TODO: pause for a bit? + + // We need to process any messages that may have been sent to + // us while we're waiting. This also checks if another processor + // may have requested us to halt. 
+ cur_proc.smp_process_pending_messages(); + } + + smp_cleanup_message(msg); + smp_return_to_pool(msg); + } +} + +void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async) +{ + auto& msg = smp_get_from_pool(); + msg.type = ProcessorMessage::Callback; + new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback)); + smp_unicast_message(cpu, msg, async); +} + +void Processor::smp_broadcast_flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count) +{ + auto& msg = smp_get_from_pool(); + msg.async = false; + msg.type = ProcessorMessage::FlushTlb; + msg.flush_tlb.page_directory = page_directory; + msg.flush_tlb.ptr = vaddr.as_ptr(); + msg.flush_tlb.page_count = page_count; + smp_broadcast_message(msg); + // While the other processors handle this request, we'll flush ours + flush_tlb_local(vaddr, page_count); + // Now wait until everybody is done as well + smp_broadcast_wait_sync(msg); +} + +void Processor::smp_broadcast_halt() +{ + // We don't want to use a message, because this could have been triggered + // by being out of memory and we might not be able to get a message + for_each( + [&](Processor& proc) { + proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release); + }); + + // Now trigger an IPI on all other APs + APIC::the().broadcast_ipi(); +} + +void Processor::Processor::halt() +{ + if (s_smp_enabled) + smp_broadcast_halt(); + + halt_this(); +} + +UNMAP_AFTER_INIT void Processor::deferred_call_pool_init() +{ + size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]); + for (size_t i = 0; i < pool_count; i++) { + auto& entry = m_deferred_call_pool[i]; + entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr; + new (entry.handler_storage) DeferredCallEntry::HandlerFunction; + entry.was_allocated = false; + } + m_pending_deferred_calls = nullptr; + m_free_deferred_call_pool_entry = &m_deferred_call_pool[0]; +} + +void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry) +{ + VERIFY(m_in_critical); + VERIFY(!entry->was_allocated); + + entry->handler_value() = {}; + + entry->next = m_free_deferred_call_pool_entry; + m_free_deferred_call_pool_entry = entry; +} + +DeferredCallEntry* Processor::deferred_call_get_free() +{ + VERIFY(m_in_critical); + + if (m_free_deferred_call_pool_entry) { + // Fast path, we have an entry in our pool + auto* entry = m_free_deferred_call_pool_entry; + m_free_deferred_call_pool_entry = entry->next; + VERIFY(!entry->was_allocated); + return entry; + } + + auto* entry = new DeferredCallEntry; + new (entry->handler_storage) DeferredCallEntry::HandlerFunction; + entry->was_allocated = true; + return entry; +} + +void Processor::deferred_call_execute_pending() +{ + VERIFY(m_in_critical); + + if (!m_pending_deferred_calls) + return; + auto* pending_list = m_pending_deferred_calls; + m_pending_deferred_calls = nullptr; + + // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first + auto reverse_list = + [](DeferredCallEntry* list) -> DeferredCallEntry* { + DeferredCallEntry* rev_list = nullptr; + while (list) { + auto next = list->next; + list->next = rev_list; + rev_list = list; + list = next; + } + return rev_list; + }; + pending_list = reverse_list(pending_list); + + do { + pending_list->invoke_handler(); + + // Return the entry back to the pool, or free it + auto* next = pending_list->next; + if (pending_list->was_allocated) { + pending_list->handler_value().~Function(); + delete 
pending_list; + } else + deferred_call_return_to_pool(pending_list); + pending_list = next; + } while (pending_list); +} + +void Processor::deferred_call_queue_entry(DeferredCallEntry* entry) +{ + VERIFY(m_in_critical); + entry->next = m_pending_deferred_calls; + m_pending_deferred_calls = entry; +} + +void Processor::deferred_call_queue(Function<void()> callback) +{ + // NOTE: If we are called outside of a critical section and outside + // of an irq handler, the function will be executed before we return! + ScopedCritical critical; + auto& cur_proc = Processor::current(); + + auto* entry = cur_proc.deferred_call_get_free(); + entry->handler_value() = move(callback); + + cur_proc.deferred_call_queue_entry(entry); +} + +UNMAP_AFTER_INIT void Processor::gdt_init() +{ + m_gdt_length = 0; + m_gdtr.address = nullptr; + m_gdtr.limit = 0; + + write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000); + write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0 + write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0 + write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3 + write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3 + + Descriptor tls_descriptor {}; + tls_descriptor.low = tls_descriptor.high = 0; + tls_descriptor.dpl = 3; + tls_descriptor.segment_present = 1; + tls_descriptor.granularity = 0; + tls_descriptor.operation_size64 = 0; + tls_descriptor.operation_size32 = 1; + tls_descriptor.descriptor_type = 1; + tls_descriptor.type = 2; + write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3 + + Descriptor fs_descriptor {}; + fs_descriptor.set_base(VirtualAddress { this }); + fs_descriptor.set_limit(sizeof(Processor)); + fs_descriptor.dpl = 0; + fs_descriptor.segment_present = 1; + fs_descriptor.granularity = 0; + fs_descriptor.operation_size64 = 0; + fs_descriptor.operation_size32 = 1; + fs_descriptor.descriptor_type = 1; + fs_descriptor.type = 2; + write_gdt_entry(GDT_SELECTOR_PROC, fs_descriptor); // fs0 + + Descriptor tss_descriptor {}; + tss_descriptor.set_base(VirtualAddress { &m_tss }); + tss_descriptor.set_limit(sizeof(TSS32)); + tss_descriptor.dpl = 0; + tss_descriptor.segment_present = 1; + tss_descriptor.granularity = 0; + tss_descriptor.operation_size64 = 0; + tss_descriptor.operation_size32 = 1; + tss_descriptor.descriptor_type = 0; + tss_descriptor.type = 9; + write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss + + flush_gdt(); + load_task_register(GDT_SELECTOR_TSS); + + asm volatile( + "mov %%ax, %%ds\n" + "mov %%ax, %%es\n" + "mov %%ax, %%gs\n" + "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0) + : "memory"); + set_fs(GDT_SELECTOR_PROC); + +#if ARCH(I386) + // Make sure CS points to the kernel code descriptor. 
+ // clang-format off + asm volatile( + "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n" + "sanity:\n"); + // clang-format on +#endif +} + +} diff --git a/Kernel/Arch/x86/common/TrapFrame.cpp b/Kernel/Arch/x86/common/TrapFrame.cpp new file mode 100644 index 0000000000..a5e4468d34 --- /dev/null +++ b/Kernel/Arch/x86/common/TrapFrame.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <Kernel/Arch/x86/TrapFrame.h> + +namespace Kernel { + +extern "C" void enter_trap_no_irq(TrapFrame* trap) +{ + InterruptDisabler disable; + Processor::current().enter_trap(*trap, false); +} + +extern "C" void enter_trap(TrapFrame* trap) +{ + InterruptDisabler disable; + Processor::current().enter_trap(*trap, true); +} + +extern "C" void exit_trap(TrapFrame* trap) +{ + InterruptDisabler disable; + return Processor::current().exit_trap(*trap); +} + +} diff --git a/Kernel/Arch/x86/i386/ASM_wrapper.cpp b/Kernel/Arch/x86/i386/ASM_wrapper.cpp new file mode 100644 index 0000000000..f5ba002a5a --- /dev/null +++ b/Kernel/Arch/x86/i386/ASM_wrapper.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/Types.h> + +#include <Kernel/Arch/x86/ASM_wrapper.h> +#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/Processor.h> + +namespace Kernel { + +UNMAP_AFTER_INIT void write_cr0(FlatPtr value) +{ + asm volatile("mov %%eax, %%cr0" ::"a"(value)); +} + +UNMAP_AFTER_INIT void write_cr4(FlatPtr value) +{ + asm volatile("mov %%eax, %%cr4" ::"a"(value)); +} +FlatPtr read_cr0() +{ + FlatPtr cr0; + asm("mov %%cr0, %%eax" + : "=a"(cr0)); + return cr0; +} + +FlatPtr read_cr2() +{ + FlatPtr cr2; + asm("mov %%cr2, %%eax" + : "=a"(cr2)); + return cr2; +} + +FlatPtr read_cr3() +{ + FlatPtr cr3; + asm("mov %%cr3, %%eax" + : "=a"(cr3)); + return cr3; +} + +void write_cr3(FlatPtr cr3) +{ + // NOTE: If you're here from a GPF crash, it's very likely that a PDPT entry is incorrect, not this! + asm volatile("mov %%eax, %%cr3" ::"a"(cr3) + : "memory"); +} + +FlatPtr read_cr4() +{ + FlatPtr cr4; + asm("mov %%cr4, %%eax" + : "=a"(cr4)); + return cr4; +} + +#define DEFINE_DEBUG_REGISTER(index) \ + FlatPtr read_dr##index() \ + { \ + FlatPtr value; \ + asm("mov %%dr" #index ", %%eax" \ + : "=a"(value)); \ + return value; \ + } \ + void write_dr##index(FlatPtr value) \ + { \ + asm volatile("mov %%eax, %%dr" #index ::"a"(value)); \ + } + +DEFINE_DEBUG_REGISTER(0); +DEFINE_DEBUG_REGISTER(1); +DEFINE_DEBUG_REGISTER(2); +DEFINE_DEBUG_REGISTER(3); +DEFINE_DEBUG_REGISTER(6); +DEFINE_DEBUG_REGISTER(7); + +} diff --git a/Kernel/Arch/i386/Boot/boot.S b/Kernel/Arch/x86/i386/Boot/boot.S index 33e48097df..33e48097df 100644 --- a/Kernel/Arch/i386/Boot/boot.S +++ b/Kernel/Arch/x86/i386/Boot/boot.S diff --git a/Kernel/Arch/x86/i386/CPU.cpp b/Kernel/Arch/x86/i386/CPU.cpp new file mode 100644 index 0000000000..2b464c37ac --- /dev/null +++ b/Kernel/Arch/x86/i386/CPU.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/Assertions.h> +#include <AK/Types.h> + +#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/Processor.h> +#include <Kernel/Arch/x86/TrapFrame.h> +#include <Kernel/KSyms.h> +#include <Kernel/Process.h> +#include <Kernel/Thread.h> + +namespace Kernel { + +// The compiler can't see the calls to these functions inside assembly. 
+// Declare them, to avoid dead code warnings. +extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used)); +extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used)); +extern "C" u32 do_init_context(Thread* thread, u32 flags) __attribute__((used)); + +extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) +{ + VERIFY(from_thread == to_thread || from_thread->state() != Thread::Running); + VERIFY(to_thread->state() == Thread::Running); + + bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR); + Processor::set_current_thread(*to_thread); + + auto& from_tss = from_thread->tss(); + auto& to_tss = to_thread->tss(); + + if (has_fxsr) + asm volatile("fxsave %0" + : "=m"(from_thread->fpu_state())); + else + asm volatile("fnsave %0" + : "=m"(from_thread->fpu_state())); + + from_tss.fs = get_fs(); + from_tss.gs = get_gs(); + set_fs(to_tss.fs); + set_gs(to_tss.gs); + + if (from_thread->process().is_traced()) + read_debug_registers_into(from_thread->debug_register_state()); + + if (to_thread->process().is_traced()) { + write_debug_registers_from(to_thread->debug_register_state()); + } else { + clear_debug_registers(); + } + + auto& processor = Processor::current(); + auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS); + tls_descriptor.set_base(to_thread->thread_specific_data()); + tls_descriptor.set_limit(to_thread->thread_specific_region_size()); + + if (from_tss.cr3 != to_tss.cr3) + write_cr3(to_tss.cr3); + + to_thread->set_cpu(processor.get_id()); + processor.restore_in_critical(to_thread->saved_critical()); + + if (has_fxsr) + asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state())); + else + asm volatile("frstor %0" ::"m"(to_thread->fpu_state())); + + // TODO: ioperm? +} + +extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap) +{ + VERIFY(!are_interrupts_enabled()); + VERIFY(is_kernel_mode()); + + dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread); + + VERIFY(to_thread == Thread::current()); + + Scheduler::enter_current(*from_thread, true); + + // Since we got here and don't have Scheduler::context_switch in the + // call stack (because this is the first time we switched into this + // context), we need to notify the scheduler so that it can release + // the scheduler lock. We don't want to enable interrupts at this point + // as we're still in the middle of a context switch. Doing so could + // trigger a context switch within a context switch, leading to a crash. + Scheduler::leave_on_first_switch(trap->regs->eflags & ~0x200); +} + +extern "C" u32 do_init_context(Thread* thread, u32 flags) +{ + VERIFY_INTERRUPTS_DISABLED(); + thread->tss().eflags = flags; + return Processor::current().init_context(*thread, true); +} + +} + +void __assertion_failed(const char* msg, const char* file, unsigned line, const char* func) +{ + asm volatile("cli"); + critical_dmesgln("ASSERTION FAILED: {}", msg); + critical_dmesgln("{}:{} in {}", file, line, func); + + abort(); +} + +[[noreturn]] void abort() +{ + // Switch back to the current process's page tables if there are any. + // Otherwise stack walking will be a disaster. 
+ auto process = Process::current(); + if (process) + MM.enter_process_paging_scope(*process); + + Kernel::dump_backtrace(); + Processor::halt(); + + abort(); +} + +[[noreturn]] void _abort() +{ + asm volatile("ud2"); + __builtin_unreachable(); +} diff --git a/Kernel/Arch/i386/InterruptEntry.cpp b/Kernel/Arch/x86/i386/InterruptEntry.cpp index d26b6b5a44..6a52802b7c 100644 --- a/Kernel/Arch/i386/InterruptEntry.cpp +++ b/Kernel/Arch/x86/i386/InterruptEntry.cpp @@ -4,8 +4,8 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include <Kernel/Arch/x86/CPU.h> #include <Kernel/Arch/x86/DescriptorTable.h> +#include <Kernel/Arch/x86/TrapFrame.h> // clang-format off asm( ".globl interrupt_common_asm_entry\n" diff --git a/Kernel/Arch/x86/i386/Processor.cpp b/Kernel/Arch/x86/i386/Processor.cpp new file mode 100644 index 0000000000..4c35e351d9 --- /dev/null +++ b/Kernel/Arch/x86/i386/Processor.cpp @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/StdLibExtras.h> +#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/Processor.h> +#include <Kernel/Arch/x86/TrapFrame.h> +#include <Kernel/Panic.h> +#include <Kernel/Process.h> +#include <Kernel/Random.h> +#include <Kernel/Thread.h> + +namespace Kernel { + +#define ENTER_THREAD_CONTEXT_ARGS_SIZE (2 * 4) // to_thread, from_thread +extern "C" void thread_context_first_enter(void); +extern "C" void do_assume_context(Thread* thread, u32 flags); +extern "C" void exit_kernel_thread(void); + +// clang-format off +asm( +// enter_thread_context returns to here first time a thread is executing +".globl thread_context_first_enter \n" +"thread_context_first_enter: \n" +// switch_context will have pushed from_thread and to_thread to our new +// stack prior to thread_context_first_enter() being called, and the +// pointer to TrapFrame was the top of the stack before that +" movl 8(%esp), %ebx \n" // save pointer to TrapFrame +" cld \n" +" call context_first_init \n" +" addl $" __STRINGIFY(ENTER_THREAD_CONTEXT_ARGS_SIZE) ", %esp \n" +" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame +" jmp common_trap_exit \n" +); +// clang-format on + +#if ARCH(I386) +// clang-format off +asm( +".global do_assume_context \n" +"do_assume_context: \n" +" movl 4(%esp), %ebx \n" +" movl 8(%esp), %esi \n" +// We're going to call Processor::init_context, so just make sure +// we have enough stack space so we don't stomp over it +" subl $(" __STRINGIFY(4 + REGISTER_STATE_SIZE + TRAP_FRAME_SIZE + 4) "), %esp \n" +" pushl %esi \n" +" pushl %ebx \n" +" cld \n" +" call do_init_context \n" +" addl $8, %esp \n" +" movl %eax, %esp \n" // move stack pointer to what Processor::init_context set up for us +" pushl %ebx \n" // push to_thread +" pushl %ebx \n" // push from_thread +" pushl $thread_context_first_enter \n" // should be same as tss.eip +" jmp enter_thread_context \n" +); +// clang-format on +#endif + +String Processor::platform_string() const +{ + // FIXME: other platforms + return "i386"; +} + +u32 Processor::init_context(Thread& thread, bool leave_crit) +{ + VERIFY(is_kernel_mode()); + VERIFY(g_scheduler_lock.is_locked()); + if (leave_crit) { + // Leave the critical section we set up in in Process::exec, + // but because we still have the scheduler lock we should end up with 1 + m_in_critical--; // leave it without triggering anything or restoring flags + VERIFY(in_critical() == 1); + } + + u32 kernel_stack_top = thread.kernel_stack_top(); + + // Add a random offset between 0-256 
(16-byte aligned) + kernel_stack_top -= round_up_to_power_of_two(get_fast_random<u8>(), 16); + + u32 stack_top = kernel_stack_top; + + // TODO: handle NT? + VERIFY((cpu_flags() & 0x24000) == 0); // Assume !(NT | VM) + + auto& tss = thread.tss(); + bool return_to_user = (tss.cs & 3) != 0; + + // make room for an interrupt frame + if (!return_to_user) { + // userspace_esp and userspace_ss are not popped off by iret + // unless we're switching back to user mode + stack_top -= sizeof(RegisterState) - 2 * sizeof(u32); + + // For kernel threads we'll push the thread function argument + // which should be in tss.esp and exit_kernel_thread as return + // address. + stack_top -= 2 * sizeof(u32); + *reinterpret_cast<u32*>(kernel_stack_top - 2 * sizeof(u32)) = tss.esp; + *reinterpret_cast<u32*>(kernel_stack_top - 3 * sizeof(u32)) = FlatPtr(&exit_kernel_thread); + } else { + stack_top -= sizeof(RegisterState); + } + + // we want to end up 16-byte aligned, %esp + 4 should be aligned + stack_top -= sizeof(u32); + *reinterpret_cast<u32*>(kernel_stack_top - sizeof(u32)) = 0; + + // set up the stack so that after returning from thread_context_first_enter() + // we will end up either in kernel mode or user mode, depending on how the thread is set up + // However, the first step is to always start in kernel mode with thread_context_first_enter + RegisterState& iretframe = *reinterpret_cast<RegisterState*>(stack_top); + iretframe.ss = tss.ss; + iretframe.gs = tss.gs; + iretframe.fs = tss.fs; + iretframe.es = tss.es; + iretframe.ds = tss.ds; + iretframe.edi = tss.edi; + iretframe.esi = tss.esi; + iretframe.ebp = tss.ebp; + iretframe.esp = 0; + iretframe.ebx = tss.ebx; + iretframe.edx = tss.edx; + iretframe.ecx = tss.ecx; + iretframe.eax = tss.eax; + iretframe.eflags = tss.eflags; + iretframe.eip = tss.eip; + iretframe.cs = tss.cs; + if (return_to_user) { + iretframe.userspace_esp = tss.esp; + iretframe.userspace_ss = tss.ss; + } + + // make space for a trap frame + stack_top -= sizeof(TrapFrame); + TrapFrame& trap = *reinterpret_cast<TrapFrame*>(stack_top); + trap.regs = &iretframe; + trap.prev_irq_level = 0; + trap.next_trap = nullptr; + + stack_top -= sizeof(u32); // pointer to TrapFrame + *reinterpret_cast<u32*>(stack_top) = stack_top + 4; + + if constexpr (CONTEXT_SWITCH_DEBUG) { + if (return_to_user) { + dbgln("init_context {} ({}) set up to execute at eip={}:{}, esp={}, stack_top={}, user_top={}:{}", + thread, + VirtualAddress(&thread), + iretframe.cs, tss.eip, + VirtualAddress(tss.esp), + VirtualAddress(stack_top), + iretframe.userspace_ss, + iretframe.userspace_esp); + } else { + dbgln("init_context {} ({}) set up to execute at eip={}:{}, esp={}, stack_top={}", + thread, + VirtualAddress(&thread), + iretframe.cs, tss.eip, + VirtualAddress(tss.esp), + VirtualAddress(stack_top)); + } + } + + // make switch_context() always first return to thread_context_first_enter() + // in kernel mode, so set up these values so that we end up popping iretframe + // off the stack right after the context switch completed, at which point + // control is transferred to what iretframe is pointing to. 
+ tss.eip = FlatPtr(&thread_context_first_enter); + tss.esp0 = kernel_stack_top; + tss.esp = stack_top; + tss.cs = GDT_SELECTOR_CODE0; + tss.ds = GDT_SELECTOR_DATA0; + tss.es = GDT_SELECTOR_DATA0; + tss.gs = GDT_SELECTOR_DATA0; + tss.ss = GDT_SELECTOR_DATA0; + tss.fs = GDT_SELECTOR_PROC; + return stack_top; +} + +void Processor::switch_context(Thread*& from_thread, Thread*& to_thread) +{ + VERIFY(!in_irq()); + VERIFY(m_in_critical == 1); + VERIFY(is_kernel_mode()); + + dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context --> switching out of: {} {}", VirtualAddress(from_thread), *from_thread); + from_thread->save_critical(m_in_critical); + + // clang-format off + // Switch to new thread context, passing from_thread and to_thread + // through to the new context using registers edx and eax + asm volatile( + // NOTE: changing how much we push to the stack affects + // SWITCH_CONTEXT_TO_STACK_SIZE and thread_context_first_enter()! + "pushfl \n" + "pushl %%ebx \n" + "pushl %%esi \n" + "pushl %%edi \n" + "pushl %%ebp \n" + "movl %%esp, %[from_esp] \n" + "movl $1f, %[from_eip] \n" + "movl %[to_esp0], %%ebx \n" + "movl %%ebx, %[tss_esp0] \n" + "movl %[to_esp], %%esp \n" + "pushl %[to_thread] \n" + "pushl %[from_thread] \n" + "pushl %[to_eip] \n" + "cld \n" + "jmp enter_thread_context \n" + "1: \n" + "popl %%edx \n" + "popl %%eax \n" + "popl %%ebp \n" + "popl %%edi \n" + "popl %%esi \n" + "popl %%ebx \n" + "popfl \n" + : [from_esp] "=m" (from_thread->tss().esp), + [from_eip] "=m" (from_thread->tss().eip), + [tss_esp0] "=m" (m_tss.esp0), + "=d" (from_thread), // needed so that from_thread retains the correct value + "=a" (to_thread) // needed so that to_thread retains the correct value + : [to_esp] "g" (to_thread->tss().esp), + [to_esp0] "g" (to_thread->tss().esp0), + [to_eip] "c" (to_thread->tss().eip), + [from_thread] "d" (from_thread), + [to_thread] "a" (to_thread) + : "memory" + ); + // clang-format on + + dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {}", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread); + + Processor::current().restore_in_critical(to_thread->saved_critical()); +} + +void Processor::assume_context(Thread& thread, FlatPtr flags) +{ + dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread); + + VERIFY_INTERRUPTS_DISABLED(); + Scheduler::prepare_after_exec(); + // in_critical() should be 2 here. 
The critical section in Process::exec + // and then the scheduler lock + VERIFY(Processor::current().in_critical() == 2); + + do_assume_context(&thread, flags); + + VERIFY_NOT_REACHED(); +} + +UNMAP_AFTER_INIT void Processor::initialize_context_switching(Thread& initial_thread) +{ + VERIFY(initial_thread.process().is_kernel_process()); + + auto& tss = initial_thread.tss(); + m_tss = tss; + m_tss.esp0 = tss.esp0; + m_tss.ss0 = GDT_SELECTOR_DATA0; + // user mode needs to be able to switch to kernel mode: + m_tss.cs = m_tss.ds = m_tss.es = m_tss.gs = m_tss.ss = GDT_SELECTOR_CODE0 | 3; + m_tss.fs = GDT_SELECTOR_PROC | 3; + + m_scheduler_initialized = true; + + // clang-format off + asm volatile( + "movl %[new_esp], %%esp \n" // switch to new stack + "pushl %[from_to_thread] \n" // to_thread + "pushl %[from_to_thread] \n" // from_thread + "pushl $" __STRINGIFY(GDT_SELECTOR_CODE0) " \n" + "pushl %[new_eip] \n" // save the entry eip to the stack + "movl %%esp, %%ebx \n" + "addl $20, %%ebx \n" // calculate pointer to TrapFrame + "pushl %%ebx \n" + "cld \n" + "pushl %[cpu] \n" // push argument for init_finished before register is clobbered + "call pre_init_finished \n" + "call init_finished \n" + "addl $4, %%esp \n" + "call post_init_finished \n" + "call enter_trap_no_irq \n" + "addl $4, %%esp \n" + "lret \n" + :: [new_esp] "g" (tss.esp), + [new_eip] "a" (tss.eip), + [from_to_thread] "b" (&initial_thread), + [cpu] "c" (id()) + ); + // clang-format on + + VERIFY_NOT_REACHED(); +} + +} diff --git a/Kernel/Arch/i386/ProcessorInfo.cpp b/Kernel/Arch/x86/i386/ProcessorInfo.cpp index 300444d733..3cb24ba64e 100644 --- a/Kernel/Arch/i386/ProcessorInfo.cpp +++ b/Kernel/Arch/x86/i386/ProcessorInfo.cpp @@ -6,7 +6,8 @@ #include <AK/StringBuilder.h> #include <AK/Types.h> -#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/CPUID.h> +#include <Kernel/Arch/x86/Processor.h> #include <Kernel/Arch/x86/ProcessorInfo.h> namespace Kernel { diff --git a/Kernel/Arch/i386/SafeMem.cpp b/Kernel/Arch/x86/i386/SafeMem.cpp index 4b96a4b4cf..bb49388180 100644 --- a/Kernel/Arch/i386/SafeMem.cpp +++ b/Kernel/Arch/x86/i386/SafeMem.cpp @@ -4,7 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/RegisterState.h> #include <Kernel/Arch/x86/SafeMem.h> #define CODE_SECTION(section_name) __attribute__((section(section_name))) diff --git a/Kernel/Arch/x86/x86_64/ASM_wrapper.cpp b/Kernel/Arch/x86/x86_64/ASM_wrapper.cpp new file mode 100644 index 0000000000..9a463bf1b4 --- /dev/null +++ b/Kernel/Arch/x86/x86_64/ASM_wrapper.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/Types.h> + +#include <Kernel/Arch/x86/ASM_wrapper.h> +#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/Processor.h> + +namespace Kernel { + +UNMAP_AFTER_INIT void write_cr0(FlatPtr value) +{ + asm volatile("mov %%rax, %%cr0" ::"a"(value)); +} + +UNMAP_AFTER_INIT void write_cr4(FlatPtr value) +{ + asm volatile("mov %%rax, %%cr4" ::"a"(value)); +} +FlatPtr read_cr0() +{ + FlatPtr cr0; + asm("mov %%cr0, %%rax" + : "=a"(cr0)); + return cr0; +} + +FlatPtr read_cr2() +{ + FlatPtr cr2; + asm("mov %%cr2, %%rax" + : "=a"(cr2)); + return cr2; +} + +FlatPtr read_cr3() +{ + FlatPtr cr3; + asm("mov %%cr3, %%rax" + : "=a"(cr3)); + return cr3; +} + +void write_cr3(FlatPtr cr3) +{ + // NOTE: If you're here from a GPF crash, it's very likely that a PDPT entry is incorrect, not this! 
+ asm volatile("mov %%rax, %%cr3" ::"a"(cr3) + : "memory"); +} + +FlatPtr read_cr4() +{ + FlatPtr cr4; + asm("mov %%cr4, %%rax" + : "=a"(cr4)); + return cr4; +} + +#define DEFINE_DEBUG_REGISTER(index) \ + FlatPtr read_dr##index() \ + { \ + FlatPtr value; \ + asm("mov %%dr" #index ", %%rax" \ + : "=a"(value)); \ + return value; \ + } \ + void write_dr##index(FlatPtr value) \ + { \ + asm volatile("mov %%rax, %%dr" #index ::"a"(value)); \ + } + +DEFINE_DEBUG_REGISTER(0); +DEFINE_DEBUG_REGISTER(1); +DEFINE_DEBUG_REGISTER(2); +DEFINE_DEBUG_REGISTER(3); +DEFINE_DEBUG_REGISTER(6); +DEFINE_DEBUG_REGISTER(7); + +} diff --git a/Kernel/Arch/x86_64/Boot/boot.S b/Kernel/Arch/x86/x86_64/Boot/boot.S index 8e1d9d27ba..8e1d9d27ba 100644 --- a/Kernel/Arch/x86_64/Boot/boot.S +++ b/Kernel/Arch/x86/x86_64/Boot/boot.S diff --git a/Kernel/Arch/x86/x86_64/Processor.cpp b/Kernel/Arch/x86/x86_64/Processor.cpp new file mode 100644 index 0000000000..17dae9352b --- /dev/null +++ b/Kernel/Arch/x86/x86_64/Processor.cpp @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/StdLibExtras.h> +#include <Kernel/Arch/x86/CPU.h> +#include <Kernel/Arch/x86/Processor.h> +#include <Kernel/Arch/x86/TrapFrame.h> +#include <Kernel/Panic.h> +#include <Kernel/Process.h> +#include <Kernel/Random.h> +#include <Kernel/Thread.h> + +namespace Kernel { + +#define ENTER_THREAD_CONTEXT_ARGS_SIZE (2 * 4) // to_thread, from_thread +extern "C" void thread_context_first_enter(void); +extern "C" void do_assume_context(Thread* thread, u32 flags); +extern "C" void exit_kernel_thread(void); + +// clang-format off +asm( +// enter_thread_context returns to here first time a thread is executing +".globl thread_context_first_enter \n" +"thread_context_first_enter: \n" +// switch_context will have pushed from_thread and to_thread to our new +// stack prior to thread_context_first_enter() being called, and the +// pointer to TrapFrame was the top of the stack before that +" movl 8(%esp), %ebx \n" // save pointer to TrapFrame +" cld \n" +" call context_first_init \n" +" addl $" __STRINGIFY(ENTER_THREAD_CONTEXT_ARGS_SIZE) ", %esp \n" +" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame +" jmp common_trap_exit \n" +); +// clang-format on + +#if ARCH(I386) +// clang-format off +asm( +".global do_assume_context \n" +"do_assume_context: \n" +" movl 4(%esp), %ebx \n" +" movl 8(%esp), %esi \n" +// We're going to call Processor::init_context, so just make sure +// we have enough stack space so we don't stomp over it +" subl $(" __STRINGIFY(4 + REGISTER_STATE_SIZE + TRAP_FRAME_SIZE + 4) "), %esp \n" +" pushl %esi \n" +" pushl %ebx \n" +" cld \n" +" call do_init_context \n" +" addl $8, %esp \n" +" movl %eax, %esp \n" // move stack pointer to what Processor::init_context set up for us +" pushl %ebx \n" // push to_thread +" pushl %ebx \n" // push from_thread +" pushl $thread_context_first_enter \n" // should be same as tss.eip +" jmp enter_thread_context \n" +); +// clang-format on +#endif + +String Processor::platform_string() const +{ + // FIXME: other platforms + return "x86_64"; +} + +u32 Processor::init_context(Thread& thread, bool leave_crit) +{ + VERIFY(is_kernel_mode()); + VERIFY(g_scheduler_lock.is_locked()); + if (leave_crit) { + // Leave the critical section we set up in in Process::exec, + // but because we still have the scheduler lock we should end up with 1 + m_in_critical--; // leave it without triggering anything or restoring flags + 
VERIFY(in_critical() == 1); + } + + u32 kernel_stack_top = thread.kernel_stack_top(); + + // Add a random offset between 0-256 (16-byte aligned) + kernel_stack_top -= round_up_to_power_of_two(get_fast_random<u8>(), 16); + + u32 stack_top = kernel_stack_top; + + // TODO: handle NT? + VERIFY((cpu_flags() & 0x24000) == 0); // Assume !(NT | VM) + + auto& tss = thread.tss(); + bool return_to_user = (tss.cs & 3) != 0; + + // make room for an interrupt frame + if (!return_to_user) { + // userspace_esp and userspace_ss are not popped off by iret + // unless we're switching back to user mode + stack_top -= sizeof(RegisterState) - 2 * sizeof(u32); + + // For kernel threads we'll push the thread function argument + // which should be in tss.esp and exit_kernel_thread as return + // address. + stack_top -= 2 * sizeof(u32); + *reinterpret_cast<u32*>(kernel_stack_top - 2 * sizeof(u32)) = tss.esp; + *reinterpret_cast<u32*>(kernel_stack_top - 3 * sizeof(u32)) = FlatPtr(&exit_kernel_thread); + } else { + stack_top -= sizeof(RegisterState); + } + + // we want to end up 16-byte aligned, %esp + 4 should be aligned + stack_top -= sizeof(u32); + *reinterpret_cast<u32*>(kernel_stack_top - sizeof(u32)) = 0; + + // set up the stack so that after returning from thread_context_first_enter() + // we will end up either in kernel mode or user mode, depending on how the thread is set up + // However, the first step is to always start in kernel mode with thread_context_first_enter + RegisterState& iretframe = *reinterpret_cast<RegisterState*>(stack_top); + // FIXME: copy state to be recovered through TSS + TODO(); + + // make space for a trap frame + stack_top -= sizeof(TrapFrame); + TrapFrame& trap = *reinterpret_cast<TrapFrame*>(stack_top); + trap.regs = &iretframe; + trap.prev_irq_level = 0; + trap.next_trap = nullptr; + + stack_top -= sizeof(u32); // pointer to TrapFrame + *reinterpret_cast<u32*>(stack_top) = stack_top + 4; + + if constexpr (CONTEXT_SWITCH_DEBUG) { + if (return_to_user) { + dbgln("init_context {} ({}) set up to execute at eip={}:{}, esp={}, stack_top={}, user_top={}:{}", + thread, + VirtualAddress(&thread), + iretframe.cs, tss.eip, + VirtualAddress(tss.esp), + VirtualAddress(stack_top), + iretframe.userspace_ss, + iretframe.userspace_esp); + } else { + dbgln("init_context {} ({}) set up to execute at eip={}:{}, esp={}, stack_top={}", + thread, + VirtualAddress(&thread), + iretframe.cs, tss.eip, + VirtualAddress(tss.esp), + VirtualAddress(stack_top)); + } + } + + // make switch_context() always first return to thread_context_first_enter() + // in kernel mode, so set up these values so that we end up popping iretframe + // off the stack right after the context switch completed, at which point + // control is transferred to what iretframe is pointing to. 
+ tss.eip = FlatPtr(&thread_context_first_enter); + tss.esp0 = kernel_stack_top; + tss.esp = stack_top; + tss.cs = GDT_SELECTOR_CODE0; + tss.ds = GDT_SELECTOR_DATA0; + tss.es = GDT_SELECTOR_DATA0; + tss.gs = GDT_SELECTOR_DATA0; + tss.ss = GDT_SELECTOR_DATA0; + tss.fs = GDT_SELECTOR_PROC; + return stack_top; +} + +void Processor::switch_context(Thread*& from_thread, Thread*& to_thread) +{ + VERIFY(!in_irq()); + VERIFY(m_in_critical == 1); + VERIFY(is_kernel_mode()); + + dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context --> switching out of: {} {}", VirtualAddress(from_thread), *from_thread); + from_thread->save_critical(m_in_critical); + + PANIC("Context switching not implemented."); + + dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {}", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread); + + Processor::current().restore_in_critical(to_thread->saved_critical()); +} + +void Processor::assume_context(Thread& thread, FlatPtr flags) +{ + dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread); + + VERIFY_INTERRUPTS_DISABLED(); + Scheduler::prepare_after_exec(); + // in_critical() should be 2 here. The critical section in Process::exec + // and then the scheduler lock + VERIFY(Processor::current().in_critical() == 2); + + (void)flags; + TODO(); + + VERIFY_NOT_REACHED(); +} + +UNMAP_AFTER_INIT void Processor::initialize_context_switching(Thread& initial_thread) +{ + VERIFY(initial_thread.process().is_kernel_process()); + + auto& tss = initial_thread.tss(); + m_tss = tss; + m_tss.esp0 = tss.esp0; + m_tss.ss0 = GDT_SELECTOR_DATA0; + // user mode needs to be able to switch to kernel mode: + m_tss.cs = m_tss.ds = m_tss.es = m_tss.gs = m_tss.ss = GDT_SELECTOR_CODE0 | 3; + m_tss.fs = GDT_SELECTOR_PROC | 3; + + m_scheduler_initialized = true; + + // FIXME: Context switching (see i386 impl) + + VERIFY_NOT_REACHED(); +} + +} |
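Editor's note (illustrative, not part of the patch): the raw dwords passed to write_raw_gdt_entry() in gdt_init() earlier in this patch encode standard flat 4 GiB segments. A small self-contained sketch that decodes such an entry, with the field layout as defined in the Intel SDM:

    #include <cstdint>
    #include <cstdio>

    // low  = limit[15:0] | base[15:0] << 16
    // high = base[23:16] | access << 8 | limit[19:16] << 16 | flags << 20 | base[31:24] << 24
    void decode_gdt_entry(uint32_t low, uint32_t high)
    {
        uint32_t base = (low >> 16) | ((high & 0xff) << 16) | (high & 0xff000000);
        uint32_t limit = (low & 0xffff) | (high & 0x000f0000);
        unsigned access = (high >> 8) & 0xff;
        unsigned flags = (high >> 20) & 0xf;
        std::printf("base=%#x limit=%#x access=%#x flags=%#x\n", base, limit, access, flags);
    }

    int main()
    {
        decode_gdt_entry(0x0000ffff, 0x00cf9a00); // code0: base 0, limit 0xfffff, access 0x9a (ring-0 code), flags 0xc (4K granularity, 32-bit)
        decode_gdt_entry(0x0000ffff, 0x00cff200); // data3: base 0, limit 0xfffff, access 0xf2 (ring-3 data), flags 0xc
    }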