/* * Copyright (c) 2018-2021, Andreas Kling * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include namespace Kernel { extern "C" void syscall_handler(TrapFrame*) __attribute__((used)); extern "C" void syscall_asm_entry(); static void syscall_asm_entry_dummy() __attribute__((used)); NEVER_INLINE void syscall_asm_entry_dummy() { // clang-format off #if ARCH(I386) asm( ".globl syscall_asm_entry\n" "syscall_asm_entry:\n" " pushl $0x0\n" " pusha\n" " pushl %ds\n" " pushl %es\n" " pushl %fs\n" " pushl %gs\n" " pushl %ss\n" " mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" " mov %ax, %ds\n" " mov %ax, %es\n" " mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" " mov %ax, %fs\n" " cld\n" " xor %esi, %esi\n" " xor %edi, %edi\n" " pushl %esp \n" // set TrapFrame::regs " subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" " movl %esp, %ebx \n" " pushl %ebx \n" // push pointer to TrapFrame " call enter_trap_no_irq \n" " movl %ebx, 0(%esp) \n" // push pointer to TrapFrame " call syscall_handler \n" " movl %ebx, 0(%esp) \n" // push pointer to TrapFrame " jmp common_trap_exit \n"); #elif ARCH(X86_64) asm( ".globl syscall_asm_entry\n" "syscall_asm_entry:\n" " pushq $0x0\n" " pushq %r15\n" " pushq %r14\n" " pushq %r13\n" " pushq %r12\n" " pushq %r11\n" " pushq %r10\n" " pushq %r9\n" " pushq %r8\n" " pushq %rax\n" " pushq %rcx\n" " pushq %rdx\n" " pushq %rbx\n" " pushq %rsp\n" " pushq %rbp\n" " pushq %rsi\n" " pushq %rdi\n" " pushq %rsp \n" /* set TrapFrame::regs */ " subq $" __STRINGIFY(TRAP_FRAME_SIZE - 8) ", %rsp \n" " subq $0x8, %rsp\n" /* align stack */ " lea 0x8(%rsp), %rdi \n" " cld\n" " call enter_trap_no_irq \n" " lea 0x8(%rsp), %rdi \n" " call syscall_handler\n" " addq $0x8, %rsp\n" /* undo alignment */ " jmp common_trap_exit \n"); #endif // clang-format on } namespace Syscall { static KResultOr handle(RegisterState&, FlatPtr function, FlatPtr arg1, FlatPtr arg2, FlatPtr arg3); UNMAP_AFTER_INIT void initialize() { register_user_callable_interrupt_handler(syscall_vector, syscall_asm_entry); } #pragma GCC diagnostic ignored "-Wcast-function-type" typedef KResultOr (Process::*Handler)(FlatPtr, FlatPtr, FlatPtr); typedef KResultOr (Process::*HandlerWithRegisterState)(RegisterState&); #define __ENUMERATE_SYSCALL(x) reinterpret_cast(&Process::sys$##x), static const Handler s_syscall_table[] = { ENUMERATE_SYSCALLS(__ENUMERATE_SYSCALL) }; #undef __ENUMERATE_SYSCALL KResultOr handle(RegisterState& regs, FlatPtr function, FlatPtr arg1, FlatPtr arg2, FlatPtr arg3) { VERIFY_INTERRUPTS_ENABLED(); auto current_thread = Thread::current(); auto& process = current_thread->process(); current_thread->did_syscall(); if (function == SC_exit || function == SC_exit_thread) { // These syscalls need special handling since they never return to the caller. if (auto* tracer = process.tracer(); tracer && tracer->is_tracing_syscalls()) { #if ARCH(I386) regs.eax = 0; #else regs.rax = 0; #endif tracer->set_trace_syscalls(false); process.tracer_trap(*current_thread, regs); // this triggers SIGTRAP and stops the thread! } switch (function) { case SC_exit: process.sys$exit(arg1); break; case SC_exit_thread: process.sys$exit_thread(arg1, arg2, arg3); break; default: VERIFY_NOT_REACHED(); } } if (function == SC_fork || function == SC_sigreturn) { // These syscalls want the RegisterState& rather than individual parameters. auto handler = (HandlerWithRegisterState)s_syscall_table[function]; return (process.*(handler))(regs); } if (function >= Function::__Count) { dbgln("Unknown syscall {} requested ({:08x}, {:08x}, {:08x})", function, arg1, arg2, arg3); return ENOSYS; } if (s_syscall_table[function] == nullptr) { dbgln("Null syscall {} requested, you probably need to rebuild this program!", function); return ENOSYS; } return (process.*(s_syscall_table[function]))(arg1, arg2, arg3); } } NEVER_INLINE void syscall_handler(TrapFrame* trap) { auto& regs = *trap->regs; auto current_thread = Thread::current(); { ScopedSpinLock lock(g_scheduler_lock); current_thread->set_may_die_immediately(false); } ScopeGuard reset_may_die_immediately = [¤t_thread] { ScopedSpinLock lock(g_scheduler_lock); current_thread->set_may_die_immediately(true); }; VERIFY(current_thread->previous_mode() == Thread::PreviousMode::UserMode); auto& process = current_thread->process(); if (auto tracer = process.tracer(); tracer && tracer->is_tracing_syscalls()) { tracer->set_trace_syscalls(false); process.tracer_trap(*current_thread, regs); // this triggers SIGTRAP and stops the thread! } current_thread->yield_if_stopped(); // Make sure SMAP protection is enabled on syscall entry. clac(); // Apply a random offset in the range 0-255 to the stack pointer, // to make kernel stacks a bit less deterministic. u32 lsw; u32 msw; read_tsc(lsw, msw); auto* ptr = (char*)__builtin_alloca(lsw & 0xff); asm volatile("" : "=m"(*ptr)); static constexpr FlatPtr iopl_mask = 3u << 12; FlatPtr flags; #if ARCH(I386) flags = regs.eflags; #else flags = regs.rflags; #endif if ((flags & (iopl_mask)) != 0) { PANIC("Syscall from process with IOPL != 0"); } // NOTE: We take the big process lock before inspecting memory regions. process.big_lock().lock(); VirtualAddress userspace_sp; #if ARCH(I386) userspace_sp = VirtualAddress { regs.userspace_esp }; #else userspace_sp = VirtualAddress { regs.userspace_rsp }; #endif if (!MM.validate_user_stack(process, userspace_sp)) { dbgln("Invalid stack pointer: {:p}", userspace_sp); handle_crash(regs, "Bad stack on syscall entry", SIGSTKFLT); } VirtualAddress ip; #if ARCH(I386) ip = VirtualAddress { regs.eip }; #else ip = VirtualAddress { regs.rip }; #endif auto* calling_region = MM.find_user_region_from_vaddr(process.space(), ip); if (!calling_region) { dbgln("Syscall from {:p} which has no associated region", ip); handle_crash(regs, "Syscall from unknown region", SIGSEGV); } if (calling_region->is_writable()) { dbgln("Syscall from writable memory at {:p}", ip); handle_crash(regs, "Syscall from writable memory", SIGSEGV); } if (process.space().enforces_syscall_regions() && !calling_region->is_syscall_region()) { dbgln("Syscall from non-syscall region"); handle_crash(regs, "Syscall from non-syscall region", SIGSEGV); } #if ARCH(I386) auto function = regs.eax; auto arg1 = regs.edx; auto arg2 = regs.ecx; auto arg3 = regs.ebx; #else auto function = regs.rax; auto arg1 = regs.rdx; auto arg2 = regs.rcx; auto arg3 = regs.rbx; #endif auto result = Syscall::handle(regs, function, arg1, arg2, arg3); if (result.is_error()) { #if ARCH(I386) regs.eax = result.error(); #else regs.rax = result.error(); #endif } else { #if ARCH(I386) regs.eax = result.value(); #else regs.rax = result.value(); #endif } process.big_lock().unlock(); if (auto tracer = process.tracer(); tracer && tracer->is_tracing_syscalls()) { tracer->set_trace_syscalls(false); process.tracer_trap(*current_thread, regs); // this triggers SIGTRAP and stops the thread! } current_thread->yield_if_stopped(); current_thread->check_dispatch_pending_signal(); // If the previous mode somehow changed something is seriously messed up... VERIFY(current_thread->previous_mode() == Thread::PreviousMode::UserMode); // Check if we're supposed to return to userspace or just die. current_thread->die_if_needed(); VERIFY(!g_scheduler_lock.own_lock()); } }