/* * Copyright (c) 2018-2021, Andreas Kling * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Kernel { extern Region* g_signal_trampoline_region; struct LoadResult { OwnPtr space; FlatPtr load_base { 0 }; FlatPtr entry_eip { 0 }; size_t size { 0 }; WeakPtr tls_region; size_t tls_size { 0 }; size_t tls_alignment { 0 }; WeakPtr stack_region; }; static Vector generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd); static bool validate_stack_size(const Vector& arguments, const Vector& environment) { size_t total_arguments_size = 0; size_t total_environment_size = 0; for (auto& a : arguments) total_arguments_size += a.length() + 1; for (auto& e : environment) total_environment_size += e.length() + 1; total_arguments_size += sizeof(char*) * (arguments.size() + 1); total_environment_size += sizeof(char*) * (environment.size() + 1); static constexpr size_t max_arguments_size = Thread::default_userspace_stack_size / 8; static constexpr size_t max_environment_size = Thread::default_userspace_stack_size / 8; if (total_arguments_size > max_arguments_size) return false; if (total_environment_size > max_environment_size) return false; // FIXME: This doesn't account for the size of the auxiliary vector return true; } static KResultOr make_userspace_context_for_main_thread([[maybe_unused]] ThreadRegisters& regs, Region& region, Vector arguments, Vector environment, Vector auxiliary_values) { FlatPtr new_sp = region.range().end().get(); // Add some bits of randomness to the user stack pointer. new_sp -= round_up_to_power_of_two(get_fast_random() % 4096, 16); auto push_on_new_stack = [&new_sp](FlatPtr value) { new_sp -= sizeof(FlatPtr); Userspace stack_ptr = new_sp; return copy_to_user(stack_ptr, &value); }; auto push_aux_value_on_new_stack = [&new_sp](auxv_t value) { new_sp -= sizeof(auxv_t); Userspace stack_ptr = new_sp; return copy_to_user(stack_ptr, &value); }; auto push_string_on_new_stack = [&new_sp](const String& string) { new_sp -= round_up_to_power_of_two(string.length() + 1, sizeof(FlatPtr)); Userspace stack_ptr = new_sp; return copy_to_user(stack_ptr, string.characters(), string.length() + 1); }; Vector argv_entries; for (auto& argument : arguments) { push_string_on_new_stack(argument); if (!argv_entries.try_append(new_sp)) return ENOMEM; } Vector env_entries; for (auto& variable : environment) { push_string_on_new_stack(variable); if (!env_entries.try_append(new_sp)) return ENOMEM; } for (auto& value : auxiliary_values) { if (!value.optional_string.is_empty()) { push_string_on_new_stack(value.optional_string); value.auxv.a_un.a_ptr = (void*)new_sp; } } for (ssize_t i = auxiliary_values.size() - 1; i >= 0; --i) { auto& value = auxiliary_values[i]; push_aux_value_on_new_stack(value.auxv); } push_on_new_stack(0); for (ssize_t i = env_entries.size() - 1; i >= 0; --i) push_on_new_stack(env_entries[i]); FlatPtr envp = new_sp; push_on_new_stack(0); for (ssize_t i = argv_entries.size() - 1; i >= 0; --i) push_on_new_stack(argv_entries[i]); FlatPtr argv = new_sp; // NOTE: The stack needs to be 16-byte aligned. new_sp -= new_sp % 16; #if ARCH(I386) // GCC assumes that the return address has been pushed to the stack when it enters the function, // so we need to reserve an extra pointer's worth of bytes below this to make GCC's stack alignment // calculations work new_sp -= sizeof(void*); push_on_new_stack(envp); push_on_new_stack(argv); push_on_new_stack(argv_entries.size()); #else regs.rdi = argv_entries.size(); regs.rsi = argv; regs.rdx = envp; #endif VERIFY(new_sp % 16 == 0); // FIXME: The way we're setting up the stack and passing arguments to the entry point isn't ABI-compliant return new_sp; } struct RequiredLoadRange { FlatPtr start { 0 }; FlatPtr end { 0 }; }; static KResultOr get_required_load_range(FileDescription& program_description) { auto& inode = *(program_description.inode()); auto vmobject = SharedInodeVMObject::try_create_with_inode(inode); if (!vmobject) { dbgln("get_required_load_range: Unable to allocate SharedInodeVMObject"); return ENOMEM; } size_t executable_size = inode.size(); auto region = MM.allocate_kernel_region_with_vmobject(*vmobject, page_round_up(executable_size), "ELF memory range calculation", Region::Access::Read); if (!region) { dbgln("Could not allocate memory for ELF"); return ENOMEM; } auto elf_image = ELF::Image(region->vaddr().as_ptr(), executable_size); if (!elf_image.is_valid()) { return EINVAL; } RequiredLoadRange range {}; elf_image.for_each_program_header([&range](const auto& pheader) { if (pheader.type() != PT_LOAD) return; auto region_start = (FlatPtr)pheader.vaddr().as_ptr(); auto region_end = region_start + pheader.size_in_memory(); if (range.start == 0 || region_start < range.start) range.start = region_start; if (range.end == 0 || region_end > range.end) range.end = region_end; }); VERIFY(range.end > range.start); return range; }; static KResultOr get_load_offset(const ElfW(Ehdr) & main_program_header, FileDescription& main_program_description, FileDescription* interpreter_description) { constexpr FlatPtr load_range_start = 0x08000000; constexpr FlatPtr load_range_size = 65536 * PAGE_SIZE; // 2**16 * PAGE_SIZE = 256MB constexpr FlatPtr minimum_load_offset_randomization_size = 10 * MiB; auto random_load_offset_in_range([](auto start, auto size) { return page_round_down(start + get_good_random() % size); }); if (main_program_header.e_type == ET_DYN) { return random_load_offset_in_range(load_range_start, load_range_size); } if (main_program_header.e_type != ET_EXEC) return EINVAL; auto main_program_load_range_result = get_required_load_range(main_program_description); if (main_program_load_range_result.is_error()) return main_program_load_range_result.error(); auto main_program_load_range = main_program_load_range_result.value(); RequiredLoadRange selected_range {}; if (interpreter_description) { auto interpreter_load_range_result = get_required_load_range(*interpreter_description); if (interpreter_load_range_result.is_error()) return interpreter_load_range_result.error(); auto interpreter_size_in_memory = interpreter_load_range_result.value().end - interpreter_load_range_result.value().start; auto interpreter_load_range_end = load_range_start + load_range_size - interpreter_size_in_memory; // No intersection if (main_program_load_range.end < load_range_start || main_program_load_range.start > interpreter_load_range_end) return random_load_offset_in_range(load_range_start, load_range_size); RequiredLoadRange first_available_part = { load_range_start, main_program_load_range.start }; RequiredLoadRange second_available_part = { main_program_load_range.end, interpreter_load_range_end }; // Select larger part if (first_available_part.end - first_available_part.start > second_available_part.end - second_available_part.start) selected_range = first_available_part; else selected_range = second_available_part; } else selected_range = main_program_load_range; // If main program is too big and leaves us without enough space for adequate loader randomization if (selected_range.end - selected_range.start < minimum_load_offset_randomization_size) return E2BIG; return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start); } enum class ShouldAllocateTls { No, Yes, }; enum class ShouldAllowSyscalls { No, Yes, }; static KResultOr load_elf_object(NonnullOwnPtr new_space, FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls should_allocate_tls, ShouldAllowSyscalls should_allow_syscalls) { auto& inode = *(object_description.inode()); auto vmobject = SharedInodeVMObject::try_create_with_inode(inode); if (!vmobject) { dbgln("load_elf_object: Unable to allocate SharedInodeVMObject"); return ENOMEM; } if (vmobject->writable_mappings()) { dbgln("Refusing to execute a write-mapped program"); return ETXTBSY; } size_t executable_size = inode.size(); auto executable_region = MM.allocate_kernel_region_with_vmobject(*vmobject, page_round_up(executable_size), "ELF loading", Region::Access::Read); if (!executable_region) { dbgln("Could not allocate memory for ELF loading"); return ENOMEM; } auto elf_image = ELF::Image(executable_region->vaddr().as_ptr(), executable_size); if (!elf_image.is_valid()) return ENOEXEC; Region* master_tls_region { nullptr }; size_t master_tls_size = 0; size_t master_tls_alignment = 0; FlatPtr load_base_address = 0; String elf_name = object_description.absolute_path(); VERIFY(!Processor::current().in_critical()); MemoryManager::enter_space(*new_space); KResult ph_load_result = KSuccess; elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) { if (program_header.type() == PT_TLS) { VERIFY(should_allocate_tls == ShouldAllocateTls::Yes); VERIFY(program_header.size_in_memory()); if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) { dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable."); ph_load_result = ENOEXEC; return IterationDecision::Break; } auto range = new_space->allocate_range({}, program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } auto region_or_error = new_space->allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; } master_tls_region = region_or_error.value(); master_tls_size = program_header.size_in_memory(); master_tls_alignment = program_header.alignment(); if (!copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image())) { ph_load_result = EFAULT; return IterationDecision::Break; } return IterationDecision::Continue; } if (program_header.type() != PT_LOAD) return IterationDecision::Continue; if (program_header.is_writable()) { // Writable section: create a copy in memory. VERIFY(program_header.size_in_memory()); VERIFY(program_header.alignment() == PAGE_SIZE); if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) { dbgln("Shenanigans! Writable ELF PT_LOAD header sneaks outside of executable."); ph_load_result = ENOEXEC; return IterationDecision::Break; } int prot = 0; if (program_header.is_readable()) prot |= PROT_READ; if (program_header.is_writable()) prot |= PROT_WRITE; auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : ""); auto range_base = VirtualAddress { page_round_down(program_header.vaddr().offset(load_offset).get()) }; auto range_end = VirtualAddress { page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) }; auto range = new_space->allocate_range(range_base, range_end.get() - range_base.get()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } auto region_or_error = new_space->allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; } // It's not always the case with PIE executables (and very well shouldn't be) that the // virtual address in the program header matches the one we end up giving the process. // In order to copy the data image correctly into memory, we need to copy the data starting at // the right initial page offset into the pages allocated for the elf_alloc-XX section. // FIXME: There's an opportunity to munmap, or at least mprotect, the padding space between // the .text and .data PT_LOAD sections of the executable. // Accessing it would definitely be a bug. auto page_offset = program_header.vaddr(); page_offset.mask(~PAGE_MASK); if (!copy_to_user((u8*)region_or_error.value()->vaddr().as_ptr() + page_offset.get(), program_header.raw_data(), program_header.size_in_image())) { ph_load_result = EFAULT; return IterationDecision::Break; } return IterationDecision::Continue; } // Non-writable section: map the executable itself in memory. VERIFY(program_header.size_in_memory()); VERIFY(program_header.alignment() == PAGE_SIZE); int prot = 0; if (program_header.is_readable()) prot |= PROT_READ; if (program_header.is_writable()) prot |= PROT_WRITE; if (program_header.is_executable()) prot |= PROT_EXEC; auto range_base = VirtualAddress { page_round_down(program_header.vaddr().offset(load_offset).get()) }; auto range_end = VirtualAddress { page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) }; auto range = new_space->allocate_range(range_base, range_end.get() - range_base.get()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } auto region_or_error = new_space->allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; } if (should_allow_syscalls == ShouldAllowSyscalls::Yes) region_or_error.value()->set_syscall_region(true); if (program_header.offset() == 0) load_base_address = (FlatPtr)region_or_error.value()->vaddr().as_ptr(); return IterationDecision::Continue; }); if (ph_load_result.is_error()) { dbgln("do_exec: Failure loading program ({})", ph_load_result.error()); return ph_load_result; } if (!elf_image.entry().offset(load_offset).get()) { dbgln("do_exec: Failure loading program, entry pointer is invalid! {})", elf_image.entry().offset(load_offset)); return ENOEXEC; } auto stack_range = new_space->allocate_range({}, Thread::default_userspace_stack_size); if (!stack_range.has_value()) { dbgln("do_exec: Failed to allocate VM range for stack"); return ENOMEM; } auto stack_region_or_error = new_space->allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); if (stack_region_or_error.is_error()) return stack_region_or_error.error(); auto& stack_region = *stack_region_or_error.value(); stack_region.set_stack(true); return LoadResult { move(new_space), load_base_address, elf_image.entry().offset(load_offset).get(), executable_size, AK::try_make_weak_ptr(master_tls_region), master_tls_size, master_tls_alignment, stack_region.make_weak_ptr() }; } KResultOr Process::load(NonnullRefPtr main_program_description, RefPtr interpreter_description, const ElfW(Ehdr) & main_program_header) { auto new_space = Space::create(*this, nullptr); if (!new_space) return ENOMEM; ScopeGuard space_guard([&]() { MemoryManager::enter_process_paging_scope(*this); }); auto load_offset = get_load_offset(main_program_header, main_program_description, interpreter_description); if (load_offset.is_error()) { return load_offset.error(); } if (interpreter_description.is_null()) { auto result = load_elf_object(new_space.release_nonnull(), main_program_description, load_offset.value(), ShouldAllocateTls::Yes, ShouldAllowSyscalls::No); if (result.is_error()) return result.error(); m_master_tls_region = result.value().tls_region; m_master_tls_size = result.value().tls_size; m_master_tls_alignment = result.value().tls_alignment; return result; } auto interpreter_load_result = load_elf_object(new_space.release_nonnull(), *interpreter_description, load_offset.value(), ShouldAllocateTls::No, ShouldAllowSyscalls::Yes); if (interpreter_load_result.is_error()) return interpreter_load_result.error(); // TLS allocation will be done in userspace by the loader VERIFY(!interpreter_load_result.value().tls_region); VERIFY(!interpreter_load_result.value().tls_alignment); VERIFY(!interpreter_load_result.value().tls_size); return interpreter_load_result; } KResult Process::do_exec(NonnullRefPtr main_program_description, Vector arguments, Vector environment, RefPtr interpreter_description, Thread*& new_main_thread, u32& prev_flags, const ElfW(Ehdr) & main_program_header) { VERIFY(is_user_process()); VERIFY(!Processor::current().in_critical()); auto path = main_program_description->absolute_path(); dbgln_if(EXEC_DEBUG, "do_exec: {}", path); // FIXME: How much stack space does process startup need? if (!validate_stack_size(arguments, environment)) return E2BIG; auto parts = path.split('/'); if (parts.is_empty()) return ENOENT; auto main_program_metadata = main_program_description->metadata(); auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header); if (load_result_or_error.is_error()) { dbgln("do_exec: Failed to load main program or interpreter for {}", path); return load_result_or_error.error(); } auto signal_trampoline_range = load_result_or_error.value().space->allocate_range({}, PAGE_SIZE); if (!signal_trampoline_range.has_value()) { dbgln("do_exec: Failed to allocate VM for signal trampoline"); return ENOMEM; } // We commit to the new executable at this point. There is no turning back! // Prevent other processes from attaching to us with ptrace while we're doing this. MutexLocker ptrace_locker(ptrace_lock()); // Disable profiling temporarily in case it's running on this process. auto was_profiling = m_profiling; TemporaryChange profiling_disabler(m_profiling, false); kill_threads_except_self(); auto& load_result = load_result_or_error.value(); bool executable_is_setid = false; if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) { if (main_program_metadata.is_setuid()) { executable_is_setid = true; ProtectedDataMutationScope scope { *this }; m_euid = main_program_metadata.uid; m_suid = main_program_metadata.uid; } if (main_program_metadata.is_setgid()) { executable_is_setid = true; ProtectedDataMutationScope scope { *this }; m_egid = main_program_metadata.gid; m_sgid = main_program_metadata.gid; } } set_dumpable(!executable_is_setid); { // We must disable global profiling (especially kfree tracing) here because // we might otherwise end up walking the stack into the process' space that // is about to be destroyed. TemporaryChange global_profiling_disabler(g_profiling_all_threads, false); m_space = load_result.space.release_nonnull(); } MemoryManager::enter_space(*m_space); auto signal_trampoline_region = m_space->allocate_region_with_vmobject(signal_trampoline_range.value(), g_signal_trampoline_region->vmobject(), 0, "Signal trampoline", PROT_READ | PROT_EXEC, true); if (signal_trampoline_region.is_error()) { VERIFY_NOT_REACHED(); } signal_trampoline_region.value()->set_syscall_region(true); m_executable = main_program_description->custody(); m_arguments = arguments; m_environment = environment; m_veil_state = VeilState::None; m_unveiled_paths.clear(); m_unveiled_paths.set_metadata({ "/", UnveilAccess::None, false }); m_coredump_metadata.clear(); auto current_thread = Thread::current(); current_thread->clear_signals(); clear_futex_queues_on_exec(); fds().change_each([&](auto& file_description_metadata) { if (file_description_metadata.is_valid() && file_description_metadata.flags() & FD_CLOEXEC) file_description_metadata = {}; }); int main_program_fd = -1; if (interpreter_description) { main_program_fd = m_fds.allocate(); VERIFY(main_program_fd >= 0); auto seek_result = main_program_description->seek(0, SEEK_SET); VERIFY(!seek_result.is_error()); main_program_description->set_readable(true); m_fds[main_program_fd].set(move(main_program_description), FD_CLOEXEC); } new_main_thread = nullptr; if (¤t_thread->process() == this) { new_main_thread = current_thread; } else { for_each_thread([&](auto& thread) { new_main_thread = &thread; return IterationDecision::Break; }); } VERIFY(new_main_thread); auto auxv = generate_auxiliary_vector(load_result.load_base, load_result.entry_eip, uid(), euid(), gid(), egid(), path, main_program_fd); // NOTE: We create the new stack before disabling interrupts since it will zero-fault // and we don't want to deal with faults after this point. auto make_stack_result = make_userspace_context_for_main_thread(new_main_thread->regs(), *load_result.stack_region.unsafe_ptr(), move(arguments), move(environment), move(auxv)); if (make_stack_result.is_error()) return make_stack_result.error(); FlatPtr new_userspace_sp = make_stack_result.value(); if (wait_for_tracer_at_next_execve()) { // Make sure we release the ptrace lock here or the tracer will block forever. ptrace_locker.unlock(); Thread::current()->send_urgent_signal_to_self(SIGSTOP); } // We enter a critical section here because we don't want to get interrupted between do_exec() // and Processor::assume_context() or the next context switch. // If we used an InterruptDisabler that sti()'d on exit, we might timer tick'd too soon in exec(). Processor::current().enter_critical(prev_flags); // NOTE: Be careful to not trigger any page faults below! m_name = parts.take_last(); new_main_thread->set_name(m_name); { ProtectedDataMutationScope scope { *this }; m_promises = m_execpromises; m_has_promises = m_has_execpromises; m_execpromises = 0; m_has_execpromises = false; m_signal_trampoline = signal_trampoline_region.value()->vaddr(); // FIXME: PID/TID ISSUE m_pid = new_main_thread->tid().value(); } auto tsr_result = new_main_thread->make_thread_specific_region({}); if (tsr_result.is_error()) { // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable. VERIFY_NOT_REACHED(); } new_main_thread->reset_fpu_state(); auto& regs = new_main_thread->m_regs; #if ARCH(I386) regs.cs = GDT_SELECTOR_CODE3 | 3; regs.ds = GDT_SELECTOR_DATA3 | 3; regs.es = GDT_SELECTOR_DATA3 | 3; regs.ss = GDT_SELECTOR_DATA3 | 3; regs.fs = GDT_SELECTOR_DATA3 | 3; regs.gs = GDT_SELECTOR_TLS | 3; regs.eip = load_result.entry_eip; regs.esp = new_userspace_sp; #else regs.rip = load_result.entry_eip; regs.rsp = new_userspace_sp; #endif regs.cr3 = space().page_directory().cr3(); { TemporaryChange profiling_disabler(m_profiling, was_profiling); PerformanceManager::add_process_exec_event(*this); } { ScopedSpinLock lock(g_scheduler_lock); new_main_thread->set_state(Thread::State::Runnable); } u32 lock_count_to_restore; [[maybe_unused]] auto rc = big_lock().force_unlock_if_locked(lock_count_to_restore); VERIFY_INTERRUPTS_DISABLED(); VERIFY(Processor::current().in_critical()); return KSuccess; } static Vector generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd) { Vector auxv; // PHDR/EXECFD // PH* auxv.append({ ELF::AuxiliaryValue::PageSize, PAGE_SIZE }); auxv.append({ ELF::AuxiliaryValue::BaseAddress, (void*)load_base }); auxv.append({ ELF::AuxiliaryValue::Entry, (void*)entry_eip }); // NOTELF auxv.append({ ELF::AuxiliaryValue::Uid, (long)uid }); auxv.append({ ELF::AuxiliaryValue::EUid, (long)euid }); auxv.append({ ELF::AuxiliaryValue::Gid, (long)gid }); auxv.append({ ELF::AuxiliaryValue::EGid, (long)egid }); auxv.append({ ELF::AuxiliaryValue::Platform, Processor::current().platform_string() }); // FIXME: This is platform specific auxv.append({ ELF::AuxiliaryValue::HwCap, (long)CPUID(1).edx() }); auxv.append({ ELF::AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() }); // FIXME: Also take into account things like extended filesystem permissions? That's what linux does... auxv.append({ ELF::AuxiliaryValue::Secure, ((uid != euid) || (gid != egid)) ? 1 : 0 }); char random_bytes[16] {}; get_fast_random_bytes((u8*)random_bytes, sizeof(random_bytes)); auxv.append({ ELF::AuxiliaryValue::Random, String(random_bytes, sizeof(random_bytes)) }); auxv.append({ ELF::AuxiliaryValue::ExecFilename, executable_path }); auxv.append({ ELF::AuxiliaryValue::ExecFileDescriptor, main_program_fd }); auxv.append({ ELF::AuxiliaryValue::Null, 0L }); return auxv; } static KResultOr> find_shebang_interpreter_for_executable(const char first_page[], int nread) { int word_start = 2; int word_length = 0; if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') { Vector interpreter_words; for (int i = 2; i < nread; ++i) { if (first_page[i] == '\n') { break; } if (first_page[i] != ' ') { ++word_length; } if (first_page[i] == ' ') { if (word_length > 0) { interpreter_words.append(String(&first_page[word_start], word_length)); } word_length = 0; word_start = i + 1; } } if (word_length > 0) interpreter_words.append(String(&first_page[word_start], word_length)); if (!interpreter_words.is_empty()) return interpreter_words; } return ENOEXEC; } KResultOr> Process::find_elf_interpreter_for_executable(const String& path, const ElfW(Ehdr) & main_program_header, int nread, size_t file_size) { // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD String interpreter_path; if (!ELF::validate_program_headers(main_program_header, file_size, (const u8*)&main_program_header, nread, &interpreter_path)) { dbgln("exec({}): File has invalid ELF Program headers", path); return ENOEXEC; } if (!interpreter_path.is_empty()) { dbgln_if(EXEC_DEBUG, "exec({}): Using program interpreter {}", path, interpreter_path); auto interp_result = VirtualFileSystem::the().open(interpreter_path, O_EXEC, 0, current_directory()); if (interp_result.is_error()) { dbgln("exec({}): Unable to open program interpreter {}", path, interpreter_path); return interp_result.error(); } auto interpreter_description = interp_result.value(); auto interp_metadata = interpreter_description->metadata(); VERIFY(interpreter_description->inode()); // Validate the program interpreter as a valid elf binary. // If your program interpreter is a #! file or something, it's time to stop playing games :) if (interp_metadata.size < (int)sizeof(ElfW(Ehdr))) return ENOEXEC; char first_page[PAGE_SIZE] = {}; auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page); auto nread_or_error = interpreter_description->read(first_page_buffer, sizeof(first_page)); if (nread_or_error.is_error()) return ENOEXEC; nread = nread_or_error.value(); if (nread < (int)sizeof(ElfW(Ehdr))) return ENOEXEC; auto elf_header = (ElfW(Ehdr)*)first_page; if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) { dbgln("exec({}): Interpreter ({}) has invalid ELF header", path, interpreter_description->absolute_path()); return ENOEXEC; } // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD String interpreter_interpreter_path; if (!ELF::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, &interpreter_interpreter_path)) { dbgln("exec({}): Interpreter ({}) has invalid ELF Program headers", path, interpreter_description->absolute_path()); return ENOEXEC; } if (!interpreter_interpreter_path.is_empty()) { dbgln("exec({}): Interpreter ({}) has its own interpreter ({})! No thank you!", path, interpreter_description->absolute_path(), interpreter_interpreter_path); return ELOOP; } return interpreter_description; } if (main_program_header.e_type == ET_REL) { // We can't exec an ET_REL, that's just an object file from the compiler return ENOEXEC; } if (main_program_header.e_type == ET_DYN) { // If it's ET_DYN with no PT_INTERP, then it's a dynamic executable responsible // for its own relocation (i.e. it's /usr/lib/Loader.so) if (path != "/usr/lib/Loader.so") dbgln("exec({}): WARNING - Dynamic ELF executable without a PT_INTERP header, and isn't /usr/lib/Loader.so", path); return nullptr; } // No interpreter, but, path refers to a valid elf image return KResult(KSuccess); } KResult Process::exec(String path, Vector arguments, Vector environment, int recursion_depth) { if (recursion_depth > 2) { dbgln("exec({}): SHENANIGANS! recursed too far trying to find #! interpreter", path); return ELOOP; } // Open the file to check what kind of binary format it is // Currently supported formats: // - #! interpreted file // - ELF32 // * ET_EXEC binary that just gets loaded // * ET_DYN binary that requires a program interpreter // auto file_or_error = VirtualFileSystem::the().open(path, O_EXEC, 0, current_directory()); if (file_or_error.is_error()) return file_or_error.error(); auto description = file_or_error.release_value(); auto metadata = description->metadata(); if (!metadata.is_regular_file()) return EACCES; // Always gonna need at least 3 bytes. these are for #!X if (metadata.size < 3) return ENOEXEC; VERIFY(description->inode()); // Read the first page of the program into memory so we can validate the binfmt of it char first_page[PAGE_SIZE]; auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page); auto nread_or_error = description->read(first_page_buffer, sizeof(first_page)); if (nread_or_error.is_error()) return ENOEXEC; // 1) #! interpreted file auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread_or_error.value()); if (!shebang_result.is_error()) { auto shebang_words = shebang_result.release_value(); auto shebang_path = shebang_words.first(); arguments[0] = move(path); if (!arguments.try_prepend(move(shebang_words))) return ENOMEM; return exec(move(shebang_path), move(arguments), move(environment), ++recursion_depth); } // #2) ELF32 for i386 if (nread_or_error.value() < (int)sizeof(ElfW(Ehdr))) return ENOEXEC; auto main_program_header = (ElfW(Ehdr)*)first_page; if (!ELF::validate_elf_header(*main_program_header, metadata.size)) { dbgln("exec({}): File has invalid ELF header", path); return ENOEXEC; } auto elf_result = find_elf_interpreter_for_executable(path, *main_program_header, nread_or_error.value(), metadata.size); // Assume a static ELF executable by default RefPtr interpreter_description; // We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out) if (!elf_result.is_error()) { // It's a dynamic ELF executable, with or without an interpreter. Do not allocate TLS interpreter_description = elf_result.value(); } else if (elf_result.error().is_error()) return elf_result.error(); // The bulk of exec() is done by do_exec(), which ensures that all locals // are cleaned up by the time we yield-teleport below. Thread* new_main_thread = nullptr; u32 prev_flags = 0; auto result = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header); if (result.is_error()) return result; VERIFY_INTERRUPTS_DISABLED(); VERIFY(Processor::current().in_critical()); auto current_thread = Thread::current(); if (current_thread == new_main_thread) { // We need to enter the scheduler lock before changing the state // and it will be released after the context switch into that // thread. We should also still be in our critical section VERIFY(!g_scheduler_lock.own_lock()); VERIFY(Processor::current().in_critical() == 1); g_scheduler_lock.lock(); current_thread->set_state(Thread::State::Running); Processor::assume_context(*current_thread, prev_flags); VERIFY_NOT_REACHED(); } Processor::current().leave_critical(prev_flags); return KSuccess; } KResultOr Process::sys$execve(Userspace user_params) { VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this); REQUIRE_PROMISE(exec); // NOTE: Be extremely careful with allocating any kernel memory in exec(). // On success, the kernel stack will be lost. Syscall::SC_execve_params params; if (!copy_from_user(¶ms, user_params)) return EFAULT; if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX) return E2BIG; String path; { auto path_arg = get_syscall_path_argument(params.path); if (path_arg.is_error()) return path_arg.error(); path = path_arg.value()->view(); } auto copy_user_strings = [](const auto& list, auto& output) { if (!list.length) return true; Checked size = sizeof(*list.strings); size *= list.length; if (size.has_overflow()) return false; Vector strings; if (!strings.try_resize(list.length)) return false; if (!copy_from_user(strings.data(), list.strings, size.value())) return false; for (size_t i = 0; i < list.length; ++i) { auto string = copy_string_from_user(strings[i]); if (string.is_null()) return false; if (!output.try_append(move(string))) return false; } return true; }; Vector arguments; if (!copy_user_strings(params.arguments, arguments)) return EFAULT; Vector environment; if (!copy_user_strings(params.environment, environment)) return EFAULT; auto result = exec(move(path), move(arguments), move(environment)); VERIFY(result.is_error()); // We should never continue after a successful exec! return result.error(); } }