diff options
author | Andreas Kling <kling@serenityos.org> | 2021-02-08 15:45:40 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-02-08 18:27:28 +0100 |
commit | f1b5def8fd48cf09704a6d4151f9002b80354430 (patch) | |
tree | ec7af45dcfb129a0ae9ec434141d84f3faaef885 /Kernel | |
parent | b2cba3036ef83d4099d917124ad65efc955a2f68 (diff) | |
download | serenity-f1b5def8fd48cf09704a6d4151f9002b80354430.zip |
Kernel: Factor address space management out of the Process class
This patch adds Space, a class representing a process's address space.
- Each Process has a Space.
- The Space owns the PageDirectory and all Regions in the Process.
This allows us to reorganize sys$execve() so that it constructs and
populates a new Space fully before committing to it.
Previously, we would construct the new address space while still
running in the old one, and encountering an error meant we had to do
tedious and error-prone rollback.
Those problems are now gone, replaced by what's hopefully a set of much
smaller problems and missing cleanups. :^)
Diffstat (limited to 'Kernel')
27 files changed, 493 insertions, 403 deletions
diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt index 23367237f6..a5e8762685 100644 --- a/Kernel/CMakeLists.txt +++ b/Kernel/CMakeLists.txt @@ -212,6 +212,7 @@ set(KERNEL_SOURCES VM/RangeAllocator.cpp VM/Region.cpp VM/SharedInodeVMObject.cpp + VM/Space.cpp VM/VMObject.cpp WaitQueue.cpp init.cpp diff --git a/Kernel/CoreDump.cpp b/Kernel/CoreDump.cpp index 2f3007e863..4c947f197f 100644 --- a/Kernel/CoreDump.cpp +++ b/Kernel/CoreDump.cpp @@ -59,7 +59,7 @@ OwnPtr<CoreDump> CoreDump::create(NonnullRefPtr<Process> process, const String& CoreDump::CoreDump(NonnullRefPtr<Process> process, NonnullRefPtr<FileDescription>&& fd) : m_process(move(process)) , m_fd(move(fd)) - , m_num_program_headers(m_process->m_regions.size() + 1) // +1 for NOTE segment + , m_num_program_headers(m_process->space().region_count() + 1) // +1 for NOTE segment { } @@ -137,7 +137,7 @@ KResult CoreDump::write_elf_header() KResult CoreDump::write_program_headers(size_t notes_size) { size_t offset = sizeof(Elf32_Ehdr) + m_num_program_headers * sizeof(Elf32_Phdr); - for (auto& region : m_process->m_regions) { + for (auto& region : m_process->space().regions()) { Elf32_Phdr phdr {}; phdr.p_type = PT_LOAD; @@ -178,7 +178,7 @@ KResult CoreDump::write_program_headers(size_t notes_size) KResult CoreDump::write_regions() { - for (auto& region : m_process->m_regions) { + for (auto& region : m_process->space().regions()) { if (region.is_kernel()) continue; @@ -258,13 +258,13 @@ ByteBuffer CoreDump::create_notes_threads_data() const ByteBuffer CoreDump::create_notes_regions_data() const { ByteBuffer regions_data; - for (size_t region_index = 0; region_index < m_process->m_regions.size(); ++region_index) { + for (size_t region_index = 0; region_index < m_process->space().region_count(); ++region_index) { ByteBuffer memory_region_info_buffer; ELF::Core::MemoryRegionInfo info {}; info.header.type = ELF::Core::NotesEntryHeader::Type::MemoryRegionInfo; - auto& region = m_process->m_regions[region_index]; + 
auto& region = m_process->space().regions()[region_index]; info.region_start = reinterpret_cast<uint32_t>(region.vaddr().as_ptr()); info.region_end = reinterpret_cast<uint32_t>(region.vaddr().as_ptr() + region.size()); info.program_header_index = region_index; @@ -316,7 +316,7 @@ ByteBuffer CoreDump::create_notes_segment_data() const KResult CoreDump::write() { - ScopedSpinLock lock(m_process->get_lock()); + ScopedSpinLock lock(m_process->space().get_lock()); ProcessPagingScope scope(m_process); ByteBuffer notes_segment = create_notes_segment_data(); diff --git a/Kernel/Devices/BXVGADevice.cpp b/Kernel/Devices/BXVGADevice.cpp index 349cbe1079..307f66d2d6 100644 --- a/Kernel/Devices/BXVGADevice.cpp +++ b/Kernel/Devices/BXVGADevice.cpp @@ -185,7 +185,7 @@ KResultOr<Region*> BXVGADevice::mmap(Process& process, FileDescription&, const R auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject( + return process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/Devices/MBVGADevice.cpp b/Kernel/Devices/MBVGADevice.cpp index 55e7d176e8..999108c554 100644 --- a/Kernel/Devices/MBVGADevice.cpp +++ b/Kernel/Devices/MBVGADevice.cpp @@ -64,7 +64,7 @@ KResultOr<Region*> MBVGADevice::mmap(Process& process, FileDescription&, const R auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject( + return process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/Devices/MemoryDevice.cpp b/Kernel/Devices/MemoryDevice.cpp index acbe53ae90..8763271ebc 100644 --- a/Kernel/Devices/MemoryDevice.cpp +++ b/Kernel/Devices/MemoryDevice.cpp @@ -66,7 +66,7 @@ KResultOr<Region*> MemoryDevice::mmap(Process& process, FileDescription&, const if 
(!vmobject) return ENOMEM; dbgln("MemoryDevice: Mapped physical memory at {} for range of {} bytes", viewed_address, range.size()); - return process.allocate_region_with_vmobject( + return process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/FileSystem/AnonymousFile.cpp b/Kernel/FileSystem/AnonymousFile.cpp index aede4e9474..82a5669f84 100644 --- a/Kernel/FileSystem/AnonymousFile.cpp +++ b/Kernel/FileSystem/AnonymousFile.cpp @@ -47,7 +47,7 @@ KResultOr<Region*> AnonymousFile::mmap(Process& process, FileDescription&, const if (range.size() != m_vmobject->size()) return EINVAL; - return process.allocate_region_with_vmobject(range, m_vmobject, offset, {}, prot, shared); + return process.space().allocate_region_with_vmobject(range, m_vmobject, offset, {}, prot, shared); } } diff --git a/Kernel/FileSystem/InodeFile.cpp b/Kernel/FileSystem/InodeFile.cpp index fe52944006..755e9abbea 100644 --- a/Kernel/FileSystem/InodeFile.cpp +++ b/Kernel/FileSystem/InodeFile.cpp @@ -117,7 +117,7 @@ KResultOr<Region*> InodeFile::mmap(Process& process, FileDescription& descriptio vmobject = PrivateInodeVMObject::create_with_inode(inode()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject(range, vmobject.release_nonnull(), offset, description.absolute_path(), prot, shared); + return process.space().allocate_region_with_vmobject(range, vmobject.release_nonnull(), offset, description.absolute_path(), prot, shared); } String InodeFile::absolute_path(const FileDescription& description) const diff --git a/Kernel/FileSystem/ProcFS.cpp b/Kernel/FileSystem/ProcFS.cpp index bb83131cf1..8551c6fece 100644 --- a/Kernel/FileSystem/ProcFS.cpp +++ b/Kernel/FileSystem/ProcFS.cpp @@ -317,8 +317,8 @@ static bool procfs$pid_vm(InodeIdentifier identifier, KBufferBuilder& builder) return false; JsonArraySerializer array { builder }; { - ScopedSpinLock lock(process->get_lock()); - for (auto& region : process->regions()) { + 
ScopedSpinLock lock(process->space().get_lock()); + for (auto& region : process->space().regions()) { if (!region.is_user_accessible() && !Process::current()->is_superuser()) continue; auto region_object = array.add_object(); diff --git a/Kernel/Forward.h b/Kernel/Forward.h index d2e1c26c54..e5a3de0c27 100644 --- a/Kernel/Forward.h +++ b/Kernel/Forward.h @@ -62,6 +62,7 @@ class Region; class Scheduler; class SchedulerPerProcessorData; class Socket; +class Space; template<typename BaseType> class SpinLock; class RecursiveSpinLock; diff --git a/Kernel/PerformanceEventBuffer.cpp b/Kernel/PerformanceEventBuffer.cpp index 0aaa07c3fd..c0e6d2e06c 100644 --- a/Kernel/PerformanceEventBuffer.cpp +++ b/Kernel/PerformanceEventBuffer.cpp @@ -121,7 +121,7 @@ bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, con { auto process = Process::from_pid(pid); ASSERT(process); - ScopedSpinLock locker(process->get_lock()); + ScopedSpinLock locker(process->space().get_lock()); JsonObjectSerializer object(builder); object.add("pid", pid.value()); @@ -129,7 +129,7 @@ bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, con { auto region_array = object.add_array("regions"); - for (const auto& region : process->regions()) { + for (const auto& region : process->space().regions()) { auto region_object = region_array.add_object(); region_object.add("base", region.vaddr().get()); region_object.add("size", region.size()); diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp index 462ae5bda9..7a39711418 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -116,110 +116,6 @@ bool Process::in_group(gid_t gid) const return m_gid == gid || m_extra_gids.contains_slow(gid); } -Optional<Range> Process::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment) -{ - vaddr.mask(PAGE_MASK); - size = PAGE_ROUND_UP(size); - if (vaddr.is_null()) - return page_directory().range_allocator().allocate_anywhere(size, alignment); - return 
page_directory().range_allocator().allocate_specific(vaddr, size); -} - -Region& Process::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject) -{ - auto& region = add_region( - Region::create_user_accessible(this, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared())); - region.set_syscall_region(source_region.is_syscall_region()); - region.set_mmap(source_region.is_mmap()); - region.set_stack(source_region.is_stack()); - size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE; - for (size_t i = 0; i < region.page_count(); ++i) { - if (source_region.should_cow(page_offset_in_source_region + i)) - region.set_should_cow(i, true); - } - return region; -} - -KResultOr<Region*> Process::allocate_region(const Range& range, const String& name, int prot, AllocationStrategy strategy) -{ - ASSERT(range.is_valid()); - auto vmobject = AnonymousVMObject::create_with_size(range.size(), strategy); - if (!vmobject) - return ENOMEM; - auto region = Region::create_user_accessible(this, range, vmobject.release_nonnull(), 0, name, prot_to_region_access_flags(prot), true, false); - if (!region->map(page_directory())) - return ENOMEM; - return &add_region(move(region)); -} - -KResultOr<Region*> Process::allocate_region_with_vmobject(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool shared) -{ - ASSERT(range.is_valid()); - size_t end_in_vmobject = offset_in_vmobject + range.size(); - if (end_in_vmobject <= offset_in_vmobject) { - dbgln("allocate_region_with_vmobject: Overflow (offset + size)"); - return EINVAL; - } - if (offset_in_vmobject >= vmobject->size()) { - dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject."); - return EINVAL; - } - if (end_in_vmobject > 
vmobject->size()) { - dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject."); - return EINVAL; - } - offset_in_vmobject &= PAGE_MASK; - auto& region = add_region(Region::create_user_accessible(this, range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot), true, shared)); - if (!region.map(page_directory())) { - // FIXME: What is an appropriate error code here, really? - return ENOMEM; - } - return &region; -} - -bool Process::deallocate_region(Region& region) -{ - OwnPtr<Region> region_protector; - ScopedSpinLock lock(m_lock); - - if (m_region_lookup_cache.region.unsafe_ptr() == &region) - m_region_lookup_cache.region = nullptr; - for (size_t i = 0; i < m_regions.size(); ++i) { - if (&m_regions[i] == &region) { - region_protector = m_regions.unstable_take(i); - return true; - } - } - return false; -} - -Region* Process::find_region_from_range(const Range& range) -{ - ScopedSpinLock lock(m_lock); - if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region) - return m_region_lookup_cache.region.unsafe_ptr(); - - size_t size = PAGE_ROUND_UP(range.size()); - for (auto& region : m_regions) { - if (region.vaddr() == range.base() && region.size() == size) { - m_region_lookup_cache.range = range; - m_region_lookup_cache.region = region; - return &region; - } - } - return nullptr; -} - -Region* Process::find_region_containing(const Range& range) -{ - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { - if (region.contains(range)) - return &region; - } - return nullptr; -} - void Process::kill_threads_except_self() { InterruptDisabler disabler; @@ -339,7 +235,7 @@ Process::Process(RefPtr<Thread>& first_thread, const String& name, uid_t uid, gi { dbgln_if(PROCESS_DEBUG, "Created new process {}({})", m_name, m_pid.value()); - m_page_directory = PageDirectory::create_for_userspace(*this, fork_parent ? 
&fork_parent->page_directory().range_allocator() : nullptr); + m_space = Space::create(*this, fork_parent ? &fork_parent->space() : nullptr); if (fork_parent) { // NOTE: fork() doesn't clone all threads; the thread that called fork() becomes the only thread in the new process. @@ -365,28 +261,6 @@ Process::~Process() } } -void Process::dump_regions() -{ - klog() << "Process regions:"; - klog() << "BEGIN END SIZE ACCESS NAME"; - - ScopedSpinLock lock(m_lock); - - Vector<Region*> sorted_regions; - sorted_regions.ensure_capacity(m_regions.size()); - for (auto& region : m_regions) - sorted_regions.append(&region); - quick_sort(sorted_regions, [](auto& a, auto& b) { - return a->vaddr() < b->vaddr(); - }); - - for (auto& sorted_region : sorted_regions) { - auto& region = *sorted_region; - klog() << String::format("%08x", region.vaddr().get()) << " -- " << String::format("%08x", region.vaddr().offset(region.size() - 1).get()) << " " << String::format("%08zx", region.size()) << " " << (region.is_readable() ? 'R' : ' ') << (region.is_writable() ? 'W' : ' ') << (region.is_executable() ? 'X' : ' ') << (region.is_shared() ? 'S' : ' ') << (region.is_stack() ? 'T' : ' ') << (region.vmobject().is_anonymous() ? 
'A' : ' ') << " " << region.name().characters(); - } - MM.dump_kernel_regions(); -} - // Make sure the compiler doesn't "optimize away" this function: extern void signal_trampoline_dummy(); void signal_trampoline_dummy() @@ -457,7 +331,7 @@ void Process::crash(int signal, u32 eip, bool out_of_memory) } m_termination_signal = signal; set_dump_core(!out_of_memory); - dump_regions(); + space().dump_regions(); ASSERT(is_user_process()); die(); // We can not return from here, as there is nowhere @@ -643,10 +517,7 @@ void Process::finalize() unblock_waiters(Thread::WaitBlocker::UnblockFlags::Terminated); - { - ScopedSpinLock lock(m_lock); - m_regions.clear(); - } + m_space->remove_all_regions({}); ASSERT(ref_count() > 0); // WaitBlockCondition::finalize will be in charge of dropping the last @@ -689,8 +560,8 @@ size_t Process::amount_dirty_private() const // The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping. // That's probably a situation that needs to be looked at in general. 
size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (!region.is_shared()) amount += region.amount_dirty(); } @@ -701,8 +572,8 @@ size_t Process::amount_clean_inode() const { HashTable<const InodeVMObject*> vmobjects; { - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_inode()) vmobjects.set(&static_cast<const InodeVMObject&>(region.vmobject())); } @@ -716,8 +587,8 @@ size_t Process::amount_clean_inode() const size_t Process::amount_virtual() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.size(); } return amount; @@ -727,8 +598,8 @@ size_t Process::amount_resident() const { // FIXME: This will double count if multiple regions use the same physical page. size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.amount_resident(); } return amount; @@ -741,8 +612,8 @@ size_t Process::amount_shared() const // and each PhysicalPage is only reffed by its VMObject. This needs to be refactored // so that every Region contributes +1 ref to each of its PhysicalPages. 
size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.amount_shared(); } return amount; @@ -751,8 +622,8 @@ size_t Process::amount_shared() const size_t Process::amount_purgeable_volatile() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_anonymous() && static_cast<const AnonymousVMObject&>(region.vmobject()).is_any_volatile()) amount += region.amount_resident(); } @@ -762,8 +633,8 @@ size_t Process::amount_purgeable_volatile() const size_t Process::amount_purgeable_nonvolatile() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_anonymous() && !static_cast<const AnonymousVMObject&>(region.vmobject()).is_any_volatile()) amount += region.amount_resident(); } @@ -858,14 +729,6 @@ void Process::set_root_directory(const Custody& root) m_root_directory = root; } -Region& Process::add_region(NonnullOwnPtr<Region> region) -{ - auto* ptr = region.ptr(); - ScopedSpinLock lock(m_lock); - m_regions.append(move(region)); - return *ptr; -} - void Process::set_tty(TTY* tty) { m_tty = tty; diff --git a/Kernel/Process.h b/Kernel/Process.h index 9fdd863efd..427dbb660d 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -48,6 +48,7 @@ #include <Kernel/UnveilNode.h> #include <Kernel/VM/AllocationStrategy.h> #include <Kernel/VM/RangeAllocator.h> +#include <Kernel/VM/Space.h> #include <LibC/signal_numbers.h> #include <LibELF/exec_elf.h> @@ -99,6 +100,8 @@ enum class VeilState { typedef HashMap<FlatPtr, RefPtr<FutexQueue>> FutexQueues; +struct LoadResult; + class Process : public RefCounted<Process> , public InlineLinkedListNode<Process> @@ -164,9 +167,6 @@ public: bool is_kernel_process() const { return m_is_kernel_process; } bool is_user_process() const { return !m_is_kernel_process; } - PageDirectory& page_directory() { return *m_page_directory; } - const PageDirectory& page_directory() const { return *m_page_directory; } - static RefPtr<Process> from_pid(ProcessID); static SessionID get_sid_from_pgid(ProcessGroupID pgid); @@ -188,8 +188,6 @@ public: bool is_dumpable() const { return m_dumpable; } void set_dumpable(bool dumpable) { m_dumpable = dumpable; } - ThreadID exec_tid() const { return m_exec_tid; } - mode_t umask() const { return m_umask; } bool in_group(gid_t) const; @@ -209,8 +207,6 @@ public: void die(); void finalize(); - ALWAYS_INLINE SpinLock<u32>& get_lock() const { return m_lock; } - ThreadTracer* tracer() { return m_tracer.ptr(); } bool is_traced() const { return !!m_tracer; } void start_tracing_from(ProcessID tracer); @@ -373,14 +369,6 @@ public: const TTY* tty() const { return m_tty; } void set_tty(TTY*); - size_t region_count() const { return m_regions.size(); } - const NonnullOwnPtrVector<Region>& regions() const - { - ASSERT(m_lock.is_locked()); - return m_regions; - } - void dump_regions(); - u32 m_ticks_in_user { 0 }; u32 m_ticks_in_kernel { 0 }; @@ -410,25 +398,12 @@ public: int exec(String path, Vector<String> arguments, Vector<String> environment, int recusion_depth = 0); - struct LoadResult { - FlatPtr load_base { 0 }; - FlatPtr entry_eip { 0 }; - size_t size { 0 }; - FlatPtr 
program_headers { 0 }; - size_t num_program_headers { 0 }; - WeakPtr<Region> tls_region; - size_t tls_size { 0 }; - size_t tls_alignment { 0 }; - WeakPtr<Region> stack_region; - }; - enum class ShouldAllocateTls { No = 0, Yes, }; KResultOr<LoadResult> load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description, const Elf32_Ehdr& main_program_header); - KResultOr<LoadResult> load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls); KResultOr<FlatPtr> get_interpreter_load_offset(const Elf32_Ehdr& main_program_header, FileDescription& main_program_description, FileDescription& interpreter_description); bool is_superuser() const @@ -436,13 +411,6 @@ public: return m_euid == 0; } - KResultOr<Region*> allocate_region_with_vmobject(const Range&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, const String& name, int prot, bool shared); - KResultOr<Region*> allocate_region(const Range&, const String& name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve); - bool deallocate_region(Region& region); - - Region& allocate_split_region(const Region& source_region, const Range&, size_t offset_in_vmobject); - Vector<Region*, 2> split_region_around_range(const Region& source_region, const Range&); - void terminate_due_to_signal(u8 signal); KResult send_signal(u8 signal, Process* sender); @@ -503,7 +471,8 @@ public: PerformanceEventBuffer* perf_events() { return m_perf_event_buffer; } - bool enforces_syscall_regions() const { return m_enforces_syscall_regions; } + Space& space() { return *m_space; } + const Space& space() const { return *m_space; } private: friend class MemoryManager; @@ -518,10 +487,6 @@ private: Process(RefPtr<Thread>& first_thread, const String& name, uid_t, gid_t, ProcessID ppid, bool is_kernel_process, RefPtr<Custody> cwd = nullptr, RefPtr<Custody> executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr); static 
ProcessID allocate_pid(); - Optional<Range> allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE); - - Region& add_region(NonnullOwnPtr<Region>); - void kill_threads_except_self(); void kill_all_threads(); bool dump_core(); @@ -552,13 +517,13 @@ private: void clear_futex_queues_on_exec(); - RefPtr<PageDirectory> m_page_directory; - Process* m_prev { nullptr }; Process* m_next { nullptr }; String m_name; + OwnPtr<Space> m_space; + ProcessID m_pid { 0 }; SessionID m_sid { 0 }; RefPtr<ProcessGroup> m_pg; @@ -570,8 +535,6 @@ private: uid_t m_suid { 0 }; gid_t m_sgid { 0 }; - ThreadID m_exec_tid { 0 }; - OwnPtr<ThreadTracer> m_tracer; static const int m_max_open_file_descriptors { FD_SETSIZE }; @@ -617,16 +580,6 @@ private: RefPtr<TTY> m_tty; - Region* find_region_from_range(const Range&); - Region* find_region_containing(const Range&); - - NonnullOwnPtrVector<Region> m_regions; - struct RegionLookupCache { - Optional<Range> range; - WeakPtr<Region> region; - }; - RegionLookupCache m_region_lookup_cache; - ProcessID m_ppid { 0 }; mode_t m_umask { 022 }; @@ -639,12 +592,9 @@ private: size_t m_master_tls_alignment { 0 }; Lock m_big_lock { "Process" }; - mutable SpinLock<u32> m_lock; RefPtr<Timer> m_alarm_timer; - bool m_enforces_syscall_regions { false }; - bool m_has_promises { false }; u32 m_promises { 0 }; bool m_has_execpromises { false }; diff --git a/Kernel/Syscall.cpp b/Kernel/Syscall.cpp index 8eb79eca52..62652008a5 100644 --- a/Kernel/Syscall.cpp +++ b/Kernel/Syscall.cpp @@ -176,7 +176,7 @@ void syscall_handler(TrapFrame* trap) ASSERT_NOT_REACHED(); } - auto* calling_region = MM.find_region_from_vaddr(process, VirtualAddress(regs.eip)); + auto* calling_region = MM.find_region_from_vaddr(process.space(), VirtualAddress(regs.eip)); if (!calling_region) { dbgln("Syscall from {:p} which has no associated region", regs.eip); handle_crash(regs, "Syscall from unknown region", SIGSEGV); @@ -189,7 +189,7 @@ void syscall_handler(TrapFrame* trap) 
ASSERT_NOT_REACHED(); } - if (process.enforces_syscall_regions() && !calling_region->is_syscall_region()) { + if (process.space().enforces_syscall_regions() && !calling_region->is_syscall_region()) { dbgln("Syscall from non-syscall region"); handle_crash(regs, "Syscall from non-syscall region", SIGSEGV); ASSERT_NOT_REACHED(); diff --git a/Kernel/Syscalls/execve.cpp b/Kernel/Syscalls/execve.cpp index e102f56970..a06969f905 100644 --- a/Kernel/Syscalls/execve.cpp +++ b/Kernel/Syscalls/execve.cpp @@ -47,6 +47,19 @@ namespace Kernel { +struct LoadResult { + OwnPtr<Space> space; + FlatPtr load_base { 0 }; + FlatPtr entry_eip { 0 }; + size_t size { 0 }; + FlatPtr program_headers { 0 }; + size_t num_program_headers { 0 }; + WeakPtr<Region> tls_region; + size_t tls_size { 0 }; + size_t tls_alignment { 0 }; + WeakPtr<Region> stack_region; +}; + static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd); static bool validate_stack_size(const Vector<String>& arguments, const Vector<String>& environment) @@ -142,7 +155,7 @@ static KResultOr<FlatPtr> make_userspace_stack_for_main_thread(Region& region, V return new_esp; } -KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls should_allocate_tls) +static KResultOr<LoadResult> load_elf_object(NonnullOwnPtr<Space> new_space, FileDescription& object_description, FlatPtr load_offset, Process::ShouldAllocateTls should_allocate_tls) { auto& inode = *(object_description.inode()); auto vmobject = SharedInodeVMObject::create_with_inode(inode); @@ -172,10 +185,12 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_ String elf_name = object_description.absolute_path(); ASSERT(!Processor::current().in_critical()); + MemoryManager::enter_space(*new_space); + KResult ph_load_result = KSuccess; 
elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) { if (program_header.type() == PT_TLS) { - ASSERT(should_allocate_tls == ShouldAllocateTls::Yes); + ASSERT(should_allocate_tls == Process::ShouldAllocateTls::Yes); ASSERT(program_header.size_in_memory()); if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) { @@ -184,13 +199,13 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_ return IterationDecision::Break; } - auto range = allocate_range({}, program_header.size_in_memory()); + auto range = new_space->allocate_range({}, program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto region_or_error = allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); + auto region_or_error = new_space->allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -225,12 +240,12 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_ if (program_header.is_writable()) prot |= PROT_WRITE; auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? 
"w" : ""); - auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); + auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto region_or_error = allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve); + auto region_or_error = new_space->allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -262,12 +277,12 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_ prot |= PROT_WRITE; if (program_header.is_executable()) prot |= PROT_EXEC; - auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); + auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto region_or_error = allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true); + auto region_or_error = new_space->allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -287,19 +302,20 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_ return ENOEXEC; } - auto stack_range = allocate_range({}, Thread::default_userspace_stack_size); + auto stack_range = new_space->allocate_range({}, Thread::default_userspace_stack_size); if (!stack_range.has_value()) { dbgln("do_exec: Failed to allocate VM range for stack"); return ENOMEM; } - auto stack_region_or_error = allocate_region(stack_range.value(), "Stack (Main 
thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); + auto stack_region_or_error = new_space->allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); if (stack_region_or_error.is_error()) return stack_region_or_error.error(); auto& stack_region = *stack_region_or_error.value(); stack_region.set_stack(true); return LoadResult { + move(new_space), load_base_address, elf_image.entry().offset(load_offset).get(), executable_size, @@ -312,44 +328,20 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_ }; } -KResultOr<Process::LoadResult> Process::load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description, const Elf32_Ehdr& main_program_header) +KResultOr<LoadResult> Process::load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description, const Elf32_Ehdr& main_program_header) { - RefPtr<PageDirectory> old_page_directory; - NonnullOwnPtrVector<Region> old_regions; - - { - auto page_directory = PageDirectory::create_for_userspace(*this); - if (!page_directory) - return ENOMEM; - - // Need to make sure we don't swap contexts in the middle - ScopedCritical critical; - old_page_directory = move(m_page_directory); - old_regions = move(m_regions); - m_page_directory = page_directory.release_nonnull(); - MM.enter_process_paging_scope(*this); - } + auto new_space = Space::create(*this, nullptr); + if (!new_space) + return ENOMEM; - ArmedScopeGuard rollback_regions_guard([&]() { - ASSERT(Process::current() == this); - // Need to make sure we don't swap contexts in the middle - ScopedCritical critical; - // Explicitly clear m_regions *before* restoring the page directory, - // otherwise we may silently corrupt memory! 
- m_regions.clear(); - // Now that we freed the regions, revert to the original page directory - // and restore the original regions - m_page_directory = move(old_page_directory); - MM.enter_process_paging_scope(*this); - m_regions = move(old_regions); + ScopeGuard space_guard([&]() { + MemoryManager::enter_process_paging_scope(*this); }); if (interpreter_description.is_null()) { - auto result = load_elf_object(main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes); + auto result = load_elf_object(new_space.release_nonnull(), main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes); if (result.is_error()) return result.error(); - - rollback_regions_guard.disarm(); return result; } @@ -358,7 +350,7 @@ KResultOr<Process::LoadResult> Process::load(NonnullRefPtr<FileDescription> main return interpreter_load_offset.error(); } - auto interpreter_load_result = load_elf_object(*interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No); + auto interpreter_load_result = load_elf_object(new_space.release_nonnull(), *interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No); if (interpreter_load_result.is_error()) return interpreter_load_result.error(); @@ -368,7 +360,6 @@ KResultOr<Process::LoadResult> Process::load(NonnullRefPtr<FileDescription> main ASSERT(!interpreter_load_result.value().tls_alignment); ASSERT(!interpreter_load_result.value().tls_size); - rollback_regions_guard.disarm(); return interpreter_load_result; } @@ -481,34 +472,22 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve if (parts.is_empty()) return -ENOENT; - // Disable profiling temporarily in case it's running on this process. 
- TemporaryChange profiling_disabler(m_profiling, false); - - // Mark this thread as the current thread that does exec - // No other thread from this process will be scheduled to run - auto current_thread = Thread::current(); - m_exec_tid = current_thread->tid(); - - // NOTE: We switch credentials before altering the memory layout of the process. - // This ensures that ptrace access control takes the right credentials into account. + auto main_program_metadata = main_program_description->metadata(); - // FIXME: This still feels rickety. Perhaps it would be better to simply block ptrace - // clients until we're ready to be traced? Or reject them with EPERM? + auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header); + if (load_result_or_error.is_error()) { + dbgln("do_exec({}): Failed to load main program or interpreter", path); + return load_result_or_error.error(); + } - auto main_program_metadata = main_program_description->metadata(); + // We commit to the new executable at this point. There is no turning back! - auto old_euid = m_euid; - auto old_suid = m_suid; - auto old_egid = m_egid; - auto old_sgid = m_sgid; + // Disable profiling temporarily in case it's running on this process. 
+ TemporaryChange profiling_disabler(m_profiling, false); - ArmedScopeGuard cred_restore_guard = [&] { - m_euid = old_euid; - m_suid = old_suid; - m_egid = old_egid; - m_sgid = old_sgid; - }; + kill_threads_except_self(); + auto& load_result = load_result_or_error.value(); bool executable_is_setid = false; if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) { @@ -522,17 +501,8 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve } } - auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header); - if (load_result_or_error.is_error()) { - dbgln("do_exec({}): Failed to load main program or interpreter", path); - return load_result_or_error.error(); - } - auto& load_result = load_result_or_error.value(); - - // We can commit to the new credentials at this point. - cred_restore_guard.disarm(); - - kill_threads_except_self(); + m_space = load_result.space.release_nonnull(); + MemoryManager::enter_space(*m_space); #if EXEC_DEBUG dbgln("Memory layout after ELF load:"); @@ -549,20 +519,17 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve m_execpromises = 0; m_has_execpromises = false; - m_enforces_syscall_regions = false; - m_veil_state = VeilState::None; m_unveiled_paths.clear(); m_coredump_metadata.clear(); + auto current_thread = Thread::current(); current_thread->set_default_signal_dispositions(); current_thread->clear_signals(); clear_futex_queues_on_exec(); - m_region_lookup_cache = {}; - set_dumpable(!executable_is_setid); for (size_t i = 0; i < m_fds.size(); ++i) { @@ -616,8 +583,10 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve // FIXME: PID/TID ISSUE m_pid = new_main_thread->tid().value(); auto tsr_result = new_main_thread->make_thread_specific_region({}); - if (tsr_result.is_error()) - return tsr_result.error(); + if (tsr_result.is_error()) { + // FIXME: We cannot fail this late. 
Refactor this so the allocation happens before we commit to the new executable. + ASSERT_NOT_REACHED(); + } new_main_thread->reset_fpu_state(); auto& tss = new_main_thread->m_tss; @@ -629,7 +598,7 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve tss.gs = GDT_SELECTOR_TLS | 3; tss.eip = load_result.entry_eip; tss.esp = new_userspace_esp; - tss.cr3 = m_page_directory->cr3(); + tss.cr3 = space().page_directory().cr3(); tss.ss2 = m_pid.value(); // Throw away any recorded performance events in this process. @@ -870,8 +839,6 @@ int Process::exec(String path, Vector<String> arguments, Vector<String> environm u32 prev_flags = 0; int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header); - m_exec_tid = 0; - if (rc < 0) return rc; diff --git a/Kernel/Syscalls/fork.cpp b/Kernel/Syscalls/fork.cpp index 62287358c8..c5b4a35bc8 100644 --- a/Kernel/Syscalls/fork.cpp +++ b/Kernel/Syscalls/fork.cpp @@ -47,15 +47,14 @@ pid_t Process::sys$fork(RegisterState& regs) child->m_has_execpromises = m_has_execpromises; child->m_veil_state = m_veil_state; child->m_unveiled_paths = m_unveiled_paths.deep_copy(); - child->m_enforces_syscall_regions = m_enforces_syscall_regions; child->m_fds = m_fds; child->m_sid = m_sid; child->m_pg = m_pg; child->m_umask = m_umask; + child->m_extra_gids = m_extra_gids; dbgln_if(FORK_DEBUG, "fork: child={}", child); - - child->m_extra_gids = m_extra_gids; + child->space().set_enforces_syscall_regions(space().enforces_syscall_regions()); auto& child_tss = child_first_thread->m_tss; child_tss.eax = 0; // fork() returns 0 in the child :^) @@ -80,8 +79,8 @@ pid_t Process::sys$fork(RegisterState& regs) #endif { - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { dbgln_if(FORK_DEBUG, "fork: cloning Region({}) '{}' @ {}", ®ion, region.name(), 
region.vaddr()); auto region_clone = region.clone(*child); if (!region_clone) { @@ -90,8 +89,8 @@ pid_t Process::sys$fork(RegisterState& regs) return -ENOMEM; } - auto& child_region = child->add_region(region_clone.release_nonnull()); - child_region.map(child->page_directory()); + auto& child_region = child->space().add_region(region_clone.release_nonnull()); + child_region.map(child->space().page_directory()); if (®ion == m_master_tls_region.unsafe_ptr()) child->m_master_tls_region = child_region; diff --git a/Kernel/Syscalls/futex.cpp b/Kernel/Syscalls/futex.cpp index 27763ea965..bee8bf9bef 100644 --- a/Kernel/Syscalls/futex.cpp +++ b/Kernel/Syscalls/futex.cpp @@ -147,7 +147,7 @@ int Process::sys$futex(Userspace<const Syscall::SC_futex_params*> user_params) if (!is_private) { if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset), sizeof(u32))) return -EFAULT; - auto region = MM.find_region_from_vaddr(*Process::current(), VirtualAddress(user_address_or_offset)); + auto region = MM.find_region_from_vaddr(space(), VirtualAddress(user_address_or_offset)); if (!region) return -EFAULT; vmobject = region->vmobject(); @@ -159,7 +159,7 @@ int Process::sys$futex(Userspace<const Syscall::SC_futex_params*> user_params) case FUTEX_WAKE_OP: { if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset2), sizeof(u32))) return -EFAULT; - auto region2 = MM.find_region_from_vaddr(*Process::current(), VirtualAddress(user_address_or_offset2)); + auto region2 = MM.find_region_from_vaddr(space(), VirtualAddress(user_address_or_offset2)); if (!region2) return -EFAULT; vmobject2 = region2->vmobject(); diff --git a/Kernel/Syscalls/get_stack_bounds.cpp b/Kernel/Syscalls/get_stack_bounds.cpp index a91a401946..a1b115ec3c 100644 --- a/Kernel/Syscalls/get_stack_bounds.cpp +++ b/Kernel/Syscalls/get_stack_bounds.cpp @@ -32,7 +32,7 @@ namespace Kernel { int Process::sys$get_stack_bounds(FlatPtr* user_stack_base, size_t* user_stack_size) { FlatPtr stack_pointer = 
Thread::current()->get_register_dump_from_stack().userspace_esp; - auto* stack_region = MM.find_region_from_vaddr(*this, VirtualAddress(stack_pointer)); + auto* stack_region = MM.find_region_from_vaddr(space(), VirtualAddress(stack_pointer)); if (!stack_region) { ASSERT_NOT_REACHED(); return -EINVAL; diff --git a/Kernel/Syscalls/mmap.cpp b/Kernel/Syscalls/mmap.cpp index eaf0f5c1b5..4a6bb96662 100644 --- a/Kernel/Syscalls/mmap.cpp +++ b/Kernel/Syscalls/mmap.cpp @@ -204,13 +204,13 @@ void* Process::sys$mmap(Userspace<const Syscall::SC_mmap_params*> user_params) Optional<Range> range; if (map_randomized) { - range = page_directory().range_allocator().allocate_randomized(PAGE_ROUND_UP(size), alignment); + range = space().page_directory().range_allocator().allocate_randomized(PAGE_ROUND_UP(size), alignment); } else { - range = allocate_range(VirtualAddress(addr), size, alignment); + range = space().allocate_range(VirtualAddress(addr), size, alignment); if (!range.has_value()) { if (addr && !map_fixed) { // If there's an address but MAP_FIXED wasn't specified, the address is just a hint. - range = allocate_range({}, size, alignment); + range = space().allocate_range({}, size, alignment); } } } @@ -220,7 +220,7 @@ void* Process::sys$mmap(Userspace<const Syscall::SC_mmap_params*> user_params) if (map_anonymous) { auto strategy = map_noreserve ? AllocationStrategy::None : AllocationStrategy::Reserve; - auto region_or_error = allocate_region(range.value(), !name.is_null() ? name : "mmap", prot, strategy); + auto region_or_error = space().allocate_region(range.value(), !name.is_null() ? 
name : "mmap", prot, strategy); if (region_or_error.is_error()) return (void*)region_or_error.error().error(); region = region_or_error.value(); @@ -280,7 +280,7 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) Range range_to_mprotect = { VirtualAddress(addr), size }; - if (auto* whole_region = find_region_from_range(range_to_mprotect)) { + if (auto* whole_region = space().find_region_from_range(range_to_mprotect)) { if (!whole_region->is_mmap()) return -EPERM; if (!validate_mmap_prot(prot, whole_region->is_stack(), whole_region->vmobject().is_anonymous(), whole_region)) @@ -300,7 +300,7 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) } // Check if we can carve out the desired range from an existing region - if (auto* old_region = find_region_containing(range_to_mprotect)) { + if (auto* old_region = space().find_region_containing(range_to_mprotect)) { if (!old_region->is_mmap()) return -EPERM; if (!validate_mmap_prot(prot, old_region->is_stack(), old_region->vmobject().is_anonymous(), old_region)) @@ -314,23 +314,23 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) // This vector is the region(s) adjacent to our range. // We need to allocate a new region for the range we wanted to change permission bits on. 
- auto adjacent_regions = split_region_around_range(*old_region, range_to_mprotect); + auto adjacent_regions = space().split_region_around_range(*old_region, range_to_mprotect); size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (range_to_mprotect.base().get() - old_region->range().base().get()); - auto& new_region = allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject); + auto& new_region = space().allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject); new_region.set_readable(prot & PROT_READ); new_region.set_writable(prot & PROT_WRITE); new_region.set_executable(prot & PROT_EXEC); // Unmap the old region here, specifying that we *don't* want the VM deallocated. old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); // Map the new regions using our page directory (they were just allocated and don't have one). for (auto* adjacent_region : adjacent_regions) { - adjacent_region->map(page_directory()); + adjacent_region->map(space().page_directory()); } - new_region.map(page_directory()); + new_region.map(space().page_directory()); return 0; } @@ -349,7 +349,7 @@ int Process::sys$madvise(void* address, size_t size, int advice) if (!is_user_range(VirtualAddress(address), size)) return -EFAULT; - auto* region = find_region_from_range({ VirtualAddress(address), size }); + auto* region = space().find_region_from_range({ VirtualAddress(address), size }); if (!region) return -EINVAL; if (!region->is_mmap()) @@ -397,7 +397,7 @@ int Process::sys$set_mmap_name(Userspace<const Syscall::SC_set_mmap_name_params* if (name.is_null()) return -EFAULT; - auto* region = find_region_from_range({ VirtualAddress(params.addr), params.size }); + auto* region = space().find_region_from_range({ VirtualAddress(params.addr), params.size }); if (!region) return -EINVAL; if (!region->is_mmap()) @@ -406,24 +406,6 @@ int 
Process::sys$set_mmap_name(Userspace<const Syscall::SC_set_mmap_name_params* return 0; } -// Carve out a virtual address range from a region and return the two regions on either side -Vector<Region*, 2> Process::split_region_around_range(const Region& source_region, const Range& desired_range) -{ - Range old_region_range = source_region.range(); - auto remaining_ranges_after_unmap = old_region_range.carve(desired_range); - - ASSERT(!remaining_ranges_after_unmap.is_empty()); - auto make_replacement_region = [&](const Range& new_range) -> Region& { - ASSERT(old_region_range.contains(new_range)); - size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get()); - return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject); - }; - Vector<Region*, 2> new_regions; - for (auto& new_range : remaining_ranges_after_unmap) { - new_regions.unchecked_append(&make_replacement_region(new_range)); - } - return new_regions; -} int Process::sys$munmap(void* addr, size_t size) { REQUIRE_PROMISE(stdio); @@ -435,30 +417,30 @@ int Process::sys$munmap(void* addr, size_t size) return -EFAULT; Range range_to_unmap { VirtualAddress(addr), size }; - if (auto* whole_region = find_region_from_range(range_to_unmap)) { + if (auto* whole_region = space().find_region_from_range(range_to_unmap)) { if (!whole_region->is_mmap()) return -EPERM; - bool success = deallocate_region(*whole_region); + bool success = space().deallocate_region(*whole_region); ASSERT(success); return 0; } - if (auto* old_region = find_region_containing(range_to_unmap)) { + if (auto* old_region = space().find_region_containing(range_to_unmap)) { if (!old_region->is_mmap()) return -EPERM; - auto new_regions = split_region_around_range(*old_region, range_to_unmap); + auto new_regions = space().split_region_around_range(*old_region, range_to_unmap); // We manually unmap the old region here, specifying that we *don't* want the VM 
deallocated. old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); // Instead we give back the unwanted VM manually. - page_directory().range_allocator().deallocate(range_to_unmap); + space().page_directory().range_allocator().deallocate(range_to_unmap); // And finally we map the new region(s) using our page directory (they were just allocated and don't have one). for (auto* new_region : new_regions) { - new_region->map(page_directory()); + new_region->map(space().page_directory()); } return 0; } @@ -476,7 +458,7 @@ void* Process::sys$mremap(Userspace<const Syscall::SC_mremap_params*> user_param if (!copy_from_user(¶ms, user_params)) return (void*)-EFAULT; - auto* old_region = find_region_from_range(Range { VirtualAddress(params.old_address), params.old_size }); + auto* old_region = space().find_region_from_range(Range { VirtualAddress(params.old_address), params.old_size }); if (!old_region) return (void*)-EINVAL; @@ -491,11 +473,11 @@ void* Process::sys$mremap(Userspace<const Syscall::SC_mremap_params*> user_param // Unmap without deallocating the VM range since we're going to reuse it. 
old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); auto new_vmobject = PrivateInodeVMObject::create_with_inode(inode); - auto new_region_or_error = allocate_region_with_vmobject(range, new_vmobject, 0, old_name, old_prot, false); + auto new_region_or_error = space().allocate_region_with_vmobject(range, new_vmobject, 0, old_name, old_prot, false); if (new_region_or_error.is_error()) return (void*)new_region_or_error.error().error(); auto& new_region = *new_region_or_error.value(); @@ -527,11 +509,11 @@ void* Process::sys$allocate_tls(size_t size) }); ASSERT(main_thread); - auto range = allocate_range({}, size); + auto range = space().allocate_range({}, size); if (!range.has_value()) return (void*)-ENOMEM; - auto region_or_error = allocate_region(range.value(), String(), PROT_READ | PROT_WRITE); + auto region_or_error = space().allocate_region(range.value(), String(), PROT_READ | PROT_WRITE); if (region_or_error.is_error()) return (void*)region_or_error.error().error(); @@ -552,15 +534,15 @@ void* Process::sys$allocate_tls(size_t size) int Process::sys$msyscall(void* address) { - if (m_enforces_syscall_regions) + if (space().enforces_syscall_regions()) return -EPERM; if (!address) { - m_enforces_syscall_regions = true; + space().set_enforces_syscall_regions(true); return 0; } - auto* region = find_region_containing(Range { VirtualAddress { address }, 1 }); + auto* region = space().find_region_containing(Range { VirtualAddress { address }, 1 }); if (!region) return -EINVAL; diff --git a/Kernel/Syscalls/ptrace.cpp b/Kernel/Syscalls/ptrace.cpp index 26e91fcaae..22cb45ec49 100644 --- a/Kernel/Syscalls/ptrace.cpp +++ b/Kernel/Syscalls/ptrace.cpp @@ -73,7 +73,7 @@ KResultOr<u32> Process::peek_user_data(Userspace<const u32*> address) KResult Process::poke_user_data(Userspace<u32*> address, u32 data) { Range range = { VirtualAddress(address), sizeof(u32) }; - auto* region = 
find_region_containing(range); + auto* region = space().find_region_containing(range); if (!region) return EFAULT; ProcessPagingScope scope(*this); diff --git a/Kernel/Syscalls/thread.cpp b/Kernel/Syscalls/thread.cpp index d65591833a..ff8d4b0ae3 100644 --- a/Kernel/Syscalls/thread.cpp +++ b/Kernel/Syscalls/thread.cpp @@ -80,7 +80,7 @@ int Process::sys$create_thread(void* (*entry)(void*), Userspace<const Syscall::S auto& tss = thread->tss(); tss.eip = (FlatPtr)entry; tss.eflags = 0x0202; - tss.cr3 = page_directory().cr3(); + tss.cr3 = space().page_directory().cr3(); tss.esp = (u32)user_stack_address; auto tsr_result = thread->make_thread_specific_region({}); diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp index 3a0fba1b4e..2dc0c7d924 100644 --- a/Kernel/Thread.cpp +++ b/Kernel/Thread.cpp @@ -108,7 +108,7 @@ Thread::Thread(NonnullRefPtr<Process> process, NonnullOwnPtr<Region> kernel_stac m_tss.gs = GDT_SELECTOR_TLS | 3; } - m_tss.cr3 = m_process->page_directory().cr3(); + m_tss.cr3 = m_process->space().page_directory().cr3(); m_kernel_stack_base = m_kernel_stack_region->vaddr().get(); m_kernel_stack_top = m_kernel_stack_region->vaddr().offset(default_kernel_stack_size).get() & 0xfffffff8u; @@ -1015,11 +1015,11 @@ KResult Thread::make_thread_specific_region(Badge<Process>) if (!process().m_master_tls_region) return KSuccess; - auto range = process().allocate_range({}, thread_specific_region_size()); + auto range = process().space().allocate_range({}, thread_specific_region_size()); if (!range.has_value()) return ENOMEM; - auto region_or_error = process().allocate_region(range.value(), "Thread-specific", PROT_READ | PROT_WRITE); + auto region_or_error = process().space().allocate_region(range.value(), "Thread-specific", PROT_READ | PROT_WRITE); if (region_or_error.is_error()) return region_or_error.error(); diff --git a/Kernel/VM/MemoryManager.cpp b/Kernel/VM/MemoryManager.cpp index 3ee5f39671..60c299b170 100644 --- a/Kernel/VM/MemoryManager.cpp +++ 
b/Kernel/VM/MemoryManager.cpp @@ -401,29 +401,29 @@ Region* MemoryManager::kernel_region_from_vaddr(VirtualAddress vaddr) return nullptr; } -Region* MemoryManager::user_region_from_vaddr(Process& process, VirtualAddress vaddr) +Region* MemoryManager::user_region_from_vaddr(Space& space, VirtualAddress vaddr) { - ScopedSpinLock lock(s_mm_lock); // FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure! - for (auto& region : process.m_regions) { + ScopedSpinLock lock(space.get_lock()); + for (auto& region : space.regions()) { if (region.contains(vaddr)) return ®ion; } return nullptr; } -Region* MemoryManager::find_region_from_vaddr(Process& process, VirtualAddress vaddr) +Region* MemoryManager::find_region_from_vaddr(Space& space, VirtualAddress vaddr) { ScopedSpinLock lock(s_mm_lock); - if (auto* region = user_region_from_vaddr(process, vaddr)) + if (auto* region = user_region_from_vaddr(space, vaddr)) return region; return kernel_region_from_vaddr(vaddr); } -const Region* MemoryManager::find_region_from_vaddr(const Process& process, VirtualAddress vaddr) +const Region* MemoryManager::find_region_from_vaddr(const Space& space, VirtualAddress vaddr) { ScopedSpinLock lock(s_mm_lock); - if (auto* region = user_region_from_vaddr(const_cast<Process&>(process), vaddr)) + if (auto* region = user_region_from_vaddr(const_cast<Space&>(space), vaddr)) return region; return kernel_region_from_vaddr(vaddr); } @@ -436,8 +436,8 @@ Region* MemoryManager::find_region_from_vaddr(VirtualAddress vaddr) auto page_directory = PageDirectory::find_by_cr3(read_cr3()); if (!page_directory) return nullptr; - ASSERT(page_directory->process()); - return user_region_from_vaddr(*page_directory->process(), vaddr); + ASSERT(page_directory->space()); + return user_region_from_vaddr(*page_directory->space(), vaddr); } PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault) @@ -735,12 +735,17 @@ RefPtr<PhysicalPage> 
MemoryManager::allocate_supervisor_physical_page() void MemoryManager::enter_process_paging_scope(Process& process) { + enter_space(process.space()); +} + +void MemoryManager::enter_space(Space& space) +{ auto current_thread = Thread::current(); ASSERT(current_thread != nullptr); ScopedSpinLock lock(s_mm_lock); - current_thread->tss().cr3 = process.page_directory().cr3(); - write_cr3(process.page_directory().cr3()); + current_thread->tss().cr3 = space.page_directory().cr3(); + write_cr3(space.page_directory().cr3()); } void MemoryManager::flush_tlb_local(VirtualAddress vaddr, size_t page_count) @@ -846,7 +851,7 @@ bool MemoryManager::validate_user_stack(const Process& process, VirtualAddress v if (!is_user_address(vaddr)) return false; ScopedSpinLock lock(s_mm_lock); - auto* region = user_region_from_vaddr(const_cast<Process&>(process), vaddr); + auto* region = user_region_from_vaddr(const_cast<Process&>(process).space(), vaddr); return region && region->is_user_accessible() && region->is_stack(); } diff --git a/Kernel/VM/MemoryManager.h b/Kernel/VM/MemoryManager.h index 13c3a79d78..ceee66796e 100644 --- a/Kernel/VM/MemoryManager.h +++ b/Kernel/VM/MemoryManager.h @@ -143,7 +143,8 @@ public: PageFaultResponse handle_page_fault(const PageFault&); - void enter_process_paging_scope(Process&); + static void enter_process_paging_scope(Process&); + static void enter_space(Space&); bool validate_user_stack(const Process&, VirtualAddress) const; @@ -196,8 +197,8 @@ public: } } - static Region* find_region_from_vaddr(Process&, VirtualAddress); - static const Region* find_region_from_vaddr(const Process&, VirtualAddress); + static Region* find_region_from_vaddr(Space&, VirtualAddress); + static const Region* find_region_from_vaddr(const Space&, VirtualAddress); void dump_kernel_regions(); @@ -225,7 +226,7 @@ private: static void flush_tlb_local(VirtualAddress, size_t page_count = 1); static void flush_tlb(const PageDirectory*, VirtualAddress, size_t page_count = 1); - static 
Region* user_region_from_vaddr(Process&, VirtualAddress); + static Region* user_region_from_vaddr(Space&, VirtualAddress); static Region* kernel_region_from_vaddr(VirtualAddress); static Region* find_region_from_vaddr(VirtualAddress); diff --git a/Kernel/VM/PageDirectory.cpp b/Kernel/VM/PageDirectory.cpp index 939853810f..3f38942353 100644 --- a/Kernel/VM/PageDirectory.cpp +++ b/Kernel/VM/PageDirectory.cpp @@ -73,7 +73,7 @@ PageDirectory::PageDirectory() m_directory_pages[3] = PhysicalPage::create(boot_pd3_paddr, true, false); } -PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_range_allocator) +PageDirectory::PageDirectory(const RangeAllocator* parent_range_allocator) { ScopedSpinLock lock(s_mm_lock); if (parent_range_allocator) { @@ -142,8 +142,8 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang auto* new_pd = MM.quickmap_pd(*this, 0); memcpy(new_pd, &buffer, sizeof(PageDirectoryEntry)); - // If we got here, we successfully created it. Set m_process now - m_process = &process; + // If we got here, we successfully created it. 
Set m_space now + m_valid = true; cr3_map().set(cr3(), this); } @@ -151,7 +151,7 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang PageDirectory::~PageDirectory() { ScopedSpinLock lock(s_mm_lock); - if (m_process) + if (m_space) cr3_map().remove(cr3()); } diff --git a/Kernel/VM/PageDirectory.h b/Kernel/VM/PageDirectory.h index f4bd7f41ff..ba26e37f01 100644 --- a/Kernel/VM/PageDirectory.h +++ b/Kernel/VM/PageDirectory.h @@ -40,10 +40,10 @@ class PageDirectory : public RefCounted<PageDirectory> { friend class MemoryManager; public: - static RefPtr<PageDirectory> create_for_userspace(Process& process, const RangeAllocator* parent_range_allocator = nullptr) + static RefPtr<PageDirectory> create_for_userspace(const RangeAllocator* parent_range_allocator = nullptr) { - auto page_directory = adopt(*new PageDirectory(process, parent_range_allocator)); - if (!page_directory->process()) + auto page_directory = adopt(*new PageDirectory(parent_range_allocator)); + if (!page_directory->is_valid()) return {}; return page_directory; } @@ -55,24 +55,31 @@ public: u32 cr3() const { return m_directory_table->paddr().get(); } RangeAllocator& range_allocator() { return m_range_allocator; } + const RangeAllocator& range_allocator() const { return m_range_allocator; } + RangeAllocator& identity_range_allocator() { return m_identity_range_allocator; } - Process* process() { return m_process; } - const Process* process() const { return m_process; } + bool is_valid() const { return m_valid; } + + Space* space() { return m_space; } + const Space* space() const { return m_space; } + + void set_space(Badge<Space>, Space& space) { m_space = &space; } RecursiveSpinLock& get_lock() { return m_lock; } private: - PageDirectory(Process&, const RangeAllocator* parent_range_allocator); + explicit PageDirectory(const RangeAllocator* parent_range_allocator); PageDirectory(); - Process* m_process { nullptr }; + Space* m_space { nullptr }; RangeAllocator 
m_range_allocator; RangeAllocator m_identity_range_allocator; RefPtr<PhysicalPage> m_directory_table; RefPtr<PhysicalPage> m_directory_pages[4]; HashMap<u32, RefPtr<PhysicalPage>> m_page_tables; RecursiveSpinLock m_lock; + bool m_valid { false }; }; } diff --git a/Kernel/VM/Space.cpp b/Kernel/VM/Space.cpp new file mode 100644 index 0000000000..bbee739db1 --- /dev/null +++ b/Kernel/VM/Space.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2021, Andreas Kling <kling@serenityos.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <AK/QuickSort.h> +#include <Kernel/Process.h> +#include <Kernel/SpinLock.h> +#include <Kernel/VM/AnonymousVMObject.h> +#include <Kernel/VM/MemoryManager.h> +#include <Kernel/VM/Space.h> + +namespace Kernel { + +OwnPtr<Space> Space::create(Process& process, const Space* parent) +{ + auto page_directory = PageDirectory::create_for_userspace(parent ? &parent->page_directory().range_allocator() : nullptr); + if (!page_directory) + return {}; + auto space = adopt_own(*new Space(process, page_directory.release_nonnull())); + space->page_directory().set_space({}, *space); + return space; +} + +Space::Space(Process& process, NonnullRefPtr<PageDirectory> page_directory) + : m_process(&process) + , m_page_directory(move(page_directory)) +{ +} + +Space::~Space() +{ +} + +Optional<Range> Space::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment) +{ + vaddr.mask(PAGE_MASK); + size = PAGE_ROUND_UP(size); + if (vaddr.is_null()) + return page_directory().range_allocator().allocate_anywhere(size, alignment); + return page_directory().range_allocator().allocate_specific(vaddr, size); +} + +Region& Space::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject) +{ + auto& region = add_region(Region::create_user_accessible( + m_process, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared())); + region.set_syscall_region(source_region.is_syscall_region()); + region.set_mmap(source_region.is_mmap()); + region.set_stack(source_region.is_stack()); + size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE; + for (size_t i = 0; i < region.page_count(); ++i) { + if (source_region.should_cow(page_offset_in_source_region + i)) + region.set_should_cow(i, true); + } + return region; +} + +KResultOr<Region*> Space::allocate_region(const Range& range, const String& name, int 
prot, AllocationStrategy strategy) +{ + ASSERT(range.is_valid()); + auto vmobject = AnonymousVMObject::create_with_size(range.size(), strategy); + if (!vmobject) + return ENOMEM; + auto region = Region::create_user_accessible(m_process, range, vmobject.release_nonnull(), 0, name, prot_to_region_access_flags(prot), true, false); + if (!region->map(page_directory())) + return ENOMEM; + return &add_region(move(region)); +} + +KResultOr<Region*> Space::allocate_region_with_vmobject(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool shared) +{ + ASSERT(range.is_valid()); + size_t end_in_vmobject = offset_in_vmobject + range.size(); + if (end_in_vmobject <= offset_in_vmobject) { + dbgln("allocate_region_with_vmobject: Overflow (offset + size)"); + return EINVAL; + } + if (offset_in_vmobject >= vmobject->size()) { + dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject."); + return EINVAL; + } + if (end_in_vmobject > vmobject->size()) { + dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject."); + return EINVAL; + } + offset_in_vmobject &= PAGE_MASK; + auto& region = add_region(Region::create_user_accessible(m_process, range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot), true, shared)); + if (!region.map(page_directory())) { + // FIXME: What is an appropriate error code here, really? 
+ return ENOMEM; + } + return ®ion; +} + +bool Space::deallocate_region(Region& region) +{ + OwnPtr<Region> region_protector; + ScopedSpinLock lock(m_lock); + + if (m_region_lookup_cache.region.unsafe_ptr() == ®ion) + m_region_lookup_cache.region = nullptr; + for (size_t i = 0; i < m_regions.size(); ++i) { + if (&m_regions[i] == ®ion) { + region_protector = m_regions.unstable_take(i); + return true; + } + } + return false; +} + +Region* Space::find_region_from_range(const Range& range) +{ + ScopedSpinLock lock(m_lock); + if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region) + return m_region_lookup_cache.region.unsafe_ptr(); + + size_t size = PAGE_ROUND_UP(range.size()); + for (auto& region : m_regions) { + if (region.vaddr() == range.base() && region.size() == size) { + m_region_lookup_cache.range = range; + m_region_lookup_cache.region = region; + return ®ion; + } + } + return nullptr; +} + +Region* Space::find_region_containing(const Range& range) +{ + ScopedSpinLock lock(m_lock); + for (auto& region : m_regions) { + if (region.contains(range)) + return ®ion; + } + return nullptr; +} + +Region& Space::add_region(NonnullOwnPtr<Region> region) +{ + auto* ptr = region.ptr(); + ScopedSpinLock lock(m_lock); + m_regions.append(move(region)); + return *ptr; +} + +// Carve out a virtual address range from a region and return the two regions on either side +Vector<Region*, 2> Space::split_region_around_range(const Region& source_region, const Range& desired_range) +{ + Range old_region_range = source_region.range(); + auto remaining_ranges_after_unmap = old_region_range.carve(desired_range); + + ASSERT(!remaining_ranges_after_unmap.is_empty()); + auto make_replacement_region = [&](const Range& new_range) -> Region& { + ASSERT(old_region_range.contains(new_range)); + size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get()); + 
return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject); + }; + Vector<Region*, 2> new_regions; + for (auto& new_range : remaining_ranges_after_unmap) { + new_regions.unchecked_append(&make_replacement_region(new_range)); + } + return new_regions; +} + +void Space::dump_regions() +{ + klog() << "Process regions:"; + klog() << "BEGIN END SIZE ACCESS NAME"; + + ScopedSpinLock lock(m_lock); + + Vector<Region*> sorted_regions; + sorted_regions.ensure_capacity(m_regions.size()); + for (auto& region : m_regions) + sorted_regions.append(®ion); + quick_sort(sorted_regions, [](auto& a, auto& b) { + return a->vaddr() < b->vaddr(); + }); + + for (auto& sorted_region : sorted_regions) { + auto& region = *sorted_region; + dmesgln("{:08x} -- {:08x} {:08x} {:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(), + region.is_readable() ? 'R' : ' ', + region.is_writable() ? 'W' : ' ', + region.is_executable() ? 'X' : ' ', + region.is_shared() ? 'S' : ' ', + region.is_stack() ? 'T' : ' ', + region.is_syscall_region() ? 'C' : ' ', + region.name()); + } + MM.dump_kernel_regions(); +} + +void Space::remove_all_regions(Badge<Process>) +{ + ScopedSpinLock lock(m_lock); + m_regions.clear(); +} + +} diff --git a/Kernel/VM/Space.h b/Kernel/VM/Space.h new file mode 100644 index 0000000000..4e4a76dc13 --- /dev/null +++ b/Kernel/VM/Space.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include <AK/NonnullOwnPtrVector.h> +#include <AK/WeakPtr.h> +#include <Kernel/UnixTypes.h> +#include <Kernel/VM/AllocationStrategy.h> +#include <Kernel/VM/PageDirectory.h> + +namespace Kernel { + +class Space { +public: + static OwnPtr<Space> create(Process&, const Space* parent); + ~Space(); + + PageDirectory& page_directory() { return *m_page_directory; } + const PageDirectory& page_directory() const { return *m_page_directory; } + + Region& add_region(NonnullOwnPtr<Region>); + + size_t region_count() const { return m_regions.size(); } + + NonnullOwnPtrVector<Region>& regions() { return m_regions; } + const NonnullOwnPtrVector<Region>& regions() const { return m_regions; } + + void dump_regions(); + + Optional<Range> allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE); + + KResultOr<Region*> allocate_region_with_vmobject(const Range&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, const String& name, int prot, bool shared); + KResultOr<Region*> allocate_region(const Range&, const String& name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve); + bool deallocate_region(Region& region); + + Region& allocate_split_region(const Region& source_region, const Range&, size_t offset_in_vmobject); + Vector<Region*, 2> split_region_around_range(const Region& source_region, const Range&); + + Region* find_region_from_range(const Range&); + Region* find_region_containing(const Range&); + + bool enforces_syscall_regions() const { return m_enforces_syscall_regions; } + void set_enforces_syscall_regions(bool b) { m_enforces_syscall_regions = b; } + + void remove_all_regions(Badge<Process>); + + SpinLock<u32>& get_lock() const { return m_lock; } + +private: + Space(Process&, NonnullRefPtr<PageDirectory>); + + Process* m_process { nullptr }; + mutable SpinLock<u32> m_lock; + + RefPtr<PageDirectory> m_page_directory; + + NonnullOwnPtrVector<Region> m_regions; + + struct RegionLookupCache { + 
Optional<Range> range; + WeakPtr<Region> region; + }; + RegionLookupCache m_region_lookup_cache; + + bool m_enforces_syscall_regions { false }; +}; + +} |