author     Andreas Kling <kling@serenityos.org>    2021-07-12 22:52:17 +0200
committer  Andreas Kling <kling@serenityos.org>    2021-07-13 22:40:25 +0200
commit     ba87571366ddae18d6d7659cf94cd44fe1e49519 (patch)
tree       7b7206eb38854ace55fbffb742bbcc71c70b98de /Kernel
parent     be83b3aff464e175bbe6800a4458a89e3f26e6b8 (diff)
Kernel: Implement zone-based buddy allocator for physical memory
The previous allocator was very naive and kept the state of all pages in one big bitmap. When allocating, we had to scan through the bitmap until we found an unset bit.

This patch introduces a new binary buddy allocator that manages the physical memory pages.

Each PhysicalRegion is divided into zones (PhysicalZone) of 16MB each. Any extra pages at the end of physical RAM that don't fit into a 16MB zone are turned into 15 or fewer 1MB zones.

Each zone starts out with one full-sized block, which is then recursively subdivided into halves upon allocation, until a block of the requested size can be returned.

There are more opportunities for improvement here: the way zone objects are allocated and stored is non-optimal. Same goes for the allocation of buddy block state bitmaps.
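For reference, the buddy arithmetic the message alludes to is compact: a request is rounded up to a power-of-two block size, and at each order a block's "buddy" is the neighbor whose index differs in exactly one bit. A standalone sketch of the classic scheme (helper names are illustrative; the patch itself tracks buddies with per-order bitmaps rather than XOR):

    #include <cstdio>

    // Round a page count up to the next power of two, then take its log2
    // to get the buddy "order". Mirrors the rounding this patch performs
    // before asking a zone for a block.
    static size_t order_for_page_count(size_t count)
    {
        size_t order = 0;
        while ((size_t(1) << order) < count)
            ++order;
        return order;
    }

    // In a classic binary buddy scheme, the buddy of block `index` at
    // `order` is found by flipping bit `order` of the index.
    static size_t buddy_of(size_t index, size_t order)
    {
        return index ^ (size_t(1) << order);
    }

    int main()
    {
        printf("%zu\n", order_for_page_count(5)); // 3 (5 pages round up to 8)
        printf("%zu\n", buddy_of(8, 3));          // 0: blocks [0..7] and [8..15] are buddies
    }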
Diffstat (limited to 'Kernel')
-rw-r--r--  Kernel/CMakeLists.txt        |   1
-rw-r--r--  Kernel/VM/MemoryManager.cpp  |  31
-rw-r--r--  Kernel/VM/MemoryManager.h    |   7
-rw-r--r--  Kernel/VM/PhysicalPage.cpp   |   7
-rw-r--r--  Kernel/VM/PhysicalPage.h     |  13
-rw-r--r--  Kernel/VM/PhysicalRegion.cpp | 177
-rw-r--r--  Kernel/VM/PhysicalRegion.h   |  32
-rw-r--r--  Kernel/VM/PhysicalZone.cpp   | 198
-rw-r--r--  Kernel/VM/PhysicalZone.h     |  86
9 files changed, 409 insertions, 143 deletions
diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt
index 4a5da36099..e7f8b5eb95 100644
--- a/Kernel/CMakeLists.txt
+++ b/Kernel/CMakeLists.txt
@@ -260,6 +260,7 @@ set(KERNEL_SOURCES
VM/PageDirectory.cpp
VM/PhysicalPage.cpp
VM/PhysicalRegion.cpp
+ VM/PhysicalZone.cpp
VM/PrivateInodeVMObject.cpp
VM/ProcessPagingScope.cpp
VM/PurgeablePageRanges.cpp
diff --git a/Kernel/VM/MemoryManager.cpp b/Kernel/VM/MemoryManager.cpp
index be576ae43a..e2cc4e019b 100644
--- a/Kernel/VM/MemoryManager.cpp
+++ b/Kernel/VM/MemoryManager.cpp
@@ -257,7 +257,7 @@ UNMAP_AFTER_INIT void MemoryManager::parse_memory_map()
// Assign page to user physical physical_region.
if (!physical_region || physical_region->upper().offset(PAGE_SIZE) != addr) {
- m_user_physical_regions.append(PhysicalRegion::create(addr, addr));
+ m_user_physical_regions.append(PhysicalRegion::try_create(addr, addr).release_nonnull());
physical_region = &m_user_physical_regions.last();
} else {
physical_region->expand(physical_region->lower(), addr);
@@ -266,9 +266,10 @@ UNMAP_AFTER_INIT void MemoryManager::parse_memory_map()
}
// Append statically-allocated super physical physical_region.
- m_super_physical_regions.append(PhysicalRegion::create(
+ m_super_physical_regions.append(PhysicalRegion::try_create(
PhysicalAddress(virtual_to_low_physical(FlatPtr(super_pages))),
- PhysicalAddress(virtual_to_low_physical(FlatPtr(super_pages + sizeof(super_pages))))));
+ PhysicalAddress(virtual_to_low_physical(FlatPtr(super_pages + sizeof(super_pages)))))
+ .release_nonnull());
for (auto& region : m_super_physical_regions)
m_system_memory_info.super_physical_pages += region.finalize_capacity();
@@ -293,11 +294,15 @@ UNMAP_AFTER_INIT void MemoryManager::parse_memory_map()
dmesgln("MM: {} range @ {} - {} (size 0x{:x})", UserMemoryRangeTypeNames[to_underlying(used_range.type)], used_range.start, used_range.end.offset(-1), used_range.end.as_ptr() - used_range.start.as_ptr());
}
- for (auto& region : m_super_physical_regions)
+ for (auto& region : m_super_physical_regions) {
dmesgln("MM: Super physical region: {} - {} (size 0x{:x})", region.lower(), region.upper().offset(-1), PAGE_SIZE * region.size());
+ region.initialize_zones();
+ }
- for (auto& region : m_user_physical_regions)
+ for (auto& region : m_user_physical_regions) {
dmesgln("MM: User physical region: {} - {} (size 0x{:x})", region.lower(), region.upper().offset(-1), PAGE_SIZE * region.size());
+ region.initialize_zones();
+ }
}
extern "C" PageDirectoryEntry boot_pd3[1024];
@@ -337,9 +342,12 @@ UNMAP_AFTER_INIT void MemoryManager::initialize_physical_pages()
// Now that we know how much memory we need for a contiguous array of PhysicalPage instances, find a memory region that can fit it
PhysicalRegion* found_region { nullptr };
- for (auto& region : m_user_physical_regions) {
+ Optional<size_t> found_region_index;
+ for (size_t i = 0; i < m_user_physical_regions.size(); ++i) {
+ auto& region = m_user_physical_regions[i];
if (region.size() >= physical_page_array_pages_and_page_tables_count) {
found_region = &region;
+ found_region_index = i;
break;
}
}
@@ -354,12 +362,9 @@ UNMAP_AFTER_INIT void MemoryManager::initialize_physical_pages()
if (found_region->size() == physical_page_array_pages_and_page_tables_count) {
// We're stealing the entire region
- m_physical_pages_region = move(*found_region);
- m_user_physical_regions.remove_first_matching([&](auto& region) {
- return &region == found_region;
- });
+ m_physical_pages_region = m_user_physical_regions.take(*found_region_index);
} else {
- m_physical_pages_region = found_region->take_pages_from_beginning(physical_page_array_pages_and_page_tables_count);
+ m_physical_pages_region = found_region->try_take_pages_from_beginning(physical_page_array_pages_and_page_tables_count);
}
m_used_memory_ranges.append({ UsedMemoryRangeType::PhysicalPages, m_physical_pages_region->lower(), m_physical_pages_region->upper() });
@@ -445,7 +450,7 @@ UNMAP_AFTER_INIT void MemoryManager::initialize_physical_pages()
auto pt_paddr = page_tables_base.offset(pt_index * PAGE_SIZE);
auto physical_page_index = PhysicalAddress::physical_page_index(pt_paddr.get());
auto& physical_page_entry = m_physical_page_entries[physical_page_index];
- auto physical_page = adopt_ref(*new (&physical_page_entry.physical_page) PhysicalPage(false));
+ auto physical_page = adopt_ref(*new (&physical_page_entry.allocated.physical_page) PhysicalPage(false));
auto result = kernel_page_tables.set(virtual_page_array_current_page & ~0x1fffff, move(physical_page));
VERIFY(result == AK::HashSetResult::InsertedNewEntry);
@@ -465,7 +470,7 @@ PhysicalPageEntry& MemoryManager::get_physical_page_entry(PhysicalAddress physic
PhysicalAddress MemoryManager::get_physical_address(PhysicalPage const& physical_page)
{
- PhysicalPageEntry const& physical_page_entry = *reinterpret_cast<PhysicalPageEntry const*>((u8 const*)&physical_page - __builtin_offsetof(PhysicalPageEntry, physical_page));
+ PhysicalPageEntry const& physical_page_entry = *reinterpret_cast<PhysicalPageEntry const*>((u8 const*)&physical_page - __builtin_offsetof(PhysicalPageEntry, allocated.physical_page));
VERIFY(m_physical_page_entries);
size_t physical_page_entry_index = &physical_page_entry - m_physical_page_entries;
VERIFY(physical_page_entry_index < m_physical_page_entries_count);
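The switch to `allocated.physical_page` matters here because get_physical_address() recovers the enclosing PhysicalPageEntry from a PhysicalPage reference via the container_of idiom: subtract the member's byte offset from the member's address. A minimal self-contained sketch of the idiom (Outer/Inner are made-up stand-ins; only the offsetof subtraction mirrors the kernel code):

    #include <cstdint>
    #include <cstdio>

    struct Inner { int value; };
    struct Outer {
        uint64_t header;
        Inner inner; // nested at a fixed offset inside Outer
    };

    // Recover the Outer* from a pointer to its nested Inner member by
    // subtracting the member's byte offset -- the same trick
    // MemoryManager::get_physical_address() uses with
    // __builtin_offsetof(PhysicalPageEntry, allocated.physical_page).
    static Outer* outer_from_inner(Inner* inner)
    {
        return reinterpret_cast<Outer*>(
            reinterpret_cast<uint8_t*>(inner) - __builtin_offsetof(Outer, inner));
    }

    int main()
    {
        Outer outer { 0xabcd, { 42 } };
        Inner* inner = &outer.inner;
        printf("%d\n", outer_from_inner(inner)->inner.value); // prints 42
    }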
diff --git a/Kernel/VM/MemoryManager.h b/Kernel/VM/MemoryManager.h
index 21aeb61621..a57767bae6 100644
--- a/Kernel/VM/MemoryManager.h
+++ b/Kernel/VM/MemoryManager.h
@@ -8,6 +8,7 @@
#include <AK/Concepts.h>
#include <AK/HashTable.h>
+#include <AK/NonnullOwnPtrVector.h>
#include <AK/NonnullRefPtrVector.h>
#include <AK/String.h>
#include <Kernel/Arch/x86/PageFault.h>
@@ -244,9 +245,9 @@ private:
SystemMemoryInfo m_system_memory_info;
- Vector<PhysicalRegion> m_user_physical_regions;
- Vector<PhysicalRegion> m_super_physical_regions;
- Optional<PhysicalRegion> m_physical_pages_region;
+ NonnullOwnPtrVector<PhysicalRegion> m_user_physical_regions;
+ NonnullOwnPtrVector<PhysicalRegion> m_super_physical_regions;
+ OwnPtr<PhysicalRegion> m_physical_pages_region;
PhysicalPageEntry* m_physical_page_entries { nullptr };
size_t m_physical_page_entries_count { 0 };
diff --git a/Kernel/VM/PhysicalPage.cpp b/Kernel/VM/PhysicalPage.cpp
index 6396cda0ea..6591db0160 100644
--- a/Kernel/VM/PhysicalPage.cpp
+++ b/Kernel/VM/PhysicalPage.cpp
@@ -13,7 +13,7 @@ namespace Kernel {
NonnullRefPtr<PhysicalPage> PhysicalPage::create(PhysicalAddress paddr, bool may_return_to_freelist)
{
auto& physical_page_entry = MM.get_physical_page_entry(paddr);
- return adopt_ref(*new (&physical_page_entry.physical_page) PhysicalPage(may_return_to_freelist));
+ return adopt_ref(*new (&physical_page_entry.allocated.physical_page) PhysicalPage(may_return_to_freelist));
}
PhysicalPage::PhysicalPage(bool may_return_to_freelist)
@@ -28,9 +28,12 @@ PhysicalAddress PhysicalPage::paddr() const
void PhysicalPage::free_this()
{
+ auto paddr = MM.get_physical_address(*this);
if (m_may_return_to_freelist) {
- auto paddr = MM.get_physical_address(*this);
+ auto& this_as_freelist_entry = MM.get_physical_page_entry(paddr).freelist;
this->~PhysicalPage(); // delete in place
+ this_as_freelist_entry.next_index = -1;
+ this_as_freelist_entry.prev_index = -1;
MM.deallocate_physical_page(paddr);
} else {
this->~PhysicalPage(); // delete in place
diff --git a/Kernel/VM/PhysicalPage.h b/Kernel/VM/PhysicalPage.h
index 9c61b7dec9..8daab4d339 100644
--- a/Kernel/VM/PhysicalPage.h
+++ b/Kernel/VM/PhysicalPage.h
@@ -50,10 +50,17 @@ private:
};
struct PhysicalPageEntry {
- // This structure either holds a valid PhysicalPage
- // or a PhysicalAllocator's free list information!
union {
- PhysicalPage physical_page;
+ // If it's a live PhysicalPage object:
+ struct {
+ PhysicalPage physical_page;
+ } allocated;
+
+ // If it's an entry in a PhysicalZone::Bucket's freelist.
+ struct {
+ i16 next_index;
+ i16 prev_index;
+ } freelist;
};
};
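The union above lets one PhysicalPageEntry's storage serve two phases of a page's life: while the page is allocated it holds a live PhysicalPage, and once free_this() destroys the object in place, the same bytes hold the freelist links. A simplified, self-contained model of that lifecycle (Page, Entry and the field names are stand-ins for the kernel types):

    #include <new>
    #include <cstdio>

    struct Page {
        int refcount;
    };

    struct Entry {
        union {
            struct { Page page; } allocated;             // while the page is live
            struct { short next; short prev; } freelist; // after in-place destruction
        };
    };

    int main()
    {
        Entry entry;

        // Construct a live Page inside the entry's storage.
        Page* page = new (&entry.allocated.page) Page { 1 };
        printf("refcount=%d\n", page->refcount);

        // Destroy it in place, then reuse the same bytes as freelist links,
        // as PhysicalPage::free_this() does before returning the page.
        page->~Page();
        entry.freelist.next = -1;
        entry.freelist.prev = -1;
        printf("next=%d prev=%d\n", entry.freelist.next, entry.freelist.prev);
    }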
diff --git a/Kernel/VM/PhysicalRegion.cpp b/Kernel/VM/PhysicalRegion.cpp
index 32b7668f92..ae9d5f792f 100644
--- a/Kernel/VM/PhysicalRegion.cpp
+++ b/Kernel/VM/PhysicalRegion.cpp
@@ -8,10 +8,28 @@
#include <AK/RefPtr.h>
#include <Kernel/Assertions.h>
#include <Kernel/Random.h>
+#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PhysicalRegion.h>
+#include <Kernel/VM/PhysicalZone.h>
namespace Kernel {
+static constexpr u32 next_power_of_two(u32 value)
+{
+ value--;
+ value |= value >> 1;
+ value |= value >> 2;
+ value |= value >> 4;
+ value |= value >> 8;
+ value |= value >> 16;
+ value++;
+ return value;
+}
+
+PhysicalRegion::~PhysicalRegion()
+{
+}
+
PhysicalRegion::PhysicalRegion(PhysicalAddress lower, PhysicalAddress upper)
: m_lower(lower)
, m_upper(upper)
@@ -26,17 +44,34 @@ void PhysicalRegion::expand(PhysicalAddress lower, PhysicalAddress upper)
m_upper = upper;
}
+void PhysicalRegion::initialize_zones()
+{
+ size_t remaining_pages = m_pages;
+ auto base_address = m_lower;
+
+ auto make_zones = [&](size_t zone_size) {
+ while (remaining_pages >= zone_size) {
+ m_zones.append(make<PhysicalZone>(base_address, zone_size));
+ dmesgln(" * Zone {:016x}-{:016x} ({} bytes)", base_address.get(), base_address.get() + zone_size * PAGE_SIZE - 1, zone_size * PAGE_SIZE);
+ base_address = base_address.offset(zone_size * PAGE_SIZE);
+ remaining_pages -= zone_size;
+ }
+ };
+
+ make_zones(4096);
+ make_zones(256);
+}
+
unsigned PhysicalRegion::finalize_capacity()
{
VERIFY(!m_pages);
m_pages = (m_upper.get() - m_lower.get()) / PAGE_SIZE;
- m_bitmap.grow(m_pages, false);
return size();
}
-PhysicalRegion PhysicalRegion::take_pages_from_beginning(unsigned page_count)
+OwnPtr<PhysicalRegion> PhysicalRegion::try_take_pages_from_beginning(unsigned page_count)
{
VERIFY(m_used == 0);
VERIFY(page_count > 0);
@@ -47,136 +82,64 @@ PhysicalRegion PhysicalRegion::take_pages_from_beginning(unsigned page_count)
// TODO: find a more elegant way to re-init the existing region
m_pages = 0;
- m_bitmap = {}; // FIXME: Kind of wasteful
finalize_capacity();
- auto taken_region = create(taken_lower, taken_upper);
- taken_region.finalize_capacity();
+ auto taken_region = try_create(taken_lower, taken_upper);
+ if (!taken_region)
+ return {};
+ taken_region->finalize_capacity();
return taken_region;
}
NonnullRefPtrVector<PhysicalPage> PhysicalRegion::take_contiguous_free_pages(size_t count, size_t physical_alignment)
{
- VERIFY(m_pages);
- VERIFY(m_used != m_pages);
-
- NonnullRefPtrVector<PhysicalPage> physical_pages;
- physical_pages.ensure_capacity(count);
-
- auto first_contiguous_page = find_contiguous_free_pages(count, physical_alignment);
+ // FIXME: Care about alignment.
+ (void)physical_alignment;
- for (size_t index = 0; index < count; index++)
- physical_pages.append(PhysicalPage::create(m_lower.offset(PAGE_SIZE * (index + first_contiguous_page))));
- return physical_pages;
-}
+ auto rounded_page_count = next_power_of_two(count);
+ auto order = __builtin_ctz(rounded_page_count);
-unsigned PhysicalRegion::find_contiguous_free_pages(size_t count, size_t physical_alignment)
-{
- VERIFY(count != 0);
- VERIFY(physical_alignment % PAGE_SIZE == 0);
- // search from the last page we allocated
- auto range = find_and_allocate_contiguous_range(count, physical_alignment / PAGE_SIZE);
- VERIFY(range.has_value());
- return range.value();
-}
+ Optional<PhysicalAddress> page_base;
+ for (auto& zone : m_zones) {
+ page_base = zone.allocate_block(order);
-Optional<unsigned> PhysicalRegion::find_one_free_page()
-{
- if (m_used == m_pages) {
- // We know we don't have any free pages, no need to check the bitmap
- // Check if we can draw one from the return queue
- if (m_recently_returned.size() > 0) {
- u8 index = get_fast_random<u8>() % m_recently_returned.size();
- Checked<PhysicalPtr> local_offset = m_recently_returned[index].get();
- local_offset -= m_lower.get();
- m_recently_returned.remove(index);
- VERIFY(!local_offset.has_overflow());
- VERIFY(local_offset.value() < (FlatPtr)(m_pages * PAGE_SIZE));
- return local_offset.value() / PAGE_SIZE;
- }
- return {};
+ if (page_base.has_value())
+ break;
}
- auto free_index = m_bitmap.find_one_anywhere_unset(m_free_hint);
- if (!free_index.has_value())
- return {};
- auto page_index = free_index.value();
- m_bitmap.set(page_index, true);
- m_used++;
- m_free_hint = free_index.value() + 1; // Just a guess
- if (m_free_hint >= m_bitmap.size())
- m_free_hint = 0;
- return page_index;
-}
-
-Optional<unsigned> PhysicalRegion::find_and_allocate_contiguous_range(size_t count, unsigned alignment)
-{
- VERIFY(count != 0);
- size_t found_pages_count = 0;
- // TODO: Improve how we deal with alignment != 1
- auto first_index = m_bitmap.find_longest_range_of_unset_bits(count + alignment - 1, found_pages_count);
- if (!first_index.has_value())
+ if (!page_base.has_value())
return {};
- auto page = first_index.value();
- if (alignment != 1) {
- auto lower_page = m_lower.get() / PAGE_SIZE;
- page = ((lower_page + page + alignment - 1) & ~(alignment - 1)) - lower_page;
- }
- if (found_pages_count >= count) {
- m_bitmap.set_range<true>(page, count);
- m_used += count;
- m_free_hint = first_index.value() + count + 1; // Just a guess
- if (m_free_hint >= m_bitmap.size())
- m_free_hint = 0;
- return page;
- }
- return {};
-}
-
-RefPtr<PhysicalPage> PhysicalRegion::take_free_page()
-{
- VERIFY(m_pages);
-
- auto free_index = find_one_free_page();
- if (!free_index.has_value())
- return nullptr;
+ NonnullRefPtrVector<PhysicalPage> physical_pages;
+ physical_pages.ensure_capacity(count);
- return PhysicalPage::create(m_lower.offset((PhysicalPtr)free_index.value() * PAGE_SIZE));
+ for (size_t i = 0; i < count; ++i)
+ physical_pages.append(PhysicalPage::create(page_base.value().offset(i * PAGE_SIZE)));
+ return physical_pages;
}
-void PhysicalRegion::free_page_at(PhysicalAddress addr)
+RefPtr<PhysicalPage> PhysicalRegion::take_free_page()
{
- VERIFY(m_pages);
-
- if (m_used == 0) {
- VERIFY_NOT_REACHED();
+ for (auto& zone : m_zones) {
+ auto page = zone.allocate_block(0);
+ if (page.has_value())
+ return PhysicalPage::create(page.value());
}
- Checked<PhysicalPtr> local_offset = addr.get();
- local_offset -= m_lower.get();
- VERIFY(!local_offset.has_overflow());
- VERIFY(local_offset.value() < ((PhysicalPtr)m_pages * PAGE_SIZE));
-
- auto page = local_offset.value() / PAGE_SIZE;
- m_bitmap.set(page, false);
- m_free_hint = page; // We know we can find one here for sure
- m_used--;
+ dbgln("PhysicalRegion::take_free_page: No free physical pages");
+ return nullptr;
}
void PhysicalRegion::return_page(PhysicalAddress paddr)
{
- auto returned_count = m_recently_returned.size();
- if (returned_count >= m_recently_returned.capacity()) {
- // Return queue is full, pick a random entry and free that page
- // and replace the entry with this page
- auto& entry = m_recently_returned[get_fast_random<u8>()];
- free_page_at(entry);
- entry = paddr;
- } else {
- // Still filling the return queue, just append it
- m_recently_returned.append(paddr);
+ for (auto& zone : m_zones) {
+ if (zone.contains(paddr)) {
+ zone.deallocate_block(paddr, 0);
+ return;
+ }
}
+
+ VERIFY_NOT_REACHED();
}
}
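Two bits of arithmetic in this file are worth unpacking: initialize_zones() greedily carves a region into 4096-page (16 MB) zones and then 256-page (1 MB) zones, and take_contiguous_free_pages() rounds a request up to a power of two whose ctz gives the buddy order. A standalone check of both (next_power_of_two is copied from the patch; main() and its example sizes are illustrative):

    #include <cstdint>
    #include <cstdio>

    // Copied from PhysicalRegion.cpp above: rounds a 32-bit value up to
    // the next power of two by smearing the highest set bit rightwards.
    static constexpr uint32_t next_power_of_two(uint32_t value)
    {
        value--;
        value |= value >> 1;
        value |= value >> 2;
        value |= value >> 4;
        value |= value >> 8;
        value |= value >> 16;
        value++;
        return value;
    }

    int main()
    {
        // Zone carving as in initialize_zones(): first as many 4096-page
        // (16 MB) zones as fit, then 256-page (1 MB) zones for the tail.
        size_t remaining_pages = 4123; // example region size (~16.1 MB)
        size_t zones_16mb = remaining_pages / 4096;
        remaining_pages %= 4096;
        size_t zones_1mb = remaining_pages / 256;
        remaining_pages %= 256;
        printf("16MB zones: %zu, 1MB zones: %zu, leftover pages: %zu\n",
            zones_16mb, zones_1mb, remaining_pages); // 1, 0, 27

        // Request rounding as in take_contiguous_free_pages(): 5 pages
        // round up to 8, and __builtin_ctz(8) == 3 selects order 3.
        uint32_t rounded = next_power_of_two(5);
        printf("rounded=%u order=%d\n", rounded, __builtin_ctz(rounded));
    }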
diff --git a/Kernel/VM/PhysicalRegion.h b/Kernel/VM/PhysicalRegion.h
index 343eb1dd6f..94daaac1d0 100644
--- a/Kernel/VM/PhysicalRegion.h
+++ b/Kernel/VM/PhysicalRegion.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
+ * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@@ -7,49 +7,51 @@
#pragma once
#include <AK/Bitmap.h>
+#include <AK/NonnullOwnPtrVector.h>
#include <AK/Optional.h>
+#include <AK/OwnPtr.h>
#include <Kernel/VM/PhysicalPage.h>
namespace Kernel {
+class PhysicalZone;
+
class PhysicalRegion {
public:
- static PhysicalRegion create(PhysicalAddress lower, PhysicalAddress upper)
+ static OwnPtr<PhysicalRegion> try_create(PhysicalAddress lower, PhysicalAddress upper)
{
- return { lower, upper };
+ return adopt_own_if_nonnull(new PhysicalRegion { lower, upper });
}
+ ~PhysicalRegion();
+
void expand(PhysicalAddress lower, PhysicalAddress upper);
unsigned finalize_capacity();
+ void initialize_zones();
PhysicalAddress lower() const { return m_lower; }
PhysicalAddress upper() const { return m_upper; }
unsigned size() const { return m_pages; }
- unsigned used() const { return m_used - m_recently_returned.size(); }
- unsigned free() const { return m_pages - m_used + m_recently_returned.size(); }
+ unsigned used() const { return m_used; }
+ unsigned free() const { return m_pages - m_used; }
bool contains(PhysicalAddress paddr) const { return paddr >= m_lower && paddr <= m_upper; }
- PhysicalRegion take_pages_from_beginning(unsigned);
+ OwnPtr<PhysicalRegion> try_take_pages_from_beginning(unsigned);
RefPtr<PhysicalPage> take_free_page();
NonnullRefPtrVector<PhysicalPage> take_contiguous_free_pages(size_t count, size_t physical_alignment = PAGE_SIZE);
void return_page(PhysicalAddress);
private:
- unsigned find_contiguous_free_pages(size_t count, size_t physical_alignment = PAGE_SIZE);
- Optional<unsigned> find_and_allocate_contiguous_range(size_t count, unsigned alignment = 1);
- Optional<unsigned> find_one_free_page();
- void free_page_at(PhysicalAddress);
-
PhysicalRegion(PhysicalAddress lower, PhysicalAddress upper);
+ NonnullOwnPtrVector<PhysicalZone> m_zones;
+
+ size_t m_used { 0 };
+
PhysicalAddress m_lower;
PhysicalAddress m_upper;
unsigned m_pages { 0 };
- unsigned m_used { 0 };
- Bitmap m_bitmap;
- size_t m_free_hint { 0 };
- Vector<PhysicalAddress, 256> m_recently_returned;
};
}
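The create() to try_create() rename follows the kernel's fallible-construction pattern: allocate with a non-throwing new, wrap the result with adopt_own_if_nonnull(), and let callers handle a null OwnPtr (as try_take_pages_from_beginning() now does by returning {}). A generic sketch of the shape of that pattern, with std::unique_ptr standing in for AK::OwnPtr:

    #include <memory>
    #include <new>
    #include <cstdio>

    struct Widget {
        int id;
    };

    // Fallible factory in the style of PhysicalRegion::try_create():
    // a non-throwing allocation that yields an empty pointer on failure
    // instead of throwing.
    static std::unique_ptr<Widget> try_create_widget(int id)
    {
        return std::unique_ptr<Widget>(new (std::nothrow) Widget { id });
    }

    int main()
    {
        auto widget = try_create_widget(7);
        if (!widget) {
            // Caller decides how to degrade; the kernel propagates the
            // failure rather than asserting.
            return 1;
        }
        printf("widget id=%d\n", widget->id);
    }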
diff --git a/Kernel/VM/PhysicalZone.cpp b/Kernel/VM/PhysicalZone.cpp
new file mode 100644
index 0000000000..89c086bc1c
--- /dev/null
+++ b/Kernel/VM/PhysicalZone.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "PhysicalPage.h"
+#include <AK/Format.h>
+#include <Kernel/VM/MemoryManager.h>
+#include <Kernel/VM/PhysicalZone.h>
+
+namespace Kernel {
+
+PhysicalPageEntry& PhysicalZone::get_freelist_entry(ChunkIndex index) const
+{
+ return MM.get_physical_page_entry(m_base_address.offset(index * ZONE_CHUNK_SIZE));
+}
+
+PhysicalZone::PhysicalZone(PhysicalAddress base_address, size_t page_count)
+ : m_base_address(base_address)
+ , m_page_count(page_count)
+ , m_used_chunks(0)
+{
+ size_t const chunk_count = page_count * 2;
+ for (int order = max_order; order >= 0; --order) {
+ auto& bucket = m_buckets[order];
+ size_t block_size = 2u << order;
+ size_t bitmap_size_for_order = ceil_div((size_t)(chunk_count / block_size), (size_t)2);
+ bucket.order = order;
+ if (bitmap_size_for_order)
+ bucket.bitmap.grow(bitmap_size_for_order, false);
+ }
+
+ auto first_order = __builtin_ctz(page_count);
+ size_t block_size = 2u << first_order;
+ auto& bucket = m_buckets[first_order];
+ size_t remaining_chunk_count = chunk_count;
+ size_t initial_bundle_count = remaining_chunk_count / block_size;
+
+ size_t offset = 0;
+ for (size_t i = 0; i < initial_bundle_count; ++i) {
+ ChunkIndex index = offset + i;
+ bucket.set_buddy_bit(index, true);
+
+ auto& freelist_entry = get_freelist_entry(index).freelist;
+ freelist_entry.next_index = bucket.freelist;
+ freelist_entry.prev_index = -1;
+ bucket.freelist = index;
+
+ remaining_chunk_count -= block_size;
+ offset += block_size;
+ }
+}
+
+Optional<PhysicalAddress> PhysicalZone::allocate_block(size_t order)
+{
+ size_t block_size = 2u << order;
+ auto result = allocate_block_impl(order);
+ if (!result.has_value())
+ return {};
+ m_used_chunks += block_size;
+ VERIFY(!(result.value() & 1));
+ return m_base_address.offset(result.value() * ZONE_CHUNK_SIZE);
+}
+
+Optional<PhysicalZone::ChunkIndex> PhysicalZone::allocate_block_impl(size_t order)
+{
+ if (order > max_order)
+ return {};
+ size_t block_size = 2u << order;
+ auto& bucket = m_buckets[order];
+ if (bucket.freelist == -1) {
+ // The freelist for this order is empty, try to allocate a block from one order higher, and split it.
+ auto buddies = allocate_block_impl(order + 1);
+
+ if (!buddies.has_value()) {
+ // Looks like we're unable to satisfy this allocation request.
+ return {};
+ }
+
+ // Split the block from order+1 into two parts.
+ // We keep one (in the freelist for this order) and return the other.
+
+ ChunkIndex index = buddies.value();
+
+ // First half goes in the freelist
+ auto& freelist_entry = get_freelist_entry(index).freelist;
+ freelist_entry.next_index = -1;
+ freelist_entry.prev_index = -1;
+ bucket.freelist = index;
+
+ VERIFY(bucket.get_buddy_bit(index) == false);
+
+ // Set buddy bit to 1 (one used, one unused).
+ bucket.set_buddy_bit(index, true);
+
+ // Second half is returned.
+ return index + block_size;
+ }
+
+ // Freelist has at least one entry, return that.
+ ChunkIndex index = bucket.freelist;
+
+ bucket.freelist = get_freelist_entry(bucket.freelist).freelist.next_index;
+ if (bucket.freelist != -1) {
+ get_freelist_entry(bucket.freelist).freelist.prev_index = -1;
+ }
+
+ VERIFY(bucket.get_buddy_bit(index) == true);
+ bucket.set_buddy_bit(index, false);
+
+ return index;
+}
+
+void PhysicalZone::deallocate_block(PhysicalAddress address, size_t order)
+{
+ size_t block_size = 2u << order;
+ ChunkIndex index = (address.get() - m_base_address.get()) / ZONE_CHUNK_SIZE;
+ deallocate_block_impl(index, order);
+ m_used_chunks -= block_size;
+}
+
+void PhysicalZone::deallocate_block_impl(ChunkIndex index, size_t order)
+{
+ size_t block_size = 2u << order;
+
+ // Basic algorithm:
+ // If the buddy block is free (buddy bit is 1 -- because this block was the only used one):
+ // Then,
+ // 1. Merge with buddy.
+ // 2. Return the merged block to order+1.
+ // Else (buddy bit is 0 -- because both blocks are used)
+ // 1. Add the block to the freelist.
+ // 2. Set buddy bit to 1.
+ auto& bucket = m_buckets[order];
+
+ if (bucket.get_buddy_bit(index)) {
+ // Buddy is free! Merge with buddy and coalesce upwards to the next order.
+ auto buddy_bit_index = bucket.buddy_bit_index(index);
+ ChunkIndex buddy_base_index = (buddy_bit_index << 1) << (1 + order);
+
+ if (index == buddy_base_index)
+ remove_from_freelist(bucket, buddy_base_index + block_size);
+ else
+ remove_from_freelist(bucket, buddy_base_index);
+
+ bucket.set_buddy_bit(index, false);
+ deallocate_block_impl(buddy_base_index, order + 1);
+ } else {
+ // Buddy is in use. Add freed block to freelist and set buddy bit to 1.
+
+ if (bucket.freelist != -1) {
+ get_freelist_entry(bucket.freelist).freelist.prev_index = index;
+ }
+
+ auto& freelist_entry = get_freelist_entry(index).freelist;
+ freelist_entry.next_index = bucket.freelist;
+ freelist_entry.prev_index = -1;
+ bucket.freelist = index;
+
+ bucket.set_buddy_bit(index, true);
+ }
+}
+
+void PhysicalZone::remove_from_freelist(BuddyBucket& bucket, ChunkIndex index)
+{
+ auto& freelist_entry = get_freelist_entry(index).freelist;
+ VERIFY(freelist_entry.prev_index >= -1);
+ VERIFY(freelist_entry.next_index >= -1);
+ if (freelist_entry.prev_index != -1) {
+ auto& prev_entry = get_freelist_entry(freelist_entry.prev_index).freelist;
+ prev_entry.next_index = freelist_entry.next_index;
+ }
+ if (freelist_entry.next_index != -1) {
+ auto& next_entry = get_freelist_entry(freelist_entry.next_index).freelist;
+ next_entry.prev_index = freelist_entry.prev_index;
+ }
+ if (bucket.freelist == index)
+ bucket.freelist = freelist_entry.next_index;
+ freelist_entry.next_index = -1;
+ freelist_entry.prev_index = -1;
+}
+
+void PhysicalZone::dump() const
+{
+ dbgln("(( {} used, {} available, page_count: {} ))", m_used_chunks, available(), m_page_count);
+ for (size_t i = 0; i <= max_order; ++i) {
+ auto& bucket = m_buckets[i];
+ dbgln("[{:2} / {:4}] ", i, (size_t)(2u << i));
+ auto entry = bucket.freelist;
+ while (entry != -1) {
+ dbgln(" {}", entry);
+ entry = get_freelist_entry(entry).freelist.next_index;
+ }
+ }
+}
+
+}
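The merge path in deallocate_block_impl() has to recover, from a buddy-pair bit, the chunk index where the pair begins; the shift expression `(buddy_bit_index << 1) << (1 + order)` is the exact inverse of buddy_bit_index(). A standalone round-trip check of that arithmetic (the two shift formulas mirror the patch; main() is illustrative):

    #include <cstdio>

    // Same arithmetic as PhysicalZone::BuddyBucket::buddy_bit_index():
    // halve the chunk index (one chunk is half a page), then group
    // 2^(order+1) pages into one buddy-pair bit.
    static size_t buddy_bit_index(short index, size_t order)
    {
        return (index >> 1) >> (1 + order);
    }

    int main()
    {
        size_t order = 2; // order-2 block: 4 pages == 8 chunks
        short block_size_in_chunks = 2 << order;

        for (short index = 0; index < 64; index += block_size_in_chunks) {
            // Inverse mapping used by deallocate_block_impl():
            size_t bit = buddy_bit_index(index, order);
            short buddy_base = (short)((bit << 1) << (1 + order));

            // buddy_base is the lower of the two buddies; the other one
            // starts exactly one block later.
            printf("chunk %2d -> bit %zu -> pair [%d, %d]\n",
                index, bit, buddy_base, buddy_base + block_size_in_chunks);
        }
    }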
diff --git a/Kernel/VM/PhysicalZone.h b/Kernel/VM/PhysicalZone.h
new file mode 100644
index 0000000000..468fadaad5
--- /dev/null
+++ b/Kernel/VM/PhysicalZone.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Bitmap.h>
+#include <AK/Forward.h>
+#include <AK/Types.h>
+#include <Kernel/Forward.h>
+
+namespace Kernel {
+
+// A PhysicalZone is an allocator that manages a sub-area of a PhysicalRegion.
+// Its total size is always a power of two.
+// You allocate chunks at a time. One chunk is PAGE_SIZE/2, and the minimum allocation size is 2 chunks.
+// The allocator uses a buddy block scheme internally.
+
+class PhysicalZone {
+public:
+ static constexpr size_t ZONE_CHUNK_SIZE = PAGE_SIZE / 2;
+ using ChunkIndex = i16;
+
+ PhysicalZone(PhysicalAddress base, size_t page_count);
+
+ Optional<PhysicalAddress> allocate_block(size_t order);
+ void deallocate_block(PhysicalAddress, size_t order);
+
+ void dump() const;
+ size_t available() const { return m_page_count - (m_used_chunks / 2); }
+
+ PhysicalAddress base() const { return m_base_address; }
+ bool contains(PhysicalAddress paddr) const
+ {
+ return paddr >= m_base_address && paddr < m_base_address.offset(m_page_count * PAGE_SIZE);
+ }
+
+private:
+ Optional<ChunkIndex> allocate_block_impl(size_t order);
+ void deallocate_block_impl(ChunkIndex, size_t order);
+
+ struct BuddyBucket {
+ bool get_buddy_bit(ChunkIndex index) const
+ {
+ return bitmap.get(buddy_bit_index(index));
+ }
+
+ void set_buddy_bit(ChunkIndex index, bool value)
+ {
+ bitmap.set(buddy_bit_index(index), value);
+ }
+
+ size_t buddy_bit_index(ChunkIndex index) const
+ {
+ // NOTE: We cut the index in half since one chunk is half a page.
+ return (index >> 1) >> (1 + order);
+ }
+
+ // This bucket's index in the m_buckets array. (Redundant data kept here for convenience.)
+ size_t order { 0 };
+
+ // This is the start of the freelist for this buddy size.
+ // It's an index into the global PhysicalPageEntry array (offset by this PhysicalRegion's base.)
+ // A value of -1 indicates an empty freelist.
+ ChunkIndex freelist { -1 };
+
+ // Bitmap with 1 bit per buddy pair.
+ // 0 == Both blocks either free or used.
+ // 1 == One block free, one block used.
+ Bitmap bitmap;
+ };
+
+ static constexpr size_t max_order = 12;
+ BuddyBucket m_buckets[max_order + 1];
+
+ PhysicalPageEntry& get_freelist_entry(ChunkIndex) const;
+ void remove_from_freelist(BuddyBucket&, ChunkIndex);
+
+ PhysicalAddress m_base_address { 0 };
+ size_t m_page_count { 0 };
+ size_t m_used_chunks { 0 };
+};
+
+}
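Putting the pieces together, a zone could be exercised roughly like this. This is a hypothetical sketch, not from the patch: the base address and zone size are invented, and it only makes sense inside the kernel, since PhysicalZone's freelist entries live in MemoryManager's global PhysicalPageEntry array:

    // Hypothetical: a 256-page (1 MB) zone based at 16 MB.
    PhysicalZone zone { PhysicalAddress(0x1000000), 256 };

    // Order 0 is the minimum allocation: 2 chunks == 1 page.
    auto page = zone.allocate_block(0);
    VERIFY(page.has_value());

    // Order 3 carves out an 8-page block (2 << 3 == 16 chunks).
    auto block = zone.allocate_block(3);
    VERIFY(block.has_value());

    // available() counts whole pages: 256 - 1 - 8 == 247.
    VERIFY(zone.available() == 247);

    // Freeing both lets the buddies coalesce back toward one big block.
    zone.deallocate_block(block.value(), 3);
    zone.deallocate_block(page.value(), 0);
    VERIFY(zone.available() == 256);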