From 4e0adb638dfe51916cbb708eb4cf6c4dee4e39d9 Mon Sep 17 00:00:00 2001
From: Peter Elliott
Date: Wed, 4 May 2022 23:08:57 -0600
Subject: LibC: Implement posix_memalign(3) and aligned_alloc(3)

Some ports linked against posix_memalign, but didn't use it, and others
used it if it was available. So I decided to implement posix_memalign.

My implementation adds almost no overhead to regular mallocs. However,
if an alignment is specified, it will use the smallest ChunkedBlock for
which aligned chunks exist, and simply use one of the chunks that is
aligned. If it cannot use a ChunkedBlock, for size or alignment reasons,
it will use a BigAllocationBlock, and return a pointer to the first
aligned address past the start of the block.

This implementation supports alignments up to 32768, due to the
limitations of the BigAllocationBlock technique.
---
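Notes (placed after the "---" separator, where git-am ignores them): a
minimal usage sketch of the two new entry points, assuming only the
declarations added to stdlib.h below. The sample program is hypothetical
and not part of this patch. Note that malloc_impl() only rejects
alignments that are not powers of two; POSIX additionally expects
posix_memalign()'s alignment to be a multiple of sizeof(void*).

    // Hypothetical caller, for illustration only.
    #include <stdio.h>
    #include <stdlib.h>

    int main()
    {
        // posix_memalign() reports failure through its return value and
        // leaves errno untouched: 0 on success, else EINVAL or ENOMEM.
        void* p = nullptr;
        int rc = posix_memalign(&p, 4096, 1000);
        if (rc != 0) {
            fprintf(stderr, "posix_memalign failed: %d\n", rc);
            return 1;
        }
        free(p);

        // aligned_alloc() reports failure the way malloc() does:
        // nullptr with errno set.
        void* q = aligned_alloc(64, 256);
        if (q == nullptr) {
            perror("aligned_alloc");
            return 1;
        }
        free(q);
        return 0;
    }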
 Userland/Libraries/LibC/CMakeLists.txt |   3 -
 Userland/Libraries/LibC/malloc.cpp     | 212 ++++++++++++++++++++++++++-------
 Userland/Libraries/LibC/stdlib.cpp     |  11 --
 Userland/Libraries/LibC/stdlib.h       |   6 +-
 4 files changed, 172 insertions(+), 60 deletions(-)

(limited to 'Userland/Libraries')

diff --git a/Userland/Libraries/LibC/CMakeLists.txt b/Userland/Libraries/LibC/CMakeLists.txt
index 9485cc602d..2f8a603cb6 100644
--- a/Userland/Libraries/LibC/CMakeLists.txt
+++ b/Userland/Libraries/LibC/CMakeLists.txt
@@ -139,9 +139,6 @@ set(SOURCES ${LIBC_SOURCES} ${AK_SOURCES} ${ELF_SOURCES} ${ASM_SOURCES})
 # Prevent GCC from removing null checks by marking the `FILE*` argument non-null
 set_source_files_properties(stdio.cpp PROPERTIES COMPILE_FLAGS "-fno-builtin-fputc -fno-builtin-fputs -fno-builtin-fwrite")
 
-# Add in the `posix_memalign` symbol to avoid breaking existing binaries.
-set_source_files_properties(stdlib.cpp PROPERTIES COMPILE_FLAGS "-DSERENITY_LIBC_SHOW_POSIX_MEMALIGN")
-
 # Prevent naively implemented string functions (like strlen) from being "optimized" into a call to themselves.
 if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     set_source_files_properties(string.cpp wchar.cpp PROPERTIES COMPILE_FLAGS "-fno-tree-loop-distribution -fno-tree-loop-distribute-patterns")
diff --git a/Userland/Libraries/LibC/malloc.cpp b/Userland/Libraries/LibC/malloc.cpp
index 5b3e7add2b..05381069dc 100644
--- a/Userland/Libraries/LibC/malloc.cpp
+++ b/Userland/Libraries/LibC/malloc.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2018-2021, Andreas Kling
+ * Copyright (c) 2022, Peter Elliott
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -153,12 +154,75 @@ static inline BigAllocator (&big_allocators())[1]
 {
     return reinterpret_cast<BigAllocator(&)[1]>(g_big_allocators_storage);
 }
-static Allocator* allocator_for_size(size_t size, size_t& good_size)
+// --- BEGIN MATH ---
+// This stuff is only used for checking if there exists an aligned block in a
+// chunk. It has no bearing on the rest of the allocator, especially for
+// regular malloc.
+
+static inline unsigned long modulo(long a, long b)
+{
+    return (b + (a % b)) % b;
+}
+
+struct EuclideanResult {
+    long x;
+    long y;
+    long gcd;
+};
+
+// Returns x, y, gcd.
+static inline EuclideanResult extended_euclid(long a, long b)
+{
+    EuclideanResult old = { 1, 0, a };
+    EuclideanResult current = { 0, 1, b };
+
+    while (current.gcd != 0) {
+        long quotient = old.gcd / current.gcd;
+
+        EuclideanResult next = {
+            old.x - quotient * current.x,
+            old.y - quotient * current.y,
+            old.gcd - quotient * current.gcd,
+        };
+
+        old = current;
+        current = next;
+    }
+
+    return old;
+}
+
+static inline bool block_has_aligned_chunk(long align, long bytes_per_chunk, long chunk_capacity)
+{
+    // Never do math on a normal malloc.
+    if ((size_t)align <= sizeof(ChunkedBlock))
+        return true;
+
+    // Solve the linear congruence n*bytes_per_chunk = -sizeof(ChunkedBlock) (mod align).
+    auto [x, y, gcd] = extended_euclid(bytes_per_chunk % align, align);
+    long constant = modulo(-sizeof(ChunkedBlock), align);
+    if (constant % gcd != 0)
+        // No solution. Chunk size is probably a multiple of align.
+        return false;
+
+    long n = modulo(x * (constant / gcd), align);
+    if (x < 0)
+        n = (n + align / gcd) % align;
+
+    // Don't ask me to prove this.
+    VERIFY(n > 0);
+    return n < chunk_capacity;
+}
+
+// --- END MATH ---
+
+static Allocator* allocator_for_size(size_t size, size_t& good_size, size_t align = 1)
 {
     for (size_t i = 0; size_classes[i]; ++i) {
-        if (size <= size_classes[i]) {
+        auto& allocator = allocators()[i];
+        if (size <= size_classes[i] && block_has_aligned_chunk(align, allocator.size, (ChunkedBlock::block_size - sizeof(ChunkedBlock)) / allocator.size)) {
             good_size = size_classes[i];
-            return &allocators()[i];
+            return &allocator;
         }
     }
     good_size = PAGE_ROUND_UP(size);
@@ -176,7 +240,7 @@ static BigAllocator* big_allocator_for_size(size_t size)
 
 extern "C" {
 
-static void* os_alloc(size_t size, char const* name)
+static ErrorOr<void*> os_alloc(size_t size, char const* name)
 {
     int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_PURGEABLE;
 #if ARCH(X86_64)
@@ -185,8 +249,7 @@ static void* os_alloc(size_t size, char const* name)
     auto* ptr = serenity_mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, 0, 0, ChunkedBlock::block_size, name);
     VERIFY(ptr != nullptr);
     if (ptr == MAP_FAILED) {
-        errno = ENOMEM;
-        return nullptr;
+        return ENOMEM;
     }
     return ptr;
 }
@@ -197,6 +260,31 @@ static void os_free(void* ptr, size_t size)
     assert(rc == 0);
 }
 
+static void* try_allocate_chunk_aligned(size_t align, ChunkedBlock& block)
+{
+    // These loops are guaranteed to run only once for a standard-aligned malloc.
+    for (FreelistEntry** entry = &(block.m_freelist); *entry != nullptr; entry = &((*entry)->next)) {
+        if ((reinterpret_cast<uintptr_t>(*entry) & (align - 1)) == 0) {
+            --block.m_free_chunks;
+            void* ptr = *entry;
+            *entry = (*entry)->next; // Delete the entry.
+            return ptr;
+        }
+    }
+    for (; block.m_next_lazy_freelist_index < block.chunk_capacity(); block.m_next_lazy_freelist_index++) {
+        void* ptr = block.m_slot + block.m_next_lazy_freelist_index * block.m_size;
+        if ((reinterpret_cast<uintptr_t>(ptr) & (align - 1)) == 0) {
+            --block.m_free_chunks;
+            block.m_next_lazy_freelist_index++;
+            return ptr;
+        }
+        auto* entry = (FreelistEntry*)ptr;
+        entry->next = block.m_freelist;
+        block.m_freelist = entry;
+    }
+    return nullptr;
+}
+
 enum class CallerWillInitializeMemory {
     No,
     Yes,
@@ -208,12 +296,20 @@
 __thread bool s_allocation_enabled;
 #endif
 
-static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_initialize_memory)
+static ErrorOr<void*> malloc_impl(size_t size, size_t align, CallerWillInitializeMemory caller_will_initialize_memory)
 {
 #ifndef NO_TLS
     VERIFY(s_allocation_enabled);
 #endif
 
+    // Align must be a power of 2.
+    if (popcount(align) != 1)
+        return EINVAL;
+
+    // FIXME: Support larger than 32KiB alignments (if you dare).
+    if (sizeof(BigAllocationBlock) + align >= ChunkedBlock::block_size)
+        return EINVAL;
+
     if (s_log_malloc)
         dbgln("LibC: malloc({})", size);
 
@@ -226,16 +322,15 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
     g_malloc_stats.number_of_malloc_calls++;
 
     size_t good_size;
-    auto* allocator = allocator_for_size(size, good_size);
+    auto* allocator = allocator_for_size(size, good_size, align);
 
     PthreadMutexLocker locker(s_malloc_mutex);
 
     if (!allocator) {
-        size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size, ChunkedBlock::block_size);
+        size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size + ((align > 16) ? align : 0), ChunkedBlock::block_size);
         if (real_size < size) {
             dbgln_if(MALLOC_DEBUG, "LibC: Detected overflow trying to do big allocation of size {} for {}", real_size, size);
-            errno = ENOMEM;
-            return nullptr;
+            return ENOMEM;
         }
 #ifdef RECYCLE_BIG_ALLOCATIONS
         if (auto* allocator = big_allocator_for_size(real_size)) {
@@ -257,27 +352,31 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
                 new (block) BigAllocationBlock(real_size);
             }
 
-            ue_notify_malloc(&block->m_slot[0], size);
-            return &block->m_slot[0];
+            void* ptr = reinterpret_cast<void*>(round_up_to_power_of_two(reinterpret_cast<uintptr_t>(&block->m_slot[0]), align));
+
+            ue_notify_malloc(ptr, size);
+            return ptr;
         }
     }
 #endif
-        auto* block = (BigAllocationBlock*)os_alloc(real_size, "malloc: BigAllocationBlock");
-        if (block == nullptr) {
-            dbgln_if(MALLOC_DEBUG, "LibC: Failed to do big allocation of size {} for {}", real_size, size);
-            return nullptr;
-        }
+        auto* block = (BigAllocationBlock*)TRY(os_alloc(real_size, "malloc: BigAllocationBlock"));
         g_malloc_stats.number_of_big_allocs++;
         new (block) BigAllocationBlock(real_size);
-        ue_notify_malloc(&block->m_slot[0], size);
-        return &block->m_slot[0];
+
+        void* ptr = reinterpret_cast<void*>(round_up_to_power_of_two(reinterpret_cast<uintptr_t>(&block->m_slot[0]), align));
+        ue_notify_malloc(ptr, size);
+        return ptr;
     }
 
     ChunkedBlock* block = nullptr;
+    void* ptr = nullptr;
 
     for (auto& current : allocator->usable_blocks) {
         if (current.free_chunks()) {
-            block = &current;
-            break;
+            ptr = try_allocate_chunk_aligned(align, current);
+            if (ptr) {
+                block = &current;
+                break;
+            }
         }
     }
@@ -321,23 +420,16 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
         g_malloc_stats.number_of_block_allocs++;
         char buffer[64];
         snprintf(buffer, sizeof(buffer), "malloc: ChunkedBlock(%zu)", good_size);
-        block = (ChunkedBlock*)os_alloc(ChunkedBlock::block_size, buffer);
-        if (block == nullptr) {
-            return nullptr;
-        }
+        block = (ChunkedBlock*)TRY(os_alloc(ChunkedBlock::block_size, buffer));
         new (block) ChunkedBlock(good_size);
         allocator->usable_blocks.append(*block);
         ++allocator->block_count;
     }
 
-    --block->m_free_chunks;
-    void* ptr = block->m_freelist;
-    if (ptr) {
-        block->m_freelist = block->m_freelist->next;
-    } else {
-        ptr = block->m_slot + block->m_next_lazy_freelist_index * block->m_size;
-        block->m_next_lazy_freelist_index++;
+    if (!ptr) {
+        ptr = try_allocate_chunk_aligned(align, *block);
     }
+    VERIFY(ptr);
 
     if (block->is_full()) {
         g_malloc_stats.number_of_blocks_full++;
@@ -451,14 +543,21 @@ static void free_impl(void* ptr)
 void* malloc(size_t size)
 {
     MemoryAuditingSuppressor suppressor;
-    void* ptr = malloc_impl(size, CallerWillInitializeMemory::No);
+    auto ptr_or_error = malloc_impl(size, 16, CallerWillInitializeMemory::No);
+
+    if (ptr_or_error.is_error()) {
+        errno = ptr_or_error.error().code();
+        return nullptr;
+    }
+
     if (s_profiling)
-        perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast<FlatPtr>(ptr));
-    return ptr;
+        perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast<FlatPtr>(ptr_or_error.value()));
+
+    return ptr_or_error.value();
 }
 
 // This is a Microsoft extension, and is not found on other Unix-like systems.
-// FIXME: Implement aligned_alloc() instead
+// FIXME: Remove this when all patches have been switched to aligned_alloc()
 //
 // This is used in libc++ to implement C++17 aligned new/delete.
 //
@@ -513,10 +612,41 @@ void* calloc(size_t count, size_t size)
         return nullptr;
     }
     size_t new_size = count * size;
-    auto* ptr = malloc_impl(new_size, CallerWillInitializeMemory::Yes);
-    if (ptr)
-        memset(ptr, 0, new_size);
-    return ptr;
+    auto ptr_or_error = malloc_impl(new_size, 16, CallerWillInitializeMemory::Yes);
+
+    if (ptr_or_error.is_error()) {
+        errno = ptr_or_error.error().code();
+        return nullptr;
+    }
+
+    memset(ptr_or_error.value(), 0, new_size);
+    return ptr_or_error.value();
 }
+
+// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html
+int posix_memalign(void** memptr, size_t alignment, size_t size)
+{
+    MemoryAuditingSuppressor suppressor;
+    auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No);
+
+    if (ptr_or_error.is_error())
+        return ptr_or_error.error().code();
+
+    *memptr = ptr_or_error.value();
+    return 0;
+}
+
+void* aligned_alloc(size_t alignment, size_t size)
+{
+    MemoryAuditingSuppressor suppressor;
+    auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No);
+
+    if (ptr_or_error.is_error()) {
+        errno = ptr_or_error.error().code();
+        return nullptr;
+    }
+
+    return ptr_or_error.value();
+}
 
 size_t malloc_size(void const* ptr)
diff --git a/Userland/Libraries/LibC/stdlib.cpp b/Userland/Libraries/LibC/stdlib.cpp
index bd21c87fa7..695c9c35e8 100644
--- a/Userland/Libraries/LibC/stdlib.cpp
+++ b/Userland/Libraries/LibC/stdlib.cpp
@@ -1335,14 +1335,3 @@ void _Exit(int status)
 {
     _exit(status);
 }
-
-#ifdef SERENITY_LIBC_SHOW_POSIX_MEMALIGN
-// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html
-int posix_memalign(void** memptr, size_t alignment, size_t size)
-{
-    (void)memptr;
-    (void)alignment;
-    (void)size;
-    TODO();
-}
-#endif
diff --git a/Userland/Libraries/LibC/stdlib.h b/Userland/Libraries/LibC/stdlib.h
index 2b38dc6590..bdf9a56252 100644
--- a/Userland/Libraries/LibC/stdlib.h
+++ b/Userland/Libraries/LibC/stdlib.h
@@ -101,11 +101,7 @@ int posix_openpt(int flags);
 int grantpt(int fd);
 int unlockpt(int fd);
 
-// FIXME: Remove the ifdef once we have a working memalign implementation.
-// This is hidden by default until then because many applications prefer
-// `posix_memalign` over other implementations of aligned memory.
-#ifdef SERENITY_LIBC_SHOW_POSIX_MEMALIGN
 int posix_memalign(void**, size_t alignment, size_t size);
-#endif
+__attribute__((malloc, alloc_size(2), alloc_align(1))) void* aligned_alloc(size_t alignment, size_t size);
 
 __END_DECLS
-- 
cgit v1.2.3
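
A note on the --- BEGIN MATH --- section above. Chunk n of a
ChunkedBlock lives at offset sizeof(ChunkedBlock) + n * bytes_per_chunk
from the start of its block_size-aligned mmap region, so an aligned
chunk exists exactly when n * bytes_per_chunk = -sizeof(ChunkedBlock)
(mod align) has a solution with 0 <= n < chunk_capacity. The following
standalone sketch shows the same check with the header size passed in as
a parameter (the examples use 8; the real sizeof(ChunkedBlock) differs),
and with the solution reduced modulo align / gcd, the period of the
congruence, in place of the patch's x < 0 fix-up. A brute-force loop
cross-checks the closed form.

    // Standalone sketch of the aligned-chunk existence check; "header"
    // stands in for sizeof(ChunkedBlock). Not the patch's exact code.
    #include <cassert>

    struct EuclideanResult {
        long x;
        long y;
        long gcd;
    };

    // Iterative extended Euclid, as in the patch: every row keeps the
    // invariant a * x + b * y == gcd, so the last row with a nonzero
    // gcd carries Bezout coefficients for gcd(a, b).
    static EuclideanResult extended_euclid(long a, long b)
    {
        EuclideanResult old = { 1, 0, a };
        EuclideanResult current = { 0, 1, b };

        while (current.gcd != 0) {
            long quotient = old.gcd / current.gcd;
            EuclideanResult next = {
                old.x - quotient * current.x,
                old.y - quotient * current.y,
                old.gcd - quotient * current.gcd,
            };
            old = current;
            current = next;
        }
        return old;
    }

    // Least non-negative residue, as in the patch.
    static long modulo(long a, long b)
    {
        return (b + (a % b)) % b;
    }

    // Is any offset header + n * chunk_size with 0 <= n < capacity
    // divisible by align? Solves n * chunk_size = -header (mod align).
    static bool has_aligned_chunk(long align, long chunk_size, long capacity, long header)
    {
        auto [x, y, gcd] = extended_euclid(chunk_size % align, align);
        (void)y; // The second Bezout coefficient is not needed here.

        long constant = modulo(-header, align);
        if (constant % gcd != 0)
            return false; // The congruence has no solution at all.

        // Solutions recur with period align / gcd; take the smallest.
        long n = modulo(x * (constant / gcd), align / gcd);
        return n < capacity;
    }

    int main()
    {
        // 40-byte chunks behind an 8-byte header: offsets 8, 48, 88,
        // 128, ... and 128 is 32-aligned, so chunk n = 3 qualifies.
        assert(has_aligned_chunk(32, 40, 10, 8));
        assert(!has_aligned_chunk(32, 40, 3, 8)); // n = 3 is past capacity.

        // 96 is a multiple of 32, so every offset is stuck at 8 (mod 32).
        assert(!has_aligned_chunk(32, 96, 10, 8));

        // Cross-check the closed form against brute force.
        for (long align = 1; align <= 64; align *= 2) {
            for (long chunk_size = 8; chunk_size <= 256; chunk_size += 8) {
                bool brute = false;
                for (long n = 0; n < 32; ++n)
                    brute = brute || (8 + n * chunk_size) % align == 0;
                assert(brute == has_aligned_chunk(align, chunk_size, 32, 8));
            }
        }
        return 0;
    }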