diff options
author | Andrew Kaster <andrewdkaster@gmail.com> | 2020-01-03 23:31:51 -0500 |
---|---|---|
committer | Andreas Kling <awesomekling@gmail.com> | 2020-01-04 10:39:04 +0100 |
commit | 767f4c7421139372b50d25c64468e6e6d157a1b2 (patch) | |
tree | 32ca953910c45e81c563ef88bf377864c25c8b4b | |
parent | 85b95f472d1c4932d0a92d70b27ad06910212807 (diff) | |
download | serenity-767f4c7421139372b50d25c64468e6e6d157a1b2.zip |
LibELF+LibC: Split ELFDynamicObject into a Loader + Object
Separate some responsibilities:
ELFDynamicLoader is responsible for loading elf binaries from disk and
performing relocations, calling init functions, and eventually calling
finalizer functions.
ELFDynamicObject is a helper class to parse the .dynamic section of an
elf binary, or the table of Elf32_Dyn entries at the _DYNAMIC symbol.
ELFDynamicObject now owns the helper classes for Relocations, Symbols,
Sections and the like that ELFDynamicLoader will use to perform
relocations and symbol lookup.
Because these new helpers are constructed from offsets into the .dynamic
section within the loaded .data section of the binary, we don't need the
ELFImage for nearly as much of the loading process as we did before.
Therefore we can remove most of the extra DynamicXXX classes and just
keep the one that lets us find the location of _DYNAMIC in the new ELF.
And finally, since we changed the name of the class that dlopen/dlsym
care about, we need to compile/link and use the new ELFDynamicLoader
class in LibC.
-rw-r--r-- | Libraries/LibC/Makefile | 1 | ||||
-rw-r--r-- | Libraries/LibC/dlfcn.cpp | 14 | ||||
-rw-r--r-- | Libraries/LibELF/ELFDynamicLoader.cpp | 343 | ||||
-rw-r--r-- | Libraries/LibELF/ELFDynamicLoader.h | 89 | ||||
-rw-r--r-- | Libraries/LibELF/ELFDynamicObject.cpp | 462 | ||||
-rw-r--r-- | Libraries/LibELF/ELFDynamicObject.h | 286 | ||||
-rw-r--r-- | Libraries/LibELF/ELFImage.cpp | 46 | ||||
-rw-r--r-- | Libraries/LibELF/ELFImage.h | 125 |
8 files changed, 760 insertions, 606 deletions
diff --git a/Libraries/LibC/Makefile b/Libraries/LibC/Makefile index c070b77375..ca67dfed31 100644 --- a/Libraries/LibC/Makefile +++ b/Libraries/LibC/Makefile @@ -58,6 +58,7 @@ LIBC_OBJS = \ ELF_OBJS = \ ../LibELF/ELFDynamicObject.o \ + ../LibELF/ELFDynamicLoader.o \ ../LibELF/ELFImage.o OBJS = $(AK_OBJS) $(LIBC_OBJS) $(ELF_OBJS) diff --git a/Libraries/LibC/dlfcn.cpp b/Libraries/LibC/dlfcn.cpp index 200f0a7262..1fc94e051f 100644 --- a/Libraries/LibC/dlfcn.cpp +++ b/Libraries/LibC/dlfcn.cpp @@ -12,12 +12,12 @@ #include <AK/ScopeGuard.h> #include <AK/String.h> #include <AK/StringBuilder.h> -#include <LibELF/ELFDynamicObject.h> +#include <LibELF/ELFDynamicLoader.h> // NOTE: The string here should never include a trailing newline (according to POSIX) String g_dlerror_msg; -HashMap<String, RefPtr<ELFDynamicObject>> g_elf_objects; +HashMap<String, RefPtr<ELFDynamicLoader>> g_elf_objects; extern "C" { @@ -68,19 +68,19 @@ void* dlopen(const char* filename, int flags) return nullptr; } - auto image = ELFDynamicObject::construct(filename, fd, file_stats.st_size); + auto loader = ELFDynamicLoader::construct(filename, fd, file_stats.st_size); - if (!image->is_valid()) { + if (!loader->is_valid()) { g_dlerror_msg = String::format("%s is not a valid ELF dynamic shared object!", filename); return nullptr; } - if (!image->load(flags)) { + if (!loader->load_from_image(flags)) { g_dlerror_msg = String::format("Failed to load ELF object %s", filename); return nullptr; } - g_elf_objects.set(file_path.basename(), move(image)); + g_elf_objects.set(file_path.basename(), move(loader)); g_dlerror_msg = "Successfully loaded ELF object."; // we have one refcount already @@ -91,7 +91,7 @@ void* dlsym(void* handle, const char* symbol_name) { // FIXME: When called with a NULL handle we're supposed to search every dso in the process... 
that'll get expensive ASSERT(handle); - auto* dso = reinterpret_cast<ELFDynamicObject*>(handle); + auto* dso = reinterpret_cast<ELFDynamicLoader*>(handle); void* symbol = dso->symbol_for_name(symbol_name); if (!symbol) { g_dlerror_msg = "Symbol not found"; diff --git a/Libraries/LibELF/ELFDynamicLoader.cpp b/Libraries/LibELF/ELFDynamicLoader.cpp new file mode 100644 index 0000000000..ef1e0e71ed --- /dev/null +++ b/Libraries/LibELF/ELFDynamicLoader.cpp @@ -0,0 +1,343 @@ +#include <AK/StringBuilder.h> +#include <LibELF/ELFDynamicLoader.h> + +#include <assert.h> +#include <dlfcn.h> +#include <mman.h> +#include <stdio.h> +#include <stdlib.h> + +#define DYNAMIC_LOAD_DEBUG +//#define DYNAMIC_LOAD_VERBOSE + +#ifdef DYNAMIC_LOAD_VERBOSE +# define VERBOSE(fmt, ...) dbgprintf(fmt, ##__VA_ARGS__) +#else +# define VERBOSE(fmt, ...) \ + do { \ + } while (0) +#endif + +static bool s_always_bind_now = false; + +NonnullRefPtr<ELFDynamicLoader> ELFDynamicLoader::construct(const char* filename, int fd, size_t size) +{ + return adopt(*new ELFDynamicLoader(filename, fd, size)); +} + +ELFDynamicLoader::ELFDynamicLoader(const char* filename, int fd, size_t size) + : m_filename(filename) + , m_file_size(size) + , m_image_fd(fd) +{ + String file_mmap_name = String::format("ELF_DYN: %s", m_filename.characters()); + + m_file_mapping = mmap_with_name(nullptr, size, PROT_READ, MAP_PRIVATE, m_image_fd, 0, file_mmap_name.characters()); + if (MAP_FAILED == m_file_mapping) { + m_valid = false; + } +} + +ELFDynamicLoader::~ELFDynamicLoader() +{ + if (MAP_FAILED != m_file_mapping) + munmap(m_file_mapping, m_file_size); +} + +void* ELFDynamicLoader::symbol_for_name(const char* name) +{ + auto symbol = m_dynamic_object->hash_section().lookup_symbol(name); + + if (symbol.is_undefined()) + return nullptr; + + return m_dynamic_object->base_address().offset(symbol.value()).as_ptr(); +} + +bool ELFDynamicLoader::load_from_image(unsigned flags) +{ + ELFImage elf_image((u8*)m_file_mapping); + + m_valid = 
elf_image.is_valid() && elf_image.is_dynamic(); + + if (!m_valid) { + return false; + } + + const ELFImage::DynamicSection probably_dynamic_section = elf_image.dynamic_section(); + if (StringView(".dynamic") != probably_dynamic_section.name() || probably_dynamic_section.type() != SHT_DYNAMIC) { + m_valid = false; + return false; + } + +#ifdef DYNAMIC_LOAD_VERBOSE + m_image->dump(); +#endif + + load_program_headers(elf_image); + + const ELFImage::DynamicSection image_dynamic_section = elf_image.dynamic_section(); + m_dynamic_object = AK::make<ELFDynamicObject>(m_text_segment_load_address, image_dynamic_section.offset()); + + return load_stage_2(flags); +} + +bool ELFDynamicLoader::load_stage_2(unsigned flags) +{ + ASSERT(flags & RTLD_GLOBAL); + ASSERT(flags & RTLD_LAZY); + +#ifdef DYNAMIC_LOAD_DEBUG + m_dynamic_object->dump(); +#endif + + if (m_dynamic_object->has_text_relocations()) { + ASSERT(m_text_segment_load_address.get() != 0); + if (0 > mprotect(m_text_segment_load_address.as_ptr(), m_text_segment_size, PROT_READ | PROT_WRITE)) { + perror("mprotect"); // FIXME: dlerror? + return false; + } + } + + do_relocations(); + setup_plt_trampoline(); + + // Clean up our setting of .text to PROT_READ | PROT_WRITE + if (m_dynamic_object->has_text_relocations()) { + if (0 > mprotect(m_text_segment_load_address.as_ptr(), m_text_segment_size, PROT_READ | PROT_EXEC)) { + perror("mprotect"); // FIXME: dlerror? 
+ return false; + } + } + + call_object_init_functions(); + +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Loaded %s\n", m_filename.characters()); +#endif + return true; +} + +void ELFDynamicLoader::load_program_headers(const ELFImage& elf_image) +{ + size_t total_required_allocation_size = 0; // NOTE: If we don't have any TEXTREL, we can keep RO data RO, which would be nice + + Vector<ProgramHeaderRegion> program_headers; + + ProgramHeaderRegion* text_region_ptr = nullptr; + ProgramHeaderRegion* data_region_ptr = nullptr; + ProgramHeaderRegion* tls_region_ptr = nullptr; + + elf_image.for_each_program_header([&](const ELFImage::ProgramHeader& program_header) { + ProgramHeaderRegion new_region; + new_region.set_program_header(program_header.raw_header()); + if (new_region.is_load()) + total_required_allocation_size += new_region.required_load_size(); + program_headers.append(move(new_region)); + auto& region = program_headers.last(); + if (region.is_tls_template()) + tls_region_ptr = &region; + else if (region.is_load()) { + if (region.is_executable()) + text_region_ptr = &region; + else + data_region_ptr = &region; + } + }); + + ASSERT(text_region_ptr && data_region_ptr); + + // Process regions in order: .text, .data, .tls + auto* region = text_region_ptr; + void* text_segment_begin = mmap_with_name(nullptr, region->required_load_size(), region->mmap_prot(), MAP_PRIVATE, m_image_fd, region->offset(), String::format(".text: %s", m_filename.characters()).characters()); + if (MAP_FAILED == text_segment_begin) { + ASSERT_NOT_REACHED(); + } + m_text_segment_size = region->required_load_size(); + m_text_segment_load_address = VirtualAddress { (u32)text_segment_begin }; + + region = data_region_ptr; + void* data_segment_begin = mmap_with_name((u8*)text_segment_begin + m_text_segment_size, region->required_load_size(), region->mmap_prot(), MAP_ANONYMOUS | MAP_PRIVATE, 0, 0, String::format(".data: %s", m_filename.characters()).characters()); + if (MAP_FAILED == data_segment_begin) { +
ASSERT_NOT_REACHED(); + } + VirtualAddress data_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin); + memcpy(data_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image()); + + // FIXME: Do some kind of 'allocate TLS section' or some such from a per-application pool + if (tls_region_ptr) { + region = tls_region_ptr; + // FIXME: This can't be right either. TLS needs some real work i'd say :) + m_tls_segment_address = tls_region_ptr->desired_load_address(); + VirtualAddress tls_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin); + memcpy(tls_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image()); + } +} + +void ELFDynamicLoader::do_relocations() +{ + u32 load_base_address = m_dynamic_object->base_address().get(); + + // FIXME: We should really bail on undefined symbols here. + + auto main_relocation_section = m_dynamic_object->relocation_section(); + + main_relocation_section.for_each_relocation([&](const ELFDynamicObject::Relocation& relocation) { + VERBOSE("====== RELOCATION %d: offset 0x%08X, type %d, symidx %08X\n", relocation.offset_in_section() / main_relocation_section.entry_size(), relocation.offset(), relocation.type(), relocation.symbol_index()); + u32* patch_ptr = (u32*)(load_base_address + relocation.offset()); + switch (relocation.type()) { + case R_386_NONE: + // Apparently most loaders will just skip these? + // Seems if the 'link editor' generates one something is funky with your code + VERBOSE("None relocation. 
No symbol, no nothin.\n"); + break; + case R_386_32: { + auto symbol = relocation.symbol(); + VERBOSE("Absolute relocation: name: '%s', value: %p\n", symbol.name(), symbol.value()); + u32 symbol_address = symbol.value() + load_base_address; + *patch_ptr += symbol_address; + VERBOSE(" Symbol address: %p\n", *patch_ptr); + break; + } + case R_386_PC32: { + auto symbol = relocation.symbol(); + VERBOSE("PC-relative relocation: '%s', value: %p\n", symbol.name(), symbol.value()); + u32 relative_offset = (symbol.value() - relocation.offset()); + *patch_ptr += relative_offset; + VERBOSE(" Symbol address: %p\n", *patch_ptr); + break; + } + case R_386_GLOB_DAT: { + auto symbol = relocation.symbol(); + VERBOSE("Global data relocation: '%s', value: %p\n", symbol.name(), symbol.value()); + u32 symbol_location = load_base_address + symbol.value(); + *patch_ptr = symbol_location; + VERBOSE(" Symbol address: %p\n", *patch_ptr); + break; + } + case R_386_RELATIVE: { + // FIXME: According to the spec, R_386_relative ones must be done first. + // We could explicitly do them first using m_number_of_relocatoins from DT_RELCOUNT + // However, our compiler is nice enough to put them at the front of the relocations for us :) + VERBOSE("Load address relocation at offset %X\n", relocation.offset()); + VERBOSE(" patch ptr == %p, adding load base address (%p) to it and storing %p\n", *patch_ptr, load_base_address, *patch_ptr + load_base_address); + *patch_ptr += load_base_address; // + addend for RelA (addend for Rel is stored at addr) + break; + } + case R_386_TLS_TPOFF: { + VERBOSE("Relocation type: R_386_TLS_TPOFF at offset %X\n", relocation.offset()); + // FIXME: this can't be right? I have no idea what "negative offset into TLS storage" means... + // FIXME: Check m_has_static_tls and do something different for dynamic TLS + *patch_ptr = relocation.offset() - (u32)m_tls_segment_address.as_ptr() - *patch_ptr; + break; + } + default: + // Raise the alarm! 
Someone needs to implement this relocation type + dbgprintf("Found a new exciting relocation type %d\n", relocation.type()); + printf("ELFDynamicLoader: Found unknown relocation type %d\n", relocation.type()); + ASSERT_NOT_REACHED(); + break; + } + return IterationDecision::Continue; + }); + + // Handle PLT Global offset table relocations. + m_dynamic_object->plt_relocation_section().for_each_relocation([&](const ELFDynamicObject::Relocation& relocation) { + // FIXME: Or BIND_NOW flag passed in? + if (m_dynamic_object->must_bind_now() || s_always_bind_now) { + // Eagerly BIND_NOW the PLT entries, doing all the symbol looking goodness + // The patch method returns the address for the LAZY fixup path, but we don't need it here + (void)patch_plt_entry(relocation.offset_in_section()); + } else { + // LAZY-ily bind the PLT slots by just adding the base address to the offsets stored there + // This avoids doing symbol lookup, which might be expensive + ASSERT(relocation.type() == R_386_JMP_SLOT); + + u8* relocation_address = relocation.address().as_ptr(); + + *(u32*)relocation_address += load_base_address; + } + return IterationDecision::Continue; + }); + +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Done relocating!\n"); +#endif +} + +// Defined in <arch>/plt_trampoline.S +extern "C" void _plt_trampoline(void) __attribute__((visibility("hidden"))); + +void ELFDynamicLoader::setup_plt_trampoline() +{ + VirtualAddress got_address = m_dynamic_object->plt_got_base_address(); + + u32* got_u32_ptr = (u32*)got_address.as_ptr(); + got_u32_ptr[1] = (u32)this; + got_u32_ptr[2] = (u32)&_plt_trampoline; + +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Set GOT PLT entries at %p: [0] = %p [1] = %p, [2] = %p\n", got_u32_ptr, got_u32_ptr[0], got_u32_ptr[1], got_u32_ptr[2]); +#endif +} + +// Called from our ASM routine _plt_trampoline +extern "C" Elf32_Addr _fixup_plt_entry(ELFDynamicLoader* object, u32 relocation_offset) +{ + return object->patch_plt_entry(relocation_offset); +} + +// offset is in 
PLT relocation table +Elf32_Addr ELFDynamicLoader::patch_plt_entry(u32 relocation_offset) +{ + auto relocation = m_dynamic_object->plt_relocation_section().relocation_at_offset(relocation_offset); + + ASSERT(relocation.type() == R_386_JMP_SLOT); + + auto sym = relocation.symbol(); + + u8* relocation_address = relocation.address().as_ptr(); + u32 symbol_location = sym.address().get(); + + VERBOSE("ELFDynamicLoader: Jump slot relocation: putting %s (%p) into PLT at %p\n", sym.name(), symbol_location, relocation_address); + + *(u32*)relocation_address = symbol_location; + + return symbol_location; +} + +void ELFDynamicLoader::call_object_init_functions() +{ + typedef void (*InitFunc)(); + auto init_function = (InitFunc)(m_dynamic_object->init_section().address().as_ptr()); + +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Calling DT_INIT at %p\n", init_function); +#endif + (init_function)(); + + auto init_array_section = m_dynamic_object->init_array_section(); + + InitFunc* init_begin = (InitFunc*)(init_array_section.address().as_ptr()); + InitFunc* init_end = init_begin + init_array_section.entry_count(); + while (init_begin != init_end) { + // Android sources claim that these can be -1, to be ignored. + // 0 definitely shows up. Apparently 0/-1 are valid? Confusing. + if (!*init_begin || ((i32)*init_begin == -1)) + continue; +#ifdef DYNAMIC_LOAD_DEBUG + dbgprintf("Calling DT_INITARRAY entry at %p\n", *init_begin); +#endif + (*init_begin)(); + ++init_begin; + } +} + +u32 ELFDynamicLoader::ProgramHeaderRegion::mmap_prot() const +{ + int prot = 0; + prot |= is_executable() ? PROT_EXEC : 0; + prot |= is_readable() ? PROT_READ : 0; + prot |= is_writable() ? 
PROT_WRITE : 0; + return prot; +} diff --git a/Libraries/LibELF/ELFDynamicLoader.h b/Libraries/LibELF/ELFDynamicLoader.h new file mode 100644 index 0000000000..4fb5e1d32c --- /dev/null +++ b/Libraries/LibELF/ELFDynamicLoader.h @@ -0,0 +1,89 @@ +#pragma once + +#include <LibELF/ELFDynamicObject.h> +#include <LibELF/ELFImage.h> +#include <LibELF/exec_elf.h> +#include <mman.h> + +#include <AK/OwnPtr.h> +#include <AK/RefCounted.h> +#include <AK/String.h> + +#define ALIGN_ROUND_UP(x, align) ((((size_t)(x)) + align - 1) & (~(align - 1))) + +class ELFDynamicLoader : public RefCounted<ELFDynamicLoader> { +public: + static NonnullRefPtr<ELFDynamicLoader> construct(const char* filename, int fd, size_t file_size); + + ~ELFDynamicLoader(); + + bool is_valid() const { return m_valid; } + + // Load a full ELF image from file into the current process and create an ELFDynamicObject + // from the SHT_DYNAMIC in the file. + bool load_from_image(unsigned flags); + + // Stage 2 of loading: relocations and init functions + // Assumes that the program headers have been loaded and that m_dynamic_object is initialized + // Splitting loading like this allows us to use the same code to relocate a main executable as an elf binary + bool load_stage_2(unsigned flags); + + // Intended for use by dlsym or other internal methods + void* symbol_for_name(const char*); + + void dump(); + + // Will be called from _fixup_plt_entry, as part of the PLT trampoline + Elf32_Addr patch_plt_entry(u32 relocation_offset); + +private: + class ProgramHeaderRegion { + public: + void set_program_header(const Elf32_Phdr& header) { m_program_header = header; } + + // Information from ELF Program header + u32 type() const { return m_program_header.p_type; } + u32 flags() const { return m_program_header.p_flags; } + u32 offset() const { return m_program_header.p_offset; } + VirtualAddress desired_load_address() const { return VirtualAddress(m_program_header.p_vaddr); } + u32 size_in_memory() const { return 
m_program_header.p_memsz; } + u32 size_in_image() const { return m_program_header.p_filesz; } + u32 alignment() const { return m_program_header.p_align; } + u32 mmap_prot() const; + bool is_readable() const { return flags() & PF_R; } + bool is_writable() const { return flags() & PF_W; } + bool is_executable() const { return flags() & PF_X; } + bool is_tls_template() const { return type() == PT_TLS; } + bool is_load() const { return type() == PT_LOAD; } + bool is_dynamic() const { return type() == PT_DYNAMIC; } + + u32 required_load_size() { return ALIGN_ROUND_UP(m_program_header.p_memsz, m_program_header.p_align); } + + private: + Elf32_Phdr m_program_header; // Explictly a copy of the PHDR in the image + }; + + explicit ELFDynamicLoader(const char* filename, int fd, size_t file_size); + explicit ELFDynamicLoader(Elf32_Dyn* dynamic_location, Elf32_Addr load_address); + + // Stage 1 + void load_program_headers(const ELFImage& elf_image); + + // Stage 2 + void do_relocations(); + void setup_plt_trampoline(); + void call_object_init_functions(); + + String m_filename; + size_t m_file_size { 0 }; + int m_image_fd { -1 }; + void* m_file_mapping { nullptr }; + bool m_valid { true }; + + OwnPtr<ELFDynamicObject> m_dynamic_object; + + VirtualAddress m_text_segment_load_address; + size_t m_text_segment_size; + + VirtualAddress m_tls_segment_address; +}; diff --git a/Libraries/LibELF/ELFDynamicObject.cpp b/Libraries/LibELF/ELFDynamicObject.cpp index ab0d9de3e5..ef2ffbef40 100644 --- a/Libraries/LibELF/ELFDynamicObject.cpp +++ b/Libraries/LibELF/ELFDynamicObject.cpp @@ -1,146 +1,44 @@ -#include <AK/StringBuilder.h> #include <LibELF/ELFDynamicObject.h> +#include <LibELF/exec_elf.h> + +#include <AK/StringBuilder.h> #include <assert.h> -#include <mman.h> #include <stdio.h> -#include <stdlib.h> - -#define DYNAMIC_LOAD_DEBUG -//#define DYNAMIC_LOAD_VERBOSE - -#ifdef DYNAMIC_LOAD_VERBOSE -# define VERBOSE(fmt, ...) dbgprintf(fmt, ##__VA_ARGS__) -#else -# define VERBOSE(fmt, ...) 
\ - do { \ - } while (0) -#endif - -static bool s_always_bind_now = false; -static const char* name_for_dtag(Elf32_Sword tag); +static const char* name_for_dtag(Elf32_Sword d_tag); -// SYSV ELF hash algorithm -// Note that the GNU HASH algorithm has less collisions -static uint32_t calculate_elf_hash(const char* name) +ELFDynamicObject::ELFDynamicObject(VirtualAddress base_address, u32 dynamic_offset) + : m_base_address(base_address) + , m_dynamic_offset(dynamic_offset) { - uint32_t hash = 0; - uint32_t top_nibble_of_hash = 0; - - while (*name != '\0') { - hash = hash << 4; - hash += *name; - name++; - - top_nibble_of_hash = hash & 0xF0000000U; - if (top_nibble_of_hash != 0) - hash ^= top_nibble_of_hash >> 24; - hash &= ~top_nibble_of_hash; - } - - return hash; -} - -NonnullRefPtr<ELFDynamicObject> ELFDynamicObject::construct(const char* filename, int fd, size_t size) -{ - return adopt(*new ELFDynamicObject(filename, fd, size)); -} - -ELFDynamicObject::ELFDynamicObject(const char* filename, int fd, size_t size) - : m_filename(filename) - , m_file_size(size) - , m_image_fd(fd) -{ - String file_mmap_name = String::format("ELF_DYN: %s", m_filename.characters()); - - m_file_mapping = mmap_with_name(nullptr, size, PROT_READ, MAP_PRIVATE, m_image_fd, 0, file_mmap_name.characters()); - if (MAP_FAILED == m_file_mapping) { - m_valid = false; - return; - } - - m_image = AK::make<ELFImage>((u8*)m_file_mapping); - - m_valid = m_image->is_valid() && m_image->parse() && m_image->is_dynamic(); - - if (!m_valid) { - return; - } - - const ELFImage::DynamicSection probably_dynamic_section = m_image->dynamic_section(); - if (StringView(".dynamic") != probably_dynamic_section.name() || probably_dynamic_section.type() != SHT_DYNAMIC) { - m_valid = false; - return; - } + parse(); } ELFDynamicObject::~ELFDynamicObject() { - if (MAP_FAILED != m_file_mapping) - munmap(m_file_mapping, m_file_size); } -void* ELFDynamicObject::symbol_for_name(const char* name) +void ELFDynamicObject::dump() 
const { - // FIXME: If we enable gnu hash in the compiler, we should use that here instead - // The algo is way better with less collisions - uint32_t hash_value = calculate_elf_hash(name); - - u8* load_addr = m_text_region->load_address().as_ptr(); - - // NOTE: We need to use the loaded hash/string/symbol tables here to get the right - // addresses. The ones that are in the ELFImage won't cut it, they aren't relocated - u32* hash_table_begin = (u32*)(load_addr + m_hash_table_offset); - Elf32_Sym* symtab = (Elf32_Sym*)(load_addr + m_symbol_table_offset); - const char* strtab = (const char*)load_addr + m_string_table_offset; - - size_t num_buckets = hash_table_begin[0]; - - // This is here for completeness, but, since we're using the fact that every chain - // will end at chain 0 (which means 'not found'), we don't need to check num_chains. - // Interestingly, num_chains is required to be num_symbols - //size_t num_chains = hash_table_begin[1]; - - u32* buckets = &hash_table_begin[2]; - u32* chains = &buckets[num_buckets]; - - for (u32 i = buckets[hash_value % num_buckets]; i; i = chains[i]) { - if (strcmp(name, strtab + symtab[i].st_name) == 0) { - void* symbol_address = load_addr + symtab[i].st_value; -#ifdef DYNAMIC_LOAD_DEBUG - dbgprintf("Returning dynamic symbol with index %d for %s: %p\n", i, strtab + symtab[i].st_name, symbol_address); -#endif - return symbol_address; - } - } - - return nullptr; -} - -void ELFDynamicObject::dump() -{ - auto dynamic_section = m_image->dynamic_section(); - StringBuilder builder; builder.append("\nd_tag tag_name value\n"); size_t num_dynamic_sections = 0; - dynamic_section.for_each_dynamic_entry([&](const ELFImage::DynamicSectionEntry& entry) { + for_each_dynamic_entry([&](const ELFDynamicObject::DynamicEntry& entry) { String name_field = String::format("(%s)", name_for_dtag(entry.tag())); builder.appendf("0x%08X %-17s0x%X\n", entry.tag(), name_field.characters(), entry.val()); num_dynamic_sections++; return 
IterationDecision::Continue; }); - dbgprintf("Dynamic section at offset 0x%x contains %zu entries:\n", dynamic_section.offset(), num_dynamic_sections); + dbgprintf("Dynamic section at offset 0x%x contains %zu entries:\n", m_dynamic_offset, num_dynamic_sections); dbgprintf(builder.to_string().characters()); } -void ELFDynamicObject::parse_dynamic_section() +void ELFDynamicObject::parse() { - auto dynamic_section = m_image->dynamic_section(); - dynamic_section.for_each_dynamic_entry([&](const ELFImage::DynamicSectionEntry& entry) { + for_each_dynamic_entry([&](const DynamicEntry& entry) { switch (entry.tag()) { case DT_INIT: m_init_offset = entry.ptr(); @@ -154,6 +52,12 @@ void ELFDynamicObject::parse_dynamic_section() case DT_INIT_ARRAYSZ: m_init_array_size = entry.val(); break; + case DT_FINI_ARRAY: + m_fini_array_offset = entry.ptr(); + break; + case DT_FINI_ARRAYSZ: + m_fini_array_size = entry.val(); + break; case DT_HASH: m_hash_table_offset = entry.ptr(); break; @@ -199,14 +103,10 @@ void ELFDynamicObject::parse_dynamic_section() m_number_of_relocations = entry.val(); break; case DT_FLAGS: - m_must_bind_now = entry.val() & DF_BIND_NOW; - m_has_text_relocations = entry.val() & DF_TEXTREL; - m_should_process_origin = entry.val() & DF_ORIGIN; - m_has_static_thread_local_storage = entry.val() & DF_STATIC_TLS; - m_requires_symbolic_symbol_resolution = entry.val() & DF_SYMBOLIC; + m_dt_flags = entry.val(); break; case DT_TEXTREL: - m_has_text_relocations = true; // This tag seems to exist for legacy reasons only? + m_dt_flags |= DF_TEXTREL; // This tag seems to exist for legacy reasons only? 
break; default: dbgprintf("ELFDynamicObject: DYNAMIC tag handling not implemented for DT_%s\n", name_for_dtag(entry.tag())); @@ -216,280 +116,130 @@ void ELFDynamicObject::parse_dynamic_section() } return IterationDecision::Continue; }); -} -typedef void (*InitFunc)(); + auto hash_section_address = hash_section().address().as_ptr(); + auto num_hash_chains = ((u32*)hash_section_address)[1]; + m_symbol_count = num_hash_chains; +} -bool ELFDynamicObject::load(unsigned flags) +const ELFDynamicObject::Relocation ELFDynamicObject::RelocationSection::relocation(unsigned index) const { - ASSERT(flags & RTLD_GLOBAL); - ASSERT(flags & RTLD_LAZY); - -#ifdef DYNAMIC_LOAD_DEBUG - dump(); -#endif -#ifdef DYNAMIC_LOAD_VERBOSE - m_image->dump(); -#endif - - parse_dynamic_section(); - load_program_headers(); - - if (m_has_text_relocations) { - if (0 > mprotect(m_text_region->load_address().as_ptr(), m_text_region->required_load_size(), PROT_READ | PROT_WRITE)) { - perror("mprotect"); // FIXME: dlerror? - return false; - } - } + ASSERT(index < entry_count()); + unsigned offset_in_section = index * entry_size(); + auto relocation_address = (Elf32_Rel*)address().offset(offset_in_section).as_ptr(); + return Relocation(m_dynamic, *relocation_address, offset_in_section); +} - do_relocations(); - setup_plt_trampoline(); +const ELFDynamicObject::Relocation ELFDynamicObject::RelocationSection::relocation_at_offset(unsigned offset) const +{ + ASSERT(offset <= (m_section_size_bytes - m_entry_size)); + auto relocation_address = (Elf32_Rel*)address().offset(offset).as_ptr(); + return Relocation(m_dynamic, *relocation_address, offset); +} - // Clean up our setting of .text to PROT_READ | PROT_WRITE - if (m_has_text_relocations) { - if (0 > mprotect(m_text_region->load_address().as_ptr(), m_text_region->required_load_size(), PROT_READ | PROT_EXEC)) { - perror("mprotect"); // FIXME: dlerror? 
- return false; - } - } +const ELFDynamicObject::Symbol ELFDynamicObject::symbol(unsigned index) const +{ + auto symbol_section = Section(*this, m_symbol_table_offset, (m_symbol_count * m_size_of_symbol_table_entry), m_size_of_symbol_table_entry, "DT_SYMTAB"); + auto symbol_entry = (Elf32_Sym*)symbol_section.address().offset(index * symbol_section.entry_size()).as_ptr(); + return Symbol(*this, index, *symbol_entry); +} - call_object_init_functions(); +const ELFDynamicObject::Section ELFDynamicObject::init_section() const +{ + return Section(*this, m_init_offset, sizeof(void (*)()), sizeof(void (*)()), "DT_INIT"); +} -#ifdef DYNAMIC_LOAD_DEBUG - dbgprintf("Loaded %s\n", m_filename.characters()); -#endif - // FIXME: return false sometimes? missing symbol etc - return true; +const ELFDynamicObject::Section ELFDynamicObject::fini_section() const +{ + return Section(*this, m_fini_offset, sizeof(void (*)()), sizeof(void (*)()), "DT_FINI"); } -void ELFDynamicObject::load_program_headers() +const ELFDynamicObject::Section ELFDynamicObject::init_array_section() const { - size_t total_required_allocation_size = 0; // NOTE: If we don't have any TEXTREL, we can keep RO data RO, which would be nice - - m_image->for_each_program_header([&](const ELFImage::ProgramHeader& program_header) { - ProgramHeaderRegion new_region(program_header.raw_header()); - if (new_region.is_load()) - total_required_allocation_size += new_region.required_load_size(); - m_program_header_regions.append(move(new_region)); - auto& region = m_program_header_regions.last(); - if (region.is_tls_template()) - m_tls_region = &region; - else if (region.is_load()) { - if (region.is_executable()) - m_text_region = &region; - else - m_data_region = &region; - } - }); + return Section(*this, m_init_array_offset, m_init_array_size, sizeof(void (*)()), "DT_INIT_ARRAY"); +} - ASSERT(m_text_region && m_data_region); - - // Process regions in order: .text, .data, .tls - auto* region = m_text_region; - void* text_segment_begin = 
mmap_with_name(nullptr, region->required_load_size(), region->mmap_prot(), MAP_PRIVATE, m_image_fd, region->offset(), String::format(".text: %s", m_filename.characters()).characters()); - size_t text_segment_size = region->required_load_size(); - region->set_base_address(VirtualAddress { (u32)text_segment_begin }); - region->set_load_address(VirtualAddress { (u32)text_segment_begin }); - - region = m_data_region; - void* data_segment_begin = mmap_with_name((u8*)text_segment_begin + text_segment_size, region->required_load_size(), region->mmap_prot(), MAP_ANONYMOUS | MAP_PRIVATE, 0, 0, String::format(".data: %s", m_filename.characters()).characters()); - size_t data_segment_size = region->required_load_size(); - VirtualAddress data_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin); - region->set_base_address(VirtualAddress { (u32)text_segment_begin }); - region->set_load_address(data_segment_actual_addr); - memcpy(data_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image()); - - if (m_tls_region) { - region = m_data_region; - VirtualAddress tls_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin); - region->set_base_address(VirtualAddress { (u32)text_segment_begin }); - region->set_load_address(tls_segment_actual_addr); - memcpy(tls_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image()); - } +const ELFDynamicObject::Section ELFDynamicObject::fini_array_section() const +{ + return Section(*this, m_fini_array_offset, m_fini_array_size, sizeof(void (*)()), "DT_FINI_ARRAY"); +} - // sanity check - u8* end_of_in_memory_image = (u8*)data_segment_begin + data_segment_size; - ASSERT((ptrdiff_t)total_required_allocation_size == (ptrdiff_t)(end_of_in_memory_image - (u8*)text_segment_begin)); +const ELFDynamicObject::HashSection ELFDynamicObject::hash_section() const +{ + return HashSection(Section(*this, m_hash_table_offset, 0, 0, 
"DT_HASH"), HashType::SYSV); } -void ELFDynamicObject::do_relocations() +const ELFDynamicObject::RelocationSection ELFDynamicObject::relocation_section() const { - auto dyn_relocation_section = m_image->dynamic_relocation_section(); - if (StringView(".rel.dyn") != dyn_relocation_section.name() || SHT_REL != dyn_relocation_section.type()) { - ASSERT_NOT_REACHED(); - } + return RelocationSection(Section(*this, m_relocation_table_offset, m_size_of_relocation_table, m_size_of_relocation_entry, "DT_REL")); +} - u8* load_base_address = m_text_region->base_address().as_ptr(); +const ELFDynamicObject::RelocationSection ELFDynamicObject::plt_relocation_section() const +{ + return RelocationSection(Section(*this, m_plt_relocation_offset_location, m_size_of_plt_relocation_entry_list, m_size_of_relocation_entry, "DT_JMPREL")); +} - int i = -1; +u32 ELFDynamicObject::HashSection::calculate_elf_hash(const char* name) const +{ + // SYSV ELF hash algorithm + // Note that the GNU HASH algorithm has less collisions - // FIXME: We should really bail on undefined symbols here. (but, there's some TLS vars that are currently undef soooo.... :) ) + uint32_t hash = 0; + uint32_t top_nibble_of_hash = 0; - dyn_relocation_section.for_each_relocation([&](const ELFImage::DynamicRelocation& relocation) { - ++i; - VERBOSE("====== RELOCATION %d: offset 0x%08X, type %d, symidx %08X\n", i, relocation.offset(), relocation.type(), relocation.symbol_index()); - u32* patch_ptr = (u32*)(load_base_address + relocation.offset()); - switch (relocation.type()) { - case R_386_NONE: - // Apparently most loaders will just skip these? - // Seems if the 'link editor' generates one something is funky with your code - VERBOSE("None relocation. 
No symbol, no nothin.\n"); - break; - case R_386_32: { - auto symbol = relocation.symbol(); - VERBOSE("Absolute relocation: name: '%s', value: %p\n", symbol.name(), symbol.value()); - u32 symbol_address = symbol.value() + (u32)load_base_address; - *patch_ptr += symbol_address; - VERBOSE(" Symbol address: %p\n", *patch_ptr); - break; - } - case R_386_PC32: { - auto symbol = relocation.symbol(); - VERBOSE("PC-relative relocation: '%s', value: %p\n", symbol.name(), symbol.value()); - u32 relative_offset = (symbol.value() - relocation.offset()); - *patch_ptr += relative_offset; - VERBOSE(" Symbol address: %p\n", *patch_ptr); - break; - } - case R_386_GLOB_DAT: { - auto symbol = relocation.symbol(); - VERBOSE("Global data relocation: '%s', value: %p\n", symbol.name(), symbol.value()); - u32 symbol_location = (u32)(load_base_address + symbol.value()); - *patch_ptr = symbol_location; - VERBOSE(" Symbol address: %p\n", *patch_ptr); - break; - } - case R_386_RELATIVE: { - // FIXME: According to the spec, R_386_relative ones must be done first. - // We could explicitly do them first using m_number_of_relocatoins from DT_RELCOUNT - // However, our compiler is nice enough to put them at the front of the relocations for us :) - VERBOSE("Load address relocation at offset %X\n", relocation.offset()); - VERBOSE(" patch ptr == %p, adding load base address (%p) to it and storing %p\n", *patch_ptr, load_base_address, *patch_ptr + (u32)load_base_address); - *patch_ptr += (u32)load_base_address; // + addend for RelA (addend for Rel is stored at addr) - break; - } - case R_386_TLS_TPOFF: { - VERBOSE("Relocation type: R_386_TLS_TPOFF at offset %X\n", relocation.offset()); - // FIXME: this can't be right? I have no idea what "negative offset into TLS storage" means... 
- // FIXME: Check m_has_static_tls and do something different for dynamic TLS - VirtualAddress tls_region_loctation = m_tls_region->desired_load_address(); - *patch_ptr = relocation.offset() - (u32)tls_region_loctation.as_ptr() - *patch_ptr; - break; - } - default: - // Raise the alarm! Someone needs to implement this relocation type - dbgprintf("Found a new exciting relocation type %d\n", relocation.type()); - printf("ELFDynamicObject: Found unknown relocation type %d\n", relocation.type()); - ASSERT_NOT_REACHED(); - break; - } - return IterationDecision::Continue; - }); + while (*name != '\0') { + hash = hash << 4; + hash += *name; + name++; - // Handle PLT Global offset table relocations. - for (size_t idx = 0; idx < m_size_of_plt_relocation_entry_list; idx += m_size_of_relocation_entry) { - // FIXME: Or BIND_NOW flag passed in? - if (m_must_bind_now || s_always_bind_now) { - // Eagerly BIND_NOW the PLT entries, doing all the symbol looking goodness - // The patch method returns the address for the LAZY fixup path, but we don't need it here - (void)patch_plt_entry(idx); - } else { - // LAZY-ily bind the PLT slots by just adding the base address to the offsets stored there - // This avoids doing symbol lookup, which might be expensive - VirtualAddress relocation_vaddr = m_text_region->load_address().offset(m_plt_relocation_offset_location).offset(idx); - Elf32_Rel* jump_slot_relocation = (Elf32_Rel*)relocation_vaddr.as_ptr(); - - ASSERT(ELF32_R_TYPE(jump_slot_relocation->r_info) == R_386_JMP_SLOT); - - auto* image_base_address = m_text_region->base_address().as_ptr(); - u8* relocation_address = image_base_address + jump_slot_relocation->r_offset; - - *(u32*)relocation_address += (u32)image_base_address; - } + top_nibble_of_hash = hash & 0xF0000000U; + if (top_nibble_of_hash != 0) + hash ^= top_nibble_of_hash >> 24; + hash &= ~top_nibble_of_hash; } -#ifdef DYNAMIC_LOAD_DEBUG - dbgprintf("Done relocating!\n"); -#endif -} - -// Defined in <arch>/plt_trampoline.S 
-extern "C" void _plt_trampoline(void) __attribute__((visibility("hidden"))); - -void ELFDynamicObject::setup_plt_trampoline() -{ - const ELFImage::Section& got_section = m_image->lookup_section(".got.plt"); - VirtualAddress got_address = m_text_region->load_address().offset(got_section.address()); - - u32* got_u32_ptr = reinterpret_cast<u32*>(got_address.as_ptr()); - got_u32_ptr[1] = (u32)this; - got_u32_ptr[2] = (u32)&_plt_trampoline; - -#ifdef DYNAMIC_LOAD_DEBUG - dbgprintf("Set GOT PLT entries at %p offset(%p): [0] = %p [1] = %p, [2] = %p\n", got_u32_ptr, got_section.offset(), got_u32_ptr[0], got_u32_ptr[1], got_u32_ptr[2]); -#endif + return hash; } -// Called from our ASM routine _plt_trampoline -extern "C" Elf32_Addr _fixup_plt_entry(ELFDynamicObject* object, u32 relocation_idx) +u32 ELFDynamicObject::HashSection::calculate_gnu_hash(const char*) const { - return object->patch_plt_entry(relocation_idx); + // FIXME: Implement the GNU hash algorithm + ASSERT_NOT_REACHED(); } -// offset is in PLT relocation table -Elf32_Addr ELFDynamicObject::patch_plt_entry(u32 relocation_idx) +const ELFDynamicObject::Symbol ELFDynamicObject::HashSection::lookup_symbol(const char* name) const { - VirtualAddress plt_relocation_table_address = m_text_region->load_address().offset(m_plt_relocation_offset_location); - VirtualAddress relocation_entry_address = plt_relocation_table_address.offset(relocation_idx); - Elf32_Rel* jump_slot_relocation = (Elf32_Rel*)relocation_entry_address.as_ptr(); - - ASSERT(ELF32_R_TYPE(jump_slot_relocation->r_info) == R_386_JMP_SLOT); - - auto sym = m_image->dynamic_symbol(ELF32_R_SYM(jump_slot_relocation->r_info)); - - auto* image_base_address = m_text_region->base_address().as_ptr(); - u8* relocation_address = image_base_address + jump_slot_relocation->r_offset; - u32 symbol_location = (u32)(image_base_address + sym.value()); + // FIXME: If we enable gnu hash in the compiler, we should use that here instead + // The algo is way better with less 
collisions + u32 hash_value = (this->*(m_hash_function))(name); - VERBOSE("ELFDynamicObject: Jump slot relocation: putting %s (%p) into PLT at %p\n", sym.name(), symbol_location, relocation_address); + u32* hash_table_begin = (u32*)address().as_ptr(); - *(u32*)relocation_address = symbol_location; + size_t num_buckets = hash_table_begin[0]; - return symbol_location; -} + // This is here for completeness, but, since we're using the fact that every chain + // will end at chain 0 (which means 'not found'), we don't need to check num_chains. + // Interestingly, num_chains is required to be num_symbols + //size_t num_chains = hash_table_begin[1]; -void ELFDynamicObject::call_object_init_functions() -{ - u8* load_addr = m_text_region->load_address().as_ptr(); - InitFunc init_function = (InitFunc)(load_addr + m_init_offset); + u32* buckets = &hash_table_begin[2]; + u32* chains = &buckets[num_buckets]; + for (u32 i = buckets[hash_value % num_buckets]; i; i = chains[i]) { + auto symbol = m_dynamic.symbol(i); + if (strcmp(name, symbol.name()) == 0) { #ifdef DYNAMIC_LOAD_DEBUG - dbgprintf("Calling DT_INIT at %p\n", init_function); -#endif - (init_function)(); - - InitFunc* init_begin = (InitFunc*)(load_addr + m_init_array_offset); - u32 init_end = (u32)((u8*)init_begin + m_init_array_size); - while ((u32)init_begin < init_end) { - // Android sources claim that these can be -1, to be ignored. - // 0 definitely shows up. Apparently 0/-1 are valid? Confusing. 
- if (!*init_begin || ((i32)*init_begin == -1)) - continue; -#ifdef DYNAMIC_LOAD_DEBUG - dbgprintf("Calling DT_INITARRAY entry at %p\n", *init_begin); + dbgprintf("Returning dynamic symbol with index %d for %s: %p\n", i, symbol.name(), symbol.address()); #endif - (*init_begin)(); - ++init_begin; + return symbol; + } } + return m_dynamic.the_undefined_symbol(); } -u32 ELFDynamicObject::ProgramHeaderRegion::mmap_prot() const +const char* ELFDynamicObject::symbol_string_table_string(Elf32_Word index) const { - int prot = 0; - prot |= is_executable() ? PROT_EXEC : 0; - prot |= is_readable() ? PROT_READ : 0; - prot |= is_writable() ? PROT_WRITE : 0; - return prot; + return (const char*)base_address().offset(m_string_table_offset + index).as_ptr(); } static const char* name_for_dtag(Elf32_Sword d_tag) diff --git a/Libraries/LibELF/ELFDynamicObject.h b/Libraries/LibELF/ELFDynamicObject.h index 5abedb0abb..42a5ea9114 100644 --- a/Libraries/LibELF/ELFDynamicObject.h +++ b/Libraries/LibELF/ELFDynamicObject.h @@ -1,94 +1,205 @@ #pragma once -#include <LibELF/ELFImage.h> #include <LibELF/exec_elf.h> -#include <dlfcn.h> -#include <mman.h> -#include <AK/OwnPtr.h> -#include <AK/RefCounted.h> -#include <AK/String.h> +#include <Kernel/VM/VirtualAddress.h> -#define ALIGN_ROUND_UP(x, align) ((((size_t)(x)) + align - 1) & (~(align - 1))) - -class ELFDynamicObject : public RefCounted<ELFDynamicObject> { +class ELFDynamicObject { public: - static NonnullRefPtr<ELFDynamicObject> construct(const char* filename, int fd, size_t file_size); - + explicit ELFDynamicObject(VirtualAddress base_address, u32 dynamic_offset); ~ELFDynamicObject(); + void dump() const; + + class DynamicEntry; + class Section; + class RelocationSection; + class Symbol; + class Relocation; + class HashSection; + + class DynamicEntry { + public: + DynamicEntry(const Elf32_Dyn& dyn) + : m_dyn(dyn) + { + } - bool is_valid() const { return m_valid; } + ~DynamicEntry() {} - // FIXME: How can we resolve all of the symbols 
without having the original elf image for our process? - // RTLD_LAZY only at first probably... though variables ('objects') need resolved at load time every time - bool load(unsigned flags); + Elf32_Sword tag() const { return m_dyn.d_tag; } + Elf32_Addr ptr() const { return m_dyn.d_un.d_ptr; } + Elf32_Word val() const { return m_dyn.d_un.d_val; } - // Intended for use by dlsym or other internal methods - void* symbol_for_name(const char*); + private: + const Elf32_Dyn& m_dyn; + }; - void dump(); + class Symbol { + public: + Symbol(const ELFDynamicObject& dynamic, unsigned index, const Elf32_Sym& sym) + : m_dynamic(dynamic) + , m_sym(sym) + , m_index(index) + { + } - // Will be called from _fixup_plt_entry, as part of the PLT trampoline - Elf32_Addr patch_plt_entry(u32 relocation_offset); + ~Symbol() {} -private: - class ProgramHeaderRegion { + const char* name() const { return m_dynamic.symbol_string_table_string(m_sym.st_name); } + unsigned section_index() const { return m_sym.st_shndx; } + unsigned value() const { return m_sym.st_value; } + unsigned size() const { return m_sym.st_size; } + unsigned index() const { return m_index; } + unsigned type() const { return ELF32_ST_TYPE(m_sym.st_info); } + unsigned bind() const { return ELF32_ST_BIND(m_sym.st_info); } + bool is_undefined() const { return this == &m_dynamic.the_undefined_symbol(); } + VirtualAddress address() const { return m_dynamic.base_address().offset(value()); } + + private: + const ELFDynamicObject& m_dynamic; + const Elf32_Sym& m_sym; + const unsigned m_index; + }; + + class Section { + public: + Section(const ELFDynamicObject& dynamic, unsigned section_offset, unsigned section_size_bytes, unsigned entry_size, const char* name) + : m_dynamic(dynamic) + , m_section_offset(section_offset) + , m_section_size_bytes(section_size_bytes) + , m_entry_size(entry_size) + , m_name(name) + { + } + ~Section() {} + + const char* name() const { return m_name; } + unsigned offset() const { return m_section_offset; 
} + unsigned size() const { return m_section_size_bytes; } + unsigned entry_size() const { return m_entry_size; } + unsigned entry_count() const { return !entry_size() ? 0 : size() / entry_size(); } + VirtualAddress address() const { return m_dynamic.base_address().offset(m_section_offset); } + + protected: + friend class RelocationSection; + friend class HashSection; + const ELFDynamicObject& m_dynamic; + unsigned m_section_offset; + unsigned m_section_size_bytes; + unsigned m_entry_size; + const char* m_name { nullptr }; + }; + + class RelocationSection : public Section { + public: + RelocationSection(const Section& section) + : Section(section.m_dynamic, section.m_section_offset, section.m_section_size_bytes, section.m_entry_size, section.m_name) + { + } + unsigned relocation_count() const { return entry_count(); } + const Relocation relocation(unsigned index) const; + const Relocation relocation_at_offset(unsigned offset) const; + template<typename F> + void for_each_relocation(F) const; + }; + + class Relocation { + public: + Relocation(const ELFDynamicObject& dynamic, const Elf32_Rel& rel, unsigned offset_in_section) + : m_dynamic(dynamic) + , m_rel(rel) + , m_offset_in_section(offset_in_section) + { + } + + ~Relocation() {} + + unsigned offset_in_section() const { return m_offset_in_section; } + unsigned offset() const { return m_rel.r_offset; } + unsigned type() const { return ELF32_R_TYPE(m_rel.r_info); } + unsigned symbol_index() const { return ELF32_R_SYM(m_rel.r_info); } + const Symbol symbol() const { return m_dynamic.symbol(symbol_index()); } + VirtualAddress address() const { return m_dynamic.base_address().offset(offset()); } + + private: + const ELFDynamicObject& m_dynamic; + const Elf32_Rel& m_rel; + const unsigned m_offset_in_section; + }; + + enum class HashType { + SYSV, + GNU + }; + + class HashSection : public Section { public: - ProgramHeaderRegion(const Elf32_Phdr& header) - : m_program_header(header) + HashSection(const Section& section, 
HashType hash_type = HashType::SYSV) + : Section(section.m_dynamic, section.m_section_offset, section.m_section_size_bytes, section.m_entry_size, section.m_name) { + switch (hash_type) { + case HashType::SYSV: + m_hash_function = &HashSection::calculate_elf_hash; + break; + case HashType::GNU: + m_hash_function = &HashSection::calculate_gnu_hash; + break; + default: + ASSERT_NOT_REACHED(); + break; + } } - VirtualAddress load_address() const { return m_load_address; } - VirtualAddress base_address() const { return m_image_base_address; } - - void set_load_address(VirtualAddress addr) { m_load_address = addr; } - void set_base_address(VirtualAddress addr) { m_image_base_address = addr; } - - // Information from ELF Program header - u32 type() const { return m_program_header.p_type; } - u32 flags() const { return m_program_header.p_flags; } - u32 offset() const { return m_program_header.p_offset; } - VirtualAddress desired_load_address() const { return VirtualAddress(m_program_header.p_vaddr); } - u32 size_in_memory() const { return m_program_header.p_memsz; } - u32 size_in_image() const { return m_program_header.p_filesz; } - u32 alignment() const { return m_program_header.p_align; } - u32 mmap_prot() const; - bool is_readable() const { return flags() & PF_R; } - bool is_writable() const { return flags() & PF_W; } - bool is_executable() const { return flags() & PF_X; } - bool is_tls_template() const { return type() == PT_TLS; } - bool is_load() const { return type() == PT_LOAD; } - bool is_dynamic() const { return type() == PT_DYNAMIC; } - - u32 required_load_size() { return ALIGN_ROUND_UP(m_program_header.p_memsz, m_program_header.p_align); } + const Symbol lookup_symbol(const char*) const; private: - Elf32_Phdr m_program_header; // Explictly a copy of the PHDR in the image - VirtualAddress m_load_address { 0 }; - VirtualAddress m_image_base_address { 0 }; + u32 calculate_elf_hash(const char* name) const; + u32 calculate_gnu_hash(const char* name) const; + + 
typedef u32 (HashSection::*HashFunction)(const char*) const; + HashFunction m_hash_function; }; - explicit ELFDynamicObject(const char* filename, int fd, size_t file_size); + unsigned symbol_count() const { return m_symbol_count; } + + const Symbol symbol(unsigned) const; + const Symbol& the_undefined_symbol() const { return m_the_undefined_symbol; } - void parse_dynamic_section(); - void load_program_headers(); - void do_relocations(); - void setup_plt_trampoline(); - void call_object_init_functions(); + const Section init_section() const; + const Section fini_section() const; + const Section init_array_section() const; + const Section fini_array_section() const; - String m_filename; - size_t m_file_size { 0 }; - int m_image_fd { -1 }; - void* m_file_mapping { nullptr }; - bool m_valid { false }; + const HashSection hash_section() const; - OwnPtr<ELFImage> m_image; + const RelocationSection relocation_section() const; + const RelocationSection plt_relocation_section() const; - Vector<ProgramHeaderRegion> m_program_header_regions; - ProgramHeaderRegion* m_text_region { nullptr }; - ProgramHeaderRegion* m_data_region { nullptr }; - ProgramHeaderRegion* m_tls_region { nullptr }; + bool should_process_origin() const { return m_dt_flags & DF_ORIGIN; } + bool requires_symbolic_symbol_resolution() const { return m_dt_flags & DF_SYMBOLIC; } + // Text relocations meaning: we need to edit the .text section which is normally mapped PROT_READ + bool has_text_relocations() const { return m_dt_flags & DF_TEXTREL; } + bool must_bind_now() const { return m_dt_flags & DF_BIND_NOW; } + bool has_static_thread_local_storage() const { return m_dt_flags & DF_STATIC_TLS; } + + VirtualAddress plt_got_base_address() const { return m_base_address.offset(m_procedure_linkage_table_offset); } + VirtualAddress base_address() const { return m_base_address; } + +private: + const char* symbol_string_table_string(Elf32_Word) const; + void parse(); + + template<typename F> + void for_each_symbol(F) 
const; + + template<typename F> + void for_each_dynamic_entry(F) const; + + VirtualAddress m_base_address; + u32 m_dynamic_offset; + Symbol m_the_undefined_symbol { *this, 0, {} }; + + unsigned m_symbol_count { 0 }; // Begin Section information collected from DT_* entries uintptr_t m_init_offset { 0 }; @@ -96,12 +207,14 @@ private: uintptr_t m_init_array_offset { 0 }; size_t m_init_array_size { 0 }; + uintptr_t m_fini_array_offset { 0 }; + size_t m_fini_array_size { 0 }; uintptr_t m_hash_table_offset { 0 }; uintptr_t m_string_table_offset { 0 }; - uintptr_t m_symbol_table_offset { 0 }; size_t m_size_of_string_table { 0 }; + uintptr_t m_symbol_table_offset { 0 }; size_t m_size_of_symbol_table_entry { 0 }; Elf32_Sword m_procedure_linkage_table_relocation_type { -1 }; @@ -110,17 +223,44 @@ private: uintptr_t m_procedure_linkage_table_offset { 0 }; // NOTE: We'll only ever either RELA or REL entries, not both (thank god) + // NOTE: The x86 ABI will only ever genrerate REL entries. size_t m_number_of_relocations { 0 }; size_t m_size_of_relocation_entry { 0 }; size_t m_size_of_relocation_table { 0 }; uintptr_t m_relocation_table_offset { 0 }; // DT_FLAGS - bool m_should_process_origin = false; - bool m_requires_symbolic_symbol_resolution = false; - // Text relocations meaning: we need to edit the .text section which is normally mapped PROT_READ - bool m_has_text_relocations = false; - bool m_must_bind_now = false; // FIXME: control with an environment var as well? 
- bool m_has_static_thread_local_storage = false; + Elf32_Word m_dt_flags { 0 }; // End Section information from DT_* entries }; + +template<typename F> +inline void ELFDynamicObject::RelocationSection::for_each_relocation(F func) const +{ + for (unsigned i = 0; i < relocation_count(); ++i) { + if (func(relocation(i)) == IterationDecision::Break) + break; + } +} + +template<typename F> +inline void ELFDynamicObject::for_each_symbol(F func) const +{ + for (unsigned i = 0; i < symbol_count(); ++i) { + if (func(symbol(i)) == IterationDecision::Break) + break; + } +} + +template<typename F> +inline void ELFDynamicObject::for_each_dynamic_entry(F func) const +{ + auto* dyns = reinterpret_cast<const Elf32_Dyn*>(m_base_address.offset(m_dynamic_offset).as_ptr()); + for (unsigned i = 0;; ++i) { + auto&& dyn = DynamicEntry(dyns[i]); + if (dyn.tag() == DT_NULL) + break; + if (func(dyn) == IterationDecision::Break) + break; + } +} diff --git a/Libraries/LibELF/ELFImage.cpp b/Libraries/LibELF/ELFImage.cpp index 832711afbc..345db584e6 100644 --- a/Libraries/LibELF/ELFImage.cpp +++ b/Libraries/LibELF/ELFImage.cpp @@ -43,11 +43,6 @@ unsigned ELFImage::symbol_count() const return section(m_symbol_table_section_index).entry_count(); } -unsigned ELFImage::dynamic_symbol_count() const -{ - return section(m_dynamic_symbol_table_section_index).entry_count(); -} - void ELFImage::dump() const { dbgprintf("ELFImage{%p} {\n", this); @@ -117,24 +112,11 @@ bool ELFImage::parse() if (sh.sh_type == SHT_STRTAB && i != header().e_shstrndx) { if (StringView(".strtab") == section_header_table_string(sh.sh_name)) m_string_table_section_index = i; - else if (StringView(".dynstr") == section_header_table_string(sh.sh_name)) - m_dynamic_string_table_section_index = i; - else - ASSERT_NOT_REACHED(); } if (sh.sh_type == SHT_DYNAMIC) { ASSERT(!m_dynamic_section_index || m_dynamic_section_index == i); m_dynamic_section_index = i; } - if (sh.sh_type == SHT_DYNSYM) { - 
ASSERT(!m_dynamic_symbol_table_section_index || m_dynamic_symbol_table_section_index == i); - m_dynamic_symbol_table_section_index = i; - } - if (sh.sh_type == SHT_REL) { - if (StringView(".rel.dyn") == section_header_table_string(sh.sh_name)) { - m_dynamic_relocation_section_index = i; - } - } } // Then create a name-to-index map. @@ -162,14 +144,6 @@ const char* ELFImage::table_string(unsigned offset) const return raw_data(sh.sh_offset + offset); } -const char* ELFImage::dynamic_table_string(unsigned offset) const -{ - auto& sh = section_header(m_dynamic_string_table_section_index); - if (sh.sh_type != SHT_STRTAB) - return nullptr; - return raw_data(sh.sh_offset + offset); -} - const char* ELFImage::raw_data(unsigned offset) const { return reinterpret_cast<const char*>(m_buffer) + offset; @@ -199,13 +173,6 @@ const ELFImage::Symbol ELFImage::symbol(unsigned index) const return Symbol(*this, index, raw_syms[index]); } -const ELFImage::DynamicSymbol ELFImage::dynamic_symbol(unsigned index) const -{ - ASSERT(index < symbol_count()); - auto* raw_syms = reinterpret_cast<const Elf32_Sym*>(raw_data(section(m_dynamic_symbol_table_section_index).offset())); - return DynamicSymbol(*this, index, raw_syms[index]); -} - const ELFImage::Section ELFImage::section(unsigned index) const { ASSERT(index < section_count()); @@ -225,13 +192,6 @@ const ELFImage::Relocation ELFImage::RelocationSection::relocation(unsigned inde return Relocation(m_image, rels[index]); } -const ELFImage::DynamicRelocation ELFImage::DynamicRelocationSection::relocation(unsigned index) const -{ - ASSERT(index < relocation_count()); - auto* rels = reinterpret_cast<const Elf32_Rel*>(m_image.raw_data(offset())); - return DynamicRelocation(m_image, rels[index]); -} - const ELFImage::RelocationSection ELFImage::Section::relocations() const { // FIXME: This is ugly. 
@@ -263,9 +223,3 @@ const ELFImage::DynamicSection ELFImage::dynamic_section() const ASSERT(is_dynamic()); return section(m_dynamic_section_index); } - -const ELFImage::DynamicRelocationSection ELFImage::dynamic_relocation_section() const -{ - ASSERT(is_dynamic()); - return section(m_dynamic_relocation_section_index); -} diff --git a/Libraries/LibELF/ELFImage.h b/Libraries/LibELF/ELFImage.h index e30e2a71e0..0e2ef04794 100644 --- a/Libraries/LibELF/ELFImage.h +++ b/Libraries/LibELF/ELFImage.h @@ -16,13 +16,9 @@ public: class Section; class RelocationSection; - class DynamicRelocationSection; class Symbol; - class DynamicSymbol; class Relocation; - class DynamicRelocation; class DynamicSection; - class DynamicSectionEntry; class Symbol { public: @@ -50,32 +46,6 @@ public: const unsigned m_index; }; - class DynamicSymbol { - public: - DynamicSymbol(const ELFImage& image, unsigned index, const Elf32_Sym& sym) - : m_image(image) - , m_sym(sym) - , m_index(index) - { - } - - ~DynamicSymbol() {} - - const char* name() const { return m_image.dynamic_table_string(m_sym.st_name); } - unsigned section_index() const { return m_sym.st_shndx; } - unsigned value() const { return m_sym.st_value; } - unsigned size() const { return m_sym.st_size; } - unsigned index() const { return m_index; } - unsigned type() const { return ELF32_ST_TYPE(m_sym.st_info); } - unsigned bind() const { return ELF32_ST_BIND(m_sym.st_info); } - const Section section() const { return m_image.section(section_index()); } - - private: - const ELFImage& m_image; - const Elf32_Sym& m_sym; - const unsigned m_index; - }; - class ProgramHeader { public: ProgramHeader(const ELFImage& image, unsigned program_header_index) @@ -151,38 +121,6 @@ public: void for_each_relocation(F) const; }; - class DynamicRelocationSection : public Section { - public: - DynamicRelocationSection(const Section& section) - : Section(section.m_image, section.m_section_index) - { - } - unsigned relocation_count() const { return 
entry_count(); } - const DynamicRelocation relocation(unsigned index) const; - template<typename F> - void for_each_relocation(F) const; - }; - - class DynamicRelocation { - public: - DynamicRelocation(const ELFImage& image, const Elf32_Rel& rel) - : m_image(image) - , m_rel(rel) - { - } - - ~DynamicRelocation() {} - - unsigned offset() const { return m_rel.r_offset; } - unsigned type() const { return ELF32_R_TYPE(m_rel.r_info); } - unsigned symbol_index() const { return ELF32_R_SYM(m_rel.r_info); } - const DynamicSymbol symbol() const { return m_image.dynamic_symbol(symbol_index()); } - - private: - const ELFImage& m_image; - const Elf32_Rel& m_rel; - }; - class Relocation { public: Relocation(const ELFImage& image, const Elf32_Rel& rel) @@ -210,28 +148,6 @@ public: { ASSERT(type() == SHT_DYNAMIC); } - - template<typename F> - void for_each_dynamic_entry(F) const; - }; - - class DynamicSectionEntry { - public: - DynamicSectionEntry(const ELFImage& image, const Elf32_Dyn& dyn) - : m_image(image) - , m_dyn(dyn) - { - } - - ~DynamicSectionEntry() {} - - Elf32_Sword tag() const { return m_dyn.d_tag; } - Elf32_Addr ptr() const { return m_dyn.d_un.d_ptr; } - Elf32_Word val() const { return m_dyn.d_un.d_val; } - - private: - const ELFImage& m_image; - const Elf32_Dyn& m_dyn; }; unsigned symbol_count() const; @@ -240,11 +156,9 @@ public: unsigned program_header_count() const; const Symbol symbol(unsigned) const; - const DynamicSymbol dynamic_symbol(unsigned) const; const Section section(unsigned) const; const ProgramHeader program_header(unsigned const) const; const DynamicSection dynamic_section() const; - const DynamicRelocationSection dynamic_relocation_section() const; template<typename F> void for_each_section(F) const; @@ -253,8 +167,6 @@ public: template<typename F> void for_each_symbol(F) const; template<typename F> - void for_each_dynamic_symbol(F) const; - template<typename F> void for_each_program_header(F) const; // NOTE: Returns section(0) if section with 
name is not found. @@ -276,17 +188,13 @@ private: const char* table_string(unsigned offset) const; const char* section_header_table_string(unsigned offset) const; const char* section_index_to_string(unsigned index) const; - const char* dynamic_table_string(unsigned offset) const; const u8* m_buffer { nullptr }; HashMap<String, unsigned> m_sections; bool m_valid { false }; unsigned m_symbol_table_section_index { 0 }; unsigned m_string_table_section_index { 0 }; - unsigned m_dynamic_symbol_table_section_index { 0 }; // .dynsym - unsigned m_dynamic_string_table_section_index { 0 }; // .dynstr - unsigned m_dynamic_section_index { 0 }; // .dynamic - unsigned m_dynamic_relocation_section_index { 0 }; // .rel.dyn + unsigned m_dynamic_section_index { 0 }; }; template<typename F> @@ -318,15 +226,6 @@ inline void ELFImage::RelocationSection::for_each_relocation(F func) const } template<typename F> -inline void ELFImage::DynamicRelocationSection::for_each_relocation(F func) const -{ - for (unsigned i = 0; i < relocation_count(); ++i) { - if (func(relocation(i)) == IterationDecision::Break) - break; - } -} - -template<typename F> inline void ELFImage::for_each_symbol(F func) const { for (unsigned i = 0; i < symbol_count(); ++i) { @@ -336,30 +235,8 @@ inline void ELFImage::for_each_symbol(F func) const } template<typename F> -inline void ELFImage::for_each_dynamic_symbol(F func) const -{ - for (unsigned i = 0; i < dynamic_symbol_count(); ++i) { - if (func(symbol(i)) == IterationDecision::Break) - break; - } -} - -template<typename F> inline void ELFImage::for_each_program_header(F func) const { for (unsigned i = 0; i < program_header_count(); ++i) func(program_header(i)); } - -template<typename F> -inline void ELFImage::DynamicSection::for_each_dynamic_entry(F func) const -{ - auto* dyns = reinterpret_cast<const Elf32_Dyn*>(m_image.raw_data(offset())); - for (unsigned i = 0;; ++i) { - auto&& dyn = DynamicSectionEntry(m_image, dyns[i]); - if (dyn.tag() == DT_NULL) - break; - if 
(func(dyn) == IterationDecision::Break) - break; - } -} |