From 4b91e7c821bbe823fbf35229a557f306ec324cc4 Mon Sep 17 00:00:00 2001 From: Itamar Date: Wed, 6 Jan 2021 21:49:25 +0200 Subject: LibDebug: Support shared libraries DebugSession now makes the loader stop after loading the libraries, and parses the loaded libraries of the program before continuing its execution. DebugSession now also supports inserting a breakpoint at a given symbol or source position. Additionally, DebugInfo now takes the base address of its object into consideration. --- Libraries/LibDebug/CMakeLists.txt | 2 +- Libraries/LibDebug/DebugInfo.cpp | 35 +++++-- Libraries/LibDebug/DebugInfo.h | 31 ++++++- Libraries/LibDebug/DebugSession.cpp | 180 ++++++++++++++++++++++++++++++++++-- Libraries/LibDebug/DebugSession.h | 93 +++++++++++++++---- 5 files changed, 307 insertions(+), 34 deletions(-) (limited to 'Libraries/LibDebug') diff --git a/Libraries/LibDebug/CMakeLists.txt b/Libraries/LibDebug/CMakeLists.txt index f4c4277436..a2fa57a9a5 100644 --- a/Libraries/LibDebug/CMakeLists.txt +++ b/Libraries/LibDebug/CMakeLists.txt @@ -11,4 +11,4 @@ set(SOURCES ) serenity_lib(LibDebug debug) -target_link_libraries(LibDebug LibC) +target_link_libraries(LibDebug LibC LibRegex) diff --git a/Libraries/LibDebug/DebugInfo.cpp b/Libraries/LibDebug/DebugInfo.cpp index 10204f23d3..200dcf75f1 100644 --- a/Libraries/LibDebug/DebugInfo.cpp +++ b/Libraries/LibDebug/DebugInfo.cpp @@ -25,6 +25,7 @@ */ #include "DebugInfo.h" +#include #include #include #include @@ -35,8 +36,10 @@ namespace Debug { -DebugInfo::DebugInfo(NonnullOwnPtr elf) +DebugInfo::DebugInfo(NonnullOwnPtr elf, String source_root, FlatPtr base_address) : m_elf(move(elf)) + , m_source_root(source_root) + , m_base_address(base_address) , m_dwarf_info(*m_elf) { prepare_variable_scopes(); @@ -124,6 +127,9 @@ void DebugInfo::prepare_lines() auto start_index = file_path.index_of(serenity_slash).value() + serenity_slash.length(); file_path = file_path.substring(start_index, file_path.length() - start_index); } + if 
(file_path.starts_with("./") && !m_source_root.is_null()) { + file_path = LexicalPath::canonicalized_path(String::formatted("{}/{}", m_source_root, file_path)); + } m_sorted_lines.append({ line_info.address, file_path, line_info.line }); } quick_sort(m_sorted_lines, [](auto& a, auto& b) { @@ -147,19 +153,34 @@ Optional DebugInfo::get_source_position(u32 target_ad return {}; } -Optional DebugInfo::get_instruction_from_source(const String& file, size_t line) const +Optional DebugInfo::get_address_from_source_position(const String& file, size_t line) const { String file_path = file; + if (!file_path.starts_with("/")) + file_path = String::format("/%s", file_path.characters()); + constexpr char SERENITY_LIBS_PREFIX[] = "/usr/src/serenity"; if (file.starts_with(SERENITY_LIBS_PREFIX)) { file_path = file.substring(sizeof(SERENITY_LIBS_PREFIX), file.length() - sizeof(SERENITY_LIBS_PREFIX)); file_path = String::format("../%s", file_path.characters()); } + + Optional result; for (const auto& line_entry : m_sorted_lines) { - if (line_entry.file == file_path && line_entry.line == line) - return Optional(line_entry.address); + if (!line_entry.file.ends_with(file_path)) + continue; + + if (line_entry.line > line) + continue; + + // We look for the source position that is closest to the desired position, and is not after it. + // For example, get_address_from_source_position("main.cpp", 73) could return the address for an instruction whose location is ("main.cpp", 72) + // as there might not be an instruction mapped for "main.cpp", 73. 
+ if (!result.has_value() || (line_entry.line > result.value().line)) { + result = SourcePositionAndAddress { line_entry.file, line_entry.line, line_entry.address }; + } } - return {}; + return result; } NonnullOwnPtrVector DebugInfo::get_variables_in_current_scope(const PtraceRegisters& regs) const @@ -168,7 +189,7 @@ NonnullOwnPtrVector DebugInfo::get_variables_in_current // TODO: We can store the scopes in a better data structure for (const auto& scope : m_scopes) { - if (regs.eip < scope.address_low || regs.eip >= scope.address_high) + if (regs.eip - m_base_address < scope.address_low || regs.eip - m_base_address >= scope.address_high) continue; for (const auto& die_entry : scope.dies_of_variables) { @@ -336,7 +357,7 @@ Vector DebugInfo::source_lines_in_scope(const Variabl DebugInfo::SourcePosition DebugInfo::SourcePosition::from_line_info(const Dwarf::LineProgram::LineInfo& line) { - return { line.file, line.line, line.address }; + return { line.file, line.line, { line.address } }; } } diff --git a/Libraries/LibDebug/DebugInfo.h b/Libraries/LibDebug/DebugInfo.h index bcb5db3cbc..8c73d7dee4 100644 --- a/Libraries/LibDebug/DebugInfo.h +++ b/Libraries/LibDebug/DebugInfo.h @@ -40,14 +40,30 @@ namespace Debug { class DebugInfo { public: - explicit DebugInfo(NonnullOwnPtr); + explicit DebugInfo(NonnullOwnPtr, String source_root = {}, FlatPtr base_address = 0); const ELF::Image& elf() const { return *m_elf; } struct SourcePosition { FlyString file_path; size_t line_number { 0 }; - u32 address_of_first_statement { 0 }; + Optional address_of_first_statement; + + SourcePosition() + : SourcePosition(String::empty(), 0) + { + } + SourcePosition(String file_path, size_t line_number) + : file_path(file_path) + , line_number(line_number) + { + } + SourcePosition(String file_path, size_t line_number, u32 address_of_first_statement) + : file_path(file_path) + , line_number(line_number) + , address_of_first_statement(address_of_first_statement) + { + } bool operator==(const 
SourcePosition& other) const { return file_path == other.file_path && line_number == other.line_number; } bool operator!=(const SourcePosition& other) const { return !(*this == other); } @@ -93,7 +109,14 @@ public: NonnullOwnPtrVector get_variables_in_current_scope(const PtraceRegisters&) const; Optional get_source_position(u32 address) const; - Optional get_instruction_from_source(const String& file, size_t line) const; + + struct SourcePositionAndAddress { + String file; + size_t line; + FlatPtr address; + }; + + Optional get_address_from_source_position(const String& file, size_t line) const; template void for_each_source_position(Callback callback) const @@ -120,6 +143,8 @@ private: OwnPtr create_variable_info(const Dwarf::DIE& variable_die, const PtraceRegisters&) const; NonnullOwnPtr m_elf; + String m_source_root; + FlatPtr m_base_address { 0 }; Dwarf::DwarfInfo m_dwarf_info; Vector m_scopes; diff --git a/Libraries/LibDebug/DebugSession.cpp b/Libraries/LibDebug/DebugSession.cpp index 27fb35edc9..d77fbbe0cc 100644 --- a/Libraries/LibDebug/DebugSession.cpp +++ b/Libraries/LibDebug/DebugSession.cpp @@ -25,15 +25,20 @@ */ #include "DebugSession.h" +#include +#include +#include #include +#include +#include #include namespace Debug { -DebugSession::DebugSession(pid_t pid) +DebugSession::DebugSession(pid_t pid, String source_root) : m_debuggee_pid(pid) - , m_executable(map_executable_for_process(pid)) - , m_debug_info(make(reinterpret_cast(m_executable.data()), m_executable.size())) + , m_source_root(source_root) + { } @@ -59,7 +64,7 @@ DebugSession::~DebugSession() } } -OwnPtr DebugSession::exec_and_attach(const String& command) +OwnPtr DebugSession::exec_and_attach(const String& command, String source_root) { auto pid = fork(); @@ -80,7 +85,10 @@ OwnPtr DebugSession::exec_and_attach(const String& command) for (size_t i = 0; i < parts.size(); i++) { args[i] = parts[i].characters(); } - int rc = execvp(args[0], const_cast(args)); + const char** envp = (const 
char**)calloc(2, sizeof(const char*)); + // This causes loader to stop on a breakpoint before jumping to the entry point of the program. + envp[0] = "_LOADER_BREAKPOINT=1"; + int rc = execvpe(args[0], const_cast(args), const_cast(envp)); if (rc < 0) { perror("execvp"); } @@ -107,7 +115,19 @@ OwnPtr DebugSession::exec_and_attach(const String& command) return nullptr; } - return adopt_own(*new DebugSession(pid)); + auto debug_session = adopt_own(*new DebugSession(pid, source_root)); + + // Continue until breakpoint before entry point of main program + int wstatus = debug_session->continue_debuggee_and_wait(); + if (WSTOPSIG(wstatus) != SIGTRAP) { + dbgln("expected SIGTRAP"); + return nullptr; + } + + // At this point, libraries should have been loaded + debug_session->update_loaded_libs(); + + return move(debug_session); } bool DebugSession::poke(u32* address, u32 data) @@ -268,4 +288,152 @@ void DebugSession::detach() continue_debuggee(); } +Optional DebugSession::insert_breakpoint(const String& symbol_name) +{ + Optional result; + for_each_loaded_library([this, symbol_name, &result](auto& lib) { + // The loader contains its own definitions for LibC symbols, so we don't want to include it in the search. 
+ if (lib.name == "Loader.so") + return IterationDecision::Continue; + + auto symbol = lib.debug_info->elf().find_demangled_function(symbol_name); + if (!symbol.has_value()) + return IterationDecision::Continue; + + auto breakpoint_address = symbol.value().value() + lib.base_address; + bool rc = this->insert_breakpoint(reinterpret_cast(breakpoint_address)); + if (!rc) + return IterationDecision::Break; + + result = InsertBreakpointAtSymbolResult { lib.name, breakpoint_address }; + return IterationDecision::Break; + }); + return result; +} + +Optional DebugSession::insert_breakpoint(const String& file_name, size_t line_number) +{ + auto address_and_source_position = get_address_from_source_position(file_name, line_number); + if (!address_and_source_position.has_value()) + return {}; + + auto address = address_and_source_position.value().address; + bool rc = this->insert_breakpoint(reinterpret_cast(address)); + if (!rc) + return {}; + + auto lib = library_at(address); + ASSERT(lib); + + return InsertBreakpointAtSourcePositionResult { lib->name, address_and_source_position.value().file, address_and_source_position.value().line, address }; +} + +void DebugSession::update_loaded_libs() +{ + auto file = Core::File::construct(String::format("/proc/%u/vm", m_debuggee_pid)); + bool rc = file->open(Core::IODevice::ReadOnly); + ASSERT(rc); + + auto file_contents = file->read_all(); + auto json = JsonValue::from_string(file_contents); + ASSERT(json.has_value()); + + auto vm_entries = json.value().as_array(); + Regex re("(.+): \\.text"); + + auto get_path_to_object = [&re](const String& vm_name) -> Optional { + if (vm_name == "/usr/lib/Loader.so") + return vm_name; + RegexResult result; + auto rc = re.search(vm_name, result); + if (!rc) + return {}; + auto lib_name = result.capture_group_matches.at(0).at(0).view.u8view().to_string(); + if (lib_name.starts_with("/")) + return lib_name; + return String::format("/usr/lib/%s", lib_name.characters()); + }; + + 
vm_entries.for_each([&](auto& entry) { + // TODO: check that region is executable + auto vm_name = entry.as_object().get("name").as_string(); + + auto object_path = get_path_to_object(vm_name); + if (!object_path.has_value()) + return IterationDecision::Continue; + + String lib_name = object_path.value(); + if (lib_name.ends_with(".so")) + lib_name = LexicalPath(object_path.value()).basename(); + + // FIXME: DebugInfo currently cannot parse the debug information of libgcc_s.so + if (lib_name == "libgcc_s.so") + return IterationDecision::Continue; + + if (m_loaded_libraries.contains(lib_name)) + return IterationDecision::Continue; + + MappedFile lib_file(object_path.value()); + if (!lib_file.is_valid()) + return IterationDecision::Continue; + + FlatPtr base_address = entry.as_object().get("address").as_u32(); + auto debug_info = make(make(reinterpret_cast(lib_file.data()), lib_file.size()), m_source_root, base_address); + auto lib = make(lib_name, move(lib_file), move(debug_info), base_address); + m_loaded_libraries.set(lib_name, move(lib)); + + return IterationDecision::Continue; + }); +} + +const DebugSession::LoadedLibrary* DebugSession::library_at(FlatPtr address) const +{ + const LoadedLibrary* result = nullptr; + for_each_loaded_library([&result, address](const auto& lib) { + if (address >= lib.base_address && address < lib.base_address + lib.debug_info->elf().size()) { + result = &lib; + return IterationDecision::Break; + } + return IterationDecision::Continue; + }); + return result; +} + +Optional DebugSession::symbolicate(FlatPtr address) const +{ + auto* lib = library_at(address); + if (!lib) + return {}; + //FIXME: ELF::Image symbolicate() API should return String::empty() if symbol is not found (It currently returns ??) 
+ auto symbol = lib->debug_info->elf().symbolicate(address - lib->base_address); + return { { lib->name, symbol } }; +} + +Optional DebugSession::get_address_from_source_position(const String& file, size_t line) const +{ + Optional result; + for_each_loaded_library([this, file, line, &result](auto& lib) { + // The loader contains its own definitions for LibC symbols, so we don't want to include it in the search. + if (lib.name == "Loader.so") + return IterationDecision::Continue; + + auto source_position_and_address = lib.debug_info->get_address_from_source_position(file, line); + if (!source_position_and_address.has_value()) + return IterationDecision::Continue; + + result = source_position_and_address; + result.value().address += lib.base_address; + return IterationDecision::Break; + }); + return result; +} + +Optional DebugSession::get_source_position(FlatPtr address) const +{ + auto* lib = library_at(address); + if (!lib) + return {}; + return lib->debug_info->get_source_position(address - lib->base_address); +} + } diff --git a/Libraries/LibDebug/DebugSession.h b/Libraries/LibDebug/DebugSession.h index ff0c90fab5..5081135b1a 100644 --- a/Libraries/LibDebug/DebugSession.h +++ b/Libraries/LibDebug/DebugSession.h @@ -45,7 +45,7 @@ namespace Debug { class DebugSession { public: - static OwnPtr exec_and_attach(const String& command); + static OwnPtr exec_and_attach(const String& command, String source_root = {}); ~DebugSession(); @@ -60,11 +60,27 @@ public: }; struct BreakPoint { - void* address; - u32 original_first_word; - BreakPointState state; + void* address { nullptr }; + u32 original_first_word { 0 }; + BreakPointState state { BreakPointState::Disabled }; }; + struct InsertBreakpointAtSymbolResult { + String library_name; + FlatPtr address { 0 }; + }; + + Optional insert_breakpoint(const String& symbol_name); + + struct InsertBreakpointAtSourcePositionResult { + String library_name; + String file_name; + size_t line_number { 0 }; + FlatPtr address { 0 }; + 
}; + + Optional insert_breakpoint(const String& file_name, size_t line_number); + bool insert_breakpoint(void* address); bool disable_breakpoint(void* address); bool enable_breakpoint(void* address); @@ -95,12 +111,12 @@ public: void detach(); + enum DesiredInitialDebugeeState { + Running, + Stopped + }; template - void run(Callback callback); - - const ELF::Image& elf() const { return m_debug_info.elf(); } - const MappedFile& executable() const { return m_executable; } - const DebugInfo& debug_info() const { return m_debug_info; } + void run(DesiredInitialDebugeeState, Callback); enum DebugDecision { Continue, @@ -116,38 +132,79 @@ public: Exited, }; + struct LoadedLibrary { + String name; + MappedFile file; + NonnullOwnPtr debug_info; + FlatPtr base_address; + + LoadedLibrary(const String& name, MappedFile&& file, NonnullOwnPtr&& debug_info, FlatPtr base_address) + : name(name) + , file(move(file)) + , debug_info(move(debug_info)) + , base_address(base_address) + { + } + }; + + template + void for_each_loaded_library(Func f) const + { + for (const auto& lib_name : m_loaded_libraries.keys()) { + const auto& lib = *m_loaded_libraries.get(lib_name).value(); + if (f(lib) == IterationDecision::Break) + break; + } + } + + const LoadedLibrary* library_at(FlatPtr address) const; + + struct SymbolicationResult { + String library_name; + String symbol; + }; + Optional symbolicate(FlatPtr address) const; + + Optional get_address_from_source_position(const String& file, size_t line) const; + + Optional get_source_position(FlatPtr address) const; + private: - explicit DebugSession(pid_t); + explicit DebugSession(pid_t, String source_root); // x86 breakpoint instruction "int3" static constexpr u8 BREAKPOINT_INSTRUCTION = 0xcc; static MappedFile map_executable_for_process(pid_t); + void update_loaded_libs(); + int m_debuggee_pid { -1 }; + String m_source_root; bool m_is_debuggee_dead { false }; - MappedFile m_executable; - DebugInfo m_debug_info; - HashMap m_breakpoints; + + // 
Maps from library name to loaded library + HashMap> m_loaded_libraries; }; template -void DebugSession::run(Callback callback) +void DebugSession::run(DesiredInitialDebugeeState initial_debugee_state, Callback callback) { enum class State { + FirstIteration, FreeRun, Syscall, ConsecutiveBreakpoint, SingleStep, }; - State state { State::FreeRun }; + State state { State::FirstIteration }; auto do_continue_and_wait = [&]() { - int wstatus = continue_debuggee_and_wait((state == State::FreeRun) ? ContinueType::FreeRun : ContinueType::Syscall); + int wstatus = continue_debuggee_and_wait((state == State::Syscall) ? ContinueType::Syscall : ContinueType::FreeRun); // FIXME: This check actually only checks whether the debuggee // stopped because it hit a breakpoint/syscall/is in single stepping mode or not @@ -160,10 +217,12 @@ void DebugSession::run(Callback callback) }; for (;;) { - if (state == State::FreeRun || state == State::Syscall) { + if ((state == State::FirstIteration && initial_debugee_state == DesiredInitialDebugeeState::Running) || state == State::FreeRun || state == State::Syscall) { if (do_continue_and_wait()) break; } + if (state == State::FirstIteration) + state = State::FreeRun; auto regs = get_registers(); Optional current_breakpoint; -- cgit v1.2.3