From b21ea54af01300b2f027153122e690ed7a645bb0 Mon Sep 17 00:00:00 2001 From: Lucas CHOLLET Date: Sat, 17 Dec 2022 01:37:40 +0100 Subject: LibCore: Merge two search implementations in `Stream::BufferedStream` `can_read_line()` and `read_until_any_of` used to have two different implementations to search for a needle. This commit factorizes both algorithms. --- Userland/Libraries/LibCore/Stream.h | 103 +++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 42 deletions(-) (limited to 'Userland/Libraries/LibCore') diff --git a/Userland/Libraries/LibCore/Stream.h b/Userland/Libraries/LibCore/Stream.h index 6def650908..e2db4c204f 100644 --- a/Userland/Libraries/LibCore/Stream.h +++ b/Userland/Libraries/LibCore/Stream.h @@ -694,6 +694,8 @@ public: if (!TRY(can_read_line())) return Bytes {}; + auto const candidate = TRY(find_and_populate_until_any_of(candidates, buffer.size())); + if (stream().is_eof()) { if (buffer.size() < m_buffer.used_space()) { // Normally, reading from an EOFed stream and receiving bytes @@ -710,27 +712,9 @@ public: } } - auto const readable_size = min(m_buffer.used_space(), buffer.size()); - - // The intention here is to try to match all of the possible - // delimiter candidates and try to find the longest one we can - // remove from the buffer after copying up to the delimiter to the - // user buffer. - Optional longest_match; - size_t match_size = 0; - for (auto& candidate : candidates) { - auto const result = m_buffer.offset_of(candidate, {}, readable_size); - if (result.has_value()) { - auto previous_match = longest_match.value_or(*result); - if ((previous_match < *result) || (previous_match == *result && match_size < candidate.length())) { - longest_match = result; - match_size = candidate.length(); - } - } - } - if (longest_match.has_value()) { - auto const read_bytes = m_buffer.read(buffer.trim(*longest_match)); - TRY(m_buffer.discard(match_size)); + if (candidate.has_value()) { + auto const read_bytes = m_buffer.read(buffer.trim(candidate->offset)); + TRY(m_buffer.discard(candidate->size)); return read_bytes; } @@ -740,37 +724,72 @@ public: return m_buffer.read(buffer); } - // Returns whether a line can be read, populating the buffer in the process. - ErrorOr can_read_line() + struct Match { + size_t offset {}; + size_t size {}; + }; + + template + ErrorOr> find_and_populate_until_any_of(Array const& candidates, Optional max_offset = {}) { - if (stream().is_eof() && m_buffer.used_space() > 0) - return true; + Optional longest_candidate; + for (auto& candidate : candidates) { + if (candidate.length() >= longest_candidate.value_or(candidate.length())) + longest_candidate = candidate.length(); + } - if (m_buffer.offset_of("\n"sv).has_value()) - return true; + // The intention here is to try to match all the possible + // delimiter candidates and try to find the longest one we can + // remove from the buffer after copying up to the delimiter to the + // user buffer. - if (stream().is_eof()) - return false; + auto const find_candidates = [this, &candidates, &longest_candidate](Optional max_offset = {}) -> Optional { + max_offset = max_offset.value_or(m_buffer.used_space()); + + Optional longest_match; + size_t match_size = 0; + for (auto& candidate : candidates) { + // FIXME: This currently searches through the buffer from the start, + // even if we just appended a small number of bytes at the end. + auto const result = m_buffer.offset_of(candidate, {}, *max_offset); + + if (result.has_value()) { + auto previous_match = longest_match.value_or(*result); + if ((previous_match < *result) || (previous_match == *result && match_size < candidate.length())) { + longest_match = result; + match_size = candidate.length(); + } + } + } - while (m_buffer.empty_space() > 0) { - auto populated_byte_count = TRY(populate_read_buffer()); + if (longest_match.has_value()) + return Match { *longest_match, match_size }; - if (stream().is_eof()) { - // We give the user one last hurrah to read the remaining - // contents as a "line". - return m_buffer.used_space() > 0; - } + return {}; + }; - // FIXME: This currently searches through the buffer from the start, - // even if we just appended a small number of bytes at the end. - if (m_buffer.offset_of("\n"sv).has_value()) - return true; + if (auto first_find = find_candidates(max_offset); first_find.has_value()) + return first_find; - if (populated_byte_count == 0) + while (m_buffer.used_space() < max_offset.value_or(m_buffer.capacity())) { + auto const read_bytes = TRY(populate_read_buffer()); + if (read_bytes == 0) break; + + if (auto first_find = find_candidates(max_offset); first_find.has_value()) + return first_find; } - return false; + return Optional {}; + } + + // Returns whether a line can be read, populating the buffer in the process. + ErrorOr can_read_line() + { + if (stream().is_eof()) + return m_buffer.used_space() > 0; + + return TRY(find_and_populate_until_any_of(Array { "\n"sv })).has_value(); } bool is_eof() const -- cgit v1.2.3