summaryrefslogtreecommitdiff
path: root/AK/CircularBuffer.cpp
diff options
context:
space:
mode:
authorTim Schumacher <timschumi@gmx.de>2023-05-03 11:14:36 +0200
committerAndreas Kling <kling@serenityos.org>2023-05-17 09:08:53 +0200
commit221b91ff61aa4984577d76efcbaacfe1fafd61b4 (patch)
tree096999093758f4c5884c59f373a1599dc09f4ffb /AK/CircularBuffer.cpp
parentd194011570cca93324137e3c697e7f95f2638fe6 (diff)
downloadserenity-221b91ff61aa4984577d76efcbaacfe1fafd61b4.zip
AK: Add `CircularBuffer::find_copy_in_seekback()`
This is useful for compressors, which quite frequently need to find a matching span of data within the seekback.
Diffstat (limited to 'AK/CircularBuffer.cpp')
-rw-r--r--AK/CircularBuffer.cpp103
1 files changed, 103 insertions, 0 deletions
diff --git a/AK/CircularBuffer.cpp b/AK/CircularBuffer.cpp
index 2d80aaaabb..05e6d7fbf1 100644
--- a/AK/CircularBuffer.cpp
+++ b/AK/CircularBuffer.cpp
@@ -249,4 +249,107 @@ ErrorOr<size_t> CircularBuffer::copy_from_seekback(size_t distance, size_t lengt
return length - remaining_length;
}
+ErrorOr<Vector<CircularBuffer::Match>> CircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length, Optional<Vector<size_t> const&> distance_hints) const
+{
+ VERIFY(minimum_length > 0);
+
+ // Clip the maximum length to the amount of data that we actually store.
+ if (maximum_length > m_used_space)
+ maximum_length = m_used_space;
+
+ if (maximum_length < minimum_length)
+ return Vector<Match> {};
+
+ Vector<Match> matches;
+
+ if (distance_hints.has_value()) {
+ // If we have any hints, verify and use those.
+ for (auto const& distance : distance_hints.value()) {
+ // TODO: This does not yet support looping repetitions.
+ if (distance < minimum_length)
+ continue;
+
+ auto needle_offset = (capacity() + m_reading_head) % capacity();
+ auto haystack_offset = (capacity() + m_reading_head - distance) % capacity();
+
+ for (size_t i = 0; i < minimum_length; i++) {
+ if (m_buffer[needle_offset] != m_buffer[haystack_offset])
+ break;
+
+ needle_offset = (needle_offset + 1) % capacity();
+ haystack_offset = (haystack_offset + 1) % capacity();
+
+ if (i + 1 == minimum_length)
+ TRY(matches.try_empend(distance, minimum_length));
+ }
+ }
+ } else {
+ // Otherwise, use memmem to find the initial matches.
+ // Note: We have the read head as our reference point, but `next_read_span_with_seekback` isn't aware of that and continues to use the write head.
+ // Therefore, we need to make sure to slice off the extraneous bytes from the end of the span and shift the returned distances by the correct amount.
+ size_t haystack_offset_from_start = 0;
+ Vector<ReadonlyBytes, 2> haystack;
+ haystack.append(next_read_span_with_seekback(m_seekback_limit));
+ if (haystack[0].size() < m_seekback_limit - used_space())
+ haystack.append(next_read_span_with_seekback(m_seekback_limit - haystack[0].size()));
+
+ haystack.last() = haystack.last().trim(haystack.last().size() - used_space());
+
+ auto needle = next_read_span().trim(minimum_length);
+
+ auto memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
+ while (memmem_match.has_value()) {
+ auto match_offset = memmem_match.release_value();
+
+ // Add the match to the list of matches to work with.
+ TRY(matches.try_empend(m_seekback_limit - used_space() - haystack_offset_from_start - match_offset, minimum_length));
+
+ auto size_to_discard = match_offset + 1;
+
+ // Trim away the already processed bytes from the haystack.
+ haystack_offset_from_start += size_to_discard;
+ while (size_to_discard > 0) {
+ if (haystack[0].size() < size_to_discard) {
+ size_to_discard -= haystack[0].size();
+ haystack.remove(0);
+ } else {
+ haystack[0] = haystack[0].slice(size_to_discard);
+ break;
+ }
+ }
+
+ if (haystack.size() == 0)
+ break;
+
+ // Try and find the next match.
+ memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
+ }
+ }
+
+ // From now on, all matches that we have stored have at least a length of `minimum_length` and they all refer to the same value.
+ // For the remaining part, we will keep checking the next byte incrementally and keep eliminating matches until we eliminated all of them.
+ Vector<Match> next_matches;
+
+ for (size_t offset = minimum_length; offset < maximum_length; offset++) {
+ auto needle_data = m_buffer[(capacity() + m_reading_head + offset) % capacity()];
+
+ for (auto const& match : matches) {
+ auto haystack_data = m_buffer[(capacity() + m_reading_head - match.distance + offset) % capacity()];
+
+ if (haystack_data != needle_data)
+ continue;
+
+ TRY(next_matches.try_empend(match.distance, match.length + 1));
+ }
+
+ if (next_matches.size() == 0)
+ return matches;
+
+ swap(matches, next_matches);
+ next_matches.clear_with_capacity();
+ }
+
+ return matches;
+}
+
}