diff options
author | AnotherTest <ali.mpfard@gmail.com> | 2020-10-25 09:04:39 +0330 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-10-29 11:53:01 +0100 |
commit | 0801b1fada4d1eacdde3dc4aff04b9ee9eb73bf8 (patch) | |
tree | b9fefba7e71e8178cc5c43088e185278a760ff64 | |
parent | 2d6d1ca67f4f5c8c9dbf40b24675810a6e778c78 (diff) | |
download | serenity-0801b1fada4d1eacdde3dc4aff04b9ee9eb73bf8.zip |
AK: Make String::matches() capable of reporting match positions too
Also, rewrite StringUtils::match(), because the old implementation was
fairly broken, e.g. "acdcxb" would *not* match "a*?b".
-rw-r--r-- | AK/String.cpp | 5 | ||||
-rw-r--r-- | AK/String.h | 1 | ||||
-rw-r--r-- | AK/StringUtils.cpp | 72 | ||||
-rw-r--r-- | AK/StringUtils.h | 16 | ||||
-rw-r--r-- | AK/StringView.cpp | 5 | ||||
-rw-r--r-- | AK/StringView.h | 1 | ||||
-rw-r--r-- | AK/Tests/TestStringUtils.cpp | 19 |
7 files changed, 86 insertions, 33 deletions
diff --git a/AK/String.cpp b/AK/String.cpp index 3782ad3f58..89e17ce471 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -270,6 +270,11 @@ String String::repeated(char ch, size_t count) return *impl; } +bool String::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const +{ + return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans); +} + bool String::matches(const StringView& mask, CaseSensitivity case_sensitivity) const { return StringUtils::matches(*this, mask, case_sensitivity); diff --git a/AK/String.h b/AK/String.h index 434584fe73..51e3c51de0 100644 --- a/AK/String.h +++ b/AK/String.h @@ -112,6 +112,7 @@ public: static String repeated(char, size_t count); bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const; + bool matches(const StringView& mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const; Optional<int> to_int() const; Optional<unsigned> to_uint() const; diff --git a/AK/StringUtils.cpp b/AK/StringUtils.cpp index 858e1642fb..eb17b558b6 100644 --- a/AK/StringUtils.cpp +++ b/AK/StringUtils.cpp @@ -30,62 +30,70 @@ #include <AK/String.h> #include <AK/StringUtils.h> #include <AK/StringView.h> +#include <AK/Vector.h> namespace AK { namespace StringUtils { -bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity) +bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans) { + auto record_span = [&match_spans](size_t start, size_t length) { + if (match_spans) + match_spans->append({ start, length }); + }; + if (str.is_null() || mask.is_null()) return str.is_null() && mask.is_null(); + if (mask == "*") { + record_span(0, str.length()); + return true; + } + if (case_sensitivity == CaseSensitivity::CaseInsensitive) { const String str_lower = String(str).to_lowercase(); const String mask_lower = String(mask).to_lowercase(); - return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive); + return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive, match_spans); } const char* string_ptr = str.characters_without_null_termination(); + const char* string_start = str.characters_without_null_termination(); const char* string_end = string_ptr + str.length(); const char* mask_ptr = mask.characters_without_null_termination(); const char* mask_end = mask_ptr + mask.length(); - // Match string against mask directly unless we hit a * - while ((string_ptr < string_end) && (mask_ptr < mask_end) && (*mask_ptr != '*')) { - if ((*mask_ptr != *string_ptr) && (*mask_ptr != '?')) - return false; - mask_ptr++; - string_ptr++; - } - - const char* cp = nullptr; - const char* mp = nullptr; - - while (string_ptr < string_end) { - if ((mask_ptr < mask_end) && (*mask_ptr == '*')) { - // If we have only a * left, there is no way to not match. - if (++mask_ptr == mask_end) + auto matches_one = [](char ch, char p) { + if (p == '?') + return true; + return p == ch && ch != 0; + }; + while (string_ptr < string_end && mask_ptr < mask_end) { + auto string_start_ptr = string_ptr; + switch (*mask_ptr) { + case '*': + if (mask_ptr[1] == 0) { + record_span(string_ptr - string_start, string_end - string_ptr); return true; - mp = mask_ptr; - cp = string_ptr + 1; - } else if ((mask_ptr < mask_end) && ((*mask_ptr == *string_ptr) || (*mask_ptr == '?'))) { - mask_ptr++; - string_ptr++; - } else if ((cp != nullptr) && (mp != nullptr)) { - mask_ptr = mp; - string_ptr = cp++; - } else { + } + while (string_ptr < string_end && !matches(string_ptr, mask_ptr + 1)) + ++string_ptr; + record_span(string_start_ptr - string_start, string_ptr - string_start_ptr); + --string_ptr; + break; + case '?': + record_span(string_ptr - string_start, 1); + break; + default: + if (!matches_one(*string_ptr, *mask_ptr)) + return false; break; } + ++string_ptr; + ++mask_ptr; } - // Handle any trailing mask - while ((mask_ptr < mask_end) && (*mask_ptr == '*')) - mask_ptr++; - - // If we 'ate' all of the mask and the string then we match. - return (mask_ptr == mask_end) && string_ptr == string_end; + return string_ptr == string_end && mask_ptr == mask_end; } Optional<int> convert_to_int(const StringView& str) diff --git a/AK/StringUtils.h b/AK/StringUtils.h index 7c532175c3..849be54d18 100644 --- a/AK/StringUtils.h +++ b/AK/StringUtils.h @@ -42,9 +42,23 @@ enum class TrimMode { Both }; +struct MaskSpan { + size_t start; + size_t length; + + bool operator==(const MaskSpan& other) const + { + return start == other.start && length == other.length; + } + bool operator!=(const MaskSpan& other) const + { + return !(*this == other); + } +}; + namespace StringUtils { -bool matches(const StringView& str, const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive); +bool matches(const StringView& str, const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive, Vector<MaskSpan>* match_spans = nullptr); Optional<int> convert_to_int(const StringView&); Optional<unsigned> convert_to_uint(const StringView&); Optional<unsigned> convert_to_uint_from_hex(const StringView&); diff --git a/AK/StringView.cpp b/AK/StringView.cpp index ba8f82218a..205ed239af 100644 --- a/AK/StringView.cpp +++ b/AK/StringView.cpp @@ -164,6 +164,11 @@ bool StringView::ends_with(const StringView& str, CaseSensitivity case_sensitivi return StringUtils::ends_with(*this, str, case_sensitivity); } +bool StringView::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const +{ + return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans); +} + bool StringView::matches(const StringView& mask, CaseSensitivity case_sensitivity) const { return StringUtils::matches(*this, mask, case_sensitivity); diff --git a/AK/StringView.h b/AK/StringView.h index 84d610ce23..e46eb3d689 100644 --- a/AK/StringView.h +++ b/AK/StringView.h @@ -87,6 +87,7 @@ public: bool starts_with(char) const; bool ends_with(char) const; bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const; + bool matches(const StringView& mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const; bool contains(char) const; bool contains(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive) const; bool equals_ignoring_case(const StringView& other) const; diff --git a/AK/Tests/TestStringUtils.cpp b/AK/Tests/TestStringUtils.cpp index c726100133..55b4ab5fc7 100644 --- a/AK/Tests/TestStringUtils.cpp +++ b/AK/Tests/TestStringUtils.cpp @@ -67,6 +67,25 @@ TEST_CASE(matches_case_insensitive) EXPECT(!AK::StringUtils::matches("acdcb", "a*c?b")); } +TEST_CASE(matches_with_positions) +{ + Vector<AK::MaskSpan> spans; + EXPECT(AK::StringUtils::matches("abbb", "a*", CaseSensitivity::CaseSensitive, &spans)); + EXPECT(spans == Vector<AK::MaskSpan>({ { 1, 3 } })); + + spans.clear(); + EXPECT(AK::StringUtils::matches("abbb", "?*", CaseSensitivity::CaseSensitive, &spans)); + EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 0, 1 }, { 1, 3 } })); + + spans.clear(); + EXPECT(AK::StringUtils::matches("acdcxb", "a*c?b", CaseSensitivity::CaseSensitive, &spans)); + EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 1, 2 }, { 4, 1 } })); + + spans.clear(); + EXPECT(AK::StringUtils::matches("aaaa", "A*", CaseSensitivity::CaseInsensitive, &spans)); + EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 1, 3 } })); +} + TEST_CASE(convert_to_int) { auto value = AK::StringUtils::convert_to_int(StringView()); |