summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnotherTest <ali.mpfard@gmail.com>2020-10-25 09:04:39 +0330
committerAndreas Kling <kling@serenityos.org>2020-10-29 11:53:01 +0100
commit0801b1fada4d1eacdde3dc4aff04b9ee9eb73bf8 (patch)
treeb9fefba7e71e8178cc5c43088e185278a760ff64
parent2d6d1ca67f4f5c8c9dbf40b24675810a6e778c78 (diff)
downloadserenity-0801b1fada4d1eacdde3dc4aff04b9ee9eb73bf8.zip
AK: Make String::matches() capable of reporting match positions too
Also, rewrite StringUtils::match(), because the old implementation was fairly broken, e.g. "acdcxb" would *not* match "a*?b".
-rw-r--r--AK/String.cpp5
-rw-r--r--AK/String.h1
-rw-r--r--AK/StringUtils.cpp72
-rw-r--r--AK/StringUtils.h16
-rw-r--r--AK/StringView.cpp5
-rw-r--r--AK/StringView.h1
-rw-r--r--AK/Tests/TestStringUtils.cpp19
7 files changed, 86 insertions, 33 deletions
diff --git a/AK/String.cpp b/AK/String.cpp
index 3782ad3f58..89e17ce471 100644
--- a/AK/String.cpp
+++ b/AK/String.cpp
@@ -270,6 +270,11 @@ String String::repeated(char ch, size_t count)
return *impl;
}
+bool String::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
+{
+ return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
+}
+
bool String::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
{
return StringUtils::matches(*this, mask, case_sensitivity);
diff --git a/AK/String.h b/AK/String.h
index 434584fe73..51e3c51de0 100644
--- a/AK/String.h
+++ b/AK/String.h
@@ -112,6 +112,7 @@ public:
static String repeated(char, size_t count);
bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
+ bool matches(const StringView& mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
Optional<int> to_int() const;
Optional<unsigned> to_uint() const;
diff --git a/AK/StringUtils.cpp b/AK/StringUtils.cpp
index 858e1642fb..eb17b558b6 100644
--- a/AK/StringUtils.cpp
+++ b/AK/StringUtils.cpp
@@ -30,62 +30,70 @@
#include <AK/String.h>
#include <AK/StringUtils.h>
#include <AK/StringView.h>
+#include <AK/Vector.h>
namespace AK {
namespace StringUtils {
-bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity)
+bool matches(const StringView& str, const StringView& mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans)
{
+ auto record_span = [&match_spans](size_t start, size_t length) {
+ if (match_spans)
+ match_spans->append({ start, length });
+ };
+
if (str.is_null() || mask.is_null())
return str.is_null() && mask.is_null();
+ if (mask == "*") {
+ record_span(0, str.length());
+ return true;
+ }
+
if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
const String str_lower = String(str).to_lowercase();
const String mask_lower = String(mask).to_lowercase();
- return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive);
+ return matches(str_lower, mask_lower, CaseSensitivity::CaseSensitive, match_spans);
}
const char* string_ptr = str.characters_without_null_termination();
+ const char* string_start = str.characters_without_null_termination();
const char* string_end = string_ptr + str.length();
const char* mask_ptr = mask.characters_without_null_termination();
const char* mask_end = mask_ptr + mask.length();
- // Match string against mask directly unless we hit a *
- while ((string_ptr < string_end) && (mask_ptr < mask_end) && (*mask_ptr != '*')) {
- if ((*mask_ptr != *string_ptr) && (*mask_ptr != '?'))
- return false;
- mask_ptr++;
- string_ptr++;
- }
-
- const char* cp = nullptr;
- const char* mp = nullptr;
-
- while (string_ptr < string_end) {
- if ((mask_ptr < mask_end) && (*mask_ptr == '*')) {
- // If we have only a * left, there is no way to not match.
- if (++mask_ptr == mask_end)
+ auto matches_one = [](char ch, char p) {
+ if (p == '?')
+ return true;
+ return p == ch && ch != 0;
+ };
+ while (string_ptr < string_end && mask_ptr < mask_end) {
+ auto string_start_ptr = string_ptr;
+ switch (*mask_ptr) {
+ case '*':
+ if (mask_ptr[1] == 0) {
+ record_span(string_ptr - string_start, string_end - string_ptr);
return true;
- mp = mask_ptr;
- cp = string_ptr + 1;
- } else if ((mask_ptr < mask_end) && ((*mask_ptr == *string_ptr) || (*mask_ptr == '?'))) {
- mask_ptr++;
- string_ptr++;
- } else if ((cp != nullptr) && (mp != nullptr)) {
- mask_ptr = mp;
- string_ptr = cp++;
- } else {
+ }
+ while (string_ptr < string_end && !matches(string_ptr, mask_ptr + 1))
+ ++string_ptr;
+ record_span(string_start_ptr - string_start, string_ptr - string_start_ptr);
+ --string_ptr;
+ break;
+ case '?':
+ record_span(string_ptr - string_start, 1);
+ break;
+ default:
+ if (!matches_one(*string_ptr, *mask_ptr))
+ return false;
break;
}
+ ++string_ptr;
+ ++mask_ptr;
}
- // Handle any trailing mask
- while ((mask_ptr < mask_end) && (*mask_ptr == '*'))
- mask_ptr++;
-
- // If we 'ate' all of the mask and the string then we match.
- return (mask_ptr == mask_end) && string_ptr == string_end;
+ return string_ptr == string_end && mask_ptr == mask_end;
}
Optional<int> convert_to_int(const StringView& str)
diff --git a/AK/StringUtils.h b/AK/StringUtils.h
index 7c532175c3..849be54d18 100644
--- a/AK/StringUtils.h
+++ b/AK/StringUtils.h
@@ -42,9 +42,23 @@ enum class TrimMode {
Both
};
+struct MaskSpan {
+ size_t start;
+ size_t length;
+
+ bool operator==(const MaskSpan& other) const
+ {
+ return start == other.start && length == other.length;
+ }
+ bool operator!=(const MaskSpan& other) const
+ {
+ return !(*this == other);
+ }
+};
+
namespace StringUtils {
-bool matches(const StringView& str, const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive);
+bool matches(const StringView& str, const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive, Vector<MaskSpan>* match_spans = nullptr);
Optional<int> convert_to_int(const StringView&);
Optional<unsigned> convert_to_uint(const StringView&);
Optional<unsigned> convert_to_uint_from_hex(const StringView&);
diff --git a/AK/StringView.cpp b/AK/StringView.cpp
index ba8f82218a..205ed239af 100644
--- a/AK/StringView.cpp
+++ b/AK/StringView.cpp
@@ -164,6 +164,11 @@ bool StringView::ends_with(const StringView& str, CaseSensitivity case_sensitivi
return StringUtils::ends_with(*this, str, case_sensitivity);
}
+bool StringView::matches(const StringView& mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
+{
+ return StringUtils::matches(*this, mask, case_sensitivity, &mask_spans);
+}
+
bool StringView::matches(const StringView& mask, CaseSensitivity case_sensitivity) const
{
return StringUtils::matches(*this, mask, case_sensitivity);
diff --git a/AK/StringView.h b/AK/StringView.h
index 84d610ce23..e46eb3d689 100644
--- a/AK/StringView.h
+++ b/AK/StringView.h
@@ -87,6 +87,7 @@ public:
bool starts_with(char) const;
bool ends_with(char) const;
bool matches(const StringView& mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
+ bool matches(const StringView& mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
bool contains(char) const;
bool contains(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
bool equals_ignoring_case(const StringView& other) const;
diff --git a/AK/Tests/TestStringUtils.cpp b/AK/Tests/TestStringUtils.cpp
index c726100133..55b4ab5fc7 100644
--- a/AK/Tests/TestStringUtils.cpp
+++ b/AK/Tests/TestStringUtils.cpp
@@ -67,6 +67,25 @@ TEST_CASE(matches_case_insensitive)
EXPECT(!AK::StringUtils::matches("acdcb", "a*c?b"));
}
+TEST_CASE(matches_with_positions)
+{
+ Vector<AK::MaskSpan> spans;
+ EXPECT(AK::StringUtils::matches("abbb", "a*", CaseSensitivity::CaseSensitive, &spans));
+ EXPECT(spans == Vector<AK::MaskSpan>({ { 1, 3 } }));
+
+ spans.clear();
+ EXPECT(AK::StringUtils::matches("abbb", "?*", CaseSensitivity::CaseSensitive, &spans));
+ EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 0, 1 }, { 1, 3 } }));
+
+ spans.clear();
+ EXPECT(AK::StringUtils::matches("acdcxb", "a*c?b", CaseSensitivity::CaseSensitive, &spans));
+ EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 1, 2 }, { 4, 1 } }));
+
+ spans.clear();
+ EXPECT(AK::StringUtils::matches("aaaa", "A*", CaseSensitivity::CaseInsensitive, &spans));
+ EXPECT_EQ(spans, Vector<AK::MaskSpan>({ { 1, 3 } }));
+}
+
TEST_CASE(convert_to_int)
{
auto value = AK::StringUtils::convert_to_int(StringView());