diff options
author | Eli Youngs <eli.m.youngs@gmail.com> | 2022-12-17 00:51:00 -0800 |
---|---|---|
committer | Andrew Kaster <andrewdkaster@gmail.com> | 2023-01-06 13:52:21 -0700 |
commit | 87a961534f5424c340c59515d453b81eab7b81d3 (patch) | |
tree | c30d16d3aa7fd2fbf934782a00b1f065db0c4576 | |
parent | 5bf2cce839fc6c953609f0297f30def65bbc7046 (diff) | |
download | serenity-87a961534f5424c340c59515d453b81eab7b81d3.zip |
LibRegex: Prevent patterns from matching the empty string twice
Previously, if a pattern matched the empty string (e.g. ".*"), it would
match the string twice instead of once. Among other issues, this caused
a Regex replacement to duplicate its expected output, since it would
replace "both" empty matches.
-rw-r--r-- | Tests/LibRegex/Regex.cpp | 12 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatcher.cpp | 5 |
2 files changed, 17 insertions, 0 deletions
```diff
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 452d97fed5..366b414819 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -1089,6 +1089,18 @@ TEST_CASE(single_match_flag)
     }
 }
 
+TEST_CASE(empty_string_wildcard_match)
+{
+    {
+        // Ensure that the wildcard ".*" matches the empty string exactly once
+        Regex<ECMA262> re(".*"sv, ECMAScriptFlags::Global);
+        auto result = re.match(""sv);
+        EXPECT_EQ(result.success, true);
+        EXPECT_EQ(result.matches.size(), 1u);
+        EXPECT_EQ(result.matches.first().view.to_deprecated_string(), ""sv);
+    }
+}
+
 TEST_CASE(inversion_state_in_char_class)
 {
     {
diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp
index 71caf5cecd..bf6571680a 100644
--- a/Userland/Libraries/LibRegex/RegexMatcher.cpp
+++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp
@@ -222,6 +222,11 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
                     // Nothing was *actually* matched, so append an empty match.
                     append_match(input, state, view_index);
                     ++match_count;
+
+                    // This prevents a regex pattern like ".*" from matching the empty string
+                    // multiple times, once in this block and once in the following for loop.
+                    if (view_index == 0 && view_length == 0)
+                        ++view_index;
                 }
             }
         }
```