summaryrefslogtreecommitdiff
path: root/Userland
diff options
context:
space:
mode:
authorAli Mohammad Pur <ali.mpfard@gmail.com>2021-04-23 03:55:19 +0430
committerLinus Groh <mail@linusgroh.de>2021-04-23 10:05:04 +0200
commitbf9c04a3dadc6d2785054ee772e06c016b278e28 (patch)
tree4ffa75e063ab1033f17ff917f39fe9c79082fc7b /Userland
parentbb40d4d5fff1ca729c14171561d03621d765854e (diff)
downloadserenity-bf9c04a3dadc6d2785054ee772e06c016b278e28.zip
LibRegex: Implement multiline stateful matches
Diffstat (limited to 'Userland')
-rw-r--r--Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js50
-rw-r--r--Userland/Libraries/LibRegex/RegexMatcher.cpp21
2 files changed, 69 insertions, 2 deletions
diff --git a/Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js b/Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js
index 182ecee457..f1e86dcafc 100644
--- a/Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js
+++ b/Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js
@@ -155,3 +155,53 @@ test("undefined match result", () => {
r.exec = () => ({});
expect(r[Symbol.replace]()).toBe("undefined");
});
+
+// Multiline test
+test("multiline match", () => {
+ let reg = /\s*\/\/.*$/gm;
+ let string = `
+(
+(?:[a-fA-F\d]{1,4}:){7}(?:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8
+(?:[a-fA-F\d]{1,4}:){6}(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4
+(?:[a-fA-F\d]{1,4}:){5}(?::(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4
+(?:[a-fA-F\d]{1,4}:){4}(?:(:[a-fA-F\d]{1,4}){0,1}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4
+(?:[a-fA-F\d]{1,4}:){3}(?:(:[a-fA-F\d]{1,4}){0,2}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4
+(?:[a-fA-F\d]{1,4}:){2}(?:(:[a-fA-F\d]{1,4}){0,3}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4
+(?:[a-fA-F\d]{1,4}:){1}(?:(:[a-fA-F\d]{1,4}){0,4}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4
+(?::((?::[a-fA-F\d]{1,4}){0,5}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(?::[a-fA-F\d]{1,4}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4
+)(%[0-9a-zA-Z]{1,})? // %eth0 %1
+`;
+
+ let res = reg.exec(string);
+ expect(res.length).toBe(1);
+ expect(res[0]).toBe(" // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8");
+ expect(res.index).toBe(46);
+});
+
+test("multiline stateful match", () => {
+ let reg = /\s*\/\/.*$/gm;
+ let string = `
+(
+(?:[a-fA-F\d]{1,4}:){7}(?:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8
+(?:[a-fA-F\d]{1,4}:){6}(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4
+(?:[a-fA-F\d]{1,4}:){5}(?::(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4
+(?:[a-fA-F\d]{1,4}:){4}(?:(:[a-fA-F\d]{1,4}){0,1}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4
+(?:[a-fA-F\d]{1,4}:){3}(?:(:[a-fA-F\d]{1,4}){0,2}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4
+(?:[a-fA-F\d]{1,4}:){2}(?:(:[a-fA-F\d]{1,4}){0,3}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4
+(?:[a-fA-F\d]{1,4}:){1}(?:(:[a-fA-F\d]{1,4}){0,4}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4
+(?::((?::[a-fA-F\d]{1,4}){0,5}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(?::[a-fA-F\d]{1,4}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4
+)(%[0-9a-zA-Z]{1,})? // %eth0 %1
+`;
+
+ let res = reg.exec(string);
+ expect(res.length).toBe(1);
+ expect(res[0]).toBe(" // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8");
+ expect(res.index).toBe(46);
+
+ res = reg.exec(string);
+ expect(res.length).toBe(1);
+ expect(res[0]).toBe(
+ " // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4"
+ );
+ expect(res.index).toBe(231);
+});
diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp
index c5e481b4d9..566ac74fd5 100644
--- a/Userland/Libraries/LibRegex/RegexMatcher.cpp
+++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp
@@ -82,9 +82,21 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
input.regex_options = m_regex_options | regex_options.value_or({}).value();
input.start_offset = m_pattern.start_offset;
output.operations = 0;
+ size_t lines_to_skip = 0;
- if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
- VERIFY(views.size() == 1);
+ if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
+ if (views.size() > 1 && input.start_offset > views.first().length()) {
+ dbgln("Started with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip);
+ for (auto& view : views) {
+ if (input.start_offset < view.length() + 1)
+ break;
+ ++lines_to_skip;
+ input.start_offset -= view.length() + 1;
+ input.global_offset += view.length() + 1;
+ }
+ dbgln("Ended with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip);
+ }
+ }
if (c_match_preallocation_count) {
output.matches.ensure_capacity(c_match_preallocation_count);
@@ -127,6 +139,11 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
continue_search = false;
for (auto& view : views) {
+ if (lines_to_skip != 0) {
+ ++input.line;
+ --lines_to_skip;
+ continue;
+ }
input.view = view;
dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view);