diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2021-04-23 03:55:19 +0430 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-04-23 10:05:04 +0200 |
commit | bf9c04a3dadc6d2785054ee772e06c016b278e28 (patch) | |
tree | 4ffa75e063ab1033f17ff917f39fe9c79082fc7b /Userland/Libraries | |
parent | bb40d4d5fff1ca729c14171561d03621d765854e (diff) | |
download | serenity-bf9c04a3dadc6d2785054ee772e06c016b278e28.zip |
LibRegex: Implement multiline stateful matches
Diffstat (limited to 'Userland/Libraries')
-rw-r--r-- | Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js | 50 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexMatcher.cpp | 21 |
2 files changed, 69 insertions, 2 deletions
diff --git a/Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js b/Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js index 182ecee457..f1e86dcafc 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js +++ b/Userland/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js @@ -155,3 +155,53 @@ test("undefined match result", () => { r.exec = () => ({}); expect(r[Symbol.replace]()).toBe("undefined"); }); + +// Multiline test +test("multiline match", () => { + let reg = /\s*\/\/.*$/gm; + let string = ` +( +(?:[a-fA-F\d]{1,4}:){7}(?:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8 +(?:[a-fA-F\d]{1,4}:){6}(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4 +(?:[a-fA-F\d]{1,4}:){5}(?::(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4 +(?:[a-fA-F\d]{1,4}:){4}(?:(:[a-fA-F\d]{1,4}){0,1}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4 +(?:[a-fA-F\d]{1,4}:){3}(?:(:[a-fA-F\d]{1,4}){0,2}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4 +(?:[a-fA-F\d]{1,4}:){2}(?:(:[a-fA-F\d]{1,4}){0,3}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4 +(?:[a-fA-F\d]{1,4}:){1}(?:(:[a-fA-F\d]{1,4}){0,4}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4 +(?::((?::[a-fA-F\d]{1,4}){0,5}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(?::[a-fA-F\d]{1,4}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4 +)(%[0-9a-zA-Z]{1,})? // %eth0 %1 +`; + + let res = reg.exec(string); + expect(res.length).toBe(1); + expect(res[0]).toBe(" // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8"); + expect(res.index).toBe(46); +}); + +test("multiline stateful match", () => { + let reg = /\s*\/\/.*$/gm; + let string = ` +( +(?:[a-fA-F\d]{1,4}:){7}(?:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8 +(?:[a-fA-F\d]{1,4}:){6}(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4 +(?:[a-fA-F\d]{1,4}:){5}(?::(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4 +(?:[a-fA-F\d]{1,4}:){4}(?:(:[a-fA-F\d]{1,4}){0,1}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4 +(?:[a-fA-F\d]{1,4}:){3}(?:(:[a-fA-F\d]{1,4}){0,2}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4 +(?:[a-fA-F\d]{1,4}:){2}(?:(:[a-fA-F\d]{1,4}){0,3}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4 +(?:[a-fA-F\d]{1,4}:){1}(?:(:[a-fA-F\d]{1,4}){0,4}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4 +(?::((?::[a-fA-F\d]{1,4}){0,5}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(?::[a-fA-F\d]{1,4}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4 +)(%[0-9a-zA-Z]{1,})? // %eth0 %1 +`; + + let res = reg.exec(string); + expect(res.length).toBe(1); + expect(res[0]).toBe(" // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8"); + expect(res.index).toBe(46); + + res = reg.exec(string); + expect(res.length).toBe(1); + expect(res[0]).toBe( + " // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4" + ); + expect(res.index).toBe(231); +}); diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp index c5e481b4d9..566ac74fd5 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.cpp +++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp @@ -82,9 +82,21 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional input.regex_options = m_regex_options | regex_options.value_or({}).value(); input.start_offset = m_pattern.start_offset; output.operations = 0; + size_t lines_to_skip = 0; - if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) - VERIFY(views.size() == 1); + if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) { + if (views.size() > 1 && input.start_offset > views.first().length()) { + dbgln("Started with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip); + for (auto& view : views) { + if (input.start_offset < view.length() + 1) + break; + ++lines_to_skip; + input.start_offset -= view.length() + 1; + input.global_offset += view.length() + 1; + } + dbgln("Ended with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip); + } + } if (c_match_preallocation_count) { output.matches.ensure_capacity(c_match_preallocation_count); @@ -127,6 +139,11 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional continue_search = false; for (auto& view : views) { + if (lines_to_skip != 0) { + ++input.line; + --lines_to_skip; + continue; + } input.view = view; dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view); |