diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2023-03-25 16:36:05 +0330 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2023-03-25 15:44:05 +0100 |
commit | 6fc9f5fa281e11d9da06424de6f52f04a7afc9dc (patch) | |
tree | 75957d9ae6bcf714b616fe1b97308ae90c02a89c | |
parent | 59a76b12797d1729ba5f95a4ff4e5094b95f0b10 (diff) | |
download | serenity-6fc9f5fa281e11d9da06424de6f52f04a7afc9dc.zip |
LibRegex: Make ^ and $ accept all `LineTerminator`s instead of just '\n'
Also adds a couple tests.
-rw-r--r-- | Tests/LibRegex/Regex.cpp | 5 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexByteCode.cpp | 14 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexOptions.h | 2 |
3 files changed, 13 insertions, 8 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 3fef06d6d5..0e47191a18 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -624,6 +624,8 @@ TEST_CASE(ECMA262_parse) TEST_CASE(ECMA262_match) { + constexpr auto global_multiline = ECMAScriptFlags::Global | ECMAScriptFlags::Multiline; + struct _test { StringView pattern; StringView subject; @@ -698,6 +700,9 @@ TEST_CASE(ECMA262_match) { "^[a-sy-z]$"sv, "y"sv, true, ECMAScriptFlags::Insensitive }, { "^[a-sy-z]$"sv, "u"sv, false, ECMAScriptFlags::Insensitive }, { "."sv, "\n\r\u2028\u2029"sv, false }, // Dot should not match any of CR/LF/LS/PS in ECMA262 mode without DotAll. + { "a$"sv, "a\r\n"sv, true, global_multiline.value() }, // $ should accept all LineTerminators in ECMA262 mode with Multiline. + { "^a"sv, "\ra"sv, true, global_multiline.value() }, + { "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() }, }; // clang-format on diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index 6bc1ee1f11..cb4daff96e 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -11,6 +11,11 @@ #include <AK/StringBuilder.h> #include <LibUnicode/CharacterTypes.h> +// U+2028 LINE SEPARATOR +constexpr static u32 const LineSeparator { 0x2028 }; +// U+2029 PARAGRAPH SEPARATOR +constexpr static u32 const ParagraphSeparator { 0x2029 }; + namespace regex { StringView OpCode::name(OpCodeId opcode_id) @@ -277,7 +282,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) { auto input_view = input.view.substring_view(state.string_position - 1, 1)[0]; - return input_view == '\n'; + return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator; } return false; @@ -330,7 +335,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input, if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) { auto input_view = input.view.substring_view(state.string_position, 1)[0]; - return input_view == '\n'; + return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator; } return false; @@ -499,11 +504,6 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M if (input.view.length() <= state.string_position) return ExecutionResult::Failed_ExecuteLowPrioForks; - // U+2028 LINE SEPARATOR - constexpr static u32 const LineSeparator { 0x2028 }; - // U+2029 PARAGRAPH SEPARATOR - constexpr static u32 const ParagraphSeparator { 0x2029 }; - auto input_view = input.view.substring_view(state.string_position, 1)[0]; auto is_equivalent_to_newline = input_view == '\n' || (input.regex_options.has_flag_set(AllFlags::Internal_ECMA262DotSemantics) diff --git a/Userland/Libraries/LibRegex/RegexOptions.h b/Userland/Libraries/LibRegex/RegexOptions.h index c6a6291fb1..d244de8ebf 100644 --- a/Userland/Libraries/LibRegex/RegexOptions.h +++ b/Userland/Libraries/LibRegex/RegexOptions.h @@ -116,7 +116,7 @@ public: void reset_flag(T flag) { m_flags = (T)((FlagsUnderlyingType)m_flags & ~(FlagsUnderlyingType)flag); } void set_flag(T flag) { *this |= flag; } bool has_flag_set(T flag) const { return (FlagsUnderlyingType)flag == ((FlagsUnderlyingType)m_flags & (FlagsUnderlyingType)flag); } - T value() const { return m_flags; } + constexpr T value() const { return m_flags; } private: T m_flags { T::Default }; |