summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ali Mohammad Pur <ali.mpfard@gmail.com> 2023-03-25 16:36:05 +0330
committer: Andreas Kling <kling@serenityos.org> 2023-03-25 15:44:05 +0100
commit: 6fc9f5fa281e11d9da06424de6f52f04a7afc9dc (patch)
tree: 75957d9ae6bcf714b616fe1b97308ae90c02a89c
parent: 59a76b12797d1729ba5f95a4ff4e5094b95f0b10 (diff)
download: serenity-6fc9f5fa281e11d9da06424de6f52f04a7afc9dc.zip
LibRegex: Make ^ and $ accept all `LineTerminator`s instead of just '\n'
Also adds a couple tests.
-rw-r--r-- Tests/LibRegex/Regex.cpp 5
-rw-r--r-- Userland/Libraries/LibRegex/RegexByteCode.cpp 14
-rw-r--r-- Userland/Libraries/LibRegex/RegexOptions.h 2
3 files changed, 13 insertions, 8 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 3fef06d6d5..0e47191a18 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -624,6 +624,8 @@ TEST_CASE(ECMA262_parse)
TEST_CASE(ECMA262_match)
{
+ constexpr auto global_multiline = ECMAScriptFlags::Global | ECMAScriptFlags::Multiline;
+
struct _test {
StringView pattern;
StringView subject;
@@ -698,6 +700,9 @@ TEST_CASE(ECMA262_match)
{ "^[a-sy-z]$"sv, "y"sv, true, ECMAScriptFlags::Insensitive },
{ "^[a-sy-z]$"sv, "u"sv, false, ECMAScriptFlags::Insensitive },
{ "."sv, "\n\r\u2028\u2029"sv, false }, // Dot should not match any of CR/LF/LS/PS in ECMA262 mode without DotAll.
+ { "a$"sv, "a\r\n"sv, true, global_multiline.value() }, // $ should accept all LineTerminators in ECMA262 mode with Multiline.
+ { "^a"sv, "\ra"sv, true, global_multiline.value() },
+ { "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
};
// clang-format on
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp
index 6bc1ee1f11..cb4daff96e 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp
@@ -11,6 +11,11 @@
#include <AK/StringBuilder.h>
#include <LibUnicode/CharacterTypes.h>
+// U+2028 LINE SEPARATOR
+constexpr static u32 const LineSeparator { 0x2028 };
+// U+2029 PARAGRAPH SEPARATOR
+constexpr static u32 const ParagraphSeparator { 0x2029 };
+
namespace regex {
StringView OpCode::name(OpCodeId opcode_id)
@@ -277,7 +282,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input
if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
auto input_view = input.view.substring_view(state.string_position - 1, 1)[0];
- return input_view == '\n';
+ return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
}
return false;
@@ -330,7 +335,7 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input,
if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
auto input_view = input.view.substring_view(state.string_position, 1)[0];
- return input_view == '\n';
+ return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
}
return false;
@@ -499,11 +504,6 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
if (input.view.length() <= state.string_position)
return ExecutionResult::Failed_ExecuteLowPrioForks;
- // U+2028 LINE SEPARATOR
- constexpr static u32 const LineSeparator { 0x2028 };
- // U+2029 PARAGRAPH SEPARATOR
- constexpr static u32 const ParagraphSeparator { 0x2029 };
-
auto input_view = input.view.substring_view(state.string_position, 1)[0];
auto is_equivalent_to_newline = input_view == '\n'
|| (input.regex_options.has_flag_set(AllFlags::Internal_ECMA262DotSemantics)
diff --git a/Userland/Libraries/LibRegex/RegexOptions.h b/Userland/Libraries/LibRegex/RegexOptions.h
index c6a6291fb1..d244de8ebf 100644
--- a/Userland/Libraries/LibRegex/RegexOptions.h
+++ b/Userland/Libraries/LibRegex/RegexOptions.h
@@ -116,7 +116,7 @@ public:
void reset_flag(T flag) { m_flags = (T)((FlagsUnderlyingType)m_flags & ~(FlagsUnderlyingType)flag); }
void set_flag(T flag) { *this |= flag; }
bool has_flag_set(T flag) const { return (FlagsUnderlyingType)flag == ((FlagsUnderlyingType)m_flags & (FlagsUnderlyingType)flag); }
- T value() const { return m_flags; }
+ constexpr T value() const { return m_flags; }
private:
T m_flags { T::Default };