summary refs log tree commit diff
path: root/Userland/Libraries/LibRegex/RegexByteCode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Userland/Libraries/LibRegex/RegexByteCode.cpp')
-rw-r--r-- Userland/Libraries/LibRegex/RegexByteCode.cpp 14
1 files changed, 13 insertions, 1 deletions
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp
index eb9b19dbc9..4a1021ff77 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp
@@ -659,6 +659,18 @@ ALWAYS_INLINE bool OpCode_Compare::compare_string(MatchInput const& input, Match
ALWAYS_INLINE void OpCode_Compare::compare_character_class(MatchInput const& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched)
{
+ auto is_space_or_line_terminator = [](u32 code_point) {
+ static auto space_separator = Unicode::general_category_from_string("Space_Separator"sv);
+ if (!space_separator.has_value())
+ return is_ascii_space(code_point);
+
+ if ((code_point == 0x0a) || (code_point == 0x0d) || (code_point == 0x2028) || (code_point == 0x2029))
+ return true;
+ if ((code_point == 0x09) || (code_point == 0x0b) || (code_point == 0x0c) || (code_point == 0xfeff))
+ return true;
+ return Unicode::code_point_has_general_category(code_point, *space_separator);
+ };
+
switch (character_class) {
case CharClass::Alnum:
if (is_ascii_alphanumeric(ch)) {
@@ -729,7 +741,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(MatchInput const& inp
}
break;
case CharClass::Space:
- if (is_ascii_space(ch)) {
+ if (is_space_or_line_terminator(ch)) {
if (inverse)
inverse_matched = true;
else