summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  Tests/LibRegex/Regex.cpp                       |  2
-rw-r--r--  Userland/Libraries/LibC/regex.h                |  2
-rw-r--r--  Userland/Libraries/LibRegex/RegexByteCode.cpp  | 41
-rw-r--r--  Userland/Libraries/LibRegex/RegexMatcher.cpp   | 10
-rw-r--r--  Userland/Libraries/LibRegex/RegexOptions.h     |  3
-rw-r--r--  Userland/Libraries/LibRegex/RegexParser.cpp    |  3
-rw-r--r--  Userland/Libraries/LibRegex/RegexParser.h      | 14
7 files changed, 55 insertions, 20 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index c9bb6e1622..f8cc70dcd3 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -684,6 +684,8 @@ TEST_CASE(ECMA262_match)
{ "(\0|a)"sv, "a"sv, true }, // #9686, Should allow null bytes in pattern
{ "(.*?)a(?!(a+)b\\2c)\\2(.*)"sv, "baaabaac"sv, true }, // #6042, Groups inside lookarounds may be referenced outside, but their contents appear empty if the pattern in the lookaround fails.
{ "a|$"sv, "x"sv, true, (ECMAScriptFlags)regex::AllFlags::Global }, // #11940, Global (not the 'g' flag) regexps should attempt to match the zero-length end of the string too.
+ { "foo\nbar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match literal newlines without the 's' flag.
+ { "foo[^]bar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match newline with [^].
};
// clang-format on
diff --git a/Userland/Libraries/LibC/regex.h b/Userland/Libraries/LibC/regex.h
index 3697286de6..a01ea1d3c4 100644
--- a/Userland/Libraries/LibC/regex.h
+++ b/Userland/Libraries/LibC/regex.h
@@ -83,6 +83,7 @@ enum __RegexAllFlags {
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
__Regex_Internal_Stateful = __Regex_Global << 14, // Internal flag; enables stateful matches.
__Regex_Internal_BrowserExtended = __Regex_Global << 15, // Internal flag; enable browser-specific ECMA262 extensions.
+ __Regex_Internal_ConsiderNewline = __Regex_Global << 16, // Internal flag; allow matchers to consider newlines as line separators.
__Regex_Last = __Regex_SkipTrimEmptyMatches
};
@@ -97,7 +98,6 @@ enum __RegexAllFlags {
#define REG_NOTBOL __Regex_MatchNotBeginOfLine // The circumflex character (^), when taken as a special character, will not match the beginning of string.
#define REG_NOTEOL __Regex_MatchNotEndOfLine // The dollar sign ($), when taken as a special character, will not match the end of string.
-//static_assert (sizeof(FlagsUnderlyingType) * 8 >= regex::POSIXFlags::Last << 1), "flags type too small")
#define REG_SEARCH __Regex_Last << 1
int regcomp(regex_t*, const char*, int);
diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp
index e0e6cbe175..eb9b19dbc9 100644
--- a/Userland/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp
@@ -273,12 +273,23 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceStay::execute(MatchInput const&
ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input, MatchState& state) const
{
- if (0 == state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
+ auto is_at_line_boundary = [&] {
+ if (state.string_position == 0)
+ return true;
+
+ if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
+ auto input_view = input.view.substring_view(state.string_position - 1, 1)[0];
+ return input_view == '\n';
+ }
+
+ return false;
+ }();
+ if (is_at_line_boundary && (input.regex_options & AllFlags::MatchNotBeginOfLine))
return ExecutionResult::Failed_ExecuteLowPrioForks;
- if ((0 == state.string_position && !(input.regex_options & AllFlags::MatchNotBeginOfLine))
- || (0 != state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
- || (0 == state.string_position && (input.regex_options & AllFlags::Global)))
+ if ((is_at_line_boundary && !(input.regex_options & AllFlags::MatchNotBeginOfLine))
+ || (!is_at_line_boundary && (input.regex_options & AllFlags::MatchNotBeginOfLine))
+ || (is_at_line_boundary && (input.regex_options & AllFlags::Global)))
return ExecutionResult::Continue;
return ExecutionResult::Failed_ExecuteLowPrioForks;
@@ -315,11 +326,22 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckBoundary::execute(MatchInput const& in
ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input, MatchState& state) const
{
- if (state.string_position == input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine))
+ auto is_at_line_boundary = [&] {
+ if (state.string_position == input.view.length())
+ return true;
+
+ if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
+ auto input_view = input.view.substring_view(state.string_position, 1)[0];
+ return input_view == '\n';
+ }
+
+ return false;
+ }();
+ if (is_at_line_boundary && (input.regex_options & AllFlags::MatchNotEndOfLine))
return ExecutionResult::Failed_ExecuteLowPrioForks;
- if ((state.string_position == input.view.length() && !(input.regex_options & AllFlags::MatchNotEndOfLine))
- || (state.string_position != input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine || input.regex_options & AllFlags::MatchNotBeginOfLine)))
+ if ((is_at_line_boundary && !(input.regex_options & AllFlags::MatchNotEndOfLine))
+ || (!is_at_line_boundary && (input.regex_options & AllFlags::MatchNotEndOfLine || input.regex_options & AllFlags::MatchNotBeginOfLine)))
return ExecutionResult::Continue;
return ExecutionResult::Failed_ExecuteLowPrioForks;
@@ -461,8 +483,9 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
if (input.view.length() <= state.string_position)
return ExecutionResult::Failed_ExecuteLowPrioForks;
- VERIFY(!current_inversion_state());
- advance_string_position(state, input.view);
+ auto input_view = input.view.substring_view(state.string_position, 1)[0];
+ if (input_view != '\n' || (input.regex_options.has_flag_set(AllFlags::SingleLine) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)))
+ advance_string_position(state, input.view, input_view);
} else if (compare_type == CharacterCompareType::String) {
VERIFY(!current_inversion_state());
diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp
index 16fa1b4ec1..ddd8522f69 100644
--- a/Userland/Libraries/LibRegex/RegexMatcher.cpp
+++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp
@@ -41,7 +41,7 @@ Regex<Parser>::Regex(String pattern, typename ParserTraits<Parser>::OptionsType
run_optimization_passes();
if (parser_result.error == regex::Error::NoError)
- matcher = make<Matcher<Parser>>(this, regex_options);
+ matcher = make<Matcher<Parser>>(this, static_cast<decltype(regex_options.value())>(parser_result.options.value()));
}
template<class Parser>
@@ -51,7 +51,7 @@ Regex<Parser>::Regex(regex::Parser::Result parse_result, String pattern, typenam
{
run_optimization_passes();
if (parser_result.error == regex::Error::NoError)
- matcher = make<Matcher<Parser>>(this, regex_options);
+ matcher = make<Matcher<Parser>>(this, regex_options | static_cast<decltype(regex_options.value())>(parse_result.options.value()));
}
template<class Parser>
@@ -104,8 +104,10 @@ RegexResult Matcher<Parser>::match(RegexStringView view, Optional<typename Parse
{
AllOptions options = m_regex_options | regex_options.value_or({}).value();
- if (options.has_flag_set(AllFlags::Multiline))
- return match(view.lines(), regex_options); // FIXME: how do we know, which line ending a line has (1char or 2char)? This is needed to get the correct match offsets from start of string...
+ if constexpr (!IsSame<Parser, ECMA262>) {
+ if (options.has_flag_set(AllFlags::Multiline))
+ return match(view.lines(), regex_options); // FIXME: how do we know, which line ending a line has (1char or 2char)? This is needed to get the correct match offsets from start of string...
+ }
Vector<RegexStringView> views;
views.append(view);
diff --git a/Userland/Libraries/LibRegex/RegexOptions.h b/Userland/Libraries/LibRegex/RegexOptions.h
index 24dba533ac..c9a3533c5b 100644
--- a/Userland/Libraries/LibRegex/RegexOptions.h
+++ b/Userland/Libraries/LibRegex/RegexOptions.h
@@ -16,7 +16,7 @@
namespace regex {
-using FlagsUnderlyingType = u16;
+using FlagsUnderlyingType = u32;
enum class AllFlags {
Global = __Regex_Global, // All matches (don't return after first match)
@@ -35,6 +35,7 @@ enum class AllFlags {
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
Internal_BrowserExtended = __Regex_Internal_BrowserExtended, // Only for ECMA262, Enable the behaviors defined in section B.1.4. of the ECMA262 spec.
+ Internal_ConsiderNewline = __Regex_Internal_ConsiderNewline, // Only for ECMA262, Allow multiline matches to consider newlines as line boundaries.
Last = Internal_BrowserExtended,
};
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp
index 61b5ff1843..564f57e612 100644
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -194,7 +194,8 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
move(m_parser_state.match_length_minimum),
move(m_parser_state.error),
move(m_parser_state.error_token),
- m_parser_state.named_capture_groups.keys()
+ m_parser_state.named_capture_groups.keys(),
+ m_parser_state.regex_options,
};
}
diff --git a/Userland/Libraries/LibRegex/RegexParser.h b/Userland/Libraries/LibRegex/RegexParser.h
index b7b3691835..3610d3c72a 100644
--- a/Userland/Libraries/LibRegex/RegexParser.h
+++ b/Userland/Libraries/LibRegex/RegexParser.h
@@ -54,6 +54,7 @@ public:
Error error;
Token error_token;
Vector<FlyString> capture_groups;
+ AllOptions options;
};
explicit Parser(Lexer& lexer)
@@ -71,6 +72,7 @@ public:
Result parse(Optional<AllOptions> regex_options = {});
bool has_error() const { return m_parser_state.error != Error::NoError; }
Error error() const { return m_parser_state.error; }
+ AllOptions options() const { return m_parser_state.regex_options; }
protected:
virtual bool parse_internal(ByteCode&, size_t& match_length_minimum) = 0;
@@ -170,14 +172,16 @@ private:
};
class PosixExtendedParser final : public AbstractPosixParser {
+ constexpr static auto default_options = static_cast<PosixFlags>(AllFlags::SingleLine) | static_cast<PosixFlags>(AllFlags::Internal_ConsiderNewline);
+
public:
explicit PosixExtendedParser(Lexer& lexer)
- : AbstractPosixParser(lexer)
+ : AbstractPosixParser(lexer, default_options)
{
}
PosixExtendedParser(Lexer& lexer, Optional<typename ParserTraits<PosixExtendedParser>::OptionsType> regex_options)
- : AbstractPosixParser(lexer, regex_options.value_or({}))
+ : AbstractPosixParser(lexer, regex_options.value_or({}) | default_options.value())
{
}
@@ -195,15 +199,17 @@ private:
};
class ECMA262Parser final : public Parser {
+ constexpr static ECMAScriptOptions default_options = static_cast<ECMAScriptFlags>(AllFlags::Internal_ConsiderNewline);
+
public:
explicit ECMA262Parser(Lexer& lexer)
- : Parser(lexer)
+ : Parser(lexer, default_options)
{
m_capture_groups_in_scope.empend();
}
ECMA262Parser(Lexer& lexer, Optional<typename ParserTraits<ECMA262Parser>::OptionsType> regex_options)
- : Parser(lexer, regex_options.value_or({}))
+ : Parser(lexer, regex_options.value_or({}) | default_options.value())
{
m_should_use_browser_extended_grammar = regex_options.has_value() && regex_options->has_flag_set(ECMAScriptFlags::BrowserExtended);
m_capture_groups_in_scope.empend();