summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Groh <mail@linusgroh.de> 2020-11-29 19:10:46 +0000
committer Andreas Kling <kling@serenityos.org> 2020-11-29 20:35:52 +0100
commit eea7cabdbc59d8b5b6aa37a7414b6c20a06bf8d8 (patch)
tree db37611d8307c4235429ee63557ee004f9c52805
parent 1279d2256c5ca871bc7d15e13792f882e34ff3f1 (diff)
download serenity-eea7cabdbc59d8b5b6aa37a7414b6c20a06bf8d8.zip
LibRegex: Use match_ordinary_characters() in ECMA262Parser::parse_atom()
Otherwise we would only match TokenType::Char, making all of these invalid:

- /foo,bar/
- /foo\/bar/
- /foo=bar/
- /foo-bar/
- /foo:bar/

Fixes #4243.
-rw-r--r-- Libraries/LibRegex/RegexParser.cpp 28
-rw-r--r-- Libraries/LibRegex/RegexParser.h 2
-rw-r--r-- Libraries/LibRegex/Tests/Regex.cpp 1
3 files changed, 16 insertions, 15 deletions
diff --git a/Libraries/LibRegex/RegexParser.cpp b/Libraries/LibRegex/RegexParser.cpp
index 662a8bee8c..0cc6940b38 100644
--- a/Libraries/LibRegex/RegexParser.cpp
+++ b/Libraries/LibRegex/RegexParser.cpp
@@ -154,6 +154,19 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
};
}
+ALWAYS_INLINE bool Parser::match_ordinary_characters()
+{
+ // NOTE: This method must not be called during bracket and repetition parsing!
+ // FIXME: Add assertion for that?
+ auto type = m_parser_state.current_token.type();
+ return (type == TokenType::Char
+ || type == TokenType::Comma
+ || type == TokenType::Slash
+ || type == TokenType::EqualSign
+ || type == TokenType::HyphenMinus
+ || type == TokenType::Colon);
+}
+
// =============================
// PosixExtended Parser
// =============================
@@ -172,19 +185,6 @@ ALWAYS_INLINE bool PosixExtendedParser::match_repetition_symbol()
|| type == TokenType::LeftCurly);
}
-ALWAYS_INLINE bool PosixExtendedParser::match_ordinary_characters()
-{
- // NOTE: This method must not be called during bracket and repetition parsing!
- // FIXME: Add assertion for that?
- auto type = m_parser_state.current_token.type();
- return (type == TokenType::Char
- || type == TokenType::Comma
- || type == TokenType::Slash
- || type == TokenType::EqualSign
- || type == TokenType::HyphenMinus
- || type == TokenType::Colon);
-}
-
ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& bytecode_to_repeat, size_t& match_length_minimum)
{
if (match(TokenType::LeftCurly)) {
@@ -964,7 +964,7 @@ bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bo
return false;
}
- if (match(TokenType::Char)) {
+ if (match_ordinary_characters()) {
auto token = consume().value();
match_length_minimum += 1;
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token[0] } });
diff --git a/Libraries/LibRegex/RegexParser.h b/Libraries/LibRegex/RegexParser.h
index 7227e9fe84..eb80c6f583 100644
--- a/Libraries/LibRegex/RegexParser.h
+++ b/Libraries/LibRegex/RegexParser.h
@@ -90,6 +90,7 @@ protected:
ALWAYS_INLINE bool match(TokenType type) const;
ALWAYS_INLINE bool match(char ch) const;
+ ALWAYS_INLINE bool match_ordinary_characters();
ALWAYS_INLINE Token consume();
ALWAYS_INLINE Token consume(TokenType type, Error error);
ALWAYS_INLINE bool consume(const String&);
@@ -145,7 +146,6 @@ public:
private:
ALWAYS_INLINE bool match_repetition_symbol();
- ALWAYS_INLINE bool match_ordinary_characters();
bool parse_internal(ByteCode&, size_t&) override;
diff --git a/Libraries/LibRegex/Tests/Regex.cpp b/Libraries/LibRegex/Tests/Regex.cpp
index 0175f70fbc..6ad210d7fb 100644
--- a/Libraries/LibRegex/Tests/Regex.cpp
+++ b/Libraries/LibRegex/Tests/Regex.cpp
@@ -488,6 +488,7 @@ TEST_CASE(ECMA262_parse)
"^[\\w+/_-]+[=]{0,2}$", // #4189
"^(?:[^<]*(<[\\w\\W]+>)[^>]*$|#([\\w\\-]*)$)", // #4189
"\\/", // #4189
+ ",/=-:", // #4243
"\\x", // Even invalid escapes are allowed if ~unicode.
};