diff options
author | Linus Groh <mail@linusgroh.de> | 2020-11-29 19:10:46 +0000 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-11-29 20:35:52 +0100 |
commit | eea7cabdbc59d8b5b6aa37a7414b6c20a06bf8d8 (patch) | |
tree | db37611d8307c4235429ee63557ee004f9c52805 | |
parent | 1279d2256c5ca871bc7d15e13792f882e34ff3f1 (diff) | |
download | serenity-eea7cabdbc59d8b5b6aa37a7414b6c20a06bf8d8.zip |
LibRegex: Use match_ordinary_characters() in ECMA262Parser::parse_atom()
Otherwise we would only match TokenType::Char, making all of these invalid:
- /foo,bar/
- /foo\/bar/
- /foo=bar/
- /foo-bar/
- /foo:bar/
Fixes #4243.
-rw-r--r-- | Libraries/LibRegex/RegexParser.cpp | 28 | ||||
-rw-r--r-- | Libraries/LibRegex/RegexParser.h | 2 | ||||
-rw-r--r-- | Libraries/LibRegex/Tests/Regex.cpp | 1 |
3 files changed, 16 insertions, 15 deletions
diff --git a/Libraries/LibRegex/RegexParser.cpp b/Libraries/LibRegex/RegexParser.cpp
index 662a8bee8c..0cc6940b38 100644
--- a/Libraries/LibRegex/RegexParser.cpp
+++ b/Libraries/LibRegex/RegexParser.cpp
@@ -154,6 +154,19 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
     };
 }
 
+ALWAYS_INLINE bool Parser::match_ordinary_characters()
+{
+    // NOTE: This method must not be called during bracket and repetition parsing!
+    // FIXME: Add assertion for that?
+    auto type = m_parser_state.current_token.type();
+    return (type == TokenType::Char
+        || type == TokenType::Comma
+        || type == TokenType::Slash
+        || type == TokenType::EqualSign
+        || type == TokenType::HyphenMinus
+        || type == TokenType::Colon);
+}
+
 // =============================
 // PosixExtended Parser
 // =============================
@@ -172,19 +185,6 @@ ALWAYS_INLINE bool PosixExtendedParser::match_repetition_symbol()
         || type == TokenType::LeftCurly);
 }
 
-ALWAYS_INLINE bool PosixExtendedParser::match_ordinary_characters()
-{
-    // NOTE: This method must not be called during bracket and repetition parsing!
-    // FIXME: Add assertion for that?
-    auto type = m_parser_state.current_token.type();
-    return (type == TokenType::Char
-        || type == TokenType::Comma
-        || type == TokenType::Slash
-        || type == TokenType::EqualSign
-        || type == TokenType::HyphenMinus
-        || type == TokenType::Colon);
-}
-
 ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& bytecode_to_repeat, size_t& match_length_minimum)
 {
     if (match(TokenType::LeftCurly)) {
@@ -964,7 +964,7 @@ bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bo
         return false;
     }
 
-    if (match(TokenType::Char)) {
+    if (match_ordinary_characters()) {
         auto token = consume().value();
         match_length_minimum += 1;
         stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token[0] } });
diff --git a/Libraries/LibRegex/RegexParser.h b/Libraries/LibRegex/RegexParser.h
index 7227e9fe84..eb80c6f583 100644
--- a/Libraries/LibRegex/RegexParser.h
+++ b/Libraries/LibRegex/RegexParser.h
@@ -90,6 +90,7 @@ protected:
     ALWAYS_INLINE bool match(TokenType type) const;
     ALWAYS_INLINE bool match(char ch) const;
 
+    ALWAYS_INLINE bool match_ordinary_characters();
     ALWAYS_INLINE Token consume();
     ALWAYS_INLINE Token consume(TokenType type, Error error);
     ALWAYS_INLINE bool consume(const String&);
@@ -145,7 +146,6 @@ public:
 
 private:
     ALWAYS_INLINE bool match_repetition_symbol();
-    ALWAYS_INLINE bool match_ordinary_characters();
 
     bool parse_internal(ByteCode&, size_t&) override;
 
diff --git a/Libraries/LibRegex/Tests/Regex.cpp b/Libraries/LibRegex/Tests/Regex.cpp
index 0175f70fbc..6ad210d7fb 100644
--- a/Libraries/LibRegex/Tests/Regex.cpp
+++ b/Libraries/LibRegex/Tests/Regex.cpp
@@ -488,6 +488,7 @@ TEST_CASE(ECMA262_parse)
         "^[\\w+/_-]+[=]{0,2}$", // #4189
         "^(?:[^<]*(<[\\w\\W]+>)[^>]*$|#([\\w\\-]*)$)", // #4189
         "\\/", // #4189
+        ",/=-:", // #4243
         "\\x", // Even invalid escapes are allowed if ~unicode.
     };
 