diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-07-31 18:06:53 -0400 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2021-08-02 21:02:09 +0430 |
commit | 4de43128275ed08fba70eaa732eab34fd39f23b8 (patch) | |
tree | 8c1a0947202482c2bb7658ee7ee3e01f36f67393 | |
parent | 011514a3840c312c5fe122eb707b8f297e87e1b7 (diff) | |
download | serenity-4de43128275ed08fba70eaa732eab34fd39f23b8.zip |
LibRegex: Support property escapes of the form \p{Type=Value}
Until now, only binary properties could be parsed. Non-binary properties are
of the form "Type=Value", where "Type" may be General_Category, Script,
or Script_Extensions (or their aliases). Of these, LibUnicode currently
supports only General_Category, so LibRegex can parse only that type.
-rw-r--r-- | Tests/LibRegex/Regex.cpp | 6 | ||||
-rw-r--r-- | Userland/Libraries/LibRegex/RegexParser.cpp | 45 |
2 files changed, 40 insertions, 11 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 29d2d634ef..8d04e5da1c 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -667,6 +667,12 @@ TEST_CASE(ECMA262_property_match) { "\\p{Cased_Letter}", "a", true, ECMAScriptFlags::Unicode }, { "\\p{Cased_Letter}", "A", true, ECMAScriptFlags::Unicode }, { "\\p{Cased_Letter}", "9", false, ECMAScriptFlags::Unicode }, + { "\\p{General_Category=Cased_Letter}", "a", true, ECMAScriptFlags::Unicode }, + { "\\p{General_Category=Cased_Letter}", "A", true, ECMAScriptFlags::Unicode }, + { "\\p{General_Category=Cased_Letter}", "9", false, ECMAScriptFlags::Unicode }, + { "\\p{gc=Cased_Letter}", "a", true, ECMAScriptFlags::Unicode }, + { "\\p{gc=Cased_Letter}", "A", true, ECMAScriptFlags::Unicode }, + { "\\p{gc=Cased_Letter}", "9", false, ECMAScriptFlags::Unicode }, }; for (auto& test : tests) { diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index fc433444a6..10ac0d7fd1 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -1976,21 +1976,44 @@ Optional<ECMA262Parser::PropertyEscape> ECMA262Parser::read_unicode_property_esc { consume(TokenType::LeftCurly, Error::InvalidPattern); - auto start_token = m_parser_state.current_token; - size_t offset = 0; - while (match(TokenType::Char)) { - if (m_parser_state.current_token.value() == "}") - break; - offset += consume().value().length(); + // Note: clang-format is disabled here because it doesn't handle templated lambdas yet. + // clang-format off + auto read_until = [&]<typename... Ts>(Ts&&... 
terminators) { + auto start_token = m_parser_state.current_token; + size_t offset = 0; + + while (match(TokenType::Char)) { + if (m_parser_state.current_token.value().is_one_of(forward<Ts>(terminators)...)) + break; + offset += consume().value().length(); + } + + return StringView { start_token.value().characters_without_null_termination(), offset }; + }; + // clang-format on + + StringView property_type; + StringView property_name = read_until("="sv, "}"sv); + + if (try_skip("="sv)) { + if (property_name.is_empty()) + return {}; + property_type = property_name; + property_name = read_until("}"sv); } - StringView property_name { start_token.value().characters_without_null_termination(), offset }; consume(TokenType::RightCurly, Error::InvalidPattern); - if (auto property = Unicode::property_from_string(property_name); property.has_value()) - return { *property }; - if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value()) - return { *general_category }; + if (property_type.is_empty()) { + if (auto property = Unicode::property_from_string(property_name); property.has_value()) + return { *property }; + if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value()) + return { *general_category }; + } else if ((property_type == "General_Category"sv) || (property_type == "gc"sv)) { + if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value()) + return { *general_category }; + } + return {}; } |