summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Timothy Flynn <trflynn89@pm.me> 2021-07-31 18:06:53 -0400
committer: Ali Mohammad Pur <Ali.mpfard@gmail.com> 2021-08-02 21:02:09 +0430
commit: 4de43128275ed08fba70eaa732eab34fd39f23b8 (patch)
tree: 8c1a0947202482c2bb7658ee7ee3e01f36f67393
parent: 011514a3840c312c5fe122eb707b8f297e87e1b7 (diff)
download: serenity-4de43128275ed08fba70eaa732eab34fd39f23b8.zip
LibRegex: Support property escapes of the form \p{Type=Value}
Until now, only binary properties could be parsed. Non-binary properties are of the form "Type=Value", where "Type" may be General_Category, Script, or Script_Extensions (or their aliases). Of these, LibUnicode currently supports only General_Category, so that is the only type LibRegex can parse.
-rw-r--r-- Tests/LibRegex/Regex.cpp 6
-rw-r--r-- Userland/Libraries/LibRegex/RegexParser.cpp 45
2 files changed, 40 insertions, 11 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 29d2d634ef..8d04e5da1c 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -667,6 +667,12 @@ TEST_CASE(ECMA262_property_match)
{ "\\p{Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
{ "\\p{Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
{ "\\p{Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },
+ { "\\p{General_Category=Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
+ { "\\p{General_Category=Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
+ { "\\p{General_Category=Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },
+ { "\\p{gc=Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
+ { "\\p{gc=Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
+ { "\\p{gc=Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },
};
for (auto& test : tests) {
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp
index fc433444a6..10ac0d7fd1 100644
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -1976,21 +1976,44 @@ Optional<ECMA262Parser::PropertyEscape> ECMA262Parser::read_unicode_property_esc
{
consume(TokenType::LeftCurly, Error::InvalidPattern);
- auto start_token = m_parser_state.current_token;
- size_t offset = 0;
- while (match(TokenType::Char)) {
- if (m_parser_state.current_token.value() == "}")
- break;
- offset += consume().value().length();
+ // Note: clang-format is disabled here because it doesn't handle templated lambdas yet.
+ // clang-format off
+ auto read_until = [&]<typename... Ts>(Ts&&... terminators) {
+ auto start_token = m_parser_state.current_token;
+ size_t offset = 0;
+
+ while (match(TokenType::Char)) {
+ if (m_parser_state.current_token.value().is_one_of(forward<Ts>(terminators)...))
+ break;
+ offset += consume().value().length();
+ }
+
+ return StringView { start_token.value().characters_without_null_termination(), offset };
+ };
+ // clang-format on
+
+ StringView property_type;
+ StringView property_name = read_until("="sv, "}"sv);
+
+ if (try_skip("="sv)) {
+ if (property_name.is_empty())
+ return {};
+ property_type = property_name;
+ property_name = read_until("}"sv);
}
- StringView property_name { start_token.value().characters_without_null_termination(), offset };
consume(TokenType::RightCurly, Error::InvalidPattern);
- if (auto property = Unicode::property_from_string(property_name); property.has_value())
- return { *property };
- if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
- return { *general_category };
+ if (property_type.is_empty()) {
+ if (auto property = Unicode::property_from_string(property_name); property.has_value())
+ return { *property };
+ if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
+ return { *general_category };
+ } else if ((property_type == "General_Category"sv) || (property_type == "gc"sv)) {
+ if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
+ return { *general_category };
+ }
+
return {};
}