summaryrefslogtreecommitdiff
path: root/Tests
diff options
context:
space:
mode:
authorTimothy Flynn <trflynn89@pm.me>2021-08-18 17:17:18 -0400
committerAndreas Kling <kling@serenityos.org>2021-08-19 23:49:25 +0200
commit4f2cbe119b675ad669d48ddbd2cef317c6d616de (patch)
tree1634296a83dc2ae841022745d42097e90a6d3b9b /Tests
parent6131c0485e433646bc8f6495e3a5127ef75d90c9 (diff)
downloadserenity-4f2cbe119b675ad669d48ddbd2cef317c6d616de.zip
LibRegex: Allow Unicode escape sequences in capture group names
Unfortunately, this requires a slight divergence in the way the capture group names are stored. Previously, the generated byte code would simply store a view into the regex pattern string, so no string copying was required. Now, the escape sequences are decoded into a new string, and all parsed capture group names are stored in a vector in the parser result structure. The byte code then stores a view into the corresponding string in that vector.
Diffstat (limited to 'Tests')
-rw-r--r--Tests/LibRegex/Regex.cpp3
1 files changed, 3 insertions, 0 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 8cb3a87c9e..7a14f30eb9 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -698,6 +698,9 @@ TEST_CASE(ECMA262_unicode_match)
{ "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true, ECMAScriptFlags::Unicode },
{ "(?<=.{3})f"sv, "abcdef"sv, true, ECMAScriptFlags::Unicode },
{ "(?<=.{3})f"sv, "abcπŸ˜€ef"sv, true, ECMAScriptFlags::Unicode },
+ { "(?<𝓑𝓻𝓸𝔀𝓷>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
+ { "(?<\\u{1d4d1}\\u{1d4fb}\\u{1d4f8}\\u{1d500}\\u{1d4f7}>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
+ { "(?<\\ud835\\udcd1\\ud835\\udcfb\\ud835\\udcf8\\ud835\\udd00\\ud835\\udcf7>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
};
for (auto& test : tests) {