diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-08-18 17:17:18 -0400 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-08-19 23:49:25 +0200 |
commit | 4f2cbe119b675ad669d48ddbd2cef317c6d616de (patch) | |
tree | 1634296a83dc2ae841022745d42097e90a6d3b9b /Tests | |
parent | 6131c0485e433646bc8f6495e3a5127ef75d90c9 (diff) | |
download | serenity-4f2cbe119b675ad669d48ddbd2cef317c6d616de.zip |
LibRegex: Allow Unicode escape sequences in capture group names
Unfortunately, this requires a slight divergence in the way the capture
group names are stored. Previously, the generated byte code would simply
store a view into the regex pattern string, so no string copying was
required.
Now, the escape sequences are decoded into a new string, and a vector
of all parsed capture group names is stored in the parser result
structure. The byte code then stores a view into the corresponding
string in that vector.
Diffstat (limited to 'Tests')
-rw-r--r-- | Tests/LibRegex/Regex.cpp | 3 |
1 files changed, 3 insertions, 0 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 8cb3a87c9e..7a14f30eb9 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -698,6 +698,9 @@ TEST_CASE(ECMA262_unicode_match)
         { "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true, ECMAScriptFlags::Unicode },
         { "(?<=.{3})f"sv, "abcdef"sv, true, ECMAScriptFlags::Unicode },
         { "(?<=.{3})f"sv, "abc😀ef"sv, true, ECMAScriptFlags::Unicode },
+        { "(?<𝓑𝓻𝓸𝔀𝓷>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
+        { "(?<\\u{1d4d1}\\u{1d4fb}\\u{1d4f8}\\u{1d500}\\u{1d4f7}>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
+        { "(?<\\ud835\\udcd1\\ud835\\udcfb\\ud835\\udcf8\\ud835\\udd00\\ud835\\udcf7>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
     };
 
     for (auto& test : tests) {