summary | refs | log | tree | commit | diff
path: root/Tests/LibRegex/Regex.cpp
diff options
context:
space:
mode:
author: Timothy Flynn <trflynn89@pm.me> 2021-07-22 09:25:58 -0400
committer: Linus Groh <mail@linusgroh.de> 2021-07-23 23:06:57 +0100
commit: 345ef6abba68c1a96e5f5e365f1a2e8dc1762955 (patch)
tree: cac7c9e213b7ce3d26b1cc775cd748356eea76f2 /Tests/LibRegex/Regex.cpp
parent: 0e6375558dc60811a390ee014d76486ff28d8746 (diff)
download: serenity-345ef6abba68c1a96e5f5e365f1a2e8dc1762955.zip
LibRegex: Support ECMA-262 Unicode escapes of the form "\u{code_point}"
When the Unicode flag is set, regular expressions may escape a code point by surrounding its hexadecimal value with curly braces, e.g. \u{41} is the character "A". When the Unicode flag is not set, the braces should instead be treated as a repetition quantifier: \u{41} is the character "u" repeated 41 times. Handling of the non-Unicode case is left as a TODO for now.
Diffstat (limited to 'Tests/LibRegex/Regex.cpp')
-rw-r--r-- Tests/LibRegex/Regex.cpp | 6
1 file changed, 6 insertions, 0 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index a4731e62d3..a1f6f084d6 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -510,6 +510,11 @@ TEST_CASE(ECMA262_parse)
{ "\\uxxxx", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
{ "\\ud83d", regex::Error::NoError, ECMAScriptFlags::Unicode },
{ "\\ud83d\\uxxxx", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+ { "\\u{0}", regex::Error::NoError, ECMAScriptFlags::Unicode },
+ { "\\u{10ffff}", regex::Error::NoError, ECMAScriptFlags::Unicode },
+ { "\\u{10ffff", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+ { "\\u{10ffffx", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+ { "\\u{110000}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
};
for (auto& test : tests) {
@@ -605,6 +610,7 @@ TEST_CASE(ECMA262_unicode_match)
{ "\\ude00", "😀", false, ECMAScriptFlags::Unicode },
{ "\\ud83d\\ude00", "😀", true },
{ "\\ud83d\\ude00", "😀", true, ECMAScriptFlags::Unicode },
+ { "\\u{1f600}", "😀", true, ECMAScriptFlags::Unicode },
{ "\\ud83d\\ud83d", "\xed\xa0\xbd\xed\xa0\xbd", true },
{ "\\ud83d\\ud83d", "\xed\xa0\xbd\xed\xa0\xbd", true, ECMAScriptFlags::Unicode },
};