diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-07-22 09:25:58 -0400 |
---|---|---|
committer | Linus Groh <mail@linusgroh.de> | 2021-07-23 23:06:57 +0100 |
commit | 345ef6abba68c1a96e5f5e365f1a2e8dc1762955 (patch) | |
tree | cac7c9e213b7ce3d26b1cc775cd748356eea76f2 /Tests/LibRegex/Regex.cpp | |
parent | 0e6375558dc60811a390ee014d76486ff28d8746 (diff) | |
download | serenity-345ef6abba68c1a96e5f5e365f1a2e8dc1762955.zip |
LibRegex: Support ECMA-262 Unicode escapes of the form "\u{code_point}"
When the Unicode flag is set, regular expressions may escape code points
by surrounding the hexadecimal code point with curly braces, e.g. \u{41}
is the character "A".
When the Unicode flag is not set, "\u" is instead an identity escape for
the character "u" and the braces form a repetition quantifier - \u{41} is
the character "u" repeated 41 times. This is left as a TODO for now.
Diffstat (limited to 'Tests/LibRegex/Regex.cpp')
-rw-r--r-- | Tests/LibRegex/Regex.cpp | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index a4731e62d3..a1f6f084d6 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -510,6 +510,11 @@ TEST_CASE(ECMA262_parse)
         { "\\uxxxx", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
         { "\\ud83d", regex::Error::NoError, ECMAScriptFlags::Unicode },
         { "\\ud83d\\uxxxx", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "\\u{0}", regex::Error::NoError, ECMAScriptFlags::Unicode },
+        { "\\u{10ffff}", regex::Error::NoError, ECMAScriptFlags::Unicode },
+        { "\\u{10ffff", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "\\u{10ffffx", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "\\u{110000}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
     };
 
     for (auto& test : tests) {
@@ -605,6 +610,7 @@ TEST_CASE(ECMA262_unicode_match)
         { "\\ude00", "😀", false, ECMAScriptFlags::Unicode },
         { "\\ud83d\\ude00", "😀", true },
         { "\\ud83d\\ude00", "😀", true, ECMAScriptFlags::Unicode },
+        { "\\u{1f600}", "😀", true, ECMAScriptFlags::Unicode },
         { "\\ud83d\\ud83d", "\xed\xa0\xbd\xed\xa0\xbd", true },
         { "\\ud83d\\ud83d", "\xed\xa0\xbd\xed\xa0\xbd", true, ECMAScriptFlags::Unicode },
     };