diff options
author | Timothy Flynn <trflynn89@pm.me> | 2021-08-17 22:24:17 -0400 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-08-19 23:49:25 +0200 |
commit | dd44a5e9488eb0758d8bf29f112d01b6837f7215 (patch) | |
tree | a29792be80f9342d447f8734717ac22a8fb91674 /Userland | |
parent | fd8ccedf2b21a49571100b5a56d9a1f5d28b834c (diff) | |
download | serenity-dd44a5e9488eb0758d8bf29f112d01b6837f7215.zip |
LibJS: Use GenericLexer to consume escaped code points
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibJS/Token.cpp | 69 |
1 file changed, 17 insertions, 52 deletions
diff --git a/Userland/Libraries/LibJS/Token.cpp b/Userland/Libraries/LibJS/Token.cpp index 0944dbd04f..5e918b6557 100644 --- a/Userland/Libraries/LibJS/Token.cpp +++ b/Userland/Libraries/LibJS/Token.cpp @@ -10,7 +10,6 @@ #include <AK/CharacterTypes.h> #include <AK/GenericLexer.h> #include <AK/StringBuilder.h> -#include <AK/Utf16View.h> namespace JS { @@ -103,16 +102,6 @@ String Token::string_value(StringValueStatus& status) const return {}; }; - auto decode_surrogate = [&lexer]() -> Optional<u16> { - u16 surrogate = 0; - for (int j = 0; j < 4; ++j) { - if (!lexer.next_is(is_ascii_hex_digit)) - return {}; - surrogate = (surrogate << 4u) | hex2int(lexer.consume()); - } - return surrogate; - }; - StringBuilder builder; while (!lexer.is_eof()) { // No escape, consume one char and continue @@ -121,6 +110,23 @@ String Token::string_value(StringValueStatus& status) const continue; } + // Unicode escape + if (lexer.next_is("\\u"sv)) { + auto code_point_or_error = lexer.consume_escaped_code_point(); + + if (code_point_or_error.is_error()) { + switch (code_point_or_error.error()) { + case GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape: + return encoding_failure(StringValueStatus::MalformedUnicodeEscape); + case GenericLexer::UnicodeEscapeError::UnicodeEscapeOverflow: + return encoding_failure(StringValueStatus::UnicodeEscapeOverflow); + } + } + + builder.append_code_point(code_point_or_error.value()); + continue; + } + lexer.ignore(); VERIFY(!lexer.is_eof()); @@ -150,47 +156,6 @@ String Token::string_value(StringValueStatus& status) const builder.append_code_point(code_point); continue; } - // Unicode escape - if (lexer.next_is('u')) { - lexer.ignore(); - u32 code_point = 0; - if (lexer.next_is('{')) { - lexer.ignore(); - while (true) { - if (!lexer.next_is(is_ascii_hex_digit)) - return encoding_failure(StringValueStatus::MalformedUnicodeEscape); - auto new_code_point = (code_point << 4u) | hex2int(lexer.consume()); - if (new_code_point < code_point) - return 
encoding_failure(StringValueStatus::UnicodeEscapeOverflow); - code_point = new_code_point; - if (lexer.next_is('}')) - break; - } - lexer.ignore(); - } else { - auto high_surrogate = decode_surrogate(); - if (!high_surrogate.has_value()) - return encoding_failure(StringValueStatus::MalformedUnicodeEscape); - - if (Utf16View::is_high_surrogate(*high_surrogate) && lexer.consume_specific("\\u"sv)) { - auto low_surrogate = decode_surrogate(); - if (!low_surrogate.has_value()) - return encoding_failure(StringValueStatus::MalformedUnicodeEscape); - - if (Utf16View::is_low_surrogate(*low_surrogate)) { - code_point = Utf16View::decode_surrogate_pair(*high_surrogate, *low_surrogate); - } else { - builder.append_code_point(*high_surrogate); - code_point = *low_surrogate; - } - - } else { - code_point = *high_surrogate; - } - } - builder.append_code_point(code_point); - continue; - } // In non-strict mode LegacyOctalEscapeSequence is allowed in strings: // https://tc39.es/ecma262/#sec-additional-syntax-string-literals |