summary | refs | log | tree | commit | diff
path: root/Userland
diff options
context:
space:
mode:
author: Timothy Flynn <trflynn89@pm.me>, 2021-08-17 22:24:17 -0400
committer: Andreas Kling <kling@serenityos.org>, 2021-08-19 23:49:25 +0200
commit: dd44a5e9488eb0758d8bf29f112d01b6837f7215 (patch)
tree: a29792be80f9342d447f8734717ac22a8fb91674 /Userland
parent: fd8ccedf2b21a49571100b5a56d9a1f5d28b834c (diff)
download: serenity-dd44a5e9488eb0758d8bf29f112d01b6837f7215.zip
LibJS: Use GenericLexer to consume escaped code points
Diffstat (limited to 'Userland')
-rw-r--r-- Userland/Libraries/LibJS/Token.cpp | 69
1 files changed, 17 insertions, 52 deletions
diff --git a/Userland/Libraries/LibJS/Token.cpp b/Userland/Libraries/LibJS/Token.cpp
index 0944dbd04f..5e918b6557 100644
--- a/Userland/Libraries/LibJS/Token.cpp
+++ b/Userland/Libraries/LibJS/Token.cpp
@@ -10,7 +10,6 @@
#include <AK/CharacterTypes.h>
#include <AK/GenericLexer.h>
#include <AK/StringBuilder.h>
-#include <AK/Utf16View.h>
namespace JS {
@@ -103,16 +102,6 @@ String Token::string_value(StringValueStatus& status) const
return {};
};
- auto decode_surrogate = [&lexer]() -> Optional<u16> {
- u16 surrogate = 0;
- for (int j = 0; j < 4; ++j) {
- if (!lexer.next_is(is_ascii_hex_digit))
- return {};
- surrogate = (surrogate << 4u) | hex2int(lexer.consume());
- }
- return surrogate;
- };
-
StringBuilder builder;
while (!lexer.is_eof()) {
// No escape, consume one char and continue
@@ -121,6 +110,23 @@ String Token::string_value(StringValueStatus& status) const
continue;
}
+ // Unicode escape
+ if (lexer.next_is("\\u"sv)) {
+ auto code_point_or_error = lexer.consume_escaped_code_point();
+
+ if (code_point_or_error.is_error()) {
+ switch (code_point_or_error.error()) {
+ case GenericLexer::UnicodeEscapeError::MalformedUnicodeEscape:
+ return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
+ case GenericLexer::UnicodeEscapeError::UnicodeEscapeOverflow:
+ return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
+ }
+ }
+
+ builder.append_code_point(code_point_or_error.value());
+ continue;
+ }
+
lexer.ignore();
VERIFY(!lexer.is_eof());
@@ -150,47 +156,6 @@ String Token::string_value(StringValueStatus& status) const
builder.append_code_point(code_point);
continue;
}
- // Unicode escape
- if (lexer.next_is('u')) {
- lexer.ignore();
- u32 code_point = 0;
- if (lexer.next_is('{')) {
- lexer.ignore();
- while (true) {
- if (!lexer.next_is(is_ascii_hex_digit))
- return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
- auto new_code_point = (code_point << 4u) | hex2int(lexer.consume());
- if (new_code_point < code_point)
- return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
- code_point = new_code_point;
- if (lexer.next_is('}'))
- break;
- }
- lexer.ignore();
- } else {
- auto high_surrogate = decode_surrogate();
- if (!high_surrogate.has_value())
- return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
-
- if (Utf16View::is_high_surrogate(*high_surrogate) && lexer.consume_specific("\\u"sv)) {
- auto low_surrogate = decode_surrogate();
- if (!low_surrogate.has_value())
- return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
-
- if (Utf16View::is_low_surrogate(*low_surrogate)) {
- code_point = Utf16View::decode_surrogate_pair(*high_surrogate, *low_surrogate);
- } else {
- builder.append_code_point(*high_surrogate);
- code_point = *low_surrogate;
- }
-
- } else {
- code_point = *high_surrogate;
- }
- }
- builder.append_code_point(code_point);
- continue;
- }
// In non-strict mode LegacyOctalEscapeSequence is allowed in strings:
// https://tc39.es/ecma262/#sec-additional-syntax-string-literals