summaryrefslogtreecommitdiff
path: root/Libraries
diff options
context:
space:
mode:
author Matthew Olsson <matthewcolsson@gmail.com> 2020-05-16 23:27:25 -0700
committer Andreas Kling <kling@serenityos.org> 2020-05-18 17:58:17 +0200
commit e415dd4e9c11fe065f62909ef25266609a2678dc (patch)
tree 2bdfa3ab5152f4aea0261240c08acd4457664d14 /Libraries
parent b3090678a9d9d1a8bda18a1781557d0547882da9 (diff)
download serenity-e415dd4e9c11fe065f62909ef25266609a2678dc.zip
LibJS: Handle hex and unicode escape sequences in string literals
Introduces the following syntax: '\x55' '\u26a0' '\u{1f41e}'
Diffstat (limited to 'Libraries')
-rw-r--r-- Libraries/LibJS/Parser.cpp 28
-rw-r--r-- Libraries/LibJS/Parser.h 1
-rw-r--r-- Libraries/LibJS/Tests/string-escapes.js 17
-rw-r--r-- Libraries/LibJS/Token.cpp 72
-rw-r--r-- Libraries/LibJS/Token.h 9
5 files changed, 118 insertions, 9 deletions
diff --git a/Libraries/LibJS/Parser.cpp b/Libraries/LibJS/Parser.cpp
index bafb72454a..3b1343079f 100644
--- a/Libraries/LibJS/Parser.cpp
+++ b/Libraries/LibJS/Parser.cpp
@@ -405,7 +405,7 @@ NonnullRefPtr<Expression> Parser::parse_primary_expression()
case TokenType::BoolLiteral:
return create_ast_node<BooleanLiteral>(consume().bool_value());
case TokenType::StringLiteral:
- return create_ast_node<StringLiteral>(consume().string_value());
+ return parse_string_literal(consume());
case TokenType::NullLiteral:
consume();
return create_ast_node<NullLiteral>();
@@ -494,7 +494,7 @@ NonnullRefPtr<ObjectExpression> Parser::parse_object_expression()
property_value = create_ast_node<Identifier>(identifier);
need_colon = false;
} else if (match(TokenType::StringLiteral)) {
- property_key = create_ast_node<StringLiteral>(consume(TokenType::StringLiteral).string_value());
+ property_key = parse_string_literal(consume());
} else if (match(TokenType::NumericLiteral)) {
property_key = create_ast_node<StringLiteral>(consume(TokenType::NumericLiteral).value());
} else if (match(TokenType::BracketOpen)) {
@@ -559,6 +559,28 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression()
return create_ast_node<ArrayExpression>(move(elements));
}
+// Decodes the escape sequences of a string / template-string token into a StringLiteral node.
+// A failed decode is reported as a syntax error at the position of `token` itself: callers pass
+// an already-consumed token, so the parser's current token is presumably the one after it.
+NonnullRefPtr<StringLiteral> Parser::parse_string_literal(Token token)
+{
+ auto status = Token::StringValueStatus::Ok;
+ auto string = token.string_value(status);
+ if (status != Token::StringValueStatus::Ok) {
+ String message;
+ if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
+ auto type = status == Token::StringValueStatus::MalformedUnicodeEscape ? "unicode" : "hexadecimal";
+ message = String::format("Malformed %s escape sequence", type);
+ } else if (status == Token::StringValueStatus::UnicodeEscapeOverflow) {
+ message = "Unicode codepoint must not be greater than 0x10ffff in escape sequence";
+ }
+
+ // Use the literal's own line/column rather than m_parser_state.m_current_token.
+ syntax_error(message, token.line_number(), token.line_column());
+ }
+ return create_ast_node<StringLiteral>(string);
+}
+
NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
{
consume(TokenType::TemplateLiteralStart);
@@ -579,7 +601,7 @@ NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
if (match(TokenType::TemplateLiteralString)) {
auto token = consume();
- expressions.append(create_ast_node<StringLiteral>(token.string_value()));
+ expressions.append(parse_string_literal(token));
if (is_tagged)
raw_strings.append(create_ast_node<StringLiteral>(token.value()));
} else if (match(TokenType::TemplateLiteralExprStart)) {
diff --git a/Libraries/LibJS/Parser.h b/Libraries/LibJS/Parser.h
index 91f90953ea..1f5d2f4fec 100644
--- a/Libraries/LibJS/Parser.h
+++ b/Libraries/LibJS/Parser.h
@@ -70,6 +70,7 @@ public:
NonnullRefPtr<Expression> parse_unary_prefixed_expression();
NonnullRefPtr<ObjectExpression> parse_object_expression();
NonnullRefPtr<ArrayExpression> parse_array_expression();
+ NonnullRefPtr<StringLiteral> parse_string_literal(Token token);
NonnullRefPtr<TemplateLiteral> parse_template_literal(bool is_tagged);
NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);
diff --git a/Libraries/LibJS/Tests/string-escapes.js b/Libraries/LibJS/Tests/string-escapes.js
new file mode 100644
index 0000000000..ad9f47ea12
--- /dev/null
+++ b/Libraries/LibJS/Tests/string-escapes.js
@@ -0,0 +1,17 @@
+load("test-common.js")
+// Each escape is checked in a quoted string and in a template literal; both must agree.
+try {
+ assert("\x55" === "U");
+ assert("\X55" === "X55");
+ assert(`\x55` === "U");
+ assert(`\X55` === "X55");
+ // \uXXXX and \u{...} forms; U+1F41E lies above U+FFFF, so it needs the braced form.
+ assert("\u26a0" === "⚠");
+ assert(`\u26a0` === "⚠");
+ assert("\u{1f41e}" === "🐞");
+ assert(`\u{1f41e}` === "🐞");
+ // Presumably assert (from test-common.js) throws on failure, skipping this line -- confirm.
+ console.log("PASS");
+} catch (e) {
+ console.log("FAIL: " + e);
+}
diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp
index ffb95908a5..cc3a1ee0eb 100644
--- a/Libraries/LibJS/Token.cpp
+++ b/Libraries/LibJS/Token.cpp
@@ -27,6 +27,7 @@
#include "Token.h"
#include <AK/Assertions.h>
#include <AK/StringBuilder.h>
+#include <AK/Utf32View.h>
#include <ctype.h>
namespace JS {
@@ -72,13 +73,26 @@ double Token::double_value() const
return strtod(value_string.characters(), nullptr);
}
-String Token::string_value() const
+// Maps a single hexadecimal digit character to its numeric value (0-15).
+// Any character that is not a hex digit trips the assertion.
+static u32 hex2int(char x)
+{
+ ASSERT(isxdigit(x));
+ return (x >= '0' && x <= '9') ? static_cast<u32>(x - '0') : 10u + (tolower(x) - 'a');
+}
+
+String Token::string_value(StringValueStatus& status) const
{
ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
auto is_template = type() == TokenType::TemplateLiteralString;
auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
+ auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
+ status = parse_status;
+ return {};
+ };
+
StringBuilder builder;
for (size_t i = offset; i < m_value.length() - offset; ++i) {
if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
@@ -114,14 +128,62 @@ String Token::string_value() const
case '\\':
builder.append('\\');
break;
+ case 'x': {
+ if (i + 2 >= m_value.length() - offset)
+ return encoding_failure(StringValueStatus::MalformedHexEscape);
+
+ auto digit1 = m_value[++i];
+ auto digit2 = m_value[++i];
+ if (!isxdigit(digit1) || !isxdigit(digit2))
+ return encoding_failure(StringValueStatus::MalformedHexEscape);
+ builder.append(static_cast<char>(hex2int(digit1) * 16 + hex2int(digit2)));
+ break;
+ }
+ case 'u': {
+ if (i + 1 >= m_value.length() - offset)
+ return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
+ u32 code_point = m_value[++i];
+
+ if (code_point == '{') {
+ code_point = 0;
+ do {
+ if (i + 1 >= m_value.length() - offset)
+ return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
+
+ auto ch = m_value[++i];
+ if (!isxdigit(ch))
+ return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
+
+ auto new_code_point = (code_point << 4u) | hex2int(ch);
+ if (new_code_point < code_point)
+ return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
+ code_point = new_code_point;
+ } while (m_value[i + 1] != '}');
+ ++i;
+ } else {
+ if (i + 3 >= m_value.length() - offset || !isxdigit(code_point))
+ return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
+
+ code_point = hex2int(code_point);
+ for (int j = 0; j < 3; ++j) {
+ auto ch = m_value[++i];
+ if (!isxdigit(ch))
+ return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
+ code_point = (code_point << 4u) | hex2int(ch);
+ }
+ }
+
+ builder.append({ &code_point, 1 });
+ break;
+ }
default:
if (is_template && (m_value[i] == '$' || m_value[i] == '`')) {
builder.append(m_value[i]);
- } else {
- // FIXME: Also parse octal, hex and unicode sequences
- // should anything else generate a syntax error?
- builder.append(m_value[i]);
+ break;
}
+
+ // FIXME: Also parse octal. Should anything else generate a syntax error?
+ builder.append(m_value[i]);
}
} else {
builder.append(m_value[i]);
diff --git a/Libraries/LibJS/Token.h b/Libraries/LibJS/Token.h
index c66d3c5431..884a249f27 100644
--- a/Libraries/LibJS/Token.h
+++ b/Libraries/LibJS/Token.h
@@ -172,9 +172,16 @@ public:
size_t line_number() const { return m_line_number; }
size_t line_column() const { return m_line_column; }
double double_value() const;
- String string_value() const;
bool bool_value() const;
+ enum class StringValueStatus {
+ Ok, // string_value() reported no escape-sequence failure
+ MalformedHexEscape, // a \x escape is not followed by two hexadecimal digits
+ MalformedUnicodeEscape, // a \uXXXX or \u{...} escape is truncated or contains a non-hex digit
+ UnicodeEscapeOverflow, // the value accumulated inside \u{...} overflowed while adding a digit
+ };
+ String string_value(StringValueStatus& status) const;
+
bool is_identifier_name() const;
private: