summary | refs | log | tree | commit | diff
path: root/Libraries
diff options
context:
space:
mode:
author: mattco98 <matthewcolsson@gmail.com> 2020-05-03 15:41:14 -0700
committer: Andreas Kling <kling@serenityos.org> 2020-05-04 16:46:31 +0200
commit: adb4accab3668e60a6998c4b492d1cf7be11f9d1 (patch)
tree: 14dc0ee4b84d5cb7b9b4b6697d6481633c5f8a39 /Libraries
parent: 2fdeb464f739b5aada58d1910334336c8169da5b (diff)
download: serenity-adb4accab3668e60a6998c4b492d1cf7be11f9d1.zip
LibJS: Add template literals
Adds fully functioning template literals. Because template literals contain expressions, most of the work has to be done in the Lexer rather than the Parser. And because of the complexity of template literals (expressions, nesting, escapes, etc.), the Lexer needs to have some template-related state.

When entering a new template literal, a TemplateLiteralStart token is emitted. When inside a literal, all text up to a '${' or '`' (or EOF, but that's a syntax error) is lexed as a TemplateLiteralString token; when a '${' is seen, a TemplateLiteralExprStart token is emitted. At this point, the Lexer proceeds as normal; however, it keeps track of the number of opening and closing curly braces it has seen in order to determine the close of the expression. Once it finds a matching curly brace for the '${', a TemplateLiteralExprEnd token is emitted and the state is updated accordingly.

When the Lexer is inside of a template literal, but not an expression, and sees a '`', this must be the closing grave: a TemplateLiteralEnd token is emitted.

The state required to correctly parse template strings consists of a vector (for nesting) of two pieces of information: whether or not we are in a template expression (as opposed to a template string); and the count of the number of unmatched open curly braces we have seen (only applicable if the Lexer is currently in a template expression).

TODO: Add support for template literal newlines in the JS REPL (this will cause a syntax error currently):

    > `foo
    > bar`
    'foo
    bar'
Diffstat (limited to 'Libraries')
-rw-r--r-- Libraries/LibGUI/JSSyntaxHighlighter.cpp | 6
-rw-r--r-- Libraries/LibJS/AST.cpp | 34
-rw-r--r-- Libraries/LibJS/AST.h | 18
-rw-r--r-- Libraries/LibJS/Lexer.cpp | 94
-rw-r--r-- Libraries/LibJS/Lexer.h | 6
-rw-r--r-- Libraries/LibJS/Parser.cpp | 38
-rw-r--r-- Libraries/LibJS/Parser.h | 1
-rw-r--r-- Libraries/LibJS/Tests/template-literals.js | 45
-rw-r--r-- Libraries/LibJS/Token.cpp | 24
-rw-r--r-- Libraries/LibJS/Token.h | 7
10 files changed, 230 insertions, 43 deletions
diff --git a/Libraries/LibGUI/JSSyntaxHighlighter.cpp b/Libraries/LibGUI/JSSyntaxHighlighter.cpp
index 1659fd4263..08769b2a56 100644
--- a/Libraries/LibGUI/JSSyntaxHighlighter.cpp
+++ b/Libraries/LibGUI/JSSyntaxHighlighter.cpp
@@ -42,7 +42,9 @@ static TextStyle style_for_token_type(Gfx::Palette palette, JS::TokenType type)
case JS::TokenType::NumericLiteral:
return { palette.syntax_number() };
case JS::TokenType::StringLiteral:
- case JS::TokenType::TemplateLiteral:
+ case JS::TokenType::TemplateLiteralStart:
+ case JS::TokenType::TemplateLiteralEnd:
+ case JS::TokenType::TemplateLiteralString:
case JS::TokenType::RegexLiteral:
case JS::TokenType::UnterminatedStringLiteral:
return { palette.syntax_string() };
@@ -55,6 +57,8 @@ static TextStyle style_for_token_type(Gfx::Palette palette, JS::TokenType type)
case JS::TokenType::ParenClose:
case JS::TokenType::ParenOpen:
case JS::TokenType::Semicolon:
+ case JS::TokenType::TemplateLiteralExprStart:
+ case JS::TokenType::TemplateLiteralExprEnd:
return { palette.syntax_punctuation() };
case JS::TokenType::Ampersand:
case JS::TokenType::AmpersandEquals:
diff --git a/Libraries/LibJS/AST.cpp b/Libraries/LibJS/AST.cpp
index 28d56b5698..a53eac4e00 100644
--- a/Libraries/LibJS/AST.cpp
+++ b/Libraries/LibJS/AST.cpp
@@ -1229,6 +1229,28 @@ Value ArrayExpression::execute(Interpreter& interpreter) const
return array;
}
+void TemplateLiteral::dump(int indent) const
+{
+ ASTNode::dump(indent);
+
+ for (auto& expression : expressions())
+ expression.dump(indent + 1);
+}
+
+Value TemplateLiteral::execute(Interpreter& interpreter) const
+{
+ StringBuilder string_builder;
+
+ for (auto& expression : expressions()) {
+ auto expr = expression.execute(interpreter);
+ if (interpreter.exception())
+ return {};
+ string_builder.append(expr.to_string());
+ }
+
+ return js_string(interpreter, string_builder.build());
+}
+
void TryStatement::dump(int indent) const
{
ASTNode::dump(indent);
@@ -1398,15 +1420,15 @@ Value ConditionalExpression::execute(Interpreter& interpreter) const
void ConditionalExpression::dump(int indent) const
{
ASTNode::dump(indent);
- print_indent(indent);
+ print_indent(indent + 1);
printf("(Test)\n");
- m_test->dump(indent + 1);
- print_indent(indent);
+ m_test->dump(indent + 2);
+ print_indent(indent + 1);
printf("(Consequent)\n");
- m_consequent->dump(indent + 1);
- print_indent(indent);
+ m_consequent->dump(indent + 2);
+ print_indent(indent + 1);
printf("(Alternate)\n");
- m_alternate->dump(indent + 1);
+ m_alternate->dump(indent + 2);
}
void SequenceExpression::dump(int indent) const
diff --git a/Libraries/LibJS/AST.h b/Libraries/LibJS/AST.h
index 338facf098..571b035c76 100644
--- a/Libraries/LibJS/AST.h
+++ b/Libraries/LibJS/AST.h
@@ -762,6 +762,24 @@ private:
Vector<RefPtr<Expression>> m_elements;
};
+class TemplateLiteral final : public Expression {
+public:
+ TemplateLiteral(NonnullRefPtrVector<Expression> expressions)
+ : m_expressions(expressions)
+ {
+ }
+
+ virtual Value execute(Interpreter&) const override;
+ virtual void dump(int indent) const override;
+
+ const NonnullRefPtrVector<Expression>& expressions() const { return m_expressions; }
+
+private:
+ virtual const char* class_name() const override { return "TemplateLiteral"; }
+
+ const NonnullRefPtrVector<Expression> m_expressions;
+};
+
class MemberExpression final : public Expression {
public:
MemberExpression(NonnullRefPtr<Expression> object, NonnullRefPtr<Expression> property, bool computed = false)
diff --git a/Libraries/LibJS/Lexer.cpp b/Libraries/LibJS/Lexer.cpp
index cfcead6906..1c5c17046c 100644
--- a/Libraries/LibJS/Lexer.cpp
+++ b/Libraries/LibJS/Lexer.cpp
@@ -244,34 +244,74 @@ void Lexer::syntax_error(const char* msg)
Token Lexer::next()
{
size_t trivia_start = m_position;
+ auto in_template = !m_template_states.is_empty();
- // consume whitespace and comments
- while (true) {
- if (isspace(m_current_char)) {
- do {
- consume();
- } while (isspace(m_current_char));
- } else if (is_line_comment_start()) {
- consume();
- do {
+ if (!in_template || m_template_states.last().in_expr) {
+ // consume whitespace and comments
+ while (true) {
+ if (isspace(m_current_char)) {
+ do {
+ consume();
+ } while (isspace(m_current_char));
+ } else if (is_line_comment_start()) {
consume();
- } while (!is_eof() && m_current_char != '\n');
- } else if (is_block_comment_start()) {
- consume();
- do {
+ do {
+ consume();
+ } while (!is_eof() && m_current_char != '\n');
+ } else if (is_block_comment_start()) {
consume();
- } while (!is_eof() && !is_block_comment_end());
- consume(); // consume *
- consume(); // consume /
- } else {
- break;
+ do {
+ consume();
+ } while (!is_eof() && !is_block_comment_end());
+ consume(); // consume *
+ consume(); // consume /
+ } else {
+ break;
+ }
}
}
size_t value_start = m_position;
auto token_type = TokenType::Invalid;
- if (is_identifier_start()) {
+ if (m_current_char == '`') {
+ consume();
+
+ if (!in_template) {
+ token_type = TokenType::TemplateLiteralStart;
+ m_template_states.append({ false, 0 });
+ } else {
+ if (m_template_states.last().in_expr) {
+ m_template_states.append({ false, 0 });
+ token_type = TokenType::TemplateLiteralStart;
+ } else {
+ m_template_states.take_last();
+ token_type = TokenType::TemplateLiteralEnd;
+ }
+ }
+ } else if (in_template && m_template_states.last().in_expr && m_template_states.last().open_bracket_count == 0 && m_current_char == '}') {
+ consume();
+ token_type = TokenType::TemplateLiteralExprEnd;
+ m_template_states.last().in_expr = false;
+ } else if (in_template && !m_template_states.last().in_expr) {
+ if (is_eof()) {
+ token_type = TokenType::UnterminatedTemplateLiteral;
+ m_template_states.take_last();
+ } else if (match('$', '{')) {
+ token_type = TokenType::TemplateLiteralExprStart;
+ consume();
+ consume();
+ m_template_states.last().in_expr = true;
+ } else {
+ while (!match('$', '{') && m_current_char != '`' && !is_eof()) {
+ if (match('\\', '$') || match('\\', '`'))
+ consume();
+ consume();
+ }
+
+ token_type = TokenType::TemplateLiteralString;
+ }
+ } else if (is_identifier_start()) {
// identifier or keyword
do {
consume();
@@ -339,7 +379,7 @@ Token Lexer::next()
}
}
token_type = TokenType::NumericLiteral;
- } else if (m_current_char == '"' || m_current_char == '\'' || m_current_char == '`') {
+ } else if (m_current_char == '"' || m_current_char == '\'') {
char stop_char = m_current_char;
consume();
while (m_current_char != stop_char && m_current_char != '\n' && !is_eof()) {
@@ -353,10 +393,7 @@ Token Lexer::next()
token_type = TokenType::UnterminatedStringLiteral;
} else {
consume();
- if (stop_char == '`')
- token_type = TokenType::TemplateLiteral;
- else
- token_type = TokenType::StringLiteral;
+ token_type = TokenType::StringLiteral;
}
} else if (m_current_char == EOF) {
token_type = TokenType::Eof;
@@ -416,6 +453,14 @@ Token Lexer::next()
}
}
+ if (!m_template_states.is_empty() && m_template_states.last().in_expr) {
+ if (token_type == TokenType::CurlyOpen) {
+ m_template_states.last().open_bracket_count++;
+ } else if (token_type == TokenType::CurlyClose) {
+ m_template_states.last().open_bracket_count--;
+ }
+ }
+
m_current_token = Token(
token_type,
m_source.substring_view(trivia_start - 1, value_start - trivia_start),
@@ -425,4 +470,5 @@ Token Lexer::next()
return m_current_token;
}
+
}
diff --git a/Libraries/LibJS/Lexer.h b/Libraries/LibJS/Lexer.h
index d9be445eb6..f5a4ae51cd 100644
--- a/Libraries/LibJS/Lexer.h
+++ b/Libraries/LibJS/Lexer.h
@@ -71,6 +71,12 @@ private:
size_t m_line_column = 1;
bool m_log_errors = true;
+ struct TemplateState {
+ bool in_expr;
+ u8 open_bracket_count;
+ };
+ Vector<TemplateState> m_template_states;
+
static HashMap<String, TokenType> s_keywords;
static HashMap<String, TokenType> s_three_char_tokens;
static HashMap<String, TokenType> s_two_char_tokens;
diff --git a/Libraries/LibJS/Parser.cpp b/Libraries/LibJS/Parser.cpp
index 10383c57fb..4ff34318ed 100644
--- a/Libraries/LibJS/Parser.cpp
+++ b/Libraries/LibJS/Parser.cpp
@@ -394,6 +394,8 @@ NonnullRefPtr<Expression> Parser::parse_primary_expression()
return parse_function_node<FunctionExpression>();
case TokenType::BracketOpen:
return parse_array_expression();
+ case TokenType::TemplateLiteralStart:
+ return parse_template_literal();
case TokenType::New:
return parse_new_expression();
default:
@@ -536,6 +538,40 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression()
return create_ast_node<ArrayExpression>(move(elements));
}
+NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal()
+{
+ consume(TokenType::TemplateLiteralStart);
+
+ NonnullRefPtrVector<Expression> expressions;
+
+ while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
+ if (match(TokenType::TemplateLiteralString)) {
+ expressions.append(create_ast_node<StringLiteral>(consume().string_value()));
+ } else if (match(TokenType::TemplateLiteralExprStart)) {
+ consume(TokenType::TemplateLiteralExprStart);
+ if (match(TokenType::TemplateLiteralExprEnd)) {
+ syntax_error("Empty template literal expression block");
+ return create_ast_node<TemplateLiteral>(expressions);
+ }
+
+ expressions.append(parse_expression(0));
+ if (match(TokenType::UnterminatedTemplateLiteral)) {
+ syntax_error("Unterminated template literal");
+ return create_ast_node<TemplateLiteral>(expressions);
+ }
+ consume(TokenType::TemplateLiteralExprEnd);
+ }
+ }
+
+ if (match(TokenType::UnterminatedTemplateLiteral)) {
+ syntax_error("Unterminated template literal");
+ } else {
+ consume(TokenType::TemplateLiteralEnd);
+ }
+
+ return create_ast_node<TemplateLiteral>(expressions);
+}
+
NonnullRefPtr<Expression> Parser::parse_expression(int min_precedence, Associativity associativity)
{
auto expression = parse_primary_expression();
@@ -1087,7 +1123,7 @@ bool Parser::match_expression() const
return type == TokenType::BoolLiteral
|| type == TokenType::NumericLiteral
|| type == TokenType::StringLiteral
- || type == TokenType::TemplateLiteral
+ || type == TokenType::TemplateLiteralStart
|| type == TokenType::NullLiteral
|| type == TokenType::Identifier
|| type == TokenType::New
diff --git a/Libraries/LibJS/Parser.h b/Libraries/LibJS/Parser.h
index e11429399b..273dce7c53 100644
--- a/Libraries/LibJS/Parser.h
+++ b/Libraries/LibJS/Parser.h
@@ -69,6 +69,7 @@ public:
NonnullRefPtr<Expression> parse_unary_prefixed_expression();
NonnullRefPtr<ObjectExpression> parse_object_expression();
NonnullRefPtr<ArrayExpression> parse_array_expression();
+ NonnullRefPtr<TemplateLiteral> parse_template_literal();
NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);
NonnullRefPtr<NewExpression> parse_new_expression();
diff --git a/Libraries/LibJS/Tests/template-literals.js b/Libraries/LibJS/Tests/template-literals.js
new file mode 100644
index 0000000000..71734cb0d2
--- /dev/null
+++ b/Libraries/LibJS/Tests/template-literals.js
@@ -0,0 +1,45 @@
+load("test-common.js");
+
+try {
+ assert(`foo` === "foo");
+ assert(`foo{` === "foo{");
+ assert(`foo}` === "foo}");
+ assert(`foo$` === "foo$");
+ assert(`foo\`` === "foo`")
+ assert(`foo\$` === "foo$");
+
+ assert(`foo ${undefined}` === "foo undefined");
+ assert(`foo ${null}` === "foo null");
+ assert(`foo ${5}` === "foo 5");
+ assert(`foo ${true}` === "foo true");
+ assert(`foo ${"bar"}` === "foo bar");
+ assert(`foo \${"bar"}` === 'foo ${"bar"}');
+
+ assert(`foo ${{}}` === "foo [object Object]");
+ assert(`foo ${{ bar: { baz: "qux" }}}` === "foo [object Object]");
+ assert(`foo ${"bar"} ${"baz"}` === "foo bar baz");
+ assert(`${"foo"} bar baz` === "foo bar baz");
+ assert(`${"foo bar baz"}` === "foo bar baz");
+
+ let a = 27;
+ assert(`${a}` === "27");
+ assert(`foo ${a}` === "foo 27");
+ assert(`foo ${a ? "bar" : "baz"}` === "foo bar");
+ assert(`foo ${(() => a)()}` === "foo 27");
+
+ assert(`foo ${`bar`}` === "foo bar");
+ assert(`${`${`${`${"foo"}`} bar`}`}` === "foo bar");
+ assert(`foo
+ bar` === "foo\n bar");
+
+ assertThrowsError(() => {
+ `${b}`;
+ }, {
+ error: ReferenceError,
+ message: "'b' not known"
+ })
+
+ console.log("PASS");
+} catch (e) {
+ console.log("FAIL: " + e);
+}
diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp
index 3375146ca2..ffb95908a5 100644
--- a/Libraries/LibJS/Token.cpp
+++ b/Libraries/LibJS/Token.cpp
@@ -74,10 +74,14 @@ double Token::double_value() const
String Token::string_value() const
{
- ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteral);
+ ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
+ auto is_template = type() == TokenType::TemplateLiteralString;
+
+ auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
+
StringBuilder builder;
- for (size_t i = 1; i < m_value.length() - 1; ++i) {
- if (m_value[i] == '\\' && i + 1 < m_value.length() - 1) {
+ for (size_t i = offset; i < m_value.length() - offset; ++i) {
+ if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
i++;
switch (m_value[i]) {
case 'b':
@@ -107,18 +111,18 @@ String Token::string_value() const
case '"':
builder.append('"');
break;
- case '`':
- builder.append('`');
- break;
case '\\':
builder.append('\\');
break;
default:
- // FIXME: Also parse octal, hex and unicode sequences
- // should anything else generate a syntax error?
- builder.append(m_value[i]);
+ if (is_template && (m_value[i] == '$' || m_value[i] == '`')) {
+ builder.append(m_value[i]);
+ } else {
+ // FIXME: Also parse octal, hex and unicode sequences
+ // should anything else generate a syntax error?
+ builder.append(m_value[i]);
+ }
}
-
} else {
builder.append(m_value[i]);
}
diff --git a/Libraries/LibJS/Token.h b/Libraries/LibJS/Token.h
index e272eff804..b3242703d8 100644
--- a/Libraries/LibJS/Token.h
+++ b/Libraries/LibJS/Token.h
@@ -112,7 +112,11 @@ namespace JS {
__ENUMERATE_JS_TOKEN(SlashEquals) \
__ENUMERATE_JS_TOKEN(StringLiteral) \
__ENUMERATE_JS_TOKEN(Switch) \
- __ENUMERATE_JS_TOKEN(TemplateLiteral) \
+ __ENUMERATE_JS_TOKEN(TemplateLiteralEnd) \
+ __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd) \
+ __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart) \
+ __ENUMERATE_JS_TOKEN(TemplateLiteralStart) \
+ __ENUMERATE_JS_TOKEN(TemplateLiteralString) \
__ENUMERATE_JS_TOKEN(This) \
__ENUMERATE_JS_TOKEN(Throw) \
__ENUMERATE_JS_TOKEN(Tilde) \
@@ -122,6 +126,7 @@ namespace JS {
__ENUMERATE_JS_TOKEN(UnsignedShiftRight) \
__ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals) \
__ENUMERATE_JS_TOKEN(UnterminatedStringLiteral) \
+ __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral) \
__ENUMERATE_JS_TOKEN(Var) \
__ENUMERATE_JS_TOKEN(Void) \
__ENUMERATE_JS_TOKEN(While) \