diff options
author | mattco98 <matthewcolsson@gmail.com> | 2020-05-03 15:41:14 -0700 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-05-04 16:46:31 +0200 |
commit | adb4accab3668e60a6998c4b492d1cf7be11f9d1 (patch) | |
tree | 14dc0ee4b84d5cb7b9b4b6697d6481633c5f8a39 /Libraries | |
parent | 2fdeb464f739b5aada58d1910334336c8169da5b (diff) | |
download | serenity-adb4accab3668e60a6998c4b492d1cf7be11f9d1.zip |
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc.), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text up until a '${' or '`' (or EOF,
which is a syntax error) is emitted as a TemplateLiteralString token. If
a '${' follows, a TemplateLiteralExprStart token is emitted. At this
point, the Lexer proceeds as normal; however, it keeps track of the
number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
Diffstat (limited to 'Libraries')
-rw-r--r-- | Libraries/LibGUI/JSSyntaxHighlighter.cpp | 6 | ||||
-rw-r--r-- | Libraries/LibJS/AST.cpp | 34 | ||||
-rw-r--r-- | Libraries/LibJS/AST.h | 18 | ||||
-rw-r--r-- | Libraries/LibJS/Lexer.cpp | 94 | ||||
-rw-r--r-- | Libraries/LibJS/Lexer.h | 6 | ||||
-rw-r--r-- | Libraries/LibJS/Parser.cpp | 38 | ||||
-rw-r--r-- | Libraries/LibJS/Parser.h | 1 | ||||
-rw-r--r-- | Libraries/LibJS/Tests/template-literals.js | 45 | ||||
-rw-r--r-- | Libraries/LibJS/Token.cpp | 24 | ||||
-rw-r--r-- | Libraries/LibJS/Token.h | 7 |
10 files changed, 230 insertions, 43 deletions
diff --git a/Libraries/LibGUI/JSSyntaxHighlighter.cpp b/Libraries/LibGUI/JSSyntaxHighlighter.cpp index 1659fd4263..08769b2a56 100644 --- a/Libraries/LibGUI/JSSyntaxHighlighter.cpp +++ b/Libraries/LibGUI/JSSyntaxHighlighter.cpp @@ -42,7 +42,9 @@ static TextStyle style_for_token_type(Gfx::Palette palette, JS::TokenType type) case JS::TokenType::NumericLiteral: return { palette.syntax_number() }; case JS::TokenType::StringLiteral: - case JS::TokenType::TemplateLiteral: + case JS::TokenType::TemplateLiteralStart: + case JS::TokenType::TemplateLiteralEnd: + case JS::TokenType::TemplateLiteralString: case JS::TokenType::RegexLiteral: case JS::TokenType::UnterminatedStringLiteral: return { palette.syntax_string() }; @@ -55,6 +57,8 @@ static TextStyle style_for_token_type(Gfx::Palette palette, JS::TokenType type) case JS::TokenType::ParenClose: case JS::TokenType::ParenOpen: case JS::TokenType::Semicolon: + case JS::TokenType::TemplateLiteralExprStart: + case JS::TokenType::TemplateLiteralExprEnd: return { palette.syntax_punctuation() }; case JS::TokenType::Ampersand: case JS::TokenType::AmpersandEquals: diff --git a/Libraries/LibJS/AST.cpp b/Libraries/LibJS/AST.cpp index 28d56b5698..a53eac4e00 100644 --- a/Libraries/LibJS/AST.cpp +++ b/Libraries/LibJS/AST.cpp @@ -1229,6 +1229,28 @@ Value ArrayExpression::execute(Interpreter& interpreter) const return array; } +void TemplateLiteral::dump(int indent) const +{ + ASTNode::dump(indent); + + for (auto& expression : expressions()) + expression.dump(indent + 1); +} + +Value TemplateLiteral::execute(Interpreter& interpreter) const +{ + StringBuilder string_builder; + + for (auto& expression : expressions()) { + auto expr = expression.execute(interpreter); + if (interpreter.exception()) + return {}; + string_builder.append(expr.to_string()); + } + + return js_string(interpreter, string_builder.build()); +} + void TryStatement::dump(int indent) const { ASTNode::dump(indent); @@ -1398,15 +1420,15 @@ Value 
ConditionalExpression::execute(Interpreter& interpreter) const void ConditionalExpression::dump(int indent) const { ASTNode::dump(indent); - print_indent(indent); + print_indent(indent + 1); printf("(Test)\n"); - m_test->dump(indent + 1); - print_indent(indent); + m_test->dump(indent + 2); + print_indent(indent + 1); printf("(Consequent)\n"); - m_consequent->dump(indent + 1); - print_indent(indent); + m_consequent->dump(indent + 2); + print_indent(indent + 1); printf("(Alternate)\n"); - m_alternate->dump(indent + 1); + m_alternate->dump(indent + 2); } void SequenceExpression::dump(int indent) const diff --git a/Libraries/LibJS/AST.h b/Libraries/LibJS/AST.h index 338facf098..571b035c76 100644 --- a/Libraries/LibJS/AST.h +++ b/Libraries/LibJS/AST.h @@ -762,6 +762,24 @@ private: Vector<RefPtr<Expression>> m_elements; }; +class TemplateLiteral final : public Expression { +public: + TemplateLiteral(NonnullRefPtrVector<Expression> expressions) + : m_expressions(expressions) + { + } + + virtual Value execute(Interpreter&) const override; + virtual void dump(int indent) const override; + + const NonnullRefPtrVector<Expression>& expressions() const { return m_expressions; } + +private: + virtual const char* class_name() const override { return "TemplateLiteral"; } + + const NonnullRefPtrVector<Expression> m_expressions; +}; + class MemberExpression final : public Expression { public: MemberExpression(NonnullRefPtr<Expression> object, NonnullRefPtr<Expression> property, bool computed = false) diff --git a/Libraries/LibJS/Lexer.cpp b/Libraries/LibJS/Lexer.cpp index cfcead6906..1c5c17046c 100644 --- a/Libraries/LibJS/Lexer.cpp +++ b/Libraries/LibJS/Lexer.cpp @@ -244,34 +244,74 @@ void Lexer::syntax_error(const char* msg) Token Lexer::next() { size_t trivia_start = m_position; + auto in_template = !m_template_states.is_empty(); - // consume whitespace and comments - while (true) { - if (isspace(m_current_char)) { - do { - consume(); - } while (isspace(m_current_char)); - } else 
if (is_line_comment_start()) { - consume(); - do { + if (!in_template || m_template_states.last().in_expr) { + // consume whitespace and comments + while (true) { + if (isspace(m_current_char)) { + do { + consume(); + } while (isspace(m_current_char)); + } else if (is_line_comment_start()) { consume(); - } while (!is_eof() && m_current_char != '\n'); - } else if (is_block_comment_start()) { - consume(); - do { + do { + consume(); + } while (!is_eof() && m_current_char != '\n'); + } else if (is_block_comment_start()) { consume(); - } while (!is_eof() && !is_block_comment_end()); - consume(); // consume * - consume(); // consume / - } else { - break; + do { + consume(); + } while (!is_eof() && !is_block_comment_end()); + consume(); // consume * + consume(); // consume / + } else { + break; + } } } size_t value_start = m_position; auto token_type = TokenType::Invalid; - if (is_identifier_start()) { + if (m_current_char == '`') { + consume(); + + if (!in_template) { + token_type = TokenType::TemplateLiteralStart; + m_template_states.append({ false, 0 }); + } else { + if (m_template_states.last().in_expr) { + m_template_states.append({ false, 0 }); + token_type = TokenType::TemplateLiteralStart; + } else { + m_template_states.take_last(); + token_type = TokenType::TemplateLiteralEnd; + } + } + } else if (in_template && m_template_states.last().in_expr && m_template_states.last().open_bracket_count == 0 && m_current_char == '}') { + consume(); + token_type = TokenType::TemplateLiteralExprEnd; + m_template_states.last().in_expr = false; + } else if (in_template && !m_template_states.last().in_expr) { + if (is_eof()) { + token_type = TokenType::UnterminatedTemplateLiteral; + m_template_states.take_last(); + } else if (match('$', '{')) { + token_type = TokenType::TemplateLiteralExprStart; + consume(); + consume(); + m_template_states.last().in_expr = true; + } else { + while (!match('$', '{') && m_current_char != '`' && !is_eof()) { + if (match('\\', '$') || match('\\', 
'`')) + consume(); + consume(); + } + + token_type = TokenType::TemplateLiteralString; + } + } else if (is_identifier_start()) { // identifier or keyword do { consume(); @@ -339,7 +379,7 @@ Token Lexer::next() } } token_type = TokenType::NumericLiteral; - } else if (m_current_char == '"' || m_current_char == '\'' || m_current_char == '`') { + } else if (m_current_char == '"' || m_current_char == '\'') { char stop_char = m_current_char; consume(); while (m_current_char != stop_char && m_current_char != '\n' && !is_eof()) { @@ -353,10 +393,7 @@ Token Lexer::next() token_type = TokenType::UnterminatedStringLiteral; } else { consume(); - if (stop_char == '`') - token_type = TokenType::TemplateLiteral; - else - token_type = TokenType::StringLiteral; + token_type = TokenType::StringLiteral; } } else if (m_current_char == EOF) { token_type = TokenType::Eof; @@ -416,6 +453,14 @@ Token Lexer::next() } } + if (!m_template_states.is_empty() && m_template_states.last().in_expr) { + if (token_type == TokenType::CurlyOpen) { + m_template_states.last().open_bracket_count++; + } else if (token_type == TokenType::CurlyClose) { + m_template_states.last().open_bracket_count--; + } + } + m_current_token = Token( token_type, m_source.substring_view(trivia_start - 1, value_start - trivia_start), @@ -425,4 +470,5 @@ Token Lexer::next() return m_current_token; } + } diff --git a/Libraries/LibJS/Lexer.h b/Libraries/LibJS/Lexer.h index d9be445eb6..f5a4ae51cd 100644 --- a/Libraries/LibJS/Lexer.h +++ b/Libraries/LibJS/Lexer.h @@ -71,6 +71,12 @@ private: size_t m_line_column = 1; bool m_log_errors = true; + struct TemplateState { + bool in_expr; + u8 open_bracket_count; + }; + Vector<TemplateState> m_template_states; + static HashMap<String, TokenType> s_keywords; static HashMap<String, TokenType> s_three_char_tokens; static HashMap<String, TokenType> s_two_char_tokens; diff --git a/Libraries/LibJS/Parser.cpp b/Libraries/LibJS/Parser.cpp index 10383c57fb..4ff34318ed 100644 --- 
a/Libraries/LibJS/Parser.cpp +++ b/Libraries/LibJS/Parser.cpp @@ -394,6 +394,8 @@ NonnullRefPtr<Expression> Parser::parse_primary_expression() return parse_function_node<FunctionExpression>(); case TokenType::BracketOpen: return parse_array_expression(); + case TokenType::TemplateLiteralStart: + return parse_template_literal(); case TokenType::New: return parse_new_expression(); default: @@ -536,6 +538,40 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression() return create_ast_node<ArrayExpression>(move(elements)); } +NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal() +{ + consume(TokenType::TemplateLiteralStart); + + NonnullRefPtrVector<Expression> expressions; + + while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) { + if (match(TokenType::TemplateLiteralString)) { + expressions.append(create_ast_node<StringLiteral>(consume().string_value())); + } else if (match(TokenType::TemplateLiteralExprStart)) { + consume(TokenType::TemplateLiteralExprStart); + if (match(TokenType::TemplateLiteralExprEnd)) { + syntax_error("Empty template literal expression block"); + return create_ast_node<TemplateLiteral>(expressions); + } + + expressions.append(parse_expression(0)); + if (match(TokenType::UnterminatedTemplateLiteral)) { + syntax_error("Unterminated template literal"); + return create_ast_node<TemplateLiteral>(expressions); + } + consume(TokenType::TemplateLiteralExprEnd); + } + } + + if (match(TokenType::UnterminatedTemplateLiteral)) { + syntax_error("Unterminated template literal"); + } else { + consume(TokenType::TemplateLiteralEnd); + } + + return create_ast_node<TemplateLiteral>(expressions); +} + NonnullRefPtr<Expression> Parser::parse_expression(int min_precedence, Associativity associativity) { auto expression = parse_primary_expression(); @@ -1087,7 +1123,7 @@ bool Parser::match_expression() const return type == TokenType::BoolLiteral || type == TokenType::NumericLiteral || type == 
TokenType::StringLiteral - || type == TokenType::TemplateLiteral + || type == TokenType::TemplateLiteralStart || type == TokenType::NullLiteral || type == TokenType::Identifier || type == TokenType::New diff --git a/Libraries/LibJS/Parser.h b/Libraries/LibJS/Parser.h index e11429399b..273dce7c53 100644 --- a/Libraries/LibJS/Parser.h +++ b/Libraries/LibJS/Parser.h @@ -69,6 +69,7 @@ public: NonnullRefPtr<Expression> parse_unary_prefixed_expression(); NonnullRefPtr<ObjectExpression> parse_object_expression(); NonnullRefPtr<ArrayExpression> parse_array_expression(); + NonnullRefPtr<TemplateLiteral> parse_template_literal(); NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right); NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>); NonnullRefPtr<NewExpression> parse_new_expression(); diff --git a/Libraries/LibJS/Tests/template-literals.js b/Libraries/LibJS/Tests/template-literals.js new file mode 100644 index 0000000000..71734cb0d2 --- /dev/null +++ b/Libraries/LibJS/Tests/template-literals.js @@ -0,0 +1,45 @@ +load("test-common.js"); + +try { + assert(`foo` === "foo"); + assert(`foo{` === "foo{"); + assert(`foo}` === "foo}"); + assert(`foo$` === "foo$"); + assert(`foo\`` === "foo`") + assert(`foo\$` === "foo$"); + + assert(`foo ${undefined}` === "foo undefined"); + assert(`foo ${null}` === "foo null"); + assert(`foo ${5}` === "foo 5"); + assert(`foo ${true}` === "foo true"); + assert(`foo ${"bar"}` === "foo bar"); + assert(`foo \${"bar"}` === 'foo ${"bar"}'); + + assert(`foo ${{}}` === "foo [object Object]"); + assert(`foo ${{ bar: { baz: "qux" }}}` === "foo [object Object]"); + assert(`foo ${"bar"} ${"baz"}` === "foo bar baz"); + assert(`${"foo"} bar baz` === "foo bar baz"); + assert(`${"foo bar baz"}` === "foo bar baz"); + + let a = 27; + assert(`${a}` === "27"); + assert(`foo ${a}` === "foo 27"); + assert(`foo ${a ? 
"bar" : "baz"}` === "foo bar"); + assert(`foo ${(() => a)()}` === "foo 27"); + + assert(`foo ${`bar`}` === "foo bar"); + assert(`${`${`${`${"foo"}`} bar`}`}` === "foo bar"); + assert(`foo + bar` === "foo\n bar"); + + assertThrowsError(() => { + `${b}`; + }, { + error: ReferenceError, + message: "'b' not known" + }) + + console.log("PASS"); +} catch (e) { + console.log("FAIL: " + e); +} diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp index 3375146ca2..ffb95908a5 100644 --- a/Libraries/LibJS/Token.cpp +++ b/Libraries/LibJS/Token.cpp @@ -74,10 +74,14 @@ double Token::double_value() const String Token::string_value() const { - ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteral); + ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString); + auto is_template = type() == TokenType::TemplateLiteralString; + + auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1; + StringBuilder builder; - for (size_t i = 1; i < m_value.length() - 1; ++i) { - if (m_value[i] == '\\' && i + 1 < m_value.length() - 1) { + for (size_t i = offset; i < m_value.length() - offset; ++i) { + if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) { i++; switch (m_value[i]) { case 'b': @@ -107,18 +111,18 @@ String Token::string_value() const case '"': builder.append('"'); break; - case '`': - builder.append('`'); - break; case '\\': builder.append('\\'); break; default: - // FIXME: Also parse octal, hex and unicode sequences - // should anything else generate a syntax error? - builder.append(m_value[i]); + if (is_template && (m_value[i] == '$' || m_value[i] == '`')) { + builder.append(m_value[i]); + } else { + // FIXME: Also parse octal, hex and unicode sequences + // should anything else generate a syntax error? 
+ builder.append(m_value[i]); + } } - } else { builder.append(m_value[i]); } diff --git a/Libraries/LibJS/Token.h b/Libraries/LibJS/Token.h index e272eff804..b3242703d8 100644 --- a/Libraries/LibJS/Token.h +++ b/Libraries/LibJS/Token.h @@ -112,7 +112,11 @@ namespace JS { __ENUMERATE_JS_TOKEN(SlashEquals) \ __ENUMERATE_JS_TOKEN(StringLiteral) \ __ENUMERATE_JS_TOKEN(Switch) \ - __ENUMERATE_JS_TOKEN(TemplateLiteral) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralEnd) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralStart) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralString) \ __ENUMERATE_JS_TOKEN(This) \ __ENUMERATE_JS_TOKEN(Throw) \ __ENUMERATE_JS_TOKEN(Tilde) \ @@ -122,6 +126,7 @@ namespace JS { __ENUMERATE_JS_TOKEN(UnsignedShiftRight) \ __ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals) \ __ENUMERATE_JS_TOKEN(UnterminatedStringLiteral) \ + __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral) \ __ENUMERATE_JS_TOKEN(Var) \ __ENUMERATE_JS_TOKEN(Void) \ __ENUMERATE_JS_TOKEN(While) \ |