diff options
-rw-r--r-- | Libraries/LibJS/AST.cpp | 16 | ||||
-rw-r--r-- | Libraries/LibJS/AST.h | 56 | ||||
-rw-r--r-- | Libraries/LibJS/Forward.h | 1 | ||||
-rw-r--r-- | Libraries/LibJS/Lexer.cpp | 234 | ||||
-rw-r--r-- | Libraries/LibJS/Lexer.h | 61 | ||||
-rw-r--r-- | Libraries/LibJS/Makefile | 3 | ||||
-rw-r--r-- | Libraries/LibJS/Parser.cpp | 289 | ||||
-rw-r--r-- | Libraries/LibJS/Parser.h | 68 | ||||
-rw-r--r-- | Libraries/LibJS/Token.cpp | 182 | ||||
-rw-r--r-- | Libraries/LibJS/Token.h | 122 | ||||
-rw-r--r-- | Userland/js.cpp | 73 |
11 files changed, 1069 insertions, 36 deletions
diff --git a/Libraries/LibJS/AST.cpp b/Libraries/LibJS/AST.cpp index 3cb34f15bc..9ff915d931 100644 --- a/Libraries/LibJS/AST.cpp +++ b/Libraries/LibJS/AST.cpp @@ -44,6 +44,11 @@ Value FunctionDeclaration::execute(Interpreter& interpreter) const return Value(function); } +Value ExpressionStatement::execute(Interpreter& interpreter) const +{ + return m_expression->execute(interpreter); +} + Value CallExpression::execute(Interpreter& interpreter) const { if (name() == "$gc") { @@ -61,7 +66,7 @@ Value CallExpression::execute(Interpreter& interpreter) const Value ReturnStatement::execute(Interpreter& interpreter) const { - auto value = argument().execute(interpreter); + auto value = argument() ? argument()->execute(interpreter) : js_undefined(); interpreter.do_return(); return value; } @@ -310,7 +315,8 @@ void FunctionDeclaration::dump(int indent) const void ReturnStatement::dump(int indent) const { ASTNode::dump(indent); - argument().dump(indent + 1); + if (argument()) + argument()->dump(indent + 1); } void IfStatement::dump(int indent) const @@ -413,6 +419,12 @@ void ObjectExpression::dump(int indent) const ASTNode::dump(indent); } +void ExpressionStatement::dump(int indent) const +{ + ASTNode::dump(indent); + m_expression->dump(indent + 1); +} + Value ObjectExpression::execute(Interpreter& interpreter) const { return Value(interpreter.heap().allocate<Object>()); diff --git a/Libraries/LibJS/AST.h b/Libraries/LibJS/AST.h index e61285d9cd..97d318dbb6 100644 --- a/Libraries/LibJS/AST.h +++ b/Libraries/LibJS/AST.h @@ -48,7 +48,31 @@ protected: private: }; -class ScopeNode : public ASTNode { +class Statement : public ASTNode { +}; + +class ErrorStatement final : public Statement { +public: + Value execute(Interpreter&) const { return js_undefined(); } + const char* class_name() const override { return "ErrorStatement"; } +}; + +class ExpressionStatement final : public Statement { +public: + ExpressionStatement(NonnullOwnPtr<Expression> expression) + : m_expression(move(expression)) + { + } + + Value execute(Interpreter&) const override; + const char* class_name() const override { return "ExpressionStatement"; } + virtual void dump(int indent) const override; + +private: + NonnullOwnPtr<Expression> m_expression; +}; + +class ScopeNode : public Statement { public: template<typename T, typename... Args> T& append(Args&&... args) @@ -57,8 +81,12 @@ public: m_children.append(move(child)); return static_cast<T&>(m_children.last()); } + void append(NonnullOwnPtr<Statement> child) + { + m_children.append(move(child)); + } - const NonnullOwnPtrVector<ASTNode>& children() const { return m_children; } + const NonnullOwnPtrVector<Statement>& children() const { return m_children; } virtual Value execute(Interpreter&) const override; virtual void dump(int indent) const override; @@ -66,7 +94,7 @@ protected: ScopeNode() {} private: - NonnullOwnPtrVector<ASTNode> m_children; + NonnullOwnPtrVector<Statement> m_children; }; class Program : public ScopeNode { @@ -85,7 +113,7 @@ private: virtual const char* class_name() const override { return "BlockStatement"; } }; -class FunctionDeclaration : public ASTNode { +class FunctionDeclaration : public Statement { public: FunctionDeclaration(String name, NonnullOwnPtr<ScopeNode> body) : m_name(move(name)) @@ -110,14 +138,20 @@ class Expression : public ASTNode { public: }; -class ReturnStatement : public ASTNode { +class ErrorExpression final : public Expression { +public: + Value execute(Interpreter&) const { return js_undefined(); } + const char* class_name() const override { return "ErrorExpression"; } +}; + +class ReturnStatement : public Statement { public: - explicit ReturnStatement(NonnullOwnPtr<Expression> argument) + explicit ReturnStatement(OwnPtr<Expression> argument) : m_argument(move(argument)) { } - const Expression& argument() const { return *m_argument; } + const Expression* argument() const { return m_argument; } virtual Value execute(Interpreter&) const override; virtual void dump(int indent) const override; @@ -125,10 +159,10 @@ public: private: virtual const char* class_name() const override { return "ReturnStatement"; } - NonnullOwnPtr<Expression> m_argument; + OwnPtr<Expression> m_argument; }; -class IfStatement : public ASTNode { +class IfStatement : public Statement { public: IfStatement(NonnullOwnPtr<Expression> predicate, NonnullOwnPtr<ScopeNode> consequent, NonnullOwnPtr<ScopeNode> alternate) : m_predicate(move(predicate)) @@ -152,7 +186,7 @@ private: NonnullOwnPtr<ScopeNode> m_alternate; }; -class WhileStatement : public ASTNode { +class WhileStatement : public Statement { public: WhileStatement(NonnullOwnPtr<Expression> predicate, NonnullOwnPtr<ScopeNode> body) : m_predicate(move(predicate)) @@ -337,7 +371,7 @@ enum class DeclarationType { Let, }; -class VariableDeclaration : public ASTNode { +class VariableDeclaration : public Statement { public: VariableDeclaration(NonnullOwnPtr<Identifier> name, OwnPtr<Expression> initializer, DeclarationType declaration_type) : m_declaration_type(declaration_type) diff --git a/Libraries/LibJS/Forward.h b/Libraries/LibJS/Forward.h index 0af451f63e..4f38acf973 100644 --- a/Libraries/LibJS/Forward.h +++ b/Libraries/LibJS/Forward.h @@ -30,6 +30,7 @@ namespace JS { class ASTNode; class Cell; +class Expression; class Heap; class HeapBlock; class Interpreter; diff --git a/Libraries/LibJS/Lexer.cpp b/Libraries/LibJS/Lexer.cpp new file mode 100644 index 0000000000..979764e3b5 --- /dev/null +++ b/Libraries/LibJS/Lexer.cpp @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Lexer.h" +#include <AK/HashMap.h> +#include <AK/StringBuilder.h> +#include <ctype.h> +#include <stdio.h> + +namespace JS { + +HashMap<String, TokenType> Lexer::s_keywords; +HashMap<String, TokenType> Lexer::s_two_char_tokens; +HashMap<char, TokenType> Lexer::s_single_char_tokens; + +Lexer::Lexer(StringView source) + : m_source(source) + , m_current_token(TokenType::Eof, StringView(nullptr), StringView(nullptr)) +{ + if (s_keywords.is_empty()) { + s_keywords.set("true", TokenType::BoolLiteral); + s_keywords.set("false", TokenType::BoolLiteral); + s_keywords.set("catch", TokenType::Catch); + s_keywords.set("class", TokenType::Class); + s_keywords.set("const", TokenType::Const); + s_keywords.set("delete", TokenType::Delete); + s_keywords.set("do", TokenType::Do); + s_keywords.set("else", TokenType::Else); + s_keywords.set("finally", TokenType::Finally); + s_keywords.set("function", TokenType::Function); + s_keywords.set("if", TokenType::If); + s_keywords.set("interface", TokenType::Interface); + s_keywords.set("let", TokenType::Let); + s_keywords.set("new", TokenType::New); + s_keywords.set("null", TokenType::NullLiteral); + s_keywords.set("return", TokenType::Return); + s_keywords.set("try", TokenType::Try); + s_keywords.set("var", TokenType::Var); + s_keywords.set("while", TokenType::While); + } + + if (s_two_char_tokens.is_empty()) { + s_two_char_tokens.set("+=", TokenType::PlusEquals); + s_two_char_tokens.set("-=", TokenType::MinusEquals); + s_two_char_tokens.set("*=", TokenType::AsteriskEquals); + s_two_char_tokens.set("/=", TokenType::SlashEquals); + s_two_char_tokens.set("%=", TokenType::PercentEquals); + s_two_char_tokens.set("&=", TokenType::AmpersandEquals); + s_two_char_tokens.set("|=", TokenType::PipeEquals); + s_two_char_tokens.set("&&", TokenType::DoubleAmpersand); + s_two_char_tokens.set("||", TokenType::DoublePipe); + s_two_char_tokens.set("==", TokenType::EqualsEquals); + s_two_char_tokens.set("!=", TokenType::ExclamationMarkEquals); + s_two_char_tokens.set("--", TokenType::MinusMinus); + s_two_char_tokens.set("++", TokenType::PlusPlus); + s_two_char_tokens.set("<<", TokenType::ShiftLeft); + s_two_char_tokens.set(">>", TokenType::ShiftRight); + } + + if (s_single_char_tokens.is_empty()) { + s_single_char_tokens.set('&', TokenType::Ampersand); + s_single_char_tokens.set('*', TokenType::Asterisk); + s_single_char_tokens.set('[', TokenType::BracketOpen); + s_single_char_tokens.set(']', TokenType::BracketClose); + s_single_char_tokens.set(',', TokenType::Comma); + s_single_char_tokens.set('{', TokenType::CurlyOpen); + s_single_char_tokens.set('}', TokenType::CurlyClose); + s_single_char_tokens.set('=', TokenType::Equals); + s_single_char_tokens.set('!', TokenType::ExclamationMark); + s_single_char_tokens.set('-', TokenType::Minus); + s_single_char_tokens.set('(', TokenType::ParenOpen); + s_single_char_tokens.set(')', TokenType::ParenClose); + s_single_char_tokens.set('%', TokenType::Percent); + s_single_char_tokens.set('.', TokenType::Period); + s_single_char_tokens.set('|', TokenType::Pipe); + s_single_char_tokens.set('+', TokenType::Plus); + s_single_char_tokens.set('?', TokenType::QuestionMark); + s_single_char_tokens.set(';', TokenType::Semicolon); + s_single_char_tokens.set('/', TokenType::Slash); + s_single_char_tokens.set('<', TokenType::LessThan); + s_single_char_tokens.set('>', TokenType::GreaterThan); + } + consume(); +} + +void Lexer::consume() +{ + if (is_eof()) { + m_current_char = EOF; + return; + } + + m_current_char = m_source[m_position++]; +} + +bool Lexer::is_eof() const +{ + return m_position >= m_source.length(); +} + +bool Lexer::is_identifier_start() const +{ + return isalpha(m_current_char) || m_current_char == '_' || m_current_char == '$'; +} + +bool Lexer::is_identifier_middle() const +{ + return is_identifier_start() || isdigit(m_current_char); +} + +bool Lexer::is_line_comment_start() const +{ + return m_current_char == '/' && m_position < m_source.length() && m_source[m_position] == '/'; +} + +bool Lexer::is_block_comment_start() const +{ + return m_current_char == '/' && m_position < m_source.length() && m_source[m_position] == '*'; +} + +bool Lexer::is_block_comment_end() const +{ + return m_current_char == '*' && m_position < m_source.length() && m_source[m_position] == '/'; +} + +Token Lexer::next() +{ + size_t trivia_start = m_position; + + // consume up whitespace and comments + while (true) { + if (isspace(m_current_char)) { + do { + consume(); + } while (!is_eof() && isspace(m_current_char)); + } else if (is_line_comment_start()) { + consume(); + do { + consume(); + } while (!is_eof() && m_current_char != '\n'); + } else if (is_block_comment_start()) { + consume(); + do { + consume(); + } while (!is_eof() && !is_block_comment_end()); + consume(); // consume * + consume(); // consume / + } else { + break; + } + } + + size_t value_start = m_position; + TokenType token_type; + + if (is_identifier_start()) { + // identifier or keyword + do { + consume(); + } while (is_identifier_middle()); + + StringView value = m_source.substring_view(value_start - 1, m_position - value_start); + auto it = s_keywords.find(value); + if (it == s_keywords.end()) { + token_type = TokenType::Identifier; + } else { + token_type = it->value; + } + } else if (isdigit(m_current_char)) { + consume(); + while (isdigit(m_current_char)) { + consume(); + } + token_type = TokenType::NumericLiteral; + } else if (m_current_char == EOF) { + token_type = TokenType::Eof; + } else { + bool found_two_char_token = false; + if (!is_eof()) { + char secondChar = m_source[m_position]; + char twoChars[] { (char)m_current_char, secondChar, 0 }; + auto it = s_two_char_tokens.find(twoChars); + if (it != s_two_char_tokens.end()) { + found_two_char_token = true; + consume(); + consume(); + token_type = it->value; + } + } + + if (!found_two_char_token) { + auto it = s_single_char_tokens.find(m_current_char); + if (it != s_single_char_tokens.end()) { + consume(); + token_type = it->value; + } else { + consume(); + token_type = TokenType::Invalid; + } + } + } + + m_current_token = Token( + token_type, + m_source.substring_view(trivia_start - 1, value_start - trivia_start), + m_source.substring_view(value_start - 1, m_position - value_start)); + + return m_current_token; +} + +} diff --git a/Libraries/LibJS/Lexer.h b/Libraries/LibJS/Lexer.h new file mode 100644 index 0000000000..01d1fdc1ce --- /dev/null +++ b/Libraries/LibJS/Lexer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "Token.h" + +#include <AK/HashMap.h> +#include <AK/String.h> +#include <AK/StringView.h> + +namespace JS { + +class Lexer { +public: + explicit Lexer(StringView source); + Token next(); + +private: + void consume(); + bool is_eof() const; + bool is_identifier_start() const; + bool is_identifier_middle() const; + bool is_line_comment_start() const; + bool is_block_comment_start() const; + bool is_block_comment_end() const; + + StringView m_source; + size_t m_position = 0; + Token m_current_token; + int m_current_char; + + static HashMap<String, TokenType> s_keywords; + static HashMap<String, TokenType> s_two_char_tokens; + static HashMap<char, TokenType> s_single_char_tokens; +}; + +} diff --git a/Libraries/LibJS/Makefile b/Libraries/LibJS/Makefile index d1c7452397..8e07c0c206 100644 --- a/Libraries/LibJS/Makefile +++ b/Libraries/LibJS/Makefile @@ -5,9 +5,12 @@ OBJS = \ Heap.o \ HeapBlock.o \ Interpreter.o \ + Lexer.o \ Object.o \ + Parser.o \ PrimitiveString.o \ StringObject.o \ + Token.o \ Value.o LIBRARY = libjs.a diff --git a/Libraries/LibJS/Parser.cpp b/Libraries/LibJS/Parser.cpp new file mode 100644 index 0000000000..194bc01ddd --- /dev/null +++ b/Libraries/LibJS/Parser.cpp @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Parser.h" +#include <AK/StdLibExtras.h> +#include <stdio.h> + +namespace JS { +Parser::Parser(Lexer lexer) + : m_lexer(move(lexer)) + , m_current_token(m_lexer.next()) +{ +} + +NonnullOwnPtr<Program> Parser::parse_program() +{ + auto program = make<Program>(); + while (!done()) { + if (match(TokenType::Semicolon)) { + consume(); + } else if (match_statement()) { + program->append(parse_statement()); + } else { + expected("statement"); + consume(); + } + } + return program; +} + +NonnullOwnPtr<Statement> Parser::parse_statement() +{ + if (match_expression()) { + return make<JS::ExpressionStatement>(parse_expression()); + } + + switch (m_current_token.type()) { + case TokenType::Function: + return parse_function_declaration(); + case TokenType::CurlyOpen: + return parse_block_statement(); + case TokenType::Return: + return parse_return_statement(); + case TokenType::Var: + return parse_variable_declaration(); + default: + m_has_errors = true; + expected("statement (missing switch case)"); + consume(); + return make<ErrorStatement>(); + } +} + +NonnullOwnPtr<Expression> Parser::parse_primary_expression() +{ + switch (m_current_token.type()) { + case TokenType::ParenOpen: { + consume(TokenType::ParenOpen); + auto expression = parse_expression(); + consume(TokenType::ParenClose); + return expression; + } + case TokenType::Identifier: + return make<Identifier>(consume().value()); + case TokenType::NumericLiteral: + return make<Literal>(Value(consume().double_value())); + case TokenType::BoolLiteral: + return make<Literal>(Value(consume().bool_value())); + case TokenType::CurlyOpen: + return parse_object_expression(); + default: + m_has_errors = true; + expected("primary expression (missing switch case)"); + consume(); + return make<ErrorExpression>(); + } +} + +NonnullOwnPtr<ObjectExpression> Parser::parse_object_expression() +{ + // FIXME: Parse actual object expression + consume(TokenType::CurlyOpen); + consume(TokenType::CurlyClose); + return make<ObjectExpression>(); +} + +NonnullOwnPtr<Expression> Parser::parse_expression() +{ + auto expression = parse_primary_expression(); + while (match_secondary_expression()) { + expression = parse_secondary_expression(move(expression)); + } + return expression; +} + +NonnullOwnPtr<Expression> Parser::parse_secondary_expression(NonnullOwnPtr<Expression> lhs) +{ + switch (m_current_token.type()) { + case TokenType::Plus: + consume(); + return make<BinaryExpression>(BinaryOp::Plus, move(lhs), parse_expression()); + case TokenType::Minus: + consume(); + return make<BinaryExpression>(BinaryOp::Minus, move(lhs), parse_expression()); + case TokenType::ParenOpen: + return parse_call_expression(move(lhs)); + case TokenType::Equals: + consume(); + return make<AssignmentExpression>(AssignmentOp::Assign, move(lhs), parse_expression()); + default: + m_has_errors = true; + expected("secondary expression (missing switch case)"); + consume(); + return make<ErrorExpression>(); + } +} + +NonnullOwnPtr<CallExpression> Parser::parse_call_expression(NonnullOwnPtr<Expression> lhs) +{ + // FIXME: allow arguments + consume(TokenType::ParenOpen); + consume(TokenType::ParenClose); + + // FIXME: Allow lhs expression instead of just a string + if (lhs->is_identifier()) { + return make<CallExpression>(static_cast<Identifier*>(lhs.ptr())->string()); + } + + m_has_errors = true; + return make<CallExpression>("***ERROR***"); +} + +NonnullOwnPtr<ReturnStatement> Parser::parse_return_statement() +{ + consume(TokenType::Return); + if (match_expression()) { + return make<ReturnStatement>(parse_expression()); + } + return make<ReturnStatement>(nullptr); +} + +NonnullOwnPtr<BlockStatement> Parser::parse_block_statement() +{ + auto block = make<BlockStatement>(); + consume(TokenType::CurlyOpen); + while (!done() && !match(TokenType::CurlyClose)) { + if (match(TokenType::Semicolon)) { + consume(); + } else if (match_statement()) { + block->append(parse_statement()); + } else { + expected("statement"); + consume(); + } + } + consume(TokenType::CurlyClose); + return block; +} + +NonnullOwnPtr<FunctionDeclaration> Parser::parse_function_declaration() +{ + consume(TokenType::Function); + auto name = consume(TokenType::Identifier).value(); + consume(TokenType::ParenOpen); + while (match(TokenType::Identifier)) { + // FIXME: actually add parameters to function + consume(TokenType::Identifier); + if (match(TokenType::ParenClose)) { + break; + } + consume(TokenType::Comma); + } + consume(TokenType::ParenClose); + auto body = parse_block_statement(); + return make<FunctionDeclaration>(name, move(body)); +} + +NonnullOwnPtr<VariableDeclaration> Parser::parse_variable_declaration() +{ + consume(TokenType::Var); + auto name = consume(TokenType::Identifier).value(); + OwnPtr<Expression> initializer; + if (match(TokenType::Equals)) { + consume(); + initializer = parse_expression(); + } + return make<VariableDeclaration>(make<Identifier>(name), move(initializer), DeclarationType::Var); +} + +bool Parser::match(TokenType type) const +{ + return m_current_token.type() == type; +} + +bool Parser::match_expression() const +{ + auto type = m_current_token.type(); + return type == TokenType::BoolLiteral + || type == TokenType::NumericLiteral + || type == TokenType::StringLiteral + || type == TokenType::NullLiteral + || type == TokenType::Identifier + || type == TokenType::New + || type == TokenType::CurlyOpen + || type == TokenType::BracketOpen + || type == TokenType::ParenOpen; +} + +bool Parser::match_secondary_expression() const +{ + auto type = m_current_token.type(); + return type == TokenType::Plus + || type == TokenType::Minus + || type == TokenType::Asterisk + || type == TokenType::Slash + || type == TokenType::Equals + || type == TokenType::ParenOpen; +} + +bool Parser::match_statement() const +{ + auto type = m_current_token.type(); + return match_expression() + || type == TokenType::Function + || type == TokenType::Return + || type == TokenType::Let + || type == TokenType::Catch + || type == TokenType::Class + || type == TokenType::Delete + || type == TokenType::Do + || type == TokenType::If + || type == TokenType::Try + || type == TokenType::While + || type == TokenType::Const + || type == TokenType::CurlyOpen + || type == TokenType::Var; +} + +bool Parser::done() const +{ + return match(TokenType::Eof); +} + +Token Parser::consume() +{ + auto oldToken = m_current_token; + m_current_token = m_lexer.next(); + return oldToken; +} + +Token Parser::consume(TokenType type) +{ + if (m_current_token.type() != type) { + m_has_errors = true; + fprintf(stderr, "Error: Unexpected token %s. Expected %s\n", m_current_token.name(), Token::name(type)); + } + return consume(); +} + +void Parser::expected(const char* what) +{ + m_has_errors = true; + fprintf(stderr, "Error: Unexpected token %s. Expected %s\n", m_current_token.name(), what); +} + +} diff --git a/Libraries/LibJS/Parser.h b/Libraries/LibJS/Parser.h new file mode 100644 index 0000000000..1800004c16 --- /dev/null +++ b/Libraries/LibJS/Parser.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "AST.h" +#include "Lexer.h" +#include <AK/NonnullOwnPtr.h> + +namespace JS { +class Parser { +public: + explicit Parser(Lexer lexer); + + NonnullOwnPtr<Program> parse_program(); + + NonnullOwnPtr<Statement> parse_statement(); + NonnullOwnPtr<BlockStatement> parse_block_statement(); + NonnullOwnPtr<ReturnStatement> parse_return_statement(); + NonnullOwnPtr<FunctionDeclaration> parse_function_declaration(); + NonnullOwnPtr<VariableDeclaration> parse_variable_declaration(); + + NonnullOwnPtr<Expression> parse_expression(); + NonnullOwnPtr<Expression> parse_primary_expression(); + NonnullOwnPtr<ObjectExpression> parse_object_expression(); + NonnullOwnPtr<Expression> parse_secondary_expression(NonnullOwnPtr<Expression>); + NonnullOwnPtr<CallExpression> parse_call_expression(NonnullOwnPtr<Expression>); + + bool has_errors() const { return m_has_errors; } + +private: + bool match_expression() const; + bool match_secondary_expression() const; + bool match_statement() const; + bool match(TokenType type) const; + bool done() const; + void expected(const char* what); + Token consume(); + Token consume(TokenType type); + + Lexer m_lexer; + Token m_current_token; + bool m_has_errors = false; +}; +} diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp new file mode 100644 index 0000000000..3bf3d3e7bd --- /dev/null +++ b/Libraries/LibJS/Token.cpp @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Token.h" + +namespace JS { + +const char* Token::name(TokenType type) +{ + switch (type) { + case TokenType::Ampersand: + return "Ampersand"; + case TokenType::AmpersandEquals: + return "AmpersandEquals"; + case TokenType::Asterisk: + return "Asterisk"; + case TokenType::AsteriskEquals: + return "AsteriskEquals"; + case TokenType::BoolLiteral: + return "BoolLiteral"; + case TokenType::BracketOpen: + return "BracketOpen"; + case TokenType::BracketClose: + return "BracketClose"; + case TokenType::Catch: + return "Catch"; + case TokenType::Class: + return "Class"; + case TokenType::Comma: + return "Comma"; + case TokenType::Const: + return "Const"; + case TokenType::CurlyClose: + return "CurlyClose"; + case TokenType::CurlyOpen: + return "CurlyOpen"; + case TokenType::Delete: + return "Delete"; + case TokenType::Do: + return "Do"; + case TokenType::DoubleAmpersand: + return "DoubleAmpersand"; + case TokenType::DoublePipe: + return "DoublePipe"; + case TokenType::Else: + return "Else"; + case TokenType::Eof: + return "Eof"; + case TokenType::Equals: + return "Equals"; + case TokenType::EqualsEquals: + return "EqualsEquals"; + case TokenType::ExclamationMark: + return "ExclamationMark"; + case TokenType::ExclamationMarkEquals: + return "ExclamationMarkEquals"; + case TokenType::Finally: + return "Finally"; + case TokenType::Function: + return "Function"; + case TokenType::GreaterThan: + return "GreaterThan"; + case TokenType::Identifier: + return "Identifier"; + case TokenType::If: + return "If"; + case TokenType::Interface: + return "Interface"; + case TokenType::Invalid: + return "Invalid"; + case TokenType::LessThan: + return "LessThan"; + case TokenType::Let: + return "Let"; + case TokenType::Minus: + return "Minus"; + case TokenType::MinusEquals: + return "MinusEquals"; + case TokenType::MinusMinus: + return "MinusMinus"; + case TokenType::New: + return "New"; + case TokenType::NullLiteral: + return "NullLiteral"; + case TokenType::NumericLiteral: + return "NumericLiteral"; + case TokenType::ParenClose: + return "ParenClose"; + case TokenType::ParenOpen: + return "ParenOpen"; + case TokenType::Percent: + return "Percent"; + case TokenType::PercentEquals: + return "PercentEquals"; + case TokenType::Period: + return "Period"; + case TokenType::Pipe: + return "Pipe"; + case TokenType::PipeEquals: + return "PipeEquals"; + case TokenType::Plus: + return "Plus"; + case TokenType::PlusEquals: + return "PlusEquals"; + case TokenType::PlusPlus: + return "PlusPlus"; + case TokenType::QuestionMark: + return "QuestionMark"; + case TokenType::RegexLiteral: + return "RegexLiteral"; + case TokenType::Return: + return "Return"; + case TokenType::Semicolon: + return "Semicolon"; + case TokenType::ShiftLeft: + return "ShiftLeft"; + case TokenType::ShiftRight: + return "ShiftRight"; + case TokenType::Slash: + return "Slash"; + case TokenType::SlashEquals: + return "SlashEquals"; + case TokenType::StringLiteral: + return "StringLiteral"; + case TokenType::Try: + return "Try"; + case TokenType::Var: + return "Var"; + case TokenType::While: + return "While"; + default: + return "<Unknown>"; + } +} + +const char* Token::name() const +{ + return name(m_type); +} + +double Token::double_value() const +{ + // FIXME: need to parse double instead of int + bool ok; + return m_value.to_int(ok); +} + +String Token::string_value() const +{ + // FIXME: unescape the string and remove quotes + return m_value; +} + +bool Token::bool_value() const +{ + return m_value == "true"; +} + +} diff --git a/Libraries/LibJS/Token.h b/Libraries/LibJS/Token.h new file mode 100644 index 0000000000..e7f6282304 --- /dev/null +++ b/Libraries/LibJS/Token.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <AK/String.h> +#include <AK/StringView.h> + +namespace JS { + +enum class TokenType { + Ampersand, + AmpersandEquals, + Asterisk, + AsteriskEquals, + BoolLiteral, + BracketClose, + BracketOpen, + Catch, + Class, + Comma, + Const, + CurlyClose, + CurlyOpen, + Delete, + Do, + DoubleAmpersand, + DoublePipe, + Else, + Eof, + Equals, + EqualsEquals, + ExclamationMark, + ExclamationMarkEquals, + Finally, + Function, + GreaterThan, + Identifier, + If, + Interface, + Invalid, + LessThan, + Let, + Minus, + MinusEquals, + MinusMinus, + New, + NullLiteral, + NumericLiteral, + ParenClose, + ParenOpen, + Percent, + PercentEquals, + Period, + Pipe, + PipeEquals, + Plus, + PlusEquals, + PlusPlus, + QuestionMark, + RegexLiteral, + Return, + Semicolon, + ShiftLeft, + ShiftRight, + Slash, + SlashEquals, + StringLiteral, + Try, + Var, + While +}; + +class Token { +public: + Token(TokenType type, StringView trivia, StringView value) + : m_type(type) + , m_trivia(trivia) + , m_value(value) + { + } + + TokenType type() const { return m_type; } + const char* name() const; + static const char* name(TokenType); + + const StringView& trivia() const { return m_trivia; } + const StringView& value() const { return m_value; } + double double_value() const; + String string_value() const; + bool bool_value() const; + +private: + TokenType m_type; + StringView m_trivia; + StringView m_value; +}; + +} diff --git a/Userland/js.cpp b/Userland/js.cpp index cc6e09a2ad..6c919fdb6b 100644 --- a/Userland/js.cpp +++ b/Userland/js.cpp @@ -28,20 +28,20 @@ #include <LibJS/AST.h> #include <LibJS/Interpreter.h> #include <LibJS/Object.h> +#include <LibJS/Parser.h> #include <LibJS/PrimitiveString.h> #include <LibJS/Value.h> #include <stdio.h> -#define PROGRAM 4 +#define PROGRAM 6 -static void build_program(JS::Program&, JS::Heap&); +static NonnullOwnPtr<JS::Program> build_program(JS::Heap&); int main() { JS::Interpreter interpreter; - auto program = make<JS::Program>(); - build_program(*program, interpreter.heap()); + auto program = build_program(interpreter.heap()); program->dump(0); auto result = interpreter.run(*program); @@ -55,7 +55,7 @@ int main() } #if PROGRAM == 1 -void build_program(JS::Program& program, JS::Heap&) +NonnullOwnPtr<JS::Program> build_program(JS::Heap&) { // function foo() { return (1 + 2) + 3; } // foo(); @@ -70,11 +70,13 @@ void build_program(JS::Program& program, JS::Heap&) make<JS::Literal>(JS::Value(2))), make<JS::Literal>(JS::Value(3)))); - program.append<JS::FunctionDeclaration>("foo", move(block)); - program.append<JS::CallExpression>("foo"); + auto program = make<JS::Program>(); + program->append<JS::FunctionDeclaration>("foo", move(block)); + program->append<JS::ExpressionStatement>(make<JS::CallExpression>("foo")); + return program; } #elif PROGRAM == 2 -void build_program(JS::Program& program, JS::Heap&) +NonnullOwnPtr<JS::Program> build_program(JS::Heap&) { // c = 1; // function foo() { @@ -84,10 +86,11 @@ void build_program(JS::Program& program, JS::Heap&) // } // foo(); - program.append<JS::AssignmentExpression>( + auto program = make<JS::Program>(); + program->append<JS::ExpressionStatement>(make<JS::AssignmentExpression>( JS::AssignmentOp::Assign, make<JS::Identifier>("c"), - make<JS::Literal>(JS::Value(1))); + make<JS::Literal>(JS::Value(1)))); auto block = make<JS::BlockStatement>(); block->append<JS::VariableDeclaration>( @@ -107,11 +110,14 @@ void build_program(JS::Program& program, JS::Heap&) make<JS::Identifier>("a"), make<JS::Identifier>("b")), make<JS::Identifier>("c"))); - program.append<JS::FunctionDeclaration>("foo", move(block)); - program.append<JS::CallExpression>("foo"); + + program->append<JS::FunctionDeclaration>("foo", move(block)); + program->append<JS::ExpressionStatement>(make<JS::CallExpression>("foo")); + + return program; } #elif PROGRAM == 3 -void build_program(JS::Program& program, JS::Heap&) +NonnullOwnPtr<JS::Program> build_program(JS::Heap&) { // function foo() { // var x = {}; @@ -124,13 +130,15 @@ void build_program(JS::Program& program, JS::Heap&) make<JS::Identifier>("x"), make<JS::ObjectExpression>(), JS::DeclarationType::Var); - block->append<JS::CallExpression>("$gc"); + block->append<JS::ExpressionStatement>(make<JS::CallExpression>("$gc")); - program.append<JS::FunctionDeclaration>("foo", move(block)); - program.append<JS::CallExpression>("foo"); + auto program = make<JS::Program>(); + program->append<JS::FunctionDeclaration>("foo", move(block)); + program->append<JS::ExpressionStatement>(make<JS::CallExpression>("foo")); + return program; } #elif PROGRAM == 4 -void build_program(JS::Program& program, JS::Heap&) +NonnullOwnPtr<JS::Program> build_program(JS::Heap&) { // function foo() { // function bar() { @@ -147,19 +155,38 @@ void build_program(JS::Program& program, JS::Heap&) auto block_foo = make<JS::BlockStatement>(); block_foo->append<JS::FunctionDeclaration>("bar", move(block_bar)); - block_foo->append<JS::CallExpression>("bar"); + block_foo->append<JS::ExpressionStatement>(make<JS::CallExpression>("bar")); block_foo->append<JS::ReturnStatement>(make<JS::Identifier>("y")); - program.append<JS::FunctionDeclaration>("foo", move(block_foo)); - program.append<JS::CallExpression>("foo"); + auto program = make<JS::Program>(); + program->append<JS::FunctionDeclaration>("foo", move(block_foo)); + program->append<JS::ExpressionStatement>(make<JS::CallExpression>("foo")); + return program; } #elif PROGRAM == 5 -void build_program(JS::Program& program, JS::Heap& heap) +NonnullOwnPtr<JS::Program> build_program(JS::Heap& heap) { // "hello friends".length - program.append<JS::MemberExpression>( + auto program = make<JS::Program>(); + program->append<JS::ExpressionStatement>(make<JS::MemberExpression>( make<JS::Literal>(JS::Value(js_string(heap, "hello friends"))), - make<JS::Identifier>("length")); + make<JS::Identifier>("length"))); + + return program; +} +#elif PROGRAM == 6 +NonnullOwnPtr<JS::Program> build_program(JS::Heap&) +{ + const char* source = "var foo = 1;\n" + "function bar() {\n" + " return 38;\n" + "}\n" + "foo = {};\n" + "foo = bar() + 4;\n" + "foo;\n"; + + auto parser = JS::Parser(JS::Lexer(source)); + return parser.parse_program(); } #endif |