diff options
author | Itamar <itamar8910@gmail.com> | 2021-01-23 16:47:20 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-01-27 21:10:57 +0100 |
commit | c96b6987c42ed14644560e29ba19c33dc0ae0223 (patch) | |
tree | 79fb9120f139c70b2abd94238f43e63b44afade8 | |
parent | aec9658b4f217cb5e1b15720499eacb85e95b6ac (diff) | |
download | serenity-c96b6987c42ed14644560e29ba19c33dc0ae0223.zip |
LibCpp: Add the beginning of a C++ parser
This parser will be used by the C++ language server to provide better
auto-complete (& maybe also other things in the future).
It is designed to be error tolerant, and keeps track of the position
spans of the AST nodes, which should be useful later for incremental
parsing.
-rw-r--r-- | AK/ScopeLogger.h | 65 | ||||
-rw-r--r-- | Meta/CMake/all_the_debug_macros.cmake | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibCpp/AST.cpp | 382 | ||||
-rw-r--r-- | Userland/Libraries/LibCpp/AST.h | 585 | ||||
-rw-r--r-- | Userland/Libraries/LibCpp/CMakeLists.txt | 2 | ||||
-rw-r--r-- | Userland/Libraries/LibCpp/Lexer.cpp | 22 | ||||
-rw-r--r-- | Userland/Libraries/LibCpp/Lexer.h | 23 | ||||
-rw-r--r-- | Userland/Libraries/LibCpp/Parser.cpp | 1000 | ||||
-rw-r--r-- | Userland/Libraries/LibCpp/Parser.h | 162 | ||||
-rw-r--r-- | Userland/Utilities/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Userland/Utilities/CppParserTest.cpp | 64 |
11 files changed, 2298 insertions, 9 deletions
diff --git a/AK/ScopeLogger.h b/AK/ScopeLogger.h new file mode 100644 index 0000000000..748fbd11b2 --- /dev/null +++ b/AK/ScopeLogger.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020, Denis Campredon <deni_@hotmail.fr> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include <AK/StringBuilder.h> + +#ifdef DEBUG_SPAM + +namespace AK { +class ScopeLogger { +public: + ScopeLogger(StringView&& fun) + : m_fun(fun) + { + StringBuilder sb; + + for (auto indent = m_depth++; indent > 0; indent--) + sb.append(' '); + dbgln("\033[1;{}m{}entering {}\033[0m", m_depth % 8 + 30, sb.to_string(), m_fun); + } + ~ScopeLogger() + { + StringBuilder sb; + + for (auto indent = --m_depth; indent > 0; indent--) + sb.append(' '); + dbgln("\033[1;{}m{}leaving {}\033[0m", (m_depth + 1) % 8 + 30, sb.to_string(), m_fun); + } + +private: + static inline size_t m_depth = 0; + StringView m_fun; +}; +} + +using AK::ScopeLogger; +# define SCOPE_LOGGER() auto tmp##__COUNTER__ = ScopeLogger(__PRETTY_FUNCTION__); + +#else +# define SCOPE_LOGGER() +#endif diff --git a/Meta/CMake/all_the_debug_macros.cmake b/Meta/CMake/all_the_debug_macros.cmake index 2e54cb1fc7..2d93233773 100644 --- a/Meta/CMake/all_the_debug_macros.cmake +++ b/Meta/CMake/all_the_debug_macros.cmake @@ -159,6 +159,7 @@ set(UPDATE_COALESCING_DEBUG ON) set(VOLATILE_PAGE_RANGES_DEBUG ON) set(WSMESSAGELOOP_DEBUG ON) set(GPT_DEBUG ON) +set(CPP_DEBUG ON) # False positive: DEBUG is a flag but it works differently. # set(DEBUG ON) diff --git a/Userland/Libraries/LibCpp/AST.cpp b/Userland/Libraries/LibCpp/AST.cpp new file mode 100644 index 0000000000..a2b6db8cfe --- /dev/null +++ b/Userland/Libraries/LibCpp/AST.cpp @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "AST.h" +#include "AK/LogStream.h" + +namespace Cpp { + +static void print_indent(int indent) +{ + for (int i = 0; i < indent * 2; ++i) + dbgprintf(" "); +} + +void ASTNode::dump(size_t indent) const +{ + print_indent(indent); + dbgprintf("%s[%lu:%lu->%lu:%lu]\n", class_name(), start().line, start().column, end().line, end().column); +} + +void TranslationUnit::dump(size_t indent) const +{ + ASTNode::dump(indent); + for (const auto& child : m_children) { + child.dump(indent + 1); + } +} + +void FunctionDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_return_type->dump(indent + 1); + if (!m_name.is_null()) { + print_indent(indent + 1); + dbgprintf("%s\n", m_name.to_string().characters()); + } + print_indent(indent + 1); + dbgprintf("(\n"); + for (const auto& arg : m_parameters) { + arg.dump(indent + 1); + } + print_indent(indent + 1); + dbgprintf(")\n"); + if (!m_definition.is_null()) { + m_definition->dump(indent + 1); + } +} + +NonnullRefPtrVector<Declaration> FunctionDeclaration::declarations() const +{ + NonnullRefPtrVector<Declaration> declarations; + for (auto& arg : m_parameters) { + declarations.append(arg); + } + return declarations; +} + +void Type::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent + 1); + dbgprintf("%s\n", m_name.to_string().characters()); +} + +void Parameter::dump(size_t indent) const +{ + ASTNode::dump(indent); + if (!m_name.is_null()) { + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); + } + m_type->dump(indent + 1); + // print_indent(indent); + // dbgprintf("%s [%s]\n", m_name.is_null() ? 
"" : m_name.to_string().characters(), m_type->name().to_string().characters()); +} + +void FunctionDefinition::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("{\n"); + for (const auto& statement : m_statements) { + statement.dump(indent + 1); + } + print_indent(indent); + dbgprintf("}\n"); +} + +NonnullRefPtrVector<Declaration> FunctionDefinition::declarations() const +{ + NonnullRefPtrVector<Declaration> declarations; + for (auto& statement : m_statements) { + declarations.append(statement.declarations()); + } + return declarations; +} + +void VariableDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_type->dump(indent + 1); + print_indent(indent + 1); + dbgprintf("%s\n", m_name.to_string().characters()); + if (m_initial_value) + m_initial_value->dump(indent + 1); +} + +void Identifier::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); +} + +void NumericLiteral::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_value.to_string().characters()); +} + +void BinaryExpression::dump(size_t indent) const +{ + ASTNode::dump(indent); + + const char* op_string = nullptr; + switch (m_op) { + case BinaryOp::Addition: + op_string = "+"; + break; + case BinaryOp::Subtraction: + op_string = "-"; + break; + case BinaryOp::Multiplication: + op_string = "*"; + break; + case BinaryOp::Division: + op_string = "/"; + break; + case BinaryOp::Modulo: + op_string = "%"; + break; + case BinaryOp::GreaterThan: + op_string = ">"; + break; + case BinaryOp::GreaterThanEquals: + op_string = ">="; + break; + case BinaryOp::LessThan: + op_string = "<"; + break; + case BinaryOp::LessThanEquals: + op_string = "<="; + break; + case BinaryOp::BitwiseAnd: + op_string = "&"; + break; + case BinaryOp::BitwiseOr: + op_string = "|"; + break; + case BinaryOp::BitwiseXor: + op_string = "^"; + break; + case 
BinaryOp::LeftShift: + op_string = "<<"; + break; + case BinaryOp::RightShift: + op_string = ">>"; + break; + } + + m_lhs->dump(indent + 1); + print_indent(indent + 1); + ASSERT(op_string); + dbgprintf("%s\n", op_string); + m_rhs->dump(indent + 1); +} + +void AssignmentExpression::dump(size_t indent) const +{ + ASTNode::dump(indent); + + const char* op_string = nullptr; + switch (m_op) { + case AssignmentOp::Assignment: + op_string = "="; + break; + case AssignmentOp::AdditionAssignment: + op_string = "+="; + break; + case AssignmentOp::SubtractionAssignment: + op_string = "-="; + break; + } + + m_lhs->dump(indent + 1); + print_indent(indent + 1); + ASSERT(op_string); + dbgprintf("%s\n", op_string); + m_rhs->dump(indent + 1); +} + +void FunctionCall::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); + for (const auto& arg : m_arguments) { + arg.dump(indent + 1); + } +} + +void StringLiteral::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent + 1); + dbgprintf("%s\n", m_value.to_string().characters()); +} + +void ReturnStatement::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_value->dump(indent + 1); +} + +void EnumDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); + for (auto& entry : m_entries) { + print_indent(indent + 1); + dbgprintf("%s\n", entry.to_string().characters()); + } +} + +void StructOrClassDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent); + dbgprintf("%s\n", m_name.to_string().characters()); + for (auto& member : m_members) { + member.dump(indent + 1); + } +} + +void MemberDeclaration::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_type->dump(indent + 1); + print_indent(indent + 1); + dbgprintf("%s\n", m_name.to_string().characters()); + if (m_initial_value) { + m_initial_value->dump(indent 
+ 2); + } +} + +void UnaryExpression::dump(size_t indent) const +{ + ASTNode::dump(indent); + + const char* op_string = nullptr; + switch (m_op) { + case UnaryOp::BitwiseNot: + op_string = "~"; + break; + case UnaryOp::Not: + op_string = "!"; + break; + case UnaryOp::Plus: + op_string = "+"; + break; + case UnaryOp::Minus: + op_string = "-"; + break; + case UnaryOp::PlusPlus: + op_string = "++"; + break; + default: + op_string = "<invalid>"; + } + + ASSERT(op_string); + print_indent(indent + 1); + dbgprintf("%s\n", op_string); + m_lhs->dump(indent + 1); +} + +void BooleanLiteral::dump(size_t indent) const +{ + ASTNode::dump(indent); + print_indent(indent + 1); + dbgprintf("%s\n", m_value ? "true" : "false"); +} + +void Pointer::dump(size_t indent) const +{ + ASTNode::dump(indent); + if (!m_pointee.is_null()) { + m_pointee->dump(indent + 1); + } +} + +void MemberExpression::dump(size_t indent) const +{ + ASTNode::dump(indent); + m_object->dump(indent + 1); + m_property->dump(indent + 1); +} + +void BlockStatement::dump(size_t indent) const +{ + ASTNode::dump(indent); + for (auto& statement : m_statements) { + statement.dump(indent + 1); + } +} + +void ForStatement::dump(size_t indent) const +{ + ASTNode::dump(indent); + if (m_init) + m_init->dump(indent + 1); + if (m_test) + m_test->dump(indent + 1); + if (m_update) + m_update->dump(indent + 1); + if (m_body) + m_body->dump(indent + 1); +} + +NonnullRefPtrVector<Declaration> Statement::declarations() const +{ + if (is_declaration()) { + NonnullRefPtrVector<Declaration> vec; + const auto& decl = static_cast<const Declaration&>(*this); + vec.empend(const_cast<Declaration&>(decl)); + return vec; + } + return {}; +} + +NonnullRefPtrVector<Declaration> ForStatement::declarations() const +{ + auto declarations = m_init->declarations(); + declarations.append(m_body->declarations()); + return declarations; +} + +NonnullRefPtrVector<Declaration> BlockStatement::declarations() const +{ + NonnullRefPtrVector<Declaration> 
declarations; + for (auto& statement : m_statements) { + declarations.append(statement.declarations()); + } + return declarations; +} + +} diff --git a/Userland/Libraries/LibCpp/AST.h b/Userland/Libraries/LibCpp/AST.h new file mode 100644 index 0000000000..5f31b0f2e4 --- /dev/null +++ b/Userland/Libraries/LibCpp/AST.h @@ -0,0 +1,585 @@ +/* + * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include "Applications/Piano/Music.h" +#include <AK/NonnullRefPtrVector.h> +#include <AK/Optional.h> +#include <AK/RefCounted.h> +#include <AK/String.h> +#include <AK/Vector.h> +#include <LibCpp/Lexer.h> + +namespace Cpp { + +class ASTNode; +class TranslationUnit; +class Declaration; +class FunctionDefinition; +class Type; +class Parameter; +class Statement; + +class ASTNode : public RefCounted<ASTNode> { +public: + virtual ~ASTNode() = default; + virtual const char* class_name() const = 0; + virtual void dump(size_t indent) const; + + ASTNode* parent() const { return m_parent; } + Position start() const + { + ASSERT(m_start.has_value()); + return m_start.value(); + } + Position end() const + { + ASSERT(m_end.has_value()); + return m_end.value(); + } + void set_end(const Position& end) { m_end = end; } + void set_parent(ASTNode& parent) { m_parent = &parent; } + + virtual NonnullRefPtrVector<Declaration> declarations() const { return {}; } + + virtual bool is_identifier() const { return false; } + virtual bool is_member_expression() const { return false; } + virtual bool is_variable_or_parameter_declaration() const { return false; } + +protected: + ASTNode(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : m_parent(parent) + , m_start(start) + , m_end(end) + { + } + +private: + ASTNode* m_parent { nullptr }; + Optional<Position> m_start; + Optional<Position> m_end; +}; + +class TranslationUnit : public ASTNode { + +public: + virtual ~TranslationUnit() override = default; + const NonnullRefPtrVector<Declaration>& children() const { return m_children; } + virtual const char* class_name() const override { return "TranslationUnit"; } + virtual void dump(size_t indent) const override; + void append(NonnullRefPtr<Declaration> child) + { + m_children.append(move(child)); + } + virtual NonnullRefPtrVector<Declaration> declarations() const override { return m_children; } + +public: + TranslationUnit(ASTNode* parent, 
Optional<Position> start, Optional<Position> end) + : ASTNode(parent, start, end) + { + } + +private: + NonnullRefPtrVector<Declaration> m_children; +}; + +class Statement : public ASTNode { +public: + virtual ~Statement() override = default; + virtual const char* class_name() const override { return "Statement"; } + + virtual bool is_declaration() const { return false; } + virtual NonnullRefPtrVector<Declaration> declarations() const override; + +protected: + Statement(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : ASTNode(parent, start, end) + { + } +}; + +class Declaration : public Statement { + +public: + virtual bool is_declaration() const override { return true; } + virtual bool is_variable_declaration() const { return false; } + virtual bool is_parameter() const { return false; } + virtual bool is_struct_or_class() const { return false; } + +protected: + Declaration(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Statement(parent, start, end) + { + } +}; + +class InvalidDeclaration : public Declaration { + +public: + virtual ~InvalidDeclaration() override = default; + virtual const char* class_name() const override { return "InvalidDeclaration"; } + InvalidDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Declaration(parent, start, end) + { + } +}; + +class FunctionDeclaration : public Declaration { +public: + virtual ~FunctionDeclaration() override = default; + virtual const char* class_name() const override { return "FunctionDeclaration"; } + virtual void dump(size_t indent) const override; + const StringView& name() const { return m_name; } + RefPtr<FunctionDefinition> definition() { return m_definition; } + + FunctionDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Declaration(parent, start, end) + { + } + + virtual NonnullRefPtrVector<Declaration> declarations() const override; + + StringView m_name; + RefPtr<Type> m_return_type; + 
NonnullRefPtrVector<Parameter> m_parameters; + RefPtr<FunctionDefinition> m_definition; +}; + +class VariableOrParameterDeclaration : public Declaration { +public: + virtual ~VariableOrParameterDeclaration() override = default; + virtual bool is_variable_or_parameter_declaration() const override { return true; } + + StringView m_name; + RefPtr<Type> m_type; + +protected: + VariableOrParameterDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Declaration(parent, start, end) + { + } +}; + +class Parameter : public VariableOrParameterDeclaration { +public: + virtual ~Parameter() override = default; + virtual const char* class_name() const override { return "Parameter"; } + virtual void dump(size_t indent) const override; + + Parameter(ASTNode* parent, Optional<Position> start, Optional<Position> end, StringView name) + : VariableOrParameterDeclaration(parent, start, end) + { + m_name = name; + } + + virtual bool is_parameter() const override { return true; } +}; + +class Type : public ASTNode { +public: + virtual ~Type() override = default; + virtual const char* class_name() const override { return "Type"; } + const StringView& name() const { return m_name; } + virtual void dump(size_t indent) const override; + + Type(ASTNode* parent, Optional<Position> start, Optional<Position> end, StringView name) + : ASTNode(parent, start, end) + , m_name(name) + { + } + + StringView m_name; +}; + +class Pointer : public Type { +public: + virtual ~Pointer() override = default; + virtual const char* class_name() const override { return "Pointer"; } + virtual void dump(size_t indent) const override; + + Pointer(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Type(parent, start, end, {}) + { + } + + RefPtr<Type> m_pointee; +}; + +class FunctionDefinition : public ASTNode { +public: + virtual ~FunctionDefinition() override = default; + virtual const char* class_name() const override { return "FunctionDefinition"; } + 
NonnullRefPtrVector<Statement>& statements() { return m_statements; } + virtual void dump(size_t indent) const override; + + FunctionDefinition(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : ASTNode(parent, start, end) + { + } + + virtual NonnullRefPtrVector<Declaration> declarations() const override; + + NonnullRefPtrVector<Statement> m_statements; +}; + +class InvalidStatement : public Statement { +public: + virtual ~InvalidStatement() override = default; + virtual const char* class_name() const override { return "InvalidStatement"; } + InvalidStatement(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Statement(parent, start, end) + { + } +}; + +class Expression : public Statement { +public: + virtual ~Expression() override = default; + virtual const char* class_name() const override { return "Expression"; } + +protected: + Expression(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Statement(parent, start, end) + { + } +}; + +class InvalidExpression : public Expression { +public: + virtual ~InvalidExpression() override = default; + virtual const char* class_name() const override { return "InvalidExpression"; } + InvalidExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Expression(parent, start, end) + { + } +}; + +class VariableDeclaration : public VariableOrParameterDeclaration { +public: + virtual ~VariableDeclaration() override = default; + virtual const char* class_name() const override { return "VariableDeclaration"; } + virtual void dump(size_t indent) const override; + + VariableDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : VariableOrParameterDeclaration(parent, start, end) + { + } + + virtual bool is_variable_declaration() const override { return true; } + + RefPtr<Expression> m_initial_value; +}; + +class Identifier : public Expression { +public: + virtual ~Identifier() override = default; + virtual const char* 
class_name() const override { return "Identifier"; } + virtual void dump(size_t indent) const override; + + Identifier(ASTNode* parent, Optional<Position> start, Optional<Position> end, StringView name) + : Expression(parent, start, end) + , m_name(name) + { + } + Identifier(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Identifier(parent, start, end, {}) + { + } + + virtual bool is_identifier() const override { return true; } + + StringView m_name; +}; + +class NumericLiteral : public Expression { +public: + virtual ~NumericLiteral() override = default; + virtual const char* class_name() const override { return "NumricLiteral"; } + virtual void dump(size_t indent) const override; + + NumericLiteral(ASTNode* parent, Optional<Position> start, Optional<Position> end, StringView value) + : Expression(parent, start, end) + , m_value(value) + { + } + + StringView m_value; +}; + +class BooleanLiteral : public Expression { +public: + virtual ~BooleanLiteral() override = default; + virtual const char* class_name() const override { return "BooleanLiteral"; } + virtual void dump(size_t indent) const override; + + BooleanLiteral(ASTNode* parent, Optional<Position> start, Optional<Position> end, bool value) + : Expression(parent, start, end) + , m_value(value) + { + } + + bool m_value; +}; + +enum class BinaryOp { + Addition, + Subtraction, + Multiplication, + Division, + Modulo, + GreaterThan, + GreaterThanEquals, + LessThan, + LessThanEquals, + BitwiseAnd, + BitwiseOr, + BitwiseXor, + LeftShift, + RightShift, +}; + +class BinaryExpression : public Expression { +public: + BinaryExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Expression(parent, start, end) + { + } + + virtual ~BinaryExpression() override = default; + virtual const char* class_name() const override { return "BinaryExpression"; } + virtual void dump(size_t indent) const override; + + BinaryOp m_op; + RefPtr<Expression> m_lhs; + RefPtr<Expression> m_rhs; 
+}; + +enum class AssignmentOp { + Assignment, + AdditionAssignment, + SubtractionAssignment, +}; + +class AssignmentExpression : public Expression { +public: + AssignmentExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Expression(parent, start, end) + { + } + + virtual ~AssignmentExpression() override = default; + virtual const char* class_name() const override { return "AssignmentExpression"; } + virtual void dump(size_t indent) const override; + + AssignmentOp m_op; + RefPtr<Expression> m_lhs; + RefPtr<Expression> m_rhs; +}; + +class FunctionCall final : public Expression { +public: + FunctionCall(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Expression(parent, start, end) + { + } + + ~FunctionCall() override = default; + virtual const char* class_name() const override { return "FunctionCall"; } + virtual void dump(size_t indent) const override; + + StringView m_name; + NonnullRefPtrVector<Expression> m_arguments; +}; + +class StringLiteral final : public Expression { +public: + StringLiteral(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Expression(parent, start, end) + { + } + + ~StringLiteral() override = default; + virtual const char* class_name() const override { return "StringLiteral"; } + virtual void dump(size_t indent) const override; + + StringView m_value; +}; + +class ReturnStatement : public Statement { +public: + virtual ~ReturnStatement() override = default; + virtual const char* class_name() const override { return "ReturnStatement"; } + + ReturnStatement(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Statement(parent, start, end) + { + } + virtual void dump(size_t indent) const override; + + RefPtr<Expression> m_value; +}; + +class EnumDeclaration : public Declaration { +public: + virtual ~EnumDeclaration() override = default; + virtual const char* class_name() const override { return "EnumDeclaration"; } + virtual void dump(size_t indent) 
const override; + + EnumDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Declaration(parent, start, end) + { + } + + StringView m_name; + Vector<StringView> m_entries; +}; + +class MemberDeclaration : public Declaration { +public: + virtual ~MemberDeclaration() override = default; + virtual const char* class_name() const override { return "MemberDeclaration"; } + virtual void dump(size_t indent) const override; + + MemberDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Declaration(parent, start, end) + { + } + + RefPtr<Type> m_type; + StringView m_name; + RefPtr<Expression> m_initial_value; +}; + +class StructOrClassDeclaration : public Declaration { +public: + virtual ~StructOrClassDeclaration() override = default; + virtual const char* class_name() const override { return "StructOrClassDeclaration"; } + virtual void dump(size_t indent) const override; + virtual bool is_struct_or_class() const override { return true; } + + enum class Type { + Struct, + Class + }; + + StructOrClassDeclaration(ASTNode* parent, Optional<Position> start, Optional<Position> end, StructOrClassDeclaration::Type type) + : Declaration(parent, start, end) + , m_type(type) + { + } + + StructOrClassDeclaration::Type m_type; + StringView m_name; + NonnullRefPtrVector<MemberDeclaration> m_members; +}; + +enum class UnaryOp { + Invalid, + BitwiseNot, + Not, + Plus, + Minus, + PlusPlus, +}; + +class UnaryExpression : public Expression { +public: + UnaryExpression(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Expression(parent, start, end) + { + } + + virtual ~UnaryExpression() override = default; + virtual const char* class_name() const override { return "UnaryExpression"; } + virtual void dump(size_t indent) const override; + + UnaryOp m_op; + RefPtr<Expression> m_lhs; +}; + +class MemberExpression : public Expression { +public: + MemberExpression(ASTNode* parent, Optional<Position> start, 
Optional<Position> end) + : Expression(parent, start, end) + { + } + + virtual ~MemberExpression() override = default; + virtual const char* class_name() const override { return "MemberExpression"; } + virtual void dump(size_t indent) const override; + virtual bool is_member_expression() const override { return true; } + + RefPtr<Expression> m_object; + RefPtr<Identifier> m_property; +}; + +class ForStatement : public Statement { +public: + ForStatement(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Statement(parent, start, end) + { + } + + virtual ~ForStatement() override = default; + virtual const char* class_name() const override { return "ForStatement"; } + virtual void dump(size_t indent) const override; + + virtual NonnullRefPtrVector<Declaration> declarations() const override; + + RefPtr<VariableDeclaration> m_init; + RefPtr<Expression> m_test; + RefPtr<Expression> m_update; + RefPtr<Statement> m_body; +}; + +class BlockStatement final : public Statement { +public: + BlockStatement(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Statement(parent, start, end) + { + } + + virtual ~BlockStatement() override = default; + virtual const char* class_name() const override { return "BlockStatement"; } + virtual void dump(size_t indent) const override; + + virtual NonnullRefPtrVector<Declaration> declarations() const override; + + NonnullRefPtrVector<Statement> m_statements; +}; + +class Comment final : public Statement { +public: + Comment(ASTNode* parent, Optional<Position> start, Optional<Position> end) + : Statement(parent, start, end) + { + } + + virtual ~Comment() override = default; + virtual const char* class_name() const override { return "Comment"; } +}; +} diff --git a/Userland/Libraries/LibCpp/CMakeLists.txt b/Userland/Libraries/LibCpp/CMakeLists.txt index f9e022bddd..73eaf2bb4d 100644 --- a/Userland/Libraries/LibCpp/CMakeLists.txt +++ b/Userland/Libraries/LibCpp/CMakeLists.txt @@ -1,5 +1,7 @@ set(SOURCES + 
AST.cpp Lexer.cpp + Parser.cpp ) serenity_lib(LibCpp cpp) diff --git a/Userland/Libraries/LibCpp/Lexer.cpp b/Userland/Libraries/LibCpp/Lexer.cpp index 831822e5a5..5273f87dd4 100644 --- a/Userland/Libraries/LibCpp/Lexer.cpp +++ b/Userland/Libraries/LibCpp/Lexer.cpp @@ -581,12 +581,13 @@ Vector<Token> Lexer::lex() commit_token(Token::Type::IncludePath); begin_token(); } - } + } else { + while (peek() && peek() != '\n') + consume(); - while (peek() && peek() != '\n') - consume(); + commit_token(Token::Type::PreprocessorStatement); + } - commit_token(Token::Type::PreprocessorStatement); continue; } if (ch == '/' && peek(1) == '/') { @@ -786,4 +787,17 @@ Vector<Token> Lexer::lex() return tokens; } +bool Position::operator<(const Position& other) const +{ + return line < other.line || (line == other.line && column < other.column); +} +bool Position::operator>(const Position& other) const +{ + return !(*this < other) && !(*this == other); +} +bool Position::operator==(const Position& other) const +{ + return line == other.line && column == other.column; +} + } diff --git a/Userland/Libraries/LibCpp/Lexer.h b/Userland/Libraries/LibCpp/Lexer.h index 351dee8aa5..3e7188f9ae 100644 --- a/Userland/Libraries/LibCpp/Lexer.h +++ b/Userland/Libraries/LibCpp/Lexer.h @@ -96,11 +96,16 @@ namespace Cpp { __TOKEN(Float) \ __TOKEN(Keyword) \ __TOKEN(KnownType) \ - __TOKEN(Identifier) + __TOKEN(Identifier) \ + __TOKEN(EOF_TOKEN) struct Position { - size_t line; - size_t column; + size_t line { 0 }; + size_t column { 0 }; + + bool operator<(const Position&) const; + bool operator>(const Position&) const; + bool operator==(const Position&) const; }; struct Token { @@ -110,9 +115,9 @@ struct Token { #undef __TOKEN }; - const char* to_string() const + static const char* type_to_string(Type t) { - switch (m_type) { + switch (t) { #define __TOKEN(x) \ case Type::x: \ return #x; @@ -122,6 +127,14 @@ struct Token { ASSERT_NOT_REACHED(); } + const char* to_string() const + { + return 
type_to_string(m_type); + } + Position start() const { return m_start; } + Position end() const { return m_end; } + Type type() const { return m_type; } + Type m_type { Type::Unknown }; Position m_start; Position m_end; diff --git a/Userland/Libraries/LibCpp/Parser.cpp b/Userland/Libraries/LibCpp/Parser.cpp new file mode 100644 index 0000000000..a3447d1b61 --- /dev/null +++ b/Userland/Libraries/LibCpp/Parser.cpp @@ -0,0 +1,1000 @@ +/* + * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +// #define CPP_DEBUG + +#ifdef CPP_DEBUG +# define DEBUG_SPAM +#endif + +#include "Parser.h" +#include "AK/LogStream.h" +#include "AST.h" +#include <AK/ScopeGuard.h> +#include <AK/ScopeLogger.h> +#include <LibCpp/Lexer.h> + +namespace Cpp { + +Parser::Parser(const StringView& program) + : m_program(program) + , m_lines(m_program.split_view("\n", true)) +{ + Lexer lexer(m_program); + for (auto& token : lexer.lex()) { + if (token.m_type == Token::Type::Whitespace) + continue; + m_tokens.append(move(token)); + } +#ifdef CPP_DEBUG + dbgln("Program:"); + dbgln("{}", m_program); + dbgln("Tokens:"); + for (auto& token : m_tokens) { + dbgln("{}", token.to_string()); + } +#endif +} + +NonnullRefPtr<TranslationUnit> Parser::parse() +{ + SCOPE_LOGGER(); + auto unit = create_root_ast_node(m_tokens.first().m_start, m_tokens.last().m_end); + while (!done()) { + if (match_comment()) { + consume(Token::Type::Comment); + continue; + } + + if (match_preprocessor()) { + consume_preprocessor(); + continue; + } + + auto declaration = match_declaration(); + if (declaration.has_value()) { + unit->append(parse_declaration(*unit, declaration.value())); + continue; + } + + error("unexpected token"); + consume(); + } + return unit; +} + +Optional<Parser::DeclarationType> Parser::match_declaration() +{ + switch (m_state.context) { + case Context::InTranslationUnit: + return match_declaration_in_translation_unit(); + case Context::InFunctionDefinition: + return match_declaration_in_function_definition(); + default: + error("unexpected context"); + return {}; + } +} + +NonnullRefPtr<Declaration> Parser::parse_declaration(ASTNode& parent, DeclarationType declaration_type) +{ + switch (declaration_type) { + case DeclarationType::Function: + return parse_function_declaration(parent); + case DeclarationType::Variable: + return parse_variable_declaration(parent); + case DeclarationType::Enum: + return parse_enum_declaration(parent); + case DeclarationType::Struct: + return 
parse_struct_or_class_declaration(parent, StructOrClassDeclaration::Type::Struct); + default: + error("unexpected declaration type"); + return create_ast_node<InvalidDeclaration>(parent, position(), position()); + } +} + +NonnullRefPtr<FunctionDeclaration> Parser::parse_function_declaration(ASTNode& parent) +{ + auto func = create_ast_node<FunctionDeclaration>(parent, position(), {}); + + auto return_type_token = consume(Token::Type::KnownType); + auto function_name = consume(Token::Type::Identifier); + consume(Token::Type::LeftParen); + auto parameters = parse_parameter_list(*func); + consume(Token::Type::RightParen); + + RefPtr<FunctionDefinition> body; + Position func_end {}; + if (peek(Token::Type::LeftCurly).has_value()) { + body = parse_function_definition(*func); + func_end = body->end(); + } else { + func_end = position(); + consume(Token::Type::Semicolon); + } + + func->m_name = text_of_token(function_name); + func->m_return_type = create_ast_node<Type>(*func, return_type_token.m_start, return_type_token.m_end, text_of_token(return_type_token)); + if (parameters.has_value()) + func->m_parameters = move(parameters.value()); + func->m_definition = move(body); + func->set_end(func_end); + return func; +} + +NonnullRefPtr<FunctionDefinition> Parser::parse_function_definition(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto func = create_ast_node<FunctionDefinition>(parent, position(), {}); + consume(Token::Type::LeftCurly); + while (!eof() && peek().m_type != Token::Type::RightCurly) { + func->statements().append(parse_statement(func)); + } + func->set_end(position()); + if (!eof()) + consume(Token::Type::RightCurly); + return func; +} + +NonnullRefPtr<Statement> Parser::parse_statement(ASTNode& parent) +{ + SCOPE_LOGGER(); + ArmedScopeGuard consume_semicolumn([this]() { + consume(Token::Type::Semicolon); + }); + + if (match_block_statement()) { + consume_semicolumn.disarm(); + return parse_block_statement(parent); + } + if (match_comment()) { + 
consume_semicolumn.disarm(); + return parse_comment(parent); + } + if (match_variable_declaration()) { + return parse_variable_declaration(parent); + } + if (match_expression()) { + return parse_expression(parent); + } + if (match_keyword("return")) { + return parse_return_statement(parent); + } + if (match_keyword("for")) { + consume_semicolumn.disarm(); + return parse_for_statement(parent); + } else { + error("unexpected statement type"); + consume_semicolumn.disarm(); + consume(); + return create_ast_node<InvalidStatement>(parent, position(), position()); + } +} + +NonnullRefPtr<Comment> Parser::parse_comment(ASTNode& parent) +{ + auto comment = create_ast_node<Comment>(parent, position(), {}); + consume(Token::Type::Comment); + comment->set_end(position()); + return comment; +} + +bool Parser::match_block_statement() +{ + return peek().type() == Token::Type::LeftCurly; +} + +NonnullRefPtr<BlockStatement> Parser::parse_block_statement(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto block_statement = create_ast_node<BlockStatement>(parent, position(), {}); + consume(Token::Type::LeftCurly); + while (peek().type() != Token::Type::RightCurly) { + block_statement->m_statements.append(parse_statement(*block_statement)); + } + consume(Token::Type::RightCurly); + block_statement->set_end(position()); + return block_statement; +} + +bool Parser::match_variable_declaration() +{ + save_state(); + ScopeGuard state_guard = [this] { load_state(); }; + + if (!peek(Token::Type::KnownType).has_value() && !peek(Token::Type::Identifier).has_value()) + return false; + consume(); + + if (!peek(Token::Type::Identifier).has_value()) + return false; + consume(); + + if (match(Token::Type::Equals)) { + consume(Token::Type::Equals); + if (!match_expression()) { + error("initial value of variable is not an expression"); + return false; + } + } + + return true; +} + +NonnullRefPtr<VariableDeclaration> Parser::parse_variable_declaration(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto var = 
create_ast_node<VariableDeclaration>(parent, position(), {}); + auto type_token = consume(); + if (type_token.type() != Token::Type::KnownType && type_token.type() != Token::Type::Identifier) { + error("unexpected token for variable type"); + var->set_end(type_token.end()); + return var; + } + auto identifier_token = consume(Token::Type::Identifier); + RefPtr<Expression> initial_value; + + if (match(Token::Type::Equals)) { + consume(Token::Type::Equals); + initial_value = parse_expression(var); + } + + var->set_end(position()); + var->m_type = create_ast_node<Type>(var, type_token.m_start, type_token.m_end, text_of_token(type_token)); + var->m_name = text_of_token(identifier_token); + var->m_initial_value = move(initial_value); + + return var; +} + +NonnullRefPtr<Expression> Parser::parse_expression(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto expression = parse_primary_expression(parent); + // TODO: remove eof() logic, should still work without it + if (eof() || match(Token::Type::Semicolon)) { + return expression; + } + + NonnullRefPtrVector<Expression> secondary_expressions; + + while (match_secondary_expression()) { + // FIXME: Handle operator precedence + expression = parse_secondary_expression(parent, expression); + secondary_expressions.append(expression); + } + + for (size_t i = 0; secondary_expressions.size() != 0 && i < secondary_expressions.size() - 1; ++i) { + secondary_expressions[i].set_parent(secondary_expressions[i + 1]); + } + + return expression; +} + +bool Parser::match_secondary_expression() +{ + auto type = peek().type(); + return type == Token::Type::Plus + || type == Token::Type::PlusEquals + || type == Token::Type::Minus + || type == Token::Type::MinusEquals + || type == Token::Type::Asterisk + || type == Token::Type::AsteriskEquals + || type == Token::Type::Percent + || type == Token::Type::PercentEquals + || type == Token::Type::Equals + || type == Token::Type::Greater + || type == Token::Type::Greater + || type == Token::Type::Less + || 
type == Token::Type::LessEquals + || type == Token::Type::Dot + || type == Token::Type::PlusPlus + || type == Token::Type::MinusMinus + || type == Token::Type::And + || type == Token::Type::AndEquals + || type == Token::Type::Pipe + || type == Token::Type::PipeEquals + || type == Token::Type::Caret + || type == Token::Type::CaretEquals + || type == Token::Type::LessLess + || type == Token::Type::LessLessEquals + || type == Token::Type::GreaterGreater + || type == Token::Type::GreaterGreaterEquals + || type == Token::Type::AndAnd + || type == Token::Type::PipePipe; +} + +NonnullRefPtr<Expression> Parser::parse_primary_expression(ASTNode& parent) +{ + SCOPE_LOGGER(); + // TODO: remove eof() logic, should still work without it + if (eof()) { + auto node = create_ast_node<Identifier>(parent, position(), position()); + return node; + } + + if (match_unary_expression()) + return parse_unary_expression(parent); + + if (match_literal()) { + return parse_literal(parent); + } + switch (peek().type()) { + case Token::Type::Identifier: { + if (match_function_call()) + return parse_function_call(parent); + auto token = consume(); + return create_ast_node<Identifier>(parent, token.m_start, token.m_end, text_of_token(token)); + } + default: { + error("could not parse primary expression"); + auto token = consume(); + return create_ast_node<InvalidExpression>(parent, token.m_start, token.m_end); + } + } +} + +bool Parser::match_literal() +{ + switch (peek().type()) { + case Token::Type::Integer: + return true; + case Token::Type::DoubleQuotedString: + return true; + case Token::Type::Keyword: { + return match_boolean_literal(); + } + default: + return false; + } +} + +bool Parser::match_unary_expression() +{ + auto type = peek().type(); + return type == Token::Type::PlusPlus + || type == Token::Type::MinusMinus + || type == Token::Type::ExclamationMark + || type == Token::Type::Tilde + || type == Token::Type::Plus + || type == Token::Type::Minus; +} + 
+NonnullRefPtr<UnaryExpression> Parser::parse_unary_expression(ASTNode& parent) +{ + auto unary_exp = create_ast_node<UnaryExpression>(parent, position(), {}); + auto op_token = consume(); + UnaryOp op { UnaryOp::Invalid }; + switch (op_token.type()) { + case Token::Type::Minus: + op = UnaryOp::Minus; + break; + case Token::Type::Plus: + op = UnaryOp::Plus; + break; + case Token::Type::ExclamationMark: + op = UnaryOp::Not; + break; + case Token::Type::Tilde: + op = UnaryOp::BitwiseNot; + break; + case Token::Type::PlusPlus: + op = UnaryOp::PlusPlus; + break; + default: + break; + } + unary_exp->m_op = op; + auto lhs = parse_expression(*unary_exp); + unary_exp->m_lhs = lhs; + unary_exp->set_end(lhs->end()); + return unary_exp; +} + +NonnullRefPtr<Expression> Parser::parse_literal(ASTNode& parent) +{ + switch (peek().type()) { + case Token::Type::Integer: { + auto token = consume(); + return create_ast_node<NumericLiteral>(parent, token.m_start, token.m_end, text_of_token(token)); + } + case Token::Type::DoubleQuotedString: { + return parse_string_literal(parent); + } + case Token::Type::Keyword: { + if (match_boolean_literal()) + return parse_boolean_literal(parent); + [[fallthrough]]; + } + default: { + error("could not parse literal"); + auto token = consume(); + return create_ast_node<InvalidExpression>(parent, token.m_start, token.m_end); + } + } +} + +NonnullRefPtr<Expression> Parser::parse_secondary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs) +{ + SCOPE_LOGGER(); + switch (peek().m_type) { + case Token::Type::Plus: + return parse_binary_expression(parent, lhs, BinaryOp::Addition); + case Token::Type::Less: + return parse_binary_expression(parent, lhs, BinaryOp::LessThan); + case Token::Type::Equals: + return parse_assignment_expression(parent, lhs, AssignmentOp::Assignment); + case Token::Type::Dot: { + consume(); + auto exp = create_ast_node<MemberExpression>(parent, lhs->start(), {}); + lhs->set_parent(*exp); + exp->m_object = move(lhs); + 
auto property_token = consume(Token::Type::Identifier); + exp->m_property = create_ast_node<Identifier>(*exp, property_token.start(), property_token.end(), text_of_token(property_token)); + exp->set_end(property_token.end()); + return exp; + } + default: { + error(String::formatted("unexpected operator for expression. operator: {}", peek().to_string())); + auto token = consume(); + return create_ast_node<InvalidExpression>(parent, token.start(), token.end()); + } + } +} + +NonnullRefPtr<BinaryExpression> Parser::parse_binary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, BinaryOp op) +{ + consume(); // Operator + auto exp = create_ast_node<BinaryExpression>(parent, lhs->start(), {}); + lhs->set_parent(*exp); + exp->m_op = op; + exp->m_lhs = move(lhs); + auto rhs = parse_expression(exp); + exp->set_end(rhs->end()); + exp->m_rhs = move(rhs); + return exp; +} + +NonnullRefPtr<AssignmentExpression> Parser::parse_assignment_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, AssignmentOp op) +{ + consume(); // Operator + auto exp = create_ast_node<AssignmentExpression>(parent, lhs->start(), {}); + lhs->set_parent(*exp); + exp->m_op = op; + exp->m_lhs = move(lhs); + auto rhs = parse_expression(exp); + exp->set_end(rhs->end()); + exp->m_rhs = move(rhs); + return exp; +} + +Optional<Parser::DeclarationType> Parser::match_declaration_in_translation_unit() +{ + if (match_function_declaration()) + return DeclarationType::Function; + if (match_enum_declaration()) + return DeclarationType::Enum; + if (match_struct_declaration()) + return DeclarationType::Struct; + return {}; +} + +bool Parser::match_enum_declaration() +{ + return peek().type() == Token::Type::Keyword && text_of_token(peek()) == "enum"; +} + +bool Parser::match_struct_declaration() +{ + return peek().type() == Token::Type::Keyword && text_of_token(peek()) == "struct"; +} + +bool Parser::match_function_declaration() +{ + save_state(); + ScopeGuard state_guard = [this] { load_state(); }; + + if 
(!peek(Token::Type::KnownType).has_value()) + return false; + consume(); + + if (!peek(Token::Type::Identifier).has_value()) + return false; + consume(); + + if (!peek(Token::Type::LeftParen).has_value()) + return false; + consume(); + + while (consume().m_type != Token::Type::RightParen && !eof()) { }; + + if (peek(Token::Type::Semicolon).has_value() || peek(Token::Type::LeftCurly).has_value()) + return true; + + return false; +} + +Optional<NonnullRefPtrVector<Parameter>> Parser::parse_parameter_list(ASTNode& parent) +{ + SCOPE_LOGGER(); + NonnullRefPtrVector<Parameter> parameters; + while (peek().m_type != Token::Type::RightParen && !eof()) { + auto type = parse_type(parent); + + auto name_identifier = peek(Token::Type::Identifier); + if (name_identifier.has_value()) + consume(Token::Type::Identifier); + + StringView name; + if (name_identifier.has_value()) + name = text_of_token(name_identifier.value()); + + auto param = create_ast_node<Parameter>(parent, type->start(), name_identifier.has_value() ? 
name_identifier.value().m_end : type->end(), name); + + param->m_type = move(type); + parameters.append(move(param)); + if (peek(Token::Type::Comma).has_value()) + consume(Token::Type::Comma); + } + return parameters; +} + +bool Parser::match_comment() +{ + return match(Token::Type::Comment); +} + +bool Parser::match_whitespace() +{ + return match(Token::Type::Whitespace); +} + +bool Parser::match_preprocessor() +{ + return match(Token::Type::PreprocessorStatement) || match(Token::Type::IncludeStatement); +} + +void Parser::consume_preprocessor() +{ + SCOPE_LOGGER(); + switch (peek().type()) { + case Token::Type::PreprocessorStatement: + consume(); + break; + case Token::Type::IncludeStatement: + consume(); + consume(Token::Type::IncludePath); + break; + default: + error("unexpected token while parsing preprocessor statement"); + consume(); + } +} + +Optional<Token> Parser::consume_whitespace() +{ + SCOPE_LOGGER(); + return consume(Token::Type::Whitespace); +} + +Token Parser::consume(Token::Type type) +{ + auto token = consume(); + if (token.type() != type) + error(String::formatted("expected {} at {}:{}, found: {}", Token::type_to_string(type), token.start().line, token.start().column, Token::type_to_string(token.type()))); + return token; +} + +bool Parser::match(Token::Type type) +{ + return peek().m_type == type; +} + +Token Parser::consume() +{ + if (eof()) { + error("C++ Parser: out of tokens"); + return { Token::Type::EOF_TOKEN, position(), position() }; + } + return m_tokens[m_state.token_index++]; +} + +Token Parser::peek() const +{ + if (eof()) { + return { Token::Type::EOF_TOKEN, position(), position() }; + } + return m_tokens[m_state.token_index]; +} + +Optional<Token> Parser::peek(Token::Type type) const +{ + auto token = peek(); + if (token.m_type == type) + return token; + return {}; +} + +void Parser::save_state() +{ + m_saved_states.append(m_state); +} + +void Parser::load_state() +{ + m_state = m_saved_states.take_last(); +} + 
+Optional<Parser::DeclarationType> Parser::match_declaration_in_function_definition() +{ + ASSERT_NOT_REACHED(); +} + +bool Parser::done() +{ + return m_state.token_index == m_tokens.size(); +} + +StringView Parser::text_of_token(const Cpp::Token& token) +{ + ASSERT(token.m_start.line == token.m_end.line); + ASSERT(token.m_start.column <= token.m_end.column); + return m_lines[token.m_start.line].substring_view(token.m_start.column, token.m_end.column - token.m_start.column + 1); +} + +StringView Parser::text_of_node(const ASTNode& node) const +{ + if (node.start().line == node.end().line) { + ASSERT(node.start().column <= node.end().column); + return m_lines[node.start().line].substring_view(node.start().column, node.end().column - node.start().column + 1); + } + + auto index_of_position([this](auto position) { + size_t start_index = 0; + for (size_t line = 0; line < position.line; ++line) { + start_index += m_lines[line].length() + 1; + } + start_index += position.column; + return start_index; + }); + auto start_index = index_of_position(node.start()); + auto end_index = index_of_position(node.end()); + ASSERT(end_index >= start_index); + return m_program.substring_view(start_index, end_index - start_index); +} + +void Parser::error(StringView message) +{ + SCOPE_LOGGER(); + if (message.is_null() || message.is_empty()) + message = "<empty>"; + String formatted_message; + if (m_state.token_index >= m_tokens.size()) { + formatted_message = String::formatted("C++ Parsed error on EOF.{}", message); + } else { + formatted_message = String::formatted("C++ Parser error: {}. token: {} ({}:{})", + message, + m_state.token_index < m_tokens.size() ? 
text_of_token(m_tokens[m_state.token_index]) : "EOF", + m_tokens[m_state.token_index].m_start.line, + m_tokens[m_state.token_index].m_start.column); + } + m_errors.append(formatted_message); + dbgln("{}", formatted_message); +} + +bool Parser::match_expression() +{ + auto token_type = peek().m_type; + return token_type == Token::Type::Integer + || token_type == Token::Type::Float + || token_type == Token::Type::Identifier + || match_unary_expression(); +} + +bool Parser::eof() const +{ + return m_state.token_index >= m_tokens.size(); +} + +Position Parser::position() const +{ + if (eof()) + return m_tokens.last().m_end; + return peek().m_start; +} + +RefPtr<ASTNode> Parser::eof_node() const +{ + ASSERT(m_tokens.size()); + return node_at(m_tokens.last().m_end); +} + +RefPtr<ASTNode> Parser::node_at(Position pos) const +{ + ASSERT(!m_tokens.is_empty()); + RefPtr<ASTNode> match_node; + for (auto& node : m_nodes) { + if (node.start() > pos || node.end() < pos) + continue; + if (!match_node) + match_node = node; + else if (node_span_size(node) < node_span_size(*match_node)) + match_node = node; + } + return match_node; +} + +Optional<Token> Parser::token_at(Position pos) const +{ + for (auto& token : m_tokens) { + if (token.start() > pos || token.end() < pos) + continue; + return token; + } + return {}; +} + +size_t Parser::node_span_size(const ASTNode& node) const +{ + if (node.start().line == node.end().line) + return node.end().column - node.start().column; + + size_t span_size = m_lines[node.start().line].length() - node.start().column; + for (size_t line = node.start().line + 1; line < node.end().line; ++line) { + span_size += m_lines[line].length(); + } + return span_size + m_lines[node.end().line].length() - node.end().column; +} + +void Parser::print_tokens() const +{ + for (auto& token : m_tokens) { + dbgln("{}", token.to_string()); + } +} + +bool Parser::match_function_call() +{ + save_state(); + ScopeGuard state_guard = [this] { load_state(); }; + if 
(!match(Token::Type::Identifier)) + return false; + consume(); + return match(Token::Type::LeftParen); +} + +NonnullRefPtr<FunctionCall> Parser::parse_function_call(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto call = create_ast_node<FunctionCall>(parent, position(), {}); + auto name_identifier = consume(Token::Type::Identifier); + call->m_name = text_of_token(name_identifier); + + NonnullRefPtrVector<Expression> args; + consume(Token::Type::LeftParen); + while (peek().type() != Token::Type::RightParen && !eof()) { + args.append(parse_expression(*call)); + if (peek().type() == Token::Type::Comma) + consume(Token::Type::Comma); + } + consume(Token::Type::RightParen); + call->m_arguments = move(args); + call->set_end(position()); + return call; +} + +NonnullRefPtr<StringLiteral> Parser::parse_string_literal(ASTNode& parent) +{ + SCOPE_LOGGER(); + Optional<size_t> start_token_index; + Optional<size_t> end_token_index; + while (!eof()) { + auto token = peek(); + if (token.type() != Token::Type::DoubleQuotedString && token.type() != Token::Type::EscapeSequence) { + ASSERT(start_token_index.has_value()); + // TODO: don't consume + end_token_index = m_state.token_index - 1; + break; + } + if (!start_token_index.has_value()) + start_token_index = m_state.token_index; + consume(); + } + ASSERT(start_token_index.has_value()); + ASSERT(end_token_index.has_value()); + Token start_token = m_tokens[start_token_index.value()]; + Token end_token = m_tokens[end_token_index.value()]; + ASSERT(start_token.start().line == end_token.start().line); + + auto text = m_lines[start_token.start().line].substring_view(start_token.start().column, end_token.end().column - start_token.start().column + 1); + auto string_literal = create_ast_node<StringLiteral>(parent, start_token.start(), end_token.end()); + string_literal->m_value = text; + return string_literal; +} + +NonnullRefPtr<ReturnStatement> Parser::parse_return_statement(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto return_statement = 
create_ast_node<ReturnStatement>(parent, position(), {}); + consume(Token::Type::Keyword); + auto expression = parse_expression(*return_statement); + return_statement->m_value = expression; + return_statement->set_end(expression->end()); + return return_statement; +} + +NonnullRefPtr<EnumDeclaration> Parser::parse_enum_declaration(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto enum_decl = create_ast_node<EnumDeclaration>(parent, position(), {}); + consume_keyword("enum"); + auto name_token = consume(Token::Type::Identifier); + enum_decl->m_name = text_of_token(name_token); + consume(Token::Type::LeftCurly); + while (peek().type() != Token::Type::RightCurly && !eof()) { + enum_decl->m_entries.append(text_of_token(consume(Token::Type::Identifier))); + if (peek().type() != Token::Type::Comma) { + break; + } + consume(Token::Type::Comma); + } + consume(Token::Type::RightCurly); + consume(Token::Type::Semicolon); + enum_decl->set_end(position()); + return enum_decl; +} + +Token Parser::consume_keyword(const String& keyword) +{ + auto token = consume(); + if (token.type() != Token::Type::Keyword) { + error(String::formatted("unexpected token: {}, expected Keyword", token.to_string())); + return token; + } + if (text_of_token(token) != keyword) { + error(String::formatted("unexpected keyword: {}, expected {}", text_of_token(token), keyword)); + return token; + } + return token; +} + +bool Parser::match_keyword(const String& keyword) +{ + auto token = peek(); + if (token.type() != Token::Type::Keyword) { + return false; + } + if (text_of_token(token) != keyword) { + return false; + } + return true; +} + +NonnullRefPtr<StructOrClassDeclaration> Parser::parse_struct_or_class_declaration(ASTNode& parent, StructOrClassDeclaration::Type type) +{ + SCOPE_LOGGER(); + auto decl = create_ast_node<StructOrClassDeclaration>(parent, position(), {}, type); + switch (type) { + case StructOrClassDeclaration::Type::Struct: + consume_keyword("struct"); + break; + case 
StructOrClassDeclaration::Type::Class: + consume_keyword("class"); + break; + } + auto name_token = consume(Token::Type::Identifier); + decl->m_name = text_of_token(name_token); + + consume(Token::Type::LeftCurly); + + while (peek().type() != Token::Type::RightCurly && !eof()) { + decl->m_members.append(parse_member_declaration(*decl)); + } + + consume(Token::Type::RightCurly); + consume(Token::Type::Semicolon); + decl->set_end(position()); + return decl; +} + +NonnullRefPtr<MemberDeclaration> Parser::parse_member_declaration(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto member_decl = create_ast_node<MemberDeclaration>(parent, position(), {}); + auto type_token = consume(); + auto identifier_token = consume(Token::Type::Identifier); + RefPtr<Expression> initial_value; + + if (match(Token::Type::LeftCurly)) { + consume(Token::Type::LeftCurly); + initial_value = parse_expression(*member_decl); + consume(Token::Type::RightCurly); + } + + member_decl->m_type = create_ast_node<Type>(*member_decl, type_token.m_start, type_token.m_end, text_of_token(type_token)); + member_decl->m_name = text_of_token(identifier_token); + member_decl->m_initial_value = move(initial_value); + consume(Token::Type::Semicolon); + member_decl->set_end(position()); + + return member_decl; +} + +NonnullRefPtr<BooleanLiteral> Parser::parse_boolean_literal(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto token = consume(Token::Type::Keyword); + auto text = text_of_token(token); + // text == "true" || text == "false"; + bool value = (text == "true"); + return create_ast_node<BooleanLiteral>(parent, token.start(), token.end(), value); +} + +bool Parser::match_boolean_literal() +{ + auto token = peek(); + if (token.type() != Token::Type::Keyword) + return false; + auto text = text_of_token(token); + return text == "true" || text == "false"; +} + +NonnullRefPtr<Type> Parser::parse_type(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto token = consume(); + auto type = create_ast_node<Type>(parent, token.start(), 
token.end(), text_of_token(token)); + if (token.type() != Token::Type::KnownType && token.type() != Token::Type::Identifier) { + error(String::formatted("unexpected token for type: {}", token.to_string())); + return type; + } + while (peek().type() == Token::Type::Asterisk) { + auto asterisk = consume(); + auto ptr = create_ast_node<Pointer>(type, asterisk.start(), asterisk.end()); + ptr->m_pointee = type; + type = ptr; + } + return type; +} + +NonnullRefPtr<ForStatement> Parser::parse_for_statement(ASTNode& parent) +{ + SCOPE_LOGGER(); + auto for_statement = create_ast_node<ForStatement>(parent, position(), {}); + consume(Token::Type::Keyword); + consume(Token::Type::LeftParen); + for_statement->m_init = parse_variable_declaration(*for_statement); + consume(Token::Type::Semicolon); + for_statement->m_test = parse_expression(*for_statement); + consume(Token::Type::Semicolon); + for_statement->m_update = parse_expression(*for_statement); + consume(Token::Type::RightParen); + for_statement->m_body = parse_statement(*for_statement); + for_statement->set_end(for_statement->m_body->end()); + return for_statement; +} + +} diff --git a/Userland/Libraries/LibCpp/Parser.h b/Userland/Libraries/LibCpp/Parser.h new file mode 100644 index 0000000000..013d1ff697 --- /dev/null +++ b/Userland/Libraries/LibCpp/Parser.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "AK/NonnullRefPtr.h" +#include "AST.h" +#include <LibCpp/Lexer.h> + +namespace Cpp { + +class Parser final { +public: + explicit Parser(const StringView&); + ~Parser() = default; + + NonnullRefPtr<TranslationUnit> parse(); + bool eof() const; + + RefPtr<ASTNode> eof_node() const; + RefPtr<ASTNode> node_at(Position) const; + Optional<Token> token_at(Position) const; + RefPtr<const TranslationUnit> root_node() const { return m_root_node; } + StringView text_of_node(const ASTNode&) const; + void print_tokens() const; + Vector<String> errors() const { return m_errors; } + +private: + enum class DeclarationType { + Function, + Variable, + Enum, + Struct, + }; + + bool done(); + + Optional<DeclarationType> match_declaration(); + Optional<DeclarationType> match_declaration_in_translation_unit(); + Optional<DeclarationType> match_declaration_in_function_definition(); + bool match_function_declaration(); + bool match_comment(); + bool match_preprocessor(); + bool match_whitespace(); + bool match_variable_declaration(); + bool match_expression(); + bool match_function_call(); + bool match_secondary_expression(); + bool match_enum_declaration(); + bool 
match_struct_declaration(); + bool match_literal(); + bool match_unary_expression(); + bool match_boolean_literal(); + bool match_keyword(const String&); + bool match_block_statement(); + + Optional<NonnullRefPtrVector<Parameter>> parse_parameter_list(ASTNode& parent); + Optional<Token> consume_whitespace(); + void consume_preprocessor(); + + NonnullRefPtr<Declaration> parse_declaration(ASTNode& parent, DeclarationType); + NonnullRefPtr<FunctionDeclaration> parse_function_declaration(ASTNode& parent); + NonnullRefPtr<FunctionDefinition> parse_function_definition(ASTNode& parent); + NonnullRefPtr<Statement> parse_statement(ASTNode& parent); + NonnullRefPtr<VariableDeclaration> parse_variable_declaration(ASTNode& parent); + NonnullRefPtr<Expression> parse_expression(ASTNode& parent); + NonnullRefPtr<Expression> parse_primary_expression(ASTNode& parent); + NonnullRefPtr<Expression> parse_secondary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs); + NonnullRefPtr<FunctionCall> parse_function_call(ASTNode& parent); + NonnullRefPtr<StringLiteral> parse_string_literal(ASTNode& parent); + NonnullRefPtr<ReturnStatement> parse_return_statement(ASTNode& parent); + NonnullRefPtr<EnumDeclaration> parse_enum_declaration(ASTNode& parent); + NonnullRefPtr<StructOrClassDeclaration> parse_struct_or_class_declaration(ASTNode& parent, StructOrClassDeclaration::Type); + NonnullRefPtr<MemberDeclaration> parse_member_declaration(ASTNode& parent); + NonnullRefPtr<Expression> parse_literal(ASTNode& parent); + NonnullRefPtr<UnaryExpression> parse_unary_expression(ASTNode& parent); + NonnullRefPtr<BooleanLiteral> parse_boolean_literal(ASTNode& parent); + NonnullRefPtr<Type> parse_type(ASTNode& parent); + NonnullRefPtr<BinaryExpression> parse_binary_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, BinaryOp); + NonnullRefPtr<AssignmentExpression> parse_assignment_expression(ASTNode& parent, NonnullRefPtr<Expression> lhs, AssignmentOp); + NonnullRefPtr<ForStatement> 
parse_for_statement(ASTNode& parent); + NonnullRefPtr<BlockStatement> parse_block_statement(ASTNode& parent); + NonnullRefPtr<Comment> parse_comment(ASTNode& parent); + + bool match(Token::Type); + Token consume(Token::Type); + Token consume(); + Token consume_keyword(const String&); + Token peek() const; + Optional<Token> peek(Token::Type) const; + Position position() const; + + void save_state(); + void load_state(); + + enum class Context { + InTranslationUnit, + InFunctionDefinition, + }; + + struct State { + Context context { Context::InTranslationUnit }; + size_t token_index { 0 }; + }; + + StringView text_of_token(const Cpp::Token& token); + void error(StringView message = {}); + + size_t node_span_size(const ASTNode& node) const; + + template<class T, class... Args> + NonnullRefPtr<T> + create_ast_node(ASTNode& parent, const Position& start, Optional<Position> end, Args&&... args) + { + auto node = adopt(*new T(&parent, start, end, forward<Args>(args)...)); + m_nodes.append(node); + return node; + } + + NonnullRefPtr<TranslationUnit> + create_root_ast_node(const Position& start, Position end) + { + auto node = adopt(*new TranslationUnit(nullptr, start, end)); + m_nodes.append(node); + m_root_node = node; + return node; + } + + StringView m_program; + Vector<StringView> m_lines; + Vector<Token> m_tokens; + State m_state; + Vector<State> m_saved_states; + RefPtr<TranslationUnit> m_root_node; + NonnullRefPtrVector<ASTNode> m_nodes; + Vector<String> m_errors; +}; + +} diff --git a/Userland/Utilities/CMakeLists.txt b/Userland/Utilities/CMakeLists.txt index 9352734b1b..cea432a38d 100644 --- a/Userland/Utilities/CMakeLists.txt +++ b/Userland/Utilities/CMakeLists.txt @@ -47,3 +47,4 @@ target_link_libraries(test-web LibWeb) target_link_libraries(tt LibPthread) target_link_libraries(grep LibRegex) target_link_libraries(gunzip LibCompress) +target_link_libraries(CppParserTest LibCpp) diff --git a/Userland/Utilities/CppParserTest.cpp 
b/Userland/Utilities/CppParserTest.cpp new file mode 100644 index 0000000000..2fd78329aa --- /dev/null +++ b/Userland/Utilities/CppParserTest.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "AK/Forward.h" +#include "LibCpp/AST.h" +#include <LibCore/ArgsParser.h> +#include <LibCore/File.h> +#include <LibCpp/Parser.h> + +int main(int argc, char** argv) +{ + Core::ArgsParser args_parser; + const char* path = nullptr; + bool tokens_mode = false; + args_parser.add_option(tokens_mode, "Print Tokens", "tokens", 'T'); + args_parser.add_positional_argument(path, "Cpp File", "cpp-file", Core::ArgsParser::Required::No); + args_parser.parse(argc, argv); + + if (!path) + path = "Source/little/main.cpp"; + auto file = Core::File::construct(path); + if (!file->open(Core::IODevice::ReadOnly)) { + perror("open"); + exit(1); + } + auto content = file->read_all(); + StringView content_view(content); + ::Cpp::Parser parser(content_view); + if (tokens_mode) { + parser.print_tokens(); + return 0; + } + auto root = parser.parse(); + + dbgln("Parser errors:"); + for (auto& error : parser.errors()) { + dbgln("{}", error); + } + + root->dump(0); +} |