diff options
author | Jan de Visser <jan@de-visser.net> | 2021-06-21 10:57:44 -0400 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-06-24 00:36:53 +0200 |
commit | 4198f7e1af7997e0615891289194752f1375f5a1 (patch) | |
tree | ecea66688f05bb6304ddb6b7c5fe27ec2f57cfe3 /Userland/Libraries/LibSQL/AST | |
parent | e0f1c237d21be3e2831092319f50bb429ad1f7a9 (diff) | |
download | serenity-4198f7e1af7997e0615891289194752f1375f5a1.zip |
LibSQL: Move Lexer and Parser machinery to AST directory
The SQL engine is expected to be a fairly sizeable piece of software.
Therefore we're starting to restructure the codebase for growth.
Diffstat (limited to 'Userland/Libraries/LibSQL/AST')
-rw-r--r-- | Userland/Libraries/LibSQL/AST/AST.h | 978 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/AST/Lexer.cpp | 332 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/AST/Lexer.h | 57 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/AST/Parser.cpp | 1092 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/AST/Parser.h | 134 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/AST/SyntaxHighlighter.cpp | 112 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/AST/SyntaxHighlighter.h | 28 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/AST/Token.cpp | 53 | ||||
-rw-r--r-- | Userland/Libraries/LibSQL/AST/Token.h | 242 |
9 files changed, 3028 insertions, 0 deletions
diff --git a/Userland/Libraries/LibSQL/AST/AST.h b/Userland/Libraries/LibSQL/AST/AST.h new file mode 100644 index 0000000000..8f22bbab65 --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/AST.h @@ -0,0 +1,978 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/NonnullRefPtr.h> +#include <AK/NonnullRefPtrVector.h> +#include <AK/RefCounted.h> +#include <AK/RefPtr.h> +#include <AK/String.h> +#include <LibSQL/AST/Token.h> +#include <LibSQL/Forward.h> + +namespace SQL::AST { + +template<class T, class... Args> +static inline NonnullRefPtr<T> +create_ast_node(Args&&... args) +{ + return adopt_ref(*new T(forward<Args>(args)...)); +} + +class ASTNode : public RefCounted<ASTNode> { +public: + virtual ~ASTNode() { } + +protected: + ASTNode() = default; +}; + +//================================================================================================== +// Language types +//================================================================================================== + +class SignedNumber final : public ASTNode { +public: + explicit SignedNumber(double value) + : m_value(value) + { + } + + double value() const { return m_value; } + +private: + double m_value; +}; + +class TypeName : public ASTNode { +public: + TypeName(String name, NonnullRefPtrVector<SignedNumber> signed_numbers) + : m_name(move(name)) + , m_signed_numbers(move(signed_numbers)) + { + VERIFY(m_signed_numbers.size() <= 2); + } + + const String& name() const { return m_name; } + const NonnullRefPtrVector<SignedNumber>& signed_numbers() const { return m_signed_numbers; } + +private: + String m_name; + NonnullRefPtrVector<SignedNumber> m_signed_numbers; +}; + +class ColumnDefinition : public ASTNode { +public: + ColumnDefinition(String name, NonnullRefPtr<TypeName> type_name) + : m_name(move(name)) + , m_type_name(move(type_name)) + { + } + + const String& name() const { return m_name; } + const NonnullRefPtr<TypeName>& type_name() const { return m_type_name; } + +private: + String m_name; + NonnullRefPtr<TypeName> m_type_name; +}; + +class CommonTableExpression : public ASTNode { +public: + CommonTableExpression(String table_name, Vector<String> column_names, NonnullRefPtr<Select> select_statement) + : m_table_name(move(table_name)) + , m_column_names(move(column_names)) + , m_select_statement(move(select_statement)) + { + } + + const String& table_name() const { return m_table_name; } + const Vector<String>& column_names() const { return m_column_names; } + const NonnullRefPtr<Select>& select_statement() const { return m_select_statement; } + +private: + String m_table_name; + Vector<String> m_column_names; + NonnullRefPtr<Select> m_select_statement; +}; + +class CommonTableExpressionList : public ASTNode { +public: + CommonTableExpressionList(bool recursive, NonnullRefPtrVector<CommonTableExpression> common_table_expressions) + : m_recursive(recursive) + , m_common_table_expressions(move(common_table_expressions)) + { + VERIFY(!m_common_table_expressions.is_empty()); + } + + bool recursive() const { return m_recursive; } + const NonnullRefPtrVector<CommonTableExpression>& common_table_expressions() const { return m_common_table_expressions; } + +private: + bool m_recursive; + NonnullRefPtrVector<CommonTableExpression> m_common_table_expressions; +}; + +class QualifiedTableName : public ASTNode { +public: + QualifiedTableName(String schema_name, String table_name, String alias) + : m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_alias(move(alias)) + { + } + + const String& schema_name() const { return m_schema_name; } + const String& table_name() const { return m_table_name; } + const String& alias() const { return m_alias; } + +private: + String m_schema_name; + String m_table_name; + String m_alias; +}; + +class ReturningClause : public ASTNode { +public: + struct ColumnClause { + NonnullRefPtr<Expression> expression; + String column_alias; + }; + + ReturningClause() = default; + + explicit ReturningClause(Vector<ColumnClause> columns) + : m_columns(move(columns)) + { + } + + bool return_all_columns() const { return m_columns.is_empty(); }; + const Vector<ColumnClause>& columns() const { return m_columns; } + +private: + Vector<ColumnClause> m_columns; +}; + +enum class ResultType { + All, + Table, + Expression, +}; + +class ResultColumn : public ASTNode { +public: + ResultColumn() = default; + + explicit ResultColumn(String table_name) + : m_type(ResultType::Table) + , m_table_name(move(table_name)) + { + } + + ResultColumn(NonnullRefPtr<Expression> expression, String column_alias) + : m_type(ResultType::Expression) + , m_expression(move(expression)) + , m_column_alias(move(column_alias)) + { + } + + ResultType type() const { return m_type; } + + bool select_from_table() const { return !m_table_name.is_null(); } + const String& table_name() const { return m_table_name; } + + bool select_from_expression() const { return !m_expression.is_null(); } + const RefPtr<Expression>& expression() const { return m_expression; } + const String& column_alias() const { return m_column_alias; } + +private: + ResultType m_type { ResultType::All }; + + String m_table_name {}; + + RefPtr<Expression> m_expression {}; + String m_column_alias {}; +}; + +class GroupByClause : public ASTNode { +public: + GroupByClause(NonnullRefPtrVector<Expression> group_by_list, RefPtr<Expression> having_clause) + : m_group_by_list(move(group_by_list)) + , m_having_clause(move(having_clause)) + { + VERIFY(!m_group_by_list.is_empty()); + } + + const NonnullRefPtrVector<Expression>& group_by_list() const { return m_group_by_list; } + const RefPtr<Expression>& having_clause() const { return m_having_clause; } + +private: + NonnullRefPtrVector<Expression> m_group_by_list; + RefPtr<Expression> m_having_clause; +}; + +class TableOrSubquery : public ASTNode { +public: + TableOrSubquery() = default; + + TableOrSubquery(String schema_name, String table_name, String table_alias) + : m_is_table(true) + , m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_table_alias(move(table_alias)) + { + } + + explicit TableOrSubquery(NonnullRefPtrVector<TableOrSubquery> subqueries) + : m_is_subquery(!subqueries.is_empty()) + , m_subqueries(move(subqueries)) + { + } + + bool is_table() const { return m_is_table; } + const String& schema_name() const { return m_schema_name; } + const String& table_name() const { return m_table_name; } + const String& table_alias() const { return m_table_alias; } + + bool is_subquery() const { return m_is_subquery; } + const NonnullRefPtrVector<TableOrSubquery>& subqueries() const { return m_subqueries; } + +private: + bool m_is_table { false }; + String m_schema_name {}; + String m_table_name {}; + String m_table_alias {}; + + bool m_is_subquery { false }; + NonnullRefPtrVector<TableOrSubquery> m_subqueries {}; +}; + +enum class Order { + Ascending, + Descending, +}; + +enum class Nulls { + First, + Last, +}; + +class OrderingTerm : public ASTNode { +public: + OrderingTerm(NonnullRefPtr<Expression> expression, String collation_name, Order order, Nulls nulls) + : m_expression(move(expression)) + , m_collation_name(move(collation_name)) + , m_order(order) + , m_nulls(nulls) + { + } + + const NonnullRefPtr<Expression>& expression() const { return m_expression; } + const String& collation_name() const { return m_collation_name; } + Order order() const { return m_order; } + Nulls nulls() const { return m_nulls; } + +private: + NonnullRefPtr<Expression> m_expression; + String m_collation_name; + Order m_order; + Nulls m_nulls; +}; + +class LimitClause : public ASTNode { +public: + LimitClause(NonnullRefPtr<Expression> limit_expression, RefPtr<Expression> offset_expression) + : m_limit_expression(move(limit_expression)) + , m_offset_expression(move(offset_expression)) + { + } + + const NonnullRefPtr<Expression>& limit_expression() const { return m_limit_expression; } + const RefPtr<Expression>& offset_expression() const { return m_offset_expression; } + +private: + NonnullRefPtr<Expression> m_limit_expression; + RefPtr<Expression> m_offset_expression; +}; + +//================================================================================================== +// Expressions +//================================================================================================== + +class Expression : public ASTNode { +}; + +class ErrorExpression final : public Expression { +}; + +class NumericLiteral : public Expression { +public: + explicit NumericLiteral(double value) + : m_value(value) + { + } + + double value() const { return m_value; } + +private: + double m_value; +}; + +class StringLiteral : public Expression { +public: + explicit StringLiteral(String value) + : m_value(move(value)) + { + } + + const String& value() const { return m_value; } + +private: + String m_value; +}; + +class BlobLiteral : public Expression { +public: + explicit BlobLiteral(String value) + : m_value(move(value)) + { + } + + const String& value() const { return m_value; } + +private: + String m_value; +}; + +class NullLiteral : public Expression { +}; + +class NestedExpression : public Expression { +public: + const NonnullRefPtr<Expression>& expression() const { return m_expression; } + +protected: + explicit NestedExpression(NonnullRefPtr<Expression> expression) + : m_expression(move(expression)) + { + } + +private: + NonnullRefPtr<Expression> m_expression; +}; + +class NestedDoubleExpression : public Expression { +public: + const NonnullRefPtr<Expression>& lhs() const { return m_lhs; } + const NonnullRefPtr<Expression>& rhs() const { return m_rhs; } + +protected: + NestedDoubleExpression(NonnullRefPtr<Expression> lhs, NonnullRefPtr<Expression> rhs) + : m_lhs(move(lhs)) + , m_rhs(move(rhs)) + { + } + +private: + NonnullRefPtr<Expression> m_lhs; + NonnullRefPtr<Expression> m_rhs; +}; + +class InvertibleNestedExpression : public NestedExpression { +public: + bool invert_expression() const { return m_invert_expression; } + +protected: + InvertibleNestedExpression(NonnullRefPtr<Expression> expression, bool invert_expression) + : NestedExpression(move(expression)) + , m_invert_expression(invert_expression) + { + } + +private: + bool m_invert_expression; +}; + +class InvertibleNestedDoubleExpression : public NestedDoubleExpression { +public: + bool invert_expression() const { return m_invert_expression; } + +protected: + InvertibleNestedDoubleExpression(NonnullRefPtr<Expression> lhs, NonnullRefPtr<Expression> rhs, bool invert_expression) + : NestedDoubleExpression(move(lhs), move(rhs)) + , m_invert_expression(invert_expression) + { + } + +private: + bool m_invert_expression; +}; + +class ColumnNameExpression : public Expression { +public: + ColumnNameExpression(String schema_name, String table_name, String column_name) + : m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_column_name(move(column_name)) + { + } + + const String& schema_name() const { return m_schema_name; } + const String& table_name() const { return m_table_name; } + const String& column_name() const { return m_column_name; } + +private: + String m_schema_name; + String m_table_name; + String m_column_name; +}; + +enum class UnaryOperator { + Minus, + Plus, + BitwiseNot, + Not, +}; + +class UnaryOperatorExpression : public NestedExpression { +public: + UnaryOperatorExpression(UnaryOperator type, NonnullRefPtr<Expression> expression) + : NestedExpression(move(expression)) + , m_type(type) + { + } + + UnaryOperator type() const { return m_type; } + +private: + UnaryOperator m_type; +}; + +enum class BinaryOperator { + // Note: These are in order of highest-to-lowest operator precedence. + Concatenate, + Multiplication, + Division, + Modulo, + Plus, + Minus, + ShiftLeft, + ShiftRight, + BitwiseAnd, + BitwiseOr, + LessThan, + LessThanEquals, + GreaterThan, + GreaterThanEquals, + Equals, + NotEquals, + And, + Or, +}; + +class BinaryOperatorExpression : public NestedDoubleExpression { +public: + BinaryOperatorExpression(BinaryOperator type, NonnullRefPtr<Expression> lhs, NonnullRefPtr<Expression> rhs) + : NestedDoubleExpression(move(lhs), move(rhs)) + , m_type(type) + { + } + + BinaryOperator type() const { return m_type; } + +private: + BinaryOperator m_type; +}; + +class ChainedExpression : public Expression { +public: + explicit ChainedExpression(NonnullRefPtrVector<Expression> expressions) + : m_expressions(move(expressions)) + { + } + + const NonnullRefPtrVector<Expression>& expressions() const { return m_expressions; } + +private: + NonnullRefPtrVector<Expression> m_expressions; +}; + +class CastExpression : public NestedExpression { +public: + CastExpression(NonnullRefPtr<Expression> expression, NonnullRefPtr<TypeName> type_name) + : NestedExpression(move(expression)) + , m_type_name(move(type_name)) + { + } + + const NonnullRefPtr<TypeName>& type_name() const { return m_type_name; } + +private: + NonnullRefPtr<TypeName> m_type_name; +}; + +class CaseExpression : public Expression { +public: + struct WhenThenClause { + NonnullRefPtr<Expression> when; + NonnullRefPtr<Expression> then; + }; + + CaseExpression(RefPtr<Expression> case_expression, Vector<WhenThenClause> when_then_clauses, RefPtr<Expression> else_expression) + : m_case_expression(case_expression) + , m_when_then_clauses(when_then_clauses) + , m_else_expression(else_expression) + { + VERIFY(!m_when_then_clauses.is_empty()); + } + + const RefPtr<Expression>& case_expression() const { return m_case_expression; } + const Vector<WhenThenClause>& when_then_clauses() const { return m_when_then_clauses; } + const RefPtr<Expression>& else_expression() const { return m_else_expression; } + +private: + RefPtr<Expression> m_case_expression; + Vector<WhenThenClause> m_when_then_clauses; + RefPtr<Expression> m_else_expression; +}; + +class ExistsExpression : public Expression { +public: + ExistsExpression(NonnullRefPtr<Select> select_statement, bool invert_expression) + : m_select_statement(move(select_statement)) + , m_invert_expression(invert_expression) + { + } + + const NonnullRefPtr<Select>& select_statement() const { return m_select_statement; } + bool invert_expression() const { return m_invert_expression; } + +private: + NonnullRefPtr<Select> m_select_statement; + bool m_invert_expression; +}; + +class CollateExpression : public NestedExpression { +public: + CollateExpression(NonnullRefPtr<Expression> expression, String collation_name) + : NestedExpression(move(expression)) + , m_collation_name(move(collation_name)) + { + } + + const String& collation_name() const { return m_collation_name; } + +private: + String m_collation_name; +}; + +enum class MatchOperator { + Like, + Glob, + Match, + Regexp, +}; + +class MatchExpression : public InvertibleNestedDoubleExpression { +public: + MatchExpression(MatchOperator type, NonnullRefPtr<Expression> lhs, NonnullRefPtr<Expression> rhs, RefPtr<Expression> escape, bool invert_expression) + : InvertibleNestedDoubleExpression(move(lhs), move(rhs), invert_expression) + , m_type(type) + , m_escape(move(escape)) + { + } + + MatchOperator type() const { return m_type; } + const RefPtr<Expression>& escape() const { return m_escape; } + +private: + MatchOperator m_type; + RefPtr<Expression> m_escape; +}; + +class NullExpression : public InvertibleNestedExpression { +public: + NullExpression(NonnullRefPtr<Expression> expression, bool invert_expression) + : InvertibleNestedExpression(move(expression), invert_expression) + { + } +}; + +class IsExpression : public InvertibleNestedDoubleExpression { +public: + IsExpression(NonnullRefPtr<Expression> lhs, NonnullRefPtr<Expression> rhs, bool invert_expression) + : InvertibleNestedDoubleExpression(move(lhs), move(rhs), invert_expression) + { + } +}; + +class BetweenExpression : public InvertibleNestedDoubleExpression { +public: + BetweenExpression(NonnullRefPtr<Expression> expression, NonnullRefPtr<Expression> lhs, NonnullRefPtr<Expression> rhs, bool invert_expression) + : InvertibleNestedDoubleExpression(move(lhs), move(rhs), invert_expression) + , m_expression(move(expression)) + { + } + + const NonnullRefPtr<Expression>& expression() const { return m_expression; } + +private: + NonnullRefPtr<Expression> m_expression; +}; + +class InSelectionExpression : public InvertibleNestedExpression { +public: + InSelectionExpression(NonnullRefPtr<Expression> expression, NonnullRefPtr<Select> select_statement, bool invert_expression) + : InvertibleNestedExpression(move(expression), invert_expression) + , m_select_statement(move(select_statement)) + { + } + + const NonnullRefPtr<Select>& select_statement() const { return m_select_statement; } + +private: + NonnullRefPtr<Select> m_select_statement; +}; + +class InChainedExpression : public InvertibleNestedExpression { +public: + InChainedExpression(NonnullRefPtr<Expression> expression, NonnullRefPtr<ChainedExpression> expression_chain, bool invert_expression) + : InvertibleNestedExpression(move(expression), invert_expression) + , m_expression_chain(move(expression_chain)) + { + } + + const NonnullRefPtr<ChainedExpression>& expression_chain() const { return m_expression_chain; } + +private: + NonnullRefPtr<ChainedExpression> m_expression_chain; +}; + +class InTableExpression : public InvertibleNestedExpression { +public: + InTableExpression(NonnullRefPtr<Expression> expression, String schema_name, String table_name, bool invert_expression) + : InvertibleNestedExpression(move(expression), invert_expression) + , m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + { + } + + const String& schema_name() const { return m_schema_name; } + const String& table_name() const { return m_table_name; } + +private: + String m_schema_name; + String m_table_name; +}; + +//================================================================================================== +// Statements +//================================================================================================== + +class Statement : public ASTNode { +}; + +class ErrorStatement final : public Statement { +}; + +class CreateTable : public Statement { +public: + CreateTable(String schema_name, String table_name, RefPtr<Select> select_statement, bool is_temporary, bool is_error_if_table_exists) + : m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_select_statement(move(select_statement)) + , m_is_temporary(is_temporary) + , m_is_error_if_table_exists(is_error_if_table_exists) + { + } + + CreateTable(String schema_name, String table_name, NonnullRefPtrVector<ColumnDefinition> columns, bool is_temporary, bool is_error_if_table_exists) + : m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_columns(move(columns)) + , m_is_temporary(is_temporary) + , m_is_error_if_table_exists(is_error_if_table_exists) + { + } + + const String& schema_name() const { return m_schema_name; } + const String& table_name() const { return m_table_name; } + + bool has_selection() const { return !m_select_statement.is_null(); } + const RefPtr<Select>& select_statement() const { return m_select_statement; } + + bool has_columns() const { return !m_columns.is_empty(); } + const NonnullRefPtrVector<ColumnDefinition>& columns() const { return m_columns; } + + bool is_temporary() const { return m_is_temporary; } + bool is_error_if_table_exists() const { return m_is_error_if_table_exists; } + +private: + String m_schema_name; + String m_table_name; + RefPtr<Select> m_select_statement; + NonnullRefPtrVector<ColumnDefinition> m_columns; + bool m_is_temporary; + bool m_is_error_if_table_exists; +}; + +class AlterTable : public Statement { +public: + const String& schema_name() const { return m_schema_name; } + const String& table_name() const { return m_table_name; } + +protected: + AlterTable(String schema_name, String table_name) + : m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + { + } + +private: + String m_schema_name; + String m_table_name; +}; + +class RenameTable : public AlterTable { +public: + RenameTable(String schema_name, String table_name, String new_table_name) + : AlterTable(move(schema_name), move(table_name)) + , m_new_table_name(move(new_table_name)) + { + } + + const String& new_table_name() const { return m_new_table_name; } + +private: + String m_new_table_name; +}; + +class RenameColumn : public AlterTable { +public: + RenameColumn(String schema_name, String table_name, String column_name, String new_column_name) + : AlterTable(move(schema_name), move(table_name)) + , m_column_name(move(column_name)) + , m_new_column_name(move(new_column_name)) + { + } + + const String& column_name() const { return m_column_name; } + const String& new_column_name() const { return m_new_column_name; } + +private: + String m_column_name; + String m_new_column_name; +}; + +class AddColumn : public AlterTable { +public: + AddColumn(String schema_name, String table_name, NonnullRefPtr<ColumnDefinition> column) + : AlterTable(move(schema_name), move(table_name)) + , m_column(move(column)) + { + } + + const NonnullRefPtr<ColumnDefinition>& column() const { return m_column; } + +private: + NonnullRefPtr<ColumnDefinition> m_column; +}; + +class DropColumn : public AlterTable { +public: + DropColumn(String schema_name, String table_name, String column_name) + : AlterTable(move(schema_name), move(table_name)) + , m_column_name(move(column_name)) + { + } + + const String& column_name() const { return m_column_name; } + +private: + String m_column_name; +}; + +class DropTable : public Statement { +public: + DropTable(String schema_name, String table_name, bool is_error_if_table_does_not_exist) + : m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_is_error_if_table_does_not_exist(is_error_if_table_does_not_exist) + { + } + + const String& schema_name() const { return m_schema_name; } + const String& table_name() const { return m_table_name; } + bool is_error_if_table_does_not_exist() const { return m_is_error_if_table_does_not_exist; } + +private: + String m_schema_name; + String m_table_name; + bool m_is_error_if_table_does_not_exist; +}; + +enum class ConflictResolution { + Abort, + Fail, + Ignore, + Replace, + Rollback, +}; + +class Insert : public Statement { +public: + Insert(RefPtr<CommonTableExpressionList> common_table_expression_list, ConflictResolution conflict_resolution, String schema_name, String table_name, String alias, Vector<String> column_names, NonnullRefPtrVector<ChainedExpression> chained_expressions) + : m_common_table_expression_list(move(common_table_expression_list)) + , m_conflict_resolution(conflict_resolution) + , m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_alias(move(alias)) + , m_column_names(move(column_names)) + , m_chained_expressions(move(chained_expressions)) + { + } + + Insert(RefPtr<CommonTableExpressionList> common_table_expression_list, ConflictResolution conflict_resolution, String schema_name, String table_name, String alias, Vector<String> column_names, RefPtr<Select> select_statement) + : m_common_table_expression_list(move(common_table_expression_list)) + , m_conflict_resolution(conflict_resolution) + , m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_alias(move(alias)) + , m_column_names(move(column_names)) + , m_select_statement(move(select_statement)) + { + } + + Insert(RefPtr<CommonTableExpressionList> common_table_expression_list, ConflictResolution conflict_resolution, String schema_name, String table_name, String alias, Vector<String> column_names) + : m_common_table_expression_list(move(common_table_expression_list)) + , m_conflict_resolution(conflict_resolution) + , m_schema_name(move(schema_name)) + , m_table_name(move(table_name)) + , m_alias(move(alias)) + , m_column_names(move(column_names)) + { + } + + const RefPtr<CommonTableExpressionList>& common_table_expression_list() const { return m_common_table_expression_list; } + ConflictResolution conflict_resolution() const { return m_conflict_resolution; } + const String& schema_name() const { return m_schema_name; } + const String& table_name() const { return m_table_name; } + const String& alias() const { return m_alias; } + const Vector<String>& column_names() const { return m_column_names; } + + bool default_values() const { return !has_expressions() && !has_selection(); }; + + bool has_expressions() const { return !m_chained_expressions.is_empty(); } + const NonnullRefPtrVector<ChainedExpression>& chained_expressions() const { return m_chained_expressions; } + + bool has_selection() const { return !m_select_statement.is_null(); } + const RefPtr<Select>& select_statement() const { return m_select_statement; } + +private: + RefPtr<CommonTableExpressionList> m_common_table_expression_list; + ConflictResolution m_conflict_resolution; + String m_schema_name; + String m_table_name; + String m_alias; + Vector<String> m_column_names; + NonnullRefPtrVector<ChainedExpression> m_chained_expressions; + RefPtr<Select> m_select_statement; +}; + +class Update : public Statement { +public: + struct UpdateColumns { + Vector<String> column_names; + NonnullRefPtr<Expression> expression; + }; + + Update(RefPtr<CommonTableExpressionList> common_table_expression_list, ConflictResolution conflict_resolution, NonnullRefPtr<QualifiedTableName> qualified_table_name, Vector<UpdateColumns> update_columns, NonnullRefPtrVector<TableOrSubquery> table_or_subquery_list, RefPtr<Expression> where_clause, RefPtr<ReturningClause> returning_clause) + : m_common_table_expression_list(move(common_table_expression_list)) + , m_conflict_resolution(conflict_resolution) + , m_qualified_table_name(move(qualified_table_name)) + , m_update_columns(move(update_columns)) + , m_table_or_subquery_list(move(table_or_subquery_list)) + , m_where_clause(move(where_clause)) + , m_returning_clause(move(returning_clause)) + { + } + + const RefPtr<CommonTableExpressionList>& common_table_expression_list() const { return m_common_table_expression_list; } + ConflictResolution conflict_resolution() const { return m_conflict_resolution; } + const NonnullRefPtr<QualifiedTableName>& qualified_table_name() const { return m_qualified_table_name; } + const Vector<UpdateColumns>& update_columns() const { return m_update_columns; } + const NonnullRefPtrVector<TableOrSubquery>& table_or_subquery_list() const { return m_table_or_subquery_list; } + const RefPtr<Expression>& where_clause() const { return m_where_clause; } + const RefPtr<ReturningClause>& returning_clause() const { return m_returning_clause; } + +private: + RefPtr<CommonTableExpressionList> m_common_table_expression_list; + ConflictResolution m_conflict_resolution; + NonnullRefPtr<QualifiedTableName> m_qualified_table_name; + Vector<UpdateColumns> m_update_columns; + NonnullRefPtrVector<TableOrSubquery> m_table_or_subquery_list; + RefPtr<Expression> m_where_clause; + RefPtr<ReturningClause> m_returning_clause; +}; + +class Delete : public Statement { +public: + Delete(RefPtr<CommonTableExpressionList> common_table_expression_list, NonnullRefPtr<QualifiedTableName> qualified_table_name, RefPtr<Expression> where_clause, RefPtr<ReturningClause> returning_clause) + : m_common_table_expression_list(move(common_table_expression_list)) + , m_qualified_table_name(move(qualified_table_name)) + , m_where_clause(move(where_clause)) + , m_returning_clause(move(returning_clause)) + { + } + + const RefPtr<CommonTableExpressionList>& common_table_expression_list() const { return m_common_table_expression_list; } + const NonnullRefPtr<QualifiedTableName>& qualified_table_name() const { return m_qualified_table_name; } + const RefPtr<Expression>& where_clause() const { return m_where_clause; } + const RefPtr<ReturningClause>& returning_clause() const { return m_returning_clause; } + +private: + RefPtr<CommonTableExpressionList> m_common_table_expression_list; + NonnullRefPtr<QualifiedTableName> m_qualified_table_name; + RefPtr<Expression> m_where_clause; + RefPtr<ReturningClause> m_returning_clause; +}; + +class Select : public Statement { +public: + Select(RefPtr<CommonTableExpressionList> common_table_expression_list, bool select_all, NonnullRefPtrVector<ResultColumn> result_column_list, NonnullRefPtrVector<TableOrSubquery> table_or_subquery_list, RefPtr<Expression> where_clause, RefPtr<GroupByClause> group_by_clause, NonnullRefPtrVector<OrderingTerm> ordering_term_list, RefPtr<LimitClause> limit_clause) + : m_common_table_expression_list(move(common_table_expression_list)) + , m_select_all(move(select_all)) + , m_result_column_list(move(result_column_list)) + , m_table_or_subquery_list(move(table_or_subquery_list)) + , m_where_clause(move(where_clause)) + , m_group_by_clause(move(group_by_clause)) + , m_ordering_term_list(move(ordering_term_list)) + , m_limit_clause(move(limit_clause)) + { + } + + const RefPtr<CommonTableExpressionList>& common_table_expression_list() const { return m_common_table_expression_list; } + bool select_all() const { return m_select_all; } + const NonnullRefPtrVector<ResultColumn>& result_column_list() const { return m_result_column_list; } + const NonnullRefPtrVector<TableOrSubquery>& table_or_subquery_list() const { return m_table_or_subquery_list; } + const RefPtr<Expression>& where_clause() const { return m_where_clause; } + const RefPtr<GroupByClause>& group_by_clause() const { return m_group_by_clause; } + const NonnullRefPtrVector<OrderingTerm>& ordering_term_list() const { return m_ordering_term_list; } + const RefPtr<LimitClause>& limit_clause() const { return m_limit_clause; } + +private: + RefPtr<CommonTableExpressionList> m_common_table_expression_list; + bool m_select_all; + NonnullRefPtrVector<ResultColumn> m_result_column_list; + NonnullRefPtrVector<TableOrSubquery> m_table_or_subquery_list; + RefPtr<Expression> m_where_clause; + RefPtr<GroupByClause> m_group_by_clause; + NonnullRefPtrVector<OrderingTerm> m_ordering_term_list; + RefPtr<LimitClause> m_limit_clause; +}; + +} diff --git a/Userland/Libraries/LibSQL/AST/Lexer.cpp b/Userland/Libraries/LibSQL/AST/Lexer.cpp new file mode 100644 index 0000000000..49c1602243 --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/Lexer.cpp @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "Lexer.h" +#include <AK/Debug.h> +#include <ctype.h> + +namespace SQL::AST { + +HashMap<String, TokenType> Lexer::s_keywords; +HashMap<char, TokenType> Lexer::s_one_char_tokens; +HashMap<String, TokenType> Lexer::s_two_char_tokens; + +Lexer::Lexer(StringView source) + : m_source(source) +{ + if (s_keywords.is_empty()) { +#define __ENUMERATE_SQL_TOKEN(value, type, category) \ + if (TokenCategory::category == TokenCategory::Keyword) \ + s_keywords.set(value, TokenType::type); + ENUMERATE_SQL_TOKENS +#undef __ENUMERATE_SQL_TOKEN + } + + if (s_one_char_tokens.is_empty()) { +#define __ENUMERATE_SQL_TOKEN(value, type, category) \ + if (TokenCategory::category != TokenCategory::Keyword && StringView(value).length() == 1) \ + s_one_char_tokens.set(value[0], TokenType::type); + ENUMERATE_SQL_TOKENS +#undef __ENUMERATE_SQL_TOKEN + } + + if (s_two_char_tokens.is_empty()) { +#define __ENUMERATE_SQL_TOKEN(value, type, category) \ + if (TokenCategory::category != TokenCategory::Keyword && StringView(value).length() == 2) \ + s_two_char_tokens.set(value, TokenType::type); + ENUMERATE_SQL_TOKENS +#undef __ENUMERATE_SQL_TOKEN + } + + consume(); +} + +Token Lexer::next() +{ + bool found_invalid_comment = consume_whitespace_and_comments(); + + size_t value_start = m_position; + size_t value_start_line_number = m_line_number; + size_t value_start_column_number = m_line_column; + auto token_type = TokenType::Invalid; + + if (is_eof()) { + token_type = found_invalid_comment ? TokenType::Invalid : TokenType::Eof; + } else if (is_numeric_literal_start()) { + token_type = TokenType::NumericLiteral; + if (!consume_numeric_literal()) + token_type = TokenType::Invalid; + } else if (is_string_literal_start()) { + token_type = TokenType::StringLiteral; + if (!consume_string_literal()) + token_type = TokenType::Invalid; + } else if (is_blob_literal_start()) { + token_type = TokenType::BlobLiteral; + if (!consume_blob_literal()) + token_type = TokenType::Invalid; + } else if (is_identifier_start()) { + do { + consume(); + } while (is_identifier_middle()); + + if (auto it = s_keywords.find(m_source.substring_view(value_start - 1, m_position - value_start)); it != s_keywords.end()) { + token_type = it->value; + } else { + token_type = TokenType::Identifier; + } + } else { + bool found_two_char_token = false; + if (m_position < m_source.length()) { + if (auto it = s_two_char_tokens.find(m_source.substring_view(m_position - 1, 2)); it != s_two_char_tokens.end()) { + found_two_char_token = true; + token_type = it->value; + consume(); + consume(); + } + } + + bool found_one_char_token = false; + if (!found_two_char_token) { + if (auto it = s_one_char_tokens.find(m_current_char); it != s_one_char_tokens.end()) { + found_one_char_token = true; + token_type = it->value; + consume(); + } + } + + if (!found_two_char_token && !found_one_char_token) { + token_type = TokenType::Invalid; + consume(); + } + } + + Token token(token_type, m_source.substring_view(value_start - 1, m_position - value_start), value_start_line_number, value_start_column_number); + + if constexpr (SQL_DEBUG) { + dbgln("------------------------------"); + dbgln("Token: {}", token.name()); + dbgln("Value: {}", token.value()); + dbgln("Line: {}, Column: {}", token.line_number(), token.line_column()); + dbgln("------------------------------"); + } + + return token; +} + +void Lexer::consume() +{ + auto did_reach_eof = [this] { + if (m_position != m_source.length()) + return false; + m_eof = true; + m_current_char = '\0'; + ++m_line_column; + ++m_position; + return true; + }; + + if (m_position > m_source.length()) + return; + + if (did_reach_eof()) + return; + + if (is_line_break()) { + ++m_line_number; + m_line_column = 1; + } else { + ++m_line_column; + } + + m_current_char = m_source[m_position++]; +} + +bool Lexer::consume_whitespace_and_comments() +{ + bool found_invalid_comment = false; + + while (true) { + if (isspace(m_current_char)) { + do { + consume(); + } while (isspace(m_current_char)); + } else if (is_line_comment_start()) { + consume(); + do { + consume(); + } while (!is_eof() && !is_line_break()); + } else if (is_block_comment_start()) { + consume(); + do { + consume(); + } while (!is_eof() && !is_block_comment_end()); + if (is_eof()) + found_invalid_comment = true; + consume(); // consume * + if (is_eof()) + found_invalid_comment = true; + consume(); // consume / + } else { + break; + } + } + + return found_invalid_comment; +} + +bool Lexer::consume_numeric_literal() +{ + // https://sqlite.org/syntax/numeric-literal.html + bool is_valid_numeric_literal = true; + + if (m_current_char == '0') { + consume(); + if (m_current_char == '.') { + consume(); + while (isdigit(m_current_char)) + consume(); + if (m_current_char == 'e' || m_current_char == 'E') + is_valid_numeric_literal = consume_exponent(); + } else if (m_current_char == 'e' || m_current_char == 'E') { + is_valid_numeric_literal = consume_exponent(); + } else if (m_current_char == 'x' || m_current_char == 'X') { + is_valid_numeric_literal = consume_hexadecimal_number(); + } else if (isdigit(m_current_char)) { + do { + consume(); + } while (isdigit(m_current_char)); + } + } else { + do { + consume(); + } while (isdigit(m_current_char)); + + if (m_current_char == '.') { + consume(); + while (isdigit(m_current_char)) + consume(); + } + if (m_current_char == 'e' || m_current_char == 'E') + is_valid_numeric_literal = consume_exponent(); + } + + return is_valid_numeric_literal; +} + +bool Lexer::consume_string_literal() +{ + // https://sqlite.org/lang_expr.html - See "3. Literal Values (Constants)" + bool is_valid_string_literal = true; + consume(); + + while (!is_eof() && !is_string_literal_end()) + consume(); + + if (is_eof()) + is_valid_string_literal = false; + consume(); + + return is_valid_string_literal; +} + +bool Lexer::consume_blob_literal() +{ + // https://sqlite.org/lang_expr.html - See "3. Literal Values (Constants)" + consume(); + return consume_string_literal(); +} + +bool Lexer::consume_exponent() +{ + consume(); + if (m_current_char == '-' || m_current_char == '+') + consume(); + + if (!isdigit(m_current_char)) + return false; + + while (isdigit(m_current_char)) { + consume(); + } + return true; +} + +bool Lexer::consume_hexadecimal_number() +{ + consume(); + if (!isxdigit(m_current_char)) + return false; + + while (isxdigit(m_current_char)) + consume(); + + return true; +} + +bool Lexer::match(char a, char b) const +{ + if (m_position >= m_source.length()) + return false; + + return m_current_char == a && m_source[m_position] == b; +} + +bool Lexer::is_identifier_start() const +{ + return isalpha(m_current_char) || m_current_char == '_'; +} + +bool Lexer::is_identifier_middle() const +{ + return is_identifier_start() || isdigit(m_current_char); +} + +bool Lexer::is_numeric_literal_start() const +{ + return isdigit(m_current_char) || (m_current_char == '.' && m_position < m_source.length() && isdigit(m_source[m_position])); +} + +bool Lexer::is_string_literal_start() const +{ + return m_current_char == '\''; +} + +bool Lexer::is_string_literal_end() const +{ + return m_current_char == '\'' && !(m_position < m_source.length() && m_source[m_position] == '\''); +} + +bool Lexer::is_blob_literal_start() const +{ + return match('x', '\'') || match('X', '\''); +} + +bool Lexer::is_line_comment_start() const +{ + return match('-', '-'); +} + +bool Lexer::is_block_comment_start() const +{ + return match('/', '*'); +} + +bool Lexer::is_block_comment_end() const +{ + return match('*', '/'); +} + +bool Lexer::is_line_break() const +{ + return m_current_char == '\n'; +} + +bool Lexer::is_eof() const +{ + return m_eof; +} + +} diff --git a/Userland/Libraries/LibSQL/AST/Lexer.h b/Userland/Libraries/LibSQL/AST/Lexer.h new file mode 100644 index 0000000000..77abcdec45 --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/Lexer.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include "Token.h" +#include <AK/HashMap.h> +#include <AK/String.h> +#include <AK/StringView.h> + +namespace SQL::AST { + +class Lexer { +public: + explicit Lexer(StringView source); + + Token next(); + +private: + void consume(); + + bool consume_whitespace_and_comments(); + bool consume_numeric_literal(); + bool consume_string_literal(); + bool consume_blob_literal(); + bool consume_exponent(); + bool consume_hexadecimal_number(); + + bool match(char a, char b) const; + bool is_identifier_start() const; + bool is_identifier_middle() const; + bool is_numeric_literal_start() const; + bool is_string_literal_start() const; + bool is_string_literal_end() const; + bool is_blob_literal_start() const; + bool is_line_comment_start() const; + bool is_block_comment_start() const; + bool is_block_comment_end() const; + bool is_line_break() const; + bool is_eof() const; + + static HashMap<String, TokenType> s_keywords; + static HashMap<char, TokenType> s_one_char_tokens; + static HashMap<String, TokenType> s_two_char_tokens; + + StringView m_source; + size_t m_line_number { 1 }; + size_t m_line_column { 0 }; + char m_current_char { 0 }; + bool m_eof { false }; + size_t m_position { 0 }; +}; + +} diff --git a/Userland/Libraries/LibSQL/AST/Parser.cpp b/Userland/Libraries/LibSQL/AST/Parser.cpp new file mode 100644 index 0000000000..ae771c9ae0 --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/Parser.cpp @@ -0,0 +1,1092 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "Parser.h" +#include <AK/ScopeGuard.h> +#include <AK/TypeCasts.h> + +namespace SQL::AST { + +Parser::Parser(Lexer lexer) + : m_parser_state(move(lexer)) +{ +} + +NonnullRefPtr<Statement> Parser::next_statement() +{ + auto terminate_statement = [this](auto statement) { + consume(TokenType::SemiColon); + return statement; + }; + + if (match(TokenType::With)) { + auto common_table_expression_list = parse_common_table_expression_list(); + if (!common_table_expression_list) + return create_ast_node<ErrorStatement>(); + + return terminate_statement(parse_statement_with_expression_list(move(common_table_expression_list))); + } + + return terminate_statement(parse_statement()); +} + +NonnullRefPtr<Statement> Parser::parse_statement() +{ + switch (m_parser_state.m_token.type()) { + case TokenType::Create: + return parse_create_table_statement(); + case TokenType::Alter: + return parse_alter_table_statement(); + case TokenType::Drop: + return parse_drop_table_statement(); + case TokenType::Insert: + return parse_insert_statement({}); + case TokenType::Update: + return parse_update_statement({}); + case TokenType::Delete: + return parse_delete_statement({}); + case TokenType::Select: + return parse_select_statement({}); + default: + expected("CREATE, ALTER, DROP, INSERT, UPDATE, DELETE, or SELECT"); + return create_ast_node<ErrorStatement>(); + } +} + +NonnullRefPtr<Statement> Parser::parse_statement_with_expression_list(RefPtr<CommonTableExpressionList> common_table_expression_list) +{ + switch (m_parser_state.m_token.type()) { + case TokenType::Insert: + return parse_insert_statement(move(common_table_expression_list)); + case TokenType::Update: + return parse_update_statement(move(common_table_expression_list)); + case TokenType::Delete: + return parse_delete_statement(move(common_table_expression_list)); + case TokenType::Select: + return parse_select_statement(move(common_table_expression_list)); + default: + expected("INSERT, UPDATE, DELETE or SELECT"); + return create_ast_node<ErrorStatement>(); + } +} + +NonnullRefPtr<CreateTable> Parser::parse_create_table_statement() +{ + // https://sqlite.org/lang_createtable.html + consume(TokenType::Create); + + bool is_temporary = false; + if (consume_if(TokenType::Temp) || consume_if(TokenType::Temporary)) + is_temporary = true; + + consume(TokenType::Table); + + bool is_error_if_table_exists = true; + if (consume_if(TokenType::If)) { + consume(TokenType::Not); + consume(TokenType::Exists); + is_error_if_table_exists = false; + } + + String schema_name; + String table_name; + parse_schema_and_table_name(schema_name, table_name); + + if (consume_if(TokenType::As)) { + auto select_statement = parse_select_statement({}); + return create_ast_node<CreateTable>(move(schema_name), move(table_name), move(select_statement), is_temporary, is_error_if_table_exists); + } + + NonnullRefPtrVector<ColumnDefinition> column_definitions; + parse_comma_separated_list(true, [&]() { column_definitions.append(parse_column_definition()); }); + + // FIXME: Parse "table-constraint". + + return create_ast_node<CreateTable>(move(schema_name), move(table_name), move(column_definitions), is_temporary, is_error_if_table_exists); +} + +NonnullRefPtr<AlterTable> Parser::parse_alter_table_statement() +{ + // https://sqlite.org/lang_altertable.html + consume(TokenType::Alter); + consume(TokenType::Table); + + String schema_name; + String table_name; + parse_schema_and_table_name(schema_name, table_name); + + if (consume_if(TokenType::Add)) { + consume_if(TokenType::Column); // COLUMN is optional. + auto column = parse_column_definition(); + return create_ast_node<AddColumn>(move(schema_name), move(table_name), move(column)); + } + + if (consume_if(TokenType::Drop)) { + consume_if(TokenType::Column); // COLUMN is optional. + auto column = consume(TokenType::Identifier).value(); + return create_ast_node<DropColumn>(move(schema_name), move(table_name), move(column)); + } + + consume(TokenType::Rename); + + if (consume_if(TokenType::To)) { + auto new_table_name = consume(TokenType::Identifier).value(); + return create_ast_node<RenameTable>(move(schema_name), move(table_name), move(new_table_name)); + } + + consume_if(TokenType::Column); // COLUMN is optional. + auto column_name = consume(TokenType::Identifier).value(); + consume(TokenType::To); + auto new_column_name = consume(TokenType::Identifier).value(); + return create_ast_node<RenameColumn>(move(schema_name), move(table_name), move(column_name), move(new_column_name)); +} + +NonnullRefPtr<DropTable> Parser::parse_drop_table_statement() +{ + // https://sqlite.org/lang_droptable.html + consume(TokenType::Drop); + consume(TokenType::Table); + + bool is_error_if_table_does_not_exist = true; + if (consume_if(TokenType::If)) { + consume(TokenType::Exists); + is_error_if_table_does_not_exist = false; + } + + String schema_name; + String table_name; + parse_schema_and_table_name(schema_name, table_name); + + return create_ast_node<DropTable>(move(schema_name), move(table_name), is_error_if_table_does_not_exist); +} + +NonnullRefPtr<Insert> Parser::parse_insert_statement(RefPtr<CommonTableExpressionList> common_table_expression_list) +{ + // https://sqlite.org/lang_insert.html + consume(TokenType::Insert); + auto conflict_resolution = parse_conflict_resolution(); + consume(TokenType::Into); + + String schema_name; + String table_name; + parse_schema_and_table_name(schema_name, table_name); + + String alias; + if (consume_if(TokenType::As)) + alias = consume(TokenType::Identifier).value(); + + Vector<String> column_names; + if (match(TokenType::ParenOpen)) + parse_comma_separated_list(true, [&]() { column_names.append(consume(TokenType::Identifier).value()); }); + + NonnullRefPtrVector<ChainedExpression> chained_expressions; + RefPtr<Select> select_statement; + + if (consume_if(TokenType::Values)) { + parse_comma_separated_list(false, [&]() { + if (auto chained_expression = parse_chained_expression(); chained_expression.has_value()) + chained_expressions.append(move(chained_expression.value())); + else + expected("Chained expression"); + }); + } else if (match(TokenType::Select)) { + select_statement = parse_select_statement({}); + } else { + consume(TokenType::Default); + consume(TokenType::Values); + } + + RefPtr<ReturningClause> returning_clause; + if (match(TokenType::Returning)) + returning_clause = parse_returning_clause(); + + // FIXME: Parse 'upsert-clause'. + + if (!chained_expressions.is_empty()) + return create_ast_node<Insert>(move(common_table_expression_list), conflict_resolution, move(schema_name), move(table_name), move(alias), move(column_names), move(chained_expressions)); + if (!select_statement.is_null()) + return create_ast_node<Insert>(move(common_table_expression_list), conflict_resolution, move(schema_name), move(table_name), move(alias), move(column_names), move(select_statement)); + + return create_ast_node<Insert>(move(common_table_expression_list), conflict_resolution, move(schema_name), move(table_name), move(alias), move(column_names)); +} + +NonnullRefPtr<Update> Parser::parse_update_statement(RefPtr<CommonTableExpressionList> common_table_expression_list) +{ + // https://sqlite.org/lang_update.html + consume(TokenType::Update); + auto conflict_resolution = parse_conflict_resolution(); + auto qualified_table_name = parse_qualified_table_name(); + consume(TokenType::Set); + + Vector<Update::UpdateColumns> update_columns; + parse_comma_separated_list(false, [&]() { + Vector<String> column_names; + if (match(TokenType::ParenOpen)) { + parse_comma_separated_list(true, [&]() { column_names.append(consume(TokenType::Identifier).value()); }); + } else { + column_names.append(consume(TokenType::Identifier).value()); + } + + consume(TokenType::Equals); + update_columns.append({ move(column_names), parse_expression() }); + }); + + NonnullRefPtrVector<TableOrSubquery> table_or_subquery_list; + if (consume_if(TokenType::From)) { + // FIXME: Parse join-clause. + parse_comma_separated_list(false, [&]() { table_or_subquery_list.append(parse_table_or_subquery()); }); + } + + RefPtr<Expression> where_clause; + if (consume_if(TokenType::Where)) + where_clause = parse_expression(); + + RefPtr<ReturningClause> returning_clause; + if (match(TokenType::Returning)) + returning_clause = parse_returning_clause(); + + return create_ast_node<Update>(move(common_table_expression_list), conflict_resolution, move(qualified_table_name), move(update_columns), move(table_or_subquery_list), move(where_clause), move(returning_clause)); +} + +NonnullRefPtr<Delete> Parser::parse_delete_statement(RefPtr<CommonTableExpressionList> common_table_expression_list) +{ + // https://sqlite.org/lang_delete.html + consume(TokenType::Delete); + consume(TokenType::From); + auto qualified_table_name = parse_qualified_table_name(); + + RefPtr<Expression> where_clause; + if (consume_if(TokenType::Where)) + where_clause = parse_expression(); + + RefPtr<ReturningClause> returning_clause; + if (match(TokenType::Returning)) + returning_clause = parse_returning_clause(); + + return create_ast_node<Delete>(move(common_table_expression_list), move(qualified_table_name), move(where_clause), move(returning_clause)); +} + +NonnullRefPtr<Select> Parser::parse_select_statement(RefPtr<CommonTableExpressionList> common_table_expression_list) +{ + // https://sqlite.org/lang_select.html + consume(TokenType::Select); + + bool select_all = !consume_if(TokenType::Distinct); + consume_if(TokenType::All); // ALL is the default, so ignore it if specified. + + NonnullRefPtrVector<ResultColumn> result_column_list; + parse_comma_separated_list(false, [&]() { result_column_list.append(parse_result_column()); }); + + NonnullRefPtrVector<TableOrSubquery> table_or_subquery_list; + if (consume_if(TokenType::From)) { + // FIXME: Parse join-clause. + parse_comma_separated_list(false, [&]() { table_or_subquery_list.append(parse_table_or_subquery()); }); + } + + RefPtr<Expression> where_clause; + if (consume_if(TokenType::Where)) + where_clause = parse_expression(); + + RefPtr<GroupByClause> group_by_clause; + if (consume_if(TokenType::Group)) { + consume(TokenType::By); + + NonnullRefPtrVector<Expression> group_by_list; + parse_comma_separated_list(false, [&]() { group_by_list.append(parse_expression()); }); + + if (!group_by_list.is_empty()) { + RefPtr<Expression> having_clause; + if (consume_if(TokenType::Having)) + having_clause = parse_expression(); + + group_by_clause = create_ast_node<GroupByClause>(move(group_by_list), move(having_clause)); + } + } + + // FIXME: Parse 'WINDOW window-name AS window-defn'. + // FIXME: Parse 'compound-operator'. + + NonnullRefPtrVector<OrderingTerm> ordering_term_list; + if (consume_if(TokenType::Order)) { + consume(TokenType::By); + parse_comma_separated_list(false, [&]() { ordering_term_list.append(parse_ordering_term()); }); + } + + RefPtr<LimitClause> limit_clause; + if (consume_if(TokenType::Limit)) { + auto limit_expression = parse_expression(); + + RefPtr<Expression> offset_expression; + if (consume_if(TokenType::Offset)) { + offset_expression = parse_expression(); + } else if (consume_if(TokenType::Comma)) { + // Note: The limit clause may instead be defined as "offset-expression, limit-expression", effectively reversing the + // order of the expressions. SQLite notes "this is counter-intuitive" and "to avoid confusion, programmers are strongly + // encouraged to ... avoid using a LIMIT clause with a comma-separated offset." + syntax_error("LIMIT clauses of the form 'LIMIT <expr>, <expr>' are not supported"); + } + + limit_clause = create_ast_node<LimitClause>(move(limit_expression), move(offset_expression)); + } + + return create_ast_node<Select>(move(common_table_expression_list), select_all, move(result_column_list), move(table_or_subquery_list), move(where_clause), move(group_by_clause), move(ordering_term_list), move(limit_clause)); +} + +RefPtr<CommonTableExpressionList> Parser::parse_common_table_expression_list() +{ + consume(TokenType::With); + bool recursive = consume_if(TokenType::Recursive); + + NonnullRefPtrVector<CommonTableExpression> common_table_expression; + parse_comma_separated_list(false, [&]() { common_table_expression.append(parse_common_table_expression()); }); + + if (common_table_expression.is_empty()) { + expected("Common table expression list"); + return {}; + } + + return create_ast_node<CommonTableExpressionList>(recursive, move(common_table_expression)); +} + +NonnullRefPtr<Expression> Parser::parse_expression() +{ + if (++m_parser_state.m_current_expression_depth > Limits::maximum_expression_tree_depth) { + syntax_error(String::formatted("Exceeded maximum expression tree depth of {}", Limits::maximum_expression_tree_depth)); + return create_ast_node<ErrorExpression>(); + } + + // https://sqlite.org/lang_expr.html + auto expression = parse_primary_expression(); + + if (match_secondary_expression()) + expression = parse_secondary_expression(move(expression)); + + // FIXME: Parse 'bind-parameter'. + // FIXME: Parse 'function-name'. + // FIXME: Parse 'raise-function'. + + --m_parser_state.m_current_expression_depth; + return expression; +} + +NonnullRefPtr<Expression> Parser::parse_primary_expression() +{ + if (auto expression = parse_literal_value_expression(); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_column_name_expression(); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_unary_operator_expression(); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_chained_expression(); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_cast_expression(); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_case_expression(); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_exists_expression(false); expression.has_value()) + return move(expression.value()); + + expected("Primary Expression"); + consume(); + + return create_ast_node<ErrorExpression>(); +} + +NonnullRefPtr<Expression> Parser::parse_secondary_expression(NonnullRefPtr<Expression> primary) +{ + if (auto expression = parse_binary_operator_expression(primary); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_collate_expression(primary); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_is_expression(primary); expression.has_value()) + return move(expression.value()); + + bool invert_expression = false; + if (consume_if(TokenType::Not)) + invert_expression = true; + + if (auto expression = parse_match_expression(primary, invert_expression); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_null_expression(primary, invert_expression); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_between_expression(primary, invert_expression); expression.has_value()) + return move(expression.value()); + + if (auto expression = parse_in_expression(primary, invert_expression); expression.has_value()) + return move(expression.value()); + + expected("Secondary Expression"); + consume(); + + return create_ast_node<ErrorExpression>(); +} + +bool Parser::match_secondary_expression() const +{ + return match(TokenType::Not) + || match(TokenType::DoublePipe) + || match(TokenType::Asterisk) + || match(TokenType::Divide) + || match(TokenType::Modulus) + || match(TokenType::Plus) + || match(TokenType::Minus) + || match(TokenType::ShiftLeft) + || match(TokenType::ShiftRight) + || match(TokenType::Ampersand) + || match(TokenType::Pipe) + || match(TokenType::LessThan) + || match(TokenType::LessThanEquals) + || match(TokenType::GreaterThan) + || match(TokenType::GreaterThanEquals) + || match(TokenType::Equals) + || match(TokenType::EqualsEquals) + || match(TokenType::NotEquals1) + || match(TokenType::NotEquals2) + || match(TokenType::And) + || match(TokenType::Or) + || match(TokenType::Collate) + || match(TokenType::Is) + || match(TokenType::Like) + || match(TokenType::Glob) + || match(TokenType::Match) + || match(TokenType::Regexp) + || match(TokenType::Isnull) + || match(TokenType::Notnull) + || match(TokenType::Between) + || match(TokenType::In); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_literal_value_expression() +{ + if (match(TokenType::NumericLiteral)) { + auto value = consume().double_value(); + return create_ast_node<NumericLiteral>(value); + } + if (match(TokenType::StringLiteral)) { + // TODO: Should the surrounding ' ' be removed here? + auto value = consume().value(); + return create_ast_node<StringLiteral>(value); + } + if (match(TokenType::BlobLiteral)) { + // TODO: Should the surrounding x' ' be removed here? + auto value = consume().value(); + return create_ast_node<BlobLiteral>(value); + } + if (consume_if(TokenType::Null)) + return create_ast_node<NullLiteral>(); + + return {}; +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_column_name_expression(String with_parsed_identifier, bool with_parsed_period) +{ + if (with_parsed_identifier.is_null() && !match(TokenType::Identifier)) + return {}; + + String first_identifier; + if (with_parsed_identifier.is_null()) + first_identifier = consume(TokenType::Identifier).value(); + else + first_identifier = move(with_parsed_identifier); + + String schema_name; + String table_name; + String column_name; + + if (with_parsed_period || consume_if(TokenType::Period)) { + String second_identifier = consume(TokenType::Identifier).value(); + + if (consume_if(TokenType::Period)) { + schema_name = move(first_identifier); + table_name = move(second_identifier); + column_name = consume(TokenType::Identifier).value(); + } else { + table_name = move(first_identifier); + column_name = move(second_identifier); + } + } else { + column_name = move(first_identifier); + } + + return create_ast_node<ColumnNameExpression>(move(schema_name), move(table_name), move(column_name)); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_unary_operator_expression() +{ + if (consume_if(TokenType::Minus)) + return create_ast_node<UnaryOperatorExpression>(UnaryOperator::Minus, parse_expression()); + + if (consume_if(TokenType::Plus)) + return create_ast_node<UnaryOperatorExpression>(UnaryOperator::Plus, parse_expression()); + + if (consume_if(TokenType::Tilde)) + return create_ast_node<UnaryOperatorExpression>(UnaryOperator::BitwiseNot, parse_expression()); + + if (consume_if(TokenType::Not)) { + if (match(TokenType::Exists)) + return parse_exists_expression(true); + else + return create_ast_node<UnaryOperatorExpression>(UnaryOperator::Not, parse_expression()); + } + + return {}; +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_binary_operator_expression(NonnullRefPtr<Expression> lhs) +{ + if (consume_if(TokenType::DoublePipe)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::Concatenate, move(lhs), parse_expression()); + + if (consume_if(TokenType::Asterisk)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::Multiplication, move(lhs), parse_expression()); + + if (consume_if(TokenType::Divide)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::Division, move(lhs), parse_expression()); + + if (consume_if(TokenType::Modulus)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::Modulo, move(lhs), parse_expression()); + + if (consume_if(TokenType::Plus)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::Plus, move(lhs), parse_expression()); + + if (consume_if(TokenType::Minus)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::Minus, move(lhs), parse_expression()); + + if (consume_if(TokenType::ShiftLeft)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::ShiftLeft, move(lhs), parse_expression()); + + if (consume_if(TokenType::ShiftRight)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::ShiftRight, move(lhs), parse_expression()); + + if (consume_if(TokenType::Ampersand)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::BitwiseAnd, move(lhs), parse_expression()); + + if (consume_if(TokenType::Pipe)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::BitwiseOr, move(lhs), parse_expression()); + + if (consume_if(TokenType::LessThan)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::LessThan, move(lhs), parse_expression()); + + if (consume_if(TokenType::LessThanEquals)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::LessThanEquals, move(lhs), parse_expression()); + + if (consume_if(TokenType::GreaterThan)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::GreaterThan, move(lhs), parse_expression()); + + if (consume_if(TokenType::GreaterThanEquals)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::GreaterThanEquals, move(lhs), parse_expression()); + + if (consume_if(TokenType::Equals) || consume_if(TokenType::EqualsEquals)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::Equals, move(lhs), parse_expression()); + + if (consume_if(TokenType::NotEquals1) || consume_if(TokenType::NotEquals2)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::NotEquals, move(lhs), parse_expression()); + + if (consume_if(TokenType::And)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::And, move(lhs), parse_expression()); + + if (consume_if(TokenType::Or)) + return create_ast_node<BinaryOperatorExpression>(BinaryOperator::Or, move(lhs), parse_expression()); + + return {}; +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_chained_expression() +{ + if (!consume_if(TokenType::ParenOpen)) + return {}; + + if (match(TokenType::Select)) + return parse_exists_expression(false, TokenType::Select); + + NonnullRefPtrVector<Expression> expressions; + parse_comma_separated_list(false, [&]() { expressions.append(parse_expression()); }); + consume(TokenType::ParenClose); + + return create_ast_node<ChainedExpression>(move(expressions)); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_cast_expression() +{ + if (!match(TokenType::Cast)) + return {}; + + consume(TokenType::Cast); + consume(TokenType::ParenOpen); + auto expression = parse_expression(); + consume(TokenType::As); + auto type_name = parse_type_name(); + consume(TokenType::ParenClose); + + return create_ast_node<CastExpression>(move(expression), move(type_name)); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_case_expression() +{ + if (!match(TokenType::Case)) + return {}; + + consume(); + + RefPtr<Expression> case_expression; + if (!match(TokenType::When)) { + case_expression = parse_expression(); + } + + Vector<CaseExpression::WhenThenClause> when_then_clauses; + + do { + consume(TokenType::When); + auto when = parse_expression(); + consume(TokenType::Then); + auto then = parse_expression(); + + when_then_clauses.append({ move(when), move(then) }); + + if (!match(TokenType::When)) + break; + } while (!match(TokenType::Eof)); + + RefPtr<Expression> else_expression; + if (consume_if(TokenType::Else)) + else_expression = parse_expression(); + + consume(TokenType::End); + return create_ast_node<CaseExpression>(move(case_expression), move(when_then_clauses), move(else_expression)); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_exists_expression(bool invert_expression, TokenType opening_token) +{ + VERIFY((opening_token == TokenType::Exists) || (opening_token == TokenType::Select)); + + if ((opening_token == TokenType::Exists) && !consume_if(TokenType::Exists)) + return {}; + + if (opening_token == TokenType::Exists) + consume(TokenType::ParenOpen); + auto select_statement = parse_select_statement({}); + consume(TokenType::ParenClose); + + return create_ast_node<ExistsExpression>(move(select_statement), invert_expression); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_collate_expression(NonnullRefPtr<Expression> expression) +{ + if (!match(TokenType::Collate)) + return {}; + + consume(); + String collation_name = consume(TokenType::Identifier).value(); + + return create_ast_node<CollateExpression>(move(expression), move(collation_name)); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_is_expression(NonnullRefPtr<Expression> expression) +{ + if (!match(TokenType::Is)) + return {}; + + consume(); + + bool invert_expression = false; + if (match(TokenType::Not)) { + consume(); + invert_expression = true; + } + + auto rhs = parse_expression(); + return create_ast_node<IsExpression>(move(expression), move(rhs), invert_expression); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_match_expression(NonnullRefPtr<Expression> lhs, bool invert_expression) +{ + auto parse_escape = [this]() { + RefPtr<Expression> escape; + if (consume_if(TokenType::Escape)) + escape = parse_expression(); + return escape; + }; + + if (consume_if(TokenType::Like)) + return create_ast_node<MatchExpression>(MatchOperator::Like, move(lhs), parse_expression(), parse_escape(), invert_expression); + + if (consume_if(TokenType::Glob)) + return create_ast_node<MatchExpression>(MatchOperator::Glob, move(lhs), parse_expression(), parse_escape(), invert_expression); + + if (consume_if(TokenType::Match)) + return create_ast_node<MatchExpression>(MatchOperator::Match, move(lhs), parse_expression(), parse_escape(), invert_expression); + + if (consume_if(TokenType::Regexp)) + return create_ast_node<MatchExpression>(MatchOperator::Regexp, move(lhs), parse_expression(), parse_escape(), invert_expression); + + return {}; +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_null_expression(NonnullRefPtr<Expression> expression, bool invert_expression) +{ + if (!match(TokenType::Isnull) && !match(TokenType::Notnull) && !(invert_expression && match(TokenType::Null))) + return {}; + + auto type = consume().type(); + invert_expression |= (type == TokenType::Notnull); + + return create_ast_node<NullExpression>(move(expression), invert_expression); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_between_expression(NonnullRefPtr<Expression> expression, bool invert_expression) +{ + if (!match(TokenType::Between)) + return {}; + + consume(); + + auto nested = parse_expression(); + if (!is<BinaryOperatorExpression>(*nested)) { + expected("Binary Expression"); + return create_ast_node<ErrorExpression>(); + } + + const auto& binary_expression = static_cast<const BinaryOperatorExpression&>(*nested); + if (binary_expression.type() != BinaryOperator::And) { + expected("AND Expression"); + return create_ast_node<ErrorExpression>(); + } + + return create_ast_node<BetweenExpression>(move(expression), binary_expression.lhs(), binary_expression.rhs(), invert_expression); +} + +Optional<NonnullRefPtr<Expression>> Parser::parse_in_expression(NonnullRefPtr<Expression> expression, bool invert_expression) +{ + if (!match(TokenType::In)) + return {}; + + consume(); + + if (consume_if(TokenType::ParenOpen)) { + if (match(TokenType::Select)) { + auto select_statement = parse_select_statement({}); + return create_ast_node<InSelectionExpression>(move(expression), move(select_statement), invert_expression); + } + + // FIXME: Consolidate this with parse_chained_expression(). That method consumes the opening paren as + // well, and also requires at least one expression (whereas this allows for an empty chain). + NonnullRefPtrVector<Expression> expressions; + if (!match(TokenType::ParenClose)) + parse_comma_separated_list(false, [&]() { expressions.append(parse_expression()); }); + + consume(TokenType::ParenClose); + + auto chain = create_ast_node<ChainedExpression>(move(expressions)); + return create_ast_node<InChainedExpression>(move(expression), move(chain), invert_expression); + } + + String schema_name; + String table_name; + parse_schema_and_table_name(schema_name, table_name); + + if (match(TokenType::ParenOpen)) { + // FIXME: Parse "table-function". + return {}; + } + + return create_ast_node<InTableExpression>(move(expression), move(schema_name), move(table_name), invert_expression); +} + +NonnullRefPtr<ColumnDefinition> Parser::parse_column_definition() +{ + // https://sqlite.org/syntax/column-def.html + auto name = consume(TokenType::Identifier).value(); + + auto type_name = match(TokenType::Identifier) + ? parse_type_name() + // https://www.sqlite.org/datatype3.html: If no type is specified then the column has affinity BLOB. + : create_ast_node<TypeName>("BLOB", NonnullRefPtrVector<SignedNumber> {}); + + // FIXME: Parse "column-constraint". + + return create_ast_node<ColumnDefinition>(move(name), move(type_name)); +} + +NonnullRefPtr<TypeName> Parser::parse_type_name() +{ + // https: //sqlite.org/syntax/type-name.html + auto name = consume(TokenType::Identifier).value(); + NonnullRefPtrVector<SignedNumber> signed_numbers; + + if (consume_if(TokenType::ParenOpen)) { + signed_numbers.append(parse_signed_number()); + + if (consume_if(TokenType::Comma)) + signed_numbers.append(parse_signed_number()); + + consume(TokenType::ParenClose); + } + + return create_ast_node<TypeName>(move(name), move(signed_numbers)); +} + +NonnullRefPtr<SignedNumber> Parser::parse_signed_number() +{ + // https://sqlite.org/syntax/signed-number.html + bool is_positive = true; + + if (consume_if(TokenType::Plus)) + is_positive = true; + else if (consume_if(TokenType::Minus)) + is_positive = false; + + if (match(TokenType::NumericLiteral)) { + auto number = consume(TokenType::NumericLiteral).double_value(); + return create_ast_node<SignedNumber>(is_positive ? number : (number * -1)); + } + + expected("NumericLiteral"); + return create_ast_node<SignedNumber>(0); +} + +NonnullRefPtr<CommonTableExpression> Parser::parse_common_table_expression() +{ + // https://sqlite.org/syntax/common-table-expression.html + auto table_name = consume(TokenType::Identifier).value(); + + Vector<String> column_names; + if (match(TokenType::ParenOpen)) + parse_comma_separated_list(true, [&]() { column_names.append(consume(TokenType::Identifier).value()); }); + + consume(TokenType::As); + consume(TokenType::ParenOpen); + auto select_statement = parse_select_statement({}); + consume(TokenType::ParenClose); + + return create_ast_node<CommonTableExpression>(move(table_name), move(column_names), move(select_statement)); +} + +NonnullRefPtr<QualifiedTableName> Parser::parse_qualified_table_name() +{ + // https://sqlite.org/syntax/qualified-table-name.html + String schema_name; + String table_name; + parse_schema_and_table_name(schema_name, table_name); + + String alias; + if (consume_if(TokenType::As)) + alias = consume(TokenType::Identifier).value(); + + // Note: The qualified-table-name spec may include an "INDEXED BY index-name" or "NOT INDEXED" clause. This is a SQLite extension + // "designed to help detect undesirable query plan changes during regression testing", and "application developers are admonished + // to omit all use of INDEXED BY during application design, implementation, testing, and tuning". Our implementation purposefully + // omits parsing INDEXED BY for now until there is good reason to add support. + + return create_ast_node<QualifiedTableName>(move(schema_name), move(table_name), move(alias)); +} + +NonnullRefPtr<ReturningClause> Parser::parse_returning_clause() +{ + // https://sqlite.org/syntax/returning-clause.html + consume(TokenType::Returning); + + if (consume_if(TokenType::Asterisk)) + return create_ast_node<ReturningClause>(); + + Vector<ReturningClause::ColumnClause> columns; + parse_comma_separated_list(false, [&]() { + auto expression = parse_expression(); + + String column_alias; + if (consume_if(TokenType::As) || match(TokenType::Identifier)) + column_alias = consume(TokenType::Identifier).value(); + + columns.append({ move(expression), move(column_alias) }); + }); + + return create_ast_node<ReturningClause>(move(columns)); +} + +NonnullRefPtr<ResultColumn> Parser::parse_result_column() +{ + // https://sqlite.org/syntax/result-column.html + if (consume_if(TokenType::Asterisk)) + return create_ast_node<ResultColumn>(); + + // If we match an identifier now, we don't know whether it is a table-name of the form "table-name.*", or if it is the start of a + // column-name-expression, until we try to parse the asterisk. So if we consume an identifier and a period, but don't find an + // asterisk, hold onto that information to form a column-name-expression later. + String table_name; + bool parsed_period = false; + + if (match(TokenType::Identifier)) { + table_name = consume().value(); + parsed_period = consume_if(TokenType::Period); + if (parsed_period && consume_if(TokenType::Asterisk)) + return create_ast_node<ResultColumn>(move(table_name)); + } + + auto expression = table_name.is_null() + ? parse_expression() + : static_cast<NonnullRefPtr<Expression>>(*parse_column_name_expression(move(table_name), parsed_period)); + + String column_alias; + if (consume_if(TokenType::As) || match(TokenType::Identifier)) + column_alias = consume(TokenType::Identifier).value(); + + return create_ast_node<ResultColumn>(move(expression), move(column_alias)); +} + +NonnullRefPtr<TableOrSubquery> Parser::parse_table_or_subquery() +{ + if (++m_parser_state.m_current_subquery_depth > Limits::maximum_subquery_depth) + syntax_error(String::formatted("Exceeded maximum subquery depth of {}", Limits::maximum_subquery_depth)); + + ScopeGuard guard([&]() { --m_parser_state.m_current_subquery_depth; }); + + // https://sqlite.org/syntax/table-or-subquery.html + if (match(TokenType::Identifier)) { + String schema_name; + String table_name; + parse_schema_and_table_name(schema_name, table_name); + + String table_alias; + if (consume_if(TokenType::As) || match(TokenType::Identifier)) + table_alias = consume(TokenType::Identifier).value(); + + return create_ast_node<TableOrSubquery>(move(schema_name), move(table_name), move(table_alias)); + } + + // FIXME: Parse join-clause. + + NonnullRefPtrVector<TableOrSubquery> subqueries; + parse_comma_separated_list(true, [&]() { subqueries.append(parse_table_or_subquery()); }); + + return create_ast_node<TableOrSubquery>(move(subqueries)); +} + +NonnullRefPtr<OrderingTerm> Parser::parse_ordering_term() +{ + // https://sqlite.org/syntax/ordering-term.html + auto expression = parse_expression(); + + String collation_name; + if (is<CollateExpression>(*expression)) { + const auto& collate = static_cast<const CollateExpression&>(*expression); + collation_name = collate.collation_name(); + expression = collate.expression(); + } else if (consume_if(TokenType::Collate)) { + collation_name = consume(TokenType::Identifier).value(); + } + + Order order = consume_if(TokenType::Desc) ? Order::Descending : Order::Ascending; + consume_if(TokenType::Asc); // ASC is the default, so ignore it if specified. + + Nulls nulls = order == Order::Ascending ? Nulls::First : Nulls::Last; + if (consume_if(TokenType::Nulls)) { + if (consume_if(TokenType::First)) + nulls = Nulls::First; + else if (consume_if(TokenType::Last)) + nulls = Nulls::Last; + else + expected("FIRST or LAST"); + } + + return create_ast_node<OrderingTerm>(move(expression), move(collation_name), order, nulls); +} + +void Parser::parse_schema_and_table_name(String& schema_name, String& table_name) +{ + String schema_or_table_name = consume(TokenType::Identifier).value(); + + if (consume_if(TokenType::Period)) { + schema_name = move(schema_or_table_name); + table_name = consume(TokenType::Identifier).value(); + } else { + table_name = move(schema_or_table_name); + } +} + +ConflictResolution Parser::parse_conflict_resolution() +{ + // https://sqlite.org/lang_conflict.html + if (consume_if(TokenType::Or)) { + if (consume_if(TokenType::Abort)) + return ConflictResolution::Abort; + if (consume_if(TokenType::Fail)) + return ConflictResolution::Fail; + if (consume_if(TokenType::Ignore)) + return ConflictResolution::Ignore; + if (consume_if(TokenType::Replace)) + return ConflictResolution::Replace; + if (consume_if(TokenType::Rollback)) + return ConflictResolution::Rollback; + + expected("ABORT, FAIL, IGNORE, REPLACE, or ROLLBACK"); + } + + return ConflictResolution::Abort; +} + +Token Parser::consume() +{ + auto old_token = m_parser_state.m_token; + m_parser_state.m_token = m_parser_state.m_lexer.next(); + return old_token; +} + +Token Parser::consume(TokenType expected_type) +{ + if (!match(expected_type)) { + expected(Token::name(expected_type)); + } + return consume(); +} + +bool Parser::consume_if(TokenType expected_type) +{ + if (!match(expected_type)) + return false; + + consume(); + return true; +} + +bool Parser::match(TokenType type) const +{ + return m_parser_state.m_token.type() == type; +} + +void Parser::expected(StringView what) +{ + syntax_error(String::formatted("Unexpected token {}, expected {}", m_parser_state.m_token.name(), what)); +} + +void Parser::syntax_error(String message) +{ + m_parser_state.m_errors.append({ move(message), position() }); +} + +Parser::Position Parser::position() const +{ + return { + m_parser_state.m_token.line_number(), + m_parser_state.m_token.line_column() + }; +} + +Parser::ParserState::ParserState(Lexer lexer) + : m_lexer(move(lexer)) + , m_token(m_lexer.next()) +{ +} + +} diff --git a/Userland/Libraries/LibSQL/AST/Parser.h b/Userland/Libraries/LibSQL/AST/Parser.h new file mode 100644 index 0000000000..466c2f4150 --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/Parser.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/String.h> +#include <AK/StringView.h> +#include <LibSQL/AST/AST.h> +#include <LibSQL/AST/Lexer.h> +#include <LibSQL/AST/Token.h> + +namespace SQL::AST { + +namespace Limits { +// https://www.sqlite.org/limits.html +constexpr size_t maximum_expression_tree_depth = 1000; +constexpr size_t maximum_subquery_depth = 100; +} + +class Parser { + struct Position { + size_t line { 0 }; + size_t column { 0 }; + }; + + struct Error { + String message; + Position position; + + String to_string() const + { + return String::formatted("{} (line: {}, column: {})", message, position.line, position.column); + } + }; + +public: + explicit Parser(Lexer lexer); + + NonnullRefPtr<Statement> next_statement(); + + bool has_errors() const { return m_parser_state.m_errors.size(); } + const Vector<Error>& errors() const { return m_parser_state.m_errors; } + +protected: + NonnullRefPtr<Expression> parse_expression(); // Protected for unit testing. + +private: + struct ParserState { + explicit ParserState(Lexer); + + Lexer m_lexer; + Token m_token; + Vector<Error> m_errors; + size_t m_current_expression_depth { 0 }; + size_t m_current_subquery_depth { 0 }; + }; + + NonnullRefPtr<Statement> parse_statement(); + NonnullRefPtr<Statement> parse_statement_with_expression_list(RefPtr<CommonTableExpressionList>); + NonnullRefPtr<CreateTable> parse_create_table_statement(); + NonnullRefPtr<AlterTable> parse_alter_table_statement(); + NonnullRefPtr<DropTable> parse_drop_table_statement(); + NonnullRefPtr<Insert> parse_insert_statement(RefPtr<CommonTableExpressionList>); + NonnullRefPtr<Update> parse_update_statement(RefPtr<CommonTableExpressionList>); + NonnullRefPtr<Delete> parse_delete_statement(RefPtr<CommonTableExpressionList>); + NonnullRefPtr<Select> parse_select_statement(RefPtr<CommonTableExpressionList>); + RefPtr<CommonTableExpressionList> parse_common_table_expression_list(); + + NonnullRefPtr<Expression> parse_primary_expression(); + NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression> primary); + bool match_secondary_expression() const; + Optional<NonnullRefPtr<Expression>> parse_literal_value_expression(); + Optional<NonnullRefPtr<Expression>> parse_column_name_expression(String with_parsed_identifier = {}, bool with_parsed_period = false); + Optional<NonnullRefPtr<Expression>> parse_unary_operator_expression(); + Optional<NonnullRefPtr<Expression>> parse_binary_operator_expression(NonnullRefPtr<Expression> lhs); + Optional<NonnullRefPtr<Expression>> parse_chained_expression(); + Optional<NonnullRefPtr<Expression>> parse_cast_expression(); + Optional<NonnullRefPtr<Expression>> parse_case_expression(); + Optional<NonnullRefPtr<Expression>> parse_exists_expression(bool invert_expression, TokenType opening_token = TokenType::Exists); + Optional<NonnullRefPtr<Expression>> parse_collate_expression(NonnullRefPtr<Expression> expression); + Optional<NonnullRefPtr<Expression>> parse_is_expression(NonnullRefPtr<Expression> expression); + Optional<NonnullRefPtr<Expression>> parse_match_expression(NonnullRefPtr<Expression> lhs, bool invert_expression); + Optional<NonnullRefPtr<Expression>> parse_null_expression(NonnullRefPtr<Expression> expression, bool invert_expression); + Optional<NonnullRefPtr<Expression>> parse_between_expression(NonnullRefPtr<Expression> expression, bool invert_expression); + Optional<NonnullRefPtr<Expression>> parse_in_expression(NonnullRefPtr<Expression> expression, bool invert_expression); + + NonnullRefPtr<ColumnDefinition> parse_column_definition(); + NonnullRefPtr<TypeName> parse_type_name(); + NonnullRefPtr<SignedNumber> parse_signed_number(); + NonnullRefPtr<CommonTableExpression> parse_common_table_expression(); + NonnullRefPtr<QualifiedTableName> parse_qualified_table_name(); + NonnullRefPtr<ReturningClause> parse_returning_clause(); + NonnullRefPtr<ResultColumn> parse_result_column(); + NonnullRefPtr<TableOrSubquery> parse_table_or_subquery(); + NonnullRefPtr<OrderingTerm> parse_ordering_term(); + void parse_schema_and_table_name(String& schema_name, String& table_name); + ConflictResolution parse_conflict_resolution(); + + template<typename ParseCallback> + void parse_comma_separated_list(bool surrounded_by_parentheses, ParseCallback&& parse_callback) + { + if (surrounded_by_parentheses) + consume(TokenType::ParenOpen); + + while (!has_errors() && !match(TokenType::Eof)) { + parse_callback(); + + if (!match(TokenType::Comma)) + break; + + consume(TokenType::Comma); + }; + + if (surrounded_by_parentheses) + consume(TokenType::ParenClose); + } + + Token consume(); + Token consume(TokenType type); + bool consume_if(TokenType type); + bool match(TokenType type) const; + + void expected(StringView what); + void syntax_error(String message); + + Position position() const; + + ParserState m_parser_state; +}; + +} diff --git a/Userland/Libraries/LibSQL/AST/SyntaxHighlighter.cpp b/Userland/Libraries/LibSQL/AST/SyntaxHighlighter.cpp new file mode 100644 index 0000000000..da5de16a67 --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/SyntaxHighlighter.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2021, Dylan Katz <dykatz@uw.edu> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/Debug.h> +#include <LibGfx/Palette.h> +#include <LibSQL/AST/Lexer.h> +#include <LibSQL/AST/SyntaxHighlighter.h> + +namespace SQL::AST { + +static Syntax::TextStyle style_for_token_type(Gfx::Palette const& palette, TokenType type) +{ + switch (Token::category(type)) { + case TokenCategory::Keyword: + return { palette.syntax_keyword(), true }; + case TokenCategory::Identifier: + return { palette.syntax_identifier(), false }; + case TokenCategory::Number: + return { palette.syntax_number(), false }; + case TokenCategory::Blob: + case TokenCategory::String: + return { palette.syntax_string(), false }; + case TokenCategory::Operator: + return { palette.syntax_operator(), false }; + case TokenCategory::Punctuation: + return { palette.syntax_punctuation(), false }; + case TokenCategory::Invalid: + default: + return { palette.base_text(), false }; + } +} + +bool SyntaxHighlighter::is_identifier(u64 token) const +{ + auto sql_token = static_cast<TokenType>(static_cast<size_t>(token)); + return sql_token == TokenType::Identifier; +} + +void SyntaxHighlighter::rehighlight(Palette const& palette) +{ + auto text = m_client->get_text(); + + Lexer lexer(text); + + Vector<GUI::TextDocumentSpan> spans; + + auto append_token = [&](StringView str, Token const& token) { + if (str.is_empty()) + return; + + GUI::TextPosition position { token.line_number() - 1, token.line_column() - 1 }; + for (char c : str) { + if (c == '\n') { + position.set_line(position.line() + 1); + position.set_column(0); + } else + position.set_column(position.column() + 1); + } + + GUI::TextDocumentSpan span; + span.range.set_start({ token.line_number() - 1, token.line_column() - 1 }); + span.range.set_end({ position.line(), position.column() }); + auto style = style_for_token_type(palette, token.type()); + span.attributes.color = style.color; + span.attributes.bold = style.bold; + span.data = static_cast<u64>(token.type()); + spans.append(span); + + dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "{} @ '{}' {}:{} - {}:{}", + token.name(), + token.value(), + span.range.start().line(), span.range.start().column(), + span.range.end().line(), span.range.end().column()); + }; + + for (;;) { + auto token = lexer.next(); + append_token(token.value(), token); + if (token.type() == TokenType::Eof) + break; + } + + m_client->do_set_spans(move(spans)); + + m_has_brace_buddies = false; + highlight_matching_token_pair(); + + m_client->do_update(); +} + +Vector<SyntaxHighlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const +{ + static Vector<SyntaxHighlighter::MatchingTokenPair> pairs; + if (pairs.is_empty()) { + pairs.append({ static_cast<u64>(TokenType::ParenOpen), static_cast<u64>(TokenType::ParenClose) }); + } + return pairs; +} + +bool SyntaxHighlighter::token_types_equal(u64 token1, u64 token2) const +{ + return static_cast<TokenType>(token1) == static_cast<TokenType>(token2); +} + +SyntaxHighlighter::~SyntaxHighlighter() +{ +} + +} diff --git a/Userland/Libraries/LibSQL/AST/SyntaxHighlighter.h b/Userland/Libraries/LibSQL/AST/SyntaxHighlighter.h new file mode 100644 index 0000000000..981b5b8091 --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/SyntaxHighlighter.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021, Dylan Katz <dykatz@uw.edu> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <LibSyntax/Highlighter.h> + +namespace SQL::AST { + +class SyntaxHighlighter final : public Syntax::Highlighter { +public: + SyntaxHighlighter() { } + virtual ~SyntaxHighlighter() override; + + virtual bool is_identifier(u64) const override; + + virtual Syntax::Language language() const override { return Syntax::Language::SQL; } + virtual void rehighlight(Palette const&) override; + +protected: + virtual Vector<MatchingTokenPair> matching_token_pairs_impl() const override; + virtual bool token_types_equal(u64, u64) const override; +}; + +} diff --git a/Userland/Libraries/LibSQL/AST/Token.cpp b/Userland/Libraries/LibSQL/AST/Token.cpp new file mode 100644 index 0000000000..2780b39a1d --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/Token.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "Token.h" +#include <AK/Assertions.h> +#include <AK/String.h> +#include <stdlib.h> + +namespace SQL::AST { + +StringView Token::name(TokenType type) +{ + switch (type) { +#define __ENUMERATE_SQL_TOKEN(value, type, category) \ + case TokenType::type: \ + return #type; + ENUMERATE_SQL_TOKENS +#undef __ENUMERATE_SQL_TOKEN + default: + VERIFY_NOT_REACHED(); + } +} + +TokenCategory Token::category(TokenType type) +{ + switch (type) { +#define __ENUMERATE_SQL_TOKEN(value, type, category) \ + case TokenType::type: \ + return TokenCategory::category; + ENUMERATE_SQL_TOKENS +#undef __ENUMERATE_SQL_TOKEN + default: + VERIFY_NOT_REACHED(); + } +} + +double Token::double_value() const +{ + VERIFY(type() == TokenType::NumericLiteral); + String value(m_value); + + if (value[0] == '0' && value.length() >= 2) { + if (value[1] == 'x' || value[1] == 'X') + return static_cast<double>(strtoul(value.characters() + 2, nullptr, 16)); + } + + return strtod(value.characters(), nullptr); +} + +} diff --git a/Userland/Libraries/LibSQL/AST/Token.h b/Userland/Libraries/LibSQL/AST/Token.h new file mode 100644 index 0000000000..e8dc8ff5cb --- /dev/null +++ b/Userland/Libraries/LibSQL/AST/Token.h @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/HashMap.h> +#include <AK/StringView.h> + +namespace SQL::AST { + +// https://sqlite.org/lang_keywords.html +#define ENUMERATE_SQL_TOKENS \ + __ENUMERATE_SQL_TOKEN("ABORT", Abort, Keyword) \ + __ENUMERATE_SQL_TOKEN("ACTION", Action, Keyword) \ + __ENUMERATE_SQL_TOKEN("ADD", Add, Keyword) \ + __ENUMERATE_SQL_TOKEN("AFTER", After, Keyword) \ + __ENUMERATE_SQL_TOKEN("ALL", All, Keyword) \ + __ENUMERATE_SQL_TOKEN("ALTER", Alter, Keyword) \ + __ENUMERATE_SQL_TOKEN("ALWAYS", Always, Keyword) \ + __ENUMERATE_SQL_TOKEN("ANALYZE", Analyze, Keyword) \ + __ENUMERATE_SQL_TOKEN("AND", And, Keyword) \ + __ENUMERATE_SQL_TOKEN("AS", As, Keyword) \ + __ENUMERATE_SQL_TOKEN("ASC", Asc, Keyword) \ + __ENUMERATE_SQL_TOKEN("ATTACH", Attach, Keyword) \ + __ENUMERATE_SQL_TOKEN("AUTOINCREMENT", Autoincrement, Keyword) \ + __ENUMERATE_SQL_TOKEN("BEFORE", Before, Keyword) \ + __ENUMERATE_SQL_TOKEN("BEGIN", Begin, Keyword) \ + __ENUMERATE_SQL_TOKEN("BETWEEN", Between, Keyword) \ + __ENUMERATE_SQL_TOKEN("BY", By, Keyword) \ + __ENUMERATE_SQL_TOKEN("CASCADE", Cascade, Keyword) \ + __ENUMERATE_SQL_TOKEN("CASE", Case, Keyword) \ + __ENUMERATE_SQL_TOKEN("CAST", Cast, Keyword) \ + __ENUMERATE_SQL_TOKEN("CHECK", Check, Keyword) \ + __ENUMERATE_SQL_TOKEN("COLLATE", Collate, Keyword) \ + __ENUMERATE_SQL_TOKEN("COLUMN", Column, Keyword) \ + __ENUMERATE_SQL_TOKEN("COMMIT", Commit, Keyword) \ + __ENUMERATE_SQL_TOKEN("CONFLICT", Conflict, Keyword) \ + __ENUMERATE_SQL_TOKEN("CONSTRAINT", Constraint, Keyword) \ + __ENUMERATE_SQL_TOKEN("CREATE", Create, Keyword) \ + __ENUMERATE_SQL_TOKEN("CROSS", Cross, Keyword) \ + __ENUMERATE_SQL_TOKEN("CURRENT", Current, Keyword) \ + __ENUMERATE_SQL_TOKEN("CURRENT_DATE", CurrentDate, Keyword) \ + __ENUMERATE_SQL_TOKEN("CURRENT_TIME", CurrentTime, Keyword) \ + __ENUMERATE_SQL_TOKEN("CURRENT_TIMESTAMP", CurrentTimestamp, Keyword) \ + __ENUMERATE_SQL_TOKEN("DATABASE", Database, Keyword) \ + __ENUMERATE_SQL_TOKEN("DEFAULT", Default, Keyword) \ + __ENUMERATE_SQL_TOKEN("DEFERRABLE", Deferrable, Keyword) \ + __ENUMERATE_SQL_TOKEN("DEFERRED", Deferred, Keyword) \ + __ENUMERATE_SQL_TOKEN("DELETE", Delete, Keyword) \ + __ENUMERATE_SQL_TOKEN("DESC", Desc, Keyword) \ + __ENUMERATE_SQL_TOKEN("DETACH", Detach, Keyword) \ + __ENUMERATE_SQL_TOKEN("DISTINCT", Distinct, Keyword) \ + __ENUMERATE_SQL_TOKEN("DO", Do, Keyword) \ + __ENUMERATE_SQL_TOKEN("DROP", Drop, Keyword) \ + __ENUMERATE_SQL_TOKEN("EACH", Each, Keyword) \ + __ENUMERATE_SQL_TOKEN("ELSE", Else, Keyword) \ + __ENUMERATE_SQL_TOKEN("END", End, Keyword) \ + __ENUMERATE_SQL_TOKEN("ESCAPE", Escape, Keyword) \ + __ENUMERATE_SQL_TOKEN("EXCEPT", Except, Keyword) \ + __ENUMERATE_SQL_TOKEN("EXCLUDE", Exclude, Keyword) \ + __ENUMERATE_SQL_TOKEN("EXCLUSIVE", Exclusive, Keyword) \ + __ENUMERATE_SQL_TOKEN("EXISTS", Exists, Keyword) \ + __ENUMERATE_SQL_TOKEN("EXPLAIN", Explain, Keyword) \ + __ENUMERATE_SQL_TOKEN("FAIL", Fail, Keyword) \ + __ENUMERATE_SQL_TOKEN("FILTER", Filter, Keyword) \ + __ENUMERATE_SQL_TOKEN("FIRST", First, Keyword) \ + __ENUMERATE_SQL_TOKEN("FOLLOWING", Following, Keyword) \ + __ENUMERATE_SQL_TOKEN("FOR", For, Keyword) \ + __ENUMERATE_SQL_TOKEN("FOREIGN", Foreign, Keyword) \ + __ENUMERATE_SQL_TOKEN("FROM", From, Keyword) \ + __ENUMERATE_SQL_TOKEN("FULL", Full, Keyword) \ + __ENUMERATE_SQL_TOKEN("GENERATED", Generated, Keyword) \ + __ENUMERATE_SQL_TOKEN("GLOB", Glob, Keyword) \ + __ENUMERATE_SQL_TOKEN("GROUP", Group, Keyword) \ + __ENUMERATE_SQL_TOKEN("GROUPS", Groups, Keyword) \ + __ENUMERATE_SQL_TOKEN("HAVING", Having, Keyword) \ + __ENUMERATE_SQL_TOKEN("IF", If, Keyword) \ + __ENUMERATE_SQL_TOKEN("IGNORE", Ignore, Keyword) \ + __ENUMERATE_SQL_TOKEN("IMMEDIATE", Immediate, Keyword) \ + __ENUMERATE_SQL_TOKEN("IN", In, Keyword) \ + __ENUMERATE_SQL_TOKEN("INDEX", Index, Keyword) \ + __ENUMERATE_SQL_TOKEN("INDEXED", Indexed, Keyword) \ + __ENUMERATE_SQL_TOKEN("INITIALLY", Initially, Keyword) \ + __ENUMERATE_SQL_TOKEN("INNER", Inner, Keyword) \ + __ENUMERATE_SQL_TOKEN("INSERT", Insert, Keyword) \ + __ENUMERATE_SQL_TOKEN("INSTEAD", Instead, Keyword) \ + __ENUMERATE_SQL_TOKEN("INTERSECT", Intersect, Keyword) \ + __ENUMERATE_SQL_TOKEN("INTO", Into, Keyword) \ + __ENUMERATE_SQL_TOKEN("IS", Is, Keyword) \ + __ENUMERATE_SQL_TOKEN("ISNULL", Isnull, Keyword) \ + __ENUMERATE_SQL_TOKEN("JOIN", Join, Keyword) \ + __ENUMERATE_SQL_TOKEN("KEY", Key, Keyword) \ + __ENUMERATE_SQL_TOKEN("LAST", Last, Keyword) \ + __ENUMERATE_SQL_TOKEN("LEFT", Left, Keyword) \ + __ENUMERATE_SQL_TOKEN("LIKE", Like, Keyword) \ + __ENUMERATE_SQL_TOKEN("LIMIT", Limit, Keyword) \ + __ENUMERATE_SQL_TOKEN("MATCH", Match, Keyword) \ + __ENUMERATE_SQL_TOKEN("MATERIALIZED", Materialized, Keyword) \ + __ENUMERATE_SQL_TOKEN("NATURAL", Natural, Keyword) \ + __ENUMERATE_SQL_TOKEN("NO", No, Keyword) \ + __ENUMERATE_SQL_TOKEN("NOT", Not, Keyword) \ + __ENUMERATE_SQL_TOKEN("NOTHING", Nothing, Keyword) \ + __ENUMERATE_SQL_TOKEN("NOTNULL", Notnull, Keyword) \ + __ENUMERATE_SQL_TOKEN("NULL", Null, Keyword) \ + __ENUMERATE_SQL_TOKEN("NULLS", Nulls, Keyword) \ + __ENUMERATE_SQL_TOKEN("OF", Of, Keyword) \ + __ENUMERATE_SQL_TOKEN("OFFSET", Offset, Keyword) \ + __ENUMERATE_SQL_TOKEN("ON", On, Keyword) \ + __ENUMERATE_SQL_TOKEN("OR", Or, Keyword) \ + __ENUMERATE_SQL_TOKEN("ORDER", Order, Keyword) \ + __ENUMERATE_SQL_TOKEN("OTHERS", Others, Keyword) \ + __ENUMERATE_SQL_TOKEN("OUTER", Outer, Keyword) \ + __ENUMERATE_SQL_TOKEN("OVER", Over, Keyword) \ + __ENUMERATE_SQL_TOKEN("PARTITION", Partition, Keyword) \ + __ENUMERATE_SQL_TOKEN("PLAN", Plan, Keyword) \ + __ENUMERATE_SQL_TOKEN("PRAGMA", Pragma, Keyword) \ + __ENUMERATE_SQL_TOKEN("PRECEDING", Preceding, Keyword) \ + __ENUMERATE_SQL_TOKEN("PRIMARY", Primary, Keyword) \ + __ENUMERATE_SQL_TOKEN("QUERY", Query, Keyword) \ + __ENUMERATE_SQL_TOKEN("RAISE", Raise, Keyword) \ + __ENUMERATE_SQL_TOKEN("RANGE", Range, Keyword) \ + __ENUMERATE_SQL_TOKEN("RECURSIVE", Recursive, Keyword) \ + __ENUMERATE_SQL_TOKEN("REFERENCES", References, Keyword) \ + __ENUMERATE_SQL_TOKEN("REGEXP", Regexp, Keyword) \ + __ENUMERATE_SQL_TOKEN("REINDEX", Reindex, Keyword) \ + __ENUMERATE_SQL_TOKEN("RELEASE", Release, Keyword) \ + __ENUMERATE_SQL_TOKEN("RENAME", Rename, Keyword) \ + __ENUMERATE_SQL_TOKEN("REPLACE", Replace, Keyword) \ + __ENUMERATE_SQL_TOKEN("RESTRICT", Restrict, Keyword) \ + __ENUMERATE_SQL_TOKEN("RETURNING", Returning, Keyword) \ + __ENUMERATE_SQL_TOKEN("RIGHT", Right, Keyword) \ + __ENUMERATE_SQL_TOKEN("ROLLBACK", Rollback, Keyword) \ + __ENUMERATE_SQL_TOKEN("ROW", Row, Keyword) \ + __ENUMERATE_SQL_TOKEN("ROWS", Rows, Keyword) \ + __ENUMERATE_SQL_TOKEN("SAVEPOINT", Savepoint, Keyword) \ + __ENUMERATE_SQL_TOKEN("SELECT", Select, Keyword) \ + __ENUMERATE_SQL_TOKEN("SET", Set, Keyword) \ + __ENUMERATE_SQL_TOKEN("TABLE", Table, Keyword) \ + __ENUMERATE_SQL_TOKEN("TEMP", Temp, Keyword) \ + __ENUMERATE_SQL_TOKEN("TEMPORARY", Temporary, Keyword) \ + __ENUMERATE_SQL_TOKEN("THEN", Then, Keyword) \ + __ENUMERATE_SQL_TOKEN("TIES", Ties, Keyword) \ + __ENUMERATE_SQL_TOKEN("TO", To, Keyword) \ + __ENUMERATE_SQL_TOKEN("TRANSACTION", Transaction, Keyword) \ + __ENUMERATE_SQL_TOKEN("TRIGGER", Trigger, Keyword) \ + __ENUMERATE_SQL_TOKEN("UNBOUNDED", Unbounded, Keyword) \ + __ENUMERATE_SQL_TOKEN("UNION", Union, Keyword) \ + __ENUMERATE_SQL_TOKEN("UNIQUE", Unique, Keyword) \ + __ENUMERATE_SQL_TOKEN("UPDATE", Update, Keyword) \ + __ENUMERATE_SQL_TOKEN("USING", Using, Keyword) \ + __ENUMERATE_SQL_TOKEN("VACUUM", Vacuum, Keyword) \ + __ENUMERATE_SQL_TOKEN("VALUES", Values, Keyword) \ + __ENUMERATE_SQL_TOKEN("VIEW", View, Keyword) \ + __ENUMERATE_SQL_TOKEN("VIRTUAL", Virtual, Keyword) \ + __ENUMERATE_SQL_TOKEN("WHEN", When, Keyword) \ + __ENUMERATE_SQL_TOKEN("WHERE", Where, Keyword) \ + __ENUMERATE_SQL_TOKEN("WINDOW", Window, Keyword) \ + __ENUMERATE_SQL_TOKEN("WITH", With, Keyword) \ + __ENUMERATE_SQL_TOKEN("WITHOUT", Without, Keyword) \ + __ENUMERATE_SQL_TOKEN("_identifier_", Identifier, Identifier) \ + __ENUMERATE_SQL_TOKEN("_numeric_", NumericLiteral, Number) \ + __ENUMERATE_SQL_TOKEN("_string_", StringLiteral, String) \ + __ENUMERATE_SQL_TOKEN("_blob_", BlobLiteral, Blob) \ + __ENUMERATE_SQL_TOKEN("_eof_", Eof, Invalid) \ + __ENUMERATE_SQL_TOKEN("_invalid_", Invalid, Invalid) \ + __ENUMERATE_SQL_TOKEN("&", Ampersand, Operator) \ + __ENUMERATE_SQL_TOKEN("*", Asterisk, Operator) \ + __ENUMERATE_SQL_TOKEN(",", Comma, Punctuation) \ + __ENUMERATE_SQL_TOKEN("/", Divide, Operator) \ + __ENUMERATE_SQL_TOKEN("||", DoublePipe, Operator) \ + __ENUMERATE_SQL_TOKEN("=", Equals, Operator) \ + __ENUMERATE_SQL_TOKEN("==", EqualsEquals, Operator) \ + __ENUMERATE_SQL_TOKEN(">", GreaterThan, Operator) \ + __ENUMERATE_SQL_TOKEN(">=", GreaterThanEquals, Operator) \ + __ENUMERATE_SQL_TOKEN("<", LessThan, Operator) \ + __ENUMERATE_SQL_TOKEN("<=", LessThanEquals, Operator) \ + __ENUMERATE_SQL_TOKEN("-", Minus, Operator) \ + __ENUMERATE_SQL_TOKEN("%", Modulus, Operator) \ + __ENUMERATE_SQL_TOKEN("!=", NotEquals1, Operator) \ + __ENUMERATE_SQL_TOKEN("<>", NotEquals2, Operator) \ + __ENUMERATE_SQL_TOKEN(")", ParenClose, Punctuation) \ + __ENUMERATE_SQL_TOKEN("(", ParenOpen, Punctuation) \ + __ENUMERATE_SQL_TOKEN(".", Period, Operator) \ + __ENUMERATE_SQL_TOKEN("|", Pipe, Operator) \ + __ENUMERATE_SQL_TOKEN("+", Plus, Operator) \ + __ENUMERATE_SQL_TOKEN(";", SemiColon, Punctuation) \ + __ENUMERATE_SQL_TOKEN("<<", ShiftLeft, Operator) \ + __ENUMERATE_SQL_TOKEN(">>", ShiftRight, Operator) \ + __ENUMERATE_SQL_TOKEN("~", Tilde, Operator) + +enum class TokenType { +#define __ENUMERATE_SQL_TOKEN(value, type, category) type, + ENUMERATE_SQL_TOKENS +#undef __ENUMERATE_SQL_TOKEN + _COUNT_OF_TOKENS, +}; + +enum class TokenCategory { + Invalid, + Keyword, + Identifier, + Number, + String, + Blob, + Operator, + Punctuation, +}; + +class Token { +public: + Token(TokenType type, StringView value, size_t line_number, size_t line_column) + : m_type(type) + , m_value(value) + , m_line_number(line_number) + , m_line_column(line_column) + { + } + + static StringView name(TokenType); + static TokenCategory category(TokenType); + + StringView name() const { return name(m_type); } + TokenType type() const { return m_type; } + TokenCategory category() const { return category(m_type); } + + StringView value() const { return m_value; } + double double_value() const; + + size_t line_number() const { return m_line_number; } + size_t line_column() const { return m_line_column; } + +private: + TokenType m_type; + StringView m_value; + size_t m_line_number; + size_t m_line_column; +}; + +} |