/* * Copyright (c) 2021, Tim Flynn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "Parser.h" #include namespace SQL { Parser::Parser(Lexer lexer) : m_parser_state(move(lexer)) { } NonnullRefPtr Parser::next_statement() { switch (m_parser_state.m_token.type()) { case TokenType::Create: return parse_create_table_statement(); case TokenType::Drop: return parse_drop_table_statement(); case TokenType::Delete: case TokenType::With: return parse_delete_statement(); default: expected("CREATE, DROP, or DELETE"); return create_ast_node(); } } NonnullRefPtr Parser::parse_create_table_statement() { // https://sqlite.org/lang_createtable.html consume(TokenType::Create); bool is_temporary = false; if (consume_if(TokenType::Temp) || consume_if(TokenType::Temporary)) is_temporary = true; consume(TokenType::Table); bool is_error_if_table_exists = true; if (consume_if(TokenType::If)) { consume(TokenType::Not); consume(TokenType::Exists); is_error_if_table_exists = false; } String schema_or_table_name = consume(TokenType::Identifier).value(); String schema_name; String table_name; if (consume_if(TokenType::Period)) { schema_name = move(schema_or_table_name); table_name = consume(TokenType::Identifier).value(); } else { table_name = move(schema_or_table_name); } // FIXME: Parse "AS select-stmt". NonnullRefPtrVector column_definitions; consume(TokenType::ParenOpen); do { column_definitions.append(parse_column_definition()); if (match(TokenType::ParenClose)) break; consume(TokenType::Comma); } while (!match(TokenType::Eof)); // FIXME: Parse "table-constraint". consume(TokenType::ParenClose); consume(TokenType::SemiColon); return create_ast_node(move(schema_name), move(table_name), move(column_definitions), is_temporary, is_error_if_table_exists); } NonnullRefPtr Parser::parse_drop_table_statement() { // https://sqlite.org/lang_droptable.html consume(TokenType::Drop); consume(TokenType::Table); bool is_error_if_table_does_not_exist = true; if (consume_if(TokenType::If)) { consume(TokenType::Exists); is_error_if_table_does_not_exist = false; } String schema_or_table_name = consume(TokenType::Identifier).value(); String schema_name; String table_name; if (consume_if(TokenType::Period)) { schema_name = move(schema_or_table_name); table_name = consume(TokenType::Identifier).value(); } else { table_name = move(schema_or_table_name); } consume(TokenType::SemiColon); return create_ast_node(move(schema_name), move(table_name), is_error_if_table_does_not_exist); } NonnullRefPtr Parser::parse_delete_statement() { // https://sqlite.org/lang_delete.html bool recursive = false; RefPtr common_table_expression; if (consume_if(TokenType::With)) { recursive = consume_if(TokenType::Recursive); common_table_expression = parse_common_table_expression(); } consume(TokenType::Delete); consume(TokenType::From); auto qualified_table_name = parse_qualified_table_name(); RefPtr where_clause; if (consume_if(TokenType::Where)) where_clause = parse_expression(); RefPtr returning_clause; if (match(TokenType::Returning)) returning_clause = parse_returning_clause(); consume(TokenType::SemiColon); return create_ast_node(recursive, move(common_table_expression), move(qualified_table_name), move(where_clause), move(returning_clause)); } NonnullRefPtr Parser::parse_expression() { // https://sqlite.org/lang_expr.html auto expression = parse_primary_expression(); if (match_secondary_expression()) expression = parse_secondary_expression(move(expression)); // FIXME: Parse 'bind-parameter'. // FIXME: Parse 'function-name'. // FIXME: Parse 'exists'. // FIXME: Parse 'raise-function'. return expression; } NonnullRefPtr Parser::parse_primary_expression() { if (auto expression = parse_literal_value_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_column_name_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_unary_operator_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_chained_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_cast_expression(); expression.has_value()) return move(expression.value()); if (auto expression = parse_case_expression(); expression.has_value()) return move(expression.value()); expected("Primary Expression"); consume(); return create_ast_node(); } NonnullRefPtr Parser::parse_secondary_expression(NonnullRefPtr primary) { if (auto expression = parse_binary_operator_expression(primary); expression.has_value()) return move(expression.value()); if (auto expression = parse_collate_expression(primary); expression.has_value()) return move(expression.value()); if (auto expression = parse_is_expression(primary); expression.has_value()) return move(expression.value()); bool invert_expression = false; if (consume_if(TokenType::Not)) invert_expression = true; if (auto expression = parse_match_expression(primary, invert_expression); expression.has_value()) return move(expression.value()); if (auto expression = parse_null_expression(primary, invert_expression); expression.has_value()) return move(expression.value()); if (auto expression = parse_between_expression(primary, invert_expression); expression.has_value()) return move(expression.value()); if (auto expression = parse_in_expression(primary, invert_expression); expression.has_value()) return move(expression.value()); expected("Secondary Expression"); consume(); return create_ast_node(); } bool Parser::match_secondary_expression() const { return match(TokenType::Not) || match(TokenType::DoublePipe) || match(TokenType::Asterisk) || match(TokenType::Divide) || match(TokenType::Modulus) || match(TokenType::Plus) || match(TokenType::Minus) || match(TokenType::ShiftLeft) || match(TokenType::ShiftRight) || match(TokenType::Ampersand) || match(TokenType::Pipe) || match(TokenType::LessThan) || match(TokenType::LessThanEquals) || match(TokenType::GreaterThan) || match(TokenType::GreaterThanEquals) || match(TokenType::Equals) || match(TokenType::EqualsEquals) || match(TokenType::NotEquals1) || match(TokenType::NotEquals2) || match(TokenType::And) || match(TokenType::Or) || match(TokenType::Collate) || match(TokenType::Is) || match(TokenType::Like) || match(TokenType::Glob) || match(TokenType::Match) || match(TokenType::Regexp) || match(TokenType::Isnull) || match(TokenType::Notnull) || match(TokenType::Between) || match(TokenType::In); } Optional> Parser::parse_literal_value_expression() { if (match(TokenType::NumericLiteral)) { auto value = consume().double_value(); return create_ast_node(value); } if (match(TokenType::StringLiteral)) { // TODO: Should the surrounding ' ' be removed here? auto value = consume().value(); return create_ast_node(value); } if (match(TokenType::BlobLiteral)) { // TODO: Should the surrounding x' ' be removed here? auto value = consume().value(); return create_ast_node(value); } if (consume_if(TokenType::Null)) return create_ast_node(); return {}; } Optional> Parser::parse_column_name_expression() { if (!match(TokenType::Identifier)) return {}; String first_identifier = consume(TokenType::Identifier).value(); String schema_name; String table_name; String column_name; if (consume_if(TokenType::Period)) { String second_identifier = consume(TokenType::Identifier).value(); if (consume_if(TokenType::Period)) { schema_name = move(first_identifier); table_name = move(second_identifier); column_name = consume(TokenType::Identifier).value(); } else { table_name = move(first_identifier); column_name = move(second_identifier); } } else { column_name = move(first_identifier); } return create_ast_node(move(schema_name), move(table_name), move(column_name)); } Optional> Parser::parse_unary_operator_expression() { if (consume_if(TokenType::Minus)) return create_ast_node(UnaryOperator::Minus, parse_expression()); if (consume_if(TokenType::Plus)) return create_ast_node(UnaryOperator::Plus, parse_expression()); if (consume_if(TokenType::Tilde)) return create_ast_node(UnaryOperator::BitwiseNot, parse_expression()); if (consume_if(TokenType::Not)) return create_ast_node(UnaryOperator::Not, parse_expression()); return {}; } Optional> Parser::parse_binary_operator_expression(NonnullRefPtr lhs) { if (consume_if(TokenType::DoublePipe)) return create_ast_node(BinaryOperator::Concatenate, move(lhs), parse_expression()); if (consume_if(TokenType::Asterisk)) return create_ast_node(BinaryOperator::Multiplication, move(lhs), parse_expression()); if (consume_if(TokenType::Divide)) return create_ast_node(BinaryOperator::Division, move(lhs), parse_expression()); if (consume_if(TokenType::Modulus)) return create_ast_node(BinaryOperator::Modulo, move(lhs), parse_expression()); if (consume_if(TokenType::Plus)) return create_ast_node(BinaryOperator::Plus, move(lhs), parse_expression()); if (consume_if(TokenType::Minus)) return create_ast_node(BinaryOperator::Minus, move(lhs), parse_expression()); if (consume_if(TokenType::ShiftLeft)) return create_ast_node(BinaryOperator::ShiftLeft, move(lhs), parse_expression()); if (consume_if(TokenType::ShiftRight)) return create_ast_node(BinaryOperator::ShiftRight, move(lhs), parse_expression()); if (consume_if(TokenType::Ampersand)) return create_ast_node(BinaryOperator::BitwiseAnd, move(lhs), parse_expression()); if (consume_if(TokenType::Pipe)) return create_ast_node(BinaryOperator::BitwiseOr, move(lhs), parse_expression()); if (consume_if(TokenType::LessThan)) return create_ast_node(BinaryOperator::LessThan, move(lhs), parse_expression()); if (consume_if(TokenType::LessThanEquals)) return create_ast_node(BinaryOperator::LessThanEquals, move(lhs), parse_expression()); if (consume_if(TokenType::GreaterThan)) return create_ast_node(BinaryOperator::GreaterThan, move(lhs), parse_expression()); if (consume_if(TokenType::GreaterThanEquals)) return create_ast_node(BinaryOperator::GreaterThanEquals, move(lhs), parse_expression()); if (consume_if(TokenType::Equals) || consume_if(TokenType::EqualsEquals)) return create_ast_node(BinaryOperator::Equals, move(lhs), parse_expression()); if (consume_if(TokenType::NotEquals1) || consume_if(TokenType::NotEquals2)) return create_ast_node(BinaryOperator::NotEquals, move(lhs), parse_expression()); if (consume_if(TokenType::And)) return create_ast_node(BinaryOperator::And, move(lhs), parse_expression()); if (consume_if(TokenType::Or)) return create_ast_node(BinaryOperator::Or, move(lhs), parse_expression()); return {}; } Optional> Parser::parse_chained_expression() { if (!match(TokenType::ParenOpen)) return {}; NonnullRefPtrVector expressions; consume(TokenType::ParenOpen); do { expressions.append(parse_expression()); if (match(TokenType::ParenClose)) break; consume(TokenType::Comma); } while (!match(TokenType::Eof)); consume(TokenType::ParenClose); return create_ast_node(move(expressions)); } Optional> Parser::parse_cast_expression() { if (!match(TokenType::Cast)) return {}; consume(TokenType::Cast); consume(TokenType::ParenOpen); auto expression = parse_expression(); consume(TokenType::As); auto type_name = parse_type_name(); consume(TokenType::ParenClose); return create_ast_node(move(expression), move(type_name)); } Optional> Parser::parse_case_expression() { if (!match(TokenType::Case)) return {}; consume(); RefPtr case_expression; if (!match(TokenType::When)) { case_expression = parse_expression(); } Vector when_then_clauses; do { consume(TokenType::When); auto when = parse_expression(); consume(TokenType::Then); auto then = parse_expression(); when_then_clauses.append({ move(when), move(then) }); if (!match(TokenType::When)) break; } while (!match(TokenType::Eof)); RefPtr else_expression; if (consume_if(TokenType::Else)) else_expression = parse_expression(); consume(TokenType::End); return create_ast_node(move(case_expression), move(when_then_clauses), move(else_expression)); } Optional> Parser::parse_collate_expression(NonnullRefPtr expression) { if (!match(TokenType::Collate)) return {}; consume(); String collation_name = consume(TokenType::Identifier).value(); return create_ast_node(move(expression), move(collation_name)); } Optional> Parser::parse_is_expression(NonnullRefPtr expression) { if (!match(TokenType::Is)) return {}; consume(); bool invert_expression = false; if (match(TokenType::Not)) { consume(); invert_expression = true; } auto rhs = parse_expression(); return create_ast_node(move(expression), move(rhs), invert_expression); } Optional> Parser::parse_match_expression(NonnullRefPtr lhs, bool invert_expression) { auto parse_escape = [this]() { RefPtr escape; if (consume_if(TokenType::Escape)) escape = parse_expression(); return escape; }; if (consume_if(TokenType::Like)) return create_ast_node(MatchOperator::Like, move(lhs), parse_expression(), parse_escape(), invert_expression); if (consume_if(TokenType::Glob)) return create_ast_node(MatchOperator::Glob, move(lhs), parse_expression(), parse_escape(), invert_expression); if (consume_if(TokenType::Match)) return create_ast_node(MatchOperator::Match, move(lhs), parse_expression(), parse_escape(), invert_expression); if (consume_if(TokenType::Regexp)) return create_ast_node(MatchOperator::Regexp, move(lhs), parse_expression(), parse_escape(), invert_expression); return {}; } Optional> Parser::parse_null_expression(NonnullRefPtr expression, bool invert_expression) { if (!match(TokenType::Isnull) && !match(TokenType::Notnull) && !(invert_expression && match(TokenType::Null))) return {}; auto type = consume().type(); invert_expression |= (type == TokenType::Notnull); return create_ast_node(move(expression), invert_expression); } Optional> Parser::parse_between_expression(NonnullRefPtr expression, bool invert_expression) { if (!match(TokenType::Between)) return {}; consume(); auto nested = parse_expression(); if (!is(*nested)) { expected("Binary Expression"); return create_ast_node(); } const auto& binary_expression = static_cast(*nested); if (binary_expression.type() != BinaryOperator::And) { expected("AND Expression"); return create_ast_node(); } return create_ast_node(move(expression), binary_expression.lhs(), binary_expression.rhs(), invert_expression); } Optional> Parser::parse_in_expression(NonnullRefPtr expression, bool invert_expression) { if (!match(TokenType::In)) return {}; consume(); if (consume_if(TokenType::ParenOpen)) { if (match(TokenType::Select)) { // FIXME: Parse "select-stmt". return {}; } // FIXME: Consolidate this with parse_chained_expression(). That method consumes the opening paren as // well, and also requires at least one expression (whereas this allows for an empty chain). NonnullRefPtrVector expressions; if (!match(TokenType::ParenClose)) { do { expressions.append(parse_expression()); if (match(TokenType::ParenClose)) break; consume(TokenType::Comma); } while (!match(TokenType::Eof)); } consume(TokenType::ParenClose); auto chain = create_ast_node(move(expressions)); return create_ast_node(move(expression), move(chain), invert_expression); } String schema_or_table_name = consume(TokenType::Identifier).value(); String schema_name; String table_name; if (consume_if(TokenType::Period)) { schema_name = move(schema_or_table_name); table_name = consume(TokenType::Identifier).value(); } else { table_name = move(schema_or_table_name); } if (match(TokenType::ParenOpen)) { // FIXME: Parse "table-function". return {}; } return create_ast_node(move(expression), move(schema_name), move(table_name), invert_expression); } NonnullRefPtr Parser::parse_column_definition() { // https://sqlite.org/syntax/column-def.html auto name = consume(TokenType::Identifier).value(); auto type_name = match(TokenType::Identifier) ? parse_type_name() // https://www.sqlite.org/datatype3.html: If no type is specified then the column has affinity BLOB. : create_ast_node("BLOB", NonnullRefPtrVector {}); // FIXME: Parse "column-constraint". return create_ast_node(move(name), move(type_name)); } NonnullRefPtr Parser::parse_type_name() { // https: //sqlite.org/syntax/type-name.html auto name = consume(TokenType::Identifier).value(); NonnullRefPtrVector signed_numbers; if (consume_if(TokenType::ParenOpen)) { signed_numbers.append(parse_signed_number()); if (consume_if(TokenType::Comma)) signed_numbers.append(parse_signed_number()); consume(TokenType::ParenClose); } return create_ast_node(move(name), move(signed_numbers)); } NonnullRefPtr Parser::parse_signed_number() { // https://sqlite.org/syntax/signed-number.html bool is_positive = true; if (consume_if(TokenType::Plus)) is_positive = true; else if (consume_if(TokenType::Minus)) is_positive = false; if (match(TokenType::NumericLiteral)) { auto number = consume(TokenType::NumericLiteral).double_value(); return create_ast_node(is_positive ? number : (number * -1)); } expected("NumericLiteral"); return create_ast_node(0); } NonnullRefPtr Parser::parse_common_table_expression() { // https://sqlite.org/syntax/common-table-expression.html auto table_name = consume(TokenType::Identifier).value(); Vector column_names; if (consume_if(TokenType::ParenOpen)) { do { column_names.append(consume(TokenType::Identifier).value()); if (match(TokenType::ParenClose)) break; consume(TokenType::Comma); } while (!match(TokenType::Eof)); consume(TokenType::ParenClose); } consume(TokenType::As); consume(TokenType::ParenOpen); // FIXME: Parse "select-stmt". consume(TokenType::ParenClose); return create_ast_node(move(table_name), move(column_names)); } NonnullRefPtr Parser::parse_qualified_table_name() { // https://sqlite.org/syntax/qualified-table-name.html String schema_or_table_name = consume(TokenType::Identifier).value(); String schema_name; String table_name; if (consume_if(TokenType::Period)) { schema_name = move(schema_or_table_name); table_name = consume(TokenType::Identifier).value(); } else { table_name = move(schema_or_table_name); } String alias; if (consume_if(TokenType::As)) alias = consume(TokenType::Identifier).value(); // Note: The qualified-table-name spec may include an "INDEXED BY index-name" or "NOT INDEXED" clause. This is a SQLite extension // "designed to help detect undesirable query plan changes during regression testing", and "application developers are admonished // to omit all use of INDEXED BY during application design, implementation, testing, and tuning". Our implementation purposefully // omits parsing INDEXED BY for now until there is good reason to add support. return create_ast_node(move(schema_name), move(table_name), move(alias)); } NonnullRefPtr Parser::parse_returning_clause() { // https://sqlite.org/syntax/returning-clause.html consume(TokenType::Returning); if (consume_if(TokenType::Asterisk)) return create_ast_node(); Vector columns; do { auto expression = parse_expression(); consume_if(TokenType::As); // 'AS' is optional. String column_alias; if (match(TokenType::Identifier)) column_alias = consume().value(); columns.append({ move(expression), move(column_alias) }); if (!match(TokenType::Comma)) break; consume(TokenType::Comma); } while (!match(TokenType::Eof)); return create_ast_node(move(columns)); } Token Parser::consume() { auto old_token = m_parser_state.m_token; m_parser_state.m_token = m_parser_state.m_lexer.next(); return old_token; } Token Parser::consume(TokenType expected_type) { if (!match(expected_type)) { expected(Token::name(expected_type)); } return consume(); } bool Parser::consume_if(TokenType expected_type) { if (!match(expected_type)) return false; consume(); return true; } bool Parser::match(TokenType type) const { return m_parser_state.m_token.type() == type; } void Parser::expected(StringView what) { syntax_error(String::formatted("Unexpected token {}, expected {}", m_parser_state.m_token.name(), what)); } void Parser::syntax_error(String message) { m_parser_state.m_errors.append({ move(message), position() }); } Parser::Position Parser::position() const { return { m_parser_state.m_token.line_number(), m_parser_state.m_token.line_column() }; } Parser::ParserState::ParserState(Lexer lexer) : m_lexer(move(lexer)) , m_token(m_lexer.next()) { } }