/* * Copyright (c) 2020-2021, the SerenityOS developers. * * SPDX-License-Identifier: BSD-2-Clause */ #include "Parser.h" #include "Shell.h" #include #include #include #include #include #include #include #include namespace Shell { Parser::SavedOffset Parser::save_offset() const { return { m_offset, m_line }; } char Parser::peek() { if (at_end()) return 0; VERIFY(m_offset < m_input.length()); auto ch = m_input[m_offset]; if (ch == '\\' && m_input.length() > m_offset + 1 && m_input[m_offset + 1] == '\n') { m_offset += 2; ++m_line.line_number; m_line.line_column = 0; return peek(); } return ch; } char Parser::consume() { if (at_end()) return 0; auto ch = peek(); ++m_offset; if (ch == '\n') { ++m_line.line_number; m_line.line_column = 0; } else { ++m_line.line_column; } return ch; } bool Parser::expect(char ch) { return expect(StringView { &ch, 1 }); } bool Parser::expect(const StringView& expected) { auto offset_at_start = m_offset; auto line_at_start = line(); if (expected.length() + m_offset > m_input.length()) return false; for (auto& c : expected) { if (peek() != c) { restore_to(offset_at_start, line_at_start); return false; } consume(); } return true; } template NonnullRefPtr Parser::create(Args... args) { return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, args...)); } [[nodiscard]] OwnPtr Parser::push_start() { return make(m_rule_start_offsets, m_rule_start_lines, m_offset, m_line.line_number, m_line.line_column); } Parser::Offset Parser::current_position() { return Offset { m_offset, { m_line.line_number, m_line.line_column } }; } static constexpr bool is_whitespace(char c) { return c == ' ' || c == '\t'; } static constexpr bool is_digit(char c) { return c <= '9' && c >= '0'; } static constexpr auto is_not(char c) { return [c](char ch) { return ch != c; }; } static inline char to_byte(char a, char b) { char buf[3] { a, b, 0 }; return strtol(buf, nullptr, 16); } RefPtr Parser::parse() { m_offset = 0; m_line = { 0, 0 }; auto toplevel = parse_toplevel(); if (m_offset < m_input.length()) { // Parsing stopped midway, this is a syntax error. auto error_start = push_start(); while (!at_end()) consume(); auto syntax_error_node = create("Unexpected tokens past the end"); if (!toplevel) toplevel = move(syntax_error_node); else if (!toplevel->is_syntax_error()) toplevel->set_is_syntax_error(*syntax_error_node); } return toplevel; } RefPtr Parser::parse_as_single_expression() { auto input = Shell::escape_token_for_double_quotes(m_input); Parser parser { input }; return parser.parse_expression(); } NonnullRefPtrVector Parser::parse_as_multiple_expressions() { NonnullRefPtrVector nodes; for (;;) { consume_while(is_whitespace); auto node = parse_expression(); if (!node) node = parse_redirection(); if (!node) return nodes; nodes.append(node.release_nonnull()); } return nodes; } RefPtr Parser::parse_toplevel() { auto rule_start = push_start(); SequenceParseResult result; NonnullRefPtrVector sequence; Vector positions; do { result = parse_sequence(); if (result.entries.is_empty()) break; sequence.extend(move(result.entries)); positions.extend(move(result.separator_positions)); } while (result.decision == ShouldReadMoreSequences::Yes); if (sequence.is_empty()) return nullptr; return create( create(move(sequence), move(positions))); } Parser::SequenceParseResult Parser::parse_sequence() { NonnullRefPtrVector left; auto read_terminators = [&](bool consider_tabs_and_spaces) { if (m_heredoc_initiations.is_empty()) { discard_terminators:; consume_while(is_any_of(consider_tabs_and_spaces ? " \t\n;" : "\n;")); } else { for (;;) { if (consider_tabs_and_spaces && (peek() == '\t' || peek() == ' ')) { consume(); continue; } if (peek() == ';') { consume(); continue; } if (peek() == '\n') { auto rule_start = push_start(); consume(); if (!parse_heredoc_entries()) { StringBuilder error_builder; error_builder.append("Expected to find heredoc entries for "); bool first = true; for (auto& entry : m_heredoc_initiations) { if (first) error_builder.appendff("{} (at {}:{})", entry.end, entry.node->position().start_line.line_column, entry.node->position().start_line.line_number); else error_builder.appendff(", {} (at {}:{})", entry.end, entry.node->position().start_line.line_column, entry.node->position().start_line.line_number); first = false; } left.append(create(error_builder.build(), true)); // Just read the rest of the newlines goto discard_terminators; } continue; } break; } } }; read_terminators(true); auto rule_start = push_start(); { auto var_decls = parse_variable_decls(); if (var_decls) left.append(var_decls.release_nonnull()); } auto pos_before_seps = save_offset(); switch (peek()) { case '}': return { move(left), {}, ShouldReadMoreSequences::No }; case '\n': read_terminators(false); [[fallthrough]]; case ';': { if (left.is_empty()) break; consume_while(is_any_of("\n;")); auto pos_after_seps = save_offset(); AST::Position separator_position { pos_before_seps.offset, pos_after_seps.offset, pos_before_seps.line, pos_after_seps.line }; return { move(left), { move(separator_position) }, ShouldReadMoreSequences::Yes }; } default: break; } auto first_entry = parse_function_decl(); Vector separator_positions; if (!first_entry) first_entry = parse_or_logical_sequence(); if (!first_entry) return { move(left), {}, ShouldReadMoreSequences::No }; left.append(first_entry.release_nonnull()); separator_positions.empend(pos_before_seps.offset, pos_before_seps.offset, pos_before_seps.line, pos_before_seps.line); consume_while(is_whitespace); pos_before_seps = save_offset(); switch (peek()) { case '\n': read_terminators(false); [[fallthrough]]; case ';': { consume_while(is_any_of("\n;")); auto pos_after_seps = save_offset(); separator_positions.empend(pos_before_seps.offset, pos_after_seps.offset, pos_before_seps.line, pos_after_seps.line); return { move(left), move(separator_positions), ShouldReadMoreSequences::Yes }; } case '&': { consume(); auto pos_after_seps = save_offset(); auto bg = create(left.take_last()); // Execute Background left.append(move(bg)); separator_positions.empend(pos_before_seps.offset, pos_after_seps.offset, pos_before_seps.line, pos_after_seps.line); return { move(left), move(separator_positions), ShouldReadMoreSequences::Yes }; } default: return { move(left), move(separator_positions), ShouldReadMoreSequences::No }; } } RefPtr Parser::parse_variable_decls() { auto rule_start = push_start(); consume_while(is_whitespace); auto pos_before_name = save_offset(); auto var_name = consume_while(is_word_character); if (var_name.is_empty()) return nullptr; if (!expect('=')) { restore_to(pos_before_name.offset, pos_before_name.line); return nullptr; } auto name_expr = create(move(var_name)); auto start = push_start(); auto expression = parse_expression(); if (!expression || expression->is_syntax_error()) { restore_to(*start); if (peek() == '(') { consume(); auto command = parse_pipe_sequence(); if (!command) restore_to(*start); else if (!expect(')')) command->set_is_syntax_error(*create("Expected a terminating close paren", true)); expression = command; } } if (!expression) { if (is_whitespace(peek())) { auto string_start = push_start(); expression = create(""); } else { restore_to(pos_before_name.offset, pos_before_name.line); return nullptr; } } Vector variables; variables.append({ move(name_expr), expression.release_nonnull() }); if (consume_while(is_whitespace).is_empty()) return create(move(variables)); auto rest = parse_variable_decls(); if (!rest) return create(move(variables)); VERIFY(rest->is_variable_decls()); auto* rest_decl = static_cast(rest.ptr()); variables.extend(rest_decl->variables()); return create(move(variables)); } RefPtr Parser::parse_function_decl() { auto rule_start = push_start(); auto restore = [&] { restore_to(*rule_start); return nullptr; }; consume_while(is_whitespace); auto pos_before_name = save_offset(); auto function_name = consume_while(is_word_character); auto pos_after_name = save_offset(); if (function_name.is_empty()) return restore(); if (!expect('(')) return restore(); Vector arguments; for (;;) { consume_while(is_whitespace); if (expect(')')) break; auto name_offset = m_offset; auto start_line = line(); auto arg_name = consume_while(is_word_character); if (arg_name.is_empty()) { // FIXME: Should this be a syntax error, or just return? return restore(); } arguments.append({ arg_name, { name_offset, m_offset, start_line, line() } }); } consume_while(is_any_of("\n\t ")); { RefPtr syntax_error; { auto obrace_error_start = push_start(); syntax_error = create("Expected an open brace '{' to start a function body", true); } if (!expect('{')) { return create( AST::NameWithPosition { move(function_name), { pos_before_name.offset, pos_after_name.offset, pos_before_name.line, pos_after_name.line } }, move(arguments), move(syntax_error)); } } TemporaryChange controls { m_continuation_controls_allowed, false }; auto body = parse_toplevel(); { RefPtr syntax_error; { auto cbrace_error_start = push_start(); syntax_error = create("Expected a close brace '}' to end a function body", true); } if (!expect('}')) { if (body) body->set_is_syntax_error(*syntax_error); else body = move(syntax_error); return create( AST::NameWithPosition { move(function_name), { pos_before_name.offset, pos_after_name.offset, pos_before_name.line, pos_after_name.line } }, move(arguments), move(body)); } } return create( AST::NameWithPosition { move(function_name), { pos_before_name.offset, pos_after_name.offset, pos_before_name.line, pos_after_name.line } }, move(arguments), move(body)); } RefPtr Parser::parse_or_logical_sequence() { consume_while(is_whitespace); auto rule_start = push_start(); auto and_sequence = parse_and_logical_sequence(); if (!and_sequence) return nullptr; consume_while(is_whitespace); auto pos_before_or = save_offset(); if (!expect("||")) return and_sequence; auto pos_after_or = save_offset(); auto right_and_sequence = parse_and_logical_sequence(); if (!right_and_sequence) right_and_sequence = create("Expected an expression after '||'", true); return create( and_sequence.release_nonnull(), right_and_sequence.release_nonnull(), AST::Position { pos_before_or.offset, pos_after_or.offset, pos_before_or.line, pos_after_or.line }); } RefPtr Parser::parse_and_logical_sequence() { consume_while(is_whitespace); auto rule_start = push_start(); auto pipe_sequence = parse_pipe_sequence(); if (!pipe_sequence) return nullptr; consume_while(is_whitespace); auto pos_before_and = save_offset(); if (!expect("&&")) return pipe_sequence; auto pos_after_end = save_offset(); auto right_and_sequence = parse_and_logical_sequence(); if (!right_and_sequence) right_and_sequence = create("Expected an expression after '&&'", true); return create( pipe_sequence.release_nonnull(), right_and_sequence.release_nonnull(), AST::Position { pos_before_and.offset, pos_after_end.offset, pos_before_and.line, pos_after_end.line }); } RefPtr Parser::parse_pipe_sequence() { auto rule_start = push_start(); auto left = parse_control_structure(); if (!left) { if (auto cmd = parse_command()) left = cmd; else return nullptr; } consume_while(is_whitespace); if (peek() != '|') return left; auto before_pipe = save_offset(); consume(); if (auto pipe_seq = parse_pipe_sequence()) { return create(left.release_nonnull(), pipe_seq.release_nonnull()); // Pipe } restore_to(before_pipe.offset, before_pipe.line); return left; } RefPtr Parser::parse_command() { auto rule_start = push_start(); consume_while(is_whitespace); auto redir = parse_redirection(); if (!redir) { auto list_expr = parse_list_expression(); if (!list_expr) return nullptr; auto cast = create(list_expr.release_nonnull()); // Cast List Command auto next_command = parse_command(); if (!next_command) return cast; return create(move(cast), next_command.release_nonnull()); // Join List Command } auto command = parse_command(); if (!command) return redir; return create(redir.release_nonnull(), command.release_nonnull()); // Join Command Command } RefPtr Parser::parse_control_structure() { auto rule_start = push_start(); consume_while(is_whitespace); if (auto control = parse_continuation_control()) return control; if (auto for_loop = parse_for_loop()) return for_loop; if (auto loop = parse_loop_loop()) return loop; if (auto if_expr = parse_if_expr()) return if_expr; if (auto subshell = parse_subshell()) return subshell; if (auto match = parse_match_expr()) return match; return nullptr; } RefPtr Parser::parse_continuation_control() { if (!m_continuation_controls_allowed) return nullptr; auto rule_start = push_start(); if (expect("break")) { { auto break_end = push_start(); if (consume_while(is_any_of(" \t\n;")).is_empty()) { restore_to(*rule_start); return nullptr; } restore_to(*break_end); } return create(AST::ContinuationControl::Break); } if (expect("continue")) { { auto continue_end = push_start(); if (consume_while(is_any_of(" \t\n;")).is_empty()) { restore_to(*rule_start); return nullptr; } restore_to(*continue_end); } return create(AST::ContinuationControl::Continue); } return nullptr; } RefPtr Parser::parse_for_loop() { auto rule_start = push_start(); if (!expect("for")) return nullptr; if (consume_while(is_any_of(" \t\n")).is_empty()) { restore_to(*rule_start); return nullptr; } Optional index_variable_name, variable_name; Optional in_start_position, index_start_position; auto offset_before_index = current_position(); if (expect("index")) { auto offset = current_position(); if (!consume_while(is_whitespace).is_empty()) { auto offset_before_variable = current_position(); auto variable = consume_while(is_word_character); if (!variable.is_empty()) { index_start_position = AST::Position { offset_before_index.offset, offset.offset, offset_before_index.line, offset.line }; auto offset_after_variable = current_position(); index_variable_name = AST::NameWithPosition { variable, { offset_before_variable.offset, offset_after_variable.offset, offset_before_variable.line, offset_after_variable.line }, }; consume_while(is_whitespace); } else { restore_to(offset_before_index.offset, offset_before_index.line); } } else { restore_to(offset_before_index.offset, offset_before_index.line); } } auto variable_name_start_offset = current_position(); auto name = consume_while(is_word_character); auto variable_name_end_offset = current_position(); if (!name.is_empty()) { variable_name = AST::NameWithPosition { name, { variable_name_start_offset.offset, variable_name_end_offset.offset, variable_name_start_offset.line, variable_name_end_offset.line } }; consume_while(is_whitespace); auto in_error_start = push_start(); if (!expect("in")) { auto syntax_error = create("Expected 'in' after a variable name in a 'for' loop", true); return create(move(variable_name), move(index_variable_name), move(syntax_error), nullptr); // ForLoop Var Iterated Block } in_start_position = AST::Position { in_error_start->offset, m_offset, in_error_start->line, line() }; } consume_while(is_whitespace); RefPtr iterated_expression; { auto iter_error_start = push_start(); iterated_expression = parse_expression(); if (!iterated_expression) iterated_expression = create("Expected an expression in 'for' loop", true); } consume_while(is_any_of(" \t\n")); { auto obrace_error_start = push_start(); if (!expect('{')) { auto syntax_error = create("Expected an open brace '{' to start a 'for' loop body", true); return create(move(variable_name), move(index_variable_name), move(iterated_expression), move(syntax_error), move(in_start_position), move(index_start_position)); // ForLoop Var Iterated Block } } TemporaryChange controls { m_continuation_controls_allowed, true }; auto body = parse_toplevel(); { auto cbrace_error_start = push_start(); if (!expect('}')) { auto error_start = push_start(); auto syntax_error = create("Expected a close brace '}' to end a 'for' loop body", true); if (body) body->set_is_syntax_error(*syntax_error); else body = syntax_error; } } return create(move(variable_name), move(index_variable_name), move(iterated_expression), move(body), move(in_start_position), move(index_start_position)); // ForLoop Var Iterated Block } RefPtr Parser::parse_loop_loop() { auto rule_start = push_start(); if (!expect("loop")) return nullptr; if (consume_while(is_any_of(" \t\n")).is_empty()) { restore_to(*rule_start); return nullptr; } { auto obrace_error_start = push_start(); if (!expect('{')) { auto syntax_error = create("Expected an open brace '{' to start a 'loop' loop body", true); return create(AST::NameWithPosition {}, AST::NameWithPosition {}, nullptr, move(syntax_error)); // ForLoop null null Block } } TemporaryChange controls { m_continuation_controls_allowed, true }; auto body = parse_toplevel(); { auto cbrace_error_start = push_start(); if (!expect('}')) { auto error_start = push_start(); auto syntax_error = create("Expected a close brace '}' to end a 'loop' loop body", true); if (body) body->set_is_syntax_error(*syntax_error); else body = syntax_error; } } return create(AST::NameWithPosition {}, AST::NameWithPosition {}, nullptr, move(body)); // ForLoop null null Block } RefPtr Parser::parse_if_expr() { auto rule_start = push_start(); if (!expect("if")) return nullptr; if (consume_while(is_any_of(" \t\n")).is_empty()) { restore_to(*rule_start); return nullptr; } RefPtr condition; { auto cond_error_start = push_start(); condition = parse_or_logical_sequence(); if (!condition) condition = create("Expected a logical sequence after 'if'", true); } auto parse_braced_toplevel = [&]() -> RefPtr { RefPtr body; { auto obrace_error_start = push_start(); if (!expect('{')) { body = create("Expected an open brace '{' to start an 'if' true branch", true); } } if (!body) body = parse_toplevel(); { auto cbrace_error_start = push_start(); if (!expect('}')) { auto error_start = push_start(); RefPtr syntax_error = create("Expected a close brace '}' to end an 'if' true branch", true); if (body) body->set_is_syntax_error(*syntax_error); else body = syntax_error; } } return body; }; consume_while(is_any_of(" \t\n")); auto true_branch = parse_braced_toplevel(); auto end_before_else = m_offset; auto line_before_else = line(); consume_while(is_any_of(" \t\n")); Optional else_position; { auto else_start = push_start(); if (expect("else")) else_position = AST::Position { else_start->offset, m_offset, else_start->line, line() }; else restore_to(end_before_else, line_before_else); } if (else_position.has_value()) { consume_while(is_any_of(" \t\n")); if (peek() == '{') { auto false_branch = parse_braced_toplevel(); return create(else_position, condition.release_nonnull(), move(true_branch), move(false_branch)); // If expr true_branch Else false_branch } auto else_if_branch = parse_if_expr(); return create(else_position, condition.release_nonnull(), move(true_branch), move(else_if_branch)); // If expr true_branch Else If ... } return create(else_position, condition.release_nonnull(), move(true_branch), nullptr); // If expr true_branch } RefPtr Parser::parse_subshell() { auto rule_start = push_start(); if (!expect('{')) return nullptr; auto body = parse_toplevel(); { auto cbrace_error_start = push_start(); if (!expect('}')) { auto error_start = push_start(); RefPtr syntax_error = create("Expected a close brace '}' to end a subshell", true); if (body) body->set_is_syntax_error(*syntax_error); else body = syntax_error; } } return create(move(body)); } RefPtr Parser::parse_match_expr() { auto rule_start = push_start(); if (!expect("match")) return nullptr; if (consume_while(is_whitespace).is_empty()) { restore_to(*rule_start); return nullptr; } auto match_expression = parse_expression(); if (!match_expression) { return create( create("Expected an expression after 'match'", true), String {}, Optional {}, Vector {}); } consume_while(is_any_of(" \t\n")); String match_name; Optional as_position; auto as_start = m_offset; auto as_line = line(); if (expect("as")) { as_position = AST::Position { as_start, m_offset, as_line, line() }; if (consume_while(is_any_of(" \t\n")).is_empty()) { auto node = create( match_expression.release_nonnull(), String {}, move(as_position), Vector {}); node->set_is_syntax_error(create("Expected whitespace after 'as' in 'match'", true)); return node; } match_name = consume_while(is_word_character); if (match_name.is_empty()) { auto node = create( match_expression.release_nonnull(), String {}, move(as_position), Vector {}); node->set_is_syntax_error(create("Expected an identifier after 'as' in 'match'", true)); return node; } } consume_while(is_any_of(" \t\n")); if (!expect('{')) { auto node = create( match_expression.release_nonnull(), move(match_name), move(as_position), Vector {}); node->set_is_syntax_error(create("Expected an open brace '{' to start a 'match' entry list", true)); return node; } consume_while(is_any_of(" \t\n")); Vector entries; for (;;) { auto entry = parse_match_entry(); consume_while(is_any_of(" \t\n")); if (entry.options.is_empty()) break; entries.append(entry); } consume_while(is_any_of(" \t\n")); if (!expect('}')) { auto node = create( match_expression.release_nonnull(), move(match_name), move(as_position), move(entries)); node->set_is_syntax_error(create("Expected a close brace '}' to end a 'match' entry list", true)); return node; } return create(match_expression.release_nonnull(), move(match_name), move(as_position), move(entries)); } AST::MatchEntry Parser::parse_match_entry() { auto rule_start = push_start(); NonnullRefPtrVector patterns; Vector pipe_positions; Optional> match_names; Optional match_as_position; auto pattern = parse_match_pattern(); if (!pattern) return { {}, {}, {}, {}, create("Expected a pattern in 'match' body", true) }; patterns.append(pattern.release_nonnull()); consume_while(is_any_of(" \t\n")); auto previous_pipe_start_position = m_offset; auto previous_pipe_start_line = line(); RefPtr error; while (expect('|')) { pipe_positions.append({ previous_pipe_start_position, m_offset, previous_pipe_start_line, line() }); consume_while(is_any_of(" \t\n")); auto pattern = parse_match_pattern(); if (!pattern) { error = create("Expected a pattern to follow '|' in 'match' body", true); break; } consume_while(is_any_of(" \t\n")); patterns.append(pattern.release_nonnull()); previous_pipe_start_line = line(); previous_pipe_start_position = m_offset; } consume_while(is_any_of(" \t\n")); auto as_start_position = m_offset; auto as_start_line = line(); if (expect("as")) { match_as_position = AST::Position { as_start_position, m_offset, as_start_line, line() }; consume_while(is_any_of(" \t\n")); if (!expect('(')) { if (!error) error = create("Expected an explicit list of identifiers after a pattern 'as'"); } else { match_names = Vector(); for (;;) { consume_while(is_whitespace); auto name = consume_while(is_word_character); if (name.is_empty()) break; match_names.value().append(move(name)); } if (!expect(')')) { if (!error) error = create("Expected a close paren ')' to end the identifier list of pattern 'as'", true); } } consume_while(is_any_of(" \t\n")); } if (!expect('{')) { if (!error) error = create("Expected an open brace '{' to start a match entry body", true); } auto body = parse_toplevel(); if (!expect('}')) { if (!error) error = create("Expected a close brace '}' to end a match entry body", true); } if (body && error) body->set_is_syntax_error(*error); else if (error) body = error; return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) }; } RefPtr Parser::parse_match_pattern() { return parse_expression(); } RefPtr Parser::parse_redirection() { auto rule_start = push_start(); // heredoc entry if (next_is("<<-") || next_is("<<~")) return nullptr; auto pipe_fd = 0; auto number = consume_while(is_digit); if (number.is_empty()) { pipe_fd = -1; } else { auto fd = number.to_int(); pipe_fd = fd.value_or(-1); } switch (peek()) { case '>': { consume(); if (peek() == '>') { consume(); consume_while(is_whitespace); pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO; auto path = parse_expression(); if (!path) { if (!at_end()) { // Eat a character and hope the problem goes away consume(); } path = create("Expected a path after redirection", true); } return create(pipe_fd, path.release_nonnull()); // Redirection WriteAppend } if (peek() == '&') { consume(); // FIXME: 'fd>&-' Syntax not the best. needs discussion. if (peek() == '-') { consume(); pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO; return create(pipe_fd); // Redirection CloseFd } int dest_pipe_fd = 0; auto number = consume_while(is_digit); pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO; if (number.is_empty()) { dest_pipe_fd = -1; } else { auto fd = number.to_int(); dest_pipe_fd = fd.value_or(-1); } auto redir = create(pipe_fd, dest_pipe_fd); // Redirection Fd2Fd if (dest_pipe_fd == -1) redir->set_is_syntax_error(*create("Expected a file descriptor")); return redir; } consume_while(is_whitespace); pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO; auto path = parse_expression(); if (!path) { if (!at_end()) { // Eat a character and hope the problem goes away consume(); } path = create("Expected a path after redirection", true); } return create(pipe_fd, path.release_nonnull()); // Redirection Write } case '<': { consume(); enum { Read, ReadWrite, } mode { Read }; if (peek() == '>') { mode = ReadWrite; consume(); } consume_while(is_whitespace); pipe_fd = pipe_fd >= 0 ? pipe_fd : STDIN_FILENO; auto path = parse_expression(); if (!path) { if (!at_end()) { // Eat a character and hope the problem goes away consume(); } path = create("Expected a path after redirection", true); } if (mode == Read) return create(pipe_fd, path.release_nonnull()); // Redirection Read return create(pipe_fd, path.release_nonnull()); // Redirection ReadWrite } default: restore_to(*rule_start); return nullptr; } } RefPtr Parser::parse_list_expression() { consume_while(is_whitespace); auto rule_start = push_start(); Vector> nodes; do { auto expr = parse_expression(); if (!expr) break; nodes.append(expr.release_nonnull()); } while (!consume_while(is_whitespace).is_empty()); if (nodes.is_empty()) return nullptr; return create(move(nodes)); // Concatenate List } RefPtr Parser::parse_expression() { auto rule_start = push_start(); if (m_rule_start_offsets.size() > max_allowed_nested_rule_depth) return create(String::formatted("Expression nested too deep (max allowed is {})", max_allowed_nested_rule_depth)); auto starting_char = peek(); auto read_concat = [&](auto&& expr) -> NonnullRefPtr { if (is_whitespace(peek())) return move(expr); if (auto next_expr = parse_expression()) return create(move(expr), next_expr.release_nonnull()); return move(expr); }; // Heredocs are expressions, so allow them if (!(next_is("<<-") || next_is("<<~"))) { if (strchr("&|)} ;<>\n", starting_char) != nullptr) return nullptr; } if (m_extra_chars_not_allowed_in_barewords.contains_slow(starting_char)) return nullptr; if (m_is_in_brace_expansion_spec && next_is("..")) return nullptr; if (isdigit(starting_char)) { ScopedValueRollback offset_rollback { m_offset }; auto redir = parse_redirection(); if (redir) return nullptr; } if (starting_char == '$') { if (auto variable = parse_variable()) return read_concat(variable.release_nonnull()); if (auto immediate = parse_immediate_expression()) return read_concat(immediate.release_nonnull()); if (auto inline_exec = parse_evaluate()) return read_concat(inline_exec.release_nonnull()); } if (starting_char == '#') return parse_comment(); if (starting_char == '(') { consume(); auto list = parse_list_expression(); if (!expect(')')) { restore_to(*rule_start); return nullptr; } return read_concat(create(move(list))); // Cast To List } if (starting_char == '!' && m_in_interactive_mode) { if (auto designator = parse_history_designator()) return designator; } if (auto composite = parse_string_composite()) return read_concat(composite.release_nonnull()); return nullptr; } RefPtr Parser::parse_string_composite() { auto rule_start = push_start(); if (auto string = parse_string()) { if (auto next_part = parse_string_composite()) return create(string.release_nonnull(), next_part.release_nonnull()); // Concatenate String StringComposite return string; } if (auto variable = parse_variable()) { if (auto next_part = parse_string_composite()) return create(variable.release_nonnull(), next_part.release_nonnull()); // Concatenate Variable StringComposite return variable; } if (auto glob = parse_glob()) { if (auto next_part = parse_string_composite()) return create(glob.release_nonnull(), next_part.release_nonnull()); // Concatenate Glob StringComposite return glob; } if (auto expansion = parse_brace_expansion()) { if (auto next_part = parse_string_composite()) return create(expansion.release_nonnull(), next_part.release_nonnull()); // Concatenate BraceExpansion StringComposite return expansion; } if (auto bareword = parse_bareword()) { if (auto next_part = parse_string_composite()) return create(bareword.release_nonnull(), next_part.release_nonnull()); // Concatenate Bareword StringComposite return bareword; } if (auto inline_command = parse_evaluate()) { if (auto next_part = parse_string_composite()) return create(inline_command.release_nonnull(), next_part.release_nonnull()); // Concatenate Execute StringComposite return inline_command; } if (auto heredoc = parse_heredoc_initiation_record()) { if (auto next_part = parse_string_composite()) return create(heredoc.release_nonnull(), next_part.release_nonnull()); // Concatenate Heredoc StringComposite return heredoc; } return nullptr; } RefPtr Parser::parse_string() { auto rule_start = push_start(); if (at_end()) return nullptr; if (peek() == '"') { consume(); auto inner = parse_string_inner(StringEndCondition::DoubleQuote); if (!inner) inner = create("Unexpected EOF in string", true); if (!expect('"')) { inner = create(move(inner)); inner->set_is_syntax_error(*create("Expected a terminating double quote", true)); return inner; } return create(move(inner)); // Double Quoted String } if (peek() == '\'') { consume(); auto text = consume_while(is_not('\'')); bool is_error = false; if (!expect('\'')) is_error = true; auto result = create(move(text)); // String Literal if (is_error) result->set_is_syntax_error(*create("Expected a terminating single quote", true)); return result; } return nullptr; } RefPtr Parser::parse_string_inner(StringEndCondition condition) { auto rule_start = push_start(); if (at_end()) return nullptr; StringBuilder builder; while (!at_end()) { if (condition == StringEndCondition::DoubleQuote && peek() == '"') { break; } if (peek() == '\\') { consume(); if (at_end()) { break; } auto ch = consume(); switch (ch) { case '\\': default: builder.append(ch); break; case 'x': { if (m_input.length() <= m_offset + 2) break; auto first_nibble = tolower(consume()); auto second_nibble = tolower(consume()); if (!isxdigit(first_nibble) || !isxdigit(second_nibble)) { builder.append(first_nibble); builder.append(second_nibble); break; } builder.append(to_byte(first_nibble, second_nibble)); break; } case 'u': { if (m_input.length() <= m_offset + 8) break; size_t counter = 8; auto chars = consume_while([&](auto) { return counter-- > 0; }); if (auto number = AK::StringUtils::convert_to_uint_from_hex(chars); number.has_value()) builder.append(Utf32View { &number.value(), 1 }); else builder.append(chars); break; } case 'a': builder.append('\a'); break; case 'b': builder.append('\b'); break; case 'e': builder.append('\x1b'); break; case 'f': builder.append('\f'); break; case 'r': builder.append('\r'); break; case 'n': builder.append('\n'); break; case 't': builder.append('\t'); break; } continue; } if (peek() == '$') { auto string_literal = create(builder.to_string()); // String Literal auto read_concat = [&](auto&& node) { auto inner = create( move(string_literal), move(node)); // Compose String Node if (auto string = parse_string_inner(condition)) { return create(move(inner), string.release_nonnull()); // Compose Composition Composition } return inner; }; if (auto variable = parse_variable()) return read_concat(variable.release_nonnull()); if (auto immediate = parse_immediate_expression()) return read_concat(immediate.release_nonnull()); if (auto evaluate = parse_evaluate()) return read_concat(evaluate.release_nonnull()); } builder.append(consume()); } return create(builder.to_string()); // String Literal } RefPtr Parser::parse_variable() { auto rule_start = push_start(); auto ref = parse_variable_ref(); if (!ref) return nullptr; auto variable = static_ptr_cast(ref); if (auto slice = parse_slice()) variable->set_slice(slice.release_nonnull()); return variable; } RefPtr Parser::parse_variable_ref() { auto rule_start = push_start(); if (at_end()) return nullptr; if (peek() != '$') return nullptr; consume(); switch (peek()) { case '$': case '?': case '*': case '#': return create(consume()); // Variable Special default: break; } auto name = consume_while(is_word_character); if (name.length() == 0) { restore_to(rule_start->offset, rule_start->line); return nullptr; } return create(move(name)); // Variable Simple } RefPtr Parser::parse_slice() { auto rule_start = push_start(); if (!next_is("[")) return nullptr; consume(); // [ ScopedValueRollback chars_change { m_extra_chars_not_allowed_in_barewords }; m_extra_chars_not_allowed_in_barewords.append(']'); auto spec = parse_brace_expansion_spec(); RefPtr error; if (peek() != ']') error = create("Expected a close bracket ']' to end a variable slice"); else consume(); if (!spec) { if (error) spec = move(error); else spec = create("Expected either a range, or a comma-seprated list of selectors"); } auto node = create(spec.release_nonnull()); if (error) node->set_is_syntax_error(*error); return node; } RefPtr Parser::parse_evaluate() { auto rule_start = push_start(); if (at_end()) return nullptr; if (peek() != '$') return nullptr; consume(); if (peek() == '(') { consume(); auto inner = parse_pipe_sequence(); if (!inner) inner = create("Unexpected EOF in list", true); if (!expect(')')) inner->set_is_syntax_error(*create("Expected a terminating close paren", true)); return create(inner.release_nonnull(), true); } auto inner = parse_expression(); if (!inner) { inner = create("Expected a command", true); } else { if (inner->is_list()) { auto execute_inner = create(inner.release_nonnull(), true); inner = move(execute_inner); } else { auto dyn_inner = create(inner.release_nonnull()); inner = move(dyn_inner); } } return inner; } RefPtr Parser::parse_immediate_expression() { auto rule_start = push_start(); if (at_end()) return nullptr; if (peek() != '$') return nullptr; consume(); if (peek() != '{') { restore_to(*rule_start); return nullptr; } consume(); consume_while(is_whitespace); auto function_name_start_offset = current_position(); auto function_name = consume_while(is_word_character); auto function_name_end_offset = current_position(); AST::Position function_position { function_name_start_offset.offset, function_name_end_offset.offset, function_name_start_offset.line, function_name_end_offset.line, }; consume_while(is_whitespace); NonnullRefPtrVector arguments; do { auto expr = parse_expression(); if (!expr) break; arguments.append(expr.release_nonnull()); } while (!consume_while(is_whitespace).is_empty()); auto ending_brace_start_offset = current_position(); if (peek() == '}') consume(); auto ending_brace_end_offset = current_position(); auto ending_brace_position = ending_brace_start_offset.offset == ending_brace_end_offset.offset ? Optional {} : Optional { AST::Position { ending_brace_start_offset.offset, ending_brace_end_offset.offset, ending_brace_start_offset.line, ending_brace_end_offset.line, } }; auto node = create( AST::NameWithPosition { function_name, move(function_position) }, move(arguments), ending_brace_position); if (!ending_brace_position.has_value()) node->set_is_syntax_error(create("Expected a closing brace '}' to end an immediate expression", true)); else if (node->function_name().is_empty()) node->set_is_syntax_error(create("Expected an immediate function name")); return node; } RefPtr Parser::parse_history_designator() { auto rule_start = push_start(); VERIFY(peek() == '!'); consume(); // Event selector AST::HistorySelector selector; RefPtr syntax_error; selector.event.kind = AST::HistorySelector::EventKind::StartingStringLookup; selector.event.text_position = { m_offset, m_offset, m_line, m_line }; selector.word_selector_range = { AST::HistorySelector::WordSelector { AST::HistorySelector::WordSelectorKind::Index, 0, { m_offset, m_offset, m_line, m_line }, nullptr }, AST::HistorySelector::WordSelector { AST::HistorySelector::WordSelectorKind::Last, 0, { m_offset, m_offset, m_line, m_line }, nullptr } }; bool is_word_selector = false; switch (peek()) { case ':': consume(); [[fallthrough]]; case '^': case '$': case '*': is_word_selector = true; break; case '!': consume(); selector.event.kind = AST::HistorySelector::EventKind::IndexFromEnd; selector.event.index = 0; selector.event.text = "!"; break; case '?': consume(); selector.event.kind = AST::HistorySelector::EventKind::ContainingStringLookup; [[fallthrough]]; default: { TemporaryChange chars_change { m_extra_chars_not_allowed_in_barewords, { ':', '^', '$', '*' } }; auto bareword = parse_bareword(); if (!bareword || !bareword->is_bareword()) { restore_to(*rule_start); return nullptr; } selector.event.text = static_ptr_cast(bareword)->text(); selector.event.text_position = bareword->position(); auto it = selector.event.text.begin(); bool is_negative = false; if (*it == '-') { ++it; is_negative = true; } if (it != selector.event.text.end() && all_of(it, selector.event.text.end(), is_digit)) { if (is_negative) selector.event.kind = AST::HistorySelector::EventKind::IndexFromEnd; else selector.event.kind = AST::HistorySelector::EventKind::IndexFromStart; auto number = abs(selector.event.text.to_int().value_or(0)); if (number != 0) selector.event.index = number - 1; else syntax_error = create("History entry index value invalid or out of range"); } if (":^$*"sv.contains(peek())) { is_word_selector = true; if (peek() == ':') consume(); } } } if (!is_word_selector) { auto node = create(move(selector)); if (syntax_error) node->set_is_syntax_error(*syntax_error); return node; } // Word selectors auto parse_word_selector = [&]() -> Optional { auto c = peek(); AST::HistorySelector::WordSelectorKind word_selector_kind; ssize_t offset = -1; if (isdigit(c)) { auto num = consume_while(is_digit); auto value = num.to_uint(); if (!value.has_value()) return {}; word_selector_kind = AST::HistorySelector::WordSelectorKind::Index; offset = value.value(); } else if (c == '^') { consume(); word_selector_kind = AST::HistorySelector::WordSelectorKind::Index; offset = 1; } else if (c == '$') { consume(); word_selector_kind = AST::HistorySelector::WordSelectorKind::Last; offset = 0; } if (offset == -1) return {}; return AST::HistorySelector::WordSelector { word_selector_kind, static_cast(offset), { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, syntax_error }; }; auto make_word_selector = [&](AST::HistorySelector::WordSelectorKind word_selector_kind, size_t offset) { return AST::HistorySelector::WordSelector { word_selector_kind, offset, { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, syntax_error }; }; auto first_char = peek(); if (!(is_digit(first_char) || "^$-*"sv.contains(first_char))) { if (!syntax_error) syntax_error = create("Expected a word selector after ':' in a history event designator", true); } else if (first_char == '*') { consume(); selector.word_selector_range.start = make_word_selector(AST::HistorySelector::WordSelectorKind::Index, 1); selector.word_selector_range.end = make_word_selector(AST::HistorySelector::WordSelectorKind::Last, 0); } else if (first_char == '-') { consume(); selector.word_selector_range.start = make_word_selector(AST::HistorySelector::WordSelectorKind::Index, 0); auto last_selector = parse_word_selector(); if (!last_selector.has_value()) selector.word_selector_range.end = make_word_selector(AST::HistorySelector::WordSelectorKind::Last, 1); else selector.word_selector_range.end = last_selector.release_value(); } else { auto first_selector = parse_word_selector(); // peek() should be a digit, ^, or $ here, so this should always have value. VERIFY(first_selector.has_value()); selector.word_selector_range.start = first_selector.release_value(); if (peek() == '-') { consume(); auto last_selector = parse_word_selector(); if (last_selector.has_value()) { selector.word_selector_range.end = last_selector.release_value(); } else { selector.word_selector_range.end = make_word_selector(AST::HistorySelector::WordSelectorKind::Last, 1); } } else if (peek() == '*') { consume(); selector.word_selector_range.end = make_word_selector(AST::HistorySelector::WordSelectorKind::Last, 0); } else { selector.word_selector_range.end.clear(); } } auto node = create(move(selector)); if (syntax_error) node->set_is_syntax_error(*syntax_error); return node; } RefPtr Parser::parse_comment() { if (at_end()) return nullptr; if (peek() != '#') return nullptr; consume(); auto text = consume_while(is_not('\n')); return create(move(text)); // Comment } RefPtr Parser::parse_bareword() { auto rule_start = push_start(); StringBuilder builder; auto is_acceptable_bareword_character = [&](char c) { return strchr("\\\"'*$&#|(){} ?;<>\n", c) == nullptr && !m_extra_chars_not_allowed_in_barewords.contains_slow(c); }; while (!at_end()) { char ch = peek(); if (ch == '\\') { consume(); if (!at_end()) { ch = consume(); if (is_acceptable_bareword_character(ch)) builder.append('\\'); } builder.append(ch); continue; } if (m_is_in_brace_expansion_spec && next_is("..")) { // Don't eat '..' in a brace expansion spec. break; } if (is_acceptable_bareword_character(ch)) { builder.append(consume()); continue; } break; } if (builder.is_empty()) return nullptr; auto current_end = m_offset; auto current_line = line(); auto string = builder.to_string(); if (string.starts_with('~')) { String username; RefPtr tilde, text; auto first_slash_index = string.find('/'); if (first_slash_index.has_value()) { username = string.substring_view(1, first_slash_index.value() - 1); string = string.substring_view(first_slash_index.value(), string.length() - first_slash_index.value()); } else { username = string.substring_view(1, string.length() - 1); string = ""; } // Synthesize a Tilde Node with the correct positioning information. { restore_to(rule_start->offset, rule_start->line); auto ch = consume(); VERIFY(ch == '~'); auto username_length = username.length(); tilde = create(move(username)); // Consume the username (if any) for (size_t i = 0; i < username_length; ++i) consume(); } if (string.is_empty()) return tilde; // Synthesize a BarewordLiteral Node with the correct positioning information. { auto text_start = push_start(); restore_to(current_end, current_line); text = create(move(string)); } return create(tilde.release_nonnull(), text.release_nonnull()); // Juxtaposition Variable Bareword } if (string.starts_with("\\~")) { // Un-escape the tilde, but only at the start (where it would be an expansion) string = string.substring(1, string.length() - 1); } return create(move(string)); // Bareword Literal } RefPtr Parser::parse_glob() { auto rule_start = push_start(); auto bareword_part = parse_bareword(); if (at_end()) return bareword_part; char ch = peek(); if (ch == '*' || ch == '?') { auto saved_offset = save_offset(); consume(); StringBuilder textbuilder; if (bareword_part) { StringView text; if (bareword_part->is_bareword()) { auto bareword = static_cast(bareword_part.ptr()); text = bareword->text(); } else { // FIXME: Allow composition of tilde+bareword with globs: '~/foo/bar/baz*' restore_to(saved_offset.offset, saved_offset.line); bareword_part->set_is_syntax_error(*create(String::formatted("Unexpected {} inside a glob", bareword_part->class_name()))); return bareword_part; } textbuilder.append(text); } textbuilder.append(ch); auto glob_after = parse_glob(); if (glob_after) { if (glob_after->is_glob()) { auto glob = static_cast(glob_after.ptr()); textbuilder.append(glob->text()); } else if (glob_after->is_bareword()) { auto bareword = static_cast(glob_after.ptr()); textbuilder.append(bareword->text()); } else if (glob_after->is_tilde()) { auto bareword = static_cast(glob_after.ptr()); textbuilder.append("~"); textbuilder.append(bareword->text()); } else { return create(String::formatted("Invalid node '{}' in glob position, escape shell special characters", glob_after->class_name())); } } return create(textbuilder.to_string()); // Glob } return bareword_part; } RefPtr Parser::parse_brace_expansion() { auto rule_start = push_start(); if (!expect('{')) return nullptr; if (auto spec = parse_brace_expansion_spec()) { if (!expect('}')) spec->set_is_syntax_error(create("Expected a close brace '}' to end a brace expansion", true)); return spec; } restore_to(*rule_start); return nullptr; } RefPtr Parser::parse_brace_expansion_spec() { TemporaryChange is_in_brace_expansion { m_is_in_brace_expansion_spec, true }; ScopedValueRollback chars_change { m_extra_chars_not_allowed_in_barewords }; m_extra_chars_not_allowed_in_barewords.append(','); auto rule_start = push_start(); auto start_expr = parse_expression(); if (start_expr) { if (expect("..")) { if (auto end_expr = parse_expression()) { if (end_expr->position().start_offset != start_expr->position().end_offset + 2) end_expr->set_is_syntax_error(create("Expected no whitespace between '..' and the following expression in brace expansion")); return create(start_expr.release_nonnull(), end_expr.release_nonnull()); } return create(start_expr.release_nonnull(), create("Expected an expression to end range brace expansion with", true)); } } NonnullRefPtrVector subexpressions; if (start_expr) subexpressions.append(start_expr.release_nonnull()); while (expect(',')) { auto expr = parse_expression(); if (expr) { subexpressions.append(expr.release_nonnull()); } else { subexpressions.append(create("")); } } if (subexpressions.is_empty()) return nullptr; return create(move(subexpressions)); } RefPtr Parser::parse_heredoc_initiation_record() { if (!next_is("<<")) return nullptr; auto rule_start = push_start(); // '<' '<' consume(); consume(); HeredocInitiationRecord record; record.end = ""; RefPtr syntax_error_node; // '-' | '~' switch (peek()) { case '-': record.deindent = false; consume(); break; case '~': record.deindent = true; consume(); break; default: restore_to(*rule_start); return nullptr; } // StringLiteral | bareword if (auto bareword = parse_bareword()) { if (!bareword->is_bareword()) { syntax_error_node = create(String::formatted("Expected a bareword or a quoted string, not {}", bareword->class_name())); } else { if (bareword->is_syntax_error()) syntax_error_node = bareword->syntax_error_node(); else record.end = static_cast(bareword.ptr())->text(); } record.interpolate = true; } else if (peek() == '\'') { consume(); auto text = consume_while(is_not('\'')); bool is_error = false; if (!expect('\'')) is_error = true; if (is_error) syntax_error_node = create("Expected a terminating single quote", true); record.end = text; record.interpolate = false; } else { syntax_error_node = create("Expected a bareword or a single-quoted string literal for heredoc end key", true); } auto node = create(record.end, record.interpolate, record.deindent); if (syntax_error_node) node->set_is_syntax_error(*syntax_error_node); else node->set_is_syntax_error(*create(String::formatted("Expected heredoc contents for heredoc with end key '{}'", node->end()), true)); record.node = node; m_heredoc_initiations.append(move(record)); return node; } bool Parser::parse_heredoc_entries() { auto heredocs = move(m_heredoc_initiations); m_heredoc_initiations.clear(); // Try to parse heredoc entries, as reverse recorded in the initiation records for (auto& record : heredocs) { auto rule_start = push_start(); if (m_rule_start_offsets.size() > max_allowed_nested_rule_depth) { record.node->set_is_syntax_error(*create(String::formatted("Expression nested too deep (max allowed is {})", max_allowed_nested_rule_depth))); continue; } bool found_key = false; if (!record.interpolate) { // Since no interpolation is allowed, just read lines until we hit the key Optional last_line_offset; for (;;) { if (at_end()) break; if (peek() == '\n') consume(); last_line_offset = current_position(); auto line = consume_while(is_not('\n')); if (peek() == '\n') consume(); if (line.trim_whitespace() == record.end) { found_key = true; break; } } if (!last_line_offset.has_value()) last_line_offset = current_position(); // Now just wrap it in a StringLiteral and set it as the node's contents auto node = create(m_input.substring_view(rule_start->offset, last_line_offset->offset - rule_start->offset)); if (!found_key) node->set_is_syntax_error(*create(String::formatted("Expected to find the heredoc key '{}', but found Eof", record.end), true)); record.node->set_contents(move(node)); } else { // Interpolation is allowed, so we're going to read doublequoted string innards // until we find a line that contains the key auto end_condition = move(m_end_condition); found_key = false; set_end_condition(make>([this, end = record.end, &found_key] { if (found_key) return true; auto offset = current_position(); auto cond = move(m_end_condition); ScopeGuard guard { [&] { m_end_condition = move(cond); } }; if (peek() == '\n') { consume(); auto line = consume_while(is_not('\n')); if (peek() == '\n') consume(); if (line.trim_whitespace() == end) { restore_to(offset.offset, offset.line); found_key = true; return true; } } restore_to(offset.offset, offset.line); return false; })); auto expr = parse_string_inner(StringEndCondition::Heredoc); set_end_condition(move(end_condition)); if (found_key) { auto offset = current_position(); if (peek() == '\n') consume(); auto line = consume_while(is_not('\n')); if (peek() == '\n') consume(); if (line.trim_whitespace() != record.end) restore_to(offset.offset, offset.line); } if (!expr && found_key) { expr = create(""); } else if (!expr) { expr = create(String::formatted("Expected to find a valid string inside a heredoc (with end key '{}')", record.end), true); } else if (!found_key) { expr->set_is_syntax_error(*create(String::formatted("Expected to find the heredoc key '{}'", record.end), true)); } record.node->set_contents(create(move(expr))); } } return true; } StringView Parser::consume_while(Function condition) { if (at_end()) return {}; auto start_offset = m_offset; while (!at_end() && condition(peek())) consume(); return m_input.substring_view(start_offset, m_offset - start_offset); } bool Parser::next_is(const StringView& next) { auto start = current_position(); auto res = expect(next); restore_to(start.offset, start.line); return res; } }