diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2023-02-11 17:59:15 +0330 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2023-02-13 23:00:15 +0330 |
commit | 2a276c86d4b84668ab7f8daa2c1e37f7d9e5418f (patch) | |
tree | 6d3b6533dc1c6e890eb8dde1a540397dfa2a0419 /Userland | |
parent | 2dc1682274c7f1fcb3ba1ad4d613cf307581b748 (diff) | |
download | serenity-2a276c86d4b84668ab7f8daa2c1e37f7d9e5418f.zip |
Shell: Start implementing a POSIX-compliant parser
The parser is still very much a work-in-progress, but it can currently
parse most of the basic bits, the only *completely* unimplemented things
in the parser are:
- heredocs (io_here)
- alias expansion
- arithmetic expansion
There are a whole suite of bugs, and syntax highlighting is unreliable
at best.
For now, this is not attached anywhere, a future commit will enable it
for /bin/sh or a `Shell --posix` invocation.
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Shell/AST.cpp | 23 | ||||
-rw-r--r-- | Userland/Shell/AST.h | 18 | ||||
-rw-r--r-- | Userland/Shell/CMakeLists.txt | 2 | ||||
-rw-r--r-- | Userland/Shell/Formatter.cpp | 3 | ||||
-rw-r--r-- | Userland/Shell/Formatter.h | 5 | ||||
-rw-r--r-- | Userland/Shell/ImmediateFunctions.cpp | 155 | ||||
-rw-r--r-- | Userland/Shell/PosixLexer.cpp | 725 | ||||
-rw-r--r-- | Userland/Shell/PosixLexer.h | 413 | ||||
-rw-r--r-- | Userland/Shell/PosixParser.cpp | 1922 | ||||
-rw-r--r-- | Userland/Shell/PosixParser.h | 117 | ||||
-rw-r--r-- | Userland/Shell/Shell.cpp | 49 | ||||
-rw-r--r-- | Userland/Shell/Shell.h | 35 |
12 files changed, 3439 insertions, 28 deletions
diff --git a/Userland/Shell/AST.cpp b/Userland/Shell/AST.cpp index 26fce6e092..cbcf35384f 100644 --- a/Userland/Shell/AST.cpp +++ b/Userland/Shell/AST.cpp @@ -3004,6 +3004,24 @@ RefPtr<Value> Juxtaposition::run(RefPtr<Shell> shell) auto left = left_value->resolve_as_list(shell); auto right = right_value->resolve_as_list(shell); + if (m_mode == Mode::StringExpand) { + Vector<DeprecatedString> result; + result.ensure_capacity(left.size() + right.size()); + + for (auto& left_item : left) + result.append(left_item); + + if (!result.is_empty() && !right.is_empty()) { + auto& last = result.last(); + last = DeprecatedString::formatted("{}{}", last, right.first()); + right.take_first(); + } + for (auto& right_item : right) + result.append(right_item); + + return make_ref_counted<ListValue>(move(result)); + } + if (left_value->is_string() && right_value->is_string()) { VERIFY(left.size() == 1); @@ -3016,7 +3034,7 @@ RefPtr<Value> Juxtaposition::run(RefPtr<Shell> shell) return make_ref_counted<StringValue>(builder.to_deprecated_string()); } - // Otherwise, treat them as lists and create a list product. + // Otherwise, treat them as lists and create a list product (or just append). if (left.is_empty() || right.is_empty()) return make_ref_counted<ListValue>({}); @@ -3114,10 +3132,11 @@ HitTestResult Juxtaposition::hit_test_position(size_t offset) const return result; } -Juxtaposition::Juxtaposition(Position position, NonnullRefPtr<Node> left, NonnullRefPtr<Node> right) +Juxtaposition::Juxtaposition(Position position, NonnullRefPtr<Node> left, NonnullRefPtr<Node> right, Juxtaposition::Mode mode) : Node(move(position)) , m_left(move(left)) , m_right(move(right)) + , m_mode(mode) { if (m_left->is_syntax_error()) set_is_syntax_error(m_left->syntax_error_node()); diff --git a/Userland/Shell/AST.h b/Userland/Shell/AST.h index 64ed885db8..bead8ad804 100644 --- a/Userland/Shell/AST.h +++ b/Userland/Shell/AST.h @@ -47,6 +47,16 @@ struct Position { } start_line, end_line; bool contains(size_t offset) const { return start_offset <= offset && offset <= end_offset; } + + Position with_end(Position const& end) const + { + return { + .start_offset = start_offset, + .end_offset = end.end_offset, + .start_line = start_line, + .end_line = end.end_line, + }; + } }; struct NameWithPosition { @@ -989,6 +999,7 @@ public: NonnullRefPtr<Node> const& condition() const { return m_condition; } RefPtr<Node> const& true_branch() const { return m_true_branch; } RefPtr<Node> const& false_branch() const { return m_false_branch; } + RefPtr<Node>& false_branch() { return m_false_branch; } Optional<Position> const else_position() const { return m_else_position; } private: @@ -1301,7 +1312,11 @@ private: class Juxtaposition final : public Node { public: - Juxtaposition(Position, NonnullRefPtr<Node>, NonnullRefPtr<Node>); + enum class Mode { + ListExpand, + StringExpand, + }; + Juxtaposition(Position, NonnullRefPtr<Node>, NonnullRefPtr<Node>, Mode = Mode::ListExpand); virtual ~Juxtaposition(); virtual void visit(NodeVisitor& visitor) override { visitor.visit(this); } @@ -1318,6 +1333,7 @@ private: NonnullRefPtr<Node> m_left; NonnullRefPtr<Node> m_right; + Mode m_mode { Mode::ListExpand }; }; class Heredoc final : public Node { diff --git a/Userland/Shell/CMakeLists.txt b/Userland/Shell/CMakeLists.txt index e805a5c3cc..961e205210 100644 --- a/Userland/Shell/CMakeLists.txt +++ b/Userland/Shell/CMakeLists.txt @@ -12,6 +12,8 @@ set(SOURCES Job.cpp NodeVisitor.cpp Parser.cpp + PosixLexer.cpp + PosixParser.cpp Shell.cpp ) diff --git a/Userland/Shell/Formatter.cpp b/Userland/Shell/Formatter.cpp index 6eefc86949..9f7aea7820 100644 --- a/Userland/Shell/Formatter.cpp +++ b/Userland/Shell/Formatter.cpp @@ -7,6 +7,7 @@ #include "Formatter.h" #include "AST.h" #include "Parser.h" +#include "PosixParser.h" #include <AK/Hex.h> #include <AK/ScopedValueRollback.h> #include <AK/TemporaryChange.h> @@ -15,7 +16,7 @@ namespace Shell { DeprecatedString Formatter::format() { - auto node = m_root_node ? m_root_node : Parser(m_source).parse(); + auto node = m_root_node ?: (m_parse_as_posix ? Posix::Parser(m_source).parse() : Parser(m_source).parse()); if (m_cursor >= 0) m_output_cursor = m_cursor; diff --git a/Userland/Shell/Formatter.h b/Userland/Shell/Formatter.h index 9cb6bfbc18..b1f2cb4059 100644 --- a/Userland/Shell/Formatter.h +++ b/Userland/Shell/Formatter.h @@ -19,10 +19,11 @@ namespace Shell { class Formatter final : public AST::NodeVisitor { public: - Formatter(StringView source, ssize_t cursor = -1) + Formatter(StringView source, ssize_t cursor = -1, bool parse_as_posix = false) : m_builders({ StringBuilder { round_up_to_power_of_two(source.length(), 16) } }) , m_source(source) , m_cursor(cursor) + , m_parse_as_posix(parse_as_posix) { if (m_source.is_empty()) return; @@ -124,6 +125,8 @@ private: StringView m_trivia; Vector<DeprecatedString> m_heredocs_to_append_after_sequence; + + bool m_parse_as_posix { false }; }; } diff --git a/Userland/Shell/ImmediateFunctions.cpp b/Userland/Shell/ImmediateFunctions.cpp index 1ec7d5cf2e..647bafb80d 100644 --- a/Userland/Shell/ImmediateFunctions.cpp +++ b/Userland/Shell/ImmediateFunctions.cpp @@ -453,6 +453,161 @@ RefPtr<AST::Node> Shell::immediate_join(AST::ImmediateExpression& invoking_node, return AST::make_ref_counted<AST::StringLiteral>(invoking_node.position(), builder.to_deprecated_string(), AST::StringLiteral::EnclosureType::None); } +RefPtr<AST::Node> Shell::immediate_value_or_default(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 2) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 2 arguments to value_or_default", invoking_node.position()); + return nullptr; + } + + auto name = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + if (!local_variable_or(name, ""sv).is_empty()) + return make_ref_counted<AST::SimpleVariable>(invoking_node.position(), name); + + return arguments.last(); +} + +RefPtr<AST::Node> Shell::immediate_assign_default(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 2) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 2 arguments to assign_default", invoking_node.position()); + return nullptr; + } + + auto name = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + if (!local_variable_or(name, ""sv).is_empty()) + return make_ref_counted<AST::SimpleVariable>(invoking_node.position(), name); + + auto value = const_cast<AST::Node&>(arguments.last()).run(*this)->resolve_without_cast(*this); + set_local_variable(name, value); + + return make_ref_counted<AST::SyntheticNode>(invoking_node.position(), value); +} + +RefPtr<AST::Node> Shell::immediate_error_if_empty(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 2) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 2 arguments to error_if_empty", invoking_node.position()); + return nullptr; + } + + auto name = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + if (!local_variable_or(name, ""sv).is_empty()) + return make_ref_counted<AST::SimpleVariable>(invoking_node.position(), name); + + auto error_value = const_cast<AST::Node&>(arguments.last()).run(*this)->resolve_as_string(*this); + if (error_value.is_empty()) + error_value = DeprecatedString::formatted("Expected {} to be non-empty", name); + + raise_error(ShellError::EvaluatedSyntaxError, error_value, invoking_node.position()); + return nullptr; +} + +RefPtr<AST::Node> Shell::immediate_null_or_alternative(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 2) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 2 arguments to null_or_alternative", invoking_node.position()); + return nullptr; + } + + auto value = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_without_cast(*this); + if ((value->is_string() && value->resolve_as_string(*this).is_empty()) || (value->is_list() && value->resolve_as_list(*this).is_empty())) + return make_ref_counted<AST::SyntheticNode>(invoking_node.position(), value); + + return arguments.last(); +} + +RefPtr<AST::Node> Shell::immediate_defined_value_or_default(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 2) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 2 arguments to defined_value_or_default", invoking_node.position()); + return nullptr; + } + + auto name = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + if (!find_frame_containing_local_variable(name)) + return arguments.last(); + + return make_ref_counted<AST::SimpleVariable>(invoking_node.position(), name); +} + +RefPtr<AST::Node> Shell::immediate_assign_defined_default(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 2) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 2 arguments to assign_defined_default", invoking_node.position()); + return nullptr; + } + + auto name = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + if (find_frame_containing_local_variable(name)) + return make_ref_counted<AST::SimpleVariable>(invoking_node.position(), name); + + auto value = const_cast<AST::Node&>(arguments.last()).run(*this)->resolve_without_cast(*this); + set_local_variable(name, value); + + return make_ref_counted<AST::SyntheticNode>(invoking_node.position(), value); +} + +RefPtr<AST::Node> Shell::immediate_error_if_unset(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 2) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 2 arguments to error_if_unset", invoking_node.position()); + return nullptr; + } + + auto name = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + if (find_frame_containing_local_variable(name)) + return make_ref_counted<AST::SimpleVariable>(invoking_node.position(), name); + + auto error_value = const_cast<AST::Node&>(arguments.last()).run(*this)->resolve_as_string(*this); + if (error_value.is_empty()) + error_value = DeprecatedString::formatted("Expected {} to be set", name); + + raise_error(ShellError::EvaluatedSyntaxError, error_value, invoking_node.position()); + return nullptr; +} + +RefPtr<AST::Node> Shell::immediate_null_if_unset_or_alternative(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 2) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 2 arguments to null_if_unset_or_alternative", invoking_node.position()); + return nullptr; + } + + auto name = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + if (!find_frame_containing_local_variable(name)) + return arguments.last(); + + return make_ref_counted<AST::SimpleVariable>(invoking_node.position(), name); +} + +RefPtr<AST::Node> Shell::immediate_reexpand(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 1) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 1 argument to reexpand", invoking_node.position()); + return nullptr; + } + + auto value = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + return parse(value, m_is_interactive, false); +} + +RefPtr<AST::Node> Shell::immediate_length_of_variable(AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) +{ + if (arguments.size() != 1) { + raise_error(ShellError::EvaluatedSyntaxError, "Expected exactly 1 argument to length_of_variable", invoking_node.position()); + return nullptr; + } + + auto name = const_cast<AST::Node&>(arguments.first()).run(*this)->resolve_as_string(*this); + auto variable = make_ref_counted<AST::SimpleVariable>(invoking_node.position(), name); + + return immediate_length_impl( + invoking_node, + { move(variable) }, + false); +} + RefPtr<AST::Node> Shell::run_immediate_function(StringView str, AST::ImmediateExpression& invoking_node, NonnullRefPtrVector<AST::Node> const& arguments) { #define __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(name) \ diff --git a/Userland/Shell/PosixLexer.cpp b/Userland/Shell/PosixLexer.cpp new file mode 100644 index 0000000000..a3bdd11cda --- /dev/null +++ b/Userland/Shell/PosixLexer.cpp @@ -0,0 +1,725 @@ +/* + * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/CharacterTypes.h> +#include <Shell/PosixLexer.h> + +static bool is_operator(StringView text) +{ + return Shell::Posix::Token::operator_from_name(text).has_value(); +} + +static bool is_part_of_operator(StringView text, char ch) +{ + StringBuilder builder; + builder.append(text); + builder.append(ch); + + return Shell::Posix::Token::operator_from_name(builder.string_view()).has_value(); +} + +namespace Shell::Posix { + +Vector<Token> Lexer::batch_next() +{ + for (; m_next_reduction != Reduction::None;) { + auto result = reduce(m_next_reduction); + m_next_reduction = result.next_reduction; + if (!result.tokens.is_empty()) + return result.tokens; + } + + return {}; +} + +ExpansionRange Lexer::range(ssize_t offset) const +{ + return { + m_state.position.end_offset - m_state.position.start_offset + offset - 1, + 0, + }; +} + +char Lexer::consume() +{ + auto ch = m_lexer.consume(); + if (ch == '\n') { + m_state.position.end_line.line_number++; + m_state.position.end_line.line_column = 0; + } + + m_state.position.end_offset++; + return ch; +} + +bool Lexer::consume_specific(char ch) +{ + if (m_lexer.peek() == ch) { + consume(); + return true; + } + return false; +} + +Lexer::ReductionResult Lexer::reduce(Reduction reduction) +{ + switch (reduction) { + case Reduction::None: + return { {}, Reduction::None }; + case Reduction::End: + return reduce_end(); + case Reduction::Operator: + return reduce_operator(); + case Reduction::Comment: + return reduce_comment(); + case Reduction::SingleQuotedString: + return reduce_single_quoted_string(); + case Reduction::DoubleQuotedString: + return reduce_double_quoted_string(); + case Reduction::Expansion: + return reduce_expansion(); + case Reduction::CommandExpansion: + return reduce_command_expansion(); + case Reduction::Start: + return reduce_start(); + case Reduction::ArithmeticExpansion: + return reduce_arithmetic_expansion(); + case Reduction::SpecialParameterExpansion: + return reduce_special_parameter_expansion(); + case Reduction::ParameterExpansion: + return reduce_parameter_expansion(); + case Reduction::CommandOrArithmeticSubstitutionExpansion: + return reduce_command_or_arithmetic_substitution_expansion(); + case Reduction::ExtendedParameterExpansion: + return reduce_extended_parameter_expansion(); + } + + VERIFY_NOT_REACHED(); +} + +Lexer::ReductionResult Lexer::reduce_end() +{ + return { + .tokens = { Token::eof() }, + .next_reduction = Reduction::None, + }; +} + +Lexer::ReductionResult Lexer::reduce_operator() +{ + if (m_lexer.is_eof()) { + if (is_operator(m_state.buffer.string_view())) { + auto tokens = Token::operators_from(m_state); + m_state.buffer.clear(); + m_state.position.start_offset = m_state.position.end_offset; + m_state.position.start_line = m_state.position.end_line; + + return { + .tokens = move(tokens), + .next_reduction = Reduction::End, + }; + } + + return reduce(Reduction::Start); + } + + if (is_part_of_operator(m_state.buffer.string_view(), m_lexer.peek())) { + m_state.buffer.append(consume()); + return { + .tokens = {}, + .next_reduction = Reduction::Operator, + }; + } + + auto tokens = Vector<Token> {}; + if (is_operator(m_state.buffer.string_view())) { + tokens.extend(Token::operators_from(m_state)); + m_state.buffer.clear(); + m_state.position.start_offset = m_state.position.end_offset; + m_state.position.start_line = m_state.position.end_line; + } + + auto result = reduce(Reduction::Start); + tokens.extend(move(result.tokens)); + return { + .tokens = move(tokens), + .next_reduction = result.next_reduction, + }; +} + +Lexer::ReductionResult Lexer::reduce_comment() +{ + if (m_lexer.is_eof()) { + return { + .tokens = {}, + .next_reduction = Reduction::End, + }; + } + + if (consume() == '\n') { + return { + .tokens = { Token::newline() }, + .next_reduction = Reduction::Start, + }; + } + + return { + .tokens = {}, + .next_reduction = Reduction::Comment, + }; +} + +Lexer::ReductionResult Lexer::reduce_single_quoted_string() +{ + if (m_lexer.is_eof()) { + auto tokens = Token::maybe_from_state(m_state); + tokens.append(Token::continuation('\'')); + return { + .tokens = move(tokens), + .next_reduction = Reduction::End, + }; + } + + auto ch = consume(); + m_state.buffer.append(ch); + + if (ch == '\'') { + return { + .tokens = {}, + .next_reduction = Reduction::Start, + }; + } + + return { + .tokens = {}, + .next_reduction = Reduction::SingleQuotedString, + }; +} + +Lexer::ReductionResult Lexer::reduce_double_quoted_string() +{ + m_state.previous_reduction = Reduction::DoubleQuotedString; + if (m_lexer.is_eof()) { + auto tokens = Token::maybe_from_state(m_state); + tokens.append(Token::continuation('"')); + return { + .tokens = move(tokens), + .next_reduction = Reduction::End, + }; + } + + auto ch = consume(); + m_state.buffer.append(ch); + + if (m_state.escaping) { + m_state.escaping = false; + + return { + .tokens = {}, + .next_reduction = Reduction::DoubleQuotedString, + }; + } + + switch (ch) { + case '\\': + m_state.escaping = true; + return { + .tokens = {}, + .next_reduction = Reduction::DoubleQuotedString, + }; + case '"': + m_state.previous_reduction = Reduction::Start; + return { + .tokens = {}, + .next_reduction = Reduction::Start, + }; + case '$': + if (m_lexer.next_is("(")) + m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() }); + else + m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() }); + return { + .tokens = {}, + .next_reduction = Reduction::Expansion, + }; + case '`': + m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() }); + return { + .tokens = {}, + .next_reduction = Reduction::CommandExpansion, + }; + default: + return { + .tokens = {}, + .next_reduction = Reduction::DoubleQuotedString, + }; + } +} + +Lexer::ReductionResult Lexer::reduce_expansion() +{ + if (m_lexer.is_eof()) + return reduce(m_state.previous_reduction); + + auto ch = m_lexer.peek(); + + switch (ch) { + case '{': + consume(); + m_state.buffer.append(ch); + return { + .tokens = {}, + .next_reduction = Reduction::ExtendedParameterExpansion, + }; + case '(': + consume(); + m_state.buffer.append(ch); + return { + .tokens = {}, + .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion, + }; + case 'a' ... 'z': + case 'A' ... 'Z': + case '_': { + consume(); + m_state.buffer.append(ch); + auto& expansion = m_state.expansions.last().get<ParameterExpansion>(); + expansion.parameter.append(ch); + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + + return { + .tokens = {}, + .next_reduction = Reduction::ParameterExpansion, + }; + } + case '0' ... '9': + case '-': + case '!': + case '@': + case '#': + case '?': + case '*': + case '$': + return reduce(Reduction::SpecialParameterExpansion); + default: + m_state.buffer.append(ch); + return reduce(m_state.previous_reduction); + } +} + +Lexer::ReductionResult Lexer::reduce_command_expansion() +{ + if (m_lexer.is_eof()) { + auto& expansion = m_state.expansions.last().get<CommandExpansion>(); + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + + return { + .tokens = { Token::continuation('`') }, + .next_reduction = m_state.previous_reduction, + }; + } + + auto ch = consume(); + + if (!m_state.escaping && ch == '`') { + m_state.buffer.append(ch); + auto& expansion = m_state.expansions.last().get<CommandExpansion>(); + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + + return { + .tokens = {}, + .next_reduction = m_state.previous_reduction, + }; + } + + if (!m_state.escaping && ch == '\\') { + m_state.escaping = true; + return { + .tokens = {}, + .next_reduction = Reduction::CommandExpansion, + }; + } + + m_state.escaping = false; + m_state.buffer.append(ch); + m_state.expansions.last().get<CommandExpansion>().command.append(ch); + return { + .tokens = {}, + .next_reduction = Reduction::CommandExpansion, + }; +} + +Lexer::ReductionResult Lexer::reduce_start() +{ + if (m_lexer.is_eof()) { + auto tokens = Token::maybe_from_state(m_state); + m_state.buffer.clear(); + m_state.position.start_offset = m_state.position.end_offset; + m_state.position.start_line = m_state.position.end_line; + + return { + .tokens = move(tokens), + .next_reduction = Reduction::End, + }; + } + + if (m_state.escaping && consume_specific('\n')) { + m_state.escaping = false; + + auto buffer = m_state.buffer.to_deprecated_string().substring(0, m_state.buffer.length() - 1); + m_state.buffer.clear(); + m_state.buffer.append(buffer); + + return { + .tokens = {}, + .next_reduction = Reduction::Start, + }; + } + + if (!m_state.escaping && m_lexer.peek() == '#' && m_state.buffer.is_empty()) { + consume(); + return { + .tokens = {}, + .next_reduction = Reduction::Comment, + }; + } + + if (!m_state.escaping && consume_specific('\n')) { + auto tokens = Token::maybe_from_state(m_state); + tokens.append(Token::newline()); + + m_state.buffer.clear(); + m_state.position.start_offset = m_state.position.end_offset; + m_state.position.start_line = m_state.position.end_line; + + return { + .tokens = move(tokens), + .next_reduction = Reduction::Start, + }; + } + + if (!m_state.escaping && consume_specific('\\')) { + m_state.escaping = true; + m_state.buffer.append('\\'); + return { + .tokens = {}, + .next_reduction = Reduction::Start, + }; + } + + if (!m_state.escaping && is_part_of_operator(""sv, m_lexer.peek())) { + auto tokens = Token::maybe_from_state(m_state); + m_state.buffer.clear(); + m_state.buffer.append(consume()); + m_state.position.start_offset = m_state.position.end_offset; + m_state.position.start_line = m_state.position.end_line; + + return { + .tokens = move(tokens), + .next_reduction = Reduction::Operator, + }; + } + + if (!m_state.escaping && consume_specific('\'')) { + m_state.buffer.append('\''); + return { + .tokens = {}, + .next_reduction = Reduction::SingleQuotedString, + }; + } + + if (!m_state.escaping && consume_specific('"')) { + m_state.buffer.append('"'); + return { + .tokens = {}, + .next_reduction = Reduction::DoubleQuotedString, + }; + } + + if (!m_state.escaping && is_ascii_space(m_lexer.peek())) { + consume(); + auto tokens = Token::maybe_from_state(m_state); + m_state.buffer.clear(); + m_state.expansions.clear(); + m_state.position.start_offset = m_state.position.end_offset; + m_state.position.start_line = m_state.position.end_line; + + return { + .tokens = move(tokens), + .next_reduction = Reduction::Start, + }; + } + + if (!m_state.escaping && consume_specific('$')) { + m_state.buffer.append('$'); + if (m_lexer.next_is("(")) + m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() }); + else + m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() }); + + return { + .tokens = {}, + .next_reduction = Reduction::Expansion, + }; + } + + if (!m_state.escaping && consume_specific('`')) { + m_state.buffer.append('`'); + m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() }); + return { + .tokens = {}, + .next_reduction = Reduction::CommandExpansion, + }; + } + + m_state.escaping = false; + m_state.buffer.append(consume()); + return { + .tokens = {}, + .next_reduction = Reduction::Start, + }; +} + +Lexer::ReductionResult Lexer::reduce_arithmetic_expansion() +{ + if (m_lexer.is_eof()) { + auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>(); + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + + return { + .tokens = { Token::continuation("$((") }, + .next_reduction = m_state.previous_reduction, + }; + } + + if (m_lexer.peek() == ')' && m_state.buffer.string_view().ends_with(')')) { + m_state.buffer.append(consume()); + auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>(); + expansion.expression = expansion.value.to_deprecated_string().substring(0, expansion.value.length() - 1); + expansion.value.clear(); + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + + return { + .tokens = {}, + .next_reduction = m_state.previous_reduction, + }; + } + + auto ch = consume(); + m_state.buffer.append(ch); + m_state.expansions.last().get<ArithmeticExpansion>().value.append(ch); + return { + .tokens = {}, + .next_reduction = Reduction::ArithmeticExpansion, + }; +} + +Lexer::ReductionResult Lexer::reduce_special_parameter_expansion() +{ + auto ch = consume(); + m_state.buffer.append(ch); + m_state.expansions.last() = ParameterExpansion { + .parameter = StringBuilder {}, + .range = range(-1), + }; + m_state.expansions.last().get<ParameterExpansion>().parameter.append(ch); + + return { + .tokens = {}, + .next_reduction = m_state.previous_reduction, + }; +} + +Lexer::ReductionResult Lexer::reduce_parameter_expansion() +{ + auto& expansion = m_state.expansions.last().get<ParameterExpansion>(); + + if (m_lexer.is_eof()) { + return { + .tokens = {}, + .next_reduction = Reduction::Start, + }; + } + + auto next = m_lexer.peek(); + if (is_ascii_alphanumeric(next)) { + m_state.buffer.append(consume()); + expansion.parameter.append(next); + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + + return { + .tokens = {}, + .next_reduction = Reduction::ParameterExpansion, + }; + } + + return reduce(m_state.previous_reduction); +} + +Lexer::ReductionResult Lexer::reduce_command_or_arithmetic_substitution_expansion() +{ + if (m_lexer.is_eof()) { + return { + .tokens = { Token::continuation("$(") }, + .next_reduction = m_state.previous_reduction, + }; + } + + auto ch = m_lexer.peek(); + if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) { + m_state.buffer.append(consume()); + m_state.expansions.last() = ArithmeticExpansion { + .expression = "", + .value = StringBuilder {}, + .range = range(-2) + }; + return { + .tokens = {}, + .next_reduction = Reduction::ArithmeticExpansion, + }; + } + + if (ch == ')') { + m_state.buffer.append(consume()); + m_state.expansions.last().visit([&](auto& expansion) { + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + }); + return { + .tokens = {}, + .next_reduction = m_state.previous_reduction, + }; + } + + m_state.buffer.append(consume()); + m_state.expansions.last().get<CommandExpansion>().command.append(ch); + return { + .tokens = {}, + .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion, + }; +} + +Lexer::ReductionResult Lexer::reduce_extended_parameter_expansion() +{ + auto& expansion = m_state.expansions.last().get<ParameterExpansion>(); + + if (m_lexer.is_eof()) { + return { + .tokens = { Token::continuation("${") }, + .next_reduction = m_state.previous_reduction, + }; + } + + auto ch = m_lexer.peek(); + if (ch == '}') { + m_state.buffer.append(consume()); + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + + return { + .tokens = {}, + .next_reduction = m_state.previous_reduction, + }; + } + + m_state.buffer.append(consume()); + expansion.parameter.append(ch); + expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; + + return { + .tokens = {}, + .next_reduction = Reduction::ExtendedParameterExpansion, + }; +} + +StringView Token::type_name() const +{ + switch (type) { + case Type::Eof: + return "Eof"sv; + case Type::Newline: + return "Newline"sv; + case Type::Continuation: + return "Continuation"sv; + case Type::Token: + return "Token"sv; + case Type::And: + return "And"sv; + case Type::Pipe: + return "Pipe"sv; + case Type::OpenParen: + return "OpenParen"sv; + case Type::CloseParen: + return "CloseParen"sv; + case Type::Great: + return "Great"sv; + case Type::Less: + return "Less"sv; + case Type::AndIf: + return "AndIf"sv; + case Type::OrIf: + return "OrIf"sv; + case Type::DoubleSemicolon: + return "DoubleSemicolon"sv; + case Type::DoubleLess: + return "DoubleLess"sv; + case Type::DoubleGreat: + return "DoubleGreat"sv; + case Type::LessAnd: + return "LessAnd"sv; + case Type::GreatAnd: + return "GreatAnd"sv; + case Type::LessGreat: + return "LessGreat"sv; + case Type::DoubleLessDash: + return "DoubleLessDash"sv; + case Type::Clobber: + return "Clobber"sv; + case Type::Semicolon: + return "Semicolon"sv; + case Type::AssignmentWord: + return "AssignmentWord"sv; + case Type::Bang: + return "Bang"sv; + case Type::Case: + return "Case"sv; + case Type::CloseBrace: + return "CloseBrace"sv; + case Type::Do: + return "Do"sv; + case Type::Done: + return "Done"sv; + case Type::Elif: + return "Elif"sv; + case Type::Else: + return "Else"sv; + case Type::Esac: + return "Esac"sv; + case Type::Fi: + return "Fi"sv; + case Type::For: + return "For"sv; + case Type::If: + return "If"sv; + case Type::In: + return "In"sv; + case Type::IoNumber: + return "IoNumber"sv; + case Type::OpenBrace: + return "OpenBrace"sv; + case Type::Then: + return "Then"sv; + case Type::Until: + return "Until"sv; + case Type::VariableName: + return "VariableName"sv; + case Type::While: + return "While"sv; + case Type::Word: + return "Word"sv; + } + return "Idk"sv; +} + +} diff --git a/Userland/Shell/PosixLexer.h b/Userland/Shell/PosixLexer.h new file mode 100644 index 0000000000..b42f5a3e62 --- /dev/null +++ b/Userland/Shell/PosixLexer.h @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/DeprecatedString.h> +#include <AK/GenericLexer.h> +#include <AK/Variant.h> +#include <AK/Vector.h> +#include <Shell/AST.h> + +namespace Shell::Posix { + +enum class Reduction { + None, + End, + Operator, + Comment, + SingleQuotedString, + DoubleQuotedString, + Expansion, + CommandExpansion, + Start, + ArithmeticExpansion, + SpecialParameterExpansion, + ParameterExpansion, + CommandOrArithmeticSubstitutionExpansion, + ExtendedParameterExpansion, +}; + +struct ExpansionRange { + size_t start; + size_t length; +}; + +struct ParameterExpansion { + StringBuilder parameter; + ExpansionRange range; +}; + +struct CommandExpansion { + StringBuilder command; + ExpansionRange range; +}; + +struct ArithmeticExpansion { + DeprecatedString expression; + StringBuilder value; + ExpansionRange range; +}; + +using Expansion = Variant<ParameterExpansion, CommandExpansion, ArithmeticExpansion>; + +struct ResolvedParameterExpansion { + DeprecatedString parameter; + DeprecatedString argument; + ExpansionRange range; + enum class Op { + UseDefaultValue, // ${parameter:-word} + AssignDefaultValue, // ${parameter:=word} + IndicateErrorIfEmpty, // ${parameter:?word} + UseAlternativeValue, // ${parameter:+word} + UseDefaultValueIfUnset, // ${parameter-default} + AssignDefaultValueIfUnset, // ${parameter=default} + IndicateErrorIfUnset, // ${parameter?default} + UseAlternativeValueIfUnset, // ${parameter+default} + RemoveLargestSuffixByPattern, // ${parameter%%pattern} + RemoveLargestPrefixByPattern, // ${parameter##pattern} + RemoveSmallestSuffixByPattern, // ${parameter%pattern} + RemoveSmallestPrefixByPattern, // ${parameter#pattern} + StringLength, // ${#parameter} + GetPositionalParameter, // ${parameter} + GetVariable, // ${parameter} + GetLastBackgroundPid, // $! + GetPositionalParameterList, // $* + GetCurrentOptionFlags, // $- + GetPositionalParameterCount, // $# + GetLastExitStatus, // $? + GetPositionalParameterListAsString, // $@ + GetShellProcessId, // $$ + } op; + + enum class Expand { + Nothing, + Word, + } expand { Expand::Nothing }; + + DeprecatedString to_deprecated_string() const + { + StringBuilder builder; + builder.append("{"sv); + switch (op) { + case Op::UseDefaultValue: + builder.append("UseDefaultValue"sv); + break; + case Op::AssignDefaultValue: + builder.append("AssignDefaultValue"sv); + break; + case Op::IndicateErrorIfEmpty: + builder.append("IndicateErrorIfEmpty"sv); + break; + case Op::UseAlternativeValue: + builder.append("UseAlternativeValue"sv); + break; + case Op::UseDefaultValueIfUnset: + builder.append("UseDefaultValueIfUnset"sv); + break; + case Op::AssignDefaultValueIfUnset: + builder.append("AssignDefaultValueIfUnset"sv); + break; + case Op::IndicateErrorIfUnset: + builder.append("IndicateErrorIfUnset"sv); + break; + case Op::UseAlternativeValueIfUnset: + builder.append("UseAlternativeValueIfUnset"sv); + break; + case Op::RemoveLargestSuffixByPattern: + builder.append("RemoveLargestSuffixByPattern"sv); + break; + case Op::RemoveLargestPrefixByPattern: + builder.append("RemoveLargestPrefixByPattern"sv); + break; + case Op::RemoveSmallestSuffixByPattern: + builder.append("RemoveSmallestSuffixByPattern"sv); + break; + case Op::RemoveSmallestPrefixByPattern: + builder.append("RemoveSmallestPrefixByPattern"sv); + break; + case Op::StringLength: + builder.append("StringLength"sv); + break; + case Op::GetPositionalParameter: + builder.append("GetPositionalParameter"sv); + break; + case Op::GetLastBackgroundPid: + builder.append("GetLastBackgroundPid"sv); + break; + case Op::GetPositionalParameterList: + builder.append("GetPositionalParameterList"sv); + break; + case Op::GetCurrentOptionFlags: + builder.append("GetCurrentOptionFlags"sv); + break; + case Op::GetPositionalParameterCount: + builder.append("GetPositionalParameterCount"sv); + break; + case Op::GetLastExitStatus: + builder.append("GetLastExitStatus"sv); + break; + case Op::GetPositionalParameterListAsString: + builder.append("GetPositionalParameterListAsString"sv); + break; + case Op::GetShellProcessId: + builder.append("GetShellProcessId"sv); + break; + case Op::GetVariable: + builder.append("GetVariable"sv); + break; + } + builder.append(" "sv); + builder.append(parameter); + builder.append(" ("sv); + builder.append(argument); + builder.append(")"sv); + builder.append("}"sv); + return builder.to_deprecated_string(); + } +}; + +struct ResolvedCommandExpansion { + RefPtr<AST::Node> command; + ExpansionRange range; +}; + +using ResolvedExpansion = Variant<ResolvedParameterExpansion, ResolvedCommandExpansion>; + +struct State { + StringBuilder buffer {}; + Reduction previous_reduction { Reduction::Start }; + bool escaping { false }; + AST::Position position { + .start_offset = 0, + .end_offset = 0, + .start_line = { + .line_number = 0, + .line_column = 0, + }, + .end_line = { + .line_number = 0, + .line_column = 0, + }, + }; + Vector<Expansion> expansions {}; +}; + +struct Token { + enum class Type { + Eof, + Newline, + Continuation, + Token, + And, + Pipe, + OpenParen, + CloseParen, + Great, + Less, + AndIf, + OrIf, + DoubleSemicolon, + DoubleLess, + DoubleGreat, + LessAnd, + GreatAnd, + LessGreat, + DoubleLessDash, + Clobber, + Semicolon, + + // Not produced by this lexer, but generated in later stages. + AssignmentWord, + Bang, + Case, + CloseBrace, + Do, + Done, + Elif, + Else, + Esac, + Fi, + For, + If, + In, + IoNumber, + OpenBrace, + Then, + Until, + VariableName, + While, + Word, + }; + + Type type; + DeprecatedString value; + Optional<AST::Position> position; + Vector<Expansion> expansions; + Vector<ResolvedExpansion> resolved_expansions {}; + StringView original_text; + bool could_be_start_of_a_simple_command { false }; + + static Vector<Token> maybe_from_state(State const& state) + { + if (state.buffer.is_empty() || state.buffer.string_view().trim_whitespace().is_empty()) + return {}; + + auto token = Token { + .type = Type::Token, + .value = state.buffer.to_deprecated_string(), + .position = state.position, + .expansions = state.expansions, + .original_text = {}, + }; + return { move(token) }; + } + + static Optional<Token::Type> operator_from_name(StringView name) + { + if (name == "&&"sv) + return Token::Type::AndIf; + if (name == "||"sv) + return Token::Type::OrIf; + if (name == ";;"sv) + return Token::Type::DoubleSemicolon; + if (name == "<<"sv) + return Token::Type::DoubleLess; + if (name == ">>"sv) + return Token::Type::DoubleGreat; + if (name == "<&"sv) + return Token::Type::LessAnd; + if (name == ">&"sv) + return Token::Type::GreatAnd; + if (name == "<>"sv) + return Token::Type::LessGreat; + if (name == "<<-"sv) + return Token::Type::DoubleLessDash; + if (name == ">|"sv) + return Token::Type::Clobber; + if (name == ";"sv) + return Token::Type::Semicolon; + if (name == "&"sv) + return Token::Type::And; + if (name == "|"sv) + return Token::Type::Pipe; + if (name == "("sv) + return Token::Type::OpenParen; + if (name == ")"sv) + return Token::Type::CloseParen; + if (name == ">"sv) + return Token::Type::Great; + if (name == "<"sv) + return Token::Type::Less; + + return {}; + } + + static Vector<Token> operators_from(State const& state) + { + auto name = state.buffer.string_view(); + auto type = operator_from_name(name); + if (!type.has_value()) + return {}; + + return { + Token { + .type = *type, + .value = name, + .position = state.position, + .expansions = {}, + .original_text = {}, + } + }; + } + + static Token eof() + { + return { + .type = Type::Eof, + .value = {}, + .position = {}, + .expansions = {}, + .original_text = {}, + }; + } + + static Token newline() + { + return { + .type = Type::Newline, + .value = "\n", + .position = {}, + .expansions = {}, + .original_text = {}, + }; + } + + static Token continuation(char expected) + { + return { + .type = Type::Continuation, + .value = DeprecatedString::formatted("{:c}", expected), + .position = {}, + .expansions = {}, + .original_text = {}, + }; + } + + static Token continuation(DeprecatedString expected) + { + return { + .type = Type::Continuation, + .value = move(expected), + .position = {}, + .expansions = {}, + .original_text = {}, + }; + } + + StringView type_name() const; +}; + +class Lexer { +public: + explicit Lexer(StringView input) + : m_lexer(input) + { + } + + Vector<Token> batch_next(); + +private: + struct ReductionResult { + Vector<Token> tokens; + Reduction next_reduction { Reduction::None }; + }; + + ReductionResult reduce(Reduction); + ReductionResult reduce_end(); + ReductionResult reduce_operator(); + ReductionResult reduce_comment(); + ReductionResult reduce_single_quoted_string(); + ReductionResult reduce_double_quoted_string(); + ReductionResult reduce_expansion(); + ReductionResult reduce_command_expansion(); + ReductionResult reduce_start(); + ReductionResult reduce_arithmetic_expansion(); + ReductionResult reduce_special_parameter_expansion(); + ReductionResult reduce_parameter_expansion(); + ReductionResult reduce_command_or_arithmetic_substitution_expansion(); + ReductionResult reduce_extended_parameter_expansion(); + + char consume(); + bool consume_specific(char); + ExpansionRange range(ssize_t offset = 0) const; + + GenericLexer m_lexer; + State m_state; + Reduction m_next_reduction { Reduction::Start }; +}; + +} diff --git a/Userland/Shell/PosixParser.cpp b/Userland/Shell/PosixParser.cpp new file mode 100644 index 0000000000..15b75ae5c8 --- /dev/null +++ b/Userland/Shell/PosixParser.cpp @@ -0,0 +1,1922 @@ +/* + * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <AK/CharacterTypes.h> +#include <AK/Debug.h> +#include <AK/StringUtils.h> +#include <Shell/PosixParser.h> + +template<typename T, typename... Ts> +static inline bool is_one_of(T const& value, Ts const&... values) +{ + return ((value == values) || ...); +} + +static inline bool is_io_operator(Shell::Posix::Token const& token) +{ + using namespace Shell::Posix; + return is_one_of(token.type, + Token::Type::Less, Token::Type::Great, + Token::Type::LessAnd, Token::Type::GreatAnd, + Token::Type::DoubleLess, Token::Type::DoubleGreat, + Token::Type::LessGreat, Token::Type::Clobber); +} + +static inline bool is_separator(Shell::Posix::Token const& token) +{ + using namespace Shell::Posix; + return is_one_of(token.type, + Token::Type::Semicolon, Token::Type::Newline, + Token::Type::AndIf, Token::Type::OrIf, + Token::Type::Pipe, + Token::Type::And); +} + +static inline bool is_a_reserved_word_position(Shell::Posix::Token const& token, Optional<Shell::Posix::Token> const& previous_token, Optional<Shell::Posix::Token> const& previous_previous_token) +{ + using namespace Shell::Posix; + + auto is_start_of_command = !previous_token.has_value() + || previous_token->value.is_empty() + || is_separator(*previous_token) + || is_one_of(previous_token->type, + Token::Type::OpenParen, Token::Type::CloseParen, Token::Type::Newline, Token::Type::DoubleSemicolon, + Token::Type::Semicolon, Token::Type::Pipe, Token::Type::OrIf, Token::Type::AndIf); + if (is_start_of_command) + return true; + + if (!previous_token.has_value()) + return false; + + auto previous_is_reserved_word = is_one_of(previous_token->value, + "for"sv, "in"sv, "case"sv, "if"sv, "then"sv, "else"sv, + "elif"sv, "while"sv, "until"sv, "do"sv, "done"sv, "esac"sv, + "fi"sv, "!"sv, "{"sv, "}"sv); + + if (previous_is_reserved_word) + return true; + + if (!previous_previous_token.has_value()) + return false; + + auto is_third_in_case = previous_previous_token->value == "case"sv + && token.type == Token::Type::Token && token.value == "in"sv; + + if (is_third_in_case) + return true; + + auto is_third_in_for = previous_previous_token->value == "for"sv + && token.type == Token::Type::Token && is_one_of(token.value, "in"sv, "do"sv); + + return is_third_in_for; +} + +static inline bool is_reserved(Shell::Posix::Token const& token) +{ + using namespace Shell::Posix; + return is_one_of(token.type, + Token::Type::If, Token::Type::Then, Token::Type::Else, + Token::Type::Elif, Token::Type::Fi, Token::Type::Do, + Token::Type::Done, Token::Type::Case, Token::Type::Esac, + Token::Type::While, Token::Type::Until, Token::Type::For, + Token::Type::In, Token::Type::OpenBrace, Token::Type::CloseBrace, + Token::Type::Bang); +} + +static inline bool is_valid_name(StringView word) +{ + // Dr.POSIX: a word consisting solely of underscores, digits, and alphabetics from the portable character set. The first character of a name is not a digit. + return !word.is_empty() + && !is_ascii_digit(word[0]) + && all_of(word, [](auto ch) { return is_ascii_alphanumeric(ch) || ch == '_'; }); +} + +namespace Shell::Posix { +void Parser::fill_token_buffer() +{ + for (;;) { + auto token = next_expanded_token(); + if (!token.has_value()) + break; +#if SHELL_POSIX_PARSER_DEBUG + DeprecatedString position = "(~)"; + if (token->position.has_value()) + position = DeprecatedString::formatted("{}:{}", token->position->start_offset, token->position->end_offset); + DeprecatedString expansions = ""; + for (auto& exp : token->resolved_expansions) + exp.visit( + [&](ResolvedParameterExpansion& x) { expansions = DeprecatedString::formatted("{}param({}),", expansions, x.to_deprecated_string()); }, + [&](ResolvedCommandExpansion& x) { expansions = DeprecatedString::formatted("{}command({:p})", expansions, x.command.ptr()); }); + DeprecatedString rexpansions = ""; + for (auto& exp : token->expansions) + exp.visit( + [&](ParameterExpansion& x) { rexpansions = DeprecatedString::formatted("{}param({}) from {} to {},", rexpansions, x.parameter.string_view(), x.range.start, x.range.length); }, + [&](auto&) { rexpansions = DeprecatedString::formatted("{}...,", rexpansions); }); + dbgln("Token @ {}: '{}' (type {}) - parsed expansions: {} - raw expansions: {}", position, token->value.replace("\n"sv, "\\n"sv, ReplaceMode::All), token->type_name(), expansions, rexpansions); +#endif + } + m_token_index = 0; +} + +RefPtr<AST::Node> Parser::parse() +{ + return parse_complete_command(); +} + +Optional<Token> Parser::next_expanded_token() +{ + while (m_token_buffer.find_if([](auto& token) { return token.type == Token::Type::Eof; }).is_end()) { + auto tokens = m_lexer.batch_next(); + auto expanded = perform_expansions(move(tokens)); + m_token_buffer.extend(expanded); + } + + if (m_token_buffer.size() == m_token_index) + return {}; + + return m_token_buffer[m_token_index++]; +} + +Vector<Token> Parser::perform_expansions(Vector<Token> tokens) +{ + if (tokens.is_empty()) + return {}; + + Vector<Token> expanded_tokens; + auto previous_token = Optional<Token>(); + auto previous_previous_token = Optional<Token>(); + auto tokens_taken_from_buffer = 0; + + expanded_tokens.ensure_capacity(tokens.size()); + + auto swap_expansions = [&] { + if (previous_previous_token.has_value()) + expanded_tokens.append(previous_previous_token.release_value()); + + if (previous_token.has_value()) + expanded_tokens.append(previous_token.release_value()); + + for (; tokens_taken_from_buffer > 0; tokens_taken_from_buffer--) + m_token_buffer.append(expanded_tokens.take_first()); + + swap(tokens, expanded_tokens); + expanded_tokens.clear_with_capacity(); + }; + + // (1) join all consecutive newlines (this works around a grammar ambiguity) + auto previous_was_newline = !m_token_buffer.is_empty() && m_token_buffer.last().type == Token::Type::Newline; + for (auto& token : tokens) { + if (token.type == Token::Type::Newline) { + if (previous_was_newline) + continue; + previous_was_newline = true; + } else { + previous_was_newline = false; + } + expanded_tokens.append(move(token)); + } + + swap_expansions(); + + // (2) Detect reserved words + if (m_token_buffer.size() >= 1) { + previous_token = m_token_buffer.take_last(); + tokens_taken_from_buffer++; + } + if (m_token_buffer.size() >= 1) { + previous_previous_token = m_token_buffer.take_last(); + tokens_taken_from_buffer++; + } + + auto check_reserved_word = [&](auto& token) { + if (is_a_reserved_word_position(token, previous_token, previous_previous_token)) { + if (token.value == "if"sv) + token.type = Token::Type::If; + else if (token.value == "then"sv) + token.type = Token::Type::Then; + else if (token.value == "else"sv) + token.type = Token::Type::Else; + else if (token.value == "elif"sv) + token.type = Token::Type::Elif; + else if (token.value == "fi"sv) + token.type = Token::Type::Fi; + else if (token.value == "while"sv) + token.type = Token::Type::While; + else if (token.value == "until"sv) + token.type = Token::Type::Until; + else if (token.value == "do"sv) + token.type = Token::Type::Do; + else if (token.value == "done"sv) + token.type = Token::Type::Done; + else if (token.value == "case"sv) + token.type = Token::Type::Case; + else if (token.value == "esac"sv) + token.type = Token::Type::Esac; + else if (token.value == "for"sv) + token.type = Token::Type::For; + else if (token.value == "in"sv) + token.type = Token::Type::In; + else if (token.value == "!"sv) + token.type = Token::Type::Bang; + else if (token.value == "{"sv) + token.type = Token::Type::OpenBrace; + else if (token.value == "}"sv) + token.type = Token::Type::CloseBrace; + else if (token.type == Token::Type::Token) + token.type = Token::Type::Word; + } else if (token.type == Token::Type::Token) { + token.type = Token::Type::Word; + } + }; + + for (auto& token : tokens) { + if (!previous_token.has_value()) { + check_reserved_word(token); + previous_token = token; + continue; + } + if (!previous_previous_token.has_value()) { + check_reserved_word(token); + previous_previous_token = move(previous_token); + previous_token = token; + continue; + } + + check_reserved_word(token); + expanded_tokens.append(exchange(*previous_previous_token, exchange(*previous_token, move(token)))); + } + + swap_expansions(); + + // (3) Detect io_number tokens + previous_token = Optional<Token>(); + tokens_taken_from_buffer = 0; + if (m_token_buffer.size() >= 1) { + previous_token = m_token_buffer.take_last(); + tokens_taken_from_buffer++; + } + + for (auto& token : tokens) { + if (!previous_token.has_value()) { + previous_token = token; + continue; + } + + if (is_io_operator(token) && previous_token->type == Token::Type::Word && all_of(previous_token->value, is_ascii_digit)) { + previous_token->type = Token::Type::IoNumber; + } + + expanded_tokens.append(exchange(*previous_token, move(token))); + } + + swap_expansions(); + + // (4) Try to identify simple commands + previous_token = Optional<Token>(); + tokens_taken_from_buffer = 0; + + if (m_token_buffer.size() >= 1) { + previous_token = m_token_buffer.take_last(); + tokens_taken_from_buffer++; + } + + for (auto& token : tokens) { + if (!previous_token.has_value()) { + token.could_be_start_of_a_simple_command = true; + previous_token = token; + continue; + } + + token.could_be_start_of_a_simple_command = is_one_of(previous_token->type, Token::Type::OpenParen, Token::Type::CloseParen, Token::Type::Newline) + || is_separator(*previous_token) + || (!is_reserved(*previous_token) && is_reserved(token)); + + expanded_tokens.append(exchange(*previous_token, move(token))); + } + + swap_expansions(); + + // (5) Detect assignment words + for (auto& token : tokens) { + if (token.could_be_start_of_a_simple_command) + m_disallow_command_prefix = false; + + // Check if we're in a command prefix (could be an assignment) + if (!m_disallow_command_prefix && token.type == Token::Type::Word && token.value.contains('=')) { + // If the word before '=' is a valid name, this is an assignment + auto parts = token.value.split_limit('=', 2); + if (is_valid_name(parts[0])) + token.type = Token::Type::AssignmentWord; + else + m_disallow_command_prefix = true; + } else { + m_disallow_command_prefix = true; + } + + expanded_tokens.append(move(token)); + } + + swap_expansions(); + + // (6) Parse expansions + for (auto& token : tokens) { + if (!is_one_of(token.type, Token::Type::Word, Token::Type::AssignmentWord)) { + expanded_tokens.append(move(token)); + continue; + } + + Vector<ResolvedExpansion> resolved_expansions; + for (auto& expansion : token.expansions) { + auto resolved = expansion.visit( + [&](ParameterExpansion const& expansion) -> ResolvedExpansion { + auto text = expansion.parameter.string_view(); + // ${NUMBER} + if (all_of(text, is_ascii_digit)) { + return ResolvedParameterExpansion { + .parameter = expansion.parameter.to_deprecated_string(), + .argument = {}, + .range = expansion.range, + .op = ResolvedParameterExpansion::Op::GetPositionalParameter, + }; + } + + if (text.length() == 1) { + ResolvedParameterExpansion::Op op; + switch (text[0]) { + case '!': + op = ResolvedParameterExpansion::Op::GetLastBackgroundPid; + break; + case '@': + op = ResolvedParameterExpansion::Op::GetPositionalParameterList; + break; + case '-': + op = ResolvedParameterExpansion::Op::GetCurrentOptionFlags; + break; + case '#': + op = ResolvedParameterExpansion::Op::GetPositionalParameterCount; + break; + case '?': + op = ResolvedParameterExpansion::Op::GetLastExitStatus; + break; + case '*': + op = ResolvedParameterExpansion::Op::GetPositionalParameterListAsString; + break; + case '$': + op = ResolvedParameterExpansion::Op::GetShellProcessId; + break; + default: + if (is_valid_name(text)) { + op = ResolvedParameterExpansion::Op::GetVariable; + } else { + error(token, "Unknown parameter expansion: {}", text); + return ResolvedParameterExpansion { + .parameter = expansion.parameter.to_deprecated_string(), + .argument = {}, + .range = expansion.range, + .op = ResolvedParameterExpansion::Op::StringLength, + }; + } + } + + return ResolvedParameterExpansion { + .parameter = {}, + .argument = {}, + .range = expansion.range, + .op = op, + }; + } + + if (text.starts_with('#')) { + return ResolvedParameterExpansion { + .parameter = text.substring_view(1).to_deprecated_string(), + .argument = {}, + .range = expansion.range, + .op = ResolvedParameterExpansion::Op::StringLength, + }; + } + + GenericLexer lexer { text }; + auto parameter = lexer.consume_while([first = true](char c) mutable { + if (first) { + first = false; + return is_ascii_alpha(c) || c == '_'; + } + return is_ascii_alphanumeric(c) || c == '_'; + }); + + StringView argument; + ResolvedParameterExpansion::Op op; + switch (lexer.peek()) { + case ':': + lexer.ignore(); + switch (lexer.is_eof() ? 0 : lexer.consume()) { + case '-': + argument = lexer.consume_all(); + op = ResolvedParameterExpansion::Op::UseDefaultValue; + break; + case '=': + argument = lexer.consume_all(); + op = ResolvedParameterExpansion::Op::AssignDefaultValue; + break; + case '?': + argument = lexer.consume_all(); + op = ResolvedParameterExpansion::Op::IndicateErrorIfEmpty; + break; + case '+': + argument = lexer.consume_all(); + op = ResolvedParameterExpansion::Op::UseAlternativeValue; + break; + default: + error(token, "Unknown parameter expansion: {}", text); + return ResolvedParameterExpansion { + .parameter = parameter.to_deprecated_string(), + .argument = {}, + .range = expansion.range, + .op = ResolvedParameterExpansion::Op::StringLength, + }; + } + break; + case '-': + lexer.ignore(); + argument = lexer.consume_all(); + op = ResolvedParameterExpansion::Op::UseDefaultValueIfUnset; + break; + case '=': + lexer.ignore(); + argument = lexer.consume_all(); + op = ResolvedParameterExpansion::Op::AssignDefaultValueIfUnset; + break; + case '?': + lexer.ignore(); + argument = lexer.consume_all(); + op = ResolvedParameterExpansion::Op::IndicateErrorIfUnset; + break; + case '+': + lexer.ignore(); + argument = lexer.consume_all(); + op = ResolvedParameterExpansion::Op::UseAlternativeValueIfUnset; + break; + case '%': + if (lexer.consume_specific('%')) + op = ResolvedParameterExpansion::Op::RemoveLargestSuffixByPattern; + else + op = ResolvedParameterExpansion::Op::RemoveSmallestSuffixByPattern; + argument = lexer.consume_all(); + break; + case '#': + if (lexer.consume_specific('#')) + op = ResolvedParameterExpansion::Op::RemoveLargestPrefixByPattern; + else + op = ResolvedParameterExpansion::Op::RemoveSmallestPrefixByPattern; + argument = lexer.consume_all(); + break; + default: + if (is_valid_name(text)) { + op = ResolvedParameterExpansion::Op::GetVariable; + } else { + error(token, "Unknown parameter expansion: {}", text); + return ResolvedParameterExpansion { + .parameter = parameter.to_deprecated_string(), + .argument = {}, + .range = expansion.range, + .op = ResolvedParameterExpansion::Op::StringLength, + }; + } + } + VERIFY(lexer.is_eof()); + + return ResolvedParameterExpansion { + .parameter = parameter.to_deprecated_string(), + .argument = argument.to_deprecated_string(), + .range = expansion.range, + .op = op, + .expand = ResolvedParameterExpansion::Expand::Word, + }; + }, + [&](ArithmeticExpansion const& expansion) -> ResolvedExpansion { + error(token, "Arithmetic expansion is not supported"); + return ResolvedParameterExpansion { + .parameter = ""sv, + .argument = ""sv, + .range = expansion.range, + .op = ResolvedParameterExpansion::Op::StringLength, + .expand = ResolvedParameterExpansion::Expand::Nothing, + }; + }, + [&](CommandExpansion const& expansion) -> ResolvedExpansion { + Parser parser { expansion.command.string_view() }; + auto node = parser.parse(); + m_errors.extend(move(parser.m_errors)); + return ResolvedCommandExpansion { + move(node), + expansion.range, + }; + }); + + resolved_expansions.append(move(resolved)); + } + + token.resolved_expansions = move(resolved_expansions); + expanded_tokens.append(move(token)); + } + + swap_expansions(); + + // (7) Loop variables + previous_token = {}; + tokens_taken_from_buffer = 0; + if (m_token_buffer.size() >= 1) { + previous_token = m_token_buffer.take_last(); + tokens_taken_from_buffer++; + } + + for (auto& token : tokens) { + if (!previous_token.has_value()) { + previous_token = token; + continue; + } + + if (previous_token->type == Token::Type::For && token.type == Token::Type::Word && is_valid_name(token.value)) { + token.type = Token::Type::VariableName; + } + + expanded_tokens.append(exchange(*previous_token, token)); + } + + swap_expansions(); + + // (8) Function names + previous_token = {}; + previous_previous_token = {}; + tokens_taken_from_buffer = 0; + if (m_token_buffer.size() >= 1) { + previous_token = m_token_buffer.take_last(); + tokens_taken_from_buffer++; + } + if (m_token_buffer.size() >= 1) { + previous_previous_token = m_token_buffer.take_last(); + tokens_taken_from_buffer++; + } + + for (auto& token : tokens) { + if (!previous_token.has_value()) { + previous_token = token; + continue; + } + if (!previous_previous_token.has_value()) { + previous_previous_token = move(previous_token); + previous_token = token; + continue; + } + + // NAME ( ) + if (previous_previous_token->could_be_start_of_a_simple_command + && previous_previous_token->type == Token::Type::Word + && previous_token->type == Token::Type::OpenParen + && token.type == Token::Type::CloseParen) { + + previous_previous_token->type = Token::Type::VariableName; + } + + expanded_tokens.append(exchange(*previous_previous_token, exchange(*previous_token, token))); + } + + swap_expansions(); + + return tokens; +} + +static AST::Position empty_position() +{ + return { 0, 0, { 0, 0 }, { 0, 0 } }; +} + +RefPtr<AST::Node> Parser::parse_complete_command() +{ + auto list = [&] { + // separator... + while (is_separator(peek())) + skip(); + + // list EOF + auto list = parse_list(); + if (eof()) + return list; + + // list separator EOF + while (is_separator(peek())) + skip(); + + if (eof()) + return list; + + auto position = peek().position; + auto syntax_error = make_ref_counted<AST::SyntaxError>( + position.value_or(empty_position()), + "Extra tokens after complete command"sv); + + if (list) + list->set_is_syntax_error(*syntax_error); + else + list = syntax_error; + + return list; + }(); + + if (!list) + return nullptr; + + return make_ref_counted<AST::Execute>(list->position(), *list); +} + +RefPtr<AST::Node> Parser::parse_list() +{ + NonnullRefPtrVector<AST::Node> nodes; + Vector<AST::Position> positions; + + auto start_position = peek().position.value_or(empty_position()); + + for (;;) { + auto new_node = parse_and_or(); + if (!new_node) + break; + + if (peek().type == Token::Type::And) { + new_node = make_ref_counted<AST::Background>( + new_node->position(), + *new_node); + } + + nodes.append(new_node.release_nonnull()); + + if (!is_separator(peek()) || eof()) + break; + + auto position = consume().position; + if (position.has_value()) + positions.append(position.release_value()); + } + + auto end_position = peek().position.value_or(empty_position()); + + return make_ref_counted<AST::Sequence>( + AST::Position { + start_position.start_offset, + end_position.end_offset, + start_position.start_line, + start_position.end_line, + }, + move(nodes), + move(positions)); +} + +RefPtr<AST::Node> Parser::parse_and_or() +{ + auto node = parse_pipeline(); + if (!node) + return {}; + + for (;;) { + if (peek().type == Token::Type::AndIf) { + auto and_token = consume(); + while (peek().type == Token::Type::Newline) + skip(); + + auto rhs = parse_pipeline(); + if (!rhs) + return {}; + node = make_ref_counted<AST::And>( + node->position(), + *node, + rhs.release_nonnull(), + and_token.position.value_or(empty_position())); + continue; + } + if (peek().type == Token::Type::OrIf) { + auto or_token = consume(); + while (peek().type == Token::Type::Newline) + skip(); + + auto rhs = parse_pipeline(); + if (!rhs) + return {}; + node = make_ref_counted<AST::And>( + node->position(), + *node, + rhs.release_nonnull(), + or_token.position.value_or(empty_position())); + continue; + } + break; + } + + return node; +} + +RefPtr<AST::Node> Parser::parse_pipeline() +{ + return parse_pipe_sequence(); +} + +RefPtr<AST::Node> Parser::parse_pipe_sequence() +{ + auto node = parse_command(); + if (!node) + return {}; + + for (;;) { + if (peek().type != Token::Type::Pipe) + break; + + consume(); + while (peek().type == Token::Type::Newline) + skip(); + + auto rhs = parse_command(); + if (!rhs) + return {}; + node = make_ref_counted<AST::Pipe>( + node->position(), + *node, + rhs.release_nonnull()); + } + + return node; +} + +RefPtr<AST::Node> Parser::parse_command() +{ + auto node = [this] { + if (auto node = parse_function_definition()) + return node; + + if (auto node = parse_simple_command()) + return node; + + auto node = parse_compound_command(); + if (!node) + return node; + + if (auto list = parse_redirect_list()) { + auto position = list->position(); + node = make_ref_counted<AST::Join>( + node->position().with_end(position), + *node, + list.release_nonnull()); + } + + return node; + }(); + + if (!node) + return nullptr; + + return make_ref_counted<AST::CastToCommand>(node->position(), *node); +} + +RefPtr<AST::Node> Parser::parse_function_definition() +{ + // NAME OPEN_PAREN CLOSE_PAREN newline* function_body + + auto start_index = m_token_index; + ArmedScopeGuard reset = [&] { + m_token_index = start_index; + }; + + if (peek().type != Token::Type::VariableName) { + return nullptr; + } + + auto name = consume(); + + if (consume().type != Token::Type::OpenParen) + return nullptr; + + if (consume().type != Token::Type::CloseParen) + return nullptr; + + while (peek().type == Token::Type::Newline) + skip(); + + auto body = parse_function_body(); + + if (!body) + return nullptr; + + reset.disarm(); + + return make_ref_counted<AST::FunctionDeclaration>( + name.position.value_or(empty_position()).with_end(peek().position.value_or(empty_position())), + AST::NameWithPosition { name.value, name.position.value_or(empty_position()) }, + Vector<AST::NameWithPosition> {}, + body.release_nonnull()); +} + +RefPtr<AST::Node> Parser::parse_function_body() +{ + // compound_command redirect_list? + + auto node = parse_compound_command(); + if (!node) + return nullptr; + + if (auto list = parse_redirect_list()) { + auto position = list->position(); + node = make_ref_counted<AST::Join>( + node->position().with_end(position), + *node, + list.release_nonnull()); + } + + return node; +} + +RefPtr<AST::Node> Parser::parse_redirect_list() +{ + // io_redirect* + + RefPtr<AST::Node> node; + + for (;;) { + auto new_node = parse_io_redirect(); + if (!new_node) + break; + + if (node) { + node = make_ref_counted<AST::Join>( + node->position().with_end(new_node->position()), + *node, + new_node.release_nonnull()); + } else { + node = new_node; + } + } + + return node; +} + +RefPtr<AST::Node> Parser::parse_compound_command() +{ + if (auto node = parse_brace_group()) + return node; + + if (auto node = parse_subshell()) + return node; + + if (auto node = parse_if_clause()) + return node; + + if (auto node = parse_for_clause()) + return node; + + if (auto node = parse_case_clause()) + return node; + + if (auto node = parse_while_clause()) + return node; + + if (auto node = parse_until_clause()) + return node; + + return {}; +} + +RefPtr<AST::Node> Parser::parse_while_clause() +{ + if (peek().type != Token::Type::While) + return nullptr; + + auto start_position = consume().position.value_or(empty_position()); + auto condition = parse_compound_list(); + if (!condition) + condition = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + "Expected condition after 'while'"sv); + + auto do_group = parse_do_group(); + if (!do_group) + do_group = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + "Expected 'do' after 'while'"sv); + + // while foo; bar -> loop { if foo { bar } else { break } } + return make_ref_counted<AST::ForLoop>( + start_position.with_end(peek().position.value_or(empty_position())), + Optional<AST::NameWithPosition> {}, + Optional<AST::NameWithPosition> {}, + nullptr, + make_ref_counted<AST::IfCond>( + start_position.with_end(peek().position.value_or(empty_position())), + Optional<AST::Position> {}, + condition.release_nonnull(), + do_group.release_nonnull(), + make_ref_counted<AST::ContinuationControl>( + start_position, + AST::ContinuationControl::ContinuationKind::Break))); +} + +RefPtr<AST::Node> Parser::parse_until_clause() +{ + if (peek().type != Token::Type::Until) + return nullptr; + + auto start_position = consume().position.value_or(empty_position()); + auto condition = parse_compound_list(); + if (!condition) + condition = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + "Expected condition after 'until'"sv); + + auto do_group = parse_do_group(); + if (!do_group) + do_group = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + "Expected 'do' after 'until'"sv); + + // until foo; bar -> loop { if foo { break } else { bar } } + return make_ref_counted<AST::ForLoop>( + start_position.with_end(peek().position.value_or(empty_position())), + Optional<AST::NameWithPosition> {}, + Optional<AST::NameWithPosition> {}, + nullptr, + make_ref_counted<AST::IfCond>( + start_position.with_end(peek().position.value_or(empty_position())), + Optional<AST::Position> {}, + condition.release_nonnull(), + make_ref_counted<AST::ContinuationControl>( + start_position, + AST::ContinuationControl::ContinuationKind::Break), + do_group.release_nonnull())); +} + +RefPtr<AST::Node> Parser::parse_brace_group() +{ + if (peek().type != Token::Type::OpenBrace) + return nullptr; + + consume(); + + auto list = parse_compound_list(); + + RefPtr<AST::SyntaxError> error; + if (peek().type != Token::Type::CloseBrace) { + error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected '}}', not {}", peek().type_name())); + } else { + consume(); + } + + if (error) { + if (list) + list->set_is_syntax_error(*error); + else + list = error; + } + + return make_ref_counted<AST::Execute>(list->position(), *list); +} + +RefPtr<AST::Node> Parser::parse_case_clause() +{ + auto start_position = peek().position.value_or(empty_position()); + if (peek().type != Token::Type::Case) + return nullptr; + + skip(); + + RefPtr<AST::SyntaxError> syntax_error; + auto expr = parse_word(); + if (!expr) + expr = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected a word, not {}", peek().type_name())); + + if (peek().type != Token::Type::In) { + syntax_error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected 'in', not {}", peek().type_name())); + } else { + skip(); + } + + while (peek().type == Token::Type::Newline) + skip(); + + Vector<AST::MatchEntry> entries; + + for (;;) { + if (eof() || peek().type == Token::Type::Esac) + break; + + if (peek().type == Token::Type::Newline) { + skip(); + continue; + } + + // Parse a pattern list + auto needs_dsemi = true; + if (peek().type == Token::Type::OpenParen) { + skip(); + needs_dsemi = false; + } + + auto result = parse_case_list(); + + if (peek().type == Token::Type::CloseParen) { + skip(); + } else { + if (!syntax_error) + syntax_error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected ')', not {}", peek().type_name())); + break; + } + + while (peek().type == Token::Type::Newline) + skip(); + + auto compound_list = parse_compound_list(); + + if (peek().type == Token::Type::DoubleSemicolon) { + skip(); + } else if (needs_dsemi) { + if (!syntax_error) + syntax_error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected ';;', not {}", peek().type_name())); + } + + if (syntax_error) { + if (compound_list) + compound_list->set_is_syntax_error(*syntax_error); + else + compound_list = syntax_error; + syntax_error = nullptr; + } + + entries.append(AST::MatchEntry { + .options = move(result.nodes), + .match_names = {}, + .match_as_position = {}, + .pipe_positions = move(result.pipe_positions), + .body = move(compound_list), + }); + } + + if (peek().type != Token::Type::Esac) { + syntax_error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected 'esac', not {}", peek().type_name())); + } else { + skip(); + } + + auto node = make_ref_counted<AST::MatchExpr>( + start_position.with_end(peek().position.value_or(empty_position())), + expr.release_nonnull(), + DeprecatedString {}, + Optional<AST::Position> {}, + move(entries)); + + if (syntax_error) + node->set_is_syntax_error(*syntax_error); + + return node; +} + +Parser::CaseItemsResult Parser::parse_case_list() +{ + // Just a list of words split by '|', delimited by ')' + NonnullRefPtrVector<AST::Node> nodes; + Vector<AST::Position> pipes; + + for (;;) { + if (eof() || peek().type == Token::Type::CloseParen) + break; + + if (peek().type != Token::Type::Word) + break; + + auto node = parse_word(); + if (!node) + node = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected a word, not {}", peek().type_name())); + + nodes.append(node.release_nonnull()); + + if (peek().type == Token::Type::Pipe) { + pipes.append(peek().position.value_or(empty_position())); + skip(); + } else { + break; + } + } + + if (nodes.is_empty()) + nodes.append(make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected a word, not {}", peek().type_name()))); + + return { move(pipes), move(nodes) }; +} + +RefPtr<AST::Node> Parser::parse_if_clause() +{ + // If compound_list Then compound_list {Elif compound_list Then compound_list (Fi|Else)?} [(?=Else) compound_list] (?!=Fi) Fi + auto start_position = peek().position.value_or(empty_position()); + if (peek().type != Token::Type::If) + return nullptr; + + skip(); + auto main_condition = parse_compound_list(); + if (!main_condition) + main_condition = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'if'"); + + RefPtr<AST::SyntaxError> syntax_error; + if (peek().type != Token::Type::Then) { + syntax_error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected 'then', not {}", peek().type_name())); + } else { + skip(); + } + + auto main_consequence = parse_compound_list(); + if (!main_consequence) + main_consequence = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'then'"); + + auto node = make_ref_counted<AST::IfCond>(start_position, Optional<AST::Position>(), main_condition.release_nonnull(), main_consequence.release_nonnull(), nullptr); + auto active_node = node; + + while (peek().type == Token::Type::Elif) { + skip(); + auto condition = parse_compound_list(); + if (!condition) + condition = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'elif'"); + + if (peek().type != Token::Type::Then) { + if (!syntax_error) + syntax_error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected 'then', not {}", peek().type_name())); + } else { + skip(); + } + + auto consequence = parse_compound_list(); + if (!consequence) + consequence = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'then'"); + + auto new_node = make_ref_counted<AST::IfCond>(start_position, Optional<AST::Position>(), condition.release_nonnull(), consequence.release_nonnull(), nullptr); + + active_node->false_branch() = new_node; + active_node = move(new_node); + } + + auto needs_fi = true; + switch (peek().type) { + case Token::Type::Else: + skip(); + active_node->false_branch() = parse_compound_list(); + if (!active_node->false_branch()) + active_node->false_branch() = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'else'"); + break; + case Token::Type::Fi: + needs_fi = false; + break; + default: + if (!syntax_error) + syntax_error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected 'else' or 'fi', not {}", peek().type_name())); + break; + } + + if (needs_fi) { + if (peek().type != Token::Type::Fi) { + if (!syntax_error) + syntax_error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected 'fi', not {}", peek().type_name())); + } else { + skip(); + } + } + + if (syntax_error) + node->set_is_syntax_error(*syntax_error); + + return node; +} + +RefPtr<AST::Node> Parser::parse_subshell() +{ + auto start_position = peek().position.value_or(empty_position()); + if (peek().type != Token::Type::OpenParen) + return nullptr; + + skip(); + RefPtr<AST::SyntaxError> error; + + auto list = parse_compound_list(); + if (!list) + error = make_ref_counted<AST::SyntaxError>(peek().position.value_or(empty_position()), "Expected compound list after ("sv); + + if (peek().type != Token::Type::CloseParen) + error = make_ref_counted<AST::SyntaxError>(peek().position.value_or(empty_position()), "Expected ) after compound list"sv); + else + skip(); + + if (!list) + return error; + + return make_ref_counted<AST::Subshell>( + start_position.with_end(peek().position.value_or(empty_position())), + list.release_nonnull()); +} + +RefPtr<AST::Node> Parser::parse_compound_list() +{ + while (peek().type == Token::Type::Newline) + skip(); + + auto term = parse_term(); + if (!term) + return term; + + if (is_separator(peek())) { + if (consume().type == Token::Type::And) { + term = make_ref_counted<AST::Background>( + term->position().with_end(peek().position.value_or(empty_position())), + *term); + } + } + + return term; +} + +RefPtr<AST::Node> Parser::parse_term() +{ + NonnullRefPtrVector<AST::Node> nodes; + Vector<AST::Position> positions; + + auto start_position = peek().position.value_or(empty_position()); + + for (;;) { + auto new_node = parse_and_or(); + if (!new_node) + break; + + nodes.append(new_node.release_nonnull()); + + if (!is_separator(peek())) + break; + + auto position = consume().position; + if (position.has_value()) + positions.append(position.release_value()); + } + + auto end_position = peek().position.value_or(empty_position()); + + return make_ref_counted<AST::Sequence>( + start_position.with_end(end_position), + move(nodes), + move(positions)); +} + +RefPtr<AST::Node> Parser::parse_for_clause() +{ + // FOR NAME newline+ do_group + // FOR NAME newline+ IN separator do_group + // FOR NAME IN separator do_group + // FOR NAME IN wordlist separator do_group + + if (peek().type != Token::Type::For) + return nullptr; + + auto start_position = consume().position.value_or(empty_position()); + + DeprecatedString name; + Optional<AST::Position> name_position; + if (peek().type == Token::Type::VariableName) { + name_position = peek().position; + name = consume().value; + } else { + name = "it"; + error(peek(), "Expected a variable name, not {}", peek().type_name()); + } + + auto saw_newline = false; + while (peek().type == Token::Type::Newline) { + saw_newline = true; + skip(); + } + + auto saw_in = false; + Optional<AST::Position> in_kw_position; + if (peek().type == Token::Type::In) { + saw_in = true; + in_kw_position = peek().position; + skip(); + } else if (!saw_newline) { + error(peek(), "Expected 'in' or a newline, not {}", peek().type_name()); + } + + RefPtr<AST::Node> iterated_expression; + if (!saw_newline) + iterated_expression = parse_word_list(); + + if (saw_in) { + if (peek().type == Token::Type::Semicolon) + skip(); + else + error(peek(), "Expected a semicolon, not {}", peek().type_name()); + } + + auto body = parse_do_group(); + return AST::make_ref_counted<AST::ForLoop>( + start_position.with_end(peek().position.value_or(empty_position())), + AST::NameWithPosition { move(name), name_position.value_or(empty_position()) }, + Optional<AST::NameWithPosition> {}, + move(iterated_expression), + move(body), + move(in_kw_position), + Optional<AST::Position> {}); +} + +RefPtr<AST::Node> Parser::parse_word_list() +{ + NonnullRefPtrVector<AST::Node> nodes; + + auto start_position = peek().position.value_or(empty_position()); + + for (; peek().type == Token::Type::Word;) { + auto word = parse_word(); + nodes.append(word.release_nonnull()); + } + + return make_ref_counted<AST::ListConcatenate>( + start_position.with_end(peek().position.value_or(empty_position())), + move(nodes)); +} + +RefPtr<AST::Node> Parser::parse_word() +{ + if (peek().type != Token::Type::Word) + return nullptr; + + auto token = consume(); + RefPtr<AST::Node> word; + + enum class Quote { + None, + Single, + Double, + } in_quote { Quote::None }; + + auto append_bareword = [&](StringView string) { + if (!word && string.starts_with('~')) { + GenericLexer lexer { string }; + lexer.ignore(); + auto user = lexer.consume_while(is_ascii_alphanumeric); + string = lexer.remaining(); + + word = make_ref_counted<AST::Tilde>(token.position.value_or(empty_position()), user); + } + + if (string.is_empty()) + return; + + auto node = make_ref_counted<AST::BarewordLiteral>(token.position.value_or(empty_position()), string); + + if (word) { + word = make_ref_counted<AST::Juxtaposition>( + word->position().with_end(token.position.value_or(empty_position())), + *word, + move(node), + AST::Juxtaposition::Mode::StringExpand); + } else { + word = move(node); + } + }; + + auto append_string_literal = [&](StringView string) { + if (string.is_empty()) + return; + + auto node = make_ref_counted<AST::StringLiteral>(token.position.value_or(empty_position()), string, AST::StringLiteral::EnclosureType::SingleQuotes); + + if (word) { + word = make_ref_counted<AST::Juxtaposition>( + word->position().with_end(token.position.value_or(empty_position())), + *word, + move(node), + AST::Juxtaposition::Mode::StringExpand); + } else { + word = move(node); + } + }; + + auto append_string_part = [&](StringView string) { + if (string.is_empty()) + return; + + auto node = make_ref_counted<AST::StringLiteral>(token.position.value_or(empty_position()), string, AST::StringLiteral::EnclosureType::DoubleQuotes); + + if (word) { + word = make_ref_counted<AST::Juxtaposition>( + word->position().with_end(token.position.value_or(empty_position())), + *word, + move(node), + AST::Juxtaposition::Mode::StringExpand); + } else { + word = move(node); + } + }; + + auto append_parameter_expansion = [&](ResolvedParameterExpansion const& x) { + DeprecatedString immediate_function_name; + RefPtr<AST::Node> node; + switch (x.op) { + case ResolvedParameterExpansion::Op::UseDefaultValue: + immediate_function_name = "value_or_default"; + break; + case ResolvedParameterExpansion::Op::AssignDefaultValue: + immediate_function_name = "assign_default"; + break; + case ResolvedParameterExpansion::Op::IndicateErrorIfEmpty: + immediate_function_name = "error_if_empty"; + break; + case ResolvedParameterExpansion::Op::UseAlternativeValue: + immediate_function_name = "null_or_alternative"; + break; + case ResolvedParameterExpansion::Op::UseDefaultValueIfUnset: + immediate_function_name = "defined_value_or_default"; + break; + case ResolvedParameterExpansion::Op::AssignDefaultValueIfUnset: + immediate_function_name = "assign_defined_default"; + break; + case ResolvedParameterExpansion::Op::IndicateErrorIfUnset: + immediate_function_name = "error_if_unset"; + break; + case ResolvedParameterExpansion::Op::UseAlternativeValueIfUnset: + immediate_function_name = "null_if_unset_or_alternative"; + break; + case ResolvedParameterExpansion::Op::RemoveLargestSuffixByPattern: + // FIXME: Implement this + case ResolvedParameterExpansion::Op::RemoveSmallestSuffixByPattern: + immediate_function_name = "remove_suffix"; + break; + case ResolvedParameterExpansion::Op::RemoveLargestPrefixByPattern: + // FIXME: Implement this + case ResolvedParameterExpansion::Op::RemoveSmallestPrefixByPattern: + immediate_function_name = "remove_prefix"; + break; + case ResolvedParameterExpansion::Op::StringLength: + immediate_function_name = "length_of_variable"; + break; + case ResolvedParameterExpansion::Op::GetPositionalParameter: + case ResolvedParameterExpansion::Op::GetVariable: + node = make_ref_counted<AST::SimpleVariable>( + token.position.value_or(empty_position()), + x.parameter); + break; + case ResolvedParameterExpansion::Op::GetLastBackgroundPid: + node = make_ref_counted<AST::SyntaxError>( + token.position.value_or(empty_position()), + "$! not implemented"); + break; + case ResolvedParameterExpansion::Op::GetPositionalParameterList: + node = make_ref_counted<AST::SpecialVariable>( + token.position.value_or(empty_position()), + '*'); + break; + case ResolvedParameterExpansion::Op::GetCurrentOptionFlags: + node = make_ref_counted<AST::SyntaxError>( + token.position.value_or(empty_position()), + "The current option flags are not available in parameter expansions"); + break; + case ResolvedParameterExpansion::Op::GetPositionalParameterCount: + node = make_ref_counted<AST::SpecialVariable>( + token.position.value_or(empty_position()), + '#'); + break; + case ResolvedParameterExpansion::Op::GetLastExitStatus: + node = make_ref_counted<AST::SpecialVariable>( + token.position.value_or(empty_position()), + '?'); + break; + case ResolvedParameterExpansion::Op::GetPositionalParameterListAsString: + node = make_ref_counted<AST::SyntaxError>( + token.position.value_or(empty_position()), + "$* not implemented"); + break; + case ResolvedParameterExpansion::Op::GetShellProcessId: + node = make_ref_counted<AST::SpecialVariable>( + token.position.value_or(empty_position()), + '$'); + break; + } + + if (!node) { + NonnullRefPtrVector<AST::Node> arguments; + arguments.append(make_ref_counted<AST::BarewordLiteral>( + token.position.value_or(empty_position()), + x.parameter)); + + if (!x.argument.is_empty()) { + // dbgln("Will parse {}", x.argument); + arguments.append(*Parser { x.argument }.parse_word()); + } + + node = make_ref_counted<AST::ImmediateExpression>( + token.position.value_or(empty_position()), + AST::NameWithPosition { + immediate_function_name, + token.position.value_or(empty_position()), + }, + move(arguments), + Optional<AST::Position> {}); + } + + if (x.expand == ResolvedParameterExpansion::Expand::Word) { + node = make_ref_counted<AST::ImmediateExpression>( + token.position.value_or(empty_position()), + AST::NameWithPosition { + "reexpand", + token.position.value_or(empty_position()), + }, + Vector { node.release_nonnull() }, + Optional<AST::Position> {}); + } + + if (word) { + word = make_ref_counted<AST::Juxtaposition>( + word->position().with_end(token.position.value_or(empty_position())), + *word, + node.release_nonnull(), + AST::Juxtaposition::Mode::StringExpand); + } else { + word = move(node); + } + }; + + auto append_command_expansion = [&](ResolvedCommandExpansion const& x) { + if (!x.command) + return; + + RefPtr<AST::Execute> execute_node; + + if (x.command->is_execute()) { + execute_node = const_cast<AST::Execute&>(static_cast<AST::Execute const&>(*x.command)); + execute_node->capture_stdout(); + } else { + execute_node = make_ref_counted<AST::Execute>( + word ? word->position() : empty_position(), + *x.command, + true); + } + + if (word) { + word = make_ref_counted<AST::Juxtaposition>( + word->position(), + *word, + execute_node.release_nonnull(), + AST::Juxtaposition::Mode::StringExpand); + } else { + word = move(execute_node); + } + }; + + auto append_string = [&](StringView string) { + if (string.is_empty()) + return; + + Optional<size_t> run_start; + auto escape = false; + for (size_t i = 0; i < string.length(); ++i) { + auto ch = string[i]; + switch (ch) { + case '\\': + if (!escape && i + 1 < string.length()) { + if (is_one_of(string[i + 1], '"', '\'', '$', '`', '\\')) { + escape = in_quote != Quote::Single; + continue; + } + } + break; + case '\'': + if (in_quote == Quote::Single) { + in_quote = Quote::None; + append_string_literal(string.substring_view(*run_start, i - *run_start)); + run_start = i + 1; + continue; + } + if (in_quote == Quote::Double) { + escape = false; + continue; + } + [[fallthrough]]; + case '"': + if (ch == '\'' && in_quote == Quote::Single) { + escape = false; + continue; + } + if (!escape) { + if (ch == '"' && in_quote == Quote::Double) { + in_quote = Quote::None; + if (run_start.has_value()) + append_string_part(string.substring_view(*run_start, i - *run_start)); + run_start = i + 1; + continue; + } + if (run_start.has_value()) + append_bareword(string.substring_view(*run_start, i - *run_start)); + in_quote = ch == '\'' ? Quote::Single : Quote::Double; + run_start = i + 1; + } + escape = false; + [[fallthrough]]; + default: + if (!run_start.has_value()) + run_start = i; + escape = false; + continue; + } + } + + if (run_start.has_value()) + append_bareword(string.substring_view(*run_start, string.length() - *run_start)); + }; + + size_t current_offset = 0; + for (auto& expansion : token.resolved_expansions) { + expansion.visit( + [&](ResolvedParameterExpansion const& x) { + if (x.range.start >= token.value.length()) { + dbgln("Parameter expansion range {}-{} is out of bounds for '{}'", x.range.start, x.range.length, token.value); + return; + } + + if (x.range.start != current_offset) { + append_string(token.value.substring_view(current_offset, x.range.start - current_offset)); + current_offset = x.range.start; + } + current_offset += x.range.length; + append_parameter_expansion(x); + }, + [&](ResolvedCommandExpansion const& x) { + if (x.range.start >= token.value.length()) { + dbgln("Parameter expansion range {}-{} is out of bounds for '{}'", x.range.start, x.range.length, token.value); + return; + } + + if (x.range.start != current_offset) { + append_string(token.value.substring_view(current_offset, x.range.start - current_offset)); + current_offset = x.range.start; + } + current_offset += x.range.length; + append_command_expansion(x); + }); + } + + if (current_offset >= token.value.length()) { + dbgln("Parameter expansion range {}- is out of bounds for '{}'", current_offset, token.value); + return word; + } + + if (current_offset != token.value.length()) + append_string(token.value.substring_view(current_offset)); + + return word; +} + +RefPtr<AST::Node> Parser::parse_do_group() +{ + if (peek().type != Token::Type::Do) { + return make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected 'do', not {}", peek().type_name())); + } + + consume(); + + auto list = parse_compound_list(); + + RefPtr<AST::SyntaxError> error; + if (peek().type != Token::Type::Done) { + error = make_ref_counted<AST::SyntaxError>( + peek().position.value_or(empty_position()), + DeprecatedString::formatted("Expected 'done', not {}", peek().type_name())); + } else { + consume(); + } + + if (error) { + if (list) + list->set_is_syntax_error(*error); + else + list = error; + } + + return make_ref_counted<AST::Execute>(list->position(), *list); +} + +RefPtr<AST::Node> Parser::parse_simple_command() +{ + auto start_position = peek().position.value_or(empty_position()); + + Vector<DeprecatedString> definitions; + NonnullRefPtrVector<AST::Node> nodes; + + for (;;) { + if (auto io_redirect = parse_io_redirect()) + nodes.append(*io_redirect); + else + break; + } + + while (peek().type == Token::Type::AssignmentWord) { + definitions.append(peek().value); + + if (!nodes.is_empty()) { + nodes.append( + make_ref_counted<AST::BarewordLiteral>( + peek().position.value_or(empty_position()), + consume().value)); + } else { + // env (assignments) (command) + nodes.append(make_ref_counted<AST::BarewordLiteral>( + empty_position(), + "env")); + + nodes.append( + make_ref_counted<AST::BarewordLiteral>( + peek().position.value_or(empty_position()), + consume().value)); + } + } + + // WORD or io_redirect: IO_NUMBER or io_file + if (!is_one_of(peek().type, + Token::Type::Word, Token::Type::IoNumber, + Token::Type::Less, Token::Type::LessAnd, Token::Type::Great, Token::Type::GreatAnd, + Token::Type::DoubleGreat, Token::Type::LessGreat, Token::Type::Clobber)) { + if (!nodes.is_empty()) { + Vector<AST::VariableDeclarations::Variable> variables; + for (auto& definition : definitions) { + auto parts = definition.split_limit('=', 2, SplitBehavior::KeepEmpty); + auto name = make_ref_counted<AST::BarewordLiteral>( + empty_position(), + parts[0]); + auto value = make_ref_counted<AST::BarewordLiteral>( + empty_position(), + parts.size() > 1 ? parts[1] : ""); + + variables.append({ move(name), move(value) }); + } + + return make_ref_counted<AST::VariableDeclarations>(empty_position(), move(variables)); + } + return nullptr; + } + + // auto first = true; + for (;;) { + if (peek().type == Token::Type::Word) { + auto new_word = parse_word(); + if (!new_word) + break; + + // if (first) { + // first = false; + // new_word = make_ref_counted<AST::ImmediateExpression>( + // new_word->position(), + // AST::NameWithPosition { + // "substitute_aliases"sv, + // empty_position(), + // }, + // NonnullRefPtrVector<AST::Node> { *new_word }, + // Optional<AST::Position> {}); + // } + + nodes.append(new_word.release_nonnull()); + } else if (auto io_redirect = parse_io_redirect()) { + nodes.append(io_redirect.release_nonnull()); + } else { + break; + } + } + + auto node = make_ref_counted<AST::ListConcatenate>( + start_position.with_end(peek().position.value_or(empty_position())), + move(nodes)); + + return node; +} + +RefPtr<AST::Node> Parser::parse_io_redirect() +{ + auto start_position = peek().position.value_or(empty_position()); + auto start_index = m_token_index; + + // io_redirect: IO_NUMBER? io_file | IO_NUMBER? io_here + Optional<int> io_number; + + if (peek().type == Token::Type::IoNumber) + io_number = consume().value.to_int(TrimWhitespace::No); + + if (auto io_file = parse_io_file(start_position, io_number)) + return io_file; + + // if (auto io_here = parse_io_here(start_position, io_number)) + // return io_here; + + m_token_index = start_index; + return nullptr; +} + +RefPtr<AST::Node> Parser::parse_io_file(AST::Position start_position, Optional<int> fd) +{ + auto start_index = m_token_index; + + // io_file = (LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER) WORD + auto io_operator = peek().type; + if (!is_one_of(io_operator, + Token::Type::Less, Token::Type::LessAnd, Token::Type::Great, Token::Type::GreatAnd, + Token::Type::DoubleGreat, Token::Type::LessGreat, Token::Type::Clobber)) + return nullptr; + + auto io_operator_token = consume(); + + auto word = parse_word(); + if (!word) { + m_token_index = start_index; + return nullptr; + } + + auto position = start_position.with_end(peek().position.value_or(empty_position())); + switch (io_operator) { + case Token::Type::Less: + return make_ref_counted<AST::ReadRedirection>( + position, + fd.value_or(0), + word.release_nonnull()); + case Token::Type::Clobber: + // FIXME: Add support for clobber (and 'noclobber') + case Token::Type::Great: + return make_ref_counted<AST::WriteRedirection>( + position, + fd.value_or(1), + word.release_nonnull()); + case Token::Type::DoubleGreat: + return make_ref_counted<AST::WriteAppendRedirection>( + position, + fd.value_or(1), + word.release_nonnull()); + case Token::Type::LessGreat: + return make_ref_counted<AST::ReadWriteRedirection>( + position, + fd.value_or(0), + word.release_nonnull()); + case Token::Type::LessAnd: + case Token::Type::GreatAnd: { + auto is_less = io_operator == Token::Type::LessAnd; + auto source_fd = fd.value_or(is_less ? 0 : 1); + if (word->is_bareword()) { + auto maybe_target_fd = static_ptr_cast<AST::BarewordLiteral>(word)->text().to_int(AK::TrimWhitespace::No); + if (maybe_target_fd.has_value()) { + auto target_fd = maybe_target_fd.release_value(); + if (is_less) + swap(source_fd, target_fd); + + return make_ref_counted<AST::Fd2FdRedirection>( + position, + source_fd, + target_fd); + } + } + if (is_less) { + return make_ref_counted<AST::ReadRedirection>( + position, + source_fd, + word.release_nonnull()); + } + + return make_ref_counted<AST::WriteRedirection>( + position, + source_fd, + word.release_nonnull()); + } + default: + VERIFY_NOT_REACHED(); + } +} + +} diff --git a/Userland/Shell/PosixParser.h b/Userland/Shell/PosixParser.h new file mode 100644 index 0000000000..4f873128ce --- /dev/null +++ b/Userland/Shell/PosixParser.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <Shell/AST.h> +#include <Shell/PosixLexer.h> + +namespace Shell::Posix { + +class Parser { +public: + Parser(StringView input, bool interactive = false) + : m_lexer(input) + , m_in_interactive_mode(interactive) + , m_eof_token(Token::eof()) + { + fill_token_buffer(); + } + + RefPtr<AST::Node> parse(); + RefPtr<AST::Node> parse_word_list(); + + struct Error { + DeprecatedString message; + Optional<AST::Position> position; + }; + auto& errors() const { return m_errors; } + +private: + Optional<Token> next_expanded_token(); + Vector<Token> perform_expansions(Vector<Token> tokens); + void fill_token_buffer(); + + Token const& peek() const + { + if (eof()) + return m_eof_token; + return m_token_buffer[m_token_index]; + } + Token const& consume() + { + if (eof()) + return m_eof_token; + return m_token_buffer[m_token_index++]; + } + void skip() + { + if (eof()) + return; + m_token_index++; + } + bool eof() const + { + return m_token_index == m_token_buffer.size() || m_token_buffer[m_token_index].type == Token::Type::Eof; + } + + struct CaseItemsResult { + Vector<AST::Position> pipe_positions; + NonnullRefPtrVector<AST::Node> nodes; + }; + + RefPtr<AST::Node> parse_complete_command(); + RefPtr<AST::Node> parse_list(); + RefPtr<AST::Node> parse_and_or(); + RefPtr<AST::Node> parse_pipeline(); + RefPtr<AST::Node> parse_pipe_sequence(); + RefPtr<AST::Node> parse_command(); + RefPtr<AST::Node> parse_compound_command(); + RefPtr<AST::Node> parse_subshell(); + RefPtr<AST::Node> parse_compound_list(); + RefPtr<AST::Node> parse_term(); + RefPtr<AST::Node> parse_for_clause(); + RefPtr<AST::Node> parse_case_clause(); + CaseItemsResult parse_case_list(); + RefPtr<AST::Node> parse_if_clause(); + RefPtr<AST::Node> parse_while_clause(); + RefPtr<AST::Node> parse_until_clause(); + RefPtr<AST::Node> parse_function_definition(); + RefPtr<AST::Node> parse_function_body(); + RefPtr<AST::Node> parse_brace_group(); + RefPtr<AST::Node> parse_do_group(); + RefPtr<AST::Node> parse_simple_command(); + RefPtr<AST::Node> parse_prefix(); + RefPtr<AST::Node> parse_suffix(); + RefPtr<AST::Node> parse_io_redirect(); + RefPtr<AST::Node> parse_redirect_list(); + RefPtr<AST::Node> parse_io_file(AST::Position, Optional<int> fd); + RefPtr<AST::Node> parse_io_here(AST::Position, Optional<int> fd); + RefPtr<AST::Node> parse_word(); + + template<typename... Ts> + void error(Token const& token, CheckedFormatString<Ts...> fmt, Ts&&... args) + { + m_errors.append(Error { + DeprecatedString::formatted(fmt.view(), forward<Ts>(args)...), + token.position, + }); + } + + Lexer m_lexer; + bool m_in_interactive_mode { false }; + Vector<Token, 2> m_token_buffer; + size_t m_token_index { 0 }; + Vector<Token> m_previous_token_buffer; + + Vector<Error> m_errors; + + Token m_eof_token; + + bool m_disallow_command_prefix { true }; +}; + +} diff --git a/Userland/Shell/Shell.cpp b/Userland/Shell/Shell.cpp index 32deed4f97..23e79128b9 100644 --- a/Userland/Shell/Shell.cpp +++ b/Userland/Shell/Shell.cpp @@ -26,6 +26,7 @@ #include <LibCore/System.h> #include <LibCore/Timer.h> #include <LibLine/Editor.h> +#include <Shell/PosixParser.h> #include <errno.h> #include <fcntl.h> #include <inttypes.h> @@ -297,7 +298,7 @@ Vector<AST::Command> Shell::expand_aliases(Vector<AST::Command> initial_commands auto alias = resolve_alias(command.argv[0]); if (!alias.is_null()) { auto argv0 = command.argv.take_first(); - auto subcommand_ast = Parser { alias }.parse(); + auto subcommand_ast = parse(alias, false); if (subcommand_ast) { while (subcommand_ast->is_execute()) { auto* ast = static_cast<AST::Execute*>(subcommand_ast.ptr()); @@ -477,7 +478,7 @@ bool Shell::invoke_function(const AST::Command& command, int& retval) DeprecatedString Shell::format(StringView source, ssize_t& cursor) const { - Formatter formatter(source, cursor); + Formatter formatter(source, cursor, m_in_posix_mode); auto result = formatter.format(); cursor = formatter.cursor(); @@ -580,7 +581,7 @@ int Shell::run_command(StringView cmd, Optional<SourcePosition> source_position_ if (cmd.is_empty()) return 0; - auto command = Parser(cmd, m_is_interactive).parse(); + auto command = parse(cmd, m_is_interactive); if (!command) return 0; @@ -1410,8 +1411,7 @@ void Shell::remove_entry_from_cache(StringView entry) void Shell::highlight(Line::Editor& editor) const { auto line = editor.line(); - Parser parser(line, m_is_interactive); - auto ast = parser.parse(); + auto ast = parse(line, m_is_interactive); if (!ast) return; ast->highlight_in_editor(editor, const_cast<Shell&>(*this)); @@ -1425,9 +1425,7 @@ Vector<Line::CompletionSuggestion> Shell::complete() Vector<Line::CompletionSuggestion> Shell::complete(StringView line) { - Parser parser(line, m_is_interactive); - - auto ast = parser.parse(); + auto ast = parse(line, m_is_interactive); if (!ast) return {}; @@ -2177,8 +2175,9 @@ Shell::Shell() cache_path(); } -Shell::Shell(Line::Editor& editor, bool attempt_interactive) - : m_editor(editor) +Shell::Shell(Line::Editor& editor, bool attempt_interactive, bool posix_mode) + : m_in_posix_mode(posix_mode) + , m_editor(editor) { uid = getuid(); tcsetpgrp(0, getpgrp()); @@ -2224,8 +2223,8 @@ Shell::Shell(Line::Editor& editor, bool attempt_interactive) cache_path(); } - m_editor->register_key_input_callback('\n', [](Line::Editor& editor) { - auto ast = Parser(editor.line()).parse(); + m_editor->register_key_input_callback('\n', [this](Line::Editor& editor) { + auto ast = parse(editor.line(), false); if (ast && ast->is_syntax_error() && ast->syntax_error_node().is_continuable()) return true; @@ -2484,6 +2483,32 @@ void Shell::timer_event(Core::TimerEvent& event) m_editor->save_history(get_history_path()); } +RefPtr<AST::Node> Shell::parse(StringView input, bool interactive, bool as_command) const +{ + if (m_in_posix_mode) { + Posix::Parser parser(input); + if (as_command) { + auto node = parser.parse(); + if constexpr (SHELL_POSIX_PARSER_DEBUG) { + dbgln("Parsed with the POSIX Parser:"); + node->dump(0); + } + return node; + } + + return parser.parse_word_list(); + } + + Parser parser { input, interactive }; + if (as_command) + return parser.parse(); + + auto nodes = parser.parse_as_multiple_expressions(); + return make_ref_counted<AST::ListConcatenate>( + nodes.is_empty() ? AST::Position { 0, 0, { 0, 0 }, { 0, 0 } } : nodes.first().position(), + move(nodes)); +} + void FileDescriptionCollector::collect() { for (auto fd : m_fds) diff --git a/Userland/Shell/Shell.h b/Userland/Shell/Shell.h index ef5b95e25c..969ffe0a29 100644 --- a/Userland/Shell/Shell.h +++ b/Userland/Shell/Shell.h @@ -61,16 +61,26 @@ __ENUMERATE_SHELL_OPTION(verbose, false, "Announce every command that is about to be executed") \ __ENUMERATE_SHELL_OPTION(invoke_program_for_autocomplete, false, "Attempt to use the program being completed itself for autocompletion via --complete") -#define ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS() \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(concat_lists) \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(length) \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(length_across) \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(remove_suffix) \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(remove_prefix) \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(regex_replace) \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(filter_glob) \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(split) \ - __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(join) +#define ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS() \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(concat_lists) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(length) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(length_across) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(remove_suffix) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(remove_prefix) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(regex_replace) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(filter_glob) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(split) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(join) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(value_or_default) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(assign_default) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(error_if_empty) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(null_or_alternative) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(defined_value_or_default) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(assign_defined_default) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(error_if_unset) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(null_if_unset_or_alternative) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(length_of_variable) \ + __ENUMERATE_SHELL_IMMEDIATE_FUNCTION(reexpand) namespace Shell { @@ -376,10 +386,12 @@ public: #undef __ENUMERATE_SHELL_OPTION private: - Shell(Line::Editor&, bool attempt_interactive); + Shell(Line::Editor&, bool attempt_interactive, bool posix_mode = false); Shell(); virtual ~Shell() override; + RefPtr<AST::Node> parse(StringView, bool interactive = false, bool as_command = true) const; + void timer_event(Core::TimerEvent&) override; bool is_allowed_to_modify_termios(const AST::Command&) const; @@ -450,6 +462,7 @@ private: bool m_is_interactive { true }; bool m_is_subshell { false }; bool m_should_reinstall_signal_handlers { true }; + bool m_in_posix_mode { false }; ShellError m_error { ShellError::None }; DeprecatedString m_error_description; |