diff options
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Shell/AST.cpp | 72 | ||||
-rw-r--r-- | Userland/Shell/AST.h | 34 | ||||
-rw-r--r-- | Userland/Shell/Forward.h | 1 | ||||
-rw-r--r-- | Userland/Shell/NodeVisitor.cpp | 6 | ||||
-rw-r--r-- | Userland/Shell/NodeVisitor.h | 1 | ||||
-rw-r--r-- | Userland/Shell/Parser.cpp | 231 | ||||
-rw-r--r-- | Userland/Shell/Parser.h | 29 |
7 files changed, 364 insertions, 10 deletions
diff --git a/Userland/Shell/AST.cpp b/Userland/Shell/AST.cpp index fbc4241f2d..0863b72fb4 100644 --- a/Userland/Shell/AST.cpp +++ b/Userland/Shell/AST.cpp @@ -1307,6 +1307,78 @@ Glob::~Glob() { } +void Heredoc::dump(int level) const +{ + Node::dump(level); + print_indented("(End Key)", level + 1); + print_indented(m_end, level + 2); + print_indented("(Allows Interpolation)", level + 1); + print_indented(String::formatted("{}", m_allows_interpolation), level + 2); + print_indented("(Contents)", level + 1); + if (m_contents) + m_contents->dump(level + 2); + else + print_indented("(null)", level + 2); +} + +RefPtr<Value> Heredoc::run(RefPtr<Shell> shell) +{ + if (!m_deindent) + return m_contents->run(shell); + + // To deindent, first split to lines... + auto value = m_contents->run(shell); + if (!value) + return value; + auto list = value->resolve_as_list(shell); + // The list better have one entry, otherwise we've put the wrong kind of node inside this heredoc + VERIFY(list.size() == 1); + auto lines = list.first().split_view('\n'); + + // Now just trim each line and put them back in a string + StringBuilder builder { list.first().length() }; + for (auto& line : lines) { + builder.append(line.trim_whitespace(TrimMode::Left)); + builder.append('\n'); + } + + return create<StringValue>(builder.to_string()); +} + +void Heredoc::highlight_in_editor(Line::Editor& editor, Shell& shell, HighlightMetadata metadata) +{ + Line::Style content_style { Line::Style::Foreground(Line::Style::XtermColor::Yellow) }; + if (metadata.is_first_in_list) + content_style.unify_with({ Line::Style::Bold }); + + if (!m_contents) + content_style.unify_with({ Line::Style::Foreground(Line::Style::XtermColor::Red) }, true); + + editor.stylize({ m_position.start_offset, m_position.end_offset }, content_style); + if (m_contents) + m_contents->highlight_in_editor(editor, shell, metadata); +} + +HitTestResult Heredoc::hit_test_position(size_t offset) const +{ + if (!m_contents) + return {}; + + return 
m_contents->hit_test_position(offset); +} + +Heredoc::Heredoc(Position position, String end, bool allow_interpolation, bool deindent) + : Node(move(position)) + , m_end(move(end)) + , m_allows_interpolation(allow_interpolation) + , m_deindent(deindent) +{ +} + +Heredoc::~Heredoc() +{ +} + void HistoryEvent::dump(int level) const { Node::dump(level); diff --git a/Userland/Shell/AST.h b/Userland/Shell/AST.h index 28ff0dac64..20ef502033 100644 --- a/Userland/Shell/AST.h +++ b/Userland/Shell/AST.h @@ -474,6 +474,7 @@ public: ForLoop, FunctionDeclaration, Glob, + Heredoc, HistoryEvent, IfCond, ImmediateExpression, @@ -1313,6 +1314,39 @@ private: NonnullRefPtr<Node> m_right; }; +class Heredoc final : public Node { +public: + Heredoc(Position, String end, bool allow_interpolation, bool deindent); + virtual ~Heredoc(); + virtual void visit(NodeVisitor& visitor) override { visitor.visit(this); } + + const String& end() const { return m_end; } + bool allow_interpolation() const { return m_allows_interpolation; } + bool deindent() const { return m_deindent; } + const RefPtr<AST::Node>& contents() const { return m_contents; } + void set_contents(RefPtr<AST::Node> contents) + { + m_contents = move(contents); + if (m_contents->is_syntax_error()) + set_is_syntax_error(m_contents->syntax_error_node()); + else + clear_syntax_error(); + } + +private: + NODE(Heredoc); + virtual void dump(int level) const override; + virtual RefPtr<Value> run(RefPtr<Shell>) override; + virtual void highlight_in_editor(Line::Editor&, Shell&, HighlightMetadata = {}) override; + virtual HitTestResult hit_test_position(size_t) const override; + virtual RefPtr<Node> leftmost_trivial_literal() const override { return this; }; + + String m_end; + bool m_allows_interpolation { false }; + bool m_deindent { false }; + RefPtr<AST::Node> m_contents; +}; + class StringLiteral final : public Node { public: StringLiteral(Position, String); diff --git a/Userland/Shell/Forward.h b/Userland/Shell/Forward.h index 
43dbf0084c..a1200acec6 100644 --- a/Userland/Shell/Forward.h +++ b/Userland/Shell/Forward.h @@ -34,6 +34,7 @@ class Fd2FdRedirection; class FunctionDeclaration; class ForLoop; class Glob; +class Heredoc; class HistoryEvent; class Execute; class IfCond; diff --git a/Userland/Shell/NodeVisitor.cpp b/Userland/Shell/NodeVisitor.cpp index b7e5cc4d13..4c3c16d87a 100644 --- a/Userland/Shell/NodeVisitor.cpp +++ b/Userland/Shell/NodeVisitor.cpp @@ -101,6 +101,12 @@ void NodeVisitor::visit(const AST::Glob*) { } +void NodeVisitor::visit(const AST::Heredoc* node) +{ + if (node->contents()) + node->contents()->visit(*this); +} + void NodeVisitor::visit(const AST::HistoryEvent*) { } diff --git a/Userland/Shell/NodeVisitor.h b/Userland/Shell/NodeVisitor.h index 410bbfa12f..7e7e52e57a 100644 --- a/Userland/Shell/NodeVisitor.h +++ b/Userland/Shell/NodeVisitor.h @@ -30,6 +30,7 @@ public: virtual void visit(const AST::FunctionDeclaration*); virtual void visit(const AST::ForLoop*); virtual void visit(const AST::Glob*); + virtual void visit(const AST::Heredoc*); virtual void visit(const AST::HistoryEvent*); virtual void visit(const AST::Execute*); virtual void visit(const AST::IfCond*); diff --git a/Userland/Shell/Parser.cpp b/Userland/Shell/Parser.cpp index 699de495ae..0507f055cd 100644 --- a/Userland/Shell/Parser.cpp +++ b/Userland/Shell/Parser.cpp @@ -7,6 +7,7 @@ #include "Parser.h" #include "Shell.h" #include <AK/AllOf.h> +#include <AK/ScopeGuard.h> #include <AK/ScopedValueRollback.h> #include <AK/TemporaryChange.h> #include <ctype.h> @@ -187,9 +188,47 @@ RefPtr<AST::Node> Parser::parse_toplevel() Parser::SequenceParseResult Parser::parse_sequence() { - consume_while(is_any_of(" \t\n;")); // ignore whitespaces or terminators without effect. - NonnullRefPtrVector<AST::Node> left; + auto read_terminators = [&](bool consider_tabs_and_spaces) { + if (m_heredoc_initiations.is_empty()) { + discard_terminators:; + consume_while(is_any_of(consider_tabs_and_spaces ? 
" \t\n;" : "\n;")); + } else { + for (;;) { + if (consider_tabs_and_spaces && (peek() == '\t' || peek() == ' ')) { + consume(); + continue; + } + if (peek() == ';') { + consume(); + continue; + } + if (peek() == '\n') { + auto rule_start = push_start(); + consume(); + if (!parse_heredoc_entries()) { + StringBuilder error_builder; + error_builder.append("Expected to find heredoc entries for "); + bool first = true; + for (auto& entry : m_heredoc_initiations) { + if (first) + error_builder.appendff("{} (at {}:{})", entry.end, entry.node->position().start_line.line_column, entry.node->position().start_line.line_number); + else + error_builder.appendff(", {} (at {}:{})", entry.end, entry.node->position().start_line.line_column, entry.node->position().start_line.line_number); + first = false; + } + left.append(create<AST::SyntaxError>(error_builder.build(), true)); + // Just read the rest of the newlines + goto discard_terminators; + } + continue; + } + break; + } + } + }; + + read_terminators(true); auto rule_start = push_start(); { @@ -203,8 +242,10 @@ Parser::SequenceParseResult Parser::parse_sequence() switch (peek()) { case '}': return { move(left), {}, ShouldReadMoreSequences::No }; - case ';': - case '\n': { + case '\n': + read_terminators(false); + [[fallthrough]]; + case ';': { if (left.is_empty()) break; @@ -235,8 +276,10 @@ Parser::SequenceParseResult Parser::parse_sequence() pos_before_seps = save_offset(); switch (peek()) { - case ';': - case '\n': { + case '\n': + read_terminators(false); + [[fallthrough]]; + case ';': { consume_while(is_any_of("\n;")); auto pos_after_seps = save_offset(); separator_positions.empend(pos_before_seps.offset, pos_after_seps.offset, pos_before_seps.line, pos_after_seps.line); @@ -960,6 +1003,11 @@ RefPtr<AST::Node> Parser::parse_match_pattern() RefPtr<AST::Node> Parser::parse_redirection() { auto rule_start = push_start(); + + // heredoc entry + if (next_is("<<-") || next_is("<<~")) + return nullptr; + auto pipe_fd = 0; auto 
number = consume_while(is_digit); if (number.is_empty()) { @@ -1091,8 +1139,11 @@ RefPtr<AST::Node> Parser::parse_expression() return move(expr); }; - if (strchr("&|)} ;<>\n", starting_char) != nullptr) - return nullptr; + // Heredocs are expressions, so allow them + if (!(next_is("<<-") || next_is("<<~"))) { + if (strchr("&|)} ;<>\n", starting_char) != nullptr) + return nullptr; + } if (m_extra_chars_not_allowed_in_barewords.contains_slow(starting_char)) return nullptr; @@ -1188,6 +1239,13 @@ RefPtr<AST::Node> Parser::parse_string_composite() return inline_command; } + if (auto heredoc = parse_heredoc_initiation_record()) { + if (auto next_part = parse_string_composite()) + return create<AST::Juxtaposition>(heredoc.release_nonnull(), next_part.release_nonnull()); // Concatenate Heredoc StringComposite + + return heredoc; + } + return nullptr; } @@ -1852,6 +1910,163 @@ RefPtr<AST::Node> Parser::parse_brace_expansion_spec() return create<AST::BraceExpansion>(move(subexpressions)); } +RefPtr<AST::Node> Parser::parse_heredoc_initiation_record() +{ + if (!next_is("<<")) + return nullptr; + + auto rule_start = push_start(); + + // '<' '<' + consume(); + consume(); + + HeredocInitiationRecord record; + record.end = "<error>"; + + RefPtr<AST::SyntaxError> syntax_error_node; + + // '-' | '~' + switch (peek()) { + case '-': + record.deindent = false; + consume(); + break; + case '~': + record.deindent = true; + consume(); + break; + default: + restore_to(*rule_start); + return nullptr; + } + + // StringLiteral | bareword + if (auto bareword = parse_bareword()) { + if (bareword->is_syntax_error()) + syntax_error_node = bareword->syntax_error_node(); + else + record.end = static_cast<AST::BarewordLiteral*>(bareword.ptr())->text(); + + record.interpolate = true; + } else if (peek() == '\'') { + consume(); + auto text = consume_while(is_not('\'')); + bool is_error = false; + if (!expect('\'')) + is_error = true; + if (is_error) + syntax_error_node = 
create<AST::SyntaxError>("Expected a terminating single quote", true); + + record.end = text; + record.interpolate = false; + } else { + syntax_error_node = create<AST::SyntaxError>("Expected a bareword or a single-quoted string literal for heredoc end key", true); + } + + auto node = create<AST::Heredoc>(record.end, record.interpolate, record.deindent); + if (syntax_error_node) + node->set_is_syntax_error(*syntax_error_node); + else + node->set_is_syntax_error(*create<AST::SyntaxError>(String::formatted("Expected heredoc contents for heredoc with end key '{}'", node->end()), true)); + + record.node = node; + m_heredoc_initiations.append(move(record)); + + return node; +} + +bool Parser::parse_heredoc_entries() +{ + // Try to parse heredoc entries, as reverse recorded in the initiation records + for (auto& record : m_heredoc_initiations) { + auto rule_start = push_start(); + bool found_key = false; + if (!record.interpolate) { + // Since no interpolation is allowed, just read lines until we hit the key + Optional<Offset> last_line_offset; + for (;;) { + if (at_end()) + break; + if (peek() == '\n') + consume(); + last_line_offset = current_position(); + auto line = consume_while(is_not('\n')); + if (peek() == '\n') + consume(); + if (line.trim_whitespace() == record.end) { + found_key = true; + break; + } + } + + if (!last_line_offset.has_value()) + last_line_offset = current_position(); + // Now just wrap it in a StringLiteral and set it as the node's contents + auto node = create<AST::StringLiteral>(m_input.substring_view(rule_start->offset, last_line_offset->offset - rule_start->offset)); + if (!found_key) + node->set_is_syntax_error(*create<AST::SyntaxError>(String::formatted("Expected to find the heredoc key '{}', but found Eof", record.end), true)); + record.node->set_contents(move(node)); + } else { + // Interpolation is allowed, so we're going to read doublequoted string innards + // until we find a line that contains the key + auto end_condition = 
move(m_end_condition); + found_key = false; + set_end_condition([this, end = record.end, &found_key] { + if (found_key) + return true; + auto offset = current_position(); + auto cond = move(m_end_condition); + ScopeGuard guard { + [&] { + m_end_condition = move(cond); + } + }; + if (peek() == '\n') { + consume(); + auto line = consume_while(is_not('\n')); + if (peek() == '\n') + consume(); + if (line.trim_whitespace() == end) { + restore_to(offset.offset, offset.line); + found_key = true; + return true; + } + } + restore_to(offset.offset, offset.line); + return false; + }); + + auto expr = parse_doublequoted_string_inner(); + set_end_condition(move(end_condition)); + + if (found_key) { + auto offset = current_position(); + if (peek() == '\n') + consume(); + auto line = consume_while(is_not('\n')); + if (peek() == '\n') + consume(); + if (line.trim_whitespace() != record.end) + restore_to(offset.offset, offset.line); + } + + if (!expr && found_key) { + expr = create<AST::StringLiteral>(""); + } else if (!expr) { + expr = create<AST::SyntaxError>(String::formatted("Expected to find a valid string inside a heredoc (with end key '{}')", record.end), true); + } else if (!found_key) { + expr->set_is_syntax_error(*create<AST::SyntaxError>(String::formatted("Expected to find the heredoc key '{}'", record.end), true)); + } + + record.node->set_contents(create<AST::DoubleQuotedString>(move(expr))); + } + } + + m_heredoc_initiations.clear(); + return true; +} + StringView Parser::consume_while(Function<bool(char)> condition) { if (at_end()) diff --git a/Userland/Shell/Parser.h b/Userland/Shell/Parser.h index 20f986fdf4..306c8517d1 100644 --- a/Userland/Shell/Parser.h +++ b/Userland/Shell/Parser.h @@ -46,6 +46,13 @@ private: ShouldReadMoreSequences decision; }; + struct HeredocInitiationRecord { + String end; + RefPtr<AST::Heredoc> node; + bool interpolate { false }; + bool deindent { false }; + }; + constexpr static size_t max_allowed_nested_rule_depth = 2048; 
RefPtr<AST::Node> parse_toplevel(); SequenceParseResult parse_sequence(); @@ -81,11 +88,19 @@ private: RefPtr<AST::Node> parse_brace_expansion(); RefPtr<AST::Node> parse_brace_expansion_spec(); RefPtr<AST::Node> parse_immediate_expression(); + RefPtr<AST::Node> parse_heredoc_initiation_record(); + bool parse_heredoc_entries(); template<typename A, typename... Args> NonnullRefPtr<A> create(Args... args); - bool at_end() const { return m_input.length() <= m_offset; } + void set_end_condition(Function<bool()> condition) { m_end_condition = move(condition); } + bool at_end() const + { + if (m_end_condition && m_end_condition()) + return true; + return m_input.length() <= m_offset; + } char peek(); char consume(); bool expect(char); @@ -144,6 +159,8 @@ private: Vector<size_t> m_rule_start_offsets; Vector<AST::Position::Line> m_rule_start_lines; + Function<bool()> m_end_condition; + Vector<HeredocInitiationRecord> m_heredoc_initiations; Vector<char> m_extra_chars_not_allowed_in_barewords; bool m_is_in_brace_expansion_spec { false }; bool m_continuation_controls_allowed { false }; @@ -169,7 +186,9 @@ and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence | pipe_sequence terminator :: ';' - | '\n' + | '\n' [?!heredoc_stack.is_empty] heredoc_entries + +heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries] variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '* | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '* @@ -233,6 +252,12 @@ string_composite :: string string_composite? | bareword string_composite? | glob string_composite? | brace_expansion string_composite? + | heredoc_initiator string_composite? 
{append to heredoc_entries} + +heredoc_initiator :: '<' '<' '-' bareword {*bareword, interpolate, no deindent} + | '<' '<' '-' "'" [^']* "'" {*string, no interpolate, no deindent} + | '<' '<' '~' bareword {*bareword, interpolate, deindent} + | '<' '<' '~' "'" [^']* "'" {*string, no interpolate, deindent} string :: '"' dquoted_string_inner '"' | "'" [^']* "'" |