diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2022-04-15 01:50:36 +0430 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2022-04-18 19:53:10 +0430 |
commit | 4ede121d31f43a787b32fc4c137582638ea305e1 (patch) | |
tree | e8a011b40e00d6848eafb72c401d8b4e884c49eb | |
parent | 6aceec45357f8e253bc169f1ed961fc26038a3da (diff) | |
download | serenity-4ede121d31f43a787b32fc4c137582638ea305e1.zip |
Shell: Add support for regex match patterns
We previously allowed globs as match patterns, but for more complex
matching needs, it's nice to have regular expressions.
And as the existing "name a part of the match" concept maps nicely to
named capture groups, we can simply reuse the same code and make groups
with names available in the match body.
-rw-r--r-- | Userland/Shell/AST.cpp | 124 | ||||
-rw-r--r-- | Userland/Shell/AST.h | 3 | ||||
-rw-r--r-- | Userland/Shell/Formatter.cpp | 24 | ||||
-rw-r--r-- | Userland/Shell/NodeVisitor.cpp | 6 | ||||
-rw-r--r-- | Userland/Shell/Parser.cpp | 119 | ||||
-rw-r--r-- | Userland/Shell/Parser.h | 12 |
6 files changed, 218 insertions, 70 deletions
diff --git a/Userland/Shell/AST.cpp b/Userland/Shell/AST.cpp index 205a79f1c3..f941f88409 100644 --- a/Userland/Shell/AST.cpp +++ b/Userland/Shell/AST.cpp @@ -2117,8 +2117,15 @@ void MatchExpr::dump(int level) const builder.append(')'); } print_indented(builder.string_view(), level + 2); - for (auto& node : entry.options) - node.dump(level + 3); + entry.options.visit( + [&](NonnullRefPtrVector<Node> const& options) { + for (auto& option : options) + option.dump(level + 3); + }, + [&](Vector<Regex<ECMA262>> const& options) { + for (auto& option : options) + print_indented(String::formatted("(regex: {})", option.pattern_value), level + 3); + }); print_indented("(execute)", level + 2); if (entry.body) entry.body->dump(level + 3); @@ -2136,39 +2143,59 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell) auto list = value->resolve_as_list(shell); auto list_matches = [&](auto&& pattern, auto& spans) { - if (pattern.size() != list.size()) - return false; + if constexpr (IsSame<RemoveCVReference<decltype(pattern)>, Regex<ECMA262>>) { + if (list.size() != 1) + return false; + auto& subject = list.first(); + auto match = pattern.match(subject); + if (!match.success) + return false; - for (size_t i = 0; i < pattern.size(); ++i) { - Vector<AK::MaskSpan> mask_spans; - if (!list[i].matches(pattern[i], mask_spans)) + spans.ensure_capacity(match.n_capture_groups); + for (size_t i = 0; i < match.n_capture_groups; ++i) { + auto& capture = match.capture_group_matches[0][i]; + spans.append(capture.view.to_string()); + } + return true; + } else { + if (pattern.size() != list.size()) return false; - for (auto& span : mask_spans) - spans.append(list[i].substring(span.start, span.length)); - } - return true; + for (size_t i = 0; i < pattern.size(); ++i) { + Vector<AK::MaskSpan> mask_spans; + if (!list[i].matches(pattern[i], mask_spans)) + return false; + for (auto& span : mask_spans) + spans.append(list[i].substring(span.start, span.length)); + } + + return true; + } }; - auto 
resolve_pattern = [&](auto& option) { - Vector<String> pattern; - if (option.is_glob()) { - pattern.append(static_cast<const Glob*>(&option)->text()); - } else if (option.is_bareword()) { - pattern.append(static_cast<const BarewordLiteral*>(&option)->text()); + auto resolve_pattern = [&](auto& option) -> decltype(auto) { + if constexpr (IsSame<RemoveCVReference<decltype(option)>, Regex<ECMA262>>) { + return option; } else { - auto list = option.run(shell); - if (shell && shell->has_any_error()) - return pattern; + Vector<String> pattern; + if (option.is_glob()) { + pattern.append(static_cast<const Glob*>(&option)->text()); + } else if (option.is_bareword()) { + pattern.append(static_cast<const BarewordLiteral*>(&option)->text()); + } else { + auto list = option.run(shell); + if (shell && shell->has_any_error()) + return pattern; + + option.for_each_entry(shell, [&](auto&& value) { + pattern.extend(value->resolve_as_list(nullptr)); // Note: 'nullptr' incurs special behavior, + // asking the node for a 'raw' value. + return IterationDecision::Continue; + }); + } - option.for_each_entry(shell, [&](auto&& value) { - pattern.extend(value->resolve_as_list(nullptr)); // Note: 'nullptr' incurs special behavior, - // asking the node for a 'raw' value. 
- return IterationDecision::Continue; - }); + return pattern; } - - return pattern; }; auto frame = shell->push_frame(String::formatted("match ({})", this)); @@ -2176,24 +2203,31 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell) shell->set_local_variable(m_expr_name, value, true); for (auto& entry : m_entries) { - for (auto& option : entry.options) { - Vector<String> spans; - if (list_matches(resolve_pattern(option), spans)) { - if (entry.body) { - if (entry.match_names.has_value()) { - size_t i = 0; - for (auto& name : entry.match_names.value()) { - if (spans.size() > i) - shell->set_local_variable(name, make_ref_counted<AST::StringValue>(spans[i]), true); - ++i; + auto result = entry.options.visit([&](auto& options) -> Variant<IterationDecision, RefPtr<Value>> { + for (auto& option : options) { + Vector<String> spans; + if (list_matches(resolve_pattern(option), spans)) { + if (entry.body) { + if (entry.match_names.has_value()) { + size_t i = 0; + for (auto& name : entry.match_names.value()) { + if (spans.size() > i) + shell->set_local_variable(name, make_ref_counted<AST::StringValue>(spans[i]), true); + ++i; + } } + return entry.body->run(shell); } - return entry.body->run(shell); - } else { - return make_ref_counted<AST::ListValue>({}); + return RefPtr<Value>(make_ref_counted<AST::ListValue>({})); } } - } + return IterationDecision::Continue; + }); + if (result.has<IterationDecision>() && result.get<IterationDecision>() == IterationDecision::Break) + break; + + if (result.has<RefPtr<Value>>()) + return move(result).get<RefPtr<Value>>(); } shell->raise_error(Shell::ShellError::EvaluatedSyntaxError, "Non-exhaustive match rules!", position()); @@ -2211,8 +2245,12 @@ void MatchExpr::highlight_in_editor(Line::Editor& editor, Shell& shell, Highligh for (auto& entry : m_entries) { metadata.is_first_in_list = false; - for (auto& option : entry.options) - option.highlight_in_editor(editor, shell, metadata); + entry.options.visit( + [&](NonnullRefPtrVector<Node>& 
node_options) { + for (auto& option : node_options) + option.highlight_in_editor(editor, shell, metadata); + }, + [](auto&) {}); metadata.is_first_in_list = true; if (entry.body) diff --git a/Userland/Shell/AST.h b/Userland/Shell/AST.h index c3290c080d..9bb16dc033 100644 --- a/Userland/Shell/AST.h +++ b/Userland/Shell/AST.h @@ -17,6 +17,7 @@ #include <AK/Types.h> #include <AK/Vector.h> #include <LibLine/Editor.h> +#include <LibRegex/Regex.h> namespace Shell::AST { @@ -1051,7 +1052,7 @@ private: }; struct MatchEntry { - NonnullRefPtrVector<Node> options; + Variant<NonnullRefPtrVector<Node>, Vector<Regex<ECMA262>>> options; Optional<Vector<String>> match_names; Optional<Position> match_as_position; Vector<Position> pipe_positions; diff --git a/Userland/Shell/Formatter.cpp b/Userland/Shell/Formatter.cpp index f005a2c71d..5f3ab9d840 100644 --- a/Userland/Shell/Formatter.cpp +++ b/Userland/Shell/Formatter.cpp @@ -583,12 +583,24 @@ void Formatter::visit(const AST::MatchExpr* node) insert_separator(); first_entry = false; auto first = true; - for (auto& option : entry.options) { - if (!first) - current_builder().append(" | "); - first = false; - option.visit(*this); - } + entry.options.visit( + [&](NonnullRefPtrVector<AST::Node> const& patterns) { + for (auto& option : patterns) { + if (!first) + current_builder().append(" | "); + first = false; + option.visit(*this); + } + }, + [&](Vector<Regex<ECMA262>> const& patterns) { + for (auto& option : patterns) { + if (!first) + current_builder().append(" | "); + first = false; + auto node = make_ref_counted<AST::BarewordLiteral>(AST::Position {}, option.pattern_value); + node->visit(*this); + } + }); current_builder().append(' '); if (entry.match_names.has_value() && !entry.match_names.value().is_empty()) { diff --git a/Userland/Shell/NodeVisitor.cpp b/Userland/Shell/NodeVisitor.cpp index 4c3c16d87a..834c78a3f0 100644 --- a/Userland/Shell/NodeVisitor.cpp +++ b/Userland/Shell/NodeVisitor.cpp @@ -141,8 +141,10 @@ void 
NodeVisitor::visit(const AST::MatchExpr* node) { node->matched_expr()->visit(*this); for (auto& entry : node->entries()) { - for (auto& option : entry.options) - option.visit(*this); + if (auto* ptr = entry.options.get_pointer<NonnullRefPtrVector<Node>>()) { + for (auto& option : *ptr) + option.visit(*this); + } if (entry.body) entry.body->visit(*this); } diff --git a/Userland/Shell/Parser.cpp b/Userland/Shell/Parser.cpp index 68d189ce04..22826199fb 100644 --- a/Userland/Shell/Parser.cpp +++ b/Userland/Shell/Parser.cpp @@ -84,9 +84,9 @@ bool Parser::expect(StringView expected) } template<typename A, typename... Args> -NonnullRefPtr<A> Parser::create(Args... args) +NonnullRefPtr<A> Parser::create(Args&&... args) { - return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, args...)); + return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, forward<Args>(args)...)); } [[nodiscard]] OwnPtr<Parser::ScopedOffset> Parser::push_start() @@ -892,10 +892,10 @@ RefPtr<AST::Node> Parser::parse_match_expr() for (;;) { auto entry = parse_match_entry(); consume_while(is_any_of(" \t\n")); - if (entry.options.is_empty()) + if (entry.options.visit([](auto& x) { return x.is_empty(); })) break; - entries.append(entry); + entries.append(move(entry)); } consume_while(is_any_of(" \t\n")); @@ -916,15 +916,32 @@ AST::MatchEntry Parser::parse_match_entry() auto rule_start = push_start(); NonnullRefPtrVector<AST::Node> patterns; + Vector<Regex<ECMA262>> regexps; Vector<AST::Position> pipe_positions; Optional<Vector<String>> match_names; Optional<AST::Position> match_as_position; + enum { + Regex, + Glob, + } pattern_kind; - auto pattern = parse_match_pattern(); - if (!pattern) - return { {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) }; + consume_while(is_any_of(" \t\n")); + + auto regex_pattern = parse_regex_pattern(); + if 
(regex_pattern.has_value()) { + if (auto error = regex_pattern.value().parser_result.error; error != regex::Error::NoError) + return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>(regex::get_error_string(error), false) }; + + pattern_kind = Regex; + regexps.append(regex_pattern.release_value()); + } else { + auto glob_pattern = parse_match_pattern(); + if (!glob_pattern) + return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) }; - patterns.append(pattern.release_nonnull()); + pattern_kind = Glob; + patterns.append(glob_pattern.release_nonnull()); + } consume_while(is_any_of(" \t\n")); @@ -934,14 +951,28 @@ AST::MatchEntry Parser::parse_match_entry() while (expect('|')) { pipe_positions.append({ previous_pipe_start_position, m_offset, previous_pipe_start_line, line() }); consume_while(is_any_of(" \t\n")); - auto pattern = parse_match_pattern(); - if (!pattern) { - error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true); + switch (pattern_kind) { + case Regex: { + auto pattern = parse_regex_pattern(); + if (!pattern.has_value()) { + error = create<AST::SyntaxError>("Expected a regex pattern to follow '|' in 'match' body", true); + break; + } + regexps.append(pattern.release_value()); break; } - consume_while(is_any_of(" \t\n")); + case Glob: { + auto pattern = parse_match_pattern(); + if (!pattern) { + error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true); + break; + } + patterns.append(pattern.release_nonnull()); + break; + } + } - patterns.append(pattern.release_nonnull()); + consume_while(is_any_of(" \t\n")); previous_pipe_start_line = line(); previous_pipe_start_position = m_offset; @@ -951,7 +982,7 @@ AST::MatchEntry Parser::parse_match_entry() auto as_start_position = m_offset; auto as_start_line = line(); - if (expect("as")) { + if (pattern_kind == Glob && expect("as")) { match_as_position = 
AST::Position { as_start_position, m_offset, as_start_line, line() }; consume_while(is_any_of(" \t\n")); if (!expect('(')) { @@ -975,6 +1006,31 @@ AST::MatchEntry Parser::parse_match_entry() consume_while(is_any_of(" \t\n")); } + if (pattern_kind == Regex) { + Vector<String> names; + for (auto& regex : regexps) { + if (names.is_empty()) { + for (auto& name : regex.parser_result.capture_groups) + names.append(name); + } else { + size_t index = 0; + for (auto& name : regex.parser_result.capture_groups) { + if (names.size() <= index) { + names.append(name); + continue; + } + + if (names[index] != name) { + if (!error) + error = create<AST::SyntaxError>("Alternative regex patterns must have the same capture groups", false); + break; + } + } + } + } + match_names = move(names); + } + if (!expect('{')) { if (!error) error = create<AST::SyntaxError>("Expected an open brace '{' to start a match entry body", true); @@ -992,7 +1048,10 @@ AST::MatchEntry Parser::parse_match_entry() else if (error) body = error; - return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) }; + if (pattern_kind == Glob) + return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) }; + + return { move(regexps), move(match_names), move(match_as_position), move(pipe_positions), move(body) }; } RefPtr<AST::Node> Parser::parse_match_pattern() @@ -1000,6 +1059,36 @@ RefPtr<AST::Node> Parser::parse_match_pattern() return parse_expression(); } +Optional<Regex<ECMA262>> Parser::parse_regex_pattern() +{ + auto rule_start = push_start(); + + auto start = m_offset; + if (!expect("(?:") && !expect("(?<")) + return {}; + + size_t open_parens = 1; + while (open_parens > 0) { + if (at_end()) + break; + + if (next_is("(")) + ++open_parens; + else if (next_is(")")) + --open_parens; + consume(); + } + + if (open_parens != 0) { + restore_to(*rule_start); + return {}; + } + + auto end = m_offset; + auto pattern = 
m_input.substring_view(start, end - start); + return Regex<ECMA262>(pattern); +} + RefPtr<AST::Node> Parser::parse_redirection() { auto rule_start = push_start(); diff --git a/Userland/Shell/Parser.h b/Userland/Shell/Parser.h index ec8ebfc919..41a2a333fe 100644 --- a/Userland/Shell/Parser.h +++ b/Userland/Shell/Parser.h @@ -25,7 +25,7 @@ public: RefPtr<AST::Node> parse(); /// Parse the given string *as* an expression - /// that is to forefully enclose it in double-quotes. + /// that is to forcefully enclose it in double-quotes. RefPtr<AST::Node> parse_as_single_expression(); NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions(); @@ -77,6 +77,7 @@ private: RefPtr<AST::Node> parse_match_expr(); AST::MatchEntry parse_match_entry(); RefPtr<AST::Node> parse_match_pattern(); + Optional<Regex<ECMA262>> parse_regex_pattern(); RefPtr<AST::Node> parse_redirection(); RefPtr<AST::Node> parse_list_expression(); RefPtr<AST::Node> parse_expression(); @@ -98,7 +99,7 @@ private: bool parse_heredoc_entries(); template<typename A, typename... Args> - NonnullRefPtr<A> create(Args... args); + NonnullRefPtr<A> create(Args&&... args); void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); } bool at_end() const @@ -228,10 +229,15 @@ subshell :: '{' toplevel '}' match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}' match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}' + | regex_pattern ws* '{' toplevel '}' identifier_list :: '(' (identifier ws*)* ')' -match_pattern :: expression (ws* '|' ws* expression)* +regex_pattern :: regex_pattern (ws* '|' ws* regex_pattern)* + +match_pattern :: expression (ws* '|' ws* expression)* + +regex_pattern :: '(?:' .* ')' { enclosed string must contain balanced parentheses } command :: redirection command | list_expression command? |