summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAli Mohammad Pur <ali.mpfard@gmail.com>2022-04-15 01:50:36 +0430
committerAli Mohammad Pur <Ali.mpfard@gmail.com>2022-04-18 19:53:10 +0430
commit4ede121d31f43a787b32fc4c137582638ea305e1 (patch)
treee8a011b40e00d6848eafb72c401d8b4e884c49eb
parent6aceec45357f8e253bc169f1ed961fc26038a3da (diff)
downloadserenity-4ede121d31f43a787b32fc4c137582638ea305e1.zip
Shell: Add support for regex match patterns
We previously allowed globs as match patterns, but for more complex matching needs, it's nice to have regular expressions. As the existing "name a part of the match" concept maps nicely onto named capture groups, we can simply reuse the same code and make named groups available in the match body.
-rw-r--r--Userland/Shell/AST.cpp124
-rw-r--r--Userland/Shell/AST.h3
-rw-r--r--Userland/Shell/Formatter.cpp24
-rw-r--r--Userland/Shell/NodeVisitor.cpp6
-rw-r--r--Userland/Shell/Parser.cpp119
-rw-r--r--Userland/Shell/Parser.h12
6 files changed, 218 insertions, 70 deletions
diff --git a/Userland/Shell/AST.cpp b/Userland/Shell/AST.cpp
index 205a79f1c3..f941f88409 100644
--- a/Userland/Shell/AST.cpp
+++ b/Userland/Shell/AST.cpp
@@ -2117,8 +2117,15 @@ void MatchExpr::dump(int level) const
builder.append(')');
}
print_indented(builder.string_view(), level + 2);
- for (auto& node : entry.options)
- node.dump(level + 3);
+ entry.options.visit(
+ [&](NonnullRefPtrVector<Node> const& options) {
+ for (auto& option : options)
+ option.dump(level + 3);
+ },
+ [&](Vector<Regex<ECMA262>> const& options) {
+ for (auto& option : options)
+ print_indented(String::formatted("(regex: {})", option.pattern_value), level + 3);
+ });
print_indented("(execute)", level + 2);
if (entry.body)
entry.body->dump(level + 3);
@@ -2136,39 +2143,59 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
auto list = value->resolve_as_list(shell);
auto list_matches = [&](auto&& pattern, auto& spans) {
- if (pattern.size() != list.size())
- return false;
+ if constexpr (IsSame<RemoveCVReference<decltype(pattern)>, Regex<ECMA262>>) {
+ if (list.size() != 1)
+ return false;
+ auto& subject = list.first();
+ auto match = pattern.match(subject);
+ if (!match.success)
+ return false;
- for (size_t i = 0; i < pattern.size(); ++i) {
- Vector<AK::MaskSpan> mask_spans;
- if (!list[i].matches(pattern[i], mask_spans))
+ spans.ensure_capacity(match.n_capture_groups);
+ for (size_t i = 0; i < match.n_capture_groups; ++i) {
+ auto& capture = match.capture_group_matches[0][i];
+ spans.append(capture.view.to_string());
+ }
+ return true;
+ } else {
+ if (pattern.size() != list.size())
return false;
- for (auto& span : mask_spans)
- spans.append(list[i].substring(span.start, span.length));
- }
- return true;
+ for (size_t i = 0; i < pattern.size(); ++i) {
+ Vector<AK::MaskSpan> mask_spans;
+ if (!list[i].matches(pattern[i], mask_spans))
+ return false;
+ for (auto& span : mask_spans)
+ spans.append(list[i].substring(span.start, span.length));
+ }
+
+ return true;
+ }
};
- auto resolve_pattern = [&](auto& option) {
- Vector<String> pattern;
- if (option.is_glob()) {
- pattern.append(static_cast<const Glob*>(&option)->text());
- } else if (option.is_bareword()) {
- pattern.append(static_cast<const BarewordLiteral*>(&option)->text());
+ auto resolve_pattern = [&](auto& option) -> decltype(auto) {
+ if constexpr (IsSame<RemoveCVReference<decltype(option)>, Regex<ECMA262>>) {
+ return option;
} else {
- auto list = option.run(shell);
- if (shell && shell->has_any_error())
- return pattern;
+ Vector<String> pattern;
+ if (option.is_glob()) {
+ pattern.append(static_cast<const Glob*>(&option)->text());
+ } else if (option.is_bareword()) {
+ pattern.append(static_cast<const BarewordLiteral*>(&option)->text());
+ } else {
+ auto list = option.run(shell);
+ if (shell && shell->has_any_error())
+ return pattern;
+
+ option.for_each_entry(shell, [&](auto&& value) {
+ pattern.extend(value->resolve_as_list(nullptr)); // Note: 'nullptr' incurs special behavior,
+ // asking the node for a 'raw' value.
+ return IterationDecision::Continue;
+ });
+ }
- option.for_each_entry(shell, [&](auto&& value) {
- pattern.extend(value->resolve_as_list(nullptr)); // Note: 'nullptr' incurs special behavior,
- // asking the node for a 'raw' value.
- return IterationDecision::Continue;
- });
+ return pattern;
}
-
- return pattern;
};
auto frame = shell->push_frame(String::formatted("match ({})", this));
@@ -2176,24 +2203,31 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
shell->set_local_variable(m_expr_name, value, true);
for (auto& entry : m_entries) {
- for (auto& option : entry.options) {
- Vector<String> spans;
- if (list_matches(resolve_pattern(option), spans)) {
- if (entry.body) {
- if (entry.match_names.has_value()) {
- size_t i = 0;
- for (auto& name : entry.match_names.value()) {
- if (spans.size() > i)
- shell->set_local_variable(name, make_ref_counted<AST::StringValue>(spans[i]), true);
- ++i;
+ auto result = entry.options.visit([&](auto& options) -> Variant<IterationDecision, RefPtr<Value>> {
+ for (auto& option : options) {
+ Vector<String> spans;
+ if (list_matches(resolve_pattern(option), spans)) {
+ if (entry.body) {
+ if (entry.match_names.has_value()) {
+ size_t i = 0;
+ for (auto& name : entry.match_names.value()) {
+ if (spans.size() > i)
+ shell->set_local_variable(name, make_ref_counted<AST::StringValue>(spans[i]), true);
+ ++i;
+ }
}
+ return entry.body->run(shell);
}
- return entry.body->run(shell);
- } else {
- return make_ref_counted<AST::ListValue>({});
+ return RefPtr<Value>(make_ref_counted<AST::ListValue>({}));
}
}
- }
+ return IterationDecision::Continue;
+ });
+ if (result.has<IterationDecision>() && result.get<IterationDecision>() == IterationDecision::Break)
+ break;
+
+ if (result.has<RefPtr<Value>>())
+ return move(result).get<RefPtr<Value>>();
}
shell->raise_error(Shell::ShellError::EvaluatedSyntaxError, "Non-exhaustive match rules!", position());
@@ -2211,8 +2245,12 @@ void MatchExpr::highlight_in_editor(Line::Editor& editor, Shell& shell, Highligh
for (auto& entry : m_entries) {
metadata.is_first_in_list = false;
- for (auto& option : entry.options)
- option.highlight_in_editor(editor, shell, metadata);
+ entry.options.visit(
+ [&](NonnullRefPtrVector<Node>& node_options) {
+ for (auto& option : node_options)
+ option.highlight_in_editor(editor, shell, metadata);
+ },
+ [](auto&) {});
metadata.is_first_in_list = true;
if (entry.body)
diff --git a/Userland/Shell/AST.h b/Userland/Shell/AST.h
index c3290c080d..9bb16dc033 100644
--- a/Userland/Shell/AST.h
+++ b/Userland/Shell/AST.h
@@ -17,6 +17,7 @@
#include <AK/Types.h>
#include <AK/Vector.h>
#include <LibLine/Editor.h>
+#include <LibRegex/Regex.h>
namespace Shell::AST {
@@ -1051,7 +1052,7 @@ private:
};
struct MatchEntry {
- NonnullRefPtrVector<Node> options;
+ Variant<NonnullRefPtrVector<Node>, Vector<Regex<ECMA262>>> options;
Optional<Vector<String>> match_names;
Optional<Position> match_as_position;
Vector<Position> pipe_positions;
diff --git a/Userland/Shell/Formatter.cpp b/Userland/Shell/Formatter.cpp
index f005a2c71d..5f3ab9d840 100644
--- a/Userland/Shell/Formatter.cpp
+++ b/Userland/Shell/Formatter.cpp
@@ -583,12 +583,24 @@ void Formatter::visit(const AST::MatchExpr* node)
insert_separator();
first_entry = false;
auto first = true;
- for (auto& option : entry.options) {
- if (!first)
- current_builder().append(" | ");
- first = false;
- option.visit(*this);
- }
+ entry.options.visit(
+ [&](NonnullRefPtrVector<AST::Node> const& patterns) {
+ for (auto& option : patterns) {
+ if (!first)
+ current_builder().append(" | ");
+ first = false;
+ option.visit(*this);
+ }
+ },
+ [&](Vector<Regex<ECMA262>> const& patterns) {
+ for (auto& option : patterns) {
+ if (!first)
+ current_builder().append(" | ");
+ first = false;
+ auto node = make_ref_counted<AST::BarewordLiteral>(AST::Position {}, option.pattern_value);
+ node->visit(*this);
+ }
+ });
current_builder().append(' ');
if (entry.match_names.has_value() && !entry.match_names.value().is_empty()) {
diff --git a/Userland/Shell/NodeVisitor.cpp b/Userland/Shell/NodeVisitor.cpp
index 4c3c16d87a..834c78a3f0 100644
--- a/Userland/Shell/NodeVisitor.cpp
+++ b/Userland/Shell/NodeVisitor.cpp
@@ -141,8 +141,10 @@ void NodeVisitor::visit(const AST::MatchExpr* node)
{
node->matched_expr()->visit(*this);
for (auto& entry : node->entries()) {
- for (auto& option : entry.options)
- option.visit(*this);
+ if (auto* ptr = entry.options.get_pointer<NonnullRefPtrVector<Node>>()) {
+ for (auto& option : *ptr)
+ option.visit(*this);
+ }
if (entry.body)
entry.body->visit(*this);
}
diff --git a/Userland/Shell/Parser.cpp b/Userland/Shell/Parser.cpp
index 68d189ce04..22826199fb 100644
--- a/Userland/Shell/Parser.cpp
+++ b/Userland/Shell/Parser.cpp
@@ -84,9 +84,9 @@ bool Parser::expect(StringView expected)
}
template<typename A, typename... Args>
-NonnullRefPtr<A> Parser::create(Args... args)
+NonnullRefPtr<A> Parser::create(Args&&... args)
{
- return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, args...));
+ return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, forward<Args>(args)...));
}
[[nodiscard]] OwnPtr<Parser::ScopedOffset> Parser::push_start()
@@ -892,10 +892,10 @@ RefPtr<AST::Node> Parser::parse_match_expr()
for (;;) {
auto entry = parse_match_entry();
consume_while(is_any_of(" \t\n"));
- if (entry.options.is_empty())
+ if (entry.options.visit([](auto& x) { return x.is_empty(); }))
break;
- entries.append(entry);
+ entries.append(move(entry));
}
consume_while(is_any_of(" \t\n"));
@@ -916,15 +916,32 @@ AST::MatchEntry Parser::parse_match_entry()
auto rule_start = push_start();
NonnullRefPtrVector<AST::Node> patterns;
+ Vector<Regex<ECMA262>> regexps;
Vector<AST::Position> pipe_positions;
Optional<Vector<String>> match_names;
Optional<AST::Position> match_as_position;
+ enum {
+ Regex,
+ Glob,
+ } pattern_kind;
- auto pattern = parse_match_pattern();
- if (!pattern)
- return { {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) };
+ consume_while(is_any_of(" \t\n"));
+
+ auto regex_pattern = parse_regex_pattern();
+ if (regex_pattern.has_value()) {
+ if (auto error = regex_pattern.value().parser_result.error; error != regex::Error::NoError)
+ return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>(regex::get_error_string(error), false) };
+
+ pattern_kind = Regex;
+ regexps.append(regex_pattern.release_value());
+ } else {
+ auto glob_pattern = parse_match_pattern();
+ if (!glob_pattern)
+ return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) };
- patterns.append(pattern.release_nonnull());
+ pattern_kind = Glob;
+ patterns.append(glob_pattern.release_nonnull());
+ }
consume_while(is_any_of(" \t\n"));
@@ -934,14 +951,28 @@ AST::MatchEntry Parser::parse_match_entry()
while (expect('|')) {
pipe_positions.append({ previous_pipe_start_position, m_offset, previous_pipe_start_line, line() });
consume_while(is_any_of(" \t\n"));
- auto pattern = parse_match_pattern();
- if (!pattern) {
- error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true);
+ switch (pattern_kind) {
+ case Regex: {
+ auto pattern = parse_regex_pattern();
+ if (!pattern.has_value()) {
+ error = create<AST::SyntaxError>("Expected a regex pattern to follow '|' in 'match' body", true);
+ break;
+ }
+ regexps.append(pattern.release_value());
break;
}
- consume_while(is_any_of(" \t\n"));
+ case Glob: {
+ auto pattern = parse_match_pattern();
+ if (!pattern) {
+ error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true);
+ break;
+ }
+ patterns.append(pattern.release_nonnull());
+ break;
+ }
+ }
- patterns.append(pattern.release_nonnull());
+ consume_while(is_any_of(" \t\n"));
previous_pipe_start_line = line();
previous_pipe_start_position = m_offset;
@@ -951,7 +982,7 @@ AST::MatchEntry Parser::parse_match_entry()
auto as_start_position = m_offset;
auto as_start_line = line();
- if (expect("as")) {
+ if (pattern_kind == Glob && expect("as")) {
match_as_position = AST::Position { as_start_position, m_offset, as_start_line, line() };
consume_while(is_any_of(" \t\n"));
if (!expect('(')) {
@@ -975,6 +1006,31 @@ AST::MatchEntry Parser::parse_match_entry()
consume_while(is_any_of(" \t\n"));
}
+ if (pattern_kind == Regex) {
+ Vector<String> names;
+ for (auto& regex : regexps) {
+ if (names.is_empty()) {
+ for (auto& name : regex.parser_result.capture_groups)
+ names.append(name);
+ } else {
+ size_t index = 0;
+ for (auto& name : regex.parser_result.capture_groups) {
+ if (names.size() <= index) {
+ names.append(name);
+ continue;
+ }
+
+ if (names[index] != name) {
+ if (!error)
+ error = create<AST::SyntaxError>("Alternative regex patterns must have the same capture groups", false);
+ break;
+ }
+ }
+ }
+ }
+ match_names = move(names);
+ }
+
if (!expect('{')) {
if (!error)
error = create<AST::SyntaxError>("Expected an open brace '{' to start a match entry body", true);
@@ -992,7 +1048,10 @@ AST::MatchEntry Parser::parse_match_entry()
else if (error)
body = error;
- return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
+ if (pattern_kind == Glob)
+ return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
+
+ return { move(regexps), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
}
RefPtr<AST::Node> Parser::parse_match_pattern()
@@ -1000,6 +1059,36 @@ RefPtr<AST::Node> Parser::parse_match_pattern()
return parse_expression();
}
+Optional<Regex<ECMA262>> Parser::parse_regex_pattern()
+{
+ auto rule_start = push_start();
+
+ auto start = m_offset;
+ if (!expect("(?:") && !expect("(?<"))
+ return {};
+
+ size_t open_parens = 1;
+ while (open_parens > 0) {
+ if (at_end())
+ break;
+
+ if (next_is("("))
+ ++open_parens;
+ else if (next_is(")"))
+ --open_parens;
+ consume();
+ }
+
+ if (open_parens != 0) {
+ restore_to(*rule_start);
+ return {};
+ }
+
+ auto end = m_offset;
+ auto pattern = m_input.substring_view(start, end - start);
+ return Regex<ECMA262>(pattern);
+}
+
RefPtr<AST::Node> Parser::parse_redirection()
{
auto rule_start = push_start();
diff --git a/Userland/Shell/Parser.h b/Userland/Shell/Parser.h
index ec8ebfc919..41a2a333fe 100644
--- a/Userland/Shell/Parser.h
+++ b/Userland/Shell/Parser.h
@@ -25,7 +25,7 @@ public:
RefPtr<AST::Node> parse();
/// Parse the given string *as* an expression
- /// that is to forefully enclose it in double-quotes.
+ /// that is to forcefully enclose it in double-quotes.
RefPtr<AST::Node> parse_as_single_expression();
NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions();
@@ -77,6 +77,7 @@ private:
RefPtr<AST::Node> parse_match_expr();
AST::MatchEntry parse_match_entry();
RefPtr<AST::Node> parse_match_pattern();
+ Optional<Regex<ECMA262>> parse_regex_pattern();
RefPtr<AST::Node> parse_redirection();
RefPtr<AST::Node> parse_list_expression();
RefPtr<AST::Node> parse_expression();
@@ -98,7 +99,7 @@ private:
bool parse_heredoc_entries();
template<typename A, typename... Args>
- NonnullRefPtr<A> create(Args... args);
+ NonnullRefPtr<A> create(Args&&... args);
void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); }
bool at_end() const
@@ -228,10 +229,15 @@ subshell :: '{' toplevel '}'
match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'
match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}'
+ | regex_pattern ws* '{' toplevel '}'
identifier_list :: '(' (identifier ws*)* ')'
-match_pattern :: expression (ws* '|' ws* expression)*
+regex_pattern :: regex_pattern (ws* '|' ws* regex_pattern)*
+
+match_pattern :: expression (ws* '|' ws* expression)*
+
+regex_pattern :: ('(?:' | '(?<') .* ')' { enclosed string must contain balanced parentheses }
command :: redirection command
| list_expression command?