summaryrefslogtreecommitdiff
path: root/Userland
diff options
context:
space:
mode:
authorEli Youngs <eli.m.youngs@gmail.com>2022-12-10 18:03:58 -0800
committerAndrew Kaster <andrewdkaster@gmail.com>2022-12-16 10:41:56 -0700
commit9caa3d2b55d1f67e9ae8203ed49f192ca538c042 (patch)
treecbe49b428f00d8aa55059eb6684a96a54b30413e /Userland
parent950b36f95df23e2b11ef15351d0208699c7677ba (diff)
downloadserenity-9caa3d2b55d1f67e9ae8203ed49f192ca538c042.zip
Userland: Add a tsort utility
Diffstat (limited to 'Userland')
-rw-r--r--Userland/Utilities/tsort.cpp145
1 files changed, 145 insertions, 0 deletions
diff --git a/Userland/Utilities/tsort.cpp b/Userland/Utilities/tsort.cpp
new file mode 100644
index 0000000000..76acdaa17c
--- /dev/null
+++ b/Userland/Utilities/tsort.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022, Eli Youngs <eli.m.youngs@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/CharacterTypes.h>
+#include <AK/HashMap.h>
+#include <LibCore/ArgsParser.h>
+#include <LibCore/Stream.h>
+#include <LibCore/System.h>
+#include <LibMain/Main.h>
+
+enum NodeStatus {
+ NotSeen,
+ Seen,
+ Prioritized,
+};
+
+struct Node {
+ StringView name;
+ OrderedHashTable<StringView> ancestors;
+ NodeStatus status;
+};
+
+using NodeMap = OrderedHashMap<StringView, Node>;
+using NodeStack = Vector<Node&>;
+
+static void handle_cycle(NodeStack& stack, Node& duplicated_node)
+{
+ // Report on a cycle by moving down the stack of dependencies, logging every node
+ // between the implicit top of the stack (represented by duplicate_node) and that
+ // node's first appearance.
+
+ warnln("tsort: The following nodes form a cycle");
+
+ for (auto it = stack.rbegin(); it != stack.rend(); ++it) {
+ auto node = *it;
+ node.status = NodeStatus::NotSeen;
+ warnln("tsort: {}", node.name);
+ if (node.name == duplicated_node.name)
+ return;
+ }
+}
+
+static void prioritize_nodes(Node& start, NodeMap& node_map, NodeStack& stack)
+{
+ // Prioritize (topologically sort) a subset of a directed graph using a depth first
+ // search. The "deepest" nodes are the earliest ancestors of all other nodes and
+ // have no dependencies. To avoid a stack overflow when processing deep dependency
+ // chains, this function does not call itself recursively. Instead, the recursive
+ // algorithm is implemented on a provided stack.
+
+ assert(stack.is_empty());
+ stack.append(start);
+
+ while (!stack.is_empty()) {
+ auto& node = stack.last();
+
+ // If a node has already been prioritized, it can be ignored.
+ if (node.status == NodeStatus::Prioritized) {
+ stack.take_last();
+ continue;
+ }
+
+ // Keep track of which nodes have been seen to detect cycles.
+ node.status = NodeStatus::Seen;
+
+ if (node.ancestors.is_empty()) {
+ // If a node has no remaining ancestors (dependencies), it either never had
+ // ancestors, or its ancestors have already been prioritized. In either case,
+ // this is now the deepest un-prioritized node, which makes it the next
+ // highest priority.
+ node.status = NodeStatus::Prioritized;
+ outln("{}", stack.take_last().name);
+ } else {
+ auto next_ancestor_name = node.ancestors.pop();
+ auto& next_ancestor = node_map.get(next_ancestor_name).release_value();
+ if (next_ancestor.status == NodeStatus::Seen)
+ // If the same node is seen multiple times, this represents a cycle in
+ // the graph. To avoid an infinite loop, the duplicate node is not added
+ // to the stack a second time. Instead, the edge is deliberately ignored,
+ // and the topological sort proceeds as though the cycle did not exist.
+ handle_cycle(stack, next_ancestor);
+ else
+ // Recursively prioritize all ancestors.
+ stack.append(next_ancestor);
+ }
+ }
+}
+
+ErrorOr<int> serenity_main(Main::Arguments arguments)
+{
+ TRY(Core::System::pledge("stdio rpath"));
+
+ StringView path;
+
+ Core::ArgsParser args_parser;
+ args_parser.add_positional_argument(path, "Path to file", "path", Core::ArgsParser::Required::No);
+ args_parser.parse(arguments);
+
+ auto file = TRY(Core::Stream::File::open_file_or_standard_stream(path, Core::Stream::OpenMode::Read));
+ auto input_bytes = TRY(file->read_until_eof());
+ auto inputs = StringView(input_bytes).split_view_if(is_ascii_space);
+
+ if (inputs.is_empty())
+ return 0;
+
+ if (inputs.size() % 2 != 0) {
+ warnln("tsort: the number of inputs must be even");
+ return 1;
+ }
+
+ NodeMap node_map;
+
+ // Each pair of inputs (e.g. "a b") represents an edge of a directed acyclic graph.
+ // If the same input is repeated (e.g. "a a"), this defines a single node with no
+ // connection to any other nodes. Otherwise, the first input is interpreted as an
+ // ancestor of the second.
+ for (size_t i = 0; i < inputs.size(); i += 2) {
+ auto ancestor_name = inputs[i];
+ auto descendent_name = inputs[i + 1];
+
+ TRY(node_map.try_ensure(descendent_name, [&]() { return Node { descendent_name, OrderedHashTable<StringView> {}, NodeStatus::NotSeen }; }));
+ if (descendent_name != ancestor_name) {
+ TRY(node_map.try_ensure(ancestor_name, [&]() { return Node { ancestor_name, OrderedHashTable<StringView> {}, NodeStatus::NotSeen }; }));
+ // Creating the ancestor_node might cause the node_map to expand, re-hash
+ // its contents, and invalidate existing references to its values. To handle
+ // this, we always get a new reference to the descendent_node.
+ auto& descendent_node = node_map.get(descendent_name).release_value();
+ TRY(descendent_node.ancestors.try_set(ancestor_name));
+ }
+ }
+
+ // Each node must be checked individually, since any node could be disconnected from
+ // the rest of the graph.
+ NodeStack stack;
+ for (auto& entry : node_map) {
+ auto& node_to_prioritize = entry.value;
+ if (node_to_prioritize.status == NodeStatus::NotSeen)
+ prioritize_nodes(node_to_prioritize, node_map, stack);
+ }
+
+ return 0;
+}