summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/support/strongly_connected_components.h262
-rw-r--r--test/gtest/CMakeLists.txt1
-rw-r--r--test/gtest/scc.cpp311
3 files changed, 574 insertions, 0 deletions
diff --git a/src/support/strongly_connected_components.h b/src/support/strongly_connected_components.h
new file mode 100644
index 000000000..d54200ad9
--- /dev/null
+++ b/src/support/strongly_connected_components.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2024 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_support_strongly_connected_components_h
+#define wasm_support_strongly_connected_components_h
+
+#include <cassert>
+#include <optional>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include <iostream>
+
+namespace wasm {
+
+// A CRTP utility implementing Tarjan's Strongly Connected Component algorithm
+// (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
+// in terms of iterators. Given the beginning and end iterators over the
+// elements in a graph an implementation of `pushChildren` in the CRTP subclass
+// that pushes an element's children, provides an iterable over the SCCs, each
+// of which is an iterable over the elements in the SCC. All implemented
+// iterators are input iterators that mutate the underlying state, so this
+// utility can only be used for single-pass algorithms.
+template<typename It, typename Class> struct SCCs {
+ SCCs(It inputIt, It inputEnd) : inputIt(inputIt), inputEnd(inputEnd) {}
+
+private:
+ using T = typename It::value_type;
+
+ // The iterators over the graph we are calculating the SCCs for.
+ It inputIt;
+ It inputEnd;
+
+ // Stack of pending elements to visit, used instead of a recursive visitor.
+ struct WorkItem {
+ T item;
+ std::optional<T> parent = std::nullopt;
+ bool processedChildren = false;
+ };
+ std::vector<WorkItem> workStack;
+
+ // The Tarjan's algorithm stack. Similar to a DFS stack, but elements are only
+ // popped off once they are committed to a strongly connected component.
+ // Elements stay on the stack after they are visited iff they have a back edge
+ // to an element earlier in the stack.
+ std::vector<T> stack;
+
+ struct ElementInfo {
+ // Index assigned based on element visitation order.
+ size_t index;
+ // The smallest index of the elements reachable from this element.
+ size_t lowlink;
+ // Whether this element is still on `stack`.
+ bool onStack;
+ };
+ std::unordered_map<T, ElementInfo> elementInfo;
+
+ // The parent to record when calling into the subclass to push children.
+ std::optional<T> currParent;
+
+ // The root (i.e. deepest element in the stack) for the current SCC. Empty
+ // whenever we have yet to find the next SCC.
+ std::optional<T> currRoot;
+
+ bool stepToNextGroup() {
+ while (inputIt != inputEnd || !workStack.empty()) {
+ if (workStack.empty()) {
+ workStack.push_back({*inputIt++});
+ }
+ while (!workStack.empty()) {
+ auto& work = workStack.back();
+ auto& item = work.item;
+
+ if (!work.processedChildren) {
+ auto newIndex = elementInfo.size();
+ auto [it, inserted] =
+ elementInfo.insert({item, {newIndex, newIndex, true}});
+ if (inserted) {
+ // This is a new item we have never seen before. We have already
+ // initialized its associated data.
+ stack.push_back(item);
+
+ // Leave the element on the work stack because we will have to do
+ // more work after we have finished processing its children.
+ work.processedChildren = true;
+ currParent = item;
+ static_cast<Class*>(this)->pushChildren(item);
+ currParent = std::nullopt;
+ // Process the pushed children first; we will come back to this item
+ // later.
+ continue;
+ }
+
+ auto& info = it->second;
+ if (info.onStack) {
+ assert(work.parent);
+ // Item is already in the current SCC. Update the parent's lowlink
+ // if this child has a smaller index than we have seen so far.
+ auto& parentLowlink = elementInfo[*work.parent].lowlink;
+ parentLowlink = std::min(parentLowlink, info.index);
+ } else {
+ // Item is in an SCC we have already processed, so ignore it
+ // entirely.
+ }
+ // Do not recurse for this item we have seen before. We are done with
+ // it.
+ workStack.pop_back();
+ continue;
+ }
+
+ // We have finished processing the children for the current element, so
+ // we know its final lowlink value. Use it to potentially update the
+ // parent's lowlink value.
+ auto& info = elementInfo[item];
+ if (work.parent) {
+ auto& parentLowlink = elementInfo[*work.parent].lowlink;
+ parentLowlink = std::min(parentLowlink, info.lowlink);
+ }
+
+ if (info.index == info.lowlink) {
+ // This element reaches and is reachable by all shallower elements in
+ // the stack (otherwise they would have already been popped) and does
+ // not itself reach any deeper elements, so we have found an SCC and
+ // the current item is its root.
+ currRoot = item;
+ workStack.pop_back();
+ return true;
+ }
+ workStack.pop_back();
+ }
+ }
+ // We are at the end.
+ return false;
+ }
+
+ void stepToNextElem() {
+ assert(currRoot);
+ if (stack.back() == *currRoot) {
+ // This was the last element in the current SCC. We have to find the next
+ // SCC now.
+ currRoot = std::nullopt;
+ }
+ elementInfo[stack.back()].onStack = false;
+ stack.pop_back();
+ }
+
+ void pushChildren(const T& parent) {
+ static_assert(&SCCs<It, Class>::pushChildren != &Class::pushChildren,
+ "SCCs subclass must implement `pushChildren`");
+ }
+
+protected:
+ // Call this from `Class::pushChildren` to add a child.
+ void push(const T& item) {
+ assert(currParent);
+ workStack.push_back({item, currParent});
+ }
+
+public:
+ struct SCC {
+ SCCs<It, Class>* parent;
+
+ // Iterate over the elements in a strongly connected component.
+ struct Iterator {
+ using value_type = T;
+ using difference_type = std::ptrdiff_t;
+ using reference = T&;
+ using pointer = T*;
+ using iterator_category = std::input_iterator_tag;
+
+ SCCs<It, Class>* parent;
+ std::optional<T> val = std::nullopt;
+
+ bool isEnd() const { return !parent || !parent->currRoot; }
+ bool operator==(const Iterator& other) const {
+ return isEnd() == other.isEnd();
+ }
+ bool operator!=(const Iterator& other) const { return !(*this == other); }
+ T operator*() { return *val; }
+ T* operator->() { return &*val; }
+ void setVal() {
+ if (isEnd()) {
+ val = std::nullopt;
+ } else {
+ val = parent->stack.back();
+ }
+ }
+
+ Iterator& operator++() {
+ parent->stepToNextElem();
+ setVal();
+ return *this;
+ }
+ Iterator operator++(int) {
+ auto it = *this;
+ ++(*this);
+ return it;
+ }
+ };
+
+ Iterator begin() {
+ Iterator it = {parent};
+ it.setVal();
+ return it;
+ }
+ Iterator end() { return {nullptr}; }
+ };
+
+ // Iterate over the strongly connected components of the graph.
+ struct Iterator {
+ using value_type = SCC;
+ using different_type = std::ptrdiff_t;
+ using reference = SCC;
+ using pointer = SCC*;
+ using iterator_category = std::input_iterator_tag;
+
+ // scc.parent is null iff we are at the end.
+ SCC scc;
+
+ bool isEnd() const { return !scc.parent; }
+ bool operator==(const Iterator& other) const {
+ return isEnd() == other.isEnd();
+ }
+ bool operator!=(const Iterator& other) const { return !(*this == other); }
+ SCC operator*() { return scc; }
+ SCC* operator->() { return &scc; }
+ Iterator& operator++() {
+ // Skip the rest of the current SCC, if for some reason it was not
+ // consumed.
+ for (auto elem : *(*this)) {
+ (void)elem;
+ }
+ if (!scc.parent->stepToNextGroup()) {
+ // We are at the end, so mark ourselves as such.
+ scc.parent = nullptr;
+ }
+ return *this;
+ }
+ void operator++(int) { ++(*this); }
+ };
+
+ Iterator begin() { return ++Iterator{this}; }
+ Iterator end() { return {nullptr}; }
+};
+
+} // namespace wasm
+
+#endif // wasm_support_strongly_connected_components_h
diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt
index 303e386c6..4604eea77 100644
--- a/test/gtest/CMakeLists.txt
+++ b/test/gtest/CMakeLists.txt
@@ -8,6 +8,7 @@ set(unittest_SOURCES
lattices.cpp
possible-contents.cpp
printing.cpp
+ scc.cpp
stringify.cpp
suffix_tree.cpp
type-builder.cpp
diff --git a/test/gtest/scc.cpp b/test/gtest/scc.cpp
new file mode 100644
index 000000000..baf509d7f
--- /dev/null
+++ b/test/gtest/scc.cpp
@@ -0,0 +1,311 @@
+/*
+ * Copyright 2024 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+
+#include "support/strongly_connected_components.h"
+#include "gtest/gtest.h"
+
+using namespace wasm;
+
+struct IntIt {
+ using value_type = unsigned;
+ using difference_type = std::ptrdiff_t;
+ using reference = unsigned&;
+ using pointer = unsigned*;
+ using iterator_category = std::input_iterator_tag;
+
+ unsigned i = 0;
+
+ bool operator==(const IntIt& other) const { return i == other.i; }
+ bool operator!=(const IntIt& other) const { return !(*this == other); }
+ unsigned operator*() { return i; }
+ IntIt& operator++() {
+ ++i;
+ return *this;
+ }
+ IntIt operator++(int) {
+ IntIt it = *this;
+ ++(*this);
+ return it;
+ }
+};
+
+struct Graph {
+ std::vector<std::set<unsigned>> graph;
+
+ explicit Graph(unsigned size) : graph(size) {}
+
+ void addEdge(unsigned from, unsigned to) {
+ assert(from < graph.size());
+ assert(to < graph.size());
+ graph[from].insert(to);
+ }
+
+ const std::set<unsigned>& children(unsigned parent) const {
+ return graph[parent];
+ }
+
+ IntIt begin() const { return {}; }
+ IntIt end() const { return {unsigned(graph.size())}; }
+};
+
+struct GraphSCCs : SCCs<IntIt, GraphSCCs> {
+ const Graph& graph;
+ GraphSCCs(const Graph& graph)
+ : SCCs<IntIt, GraphSCCs>(graph.begin(), graph.end()), graph(graph) {}
+
+ void pushChildren(unsigned parent) {
+ for (auto child : graph.children(parent)) {
+ push(child);
+ }
+ }
+};
+
+using SCCSet = std::set<std::set<unsigned>>;
+
+SCCSet getSCCs(const Graph& graph) {
+ SCCSet set;
+ for (auto scc : GraphSCCs(graph)) {
+ set.emplace(scc.begin(), scc.end());
+ }
+ return set;
+}
+
+TEST(SCCTest, Empty) {
+ Graph graph(0);
+ GraphSCCs sccs(graph);
+ auto it = sccs.begin();
+ EXPECT_EQ(it, sccs.end());
+
+ EXPECT_EQ(getSCCs(graph), SCCSet{});
+}
+
+TEST(SCCTest, Singleton) {
+ Graph graph(1);
+ GraphSCCs sccs(graph);
+
+ auto it = sccs.begin();
+ ASSERT_NE(it, sccs.end());
+
+ auto scc = *it;
+ auto sccIt = scc.begin();
+ ASSERT_NE(sccIt, scc.end());
+ EXPECT_EQ(*sccIt, 0u);
+
+ ++sccIt;
+ EXPECT_EQ(sccIt, scc.end());
+ ASSERT_NE(it, sccs.end());
+
+ ++it;
+ EXPECT_EQ(it, sccs.end());
+
+ EXPECT_EQ(getSCCs(graph), SCCSet{{0}});
+}
+
+TEST(SCCTest, SingletonLoop) {
+ // Same as above, but now there is an edge. The results are the same.
+ Graph graph(1);
+ graph.addEdge(0, 0);
+ GraphSCCs sccs(graph);
+
+ auto it = sccs.begin();
+ ASSERT_NE(it, sccs.end());
+
+ auto scc = *it;
+ auto sccIt = scc.begin();
+ ASSERT_NE(sccIt, scc.end());
+ EXPECT_EQ(*sccIt, 0u);
+
+ ++sccIt;
+ EXPECT_EQ(sccIt, scc.end());
+ ASSERT_NE(it, sccs.end());
+
+ ++it;
+ EXPECT_EQ(it, sccs.end());
+
+ EXPECT_EQ(getSCCs(graph), SCCSet{{0}});
+}
+
+TEST(SCCTest, DerefPostIncrement) {
+ // Valid input iterators must have *it++ yield the value from before the
+ // increment.
+ Graph graph(1);
+ GraphSCCs sccs(graph);
+ auto it = sccs.begin()->begin();
+ EXPECT_EQ(*it++, 0u);
+}
+
+TEST(SCCTest, Disconnected) {
+ // There are no edges between the vertices.
+ Graph graph(3);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0}, {1}, {2}}));
+}
+
+TEST(SCCTest, Chain) {
+ // 0 -> 1 -> 2
+ Graph graph(3);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 2);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0}, {1}, {2}}));
+}
+
+TEST(SCCTest, ChainReverse) {
+ // 0 <- 1 <- 2
+ Graph graph(3);
+ graph.addEdge(2, 1);
+ graph.addEdge(1, 0);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0}, {1}, {2}}));
+}
+
+TEST(SCCTest, Loop) {
+ // 0 -> 1 -> 2 -> 0
+ // There is only one SCC here.
+ Graph graph(3);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 2);
+ graph.addEdge(2, 0);
+ EXPECT_EQ(getSCCs(graph), (SCCSet({{0, 1, 2}})));
+}
+
+TEST(SCCTest, LoopReverse) {
+ // 0 <- 1 <- 2 <- 0
+ // There is only one SCC here.
+ Graph graph(3);
+ graph.addEdge(0, 2);
+ graph.addEdge(2, 1);
+ graph.addEdge(1, 0);
+ EXPECT_EQ(getSCCs(graph), (SCCSet({{0, 1, 2}})));
+}
+
+TEST(SCCTest, Full) {
+ // Fully connected graph has one SCC.
+ Graph graph(3);
+ for (unsigned i = 0; i < 3; ++i) {
+ for (unsigned j = 0; j < 3; ++j) {
+ graph.addEdge(i, j);
+ }
+ }
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1, 2}}));
+}
+
+TEST(SCCTest, TwoAndOne) {
+ // 0 <-> 1 -> 2
+ Graph graph(3);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 0);
+ graph.addEdge(1, 2);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2}}));
+}
+
+TEST(SCCTest, TwoAndOneRedundant) {
+ // 2 <- 0 <-> 1 -> 2
+ Graph graph(3);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 0);
+ graph.addEdge(1, 2);
+ // New from previous, doesn't affect result.
+ graph.addEdge(0, 2);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2}}));
+}
+
+TEST(SCCTest, OneAndTwo) {
+ // 0 -> 1 <-> 2
+ Graph graph(3);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 2);
+ graph.addEdge(2, 1);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0}, {1, 2}}));
+}
+
+TEST(SCCTest, TwoAndTwoDisconnected) {
+ // 0 <-> 1 2 <-> 3
+ Graph graph(4);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 0);
+ graph.addEdge(2, 3);
+ graph.addEdge(3, 2);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2, 3}}));
+}
+
+TEST(SCCTest, TwoAndTwo) {
+ // 0 <-> 1 -> 2 <-> 3
+ Graph graph(4);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 0);
+ graph.addEdge(2, 3);
+ graph.addEdge(3, 2);
+ // New from previous, doesn't affect result
+ graph.addEdge(1, 2);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2, 3}}));
+}
+
+TEST(SCCTest, DoublyLinkedList) {
+ // 0 <-> 1 <-> 2 <-> 3
+ Graph graph(4);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 0);
+ graph.addEdge(2, 3);
+ graph.addEdge(3, 2);
+ graph.addEdge(1, 2);
+ // New from previous, combines SCCs.
+ graph.addEdge(2, 1);
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1, 2, 3}}));
+}
+
+TEST(SCCTest, BigTree) {
+ // 012 <- 345 -> 678
+ Graph graph(9);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 2);
+ graph.addEdge(2, 0);
+
+ graph.addEdge(3, 4);
+ graph.addEdge(4, 5);
+ graph.addEdge(5, 3);
+
+ graph.addEdge(6, 7);
+ graph.addEdge(7, 8);
+ graph.addEdge(8, 6);
+
+ graph.addEdge(3, 2);
+ graph.addEdge(5, 6);
+
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}));
+}
+
+TEST(SCCTest, BigDiamond) {
+ // 67 <- 01 <- 23 -> 45 -> 67
+ Graph graph(8);
+ graph.addEdge(0, 1);
+ graph.addEdge(1, 0);
+
+ graph.addEdge(2, 3);
+ graph.addEdge(3, 2);
+
+ graph.addEdge(4, 5);
+ graph.addEdge(5, 4);
+
+ graph.addEdge(6, 7);
+ graph.addEdge(7, 6);
+
+ graph.addEdge(2, 0);
+ graph.addEdge(2, 4);
+ graph.addEdge(0, 6);
+ graph.addEdge(4, 6);
+
+ EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2, 3}, {4, 5}, {6, 7}}));
+}