diff options
-rw-r--r-- | src/support/strongly_connected_components.h | 262 | ||||
-rw-r--r-- | test/gtest/CMakeLists.txt | 1 | ||||
-rw-r--r-- | test/gtest/scc.cpp | 311 |
3 files changed, 574 insertions, 0 deletions
diff --git a/src/support/strongly_connected_components.h b/src/support/strongly_connected_components.h new file mode 100644 index 000000000..d54200ad9 --- /dev/null +++ b/src/support/strongly_connected_components.h @@ -0,0 +1,262 @@ +/* + * Copyright 2024 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_support_strongly_connected_components_h +#define wasm_support_strongly_connected_components_h + +#include <cassert> +#include <optional> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include <iostream> + +namespace wasm { + +// A CRTP utility implementing Tarjan's Strongly Connected Component algorithm +// (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) +// in terms of iterators. Given the beginning and end iterators over the +// elements in a graph an implementation of `pushChildren` in the CRTP subclass +// that pushes an element's children, provides an iterable over the SCCs, each +// of which is an iterable over the elements in the SCC. All implemented +// iterators are input iterators that mutate the underlying state, so this +// utility can only be used for single-pass algorithms. +template<typename It, typename Class> struct SCCs { + SCCs(It inputIt, It inputEnd) : inputIt(inputIt), inputEnd(inputEnd) {} + +private: + using T = typename It::value_type; + + // The iterators over the graph we are calculating the SCCs for. + It inputIt; + It inputEnd; + + // Stack of pending elements to visit, used instead of a recursive visitor. + struct WorkItem { + T item; + std::optional<T> parent = std::nullopt; + bool processedChildren = false; + }; + std::vector<WorkItem> workStack; + + // The Tarjan's algorithm stack. Similar to a DFS stack, but elements are only + // popped off once they are committed to a strongly connected component. + // Elements stay on the stack after they are visited iff they have a back edge + // to an element earlier in the stack. + std::vector<T> stack; + + struct ElementInfo { + // Index assigned based on element visitation order. + size_t index; + // The smallest index of the elements reachable from this element. + size_t lowlink; + // Whether this element is still on `stack`. + bool onStack; + }; + std::unordered_map<T, ElementInfo> elementInfo; + + // The parent to record when calling into the subclass to push children. + std::optional<T> currParent; + + // The root (i.e. deepest element in the stack) for the current SCC. Empty + // whenever we have yet to find the next SCC. + std::optional<T> currRoot; + + bool stepToNextGroup() { + while (inputIt != inputEnd || !workStack.empty()) { + if (workStack.empty()) { + workStack.push_back({*inputIt++}); + } + while (!workStack.empty()) { + auto& work = workStack.back(); + auto& item = work.item; + + if (!work.processedChildren) { + auto newIndex = elementInfo.size(); + auto [it, inserted] = + elementInfo.insert({item, {newIndex, newIndex, true}}); + if (inserted) { + // This is a new item we have never seen before. We have already + // initialized its associated data. + stack.push_back(item); + + // Leave the element on the work stack because we will have to do + // more work after we have finished processing its children. + work.processedChildren = true; + currParent = item; + static_cast<Class*>(this)->pushChildren(item); + currParent = std::nullopt; + // Process the pushed children first; we will come back to this item + // later. + continue; + } + + auto& info = it->second; + if (info.onStack) { + assert(work.parent); + // Item is already in the current SCC. Update the parent's lowlink + // if this child has a smaller index than we have seen so far. + auto& parentLowlink = elementInfo[*work.parent].lowlink; + parentLowlink = std::min(parentLowlink, info.index); + } else { + // Item is in an SCC we have already processed, so ignore it + // entirely. + } + // Do not recurse for this item we have seen before. We are done with + // it. + workStack.pop_back(); + continue; + } + + // We have finished processing the children for the current element, so + // we know its final lowlink value. Use it to potentially update the + // parent's lowlink value. + auto& info = elementInfo[item]; + if (work.parent) { + auto& parentLowlink = elementInfo[*work.parent].lowlink; + parentLowlink = std::min(parentLowlink, info.lowlink); + } + + if (info.index == info.lowlink) { + // This element reaches and is reachable by all shallower elements in + // the stack (otherwise they would have already been popped) and does + // not itself reach any deeper elements, so we have found an SCC and + // the current item is its root. + currRoot = item; + workStack.pop_back(); + return true; + } + workStack.pop_back(); + } + } + // We are at the end. + return false; + } + + void stepToNextElem() { + assert(currRoot); + if (stack.back() == *currRoot) { + // This was the last element in the current SCC. We have to find the next + // SCC now. + currRoot = std::nullopt; + } + elementInfo[stack.back()].onStack = false; + stack.pop_back(); + } + + void pushChildren(const T& parent) { + static_assert(&SCCs<It, Class>::pushChildren != &Class::pushChildren, + "SCCs subclass must implement `pushChildren`"); + } + +protected: + // Call this from `Class::pushChildren` to add a child. + void push(const T& item) { + assert(currParent); + workStack.push_back({item, currParent}); + } + +public: + struct SCC { + SCCs<It, Class>* parent; + + // Iterate over the elements in a strongly connected component. + struct Iterator { + using value_type = T; + using difference_type = std::ptrdiff_t; + using reference = T&; + using pointer = T*; + using iterator_category = std::input_iterator_tag; + + SCCs<It, Class>* parent; + std::optional<T> val = std::nullopt; + + bool isEnd() const { return !parent || !parent->currRoot; } + bool operator==(const Iterator& other) const { + return isEnd() == other.isEnd(); + } + bool operator!=(const Iterator& other) const { return !(*this == other); } + T operator*() { return *val; } + T* operator->() { return &*val; } + void setVal() { + if (isEnd()) { + val = std::nullopt; + } else { + val = parent->stack.back(); + } + } + + Iterator& operator++() { + parent->stepToNextElem(); + setVal(); + return *this; + } + Iterator operator++(int) { + auto it = *this; + ++(*this); + return it; + } + }; + + Iterator begin() { + Iterator it = {parent}; + it.setVal(); + return it; + } + Iterator end() { return {nullptr}; } + }; + + // Iterate over the strongly connected components of the graph. + struct Iterator { + using value_type = SCC; + using different_type = std::ptrdiff_t; + using reference = SCC; + using pointer = SCC*; + using iterator_category = std::input_iterator_tag; + + // scc.parent is null iff we are at the end. + SCC scc; + + bool isEnd() const { return !scc.parent; } + bool operator==(const Iterator& other) const { + return isEnd() == other.isEnd(); + } + bool operator!=(const Iterator& other) const { return !(*this == other); } + SCC operator*() { return scc; } + SCC* operator->() { return &scc; } + Iterator& operator++() { + // Skip the rest of the current SCC, if for some reason it was not + // consumed. + for (auto elem : *(*this)) { + (void)elem; + } + if (!scc.parent->stepToNextGroup()) { + // We are at the end, so mark ourselves as such. + scc.parent = nullptr; + } + return *this; + } + void operator++(int) { ++(*this); } + }; + + Iterator begin() { return ++Iterator{this}; } + Iterator end() { return {nullptr}; } +}; + +} // namespace wasm + +#endif // wasm_support_strongly_connected_components_h diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index 303e386c6..4604eea77 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -8,6 +8,7 @@ set(unittest_SOURCES lattices.cpp possible-contents.cpp printing.cpp + scc.cpp stringify.cpp suffix_tree.cpp type-builder.cpp diff --git a/test/gtest/scc.cpp b/test/gtest/scc.cpp new file mode 100644 index 000000000..baf509d7f --- /dev/null +++ b/test/gtest/scc.cpp @@ -0,0 +1,311 @@ +/* + * Copyright 2024 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> + +#include "support/strongly_connected_components.h" +#include "gtest/gtest.h" + +using namespace wasm; + +struct IntIt { + using value_type = unsigned; + using difference_type = std::ptrdiff_t; + using reference = unsigned&; + using pointer = unsigned*; + using iterator_category = std::input_iterator_tag; + + unsigned i = 0; + + bool operator==(const IntIt& other) const { return i == other.i; } + bool operator!=(const IntIt& other) const { return !(*this == other); } + unsigned operator*() { return i; } + IntIt& operator++() { + ++i; + return *this; + } + IntIt operator++(int) { + IntIt it = *this; + ++(*this); + return it; + } +}; + +struct Graph { + std::vector<std::set<unsigned>> graph; + + explicit Graph(unsigned size) : graph(size) {} + + void addEdge(unsigned from, unsigned to) { + assert(from < graph.size()); + assert(to < graph.size()); + graph[from].insert(to); + } + + const std::set<unsigned>& children(unsigned parent) const { + return graph[parent]; + } + + IntIt begin() const { return {}; } + IntIt end() const { return {unsigned(graph.size())}; } +}; + +struct GraphSCCs : SCCs<IntIt, GraphSCCs> { + const Graph& graph; + GraphSCCs(const Graph& graph) + : SCCs<IntIt, GraphSCCs>(graph.begin(), graph.end()), graph(graph) {} + + void pushChildren(unsigned parent) { + for (auto child : graph.children(parent)) { + push(child); + } + } +}; + +using SCCSet = std::set<std::set<unsigned>>; + +SCCSet getSCCs(const Graph& graph) { + SCCSet set; + for (auto scc : GraphSCCs(graph)) { + set.emplace(scc.begin(), scc.end()); + } + return set; +} + +TEST(SCCTest, Empty) { + Graph graph(0); + GraphSCCs sccs(graph); + auto it = sccs.begin(); + EXPECT_EQ(it, sccs.end()); + + EXPECT_EQ(getSCCs(graph), SCCSet{}); +} + +TEST(SCCTest, Singleton) { + Graph graph(1); + GraphSCCs sccs(graph); + + auto it = sccs.begin(); + ASSERT_NE(it, sccs.end()); + + auto scc = *it; + auto sccIt = scc.begin(); + ASSERT_NE(sccIt, scc.end()); + EXPECT_EQ(*sccIt, 0u); + + ++sccIt; + EXPECT_EQ(sccIt, scc.end()); + ASSERT_NE(it, sccs.end()); + + ++it; + EXPECT_EQ(it, sccs.end()); + + EXPECT_EQ(getSCCs(graph), SCCSet{{0}}); +} + +TEST(SCCTest, SingletonLoop) { + // Same as above, but now there is an edge. The results are the same. + Graph graph(1); + graph.addEdge(0, 0); + GraphSCCs sccs(graph); + + auto it = sccs.begin(); + ASSERT_NE(it, sccs.end()); + + auto scc = *it; + auto sccIt = scc.begin(); + ASSERT_NE(sccIt, scc.end()); + EXPECT_EQ(*sccIt, 0u); + + ++sccIt; + EXPECT_EQ(sccIt, scc.end()); + ASSERT_NE(it, sccs.end()); + + ++it; + EXPECT_EQ(it, sccs.end()); + + EXPECT_EQ(getSCCs(graph), SCCSet{{0}}); +} + +TEST(SCCTest, DerefPostIncrement) { + // Valid input iterators must have *it++ yield the value from before the + // increment. + Graph graph(1); + GraphSCCs sccs(graph); + auto it = sccs.begin()->begin(); + EXPECT_EQ(*it++, 0u); +} + +TEST(SCCTest, Disconnected) { + // There are no edges between the vertices. + Graph graph(3); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0}, {1}, {2}})); +} + +TEST(SCCTest, Chain) { + // 0 -> 1 -> 2 + Graph graph(3); + graph.addEdge(0, 1); + graph.addEdge(1, 2); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0}, {1}, {2}})); +} + +TEST(SCCTest, ChainReverse) { + // 0 <- 1 <- 2 + Graph graph(3); + graph.addEdge(2, 1); + graph.addEdge(1, 0); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0}, {1}, {2}})); +} + +TEST(SCCTest, Loop) { + // 0 -> 1 -> 2 -> 0 + // There is only one SCC here. + Graph graph(3); + graph.addEdge(0, 1); + graph.addEdge(1, 2); + graph.addEdge(2, 0); + EXPECT_EQ(getSCCs(graph), (SCCSet({{0, 1, 2}}))); +} + +TEST(SCCTest, LoopReverse) { + // 0 <- 1 <- 2 <- 0 + // There is only one SCC here. + Graph graph(3); + graph.addEdge(0, 2); + graph.addEdge(2, 1); + graph.addEdge(1, 0); + EXPECT_EQ(getSCCs(graph), (SCCSet({{0, 1, 2}}))); +} + +TEST(SCCTest, Full) { + // Fully connected graph has one SCC. + Graph graph(3); + for (unsigned i = 0; i < 3; ++i) { + for (unsigned j = 0; j < 3; ++j) { + graph.addEdge(i, j); + } + } + EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1, 2}})); +} + +TEST(SCCTest, TwoAndOne) { + // 0 <-> 1 -> 2 + Graph graph(3); + graph.addEdge(0, 1); + graph.addEdge(1, 0); + graph.addEdge(1, 2); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2}})); +} + +TEST(SCCTest, TwoAndOneRedundant) { + // 2 <- 0 <-> 1 -> 2 + Graph graph(3); + graph.addEdge(0, 1); + graph.addEdge(1, 0); + graph.addEdge(1, 2); + // New from previous, doesn't affect result. + graph.addEdge(0, 2); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2}})); +} + +TEST(SCCTest, OneAndTwo) { + // 0 -> 1 <-> 2 + Graph graph(3); + graph.addEdge(0, 1); + graph.addEdge(1, 2); + graph.addEdge(2, 1); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0}, {1, 2}})); +} + +TEST(SCCTest, TwoAndTwoDisconnected) { + // 0 <-> 1 2 <-> 3 + Graph graph(4); + graph.addEdge(0, 1); + graph.addEdge(1, 0); + graph.addEdge(2, 3); + graph.addEdge(3, 2); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2, 3}})); +} + +TEST(SCCTest, TwoAndTwo) { + // 0 <-> 1 -> 2 <-> 3 + Graph graph(4); + graph.addEdge(0, 1); + graph.addEdge(1, 0); + graph.addEdge(2, 3); + graph.addEdge(3, 2); + // New from previous, doesn't affect result + graph.addEdge(1, 2); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2, 3}})); +} + +TEST(SCCTest, DoublyLinkedList) { + // 0 <-> 1 <-> 2 <-> 3 + Graph graph(4); + graph.addEdge(0, 1); + graph.addEdge(1, 0); + graph.addEdge(2, 3); + graph.addEdge(3, 2); + graph.addEdge(1, 2); + // New from previous, combines SCCs. + graph.addEdge(2, 1); + EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1, 2, 3}})); +} + +TEST(SCCTest, BigTree) { + // 012 <- 345 -> 678 + Graph graph(9); + graph.addEdge(0, 1); + graph.addEdge(1, 2); + graph.addEdge(2, 0); + + graph.addEdge(3, 4); + graph.addEdge(4, 5); + graph.addEdge(5, 3); + + graph.addEdge(6, 7); + graph.addEdge(7, 8); + graph.addEdge(8, 6); + + graph.addEdge(3, 2); + graph.addEdge(5, 6); + + EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); +} + +TEST(SCCTest, BigDiamond) { + // 67 <- 01 <- 23 -> 45 -> 67 + Graph graph(8); + graph.addEdge(0, 1); + graph.addEdge(1, 0); + + graph.addEdge(2, 3); + graph.addEdge(3, 2); + + graph.addEdge(4, 5); + graph.addEdge(5, 4); + + graph.addEdge(6, 7); + graph.addEdge(7, 6); + + graph.addEdge(2, 0); + graph.addEdge(2, 4); + graph.addEdge(0, 6); + graph.addEdge(4, 6); + + EXPECT_EQ(getSCCs(graph), (SCCSet{{0, 1}, {2, 3}, {4, 5}, {6, 7}})); +} |