diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/support/strongly_connected_components.h | 262 |
1 files changed, 262 insertions, 0 deletions
diff --git a/src/support/strongly_connected_components.h b/src/support/strongly_connected_components.h new file mode 100644 index 000000000..d54200ad9 --- /dev/null +++ b/src/support/strongly_connected_components.h @@ -0,0 +1,262 @@ +/* + * Copyright 2024 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_support_strongly_connected_components_h +#define wasm_support_strongly_connected_components_h + +#include <cassert> +#include <optional> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include <iostream> + +namespace wasm { + +// A CRTP utility implementing Tarjan's Strongly Connected Component algorithm +// (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) +// in terms of iterators. Given the beginning and end iterators over the +// elements in a graph an implementation of `pushChildren` in the CRTP subclass +// that pushes an element's children, provides an iterable over the SCCs, each +// of which is an iterable over the elements in the SCC. All implemented +// iterators are input iterators that mutate the underlying state, so this +// utility can only be used for single-pass algorithms. +template<typename It, typename Class> struct SCCs { + SCCs(It inputIt, It inputEnd) : inputIt(inputIt), inputEnd(inputEnd) {} + +private: + using T = typename It::value_type; + + // The iterators over the graph we are calculating the SCCs for. + It inputIt; + It inputEnd; + + // Stack of pending elements to visit, used instead of a recursive visitor. + struct WorkItem { + T item; + std::optional<T> parent = std::nullopt; + bool processedChildren = false; + }; + std::vector<WorkItem> workStack; + + // The Tarjan's algorithm stack. Similar to a DFS stack, but elements are only + // popped off once they are committed to a strongly connected component. + // Elements stay on the stack after they are visited iff they have a back edge + // to an element earlier in the stack. + std::vector<T> stack; + + struct ElementInfo { + // Index assigned based on element visitation order. + size_t index; + // The smallest index of the elements reachable from this element. + size_t lowlink; + // Whether this element is still on `stack`. + bool onStack; + }; + std::unordered_map<T, ElementInfo> elementInfo; + + // The parent to record when calling into the subclass to push children. + std::optional<T> currParent; + + // The root (i.e. deepest element in the stack) for the current SCC. Empty + // whenever we have yet to find the next SCC. + std::optional<T> currRoot; + + bool stepToNextGroup() { + while (inputIt != inputEnd || !workStack.empty()) { + if (workStack.empty()) { + workStack.push_back({*inputIt++}); + } + while (!workStack.empty()) { + auto& work = workStack.back(); + auto& item = work.item; + + if (!work.processedChildren) { + auto newIndex = elementInfo.size(); + auto [it, inserted] = + elementInfo.insert({item, {newIndex, newIndex, true}}); + if (inserted) { + // This is a new item we have never seen before. We have already + // initialized its associated data. + stack.push_back(item); + + // Leave the element on the work stack because we will have to do + // more work after we have finished processing its children. + work.processedChildren = true; + currParent = item; + static_cast<Class*>(this)->pushChildren(item); + currParent = std::nullopt; + // Process the pushed children first; we will come back to this item + // later. + continue; + } + + auto& info = it->second; + if (info.onStack) { + assert(work.parent); + // Item is already in the current SCC. Update the parent's lowlink + // if this child has a smaller index than we have seen so far. + auto& parentLowlink = elementInfo[*work.parent].lowlink; + parentLowlink = std::min(parentLowlink, info.index); + } else { + // Item is in an SCC we have already processed, so ignore it + // entirely. + } + // Do not recurse for this item we have seen before. We are done with + // it. + workStack.pop_back(); + continue; + } + + // We have finished processing the children for the current element, so + // we know its final lowlink value. Use it to potentially update the + // parent's lowlink value. + auto& info = elementInfo[item]; + if (work.parent) { + auto& parentLowlink = elementInfo[*work.parent].lowlink; + parentLowlink = std::min(parentLowlink, info.lowlink); + } + + if (info.index == info.lowlink) { + // This element reaches and is reachable by all shallower elements in + // the stack (otherwise they would have already been popped) and does + // not itself reach any deeper elements, so we have found an SCC and + // the current item is its root. + currRoot = item; + workStack.pop_back(); + return true; + } + workStack.pop_back(); + } + } + // We are at the end. + return false; + } + + void stepToNextElem() { + assert(currRoot); + if (stack.back() == *currRoot) { + // This was the last element in the current SCC. We have to find the next + // SCC now. + currRoot = std::nullopt; + } + elementInfo[stack.back()].onStack = false; + stack.pop_back(); + } + + void pushChildren(const T& parent) { + static_assert(&SCCs<It, Class>::pushChildren != &Class::pushChildren, + "SCCs subclass must implement `pushChildren`"); + } + +protected: + // Call this from `Class::pushChildren` to add a child. + void push(const T& item) { + assert(currParent); + workStack.push_back({item, currParent}); + } + +public: + struct SCC { + SCCs<It, Class>* parent; + + // Iterate over the elements in a strongly connected component. + struct Iterator { + using value_type = T; + using difference_type = std::ptrdiff_t; + using reference = T&; + using pointer = T*; + using iterator_category = std::input_iterator_tag; + + SCCs<It, Class>* parent; + std::optional<T> val = std::nullopt; + + bool isEnd() const { return !parent || !parent->currRoot; } + bool operator==(const Iterator& other) const { + return isEnd() == other.isEnd(); + } + bool operator!=(const Iterator& other) const { return !(*this == other); } + T operator*() { return *val; } + T* operator->() { return &*val; } + void setVal() { + if (isEnd()) { + val = std::nullopt; + } else { + val = parent->stack.back(); + } + } + + Iterator& operator++() { + parent->stepToNextElem(); + setVal(); + return *this; + } + Iterator operator++(int) { + auto it = *this; + ++(*this); + return it; + } + }; + + Iterator begin() { + Iterator it = {parent}; + it.setVal(); + return it; + } + Iterator end() { return {nullptr}; } + }; + + // Iterate over the strongly connected components of the graph. + struct Iterator { + using value_type = SCC; + using different_type = std::ptrdiff_t; + using reference = SCC; + using pointer = SCC*; + using iterator_category = std::input_iterator_tag; + + // scc.parent is null iff we are at the end. + SCC scc; + + bool isEnd() const { return !scc.parent; } + bool operator==(const Iterator& other) const { + return isEnd() == other.isEnd(); + } + bool operator!=(const Iterator& other) const { return !(*this == other); } + SCC operator*() { return scc; } + SCC* operator->() { return &scc; } + Iterator& operator++() { + // Skip the rest of the current SCC, if for some reason it was not + // consumed. + for (auto elem : *(*this)) { + (void)elem; + } + if (!scc.parent->stepToNextGroup()) { + // We are at the end, so mark ourselves as such. + scc.parent = nullptr; + } + return *this; + } + void operator++(int) { ++(*this); } + }; + + Iterator begin() { return ++Iterator{this}; } + Iterator end() { return {nullptr}; } +}; + +} // namespace wasm + +#endif // wasm_support_strongly_connected_components_h |