/* * Copyright 2024 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef wasm_support_strongly_connected_components_h #define wasm_support_strongly_connected_components_h #include #include #include #include #include #include namespace wasm { // A CRTP utility implementing Tarjan's Strongly Connected Component algorithm // (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) // in terms of iterators. Given the beginning and end iterators over the // elements in a graph an implementation of `pushChildren` in the CRTP subclass // that pushes an element's children, provides an iterable over the SCCs, each // of which is an iterable over the elements in the SCC. All implemented // iterators are input iterators that mutate the underlying state, so this // utility can only be used for single-pass algorithms. template struct SCCs { SCCs(It inputIt, It inputEnd) : inputIt(inputIt), inputEnd(inputEnd) {} private: using T = typename It::value_type; // The iterators over the graph we are calculating the SCCs for. It inputIt; It inputEnd; // Stack of pending elements to visit, used instead of a recursive visitor. struct WorkItem { T item; std::optional parent = std::nullopt; bool processedChildren = false; }; std::vector workStack; // The Tarjan's algorithm stack. Similar to a DFS stack, but elements are only // popped off once they are committed to a strongly connected component. // Elements stay on the stack after they are visited iff they have a back edge // to an element earlier in the stack. std::vector stack; struct ElementInfo { // Index assigned based on element visitation order. size_t index; // The smallest index of the elements reachable from this element. size_t lowlink; // Whether this element is still on `stack`. bool onStack; }; std::unordered_map elementInfo; // The parent to record when calling into the subclass to push children. std::optional currParent; // The root (i.e. deepest element in the stack) for the current SCC. Empty // whenever we have yet to find the next SCC. std::optional currRoot; bool stepToNextGroup() { while (inputIt != inputEnd || !workStack.empty()) { if (workStack.empty()) { workStack.push_back({*inputIt++}); } while (!workStack.empty()) { auto& work = workStack.back(); auto& item = work.item; if (!work.processedChildren) { auto newIndex = elementInfo.size(); auto [it, inserted] = elementInfo.insert({item, {newIndex, newIndex, true}}); if (inserted) { // This is a new item we have never seen before. We have already // initialized its associated data. stack.push_back(item); // Leave the element on the work stack because we will have to do // more work after we have finished processing its children. work.processedChildren = true; currParent = item; static_cast(this)->pushChildren(item); currParent = std::nullopt; // Process the pushed children first; we will come back to this item // later. continue; } auto& info = it->second; if (info.onStack) { assert(work.parent); // Item is already in the current SCC. Update the parent's lowlink // if this child has a smaller index than we have seen so far. auto& parentLowlink = elementInfo[*work.parent].lowlink; parentLowlink = std::min(parentLowlink, info.index); } else { // Item is in an SCC we have already processed, so ignore it // entirely. } // Do not recurse for this item we have seen before. We are done with // it. workStack.pop_back(); continue; } // We have finished processing the children for the current element, so // we know its final lowlink value. Use it to potentially update the // parent's lowlink value. auto& info = elementInfo[item]; if (work.parent) { auto& parentLowlink = elementInfo[*work.parent].lowlink; parentLowlink = std::min(parentLowlink, info.lowlink); } if (info.index == info.lowlink) { // This element reaches and is reachable by all shallower elements in // the stack (otherwise they would have already been popped) and does // not itself reach any deeper elements, so we have found an SCC and // the current item is its root. currRoot = item; workStack.pop_back(); return true; } workStack.pop_back(); } } // We are at the end. return false; } void stepToNextElem() { assert(currRoot); if (stack.back() == *currRoot) { // This was the last element in the current SCC. We have to find the next // SCC now. currRoot = std::nullopt; } elementInfo[stack.back()].onStack = false; stack.pop_back(); } void pushChildren(const T& parent) { static_assert(&SCCs::pushChildren != &Class::pushChildren, "SCCs subclass must implement `pushChildren`"); } protected: // Call this from `Class::pushChildren` to add a child. void push(const T& item) { assert(currParent); workStack.push_back({item, currParent}); } public: struct SCC { SCCs* parent; // Iterate over the elements in a strongly connected component. struct Iterator { using value_type = T; using difference_type = std::ptrdiff_t; using reference = T&; using pointer = T*; using iterator_category = std::input_iterator_tag; SCCs* parent; std::optional val = std::nullopt; bool isEnd() const { return !parent || !parent->currRoot; } bool operator==(const Iterator& other) const { return isEnd() == other.isEnd(); } bool operator!=(const Iterator& other) const { return !(*this == other); } T operator*() { return *val; } T* operator->() { return &*val; } void setVal() { if (isEnd()) { val = std::nullopt; } else { val = parent->stack.back(); } } Iterator& operator++() { parent->stepToNextElem(); setVal(); return *this; } Iterator operator++(int) { auto it = *this; ++(*this); return it; } }; Iterator begin() { Iterator it = {parent}; it.setVal(); return it; } Iterator end() { return {nullptr}; } }; // Iterate over the strongly connected components of the graph. struct Iterator { using value_type = SCC; using difference_type = std::ptrdiff_t; using reference = SCC&; using pointer = SCC*; using iterator_category = std::input_iterator_tag; // scc.parent is null iff we are at the end. SCC scc; bool isEnd() const { return !scc.parent; } bool operator==(const Iterator& other) const { return isEnd() == other.isEnd(); } bool operator!=(const Iterator& other) const { return !(*this == other); } SCC operator*() { return scc; } SCC* operator->() { return &scc; } Iterator& operator++() { // Skip the rest of the current SCC, if for some reason it was not // consumed. for (auto elem : *(*this)) { (void)elem; } if (!scc.parent->stepToNextGroup()) { // We are at the end, so mark ourselves as such. scc.parent = nullptr; } return *this; } void operator++(int) { ++(*this); } }; Iterator begin() { return ++Iterator{this}; } Iterator end() { return {nullptr}; } }; } // namespace wasm #endif // wasm_support_strongly_connected_components_h