diff options
author | Alon Zakai <azakai@google.com> | 2021-09-29 13:28:51 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-29 13:28:51 -0700 |
commit | 6b715407b96d270a5242871bb2ba680566ed304b (patch) | |
tree | 6304cbfe1f034db4a6197586514bb19e9a182183 | |
parent | 6b80ef6cccfd14b9b6620c5d41a148024d78576c (diff) | |
download | binaryen-6b715407b96d270a5242871bb2ba680566ed304b.tar.gz binaryen-6b715407b96d270a5242871bb2ba680566ed304b.tar.bz2 binaryen-6b715407b96d270a5242871bb2ba680566ed304b.zip |
Add a SmallSet and use it in LocalGraph. NFC (#4188)
A SmallSet starts with fixed storage that it uses in the simplest
possible way (linear scan, no sorting). If it exceeds a size then it
starts using a normal std::set. So for small amounts of data it
avoids allocation and any other overhead.
This adds a unit test and also uses it in LocalGraph which provides
a large amount of additional coverage.
I also changed an unrelated data structure from std::map to
std::unordered_map which I noticed while doing profiling in
LocalGraph. (And a tiny bit of additional refactoring there.)
This makes LocalGraph-using passes like ssa-nomerge and
precompute-propagate 10-15% faster on a bunch of real-world
codebases I tested.
-rw-r--r-- | src/ir/local-graph.h | 18 | ||||
-rw-r--r-- | src/passes/Heap2Local.cpp | 2 | ||||
-rw-r--r-- | src/support/small_set.h | 276 | ||||
-rw-r--r-- | test/example/small_set.cpp | 221 | ||||
-rw-r--r-- | test/example/small_set.txt | 1 |
5 files changed, 510 insertions, 8 deletions
diff --git a/src/ir/local-graph.h b/src/ir/local-graph.h index 2e4fdf0fe..111f5c073 100644 --- a/src/ir/local-graph.h +++ b/src/ir/local-graph.h @@ -17,6 +17,7 @@ #ifndef wasm_ir_local_graph_h #define wasm_ir_local_graph_h +#include "support/small_set.h" #include "wasm.h" namespace wasm { @@ -35,12 +36,14 @@ struct LocalGraph { // the constructor computes getSetses, the sets affecting each get LocalGraph(Function* func); - // the local.sets relevant for an index or a get. - typedef std::set<LocalSet*> Sets; + // The local.sets relevant for an index or a get. The most common case is to + // have a single set; after that, to be a phi of 2 items, so we use a small + // set of size 2 to avoid allocations there. + using Sets = SmallSet<LocalSet*, 2>; - typedef std::map<LocalGet*, Sets> GetSetses; + using GetSetses = std::unordered_map<LocalGet*, Sets>; - typedef std::map<Expression*, Expression**> Locations; + using Locations = std::map<Expression*, Expression**>; // externally useful information GetSetses getSetses; // the sets affecting each get. a nullptr set means the @@ -64,9 +67,10 @@ struct LocalGraph { } // for each get, the sets whose values are influenced by that get - std::unordered_map<LocalGet*, std::unordered_set<LocalSet*>> getInfluences; - // for each set, the gets whose values are influenced by that set - std::unordered_map<LocalSet*, std::unordered_set<LocalGet*>> setInfluences; + using GetInfluences = std::unordered_set<LocalSet*>; + std::unordered_map<LocalGet*, GetInfluences> getInfluences; + using SetInfluences = std::unordered_set<LocalGet*>; + std::unordered_map<LocalSet*, SetInfluences> setInfluences; // Optional: Compute the local indexes that are SSA, in the sense of // * a single set for all the gets for that local index diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 796e0fa05..ac6fb49a6 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -686,7 +686,7 @@ struct Heap2LocalOptimizer { return ParentChildInteraction::Mixes; } - std::unordered_set<LocalGet*>* getGetsReached(LocalSet* set) { + LocalGraph::SetInfluences* getGetsReached(LocalSet* set) { auto iter = localGraph.setInfluences.find(set); if (iter != localGraph.setInfluences.end()) { return &iter->second; diff --git a/src/support/small_set.h b/src/support/small_set.h new file mode 100644 index 000000000..9ee0f0403 --- /dev/null +++ b/src/support/small_set.h @@ -0,0 +1,276 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// A set of elements, which is often small. While the number of items is small, +// the implementation simply stores them in an array that is linearly looked +// through. Once the size is large enough, we switch to using a std::set or +// std::unordered_set. +// + +#ifndef wasm_support_small_set_h +#define wasm_support_small_set_h + +#include <algorithm> +#include <array> +#include <cassert> +#include <set> +#include <unordered_set> + +#include "utilities.h" + +namespace wasm { + +template<typename T, size_t N, typename FlexibleSet> class SmallSetBase { + // fixed-space storage + size_t usedFixed = 0; + std::array<T, N> fixed; + + // flexible additional storage + FlexibleSet flexible; + + bool usingFixed() const { + // If the flexible storage contains something, then we are using it. + // Otherwise we use the fixed storage. Note that if we grow and shrink then + // we will stay in flexible mode until we reach a size of zero, at which + // point we return to fixed mode. This is intentional, to avoid a lot of + // movement in switching between fixed and flexible mode. + return flexible.empty(); + } + +public: + using value_type = T; + using key_type = T; + using reference = T&; + using const_reference = const T&; + using set_type = FlexibleSet; + using size_type = size_t; + + SmallSetBase() {} + SmallSetBase(std::initializer_list<T> init) { + for (T item : init) { + insert(item); + } + } + + void insert(const T& x) { + if (usingFixed()) { + if (count(x)) { + // The item already exists, so there is nothing to do. + return; + } + // We must add a new item. + if (usedFixed < N) { + // Room remains in the fixed storage. + fixed[usedFixed++] = x; + } else { + // No fixed storage remains. Switch to flexible. + assert(usedFixed == N); + assert(flexible.empty()); + flexible.insert(fixed.begin(), fixed.begin() + usedFixed); + flexible.insert(x); + assert(!usingFixed()); + usedFixed = 0; + } + } else { + flexible.insert(x); + } + } + + void erase(const T& x) { + if (usingFixed()) { + for (size_t i = 0; i < usedFixed; i++) { + if (fixed[i] == x) { + // We found the item; erase it by moving the final item to replace it + // and truncating the size. + usedFixed--; + fixed[i] = fixed[usedFixed]; + return; + } + } + } else { + flexible.erase(x); + } + } + + size_t count(const T& x) const { + if (usingFixed()) { + // Do a linear search. + for (size_t i = 0; i < usedFixed; i++) { + if (fixed[i] == x) { + return 1; + } + } + return 0; + } else { + return flexible.count(x); + } + } + + size_t size() const { + if (usingFixed()) { + return usedFixed; + } else { + return flexible.size(); + } + } + + bool empty() const { return size() == 0; } + + void clear() { + usedFixed = 0; + flexible.clear(); + } + + bool operator==(const SmallSetBase<T, N, FlexibleSet>& other) const { + if (size() != other.size()) { + return false; + } + if (usingFixed()) { + return std::all_of(fixed.begin(), + fixed.begin() + usedFixed, + [&other](const T& x) { return other.count(x); }); + } else if (other.usingFixed()) { + return std::all_of(other.fixed.begin(), + other.fixed.begin() + other.usedFixed, + [this](const T& x) { return count(x); }); + } else { + return flexible == other.flexible; + } + } + + bool operator!=(const SmallSetBase<T, N, FlexibleSet>& other) const { + return !(*this == other); + } + + // iteration + + template<typename Parent, typename FlexibleIterator> struct IteratorBase { + using iterator_category = std::forward_iterator_tag; + using difference_type = long; + using value_type = T; + using pointer = const value_type*; + using reference = const value_type&; + + Parent* parent; + + using Iterator = IteratorBase<Parent, FlexibleIterator>; + + // Whether we are using fixed storage in the parent. When doing so we have + // the index in fixedIndex. Otherwise, we are using flexible storage, and we + // will use flexibleIterator. + bool usingFixed; + size_t fixedIndex; + FlexibleIterator flexibleIterator; + + IteratorBase(Parent* parent) + : parent(parent), usingFixed(parent->usingFixed()) {} + + void setBegin() { + if (usingFixed) { + fixedIndex = 0; + } else { + flexibleIterator = parent->flexible.begin(); + } + } + + void setEnd() { + if (usingFixed) { + fixedIndex = parent->size(); + } else { + flexibleIterator = parent->flexible.end(); + } + } + + bool operator==(const Iterator& other) const { + if (parent != other.parent) { + return false; + } + // std::set allows changes while iterating. For us here, though, it would + // be nontrivial to support that given we have two iterators that we + // generalize over (switching "in the middle" would not be easy or fast), + // so error on that. + if (usingFixed != other.usingFixed) { + Fatal() << "SmallSet does not support changes while iterating"; + } + if (usingFixed) { + return fixedIndex == other.fixedIndex; + } else { + return flexibleIterator == other.flexibleIterator; + } + } + + bool operator!=(const Iterator& other) const { return !(*this == other); } + + Iterator& operator++() { + if (usingFixed) { + fixedIndex++; + } else { + flexibleIterator++; + } + return *this; + } + + const value_type& operator*() const { + if (this->usingFixed) { + return (this->parent->fixed)[this->fixedIndex]; + } else { + return *this->flexibleIterator; + } + } + }; + + using Iterator = IteratorBase<SmallSetBase<T, N, FlexibleSet>, + typename FlexibleSet::const_iterator>; + + Iterator begin() { + auto ret = Iterator(this); + ret.setBegin(); + return ret; + } + Iterator end() { + auto ret = Iterator(this); + ret.setEnd(); + return ret; + } + Iterator begin() const { + auto ret = Iterator(this); + ret.setBegin(); + return ret; + } + Iterator end() const { + auto ret = Iterator(this); + ret.setEnd(); + return ret; + } + + using iterator = Iterator; + using const_iterator = Iterator; + + // Test-only method to allow unit tests to verify the right internal + // behavior. + bool TEST_ONLY_NEVER_USE_usingFixed() { return usingFixed(); } +}; + +template<typename T, size_t N> +class SmallSet : public SmallSetBase<T, N, std::set<T>> {}; + +template<typename T, size_t N> +class SmallUnorderedSet : public SmallSetBase<T, N, std::unordered_set<T>> {}; + +} // namespace wasm + +#endif // wasm_support_small_set_h diff --git a/test/example/small_set.cpp b/test/example/small_set.cpp new file mode 100644 index 000000000..48bcb6a7b --- /dev/null +++ b/test/example/small_set.cpp @@ -0,0 +1,221 @@ +#include <algorithm> +#include <cassert> +#include <iostream> +#include <vector> + +#include "support/small_set.h" + +using namespace wasm; + +template<typename T> +void assertContents(T& t, const std::vector<int>& expectedContents) { + assert(t.size() == expectedContents.size()); + for (auto item : expectedContents) { + assert(t.count(item) == 1); + } + // Also test this using an iterator and a const iterator to also get + // coverage there. + for (auto& item : t) { + assert(std::find(expectedContents.begin(), expectedContents.end(), item) != + expectedContents.end()); + } + for (const auto& item : t) { + assert(std::find(expectedContents.begin(), expectedContents.end(), item) != + expectedContents.end()); + } +} + +template<typename T> void testAPI() { + { + T t; + + // build up with no duplicates + assert(t.empty()); + assert(t.size() == 0); + t.insert(1); + assertContents(t, {1}); + assert(!t.empty()); + assert(t.size() == 1); + t.insert(2); + assertContents(t, {1, 2}); + assert(!t.empty()); + assert(t.size() == 2); + t.insert(3); + assertContents(t, {1, 2, 3}); + assert(!t.empty()); + + // unwind + assert(t.size() == 3); + t.erase(3); + assertContents(t, {1, 2}); + assert(t.size() == 2); + t.erase(2); + assertContents(t, {1}); + assert(t.size() == 1); + t.erase(1); + assertContents(t, {}); + assert(t.size() == 0); + assert(t.empty()); + } + { + T t; + + // build up with duplicates + t.insert(1); + t.insert(2); + t.insert(2); + t.insert(3); + assertContents(t, {1, 2, 3}); + assert(t.size() == 3); + + // unwind by erasing (in the opposite direction from before) + assert(t.count(1) == 1); + assert(t.count(2) == 1); + assert(t.count(3) == 1); + assert(t.count(1337) == 0); + + t.erase(1); + assert(t.count(1) == 0); + + assert(t.size() == 2); + + assert(t.count(2) == 1); + t.erase(2); + assert(t.count(2) == 0); + + assert(t.size() == 1); + + assert(t.count(3) == 1); + t.erase(3); + + assert(t.count(1) == 0); + assert(t.count(2) == 0); + assert(t.count(3) == 0); + assert(t.count(1337) == 0); + + assert(t.size() == 0); + } + { + T t; + + // build up + t.insert(1); + t.insert(2); + t.insert(3); + + // unwind by clearing + t.clear(); + assert(t.size() == 0); + assert(t.empty()); + } + { + T t, u; + // comparisons + assert(t == u); + t.insert(1); + assert(t != u); + u.insert(1); + assert(t == u); + u.erase(1); + assert(t != u); + u.insert(2); + assert(t != u); + } + { + T t, u; + // comparisons should ignore the order of insertion + t.insert(1); + t.insert(2); + u.insert(2); + u.insert(1); + assert(t == u); + } + { + T t, u; + // comparisons should ignore the mode: in a SmallSet<1>, a set of size 1 + // can be either fixed - if we just grew it to size 1 - or flexible - if we + // grew it enough to be flexible, then shrank it back (as it only becomes + // fixed at size 0). + t.insert(1); + + u.insert(1); + u.insert(2); + + // one extra item in u + assert(t != u); + assert(u != t); + + // remove the extra item + u.erase(2); + + assert(t == u); + assert(u == t); + } + { + T t, u; + // as above, but for size 2, and don't erase the last item added + t.insert(1); + t.insert(2); + + u.insert(3); + u.insert(2); + u.insert(1); + + // one extra item in u + assert(t != u); + assert(u != t); + + // remove the extra item + u.erase(3); + + assert(t == u); + assert(u == t); + } +} + +template<typename T> void testInternals() { + { + T s; + // Start out using fixed storage. + assert(s.TEST_ONLY_NEVER_USE_usingFixed()); + // Adding one item still keeps us using fixed storage, as that is the exact + // amount we have in fact. + s.insert(0); + assert(s.TEST_ONLY_NEVER_USE_usingFixed()); + // Adding one more item forces us to use flexible storage. + s.insert(1); + assert(!s.TEST_ONLY_NEVER_USE_usingFixed()); + // Removing an item returns us to size 1, *but we keep using flexible + // storage*. We do not ping-pong between flexible and fixed; once flexible, + // we stay that way. + s.erase(0); + assert(!s.TEST_ONLY_NEVER_USE_usingFixed()); + // However, removing all items does return us to using fixed storage, should + // we ever insert again. + s.erase(1); + assert(s.empty()); + assert(s.TEST_ONLY_NEVER_USE_usingFixed()); + // And once more we can add an additional item while remaining fixed. + s.insert(10); + assert(s.TEST_ONLY_NEVER_USE_usingFixed()); + } +} + +int main() { + testAPI<SmallSet<int, 0>>(); + testAPI<SmallSet<int, 1>>(); + testAPI<SmallSet<int, 2>>(); + testAPI<SmallSet<int, 3>>(); + testAPI<SmallSet<int, 10>>(); + + testAPI<SmallUnorderedSet<int, 0>>(); + testAPI<SmallUnorderedSet<int, 1>>(); + testAPI<SmallUnorderedSet<int, 2>>(); + testAPI<SmallUnorderedSet<int, 3>>(); + testAPI<SmallUnorderedSet<int, 10>>(); + + testInternals<SmallSet<int, 1>>(); + testInternals<SmallUnorderedSet<int, 1>>(); + + std::cout << "ok.\n"; +} diff --git a/test/example/small_set.txt b/test/example/small_set.txt new file mode 100644 index 000000000..90b5016ef --- /dev/null +++ b/test/example/small_set.txt @@ -0,0 +1 @@ +ok. |