diff options
author | juj <jujjyl@gmail.com> | 2022-04-28 21:31:08 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-28 18:31:08 +0000 |
commit | ba64ac3ed47cb35a477aaaf185679ae9d0b79862 (patch) | |
tree | a35322a8d2162e07352c7272b97ffbd195d876a3 /src | |
parent | 408b2eb7df01f42157e24c2c58f01c4fa5c6b9d6 (diff) | |
download | binaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.tar.gz binaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.tar.bz2 binaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.zip |
Lift the restriction in liveness-traversal.h that supported max 65535 locals in a function. (#4567)
* Lift the restriction in liveness-traversal.h that supported max 65535 locals in a function.
* Lint
* Fix typo
* Fix static
* Lint
* Lint
* Lint
* Add needed canRun function
* lint
* Use either a sparse or a dense matrix for tracking liveness copies, depending on the locals count.
* Lint
* Fix lint
* Lint
* Implement sparse_square_matrix class and use that as a backing.
* Lint
* Lint
* Lint #includes
* Lint
* Lint includes
* Remove unnecessary code
* Fix canonical accesses to copies matrix
* Lint
* Add missing variable update
* Remove canRun() function
* Address review
* Update expected test results
* Update test name
* Add asserts to sparse_square_matrix set and get functions that they are not out of bound.
* Lint includes
* Update test expectation
* Use .clear() + .resize() to reset totalCopies vector
Diffstat (limited to 'src')
-rw-r--r-- | src/cfg/liveness-traversal.h | 41 | ||||
-rw-r--r-- | src/passes/Asyncify.cpp | 8 | ||||
-rw-r--r-- | src/passes/CoalesceLocals.cpp | 48 | ||||
-rw-r--r-- | src/support/sparse_square_matrix.h | 76 |
4 files changed, 114 insertions, 59 deletions
diff --git a/src/cfg/liveness-traversal.h b/src/cfg/liveness-traversal.h index ee56a9925..3193cbd1f 100644 --- a/src/cfg/liveness-traversal.h +++ b/src/cfg/liveness-traversal.h @@ -24,6 +24,7 @@ #include "cfg-traversal.h" #include "ir/utils.h" #include "support/sorted_vector.h" +#include "support/sparse_square_matrix.h" #include "wasm-builder.h" #include "wasm-traversal.h" #include "wasm.h" @@ -105,10 +106,10 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> { Index numLocals; std::unordered_set<BasicBlock*> liveBlocks; - // canonicalized - accesses should check (low, high) - // TODO: use a map for high N, as this tends to be sparse? or don't look at - // copies at all for big N? - std::vector<uint8_t> copies; + // access as a upper triangular matrix: i.e. when accessing a pair (i,j), + // should access the cell i < j. + sparse_square_matrix<uint8_t> copies; + // total # of copies for each local, with all others std::vector<Index> totalCopies; @@ -191,30 +192,13 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> { return nullptr; } - // If there are too many locals, we cannot run currently as - // numLocals * numLocals might overflow. We may want to add an option for - // a sparse matrix at some point TODO - bool canRun(Function* func) { - Index numLocals = func->getNumLocals(); - if (uint64_t(numLocals) * uint64_t(numLocals) <= - std::numeric_limits<Index>::max()) { - return true; - } - std::cerr << "warning: too many locals (" << numLocals - << ") to run liveness analysis in " << this->getFunction()->name - << '\n'; - return false; - } - // main entry point void doWalkFunction(Function* func) { numLocals = func->getNumLocals(); - assert(canRun(func)); - copies.resize(numLocals * numLocals); - std::fill(copies.begin(), copies.end(), 0); + copies.recreate(numLocals); + totalCopies.clear(); totalCopies.resize(numLocals); - std::fill(totalCopies.begin(), totalCopies.end(), 0); // create the CFG by walking the IR CFGWalker<SubType, VisitorType, Liveness>::doWalkFunction(func); // ignore links to dead blocks, so they don't confuse us and we can see @@ -297,14 +281,19 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> { } void addCopy(Index i, Index j) { - auto k = std::min(i, j) * numLocals + std::max(i, j); - copies[k] = std::min(copies[k], uint8_t(254)) + 1; + if (j > i) { + std::swap(i, j); + } + copies.set(i, j, std::min(copies.get(i, j), uint8_t(254)) + 1); totalCopies[i]++; totalCopies[j]++; } uint8_t getCopies(Index i, Index j) { - return copies[std::min(i, j) * numLocals + std::max(i, j)]; + if (j > i) { + std::swap(i, j); + } + return copies.get(i, j); } }; diff --git a/src/passes/Asyncify.cpp b/src/passes/Asyncify.cpp index 649f136c5..8a50d49e6 100644 --- a/src/passes/Asyncify.cpp +++ b/src/passes/Asyncify.cpp @@ -1344,14 +1344,6 @@ private: RelevantLiveLocalsWalker walker; walker.setFunction(func); - if (!walker.canRun(func)) { - // We can proceed without this optimization, which will cause more - // spilling - assume all locals are relevant. - for (Index i = 0; i < func->getNumLocals(); i++) { - relevantLiveLocals.insert(i); - } - return; - } walker.walkFunctionInModule(func, getModule()); // The relevant live locals are ones that are alive at an unwind/rewind // location. TODO look more precisely inside basic blocks, as one might stop diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index f58e57e72..ba9b87527 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -35,6 +35,7 @@ #include "pass.h" #include "support/learning.h" #include "support/permutations.h" +#include "support/sparse_square_matrix.h" #include "wasm.h" #ifdef CFG_PROFILE #include "support/timing.h" @@ -76,34 +77,31 @@ struct CoalesceLocals // interference state // canonicalized - accesses should check (low, high) - std::vector<bool> interferences; + sparse_square_matrix<bool> interferences; void interfere(Index i, Index j) { if (i == j) { return; } - interferences[std::min(i, j) * numLocals + std::max(i, j)] = 1; + interferences.set(std::min(i, j), std::max(i, j), true); } // optimized version where you know that low < high void interfereLowHigh(Index low, Index high) { assert(low < high); - interferences[low * numLocals + high] = 1; + interferences.set(low, high, true); } void unInterfere(Index i, Index j) { - interferences[std::min(i, j) * numLocals + std::max(i, j)] = 0; + interferences.set(std::min(i, j), std::max(i, j), false); } bool interferes(Index i, Index j) { - return interferences[std::min(i, j) * numLocals + std::max(i, j)]; + return interferences.get(std::min(i, j), std::max(i, j)); } }; void CoalesceLocals::doWalkFunction(Function* func) { - if (!canRun(func)) { - return; - } super::doWalkFunction(func); // prioritize back edges increaseBackEdgePriorities(); @@ -145,8 +143,7 @@ void CoalesceLocals::increaseBackEdgePriorities() { } void CoalesceLocals::calculateInterferences() { - interferences.resize(numLocals * numLocals); - std::fill(interferences.begin(), interferences.end(), false); + interferences.recreate(numLocals); // We will track the values in each local, using a numbering where each index // represents a unique different value. This array maps a local index to the @@ -379,17 +376,19 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, // registers, for gzip) std::vector<Type> types; // new index * numLocals => list of all interferences of locals merged to it - std::vector<bool> newInterferences; + sparse_square_matrix<bool> newInterferences; + // new index * numLocals => list of all copies of locals merged to it - std::vector<uint8_t> newCopies; + sparse_square_matrix<uint8_t> newCopies; + indices.resize(numLocals); types.resize(numLocals); - newInterferences.resize(numLocals * numLocals); - std::fill(newInterferences.begin(), newInterferences.end(), false); + auto numParams = getFunction()->getNumParams(); - // start with enough room for the params - newCopies.resize(numParams * numLocals); - std::fill(newCopies.begin(), newCopies.end(), 0); + + newInterferences.recreate(numLocals); + newCopies.recreate(numLocals); + Index nextFree = 0; removedCopies = 0; // we can't reorder parameters, they are fixed in order, and cannot coalesce @@ -399,8 +398,8 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, indices[i] = i; types[i] = getFunction()->getLocalType(i); for (Index j = numParams; j < numLocals; j++) { - newInterferences[numLocals * i + j] = interferes(i, j); - newCopies[numLocals * i + j] = getCopies(i, j); + newInterferences.set(i, j, interferes(i, j)); + newCopies.set(i, j, getCopies(i, j)); } nextFree++; } @@ -409,13 +408,13 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, Index found = -1; uint8_t foundCopies = -1; for (Index j = 0; j < nextFree; j++) { - if (!newInterferences[j * numLocals + actual] && + if (!newInterferences.get(j, actual) && getFunction()->getLocalType(actual) == types[j]) { // this does not interfere, so it might be what we want. but pick the // one eliminating the most copies (we could stop looking forward when // there are no more items that have copies anyhow, but it doesn't seem // to help) - auto currCopies = newCopies[j * numLocals + actual]; + auto currCopies = newCopies.get(j, actual); if (found == Index(-1) || currCopies > foundCopies) { indices[actual] = found = j; foundCopies = currCopies; @@ -427,7 +426,6 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, types[found] = getFunction()->getLocalType(actual); nextFree++; removedCopies += getCopies(found, actual); - newCopies.resize(nextFree * numLocals); } else { removedCopies += foundCopies; } @@ -438,9 +436,9 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, for (Index k = i + 1; k < numLocals; k++) { // go in the order, we only need to update for those we will see later auto j = order[k]; - newInterferences[found * numLocals + j] = - newInterferences[found * numLocals + j] || interferes(actual, j); - newCopies[found * numLocals + j] += getCopies(actual, j); + newInterferences.set( + found, j, newInterferences.get(found, j) || interferes(actual, j)); + newCopies.set(found, j, newCopies.get(found, j) + getCopies(actual, j)); } } } diff --git a/src/support/sparse_square_matrix.h b/src/support/sparse_square_matrix.h new file mode 100644 index 000000000..03ebcb5c6 --- /dev/null +++ b/src/support/sparse_square_matrix.h @@ -0,0 +1,76 @@ +/* + * Copyright 2022 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// A helper type for potentially sparse N*N matrix container. + +#pragma once + +#include <assert.h> +#include <stdint.h> +#include <unordered_map> +#include <vector> + +template<typename Ty> class sparse_square_matrix { + std::vector<Ty> denseStorage; + std::unordered_map<uint64_t, Ty> sparseStorage; + uint32_t N; + +public: + sparse_square_matrix() : N(0) {} + + explicit sparse_square_matrix(int N) : N(N) { + if (N < DenseLimit) { + denseStorage.resize(N * N); + } + } + + static const size_t DenseLimit = 8192; + + uint32_t width() const { return N; } + + bool usingDenseStorage() const { return !denseStorage.empty(); } + + void set(uint32_t i, uint32_t j, const Ty& value) { + assert(i < N); + assert(j < N); + if (usingDenseStorage()) { + denseStorage[i * N + j] = value; + } else { + sparseStorage[i * N + j] = value; + } + } + + const Ty get(uint32_t i, uint32_t j) const { + assert(i < N); + assert(j < N); + if (usingDenseStorage()) { + return denseStorage[i * N + j]; + } + auto iter = sparseStorage.find(i * N + j); + return iter == sparseStorage.end() ? Ty() : iter->second; + } + + // Resizes the matrix to a new n*n size, and clears all entries + // to the default-initialized value. + void recreate(uint32_t n) { + N = n; + denseStorage.clear(); + sparseStorage.clear(); + if (N < DenseLimit) { + denseStorage.resize(N * N); + } + } +}; |