diff options
author | juj <jujjyl@gmail.com> | 2022-04-28 21:31:08 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-28 18:31:08 +0000 |
commit | ba64ac3ed47cb35a477aaaf185679ae9d0b79862 (patch) | |
tree | a35322a8d2162e07352c7272b97ffbd195d876a3 | |
parent | 408b2eb7df01f42157e24c2c58f01c4fa5c6b9d6 (diff) | |
download | binaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.tar.gz binaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.tar.bz2 binaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.zip |
Lift the restriction in liveness-traversal.h that supported max 65535 locals in a function. (#4567)
* Lift the restriction in liveness-traversal.h that supported max 65535 locals in a function.
* Lint
* Fix typo
* Fix static
* Lint
* Lint
* Lint
* Add needed canRun function
* lint
* Use either a sparse or a dense matrix for tracking liveness copies, depending on the locals count.
* Lint
* Fix lint
* Lint
* Implement sparse_square_matrix class and use that as a backing.
* Lint
* Lint
* Lint #includes
* Lint
* Lint includes
* Remove unnecessary code
* Fix canonical accesses to copies matrix
* Lint
* Add missing variable update
* Remove canRun() function
* Address review
* Update expected test results
* Update test name
* Add asserts to sparse_square_matrix set and get functions that they are not out of bound.
* Lint includes
* Update test expectation
* Use .clear() + .resize() to reset totalCopies vector
-rw-r--r-- | src/cfg/liveness-traversal.h | 41 | ||||
-rw-r--r-- | src/passes/Asyncify.cpp | 8 | ||||
-rw-r--r-- | src/passes/CoalesceLocals.cpp | 48 | ||||
-rw-r--r-- | src/support/sparse_square_matrix.h | 76 | ||||
-rw-r--r-- | test/example/sparse_square_matrix.cpp | 54 | ||||
-rw-r--r-- | test/example/sparse_square_matrix.txt | 1 | ||||
-rw-r--r-- | test/passes/65536_locals_for_liveness.bin.txt (renamed from test/passes/too_much_for_liveness.bin.txt) | 2 | ||||
-rw-r--r-- | test/passes/65536_locals_for_liveness.passes (renamed from test/passes/too_much_for_liveness.passes) | 0 | ||||
-rw-r--r-- | test/passes/65536_locals_for_liveness.wasm (renamed from test/passes/too_much_for_liveness.wasm) | bin | 44 -> 44 bytes |
9 files changed, 170 insertions, 60 deletions
diff --git a/src/cfg/liveness-traversal.h b/src/cfg/liveness-traversal.h index ee56a9925..3193cbd1f 100644 --- a/src/cfg/liveness-traversal.h +++ b/src/cfg/liveness-traversal.h @@ -24,6 +24,7 @@ #include "cfg-traversal.h" #include "ir/utils.h" #include "support/sorted_vector.h" +#include "support/sparse_square_matrix.h" #include "wasm-builder.h" #include "wasm-traversal.h" #include "wasm.h" @@ -105,10 +106,10 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> { Index numLocals; std::unordered_set<BasicBlock*> liveBlocks; - // canonicalized - accesses should check (low, high) - // TODO: use a map for high N, as this tends to be sparse? or don't look at - // copies at all for big N? - std::vector<uint8_t> copies; + // access as a upper triangular matrix: i.e. when accessing a pair (i,j), + // should access the cell i < j. + sparse_square_matrix<uint8_t> copies; + // total # of copies for each local, with all others std::vector<Index> totalCopies; @@ -191,30 +192,13 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> { return nullptr; } - // If there are too many locals, we cannot run currently as - // numLocals * numLocals might overflow. We may want to add an option for - // a sparse matrix at some point TODO - bool canRun(Function* func) { - Index numLocals = func->getNumLocals(); - if (uint64_t(numLocals) * uint64_t(numLocals) <= - std::numeric_limits<Index>::max()) { - return true; - } - std::cerr << "warning: too many locals (" << numLocals - << ") to run liveness analysis in " << this->getFunction()->name - << '\n'; - return false; - } - // main entry point void doWalkFunction(Function* func) { numLocals = func->getNumLocals(); - assert(canRun(func)); - copies.resize(numLocals * numLocals); - std::fill(copies.begin(), copies.end(), 0); + copies.recreate(numLocals); + totalCopies.clear(); totalCopies.resize(numLocals); - std::fill(totalCopies.begin(), totalCopies.end(), 0); // create the CFG by walking the IR CFGWalker<SubType, VisitorType, Liveness>::doWalkFunction(func); // ignore links to dead blocks, so they don't confuse us and we can see @@ -297,14 +281,19 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> { } void addCopy(Index i, Index j) { - auto k = std::min(i, j) * numLocals + std::max(i, j); - copies[k] = std::min(copies[k], uint8_t(254)) + 1; + if (j > i) { + std::swap(i, j); + } + copies.set(i, j, std::min(copies.get(i, j), uint8_t(254)) + 1); totalCopies[i]++; totalCopies[j]++; } uint8_t getCopies(Index i, Index j) { - return copies[std::min(i, j) * numLocals + std::max(i, j)]; + if (j > i) { + std::swap(i, j); + } + return copies.get(i, j); } }; diff --git a/src/passes/Asyncify.cpp b/src/passes/Asyncify.cpp index 649f136c5..8a50d49e6 100644 --- a/src/passes/Asyncify.cpp +++ b/src/passes/Asyncify.cpp @@ -1344,14 +1344,6 @@ private: RelevantLiveLocalsWalker walker; walker.setFunction(func); - if (!walker.canRun(func)) { - // We can proceed without this optimization, which will cause more - // spilling - assume all locals are relevant. - for (Index i = 0; i < func->getNumLocals(); i++) { - relevantLiveLocals.insert(i); - } - return; - } walker.walkFunctionInModule(func, getModule()); // The relevant live locals are ones that are alive at an unwind/rewind // location. TODO look more precisely inside basic blocks, as one might stop diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp index f58e57e72..ba9b87527 100644 --- a/src/passes/CoalesceLocals.cpp +++ b/src/passes/CoalesceLocals.cpp @@ -35,6 +35,7 @@ #include "pass.h" #include "support/learning.h" #include "support/permutations.h" +#include "support/sparse_square_matrix.h" #include "wasm.h" #ifdef CFG_PROFILE #include "support/timing.h" @@ -76,34 +77,31 @@ struct CoalesceLocals // interference state // canonicalized - accesses should check (low, high) - std::vector<bool> interferences; + sparse_square_matrix<bool> interferences; void interfere(Index i, Index j) { if (i == j) { return; } - interferences[std::min(i, j) * numLocals + std::max(i, j)] = 1; + interferences.set(std::min(i, j), std::max(i, j), true); } // optimized version where you know that low < high void interfereLowHigh(Index low, Index high) { assert(low < high); - interferences[low * numLocals + high] = 1; + interferences.set(low, high, true); } void unInterfere(Index i, Index j) { - interferences[std::min(i, j) * numLocals + std::max(i, j)] = 0; + interferences.set(std::min(i, j), std::max(i, j), false); } bool interferes(Index i, Index j) { - return interferences[std::min(i, j) * numLocals + std::max(i, j)]; + return interferences.get(std::min(i, j), std::max(i, j)); } }; void CoalesceLocals::doWalkFunction(Function* func) { - if (!canRun(func)) { - return; - } super::doWalkFunction(func); // prioritize back edges increaseBackEdgePriorities(); @@ -145,8 +143,7 @@ void CoalesceLocals::increaseBackEdgePriorities() { } void CoalesceLocals::calculateInterferences() { - interferences.resize(numLocals * numLocals); - std::fill(interferences.begin(), interferences.end(), false); + interferences.recreate(numLocals); // We will track the values in each local, using a numbering where each index // represents a unique different value. This array maps a local index to the @@ -379,17 +376,19 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, // registers, for gzip) std::vector<Type> types; // new index * numLocals => list of all interferences of locals merged to it - std::vector<bool> newInterferences; + sparse_square_matrix<bool> newInterferences; + // new index * numLocals => list of all copies of locals merged to it - std::vector<uint8_t> newCopies; + sparse_square_matrix<uint8_t> newCopies; + indices.resize(numLocals); types.resize(numLocals); - newInterferences.resize(numLocals * numLocals); - std::fill(newInterferences.begin(), newInterferences.end(), false); + auto numParams = getFunction()->getNumParams(); - // start with enough room for the params - newCopies.resize(numParams * numLocals); - std::fill(newCopies.begin(), newCopies.end(), 0); + + newInterferences.recreate(numLocals); + newCopies.recreate(numLocals); + Index nextFree = 0; removedCopies = 0; // we can't reorder parameters, they are fixed in order, and cannot coalesce @@ -399,8 +398,8 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, indices[i] = i; types[i] = getFunction()->getLocalType(i); for (Index j = numParams; j < numLocals; j++) { - newInterferences[numLocals * i + j] = interferes(i, j); - newCopies[numLocals * i + j] = getCopies(i, j); + newInterferences.set(i, j, interferes(i, j)); + newCopies.set(i, j, getCopies(i, j)); } nextFree++; } @@ -409,13 +408,13 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, Index found = -1; uint8_t foundCopies = -1; for (Index j = 0; j < nextFree; j++) { - if (!newInterferences[j * numLocals + actual] && + if (!newInterferences.get(j, actual) && getFunction()->getLocalType(actual) == types[j]) { // this does not interfere, so it might be what we want. but pick the // one eliminating the most copies (we could stop looking forward when // there are no more items that have copies anyhow, but it doesn't seem // to help) - auto currCopies = newCopies[j * numLocals + actual]; + auto currCopies = newCopies.get(j, actual); if (found == Index(-1) || currCopies > foundCopies) { indices[actual] = found = j; foundCopies = currCopies; @@ -427,7 +426,6 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, types[found] = getFunction()->getLocalType(actual); nextFree++; removedCopies += getCopies(found, actual); - newCopies.resize(nextFree * numLocals); } else { removedCopies += foundCopies; } @@ -438,9 +436,9 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, for (Index k = i + 1; k < numLocals; k++) { // go in the order, we only need to update for those we will see later auto j = order[k]; - newInterferences[found * numLocals + j] = - newInterferences[found * numLocals + j] || interferes(actual, j); - newCopies[found * numLocals + j] += getCopies(actual, j); + newInterferences.set( + found, j, newInterferences.get(found, j) || interferes(actual, j)); + newCopies.set(found, j, newCopies.get(found, j) + getCopies(actual, j)); } } } diff --git a/src/support/sparse_square_matrix.h b/src/support/sparse_square_matrix.h new file mode 100644 index 000000000..03ebcb5c6 --- /dev/null +++ b/src/support/sparse_square_matrix.h @@ -0,0 +1,76 @@ +/* + * Copyright 2022 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// A helper type for potentially sparse N*N matrix container. + +#pragma once + +#include <assert.h> +#include <stdint.h> +#include <unordered_map> +#include <vector> + +template<typename Ty> class sparse_square_matrix { + std::vector<Ty> denseStorage; + std::unordered_map<uint64_t, Ty> sparseStorage; + uint32_t N; + +public: + sparse_square_matrix() : N(0) {} + + explicit sparse_square_matrix(int N) : N(N) { + if (N < DenseLimit) { + denseStorage.resize(N * N); + } + } + + static const size_t DenseLimit = 8192; + + uint32_t width() const { return N; } + + bool usingDenseStorage() const { return !denseStorage.empty(); } + + void set(uint32_t i, uint32_t j, const Ty& value) { + assert(i < N); + assert(j < N); + if (usingDenseStorage()) { + denseStorage[i * N + j] = value; + } else { + sparseStorage[i * N + j] = value; + } + } + + const Ty get(uint32_t i, uint32_t j) const { + assert(i < N); + assert(j < N); + if (usingDenseStorage()) { + return denseStorage[i * N + j]; + } + auto iter = sparseStorage.find(i * N + j); + return iter == sparseStorage.end() ? Ty() : iter->second; + } + + // Resizes the matrix to a new n*n size, and clears all entries + // to the default-initialized value. + void recreate(uint32_t n) { + N = n; + denseStorage.clear(); + sparseStorage.clear(); + if (N < DenseLimit) { + denseStorage.resize(N * N); + } + } +}; diff --git a/test/example/sparse_square_matrix.cpp b/test/example/sparse_square_matrix.cpp new file mode 100644 index 000000000..ad1f71ade --- /dev/null +++ b/test/example/sparse_square_matrix.cpp @@ -0,0 +1,54 @@ +#include <iostream> + +#include "support/sparse_square_matrix.h" + +int main() { + sparse_square_matrix<uint32_t> m; + + // New matrix should initialize to 0x0 size. + assert(m.width() == 0); + + // Recreating should resize the matrix. + m.recreate(100); + assert(m.width() == 100); + + // Small matrices should use dense storage. + assert(m.usingDenseStorage()); + + // Setting and getting element values in dense storage should work. + for (int y = 0; y < 100; ++y) + for (int x = 0; x < 100; ++x) + m.set(y, x, y * 100 + x); + for (int y = 0; y < 100; ++y) + for (int x = 0; x < 100; ++x) + assert(m.get(y, x) == y * 100 + x); + + // Recreating should clear the matrix elements to zero, + // even if recreating to same size as before. + assert(m.width() == 100); + m.recreate(100); + for (int y = 0; y < 100; ++y) + for (int x = 0; x < 100; ++x) + assert(m.get(y, x) == 0); + + // Large matrices should use sparse storage. + m.recreate(m.DenseLimit); + assert(!m.usingDenseStorage()); + + // Setting and getting element values in sparse storage should work. + for (int y = 0; y < m.DenseLimit; y += 128) + for (int x = 0; x < m.DenseLimit; x += 128) + m.set(y, x, y * m.DenseLimit + x); + for (int y = 0; y < m.DenseLimit; y += 128) + for (int x = 0; x < m.DenseLimit; x += 128) + assert(m.get(y, x) == y * m.DenseLimit + x); + + // Recreating matrix in sparse mode should reset values in sparse + // storage to zero. + m.recreate(m.DenseLimit + 1); + for (int y = 0; y < m.width(); y += 128) + for (int x = 0; x < m.width(); x += 128) + assert(m.get(y, x) == 0); + + std::cout << "ok.\n"; +} diff --git a/test/example/sparse_square_matrix.txt b/test/example/sparse_square_matrix.txt new file mode 100644 index 000000000..90b5016ef --- /dev/null +++ b/test/example/sparse_square_matrix.txt @@ -0,0 +1 @@ +ok. diff --git a/test/passes/too_much_for_liveness.bin.txt b/test/passes/65536_locals_for_liveness.bin.txt index 793f9578c..0fc7f873a 100644 --- a/test/passes/too_much_for_liveness.bin.txt +++ b/test/passes/65536_locals_for_liveness.bin.txt @@ -19,7 +19,7 @@ total [tables] : 0 [tags] : 0 [total] : 4 - [vars] : 65536 + [vars] : 1 -65535 Block : 1 Const : 1 LocalGet : 1 diff --git a/test/passes/too_much_for_liveness.passes b/test/passes/65536_locals_for_liveness.passes index 719d0bbcd..719d0bbcd 100644 --- a/test/passes/too_much_for_liveness.passes +++ b/test/passes/65536_locals_for_liveness.passes diff --git a/test/passes/too_much_for_liveness.wasm b/test/passes/65536_locals_for_liveness.wasm Binary files differindex 88929243d..88929243d 100644 --- a/test/passes/too_much_for_liveness.wasm +++ b/test/passes/65536_locals_for_liveness.wasm |