summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorjuj <jujjyl@gmail.com>2022-04-28 21:31:08 +0300
committerGitHub <noreply@github.com>2022-04-28 18:31:08 +0000
commitba64ac3ed47cb35a477aaaf185679ae9d0b79862 (patch)
treea35322a8d2162e07352c7272b97ffbd195d876a3 /src
parent408b2eb7df01f42157e24c2c58f01c4fa5c6b9d6 (diff)
downloadbinaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.tar.gz
binaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.tar.bz2
binaryen-ba64ac3ed47cb35a477aaaf185679ae9d0b79862.zip
Lift the restriction in liveness-traversal.h that supported max 65535 locals in a function. (#4567)
* Lift the restriction in liveness-traversal.h that supported max 65535 locals in a function. * Lint * Fix typo * Fix static * Lint * Lint * Lint * Add needed canRun function * lint * Use either a sparse or a dense matrix for tracking liveness copies, depending on the locals count. * Lint * Fix lint * Lint * Implement sparse_square_matrix class and use that as a backing. * Lint * Lint * Lint #includes * Lint * Lint includes * Remove unnecessary code * Fix canonical accesses to copies matrix * Lint * Add missing variable update * Remove canRun() function * Address review * Update expected test results * Update test name * Add asserts to sparse_square_matrix set and get functions that they are not out of bound. * Lint includes * Update test expectation * Use .clear() + .resize() to reset totalCopies vector
Diffstat (limited to 'src')
-rw-r--r--src/cfg/liveness-traversal.h41
-rw-r--r--src/passes/Asyncify.cpp8
-rw-r--r--src/passes/CoalesceLocals.cpp48
-rw-r--r--src/support/sparse_square_matrix.h76
4 files changed, 114 insertions, 59 deletions
diff --git a/src/cfg/liveness-traversal.h b/src/cfg/liveness-traversal.h
index ee56a9925..3193cbd1f 100644
--- a/src/cfg/liveness-traversal.h
+++ b/src/cfg/liveness-traversal.h
@@ -24,6 +24,7 @@
#include "cfg-traversal.h"
#include "ir/utils.h"
#include "support/sorted_vector.h"
+#include "support/sparse_square_matrix.h"
#include "wasm-builder.h"
#include "wasm-traversal.h"
#include "wasm.h"
@@ -105,10 +106,10 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> {
Index numLocals;
std::unordered_set<BasicBlock*> liveBlocks;
- // canonicalized - accesses should check (low, high)
- // TODO: use a map for high N, as this tends to be sparse? or don't look at
- // copies at all for big N?
- std::vector<uint8_t> copies;
+ // access as a upper triangular matrix: i.e. when accessing a pair (i,j),
+ // should access the cell i < j.
+ sparse_square_matrix<uint8_t> copies;
+
// total # of copies for each local, with all others
std::vector<Index> totalCopies;
@@ -191,30 +192,13 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> {
return nullptr;
}
- // If there are too many locals, we cannot run currently as
- // numLocals * numLocals might overflow. We may want to add an option for
- // a sparse matrix at some point TODO
- bool canRun(Function* func) {
- Index numLocals = func->getNumLocals();
- if (uint64_t(numLocals) * uint64_t(numLocals) <=
- std::numeric_limits<Index>::max()) {
- return true;
- }
- std::cerr << "warning: too many locals (" << numLocals
- << ") to run liveness analysis in " << this->getFunction()->name
- << '\n';
- return false;
- }
-
// main entry point
void doWalkFunction(Function* func) {
numLocals = func->getNumLocals();
- assert(canRun(func));
- copies.resize(numLocals * numLocals);
- std::fill(copies.begin(), copies.end(), 0);
+ copies.recreate(numLocals);
+ totalCopies.clear();
totalCopies.resize(numLocals);
- std::fill(totalCopies.begin(), totalCopies.end(), 0);
// create the CFG by walking the IR
CFGWalker<SubType, VisitorType, Liveness>::doWalkFunction(func);
// ignore links to dead blocks, so they don't confuse us and we can see
@@ -297,14 +281,19 @@ struct LivenessWalker : public CFGWalker<SubType, VisitorType, Liveness> {
}
void addCopy(Index i, Index j) {
- auto k = std::min(i, j) * numLocals + std::max(i, j);
- copies[k] = std::min(copies[k], uint8_t(254)) + 1;
+ if (j > i) {
+ std::swap(i, j);
+ }
+ copies.set(i, j, std::min(copies.get(i, j), uint8_t(254)) + 1);
totalCopies[i]++;
totalCopies[j]++;
}
uint8_t getCopies(Index i, Index j) {
- return copies[std::min(i, j) * numLocals + std::max(i, j)];
+ if (j > i) {
+ std::swap(i, j);
+ }
+ return copies.get(i, j);
}
};
diff --git a/src/passes/Asyncify.cpp b/src/passes/Asyncify.cpp
index 649f136c5..8a50d49e6 100644
--- a/src/passes/Asyncify.cpp
+++ b/src/passes/Asyncify.cpp
@@ -1344,14 +1344,6 @@ private:
RelevantLiveLocalsWalker walker;
walker.setFunction(func);
- if (!walker.canRun(func)) {
- // We can proceed without this optimization, which will cause more
- // spilling - assume all locals are relevant.
- for (Index i = 0; i < func->getNumLocals(); i++) {
- relevantLiveLocals.insert(i);
- }
- return;
- }
walker.walkFunctionInModule(func, getModule());
// The relevant live locals are ones that are alive at an unwind/rewind
// location. TODO look more precisely inside basic blocks, as one might stop
diff --git a/src/passes/CoalesceLocals.cpp b/src/passes/CoalesceLocals.cpp
index f58e57e72..ba9b87527 100644
--- a/src/passes/CoalesceLocals.cpp
+++ b/src/passes/CoalesceLocals.cpp
@@ -35,6 +35,7 @@
#include "pass.h"
#include "support/learning.h"
#include "support/permutations.h"
+#include "support/sparse_square_matrix.h"
#include "wasm.h"
#ifdef CFG_PROFILE
#include "support/timing.h"
@@ -76,34 +77,31 @@ struct CoalesceLocals
// interference state
// canonicalized - accesses should check (low, high)
- std::vector<bool> interferences;
+ sparse_square_matrix<bool> interferences;
void interfere(Index i, Index j) {
if (i == j) {
return;
}
- interferences[std::min(i, j) * numLocals + std::max(i, j)] = 1;
+ interferences.set(std::min(i, j), std::max(i, j), true);
}
// optimized version where you know that low < high
void interfereLowHigh(Index low, Index high) {
assert(low < high);
- interferences[low * numLocals + high] = 1;
+ interferences.set(low, high, true);
}
void unInterfere(Index i, Index j) {
- interferences[std::min(i, j) * numLocals + std::max(i, j)] = 0;
+ interferences.set(std::min(i, j), std::max(i, j), false);
}
bool interferes(Index i, Index j) {
- return interferences[std::min(i, j) * numLocals + std::max(i, j)];
+ return interferences.get(std::min(i, j), std::max(i, j));
}
};
void CoalesceLocals::doWalkFunction(Function* func) {
- if (!canRun(func)) {
- return;
- }
super::doWalkFunction(func);
// prioritize back edges
increaseBackEdgePriorities();
@@ -145,8 +143,7 @@ void CoalesceLocals::increaseBackEdgePriorities() {
}
void CoalesceLocals::calculateInterferences() {
- interferences.resize(numLocals * numLocals);
- std::fill(interferences.begin(), interferences.end(), false);
+ interferences.recreate(numLocals);
// We will track the values in each local, using a numbering where each index
// represents a unique different value. This array maps a local index to the
@@ -379,17 +376,19 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
// registers, for gzip)
std::vector<Type> types;
// new index * numLocals => list of all interferences of locals merged to it
- std::vector<bool> newInterferences;
+ sparse_square_matrix<bool> newInterferences;
+
// new index * numLocals => list of all copies of locals merged to it
- std::vector<uint8_t> newCopies;
+ sparse_square_matrix<uint8_t> newCopies;
+
indices.resize(numLocals);
types.resize(numLocals);
- newInterferences.resize(numLocals * numLocals);
- std::fill(newInterferences.begin(), newInterferences.end(), false);
+
auto numParams = getFunction()->getNumParams();
- // start with enough room for the params
- newCopies.resize(numParams * numLocals);
- std::fill(newCopies.begin(), newCopies.end(), 0);
+
+ newInterferences.recreate(numLocals);
+ newCopies.recreate(numLocals);
+
Index nextFree = 0;
removedCopies = 0;
// we can't reorder parameters, they are fixed in order, and cannot coalesce
@@ -399,8 +398,8 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
indices[i] = i;
types[i] = getFunction()->getLocalType(i);
for (Index j = numParams; j < numLocals; j++) {
- newInterferences[numLocals * i + j] = interferes(i, j);
- newCopies[numLocals * i + j] = getCopies(i, j);
+ newInterferences.set(i, j, interferes(i, j));
+ newCopies.set(i, j, getCopies(i, j));
}
nextFree++;
}
@@ -409,13 +408,13 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
Index found = -1;
uint8_t foundCopies = -1;
for (Index j = 0; j < nextFree; j++) {
- if (!newInterferences[j * numLocals + actual] &&
+ if (!newInterferences.get(j, actual) &&
getFunction()->getLocalType(actual) == types[j]) {
// this does not interfere, so it might be what we want. but pick the
// one eliminating the most copies (we could stop looking forward when
// there are no more items that have copies anyhow, but it doesn't seem
// to help)
- auto currCopies = newCopies[j * numLocals + actual];
+ auto currCopies = newCopies.get(j, actual);
if (found == Index(-1) || currCopies > foundCopies) {
indices[actual] = found = j;
foundCopies = currCopies;
@@ -427,7 +426,6 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
types[found] = getFunction()->getLocalType(actual);
nextFree++;
removedCopies += getCopies(found, actual);
- newCopies.resize(nextFree * numLocals);
} else {
removedCopies += foundCopies;
}
@@ -438,9 +436,9 @@ void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order,
for (Index k = i + 1; k < numLocals; k++) {
// go in the order, we only need to update for those we will see later
auto j = order[k];
- newInterferences[found * numLocals + j] =
- newInterferences[found * numLocals + j] || interferes(actual, j);
- newCopies[found * numLocals + j] += getCopies(actual, j);
+ newInterferences.set(
+ found, j, newInterferences.get(found, j) || interferes(actual, j));
+ newCopies.set(found, j, newCopies.get(found, j) + getCopies(actual, j));
}
}
}
diff --git a/src/support/sparse_square_matrix.h b/src/support/sparse_square_matrix.h
new file mode 100644
index 000000000..03ebcb5c6
--- /dev/null
+++ b/src/support/sparse_square_matrix.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// A helper type for potentially sparse N*N matrix container.
+
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+#include <unordered_map>
+#include <vector>
+
+template<typename Ty> class sparse_square_matrix {
+ std::vector<Ty> denseStorage;
+ std::unordered_map<uint64_t, Ty> sparseStorage;
+ uint32_t N;
+
+public:
+ sparse_square_matrix() : N(0) {}
+
+ explicit sparse_square_matrix(int N) : N(N) {
+ if (N < DenseLimit) {
+ denseStorage.resize(N * N);
+ }
+ }
+
+ static const size_t DenseLimit = 8192;
+
+ uint32_t width() const { return N; }
+
+ bool usingDenseStorage() const { return !denseStorage.empty(); }
+
+ void set(uint32_t i, uint32_t j, const Ty& value) {
+ assert(i < N);
+ assert(j < N);
+ if (usingDenseStorage()) {
+ denseStorage[i * N + j] = value;
+ } else {
+ sparseStorage[i * N + j] = value;
+ }
+ }
+
+ const Ty get(uint32_t i, uint32_t j) const {
+ assert(i < N);
+ assert(j < N);
+ if (usingDenseStorage()) {
+ return denseStorage[i * N + j];
+ }
+ auto iter = sparseStorage.find(i * N + j);
+ return iter == sparseStorage.end() ? Ty() : iter->second;
+ }
+
+ // Resizes the matrix to a new n*n size, and clears all entries
+ // to the default-initialized value.
+ void recreate(uint32_t n) {
+ N = n;
+ denseStorage.clear();
+ sparseStorage.clear();
+ if (N < DenseLimit) {
+ denseStorage.resize(N * N);
+ }
+ }
+};