summaryrefslogtreecommitdiff
path: root/src/passes/DuplicateFunctionElimination.cpp
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2018-06-07 19:05:43 -0700
committerGitHub <noreply@github.com>2018-06-07 19:05:43 -0700
commit3af404435b3cfa90704810370703f20921c055dd (patch)
treea1ad5f2b7985db6a9b92ef651c8f7ea2368747ec /src/passes/DuplicateFunctionElimination.cpp
parent682bb461e6084048d1085f985f2a0973977d06b4 (diff)
downloadbinaryen-3af404435b3cfa90704810370703f20921c055dd.tar.gz
binaryen-3af404435b3cfa90704810370703f20921c055dd.tar.bz2
binaryen-3af404435b3cfa90704810370703f20921c055dd.zip
duplicate-function-elimination improvements (#1590)
On a codebase with 370K functions, 160K were in fact duplicate (!)... and it took many many passes to figure that out, over 2 minutes in fact (!), as A and B may be identical only after we see that the functions C1, C2 that they call are identical (so there can be long "chains" here). To avoid this, limit how many passes we do. In -O1, just do one pass - that gets most duplicates. In -O2, do 10 passes - that gets almost all of it on this codebase. And in -O3 (or -Os/-Oz) do as many passes as necessary (i.e., the old behavior). This at least lets iteration builds (-O1) be nice and fast. This PR also refactors the hashing code used in that pass, moving it to nicer header files for clearer readability. Also some other minor cleanups in hashing code that helped debug this.
Diffstat (limited to 'src/passes/DuplicateFunctionElimination.cpp')
-rw-r--r--src/passes/DuplicateFunctionElimination.cpp72
1 files changed, 18 insertions, 54 deletions
diff --git a/src/passes/DuplicateFunctionElimination.cpp b/src/passes/DuplicateFunctionElimination.cpp
index 027d16b4a..60667cdd6 100644
--- a/src/passes/DuplicateFunctionElimination.cpp
+++ b/src/passes/DuplicateFunctionElimination.cpp
@@ -23,43 +23,11 @@
#include "wasm.h"
#include "pass.h"
#include "ir/utils.h"
-#include "support/hash.h"
+#include "ir/function-utils.h"
+#include "ir/hashed.h"
namespace wasm {
-struct FunctionHasher : public WalkerPass<PostWalker<FunctionHasher>> {
- bool isFunctionParallel() override { return true; }
-
- FunctionHasher(std::map<Function*, uint32_t>* output) : output(output) {}
-
- FunctionHasher* create() override {
- return new FunctionHasher(output);
- }
-
- void doWalkFunction(Function* func) {
- assert(digest == 0);
- hash(func->getNumParams());
- for (auto type : func->params) hash(type);
- hash(func->getNumVars());
- for (auto type : func->vars) hash(type);
- hash(func->result);
- hash64(func->type.is() ? uint64_t(func->type.str) : uint64_t(0));
- hash(ExpressionAnalyzer::hash(func->body));
- output->at(func) = digest;
- }
-
-private:
- std::map<Function*, uint32_t>* output;
- uint32_t digest = 0;
-
- void hash(uint32_t hash) {
- digest = rehash(digest, hash);
- }
- void hash64(uint64_t hash) {
- digest = rehash(rehash(digest, uint32_t(hash >> 32)), uint32_t(hash));
- };
-};
-
struct FunctionReplacer : public WalkerPass<PostWalker<FunctionReplacer>> {
bool isFunctionParallel() override { return true; }
@@ -82,12 +50,22 @@ private:
struct DuplicateFunctionElimination : public Pass {
void run(PassRunner* runner, Module* module) override {
- while (1) {
+ // Multiple iterations may be necessary: A and B may be identical only after we
+ // see the functions C1 and C2 that they call are in fact identical. Rarely, such
+ // "chains" can be very long, so we limit how many we do.
+ auto& options = runner->options;
+ Index limit;
+ if (options.optimizeLevel >= 3 || options.shrinkLevel >= 1) {
+ limit = module->functions.size(); // no limit
+ } else if (options.optimizeLevel >= 2) {
+ limit = 10; // 10 passes usually does most of the work, as this is typically logarithmic
+ } else {
+ limit = 1;
+ }
+ while (limit > 0) {
+ limit--;
// Hash all the functions
- hashes.clear();
- for (auto& func : module->functions) {
- hashes[func.get()] = 0; // ensure an entry for each function - we must not modify the map shape in parallel, just the values
- }
+ auto hashes = FunctionHasher::createMap(module);
PassRunner hasherRunner(module);
hasherRunner.setIsNested(true);
hasherRunner.add<FunctionHasher>(&hashes);
@@ -113,7 +91,7 @@ struct DuplicateFunctionElimination : public Pass {
for (Index j = i + 1; j < size; j++) {
auto* second = group[j];
if (duplicates.count(second->name)) continue;
- if (equal(first, second)) {
+ if (FunctionUtils::equal(first, second)) {
// great, we can replace the second with the first!
replacements[second->name] = first->name;
duplicates.insert(second->name);
@@ -162,20 +140,6 @@ struct DuplicateFunctionElimination : public Pass {
}
}
}
-
-private:
- std::map<Function*, uint32_t> hashes;
-
- bool equal(Function* left, Function* right) {
- if (left->getNumParams() != right->getNumParams()) return false;
- if (left->getNumVars() != right->getNumVars()) return false;
- for (Index i = 0; i < left->getNumLocals(); i++) {
- if (left->getLocalType(i) != right->getLocalType(i)) return false;
- }
- if (left->result != right->result) return false;
- if (left->type != right->type) return false;
- return ExpressionAnalyzer::equal(left->body, right->body);
- }
};
Pass *createDuplicateFunctionEliminationPass() {