duplicate-function-elimination improvements (#1590)

On a codebase with 370K functions, 160K were in fact duplicate (!)... and it took many many passes to figure that out, over 2 minutes in fact (!), as A and B may be identical only after we see that the functions C1, C2 that they call are identical (so there can be long "chains" here). To avoid this, limit how many passes we do. In -O1, just do one pass - that gets most duplicates. In -O2, do 10 passes - that gets almost all of it on this codebase. And in -O3 (or -Os/-Oz) do as many passes as necessary (i.e., the old behavior). This at least lets iteration builds (-O1) be nice and fast. This PR also refactors the hashing code used in that pass, moving it to nicer header files for clearer readability. Also some other minor cleanups in hashing code that helped debug this.
author: Alon Zakai <alonzakai@gmail.com> 2018-06-07 19:05:43 -0700
committer: GitHub <noreply@github.com> 2018-06-07 19:05:43 -0700
commit: 3af404435b3cfa90704810370703f20921c055dd (patch)
tree: a1ad5f2b7985db6a9b92ef651c8f7ea2368747ec /src/ir/hashed.h
parent: 682bb461e6084048d1085f985f2a0973977d06b4 (diff)
download: binaryen-3af404435b3cfa90704810370703f20921c055dd.tar.gz
binaryen-3af404435b3cfa90704810370703f20921c055dd.tar.bz2
binaryen-3af404435b3cfa90704810370703f20921c055dd.zip
1 files changed, 47 insertions, 0 deletions
diff --git a/src/ir/hashed.h b/src/ir/hashed.h
index dc4012455..0771da6ec 100644
--- a/src/ir/hashed.h
+++ b/src/ir/hashed.h
@@ -53,6 +53,53 @@ template<typename T>
 class HashedExpressionMap : public std::unordered_map<HashedExpression, T, ExpressionHasher, ExpressionComparer> {
 };
 
+// A pass that hashes all functions
+
+struct FunctionHasher : public WalkerPass<PostWalker<FunctionHasher>> {
+  bool isFunctionParallel() override { return true; }
+
+  typedef uint32_t HashType;
+
+  struct Map : public std::map<Function*, HashType> {};
+
+  FunctionHasher(Map* output) : output(output) {}
+
+  FunctionHasher* create() override {
+    return new FunctionHasher(output);
+  }
+
+  static Map createMap(Module* module) {
+    Map hashes;
+    for (auto& func : module->functions) {
+      hashes[func.get()] = 0; // ensure an entry for each function - we must not modify the map shape in parallel, just the values
+    }
+    return hashes;
+  }
+
+  void doWalkFunction(Function* func) {
+    output->at(func) = hashFunction(func);
+  }
+
+  static HashType hashFunction(Function* func) {
+    HashType ret = 0;
+    ret = rehash(ret, (HashType)func->getNumParams());
+    for (auto type : func->params) {
+      ret = rehash(ret, (HashType)type);
+    }
+    ret = rehash(ret, (HashType)func->getNumVars());
+    for (auto type : func->vars) {
+      ret = rehash(ret, (HashType)type);
+    }
+    ret = rehash(ret, (HashType)func->result);
+    ret = rehash(ret, HashType(func->type.is() ? std::hash<wasm::Name>{}(func->type) : HashType(0)));
+    ret = rehash(ret, (HashType)ExpressionAnalyzer::hash(func->body));
+    return ret;
+  }
+
+private:
+  Map* output;
+};
+
 } // namespace wasm
 
 #endif // _wasm_ir_hashed_h
author	Alon Zakai <alonzakai@gmail.com>	2018-06-07 19:05:43 -0700
committer	GitHub <noreply@github.com>	2018-06-07 19:05:43 -0700
commit	3af404435b3cfa90704810370703f20921c055dd (patch)
tree	a1ad5f2b7985db6a9b92ef651c8f7ea2368747ec /src/ir/hashed.h
parent	682bb461e6084048d1085f985f2a0973977d06b4 (diff)
download	binaryen-3af404435b3cfa90704810370703f20921c055dd.tar.gz binaryen-3af404435b3cfa90704810370703f20921c055dd.tar.bz2 binaryen-3af404435b3cfa90704810370703f20921c055dd.zip