diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/ir/ExpressionAnalyzer.cpp | 10 | ||||
-rw-r--r-- | src/ir/function-utils.h | 46 | ||||
-rw-r--r-- | src/ir/hashed.h | 47 | ||||
-rw-r--r-- | src/passes/DuplicateFunctionElimination.cpp | 72 | ||||
-rw-r--r-- | src/support/hash.h | 9 | ||||
-rw-r--r-- | src/tools/fuzzing.h | 3 |
6 files changed, 128 insertions, 59 deletions
diff --git a/src/ir/ExpressionAnalyzer.cpp b/src/ir/ExpressionAnalyzer.cpp index 5368c25f6..64b1ce24b 100644 --- a/src/ir/ExpressionAnalyzer.cpp +++ b/src/ir/ExpressionAnalyzer.cpp @@ -510,8 +510,14 @@ uint32_t ExpressionAnalyzer::hash(Expression* curr) { break; } case Expression::Id::ConstId: { - HASH(Const, value.type); - HASH64(Const, value.getBits()); + auto* c = curr->cast<Const>(); + hash(c->type); + auto bits = c->value.getBits(); + if (getTypeSize(c->type) == 4) { + hash(uint32_t(bits)); + } else { + hash64(bits); + } break; } case Expression::Id::UnaryId: { diff --git a/src/ir/function-utils.h b/src/ir/function-utils.h new file mode 100644 index 000000000..317b2f1b1 --- /dev/null +++ b/src/ir/function-utils.h @@ -0,0 +1,46 @@ +/* + * Copyright 2018 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ir_function_h +#define wasm_ir_function_h + +#include "wasm.h" +#include "ir/utils.h" + +namespace wasm { + +namespace FunctionUtils { + +// Checks if two functions are equal in all functional aspects, +// everything but their name (which can't be the same, in the same +// module!) - same params, vars, body, result, etc. +inline bool equal(Function* left, Function* right) { + if (left->getNumParams() != right->getNumParams()) return false; + if (left->getNumVars() != right->getNumVars()) return false; + for (Index i = 0; i < left->getNumLocals(); i++) { + if (left->getLocalType(i) != right->getLocalType(i)) return false; + } + if (left->result != right->result) return false; + if (left->type != right->type) return false; + return ExpressionAnalyzer::equal(left->body, right->body); +} + +} // namespace FunctionUtils + +} // namespace wasm + +#endif // wasm_ir_function_h + diff --git a/src/ir/hashed.h b/src/ir/hashed.h index dc4012455..0771da6ec 100644 --- a/src/ir/hashed.h +++ b/src/ir/hashed.h @@ -53,6 +53,53 @@ template<typename T> class HashedExpressionMap : public std::unordered_map<HashedExpression, T, ExpressionHasher, ExpressionComparer> { }; +// A pass that hashes all functions + +struct FunctionHasher : public WalkerPass<PostWalker<FunctionHasher>> { + bool isFunctionParallel() override { return true; } + + typedef uint32_t HashType; + + struct Map : public std::map<Function*, HashType> {}; + + FunctionHasher(Map* output) : output(output) {} + + FunctionHasher* create() override { + return new FunctionHasher(output); + } + + static Map createMap(Module* module) { + Map hashes; + for (auto& func : module->functions) { + hashes[func.get()] = 0; // ensure an entry for each function - we must not modify the map shape in parallel, just the values + } + return hashes; + } + + void doWalkFunction(Function* func) { + output->at(func) = hashFunction(func); + } + + static HashType hashFunction(Function* func) { + HashType ret = 0; + ret = rehash(ret, (HashType)func->getNumParams()); + for (auto type : func->params) { + ret = rehash(ret, (HashType)type); + } + ret = rehash(ret, (HashType)func->getNumVars()); + for (auto type : func->vars) { + ret = rehash(ret, (HashType)type); + } + ret = rehash(ret, (HashType)func->result); + ret = rehash(ret, HashType(func->type.is() ? std::hash<wasm::Name>{}(func->type) : HashType(0))); + ret = rehash(ret, (HashType)ExpressionAnalyzer::hash(func->body)); + return ret; + } + +private: + Map* output; +}; + } // namespace wasm #endif // _wasm_ir_hashed_h diff --git a/src/passes/DuplicateFunctionElimination.cpp b/src/passes/DuplicateFunctionElimination.cpp index 027d16b4a..60667cdd6 100644 --- a/src/passes/DuplicateFunctionElimination.cpp +++ b/src/passes/DuplicateFunctionElimination.cpp @@ -23,43 +23,11 @@ #include "wasm.h" #include "pass.h" #include "ir/utils.h" -#include "support/hash.h" +#include "ir/function-utils.h" +#include "ir/hashed.h" namespace wasm { -struct FunctionHasher : public WalkerPass<PostWalker<FunctionHasher>> { - bool isFunctionParallel() override { return true; } - - FunctionHasher(std::map<Function*, uint32_t>* output) : output(output) {} - - FunctionHasher* create() override { - return new FunctionHasher(output); - } - - void doWalkFunction(Function* func) { - assert(digest == 0); - hash(func->getNumParams()); - for (auto type : func->params) hash(type); - hash(func->getNumVars()); - for (auto type : func->vars) hash(type); - hash(func->result); - hash64(func->type.is() ? uint64_t(func->type.str) : uint64_t(0)); - hash(ExpressionAnalyzer::hash(func->body)); - output->at(func) = digest; - } - -private: - std::map<Function*, uint32_t>* output; - uint32_t digest = 0; - - void hash(uint32_t hash) { - digest = rehash(digest, hash); - } - void hash64(uint64_t hash) { - digest = rehash(rehash(digest, uint32_t(hash >> 32)), uint32_t(hash)); - }; -}; - struct FunctionReplacer : public WalkerPass<PostWalker<FunctionReplacer>> { bool isFunctionParallel() override { return true; } @@ -82,12 +50,22 @@ private: struct DuplicateFunctionElimination : public Pass { void run(PassRunner* runner, Module* module) override { - while (1) { + // Multiple iterations may be necessary: A and B may be identical only after we + // see the functions C1 and C2 that they call are in fact identical. Rarely, such + // "chains" can be very long, so we limit how many we do. + auto& options = runner->options; + Index limit; + if (options.optimizeLevel >= 3 || options.shrinkLevel >= 1) { + limit = module->functions.size(); // no limit + } else if (options.optimizeLevel >= 2) { + limit = 10; // 10 passes usually does most of the work, as this is typically logarithmic + } else { + limit = 1; + } + while (limit > 0) { + limit--; // Hash all the functions - hashes.clear(); - for (auto& func : module->functions) { - hashes[func.get()] = 0; // ensure an entry for each function - we must not modify the map shape in parallel, just the values - } + auto hashes = FunctionHasher::createMap(module); PassRunner hasherRunner(module); hasherRunner.setIsNested(true); hasherRunner.add<FunctionHasher>(&hashes); @@ -113,7 +91,7 @@ struct DuplicateFunctionElimination : public Pass { for (Index j = i + 1; j < size; j++) { auto* second = group[j]; if (duplicates.count(second->name)) continue; - if (equal(first, second)) { + if (FunctionUtils::equal(first, second)) { // great, we can replace the second with the first! replacements[second->name] = first->name; duplicates.insert(second->name); @@ -162,20 +140,6 @@ struct DuplicateFunctionElimination : public Pass { } } } - -private: - std::map<Function*, uint32_t> hashes; - - bool equal(Function* left, Function* right) { - if (left->getNumParams() != right->getNumParams()) return false; - if (left->getNumVars() != right->getNumVars()) return false; - for (Index i = 0; i < left->getNumLocals(); i++) { - if (left->getLocalType(i) != right->getLocalType(i)) return false; - } - if (left->result != right->result) return false; - if (left->type != right->type) return false; - return ExpressionAnalyzer::equal(left->body, right->body); - } }; Pass *createDuplicateFunctionEliminationPass() { diff --git a/src/support/hash.h b/src/support/hash.h index 9edbdd2c7..158f20773 100644 --- a/src/support/hash.h +++ b/src/support/hash.h @@ -22,7 +22,8 @@ namespace wasm { -inline uint32_t rehash(uint32_t x, uint32_t y) { // see http://www.cse.yorku.ca/~oz/hash.html +inline uint32_t rehash(uint32_t x, uint32_t y) { + // see http://www.cse.yorku.ca/~oz/hash.html and https://stackoverflow.com/a/2595226/1176841 uint32_t hash = 5381; while (x) { hash = ((hash << 5) + hash) ^ (x & 0xff); @@ -35,8 +36,10 @@ inline uint32_t rehash(uint32_t x, uint32_t y) { // see http://www.cse.yorku.ca/ return hash; } -inline uint64_t rehash(uint64_t x, uint64_t y) { // see boost and https://stackoverflow.com/a/2595226/1176841 - return x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2)); +inline uint64_t rehash(uint64_t x, uint64_t y) { + auto ret = rehash(uint32_t(x), uint32_t(x >> 32)); + ret = rehash(ret, uint32_t(y)); + return rehash(ret, uint32_t(y >> 32)); } } // namespace wasm diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h index c433a81d8..062fc29a6 100644 --- a/src/tools/fuzzing.h +++ b/src/tools/fuzzing.h @@ -1438,3 +1438,6 @@ private: } // namespace wasm // XXX Switch class has a condition?! is it real? should the node type be the value type if it exists?! + +// TODO copy an existing function and replace just one node in it + |