summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Wirtz <dcode@dcode.io>2020-08-13 02:48:52 +0200
committerGitHub <noreply@github.com>2020-08-12 17:48:52 -0700
commit902469769dee0a3f61e7e5aaca597d3cbac139ad (patch)
tree024c9e010e9a1d406632994df747a1657eff937a
parentf067a45c1e88124173af992e66a7125fe6ab366a (diff)
downloadbinaryen-902469769dee0a3f61e7e5aaca597d3cbac139ad.tar.gz
binaryen-902469769dee0a3f61e7e5aaca597d3cbac139ad.tar.bz2
binaryen-902469769dee0a3f61e7e5aaca597d3cbac139ad.zip
Refactor hashing (#3023)
* Unifies internal hashing helpers to naturally integrate with std::hash * Removes the previous custom implementation * Computed hashes are now always size_t * Introduces a hash_combine helper * Fixes an overwritten partial hash in Relooper.cpp
-rw-r--r--src/cfg/Relooper.cpp36
-rw-r--r--src/ir/ExpressionAnalyzer.cpp29
-rw-r--r--src/ir/ExpressionManipulator.cpp1
-rw-r--r--src/ir/hashed.h25
-rw-r--r--src/ir/utils.h2
-rw-r--r--src/literal.h13
-rw-r--r--src/passes/CodeFolding.cpp16
-rw-r--r--src/passes/LocalCSE.cpp8
-rw-r--r--src/passes/pass.cpp2
-rw-r--r--src/support/hash.h33
-rw-r--r--src/wasm/wasm-type.cpp13
11 files changed, 86 insertions, 92 deletions
diff --git a/src/cfg/Relooper.cpp b/src/cfg/Relooper.cpp
index 0c9b581bd..b3bbe0153 100644
--- a/src/cfg/Relooper.cpp
+++ b/src/cfg/Relooper.cpp
@@ -669,8 +669,7 @@ struct Optimizer : public RelooperRecursor {
std::cout << "at parent " << ParentBlock->Id << '\n';
#endif
if (ParentBlock->BranchesOut.size() >= 2) {
- std::unordered_map<wasm::HashType, std::vector<BranchBlock>>
- HashedBranchesOut;
+ std::unordered_map<size_t, std::vector<BranchBlock>> HashedBranchesOut;
std::vector<Block*> BlocksToErase;
for (auto& iter : ParentBlock->BranchesOut) {
Block* CurrBlock = iter.first;
@@ -999,41 +998,42 @@ private:
// (like Shapes). Only partially hashes the branches out, no
// recursion: hashes the branch infos, looks at raw pointers
// for the blocks.
- wasm::HashType Hash(Block* Curr) {
- wasm::HashType Ret = wasm::ExpressionAnalyzer::hash(Curr->Code);
- Ret = wasm::rehash(Ret, 1);
+ size_t Hash(Block* Curr) {
+ auto digest = wasm::ExpressionAnalyzer::hash(Curr->Code);
+ wasm::rehash(digest, uint8_t(1));
if (Curr->SwitchCondition) {
- Ret = wasm::ExpressionAnalyzer::hash(Curr->SwitchCondition);
+ wasm::hash_combine(digest,
+ wasm::ExpressionAnalyzer::hash(Curr->SwitchCondition));
}
- Ret = wasm::rehash(Ret, 2);
+ wasm::rehash(digest, uint8_t(2));
for (auto& Pair : Curr->BranchesOut) {
// Hash the Block* as a pointer TODO: full hash?
- Ret =
- wasm::rehash(Ret, wasm::HashType(reinterpret_cast<size_t>(Pair.first)));
+ wasm::rehash(digest, reinterpret_cast<size_t>(Pair.first));
// Hash the Branch info properly
- Ret = wasm::rehash(Ret, Hash(Pair.second));
+ wasm::hash_combine(digest, Hash(Pair.second));
}
- return Ret;
+ return digest;
}
// Hashes the direct block contents, but not Relooper internals
// (like Shapes).
- wasm::HashType Hash(Branch* Curr) {
- wasm::HashType Ret = 0;
+ size_t Hash(Branch* Curr) {
+ auto digest = wasm::hash(0);
if (Curr->SwitchValues) {
for (auto i : *Curr->SwitchValues) {
- Ret = wasm::rehash(Ret, i); // TODO hash i
+ wasm::rehash(digest, i); // TODO hash i
}
} else {
if (Curr->Condition) {
- Ret = wasm::ExpressionAnalyzer::hash(Curr->Condition);
+ wasm::hash_combine(digest,
+ wasm::ExpressionAnalyzer::hash(Curr->Condition));
}
}
- Ret = wasm::rehash(Ret, 1);
+ wasm::rehash(digest, uint8_t(1));
if (Curr->Code) {
- Ret = wasm::ExpressionAnalyzer::hash(Curr->Code);
+ wasm::hash_combine(digest, wasm::ExpressionAnalyzer::hash(Curr->Code));
}
- return Ret;
+ return digest;
}
};
diff --git a/src/ir/ExpressionAnalyzer.cpp b/src/ir/ExpressionAnalyzer.cpp
index a6f4b601f..cdbcd9ae0 100644
--- a/src/ir/ExpressionAnalyzer.cpp
+++ b/src/ir/ExpressionAnalyzer.cpp
@@ -408,9 +408,9 @@ bool ExpressionAnalyzer::flexibleEqual(Expression* left,
}
// hash an expression, ignoring superficial details like specific internal names
-HashType ExpressionAnalyzer::hash(Expression* curr) {
+size_t ExpressionAnalyzer::hash(Expression* curr) {
struct Hasher {
- HashType digest = 0;
+ size_t digest = wasm::hash(0);
Index internalCounter = 0;
// for each internal name, its unique id
@@ -432,7 +432,7 @@ HashType ExpressionAnalyzer::hash(Expression* curr) {
if (!curr) {
continue;
}
- hash(curr->_id);
+ rehash(digest, curr->_id);
// we often don't need to hash the type, as it is tied to other values
// we are hashing anyhow, but there are exceptions: for example, a
// local.get's type is determined by the function, so if we are
@@ -441,7 +441,7 @@ HashType ExpressionAnalyzer::hash(Expression* curr) {
// if we hash between modules, then we need to take int account
// call_imports type, etc. The simplest thing is just to hash the
// type for all of them.
- hash(curr->type.getID());
+ rehash(digest, curr->type.getID());
// Blocks and loops introduce scoping.
if (auto* block = curr->dynCast<Block>()) {
noteScopeName(block->name);
@@ -459,15 +459,10 @@ HashType ExpressionAnalyzer::hash(Expression* curr) {
}
// Sometimes children are optional, e.g. return, so we must hash
// their number as well.
- hash(counter);
+ rehash(digest, counter);
}
}
- void hash(HashType hash) { digest = rehash(digest, hash); }
- void hash64(uint64_t hash) {
- digest = rehash(rehash(digest, HashType(hash >> 32)), HashType(hash));
- }
-
void visitScopeName(Name curr) {
// Names are relative, we give the same hash for
// (block $x (br $x))
@@ -475,21 +470,21 @@ HashType ExpressionAnalyzer::hash(Expression* curr) {
static_assert(sizeof(Index) == sizeof(int32_t),
"wasm64 will need changes here");
assert(internalNames.find(curr) != internalNames.end());
- return hash(internalNames[curr]);
+ rehash(digest, internalNames[curr]);
}
- void visitNonScopeName(Name curr) { return hash64(uint64_t(curr.str)); }
- void visitInt(int32_t curr) { hash(curr); }
- void visitLiteral(Literal curr) { hash(std::hash<Literal>()(curr)); }
- void visitType(Type curr) { hash(int32_t(curr.getSingle())); }
+ void visitNonScopeName(Name curr) { rehash(digest, uint64_t(curr.str)); }
+ void visitInt(int32_t curr) { rehash(digest, curr); }
+ void visitLiteral(Literal curr) { rehash(digest, curr); }
+ void visitType(Type curr) { rehash(digest, curr.getID()); }
void visitIndex(Index curr) {
static_assert(sizeof(Index) == sizeof(int32_t),
"wasm64 will need changes here");
- hash(int32_t(curr));
+ rehash(digest, curr);
}
void visitAddress(Address curr) {
static_assert(sizeof(Address) == sizeof(int32_t),
"wasm64 will need changes here");
- hash(int32_t(curr));
+ rehash(digest, curr.addr);
}
};
diff --git a/src/ir/ExpressionManipulator.cpp b/src/ir/ExpressionManipulator.cpp
index a9fd3c599..57048b9bd 100644
--- a/src/ir/ExpressionManipulator.cpp
+++ b/src/ir/ExpressionManipulator.cpp
@@ -16,7 +16,6 @@
#include "ir/load-utils.h"
#include "ir/utils.h"
-#include "support/hash.h"
namespace wasm {
diff --git a/src/ir/hashed.h b/src/ir/hashed.h
index 563e3abfe..fe959936a 100644
--- a/src/ir/hashed.h
+++ b/src/ir/hashed.h
@@ -26,16 +26,18 @@ namespace wasm {
// An expression with a cached hash value
struct HashedExpression {
Expression* expr;
- HashType hash;
+ size_t digest;
HashedExpression(Expression* expr) : expr(expr) {
if (expr) {
- hash = ExpressionAnalyzer::hash(expr);
+ digest = ExpressionAnalyzer::hash(expr);
+ } else {
+ digest = hash(0);
}
}
HashedExpression(const HashedExpression& other)
- : expr(other.expr), hash(other.hash) {}
+ : expr(other.expr), digest(other.digest) {}
};
// A pass that hashes all functions
@@ -43,7 +45,7 @@ struct HashedExpression {
struct FunctionHasher : public WalkerPass<PostWalker<FunctionHasher>> {
bool isFunctionParallel() override { return true; }
- struct Map : public std::map<Function*, HashType> {};
+ struct Map : public std::map<Function*, size_t> {};
FunctionHasher(Map* output) : output(output) {}
@@ -54,22 +56,21 @@ struct FunctionHasher : public WalkerPass<PostWalker<FunctionHasher>> {
for (auto& func : module->functions) {
// ensure an entry for each function - we must not modify the map shape in
// parallel, just the values
- hashes[func.get()] = 0;
+ hashes[func.get()] = hash(0);
}
return hashes;
}
void doWalkFunction(Function* func) { output->at(func) = hashFunction(func); }
- static HashType hashFunction(Function* func) {
- HashType ret = 0;
- ret = rehash(ret, (HashType)func->sig.params.getID());
- ret = rehash(ret, (HashType)func->sig.results.getID());
+ static size_t hashFunction(Function* func) {
+ auto digest = hash(func->sig.params.getID());
+ rehash(digest, func->sig.results.getID());
for (auto type : func->vars) {
- ret = rehash(ret, (HashType)type.getID());
+ rehash(digest, type.getID());
}
- ret = rehash(ret, (HashType)ExpressionAnalyzer::hash(func->body));
- return ret;
+ hash_combine(digest, ExpressionAnalyzer::hash(func->body));
+ return digest;
}
private:
diff --git a/src/ir/utils.h b/src/ir/utils.h
index a7f6f59bf..176699591 100644
--- a/src/ir/utils.h
+++ b/src/ir/utils.h
@@ -78,7 +78,7 @@ struct ExpressionAnalyzer {
// hash an expression, ignoring superficial details like specific internal
// names
- static HashType hash(Expression* curr);
+ static size_t hash(Expression* curr);
};
// Re-Finalizes all node types. This can be run after code was modified in
diff --git a/src/literal.h b/src/literal.h
index 4f936153d..c58dbb35c 100644
--- a/src/literal.h
+++ b/src/literal.h
@@ -528,18 +528,19 @@ template<> struct hash<wasm::Literal> {
a.getBits(bytes);
int64_t chunks[2];
memcpy(chunks, bytes, sizeof(chunks));
- return wasm::rehash(wasm::rehash(uint64_t(hash<uint32_t>()(a.type.getID())),
- uint64_t(hash<int64_t>()(chunks[0]))),
- uint64_t(hash<int64_t>()(chunks[1])));
+ auto digest = wasm::hash(a.type.getID());
+ wasm::rehash(digest, chunks[0]);
+ wasm::rehash(digest, chunks[1]);
+ return digest;
}
};
template<> struct hash<wasm::Literals> {
size_t operator()(const wasm::Literals& a) const {
- size_t h = wasm::rehash(uint64_t(0), uint64_t(a.size()));
+ auto digest = wasm::hash(a.size());
for (const auto& lit : a) {
- h = wasm::rehash(uint64_t(h), uint64_t(hash<wasm::Literal>{}(lit)));
+ wasm::rehash(digest, lit);
}
- return h;
+ return digest;
}
};
template<> struct less<wasm::Literal> {
diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp
index 04819a04d..ee54c50a6 100644
--- a/src/passes/CodeFolding.cpp
+++ b/src/passes/CodeFolding.cpp
@@ -606,25 +606,25 @@ private:
if (next.size() >= 2) {
// now we want to find a mergeable item - any item that is equal among a
// subset
- std::map<Expression*, HashType> hashes; // expression => hash value
+ std::map<Expression*, size_t> hashes; // expression => hash value
// hash value => expressions with that hash
- std::map<HashType, std::vector<Expression*>> hashed;
+ std::map<size_t, std::vector<Expression*>> hashed;
for (auto& tail : next) {
auto* item = getItem(tail, num);
auto hash = hashes[item] = ExpressionAnalyzer::hash(item);
hashed[hash].push_back(item);
}
// look at each hash value exactly once. we do this in a deterministic
- // order.
- std::set<HashType> seen;
+ // order by iterating over a vector retaining insertion order.
+ std::set<size_t> seen;
for (auto& tail : next) {
auto* item = getItem(tail, num);
- auto hash = hashes[item];
- if (seen.count(hash)) {
+ auto digest = hashes[item];
+ if (seen.count(digest)) {
continue;
}
- seen.insert(hash);
- auto& items = hashed[hash];
+ seen.insert(digest);
+ auto& items = hashed[digest];
if (items.size() == 1) {
continue;
}
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp
index 81a82d1c6..f5cf323ec 100644
--- a/src/passes/LocalCSE.cpp
+++ b/src/passes/LocalCSE.cpp
@@ -62,14 +62,16 @@ struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> {
};
struct UsableHasher {
- HashType operator()(const Usable value) const {
- return rehash(uint64_t(value.hashed.hash), value.localType.getID());
+ size_t operator()(const Usable value) const {
+ auto digest = value.hashed.digest;
+ wasm::rehash(digest, value.localType.getID());
+ return digest;
}
};
struct UsableComparer {
bool operator()(const Usable a, const Usable b) const {
- if (a.hashed.hash != b.hashed.hash || a.localType != b.localType) {
+ if (a.hashed.digest != b.hashed.digest || a.localType != b.localType) {
return false;
}
return ExpressionAnalyzer::equal(a.hashed.expr, b.hashed.expr);
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 5a0dedf37..73e5094e8 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -686,7 +686,7 @@ struct AfterEffectFunctionChecker {
// Check Stack IR state: if the main IR changes, there should be no
// stack IR, as the stack IR would be wrong.
bool beganWithStackIR;
- HashType originalFunctionHash;
+ size_t originalFunctionHash;
// In the creator we can scan the state of the module and function before the
// pass runs.
diff --git a/src/support/hash.h b/src/support/hash.h
index 647369ac8..50ecec563 100644
--- a/src/support/hash.h
+++ b/src/support/hash.h
@@ -22,27 +22,22 @@
namespace wasm {
-typedef uint32_t HashType;
-
-inline HashType rehash(HashType x, HashType y) {
- // see http://www.cse.yorku.ca/~oz/hash.html and
- // https://stackoverflow.com/a/2595226/1176841
- HashType hash = 5381;
- while (x) {
- hash = ((hash << 5) + hash) ^ (x & 0xff);
- x >>= 8;
- }
- while (y) {
- hash = ((hash << 5) + hash) ^ (y & 0xff);
- y >>= 8;
- }
- return hash;
+// Computes the digest of `value`.
+template<typename T> inline std::size_t hash(const T& value) {
+ return std::hash<T>{}(value);
}
-inline uint64_t rehash(uint64_t x, uint64_t y) {
- auto ret = rehash(HashType(x), HashType(x >> 32));
- ret = rehash(ret, HashType(y));
- return rehash(ret, HashType(y >> 32));
+// Combines two digests into the first digest. Use instead of `rehash` if
+// `otherDigest` is another digest and not a `size_t` value.
+static inline void hash_combine(std::size_t& digest, std::size_t otherDigest) {
+ // see boost/container_hash/hash.hpp
+ digest ^= otherDigest + 0x9e3779b9 + (digest << 6) + (digest >> 2);
+}
+
+// Hashes `value` and combines the resulting digest into the existing digest.
+// Use instead of `hash_combine` if `value` is not another digest.
+template<typename T> inline void rehash(std::size_t& digest, const T& value) {
+ hash_combine(digest, hash(value));
}
} // namespace wasm
diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp
index b49f0338f..c8d8a94c5 100644
--- a/src/wasm/wasm-type.cpp
+++ b/src/wasm/wasm-type.cpp
@@ -30,21 +30,22 @@ namespace std {
template<> class hash<vector<wasm::Type>> {
public:
size_t operator()(const vector<wasm::Type>& types) const {
- uint64_t res = wasm::rehash(0, uint32_t(types.size()));
+ auto digest = wasm::hash(types.size());
for (auto t : types) {
- res = wasm::rehash(res, t.getID());
+ wasm::rehash(digest, t.getID());
}
- return res;
+ return digest;
}
};
size_t hash<wasm::Type>::operator()(const wasm::Type& type) const {
- return hash<uint64_t>{}(type.getID());
+ return wasm::hash(type.getID());
}
size_t hash<wasm::Signature>::operator()(const wasm::Signature& sig) const {
- return wasm::rehash(uint64_t(hash<uint64_t>{}(sig.params.getID())),
- uint64_t(hash<uint64_t>{}(sig.results.getID())));
+ auto digest = wasm::hash(sig.params.getID());
+ wasm::rehash(digest, sig.results.getID());
+ return digest;
}
} // namespace std