summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2021-08-12 09:32:39 -0700
committerGitHub <noreply@github.com>2021-08-12 09:32:39 -0700
commit96d2c946329f26bb742684a70cb48e98aa55083d (patch)
treef8e102ed634485743fff031f553ecc5718207749 /src
parent18022894adb20cec9dc7ef87464180eb26881266 (diff)
downloadbinaryen-96d2c946329f26bb742684a70cb48e98aa55083d.tar.gz
binaryen-96d2c946329f26bb742684a70cb48e98aa55083d.tar.bz2
binaryen-96d2c946329f26bb742684a70cb48e98aa55083d.zip
Add a shallowHash() method (#4077)
This adds and tests the new method. It will be used in a new pass later, where computing shallow hashes allows it to be done in linear time. 99% of the diff is whitespace.
Diffstat (limited to 'src')
-rw-r--r--src/ir/ExpressionAnalyzer.cpp152
-rw-r--r--src/ir/utils.h3
2 files changed, 85 insertions, 70 deletions
diff --git a/src/ir/ExpressionAnalyzer.cpp b/src/ir/ExpressionAnalyzer.cpp
index 260a9a6c4..e98b14777 100644
--- a/src/ir/ExpressionAnalyzer.cpp
+++ b/src/ir/ExpressionAnalyzer.cpp
@@ -250,47 +250,49 @@ bool ExpressionAnalyzer::flexibleEqual(Expression* left,
return Comparer().compare(left, right, comparer);
}
-// hash an expression, ignoring superficial details like specific internal names
-size_t ExpressionAnalyzer::hash(Expression* curr) {
- struct Hasher {
- size_t digest = wasm::hash(0);
-
- Index internalCounter = 0;
- // for each internal name, its unique id
- std::map<Name, Index> internalNames;
- ExpressionStack stack;
-
- Hasher(Expression* curr) {
- stack.push_back(curr);
- // DELEGATE_CALLER_TARGET is a fake target used to denote delegating to
- // the caller. Add it here to prevent the unknown name error.
- noteScopeName(DELEGATE_CALLER_TARGET);
-
- while (stack.size() > 0) {
- curr = stack.back();
- stack.pop_back();
- if (!curr) {
- // This was an optional child that was not present. Hash a 0 to
- // represent that.
- rehash(digest, 0);
- continue;
- }
- rehash(digest, curr->_id);
- // we often don't need to hash the type, as it is tied to other values
- // we are hashing anyhow, but there are exceptions: for example, a
- // local.get's type is determined by the function, so if we are
- // hashing only expression fragments, then two from different
- // functions may turn out the same even if the type differs. Likewise,
- // if we hash between modules, then we need to take int account
- // call_imports type, etc. The simplest thing is just to hash the
- // type for all of them.
- rehash(digest, curr->type.getID());
- // Hash the contents of the expression.
- hashExpression(curr);
+namespace {
+
+struct Hasher {
+ bool visitChildren;
+
+ size_t digest = wasm::hash(0);
+
+ Index internalCounter = 0;
+ // for each internal name, its unique id
+ std::map<Name, Index> internalNames;
+ ExpressionStack stack;
+
+ Hasher(Expression* curr, bool visitChildren) : visitChildren(visitChildren) {
+ stack.push_back(curr);
+ // DELEGATE_CALLER_TARGET is a fake target used to denote delegating to
+ // the caller. Add it here to prevent the unknown name error.
+ noteScopeName(DELEGATE_CALLER_TARGET);
+
+ while (stack.size() > 0) {
+ curr = stack.back();
+ stack.pop_back();
+ if (!curr) {
+ // This was an optional child that was not present. Hash a 0 to
+ // represent that.
+ rehash(digest, 0);
+ continue;
}
+ rehash(digest, curr->_id);
+ // we often don't need to hash the type, as it is tied to other values
+ // we are hashing anyhow, but there are exceptions: for example, a
+ // local.get's type is determined by the function, so if we are
+ // hashing only expression fragments, then two from different
+ // functions may turn out the same even if the type differs. Likewise,
+ // if we hash between modules, then we need to take int account
+ // call_imports type, etc. The simplest thing is just to hash the
+ // type for all of them.
+ rehash(digest, curr->type.getID());
+ // Hash the contents of the expression.
+ hashExpression(curr);
}
+ }
- void hashExpression(Expression* curr) {
+ void hashExpression(Expression* curr) {
#define DELEGATE_ID curr->_id
@@ -302,7 +304,10 @@ size_t ExpressionAnalyzer::hash(Expression* curr) {
// Handle each type of field, comparing it appropriately.
#define DELEGATE_GET_FIELD(id, name) cast->name
-#define DELEGATE_FIELD_CHILD(id, name) stack.push_back(cast->name);
+#define DELEGATE_FIELD_CHILD(id, name) \
+ if (visitChildren) { \
+ stack.push_back(cast->name); \
+ }
#define HASH_FIELD(name) rehash(digest, cast->name);
@@ -323,41 +328,48 @@ size_t ExpressionAnalyzer::hash(Expression* curr) {
#define DELEGATE_FIELD_SCOPE_NAME_USE(id, name) visitScopeName(cast->name);
#include "wasm-delegations-fields.def"
- }
+ }
- void noteScopeName(Name curr) {
- if (curr.is()) {
- internalNames[curr] = internalCounter++;
- }
+ void noteScopeName(Name curr) {
+ if (curr.is()) {
+ internalNames[curr] = internalCounter++;
}
- void visitScopeName(Name curr) {
- // We consider 3 cases here, and prefix a hash value of 0, 1, or 2 to
- // maximally differentiate them.
-
- // Try's delegate target can be null.
- if (!curr.is()) {
- rehash(digest, 0);
- return;
- }
- // Names are relative, we give the same hash for
- // (block $x (br $x))
- // (block $y (br $y))
- // But if the name is not known to us, hash the absolute one.
- if (!internalNames.count(curr)) {
- rehash(digest, 1);
- // Perform the same hashing as a generic name.
- visitNonScopeName(curr);
- return;
- }
- rehash(digest, 2);
- rehash(digest, internalNames[curr]);
+ }
+ void visitScopeName(Name curr) {
+ // We consider 3 cases here, and prefix a hash value of 0, 1, or 2 to
+ // maximally differentiate them.
+
+ // Try's delegate target can be null.
+ if (!curr.is()) {
+ rehash(digest, 0);
+ return;
}
- void visitNonScopeName(Name curr) { rehash(digest, uint64_t(curr.str)); }
- void visitType(Type curr) { rehash(digest, curr.getID()); }
- void visitAddress(Address curr) { rehash(digest, curr.addr); }
- };
+ // Names are relative, we give the same hash for
+ // (block $x (br $x))
+ // (block $y (br $y))
+ // But if the name is not known to us, hash the absolute one.
+ if (!internalNames.count(curr)) {
+ rehash(digest, 1);
+ // Perform the same hashing as a generic name.
+ visitNonScopeName(curr);
+ return;
+ }
+ rehash(digest, 2);
+ rehash(digest, internalNames[curr]);
+ }
+ void visitNonScopeName(Name curr) { rehash(digest, uint64_t(curr.str)); }
+ void visitType(Type curr) { rehash(digest, curr.getID()); }
+ void visitAddress(Address curr) { rehash(digest, curr.addr); }
+};
+
+} // anonymous namespace
+
+size_t ExpressionAnalyzer::hash(Expression* curr) {
+ return Hasher(curr, true).digest;
+}
- return Hasher(curr).digest;
+size_t ExpressionAnalyzer::shallowHash(Expression* curr) {
+ return Hasher(curr, false).digest;
}
} // namespace wasm
diff --git a/src/ir/utils.h b/src/ir/utils.h
index 1964f148a..69d5f8e63 100644
--- a/src/ir/utils.h
+++ b/src/ir/utils.h
@@ -85,6 +85,9 @@ struct ExpressionAnalyzer {
// hash an expression, ignoring superficial details like specific internal
// names
static size_t hash(Expression* curr);
+
+ // hash an expression, ignoring child nodes.
+ static size_t shallowHash(Expression* curr);
};
// Re-Finalizes all node types. This can be run after code was modified in