diff options
author | Alon Zakai <azakai@google.com> | 2021-08-12 09:32:39 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-12 09:32:39 -0700 |
commit | 96d2c946329f26bb742684a70cb48e98aa55083d (patch) | |
tree | f8e102ed634485743fff031f553ecc5718207749 /src | |
parent | 18022894adb20cec9dc7ef87464180eb26881266 (diff) | |
download | binaryen-96d2c946329f26bb742684a70cb48e98aa55083d.tar.gz binaryen-96d2c946329f26bb742684a70cb48e98aa55083d.tar.bz2 binaryen-96d2c946329f26bb742684a70cb48e98aa55083d.zip |
Add a shallowHash() method (#4077)
This adds and tests the new method. It will be used in a new pass later, where
computing shallow hashes allows it to be done in linear time.
99% of the diff is whitespace.
Diffstat (limited to 'src')
-rw-r--r-- | src/ir/ExpressionAnalyzer.cpp | 152 | ||||
-rw-r--r-- | src/ir/utils.h | 3 |
2 files changed, 85 insertions, 70 deletions
diff --git a/src/ir/ExpressionAnalyzer.cpp b/src/ir/ExpressionAnalyzer.cpp index 260a9a6c4..e98b14777 100644 --- a/src/ir/ExpressionAnalyzer.cpp +++ b/src/ir/ExpressionAnalyzer.cpp @@ -250,47 +250,49 @@ bool ExpressionAnalyzer::flexibleEqual(Expression* left, return Comparer().compare(left, right, comparer); } -// hash an expression, ignoring superficial details like specific internal names -size_t ExpressionAnalyzer::hash(Expression* curr) { - struct Hasher { - size_t digest = wasm::hash(0); - - Index internalCounter = 0; - // for each internal name, its unique id - std::map<Name, Index> internalNames; - ExpressionStack stack; - - Hasher(Expression* curr) { - stack.push_back(curr); - // DELEGATE_CALLER_TARGET is a fake target used to denote delegating to - // the caller. Add it here to prevent the unknown name error. - noteScopeName(DELEGATE_CALLER_TARGET); - - while (stack.size() > 0) { - curr = stack.back(); - stack.pop_back(); - if (!curr) { - // This was an optional child that was not present. Hash a 0 to - // represent that. - rehash(digest, 0); - continue; - } - rehash(digest, curr->_id); - // we often don't need to hash the type, as it is tied to other values - // we are hashing anyhow, but there are exceptions: for example, a - // local.get's type is determined by the function, so if we are - // hashing only expression fragments, then two from different - // functions may turn out the same even if the type differs. Likewise, - // if we hash between modules, then we need to take int account - // call_imports type, etc. The simplest thing is just to hash the - // type for all of them. - rehash(digest, curr->type.getID()); - // Hash the contents of the expression. - hashExpression(curr); +namespace { + +struct Hasher { + bool visitChildren; + + size_t digest = wasm::hash(0); + + Index internalCounter = 0; + // for each internal name, its unique id + std::map<Name, Index> internalNames; + ExpressionStack stack; + + Hasher(Expression* curr, bool visitChildren) : visitChildren(visitChildren) { + stack.push_back(curr); + // DELEGATE_CALLER_TARGET is a fake target used to denote delegating to + // the caller. Add it here to prevent the unknown name error. + noteScopeName(DELEGATE_CALLER_TARGET); + + while (stack.size() > 0) { + curr = stack.back(); + stack.pop_back(); + if (!curr) { + // This was an optional child that was not present. Hash a 0 to + // represent that. + rehash(digest, 0); + continue; } + rehash(digest, curr->_id); + // we often don't need to hash the type, as it is tied to other values + // we are hashing anyhow, but there are exceptions: for example, a + // local.get's type is determined by the function, so if we are + // hashing only expression fragments, then two from different + // functions may turn out the same even if the type differs. Likewise, + // if we hash between modules, then we need to take int account + // call_imports type, etc. The simplest thing is just to hash the + // type for all of them. + rehash(digest, curr->type.getID()); + // Hash the contents of the expression. + hashExpression(curr); } + } - void hashExpression(Expression* curr) { + void hashExpression(Expression* curr) { #define DELEGATE_ID curr->_id @@ -302,7 +304,10 @@ size_t ExpressionAnalyzer::hash(Expression* curr) { // Handle each type of field, comparing it appropriately. #define DELEGATE_GET_FIELD(id, name) cast->name -#define DELEGATE_FIELD_CHILD(id, name) stack.push_back(cast->name); +#define DELEGATE_FIELD_CHILD(id, name) \ + if (visitChildren) { \ + stack.push_back(cast->name); \ + } #define HASH_FIELD(name) rehash(digest, cast->name); @@ -323,41 +328,48 @@ size_t ExpressionAnalyzer::hash(Expression* curr) { #define DELEGATE_FIELD_SCOPE_NAME_USE(id, name) visitScopeName(cast->name); #include "wasm-delegations-fields.def" - } + } - void noteScopeName(Name curr) { - if (curr.is()) { - internalNames[curr] = internalCounter++; - } + void noteScopeName(Name curr) { + if (curr.is()) { + internalNames[curr] = internalCounter++; } - void visitScopeName(Name curr) { - // We consider 3 cases here, and prefix a hash value of 0, 1, or 2 to - // maximally differentiate them. - - // Try's delegate target can be null. - if (!curr.is()) { - rehash(digest, 0); - return; - } - // Names are relative, we give the same hash for - // (block $x (br $x)) - // (block $y (br $y)) - // But if the name is not known to us, hash the absolute one. - if (!internalNames.count(curr)) { - rehash(digest, 1); - // Perform the same hashing as a generic name. - visitNonScopeName(curr); - return; - } - rehash(digest, 2); - rehash(digest, internalNames[curr]); + } + void visitScopeName(Name curr) { + // We consider 3 cases here, and prefix a hash value of 0, 1, or 2 to + // maximally differentiate them. + + // Try's delegate target can be null. + if (!curr.is()) { + rehash(digest, 0); + return; } - void visitNonScopeName(Name curr) { rehash(digest, uint64_t(curr.str)); } - void visitType(Type curr) { rehash(digest, curr.getID()); } - void visitAddress(Address curr) { rehash(digest, curr.addr); } - }; + // Names are relative, we give the same hash for + // (block $x (br $x)) + // (block $y (br $y)) + // But if the name is not known to us, hash the absolute one. + if (!internalNames.count(curr)) { + rehash(digest, 1); + // Perform the same hashing as a generic name. + visitNonScopeName(curr); + return; + } + rehash(digest, 2); + rehash(digest, internalNames[curr]); + } + void visitNonScopeName(Name curr) { rehash(digest, uint64_t(curr.str)); } + void visitType(Type curr) { rehash(digest, curr.getID()); } + void visitAddress(Address curr) { rehash(digest, curr.addr); } +}; + +} // anonymous namespace + +size_t ExpressionAnalyzer::hash(Expression* curr) { + return Hasher(curr, true).digest; +} - return Hasher(curr).digest; +size_t ExpressionAnalyzer::shallowHash(Expression* curr) { + return Hasher(curr, false).digest; } } // namespace wasm diff --git a/src/ir/utils.h b/src/ir/utils.h index 1964f148a..69d5f8e63 100644 --- a/src/ir/utils.h +++ b/src/ir/utils.h @@ -85,6 +85,9 @@ struct ExpressionAnalyzer { // hash an expression, ignoring superficial details like specific internal // names static size_t hash(Expression* curr); + + // hash an expression, ignoring child nodes. + static size_t shallowHash(Expression* curr); }; // Re-Finalizes all node types. This can be run after code was modified in |