diff options
-rw-r--r-- | src/ir/ExpressionAnalyzer.cpp | 152 | ||||
-rw-r--r-- | src/ir/utils.h | 3 | ||||
-rw-r--r-- | test/example/hash.cpp | 33 |
3 files changed, 116 insertions, 72 deletions
diff --git a/src/ir/ExpressionAnalyzer.cpp b/src/ir/ExpressionAnalyzer.cpp index 260a9a6c4..e98b14777 100644 --- a/src/ir/ExpressionAnalyzer.cpp +++ b/src/ir/ExpressionAnalyzer.cpp @@ -250,47 +250,49 @@ bool ExpressionAnalyzer::flexibleEqual(Expression* left, return Comparer().compare(left, right, comparer); } -// hash an expression, ignoring superficial details like specific internal names -size_t ExpressionAnalyzer::hash(Expression* curr) { - struct Hasher { - size_t digest = wasm::hash(0); - - Index internalCounter = 0; - // for each internal name, its unique id - std::map<Name, Index> internalNames; - ExpressionStack stack; - - Hasher(Expression* curr) { - stack.push_back(curr); - // DELEGATE_CALLER_TARGET is a fake target used to denote delegating to - // the caller. Add it here to prevent the unknown name error. - noteScopeName(DELEGATE_CALLER_TARGET); - - while (stack.size() > 0) { - curr = stack.back(); - stack.pop_back(); - if (!curr) { - // This was an optional child that was not present. Hash a 0 to - // represent that. - rehash(digest, 0); - continue; - } - rehash(digest, curr->_id); - // we often don't need to hash the type, as it is tied to other values - // we are hashing anyhow, but there are exceptions: for example, a - // local.get's type is determined by the function, so if we are - // hashing only expression fragments, then two from different - // functions may turn out the same even if the type differs. Likewise, - // if we hash between modules, then we need to take int account - // call_imports type, etc. The simplest thing is just to hash the - // type for all of them. - rehash(digest, curr->type.getID()); - // Hash the contents of the expression. - hashExpression(curr); +namespace { + +struct Hasher { + bool visitChildren; + + size_t digest = wasm::hash(0); + + Index internalCounter = 0; + // for each internal name, its unique id + std::map<Name, Index> internalNames; + ExpressionStack stack; + + Hasher(Expression* curr, bool visitChildren) : visitChildren(visitChildren) { + stack.push_back(curr); + // DELEGATE_CALLER_TARGET is a fake target used to denote delegating to + // the caller. Add it here to prevent the unknown name error. + noteScopeName(DELEGATE_CALLER_TARGET); + + while (stack.size() > 0) { + curr = stack.back(); + stack.pop_back(); + if (!curr) { + // This was an optional child that was not present. Hash a 0 to + // represent that. + rehash(digest, 0); + continue; } + rehash(digest, curr->_id); + // we often don't need to hash the type, as it is tied to other values + // we are hashing anyhow, but there are exceptions: for example, a + // local.get's type is determined by the function, so if we are + // hashing only expression fragments, then two from different + // functions may turn out the same even if the type differs. Likewise, + // if we hash between modules, then we need to take int account + // call_imports type, etc. The simplest thing is just to hash the + // type for all of them. + rehash(digest, curr->type.getID()); + // Hash the contents of the expression. + hashExpression(curr); } + } - void hashExpression(Expression* curr) { + void hashExpression(Expression* curr) { #define DELEGATE_ID curr->_id @@ -302,7 +304,10 @@ size_t ExpressionAnalyzer::hash(Expression* curr) { // Handle each type of field, comparing it appropriately. #define DELEGATE_GET_FIELD(id, name) cast->name -#define DELEGATE_FIELD_CHILD(id, name) stack.push_back(cast->name); +#define DELEGATE_FIELD_CHILD(id, name) \ + if (visitChildren) { \ + stack.push_back(cast->name); \ + } #define HASH_FIELD(name) rehash(digest, cast->name); @@ -323,41 +328,48 @@ size_t ExpressionAnalyzer::hash(Expression* curr) { #define DELEGATE_FIELD_SCOPE_NAME_USE(id, name) visitScopeName(cast->name); #include "wasm-delegations-fields.def" - } + } - void noteScopeName(Name curr) { - if (curr.is()) { - internalNames[curr] = internalCounter++; - } + void noteScopeName(Name curr) { + if (curr.is()) { + internalNames[curr] = internalCounter++; } - void visitScopeName(Name curr) { - // We consider 3 cases here, and prefix a hash value of 0, 1, or 2 to - // maximally differentiate them. - - // Try's delegate target can be null. - if (!curr.is()) { - rehash(digest, 0); - return; - } - // Names are relative, we give the same hash for - // (block $x (br $x)) - // (block $y (br $y)) - // But if the name is not known to us, hash the absolute one. - if (!internalNames.count(curr)) { - rehash(digest, 1); - // Perform the same hashing as a generic name. - visitNonScopeName(curr); - return; - } - rehash(digest, 2); - rehash(digest, internalNames[curr]); + } + void visitScopeName(Name curr) { + // We consider 3 cases here, and prefix a hash value of 0, 1, or 2 to + // maximally differentiate them. + + // Try's delegate target can be null. + if (!curr.is()) { + rehash(digest, 0); + return; } - void visitNonScopeName(Name curr) { rehash(digest, uint64_t(curr.str)); } - void visitType(Type curr) { rehash(digest, curr.getID()); } - void visitAddress(Address curr) { rehash(digest, curr.addr); } - }; + // Names are relative, we give the same hash for + // (block $x (br $x)) + // (block $y (br $y)) + // But if the name is not known to us, hash the absolute one. + if (!internalNames.count(curr)) { + rehash(digest, 1); + // Perform the same hashing as a generic name. + visitNonScopeName(curr); + return; + } + rehash(digest, 2); + rehash(digest, internalNames[curr]); + } + void visitNonScopeName(Name curr) { rehash(digest, uint64_t(curr.str)); } + void visitType(Type curr) { rehash(digest, curr.getID()); } + void visitAddress(Address curr) { rehash(digest, curr.addr); } +}; + +} // anonymous namespace + +size_t ExpressionAnalyzer::hash(Expression* curr) { + return Hasher(curr, true).digest; +} - return Hasher(curr).digest; +size_t ExpressionAnalyzer::shallowHash(Expression* curr) { + return Hasher(curr, false).digest; } } // namespace wasm diff --git a/src/ir/utils.h b/src/ir/utils.h index 1964f148a..69d5f8e63 100644 --- a/src/ir/utils.h +++ b/src/ir/utils.h @@ -85,6 +85,9 @@ struct ExpressionAnalyzer { // hash an expression, ignoring superficial details like specific internal // names static size_t hash(Expression* curr); + + // hash an expression, ignoring child nodes. + static size_t shallowHash(Expression* curr); }; // Re-Finalizes all node types. This can be run after code was modified in diff --git a/test/example/hash.cpp b/test/example/hash.cpp index ba9ca24ea..944cef14c 100644 --- a/test/example/hash.cpp +++ b/test/example/hash.cpp @@ -11,6 +11,14 @@ using namespace wasm; #define assertNotEqual(left, right) \ assert(ExpressionAnalyzer::hash(&left) != ExpressionAnalyzer::hash(&right)); +#define assertShallowEqual(left, right) \ + assert(ExpressionAnalyzer::shallowHash(&left) == \ + ExpressionAnalyzer::shallowHash(&right)); + +#define assertShallowNotEqual(left, right) \ + assert(ExpressionAnalyzer::shallowHash(&left) != \ + ExpressionAnalyzer::shallowHash(&right)); + int main() { { Const x, y; @@ -33,7 +41,7 @@ int main() { assertNotEqual(x, y); } { - // Nested child. + // Nested identical child. Drop dx, dy; Const x, y; x.set(Literal(int32_t(10))); @@ -43,7 +51,17 @@ int main() { assertEqual(dx, dy); } { - // Nested child. + // Nested identical child, checked shallowly. + Drop dx, dy; + Const x, y; + x.set(Literal(int32_t(10))); + y.set(Literal(int32_t(10))); + dx.value = &x; + dy.value = &y; + assertShallowEqual(dx, dy); + } + { + // Nested different child. Drop dx, dy; Const x, y; x.set(Literal(int32_t(10))); @@ -52,6 +70,17 @@ int main() { dy.value = &y; assertNotEqual(dx, dy); } + { + // Nested different child, checked shallowly (so we ignore the difference, + // and return equal). + Drop dx, dy; + Const x, y; + x.set(Literal(int32_t(10))); + y.set(Literal(int32_t(11))); + dx.value = &x; + dy.value = &y; + assertShallowEqual(dx, dy); + } MixedArena arena; { // Blocks |