diff options
-rw-r--r-- | src/passes/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/passes/hash-stringify-walker.cpp | 54 | ||||
-rw-r--r-- | src/passes/stringify-walker.h | 43 | ||||
-rw-r--r-- | test/gtest/stringify.cpp | 95 |
4 files changed, 190 insertions, 3 deletions
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 4cf28fa65..302cd7c7a 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -43,6 +43,7 @@ set(passes_SOURCES GlobalStructInference.cpp GlobalTypeOptimization.cpp GUFA.cpp + hash-stringify-walker.cpp Heap2Local.cpp I64ToI32Lowering.cpp Inlining.cpp diff --git a/src/passes/hash-stringify-walker.cpp b/src/passes/hash-stringify-walker.cpp new file mode 100644 index 000000000..d5b6e0361 --- /dev/null +++ b/src/passes/hash-stringify-walker.cpp @@ -0,0 +1,54 @@ +#include "stringify-walker.h" + +namespace wasm { + +size_t StringifyHasher::operator()(Expression* curr) const { + if (Properties::isControlFlowStructure(curr)) { + if (auto* iff = curr->dynCast<If>()) { + size_t digest = wasm::hash(iff->_id); + rehash(digest, ExpressionAnalyzer::hash(iff->ifTrue)); + if (iff->ifFalse) { + rehash(digest, ExpressionAnalyzer::hash(iff->ifFalse)); + } + return digest; + } + + return ExpressionAnalyzer::hash(curr); + } + + return ExpressionAnalyzer::shallowHash(curr); +} + +bool StringifyEquator::operator()(Expression* lhs, Expression* rhs) const { + if (Properties::isControlFlowStructure(lhs) && + Properties::isControlFlowStructure(rhs)) { + auto* iffl = lhs->dynCast<If>(); + auto* iffr = rhs->dynCast<If>(); + + if (iffl && iffr) { + return ExpressionAnalyzer::equal(iffl->ifTrue, iffr->ifTrue) && + ExpressionAnalyzer::equal(iffl->ifFalse, iffr->ifFalse); + } + + return ExpressionAnalyzer::equal(lhs, rhs); + } + + return ExpressionAnalyzer::shallowEqual(lhs, rhs); +} + +void HashStringifyWalker::addUniqueSymbol() { + // Use a negative value to distinguish symbols for separators from symbols + // for Expressions + hashString.push_back((uint64_t)-nextVal); + nextVal++; +} + +void HashStringifyWalker::visitExpression(Expression* curr) { + auto [it, inserted] = exprToCounter.insert({curr, nextVal}); + hashString.push_back(it->second); + if (inserted) { + nextVal++; + } +} + +} // namespace wasm diff --git a/src/passes/stringify-walker.h b/src/passes/stringify-walker.h index a7fa02b0b..a56a033d6 100644 --- a/src/passes/stringify-walker.h +++ b/src/passes/stringify-walker.h @@ -3,6 +3,7 @@ #include "ir/iteration.h" #include "ir/module-utils.h" +#include "ir/utils.h" #include "wasm-traversal.h" #include <queue> @@ -36,7 +37,7 @@ namespace wasm { * after the rest of the siblings of the if expression on line 2 are visited * - The if-condition (i32.const 0) on line 3 is visited before the if * expression on line 2. Similarly, the if-condition (i32.const 1) on line - * 11 is visisted before the if expression on line 10. + * 11 is visited before the if expression on line 10. * - The add (line 7) binary operator's left and right children (lines 8 - 9) * are visited first as they need to be on the stack before the add * operation is executed @@ -75,4 +76,44 @@ private: #include "stringify-walker-impl.h" +namespace wasm { + +// This custom hasher conforms to std::hash<Key>. Its purpose is to provide +// a custom hash for if expressions, so the if-condition of the if expression is +// not included in the hash for the if expression. This is needed because in the +// binary format, the if-condition comes before and is consumed by the if. To +// match the binary format, we hash the if condition before and separately from +// the rest of the if expression. +struct StringifyHasher { + size_t operator()(Expression* curr) const; +}; + +// This custom equator conforms to std::equal_to<Key>. Similar to +// StringifyHasher, it's purpose is to not include the if-condition when +// evaluating the equality of two if expressions. +struct StringifyEquator { + bool operator()(Expression* lhs, Expression* rhs) const; +}; + +struct HashStringifyWalker : public StringifyWalker<HashStringifyWalker> { + // After calling walkModule, this vector contains the result of encoding a + // wasm module as a string of uint64_t values. Each value represents either an + // Expression or a separator to mark the end of control flow. + std::vector<uint64_t> hashString; + // A monotonic counter used to ensure that unique expressions in the + // module are assigned a unique value in the hashString + uint64_t nextVal = 0; + // Contains a mapping of expression pointer to value to ensure we + // use the same value for matching expressions. A custom hasher and + // equator is provided in order to separate out evaluation of the if-condition + // when evaluating if expressions. + std::unordered_map<Expression*, uint64_t, StringifyHasher, StringifyEquator> + exprToCounter; + + void addUniqueSymbol(); + void visitExpression(Expression* curr); +}; + +} // namespace wasm + #endif // wasm_passes_stringify_walker_h diff --git a/test/gtest/stringify.cpp b/test/gtest/stringify.cpp index b96235cb6..06d5ccb0c 100644 --- a/test/gtest/stringify.cpp +++ b/test/gtest/stringify.cpp @@ -1,8 +1,6 @@ #include "ir/utils.h" #include "passes/stringify-walker.h" #include "print-test.h" -#include "wasm.h" -#include "gtest/gtest.h" using namespace wasm; @@ -125,3 +123,96 @@ adding unique symbol EXPECT_EQ(ss.str(), stringifyText); } + +TEST_F(StringifyTest, Stringify) { + auto moduleText = R"wasm( + (module + (func $a + (block $block_a + (drop (i32.const 20)) + (drop (i32.const 10)) + ) + (block $block_b + (drop (if (i32.const 0) + (i32.const 40) + (i32.const 5) + )) + ) + (block $block_c + (drop (if (i32.const 1) + (i32.const 30) + )) + ) + (block $block_d + (drop (i32.const 20)) + (drop (i32.const 10)) + ) + (block $block_e + (drop (if (i32.const 1) + (i32.const 30) + )) + ) + (block $block_f + (drop (if (i32.const 0) + (i32.const 30) + )) + ) + ) + ) + )wasm"; + + Module wasm; + parseWast(wasm, moduleText); + + HashStringifyWalker stringify = HashStringifyWalker(); + stringify.walkModule(&wasm); + + EXPECT_EQ(stringify.hashString, + (std::vector<uint64_t>{ + 0, // function block evaluated as a whole + (uint64_t)-1, // separate function block from function contents + 2, // block_a evaluated as a whole + 3, // block_b evaluated as a whole + 4, // block_c evaluated as a whole + 2, // block_d has the same contents as block_a + 4, // block_e has the same contents as block_c + 5, // block_f evaluated as a whole + (uint64_t)-6, // separate blocks from block contents + 7, // i32.const 20 + 8, // drop, all drops will be the same symbol + 9, // i32.const 10 + 8, // drop + (uint64_t)-10, // separate block_a contents + 11, // i32.const 0, if condition + 12, // block_b's if evaluated as a whole + 8, // drop + (uint64_t)-13, // separate block_b contents + 14, // i32.const 1, if condition + 15, // block_c's if evaluated as a whole + 8, // drop + (uint64_t)-16, // separate block_c contents + 7, // i32.const 20 + 8, // drop + 9, // i32.const 10 + 8, // drop + (uint64_t)-17, // separate block_d contents + 14, // i32.const 1, if condition + 15, // block_e if evaluated as a whole + 8, // drop + (uint64_t)-18, // separate block_e contents + 11, // i32.const 0, if condition + 15, // block_f's if evaluated as a whole + 8, // drop + (uint64_t)-19, // separate block_f contents + 20, // i32.const 40 + (uint64_t)-21, // separate block_b if-true + 22, // i32.const 5 + (uint64_t)-23, // separate block_b if-false + 24, // i32.const 30 + (uint64_t)-25, // separate block_c if-true + 24, // i32.const 30 + (uint64_t)-26, // separate block_e if-true + 24, // i32.const 30 + (uint64_t)-27 // separate block_f if-true + })); +} |