/* * Copyright 2023 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "stringify-walker.h" namespace wasm { size_t StringifyHasher::operator()(Expression* curr) const { if (Properties::isControlFlowStructure(curr)) { if (auto* iff = curr->dynCast()) { size_t digest = wasm::hash(iff->_id); rehash(digest, ExpressionAnalyzer::hash(iff->ifTrue)); if (iff->ifFalse) { rehash(digest, ExpressionAnalyzer::hash(iff->ifFalse)); } return digest; } return ExpressionAnalyzer::hash(curr); } return ExpressionAnalyzer::shallowHash(curr); } bool StringifyEquator::operator()(Expression* lhs, Expression* rhs) const { if (Properties::isControlFlowStructure(lhs) && Properties::isControlFlowStructure(rhs)) { auto* iffl = lhs->dynCast(); auto* iffr = rhs->dynCast(); if (iffl && iffr) { return ExpressionAnalyzer::equal(iffl->ifTrue, iffr->ifTrue) && ExpressionAnalyzer::equal(iffl->ifFalse, iffr->ifFalse); } return ExpressionAnalyzer::equal(lhs, rhs); } return ExpressionAnalyzer::shallowEqual(lhs, rhs); } void HashStringifyWalker::addUniqueSymbol(SeparatorReason reason) { // Use a negative value to distinguish symbols for separators from symbols // for Expressions assert((uint32_t)nextSeparatorVal >= nextVal); if (auto funcStart = reason.getFuncStart()) { idxToFuncName.insert({hashString.size(), funcStart->func->name}); } hashString.push_back((uint32_t)nextSeparatorVal); nextSeparatorVal--; exprs.push_back(nullptr); } void HashStringifyWalker::visitExpression(Expression* curr) { auto [it, inserted] = exprToCounter.insert({curr, nextVal}); hashString.push_back(it->second); exprs.push_back(curr); if (inserted) { nextVal++; } } std::pair HashStringifyWalker::makeRelative(uint32_t idx) const { // The upper_bound function returns an iterator to the first value in the set // that is true for idx < value. We subtract one from this returned value to // tell us which function actually contains the the idx. auto [funcIdx, func] = *--idxToFuncName.upper_bound(idx); return {idx - funcIdx, func}; } std::vector StringifyProcessor::repeatSubstrings(std::vector& hashString) { SuffixTree st(hashString); std::vector substrings(st.begin(), st.end()); for (auto& substring : substrings) { // Sort by increasing start index to ensure determinism. std::sort(substring.StartIndices.begin(), substring.StartIndices.end()); } // Substrings are sorted so that the longest substring that repeats the most // times is ordered first. This is done so that we can assume the most // worthwhile substrings to outline come first. std::sort( substrings.begin(), substrings.end(), [](SuffixTree::RepeatedSubstring a, SuffixTree::RepeatedSubstring b) { size_t aWeight = a.Length * a.StartIndices.size(); size_t bWeight = b.Length * b.StartIndices.size(); if (aWeight == bWeight) { return a.StartIndices[0] < b.StartIndices[0]; } return aWeight > bWeight; }); return substrings; } // Deduplicate substrings by iterating through the list of substrings, keeping // only those whose list of end indices is disjoint from the set of end indices // for all substrings kept so far. Substrings that are contained within other // substrings will always share an end index with those other substrings. Note // that this deduplication may be over-aggressive, since it will remove // substrings that are contained within any previous substring, even if they // have many other occurrences that are not inside other substrings. Part of the // reason dedupe can be so aggressive is an assumption 1) that the input // substrings have been sorted so that the longest substrings with the most // repeats come first and 2) these are more worthwhile to keep than subsequent // substrings of substrings, even if they appear more times. std::vector StringifyProcessor::dedupe( const std::vector& substrings) { std::unordered_set seen; std::vector result; for (auto substring : substrings) { std::vector idxToInsert; bool seenEndIdx = false; for (auto startIdx : substring.StartIndices) { // We are using the end index to ensure that each repeated substring // reported by the SuffixTree is unique. This is because LLVM's SuffixTree // reports back repeat sequences that are substrings of longer repeat // sequences with the same endIdx, and we generally prefer to outline // longer repeat sequences. uint32_t endIdx = substring.Length + startIdx; if (seen.count(endIdx)) { seenEndIdx = true; break; } idxToInsert.push_back(endIdx); } if (!seenEndIdx) { seen.insert(idxToInsert.begin(), idxToInsert.end()); result.push_back(substring); } } return result; } std::vector StringifyProcessor::filter( const std::vector& substrings, const std::vector& exprs, std::function condition) { struct FilterStringifyWalker : public StringifyWalker { bool hasFilterValue = false; std::function condition; FilterStringifyWalker(std::function condition) : condition(condition){}; void walk(Expression* curr) { hasFilterValue = false; Super::walk(curr); } void addUniqueSymbol(SeparatorReason reason) {} void visitExpression(Expression* curr) { if (condition(curr)) { hasFilterValue = true; } } }; FilterStringifyWalker walker(condition); std::vector result; for (auto substring : substrings) { bool hasFilterValue = false; for (auto idx = substring.StartIndices[0], endIdx = substring.StartIndices[0] + substring.Length; idx < endIdx; idx++) { Expression* curr = exprs[idx]; if (Properties::isControlFlowStructure(curr)) { walker.walk(curr); if (walker.hasFilterValue) { hasFilterValue = true; break; } } if (condition(curr)) { hasFilterValue = true; break; } } if (!hasFilterValue) { result.push_back(substring); } } return result; } std::vector StringifyProcessor::filterLocalSets( const std::vector& substrings, const std::vector& exprs) { return StringifyProcessor::filter( substrings, exprs, [](const Expression* curr) { return curr->is(); }); } std::vector StringifyProcessor::filterLocalGets( const std::vector& substrings, const std::vector& exprs) { return StringifyProcessor::filter( substrings, exprs, [](const Expression* curr) { return curr->is(); }); } std::vector StringifyProcessor::filterBranches( const std::vector& substrings, const std::vector& exprs) { return StringifyProcessor::filter( substrings, exprs, [](const Expression* curr) { return Properties::isBranch(curr) || curr->is(); }); } } // namespace wasm