/* * Copyright 2024 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // // Optimizes heap (GC) stores. // // TODO: Add dead store elimination / load forwarding here. // #include "cfg/cfg-traversal.h" #include "ir/effects.h" #include "ir/local-graph.h" #include "pass.h" #include "wasm-builder.h" #include "wasm.h" namespace wasm { namespace { // In each basic block we will store the relevant heap store operations and // other actions that matter to our analysis. struct Info { std::vector actions; }; struct HeapStoreOptimization : public WalkerPass< CFGWalker, Info>> { bool isFunctionParallel() override { return true; } // Locals are not modified here. bool requiresNonNullableLocalFixups() override { return false; } std::unique_ptr create() override { return std::make_unique(); } // Branches outside of the function can be ignored, as we only look at local // state in the function. (This may need to change if we do more general dead // store elimination.) bool ignoreBranchesOutsideOfFunc = true; // Store struct.sets and blocks, as we can find patterns among those. void addAction() { if (currBasicBlock) { currBasicBlock->contents.actions.push_back(getCurrentPointer()); } } void visitStructSet(StructSet* curr) { addAction(); } void visitBlock(Block* curr) { addAction(); } void visitFunction(Function* curr) { // Now that the walk is complete and we have a CFG, find things to optimize. for (auto& block : basicBlocks) { for (auto** currp : block->contents.actions) { auto* curr = *currp; if (auto* set = curr->dynCast()) { optimizeStructSet(set, currp); } else if (auto* block = curr->dynCast()) { optimizeBlock(block); } else { WASM_UNREACHABLE("bad action"); } } } } // Optimize a struct.set. Receives also a pointer to where it is referred to, // so we can replace it (which we do if we optimize). void optimizeStructSet(StructSet* curr, Expression** currp) { // If our reference is a tee of a struct.new, we may be able to fold the // stored value into the new itself: // // (struct.set (local.tee $x (struct.new X Y Z)) X') // => // (local.set $x (struct.new X' Y Z)) // if (auto* tee = curr->ref->dynCast()) { if (auto* new_ = tee->value->dynCast()) { if (optimizeSubsequentStructSet(new_, curr, tee)) { // Success, so we do not need the struct.set any more, and the tee // can just be a set instead of us. tee->makeSet(); *currp = tee; } } } } // Similar to the above with struct.set whose reference is a tee of a new, we // can do the same for subsequent sets in a list: // // (local.set $x (struct.new X Y Z)) // (struct.set (local.get $x) X') // => // (local.set $x (struct.new X' Y Z)) // // We also handle other struct.sets immediately after this one. If the // instruction following the new is not a struct.set we push the new down if // possible. void optimizeBlock(Block* curr) { auto& list = curr->list; for (Index i = 0; i < list.size(); i++) { auto* localSet = list[i]->dynCast(); if (!localSet) { continue; } auto* new_ = localSet->value->dynCast(); if (!new_) { continue; } // This local.set of a struct.new looks good. Find struct.sets after it to // optimize. Index localSetIndex = i; for (Index j = localSetIndex + 1; j < list.size(); j++) { // Check that the next instruction is a struct.set on the same local as // the struct.new. auto* structSet = list[j]->dynCast(); auto* localGet = structSet ? structSet->ref->dynCast() : nullptr; if (!structSet || !localGet || localGet->index != localSet->index) { // Any time the pattern no longer matches, we try to push the // struct.new further down but if it is not possible we stop // optimizing possible struct.sets for this struct.new. if (trySwap(list, localSetIndex, j)) { // Update the index and continue to try again. localSetIndex = j; continue; } break; } // The pattern matches, try to optimize. if (!optimizeSubsequentStructSet(new_, structSet, localSet)) { break; } else { // Success. Replace the set with a nop, and continue to perhaps // optimize more. ExpressionManipulator::nop(structSet); } } } } // Helper function for optimizeHeapStores. Tries pushing the struct.new at // index i down to index j, swapping it with the instruction already at j, so // that it is closer to (potential) later struct.sets. bool trySwap(ExpressionList& list, Index i, Index j) { if (j == list.size() - 1) { // There is no reason to swap with the last element of the list as it // won't match the pattern because there wont be anything after. This also // avoids swapping an instruction that does not leave anything in the // stack by one that could leave something, and that which would be // incorrect. return false; } if (list[j]->is() && list[j]->dynCast()->value->is()) { // Don't swap two struct.new instructions to avoid going back and forth. return false; } // Check if the two expressions can be swapped safely considering their // effects. auto firstEffects = effects(list[i]); auto secondEffects = effects(list[j]); if (secondEffects.invalidates(firstEffects)) { return false; } std::swap(list[i], list[j]); return true; } // Given a struct.new and a struct.set that occurs right after it, and that // applies to the same data, try to apply the set during the new. This can be // either with a nested tee: // // (struct.set // (local.tee $x (struct.new X Y Z)) // X' // ) // => // (local.set $x (struct.new X' Y Z)) // // or without: // // (local.set $x (struct.new X Y Z)) // (struct.set (local.get $x) X') // => // (local.set $x (struct.new X' Y Z)) // // Returns true if we succeeded. bool optimizeSubsequentStructSet(StructNew* new_, StructSet* set, LocalSet* localSet) { // Leave unreachable code for DCE, to avoid updating types here. if (new_->type == Type::unreachable || set->type == Type::unreachable) { return false; } auto index = set->index; auto& operands = new_->operands; auto refLocalIndex = localSet->index; // Check for effects that prevent us moving the struct.set's value (X' in // the function comment) into its new position in the struct.new. First, it // must be ok to move it past the local.set (otherwise, it might read from // memory using that local, and depend on the struct.new having already // occurred; or, if it writes to that local, then it would cross another // write). auto setValueEffects = effects(set->value); if (setValueEffects.localsRead.count(refLocalIndex) || setValueEffects.localsWritten.count(refLocalIndex)) { return false; } // We must move the set's value past indexes greater than it (Y and Z in // the example in the comment on this function). If this is not with_default // then we must check for effects. // TODO When this function is called repeatedly in a sequence this can // become quadratic - perhaps we should memoize (though, struct sizes // tend to not be ridiculously large). if (!new_->isWithDefault()) { for (Index i = index + 1; i < operands.size(); i++) { auto operandEffects = effects(operands[i]); if (operandEffects.invalidates(setValueEffects)) { // TODO: we could use locals to reorder everything return false; } } } // We must also be careful of branches out from the value that skip the // local.set, see below. if (canSkipLocalSet(set, setValueEffects, localSet)) { return false; } // We can optimize here! Builder builder(*getModule()); // If this was with_default then we add default values now. That does // increase code size in some cases (if there are many values, and few sets // that get removed), but in general this optimization is worth it. if (new_->isWithDefault()) { auto& fields = new_->type.getHeapType().getStruct().fields; for (auto& field : fields) { auto zero = Literal::makeZero(field.type); operands.push_back(builder.makeConstantExpression(zero)); } } // See if we need to keep the old value. if (effects(operands[index]).hasUnremovableSideEffects()) { operands[index] = builder.makeSequence(builder.makeDrop(operands[index]), set->value); } else { operands[index] = set->value; } return true; } // We must be careful of branches that skip the local.set. For example: // // (block $out // (local.set $x (struct.new X Y Z)) // (struct.set (local.get $x) (..br $out..)) ;; X' here has a br // ) // ..local.get $x.. // // Note how we do the local.set first. Imagine we optimized to this: // // (block $out // (local.set $x (struct.new (..br $out..) Y Z)) // ) // ..local.get $x.. // // Now the br happens first, skipping the local.set entirely, and the use // later down will not get the proper value. This is the problem we must // detect here. // // We are given a struct.set, the computed effects of its value (the caller // already has those, so this is an optimization to avoid recomputation), and // the local.set. bool canSkipLocalSet(StructSet* set, const EffectAnalyzer& setValueEffects, LocalSet* localSet) { // First, if the set's value cannot branch at all, then we have no problem. if (!setValueEffects.transfersControlFlow()) { return false; } // We may branch, so do an analysis using a LocalGraph. We will check // whether it is valid to move the local.set to where the struct.set is, so // we provide StructSet as the query class. // // It is valid to reuse the LocalGraph many times because the optimization // that we do in this pass does not generate new, dangerous control flow. We // only optimize if moving a LocalSet is valid, and that does not invalidate // any other one. if (!localGraph) { localGraph.emplace(getFunction(), getModule(), StructSet::SpecificId); } auto badGets = localGraph->canMoveSet(localSet, set); if (badGets.size() == 0) { // No problems at all. return false; } // It is ok to have a local.get in the struct.set itself, as if we optimize // then that local.get goes away anyhow, that is, // // (local.set $x (struct.new X Y Z)) // (struct.set (local.get $x) (X')) // => // (local.set $x (struct.new X' Y Z)) // // Both the local.get and the struct.set are removed. if (badGets.size() >= 2) { return true; } return *badGets.begin() != set->ref; } EffectAnalyzer effects(Expression* expr) { return EffectAnalyzer(getPassOptions(), *getModule(), expr); } private: // A local graph that is constructed the first time we need it. std::optional localGraph; }; } // anonymous namespace Pass* createHeapStoreOptimizationPass() { return new HeapStoreOptimization(); } } // namespace wasm