Diffstat (limited to 'src/passes/HeapStoreOptimization.cpp')
-rw-r--r--  src/passes/HeapStoreOptimization.cpp  239
1 files changed, 239 insertions, 0 deletions
diff --git a/src/passes/HeapStoreOptimization.cpp b/src/passes/HeapStoreOptimization.cpp
new file mode 100644
index 000000000..566290c36
--- /dev/null
+++ b/src/passes/HeapStoreOptimization.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2024 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Optimizes heap (GC) stores.
+//
+// TODO: Add dead store elimination / load forwarding here.
+//
+
+#include "ir/effects.h"
+#include "pass.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+struct HeapStoreOptimization
+ : public WalkerPass<PostWalker<HeapStoreOptimization>> {
+ bool isFunctionParallel() override { return true; }
+
+ // Locals are not modified here.
+ bool requiresNonNullableLocalFixups() override { return false; }
+
+ std::unique_ptr<Pass> create() override {
+ return std::make_unique<HeapStoreOptimization>();
+ }
+
+ void visitStructSet(StructSet* curr) {
+ // If our reference is a tee of a struct.new, we may be able to fold the
+ // stored value into the new itself:
+ //
+ // (struct.set (local.tee $x (struct.new X Y Z)) X')
+ // =>
+ // (local.set $x (struct.new X' Y Z))
+ //
+ if (auto* tee = curr->ref->dynCast<LocalSet>()) {
+ if (auto* new_ = tee->value->dynCast<StructNew>()) {
+ if (optimizeSubsequentStructSet(new_, curr, tee->index)) {
+          // Success, so we no longer need the struct.set, and the tee can
+          // become a plain local.set, which replaces us.
+ tee->makeSet();
+ replaceCurrent(tee);
+ }
+ }
+ }
+ }
+
+  // Similar to the above, where the struct.set's reference is a tee of a new,
+  // we can do the same when the local.set and the struct.set are subsequent
+  // items in a block's list:
+ //
+ // (local.set $x (struct.new X Y Z))
+ // (struct.set (local.get $x) X')
+ // =>
+ // (local.set $x (struct.new X' Y Z))
+ //
+  // We also handle further struct.sets that follow this one. If the
+  // instruction following the new is not such a struct.set, we try to push
+  // the new down past it, so that it may reach one.
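+  //
+  // For instance (an illustrative sketch; $x, $y and the values here are
+  // hypothetical, and the swap only happens when effects allow it), an
+  // intervening instruction that does not interfere can be stepped over:
+  //
+  //  (local.set $x (struct.new X Y Z))
+  //  (local.set $y (i32.const 1))
+  //  (struct.set (local.get $x) X')
+  // =>
+  //  (local.set $y (i32.const 1))
+  //  (local.set $x (struct.new X Y Z))   ;; pushed down past the other set
+  //  (struct.set (local.get $x) X')      ;; now adjacent, folded as above
+  // =>
+  //  (local.set $y (i32.const 1))
+  //  (local.set $x (struct.new X' Y Z))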
+ void visitBlock(Block* curr) { optimizeHeapStores(curr->list); }
+
+ void optimizeHeapStores(ExpressionList& list) {
+ for (Index i = 0; i < list.size(); i++) {
+ auto* localSet = list[i]->dynCast<LocalSet>();
+ if (!localSet) {
+ continue;
+ }
+ auto* new_ = localSet->value->dynCast<StructNew>();
+ if (!new_) {
+ continue;
+ }
+
+ // This local.set of a struct.new looks good. Find struct.sets after it to
+ // optimize.
+ Index localSetIndex = i;
+ for (Index j = localSetIndex + 1; j < list.size(); j++) {
+
+ // Check that the next instruction is a struct.set on the same local as
+ // the struct.new.
+ auto* structSet = list[j]->dynCast<StructSet>();
+ auto* localGet =
+ structSet ? structSet->ref->dynCast<LocalGet>() : nullptr;
+ if (!structSet || !localGet || localGet->index != localSet->index) {
+          // The pattern no longer matches here. Try to push the struct.new
+          // further down; if that is not possible, stop looking for more
+          // struct.sets to optimize with this struct.new.
+ if (trySwap(list, localSetIndex, j)) {
+ // Update the index and continue to try again.
+ localSetIndex = j;
+ continue;
+ }
+ break;
+ }
+
+        // The pattern matches; try to optimize.
+ if (!optimizeSubsequentStructSet(new_, structSet, localGet->index)) {
+ break;
+ } else {
+ // Success. Replace the set with a nop, and continue to perhaps
+ // optimize more.
+ ExpressionManipulator::nop(structSet);
+ }
+ }
+ }
+ }
+
+ // Helper function for optimizeHeapStores. Tries pushing the struct.new at
+ // index i down to index j, swapping it with the instruction already at j, so
+ // that it is closer to (potential) later struct.sets.
+ bool trySwap(ExpressionList& list, Index i, Index j) {
+ if (j == list.size() - 1) {
+      // There is no reason to swap with the last element of the list: nothing
+      // can follow it, so the pattern can never match. This also avoids
+      // swapping an instruction that leaves nothing on the stack with one
+      // that might leave something, which would be incorrect.
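+      //
+      // For example (a hypothetical sketch), if the last element provides the
+      // block's value, as in
+      //
+      //  (block (result i32)
+      //    (local.set $x (struct.new X Y Z))
+      //    (i32.const 1)
+      //  )
+      //
+      // then swapping would make the local.set the final element, breaking
+      // the block's typing.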
+ return false;
+ }
+
+    if (auto* otherSet = list[j]->dynCast<LocalSet>()) {
+      if (otherSet->value->is<StructNew>()) {
+        // Don't swap two struct.new instructions, to avoid going back and
+        // forth repeatedly.
+        return false;
+      }
+    }
+ // Check if the two expressions can be swapped safely considering their
+ // effects.
+ auto firstEffects = effects(list[i]);
+ auto secondEffects = effects(list[j]);
+ if (secondEffects.invalidates(firstEffects)) {
+ return false;
+ }
+
+ std::swap(list[i], list[j]);
+ return true;
+ }
+
+ // Given a struct.new and a struct.set that occurs right after it, and that
+ // applies to the same data, try to apply the set during the new. This can be
+ // either with a nested tee:
+ //
+ // (struct.set
+ // (local.tee $x (struct.new X Y Z))
+ // X'
+ // )
+ // =>
+ // (local.set $x (struct.new X' Y Z))
+ //
+ // or without:
+ //
+ // (local.set $x (struct.new X Y Z))
+ // (struct.set (local.get $x) X')
+ // =>
+ // (local.set $x (struct.new X' Y Z))
+ //
+ // Returns true if we succeeded.
+ bool optimizeSubsequentStructSet(StructNew* new_,
+ StructSet* set,
+ Index refLocalIndex) {
+ // Leave unreachable code for DCE, to avoid updating types here.
+ if (new_->type == Type::unreachable || set->type == Type::unreachable) {
+ return false;
+ }
+
+ auto index = set->index;
+ auto& operands = new_->operands;
+
+ // Check for effects that prevent us moving the struct.set's value (X' in
+ // the function comment) into its new position in the struct.new. First, it
+ // must be ok to move it past the local.set (otherwise, it might read from
+ // memory using that local, and depend on the struct.new having already
+ // occurred; or, if it writes to that local, then it would cross another
+ // write).
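+    //
+    // For example (hypothetical, purely for illustration), if the set's value
+    // reads the same local:
+    //
+    //  (local.set $x (struct.new X Y Z))
+    //  (struct.set (local.get $x) (..some use of (local.get $x)..))
+    //
+    // then folding that value into the struct.new would make it execute
+    // before the local.set, observing the local's old value.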
+ auto setValueEffects = effects(set->value);
+ if (setValueEffects.localsRead.count(refLocalIndex) ||
+ setValueEffects.localsWritten.count(refLocalIndex)) {
+ return false;
+ }
+
+    // We must also move the set's value past the operands at greater indexes
+    // (Y and Z in the example in the comment on this function). If this is
+    // not a with_default new then such operands exist and we must check for
+    // interfering effects.
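+    //
+    // For example (hypothetical): if the set's value X' is (global.get $g)
+    // and Z is a call that writes $g, then after folding X' would execute
+    // before Z instead of after it and read the wrong value, so we must give
+    // up in that case.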
+ // TODO When this function is called repeatedly in a sequence this can
+ // become quadratic - perhaps we should memoize (though, struct sizes
+ // tend to not be ridiculously large).
+ if (!new_->isWithDefault()) {
+ for (Index i = index + 1; i < operands.size(); i++) {
+ auto operandEffects = effects(operands[i]);
+ if (operandEffects.invalidates(setValueEffects)) {
+ // TODO: we could use locals to reorder everything
+ return false;
+ }
+ }
+ }
+
+ // We can optimize here!
+ Builder builder(*getModule());
+
+ // If this was with_default then we add default values now. That does
+ // increase code size in some cases (if there are many values, and few sets
+ // that get removed), but in general this optimization is worth it.
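+    //
+    // For example (an illustrative sketch; $T is a hypothetical type with
+    // three fields, the first of which is being set, and "0" stands for a
+    // field's default value):
+    //
+    //  (local.set $x (struct.new_default $T))
+    //  (struct.set (local.get $x) X')
+    // =>
+    //  (local.set $x (struct.new $T X' 0 0))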
+ if (new_->isWithDefault()) {
+ auto& fields = new_->type.getHeapType().getStruct().fields;
+ for (auto& field : fields) {
+ auto zero = Literal::makeZero(field.type);
+ operands.push_back(builder.makeConstantExpression(zero));
+ }
+ }
+
+ // See if we need to keep the old value.
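+    // For example (hypothetical), if the old operand is a call with side
+    // effects we must still execute it, so we end up with something like
+    //
+    //  (struct.new (block (drop (call $f)) X') Y Z)
+    //
+    // rather than simply replacing the operand with X'.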
+ if (effects(operands[index]).hasUnremovableSideEffects()) {
+ operands[index] =
+ builder.makeSequence(builder.makeDrop(operands[index]), set->value);
+ } else {
+ operands[index] = set->value;
+ }
+
+ return true;
+ }
+
+ EffectAnalyzer effects(Expression* expr) {
+ return EffectAnalyzer(getPassOptions(), *getModule(), expr);
+ }
+};
+
+Pass* createHeapStoreOptimizationPass() { return new HeapStoreOptimization(); }
+
+} // namespace wasm