-rwxr-xr-x  scripts/fuzz_opt.py                               1
-rw-r--r--  src/passes/CMakeLists.txt                         1
-rw-r--r--  src/passes/HeapStoreOptimization.cpp            239
-rw-r--r--  src/passes/OptimizeInstructions.cpp             193
-rw-r--r--  src/passes/pass.cpp                               9
-rw-r--r--  src/passes/passes.h                               1
-rw-r--r--  test/lit/help/wasm-metadce.test                   2
-rw-r--r--  test/lit/help/wasm-opt.test                       2
-rw-r--r--  test/lit/help/wasm2js.test                        2
-rw-r--r--  test/lit/passes/O1.wast                           1
-rw-r--r--  test/lit/passes/heap-store-optimization.wast     57
            (renamed from test/lit/passes/optimize-instructions-gc-heap.wast)
11 files changed, 313 insertions(+), 195 deletions(-)
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index 665ae7cbd..2bf4086db 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -1559,6 +1559,7 @@ opt_choices = [
("--local-cse",),
("--heap2local",),
("--remove-unused-names", "--heap2local",),
+ ("--heap-store-optimization",),
("--generate-stack-ir",),
("--licm",),
("--local-subtyping",),
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 54f044057..b95017bac 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -47,6 +47,7 @@ set(passes_SOURCES
hash-stringify-walker.cpp
Outlining.cpp
Heap2Local.cpp
+ HeapStoreOptimization.cpp
I64ToI32Lowering.cpp
Inlining.cpp
InstrumentLocals.cpp
diff --git a/src/passes/HeapStoreOptimization.cpp b/src/passes/HeapStoreOptimization.cpp
new file mode 100644
index 000000000..566290c36
--- /dev/null
+++ b/src/passes/HeapStoreOptimization.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2024 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Optimizes heap (GC) stores.
+//
+// TODO: Add dead store elimination / load forwarding here.
+//
+
+#include "ir/effects.h"
+#include "pass.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+struct HeapStoreOptimization
+ : public WalkerPass<PostWalker<HeapStoreOptimization>> {
+ bool isFunctionParallel() override { return true; }
+
+ // Locals are not modified here.
+ bool requiresNonNullableLocalFixups() override { return false; }
+
+ std::unique_ptr<Pass> create() override {
+ return std::make_unique<HeapStoreOptimization>();
+ }
+
+ void visitStructSet(StructSet* curr) {
+ // If our reference is a tee of a struct.new, we may be able to fold the
+ // stored value into the new itself:
+ //
+ // (struct.set (local.tee $x (struct.new X Y Z)) X')
+ // =>
+ // (local.set $x (struct.new X' Y Z))
+ //
+ if (auto* tee = curr->ref->dynCast<LocalSet>()) {
+ if (auto* new_ = tee->value->dynCast<StructNew>()) {
+ if (optimizeSubsequentStructSet(new_, curr, tee->index)) {
+ // Success, so we no longer need the struct.set, and the tee can
+ // become a plain local.set in its place.
+ tee->makeSet();
+ replaceCurrent(tee);
+ }
+ }
+ }
+ }
+
+ // Similar to the above, where a struct.set's reference is a tee of a new,
+ // we can do the same for subsequent sets in a list:
+ //
+ // (local.set $x (struct.new X Y Z))
+ // (struct.set (local.get $x) X')
+ // =>
+ // (local.set $x (struct.new X' Y Z))
+ //
+ // We also handle other struct.sets immediately after this one. If the
+ // instruction following the new is not a struct.set, we push the new down
+ // when possible.
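+ //
+ // For example, in a list like the following (an illustrative sketch, not
+ // taken from the test suite):
+ //
+ //  (local.set $x (struct.new X Y Z))
+ //  (call $log)                      ;; an unrelated instruction
+ //  (struct.set (local.get $x) X')
+ //
+ // we may swap the local.set past the call (when their effects allow it),
+ // after which the local.set and the struct.set are adjacent and match the
+ // pattern above.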
+ void visitBlock(Block* curr) { optimizeHeapStores(curr->list); }
+
+ void optimizeHeapStores(ExpressionList& list) {
+ for (Index i = 0; i < list.size(); i++) {
+ auto* localSet = list[i]->dynCast<LocalSet>();
+ if (!localSet) {
+ continue;
+ }
+ auto* new_ = localSet->value->dynCast<StructNew>();
+ if (!new_) {
+ continue;
+ }
+
+ // This local.set of a struct.new looks good. Find struct.sets after it to
+ // optimize.
+ Index localSetIndex = i;
+ for (Index j = localSetIndex + 1; j < list.size(); j++) {
+
+ // Check that the next instruction is a struct.set on the same local as
+ // the struct.new.
+ auto* structSet = list[j]->dynCast<StructSet>();
+ auto* localGet =
+ structSet ? structSet->ref->dynCast<LocalGet>() : nullptr;
+ if (!structSet || !localGet || localGet->index != localSet->index) {
+ // Any time the pattern no longer matches, we try to push the
+ // struct.new further down; if that is not possible, we stop
+ // optimizing struct.sets for this struct.new.
+ if (trySwap(list, localSetIndex, j)) {
+ // Update the index and continue to try again.
+ localSetIndex = j;
+ continue;
+ }
+ break;
+ }
+
+ // The pattern matches, try to optimize.
+ if (!optimizeSubsequentStructSet(new_, structSet, localGet->index)) {
+ break;
+ } else {
+ // Success. Replace the set with a nop, and continue to perhaps
+ // optimize more.
+ ExpressionManipulator::nop(structSet);
+ }
+ }
+ }
+ }
+
+ // Helper function for optimizeHeapStores. Tries pushing the struct.new at
+ // index i down to index j, swapping it with the instruction already at j, so
+ // that it is closer to (potential) later struct.sets.
+ bool trySwap(ExpressionList& list, Index i, Index j) {
+ if (j == list.size() - 1) {
+ // There is no reason to swap with the last element of the list, as it
+ // won't match the pattern because there won't be anything after it. This
+ // also avoids swapping an instruction that leaves nothing on the stack
+ // with one that could leave something, which would be incorrect.
+ return false;
+ }
+
+ if (list[j]->is<LocalSet>() &&
+ list[j]->dynCast<LocalSet>()->value->is<StructNew>()) {
+ // Don't swap two struct.new instructions to avoid going back and forth.
+ return false;
+ }
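+ // (Otherwise, two adjacent local.sets of struct.news could churn, each
+ // one repeatedly trying to move down past the other.)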
+ // Check if the two expressions can be swapped safely considering their
+ // effects.
+ auto firstEffects = effects(list[i]);
+ auto secondEffects = effects(list[j]);
+ if (secondEffects.invalidates(firstEffects)) {
+ return false;
+ }
+
+ std::swap(list[i], list[j]);
+ return true;
+ }
+
+ // Given a struct.new and a struct.set that occurs right after it, and that
+ // applies to the same data, try to apply the set during the new. This can be
+ // either with a nested tee:
+ //
+ // (struct.set
+ // (local.tee $x (struct.new X Y Z))
+ // X'
+ // )
+ // =>
+ // (local.set $x (struct.new X' Y Z))
+ //
+ // or without:
+ //
+ // (local.set $x (struct.new X Y Z))
+ // (struct.set (local.get $x) X')
+ // =>
+ // (local.set $x (struct.new X' Y Z))
+ //
+ // Returns true if we succeeded.
+ bool optimizeSubsequentStructSet(StructNew* new_,
+ StructSet* set,
+ Index refLocalIndex) {
+ // Leave unreachable code for DCE, to avoid updating types here.
+ if (new_->type == Type::unreachable || set->type == Type::unreachable) {
+ return false;
+ }
+
+ auto index = set->index;
+ auto& operands = new_->operands;
+
+ // Check for effects that prevent us moving the struct.set's value (X' in
+ // the function comment) into its new position in the struct.new. First, it
+ // must be ok to move it past the local.set (otherwise, it might read from
+ // memory using that local, and depend on the struct.new having already
+ // occurred; or, if it writes to that local, then it would cross another
+ // write).
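+ //
+ // For example (an illustrative case): if the value X' were
+ // (struct.get (local.get $x) ...) then it would read from the struct
+ // using that local, and moving it before the local.set would make it read
+ // a stale (possibly null) reference.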
+ auto setValueEffects = effects(set->value);
+ if (setValueEffects.localsRead.count(refLocalIndex) ||
+ setValueEffects.localsWritten.count(refLocalIndex)) {
+ return false;
+ }
+
+ // We must move the set's value past the operands at indexes greater than
+ // the one it writes to (Y and Z in the example in the comment on this
+ // function). If this is not with_default then we must check for effects.
+ // TODO When this function is called repeatedly in a sequence this can
+ // become quadratic - perhaps we should memoize (though, struct sizes
+ // tend to not be ridiculously large).
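+ //
+ // For example (an illustrative case): in
+ //
+ //  (local.set $x (struct.new (call $f) (call $g)))
+ //  (struct.set (local.get $x) (call $h))   ;; writes field 0
+ //
+ // folding (call $h) into field 0 of the struct.new would make it execute
+ // before (call $g), so we check that the moved value and the later
+ // operands cannot observe each other's effects.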
+ if (!new_->isWithDefault()) {
+ for (Index i = index + 1; i < operands.size(); i++) {
+ auto operandEffects = effects(operands[i]);
+ if (operandEffects.invalidates(setValueEffects)) {
+ // TODO: we could use locals to reorder everything
+ return false;
+ }
+ }
+ }
+
+ // We can optimize here!
+ Builder builder(*getModule());
+
+ // If this was with_default then we add the default values now. That can
+ // increase code size in some cases (if there are many fields and few sets
+ // that get removed), but in general this optimization is worth it.
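+ //
+ // For example (an illustrative sketch, where $S has two i32 fields):
+ //
+ //  (local.set $x (struct.new_with_default $S))
+ //  (struct.set (local.get $x) X')   ;; writes field 0
+ //  =>
+ //  (local.set $x (struct.new $S X' (i32.const 0)))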
+ if (new_->isWithDefault()) {
+ auto& fields = new_->type.getHeapType().getStruct().fields;
+ for (auto& field : fields) {
+ auto zero = Literal::makeZero(field.type);
+ operands.push_back(builder.makeConstantExpression(zero));
+ }
+ }
+
+ // See if we need to keep the old value.
+ if (effects(operands[index]).hasUnremovableSideEffects()) {
+ operands[index] =
+ builder.makeSequence(builder.makeDrop(operands[index]), set->value);
+ } else {
+ operands[index] = set->value;
+ }
+
+ return true;
+ }
+
+ EffectAnalyzer effects(Expression* expr) {
+ return EffectAnalyzer(getPassOptions(), *getModule(), expr);
+ }
+};
+
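+// Note that this pass is exposed as --heap-store-optimization, and is also
+// run in the default optimization pipeline when GC is enabled (see
+// pass.cpp).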
+Pass* createHeapStoreOptimizationPass() { return new HeapStoreOptimization(); }
+
+} // namespace wasm
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index c470fa060..21bd0c5ea 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -1108,12 +1108,6 @@ struct OptimizeInstructions
}
}
- void visitBlock(Block* curr) {
- if (getModule()->features.hasGC()) {
- optimizeHeapStores(curr->list);
- }
- }
-
void visitIf(If* curr) {
curr->condition = optimizeBoolean(curr->condition);
if (curr->ifFalse) {
@@ -1836,193 +1830,6 @@ struct OptimizeInstructions
optimizeStoredValue(curr->value, fields[curr->index].getByteSize());
}
}
-
- // If our reference is a tee of a struct.new, we may be able to fold the
- // stored value into the new itself:
- //
- // (struct.set (local.tee $x (struct.new X Y Z)) X')
- // =>
- // (local.set $x (struct.new X' Y Z))
- //
- if (auto* tee = curr->ref->dynCast<LocalSet>()) {
- if (auto* new_ = tee->value->dynCast<StructNew>()) {
- if (optimizeSubsequentStructSet(new_, curr, tee->index)) {
- // Success, so we do not need the struct.set any more, and the tee
- // can just be a set instead of us.
- tee->makeSet();
- replaceCurrent(tee);
- }
- }
- }
- }
-
- // Similar to the above with struct.set whose reference is a tee of a new, we
- // can do the same for subsequent sets in a list:
- //
- // (local.set $x (struct.new X Y Z))
- // (struct.set (local.get $x) X')
- // =>
- // (local.set $x (struct.new X' Y Z))
- //
- // We also handle other struct.sets immediately after this one. If the
- // instruction following the new is not a struct.set we push the new down if
- // possible.
- void optimizeHeapStores(ExpressionList& list) {
- for (Index i = 0; i < list.size(); i++) {
- auto* localSet = list[i]->dynCast<LocalSet>();
- if (!localSet) {
- continue;
- }
- auto* new_ = localSet->value->dynCast<StructNew>();
- if (!new_) {
- continue;
- }
-
- // This local.set of a struct.new looks good. Find struct.sets after it to
- // optimize.
- Index localSetIndex = i;
- for (Index j = localSetIndex + 1; j < list.size(); j++) {
-
- // Check that the next instruction is a struct.set on the same local as
- // the struct.new.
- auto* structSet = list[j]->dynCast<StructSet>();
- auto* localGet =
- structSet ? structSet->ref->dynCast<LocalGet>() : nullptr;
- if (!structSet || !localGet || localGet->index != localSet->index) {
- // Any time the pattern no longer matches, we try to push the
- // struct.new further down but if it is not possible we stop
- // optimizing possible struct.sets for this struct.new.
- if (trySwap(list, localSetIndex, j)) {
- // Update the index and continue to try again.
- localSetIndex = j;
- continue;
- }
- break;
- }
-
- // The pattern matches, try to optimize.
- if (!optimizeSubsequentStructSet(new_, structSet, localGet->index)) {
- break;
- } else {
- // Success. Replace the set with a nop, and continue to perhaps
- // optimize more.
- ExpressionManipulator::nop(structSet);
- }
- }
- }
- }
-
- // Helper function for optimizeHeapStores. Tries pushing the struct.new at
- // index i down to index j, swapping it with the instruction already at j, so
- // that it is closer to (potential) later struct.sets.
- bool trySwap(ExpressionList& list, Index i, Index j) {
- if (j == list.size() - 1) {
- // There is no reason to swap with the last element of the list as it
- // won't match the pattern because there wont be anything after. This also
- // avoids swapping an instruction that does not leave anything in the
- // stack by one that could leave something, and that which would be
- // incorrect.
- return false;
- }
-
- if (list[j]->is<LocalSet>() &&
- list[j]->dynCast<LocalSet>()->value->is<StructNew>()) {
- // Don't swap two struct.new instructions to avoid going back and forth.
- return false;
- }
- // Check if the two expressions can be swapped safely considering their
- // effects.
- auto firstEffects = effects(list[i]);
- auto secondEffects = effects(list[j]);
- if (secondEffects.invalidates(firstEffects)) {
- return false;
- }
-
- std::swap(list[i], list[j]);
- return true;
- }
-
- // Given a struct.new and a struct.set that occurs right after it, and that
- // applies to the same data, try to apply the set during the new. This can be
- // either with a nested tee:
- //
- // (struct.set
- // (local.tee $x (struct.new X Y Z))
- // X'
- // )
- // =>
- // (local.set $x (struct.new X' Y Z))
- //
- // or without:
- //
- // (local.set $x (struct.new X Y Z))
- // (struct.set (local.get $x) X')
- // =>
- // (local.set $x (struct.new X' Y Z))
- //
- // Returns true if we succeeded.
- bool optimizeSubsequentStructSet(StructNew* new_,
- StructSet* set,
- Index refLocalIndex) {
- // Leave unreachable code for DCE, to avoid updating types here.
- if (new_->type == Type::unreachable || set->type == Type::unreachable) {
- return false;
- }
-
- auto index = set->index;
- auto& operands = new_->operands;
-
- // Check for effects that prevent us moving the struct.set's value (X' in
- // the function comment) into its new position in the struct.new. First, it
- // must be ok to move it past the local.set (otherwise, it might read from
- // memory using that local, and depend on the struct.new having already
- // occurred; or, if it writes to that local, then it would cross another
- // write).
- auto setValueEffects = effects(set->value);
- if (setValueEffects.localsRead.count(refLocalIndex) ||
- setValueEffects.localsWritten.count(refLocalIndex)) {
- return false;
- }
-
- // We must move the set's value past indexes greater than it (Y and Z in
- // the example in the comment on this function). If this is not with_default
- // then we must check for effects.
- // TODO When this function is called repeatedly in a sequence this can
- // become quadratic - perhaps we should memoize (though, struct sizes
- // tend to not be ridiculously large).
- if (!new_->isWithDefault()) {
- for (Index i = index + 1; i < operands.size(); i++) {
- auto operandEffects = effects(operands[i]);
- if (operandEffects.invalidates(setValueEffects)) {
- // TODO: we could use locals to reorder everything
- return false;
- }
- }
- }
-
- // We can optimize here!
- Builder builder(*getModule());
-
- // If this was with_default then we add default values now. That does
- // increase code size in some cases (if there are many values, and few sets
- // that get removed), but in general this optimization is worth it.
- if (new_->isWithDefault()) {
- auto& fields = new_->type.getHeapType().getStruct().fields;
- for (auto& field : fields) {
- auto zero = Literal::makeZero(field.type);
- operands.push_back(builder.makeConstantExpression(zero));
- }
- }
-
- // See if we need to keep the old value.
- if (effects(operands[index]).hasUnremovableSideEffects()) {
- operands[index] =
- builder.makeSequence(builder.makeDrop(operands[index]), set->value);
- } else {
- operands[index] = set->value;
- }
-
- return true;
}
void visitArrayNew(ArrayNew* curr) {
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 35d2f6779..eb499a4ad 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -209,6 +209,9 @@ void PassRegistry::registerPasses() {
createTypeRefiningPass);
registerPass(
"heap2local", "replace GC allocations with locals", createHeap2LocalPass);
+ registerPass("heap-store-optimization",
+ "optimize heap (GC) stores",
+ createHeapStoreOptimizationPass);
registerPass(
"inline-main", "inline __original_main into main", createInlineMainPass);
registerPass("inlining",
@@ -609,6 +612,9 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
addIfNoDWARFIssues("remove-unused-brs");
addIfNoDWARFIssues("remove-unused-names");
addIfNoDWARFIssues("optimize-instructions");
+ if (wasm->features.hasGC()) {
+ addIfNoDWARFIssues("heap-store-optimization");
+ }
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) {
addIfNoDWARFIssues("pick-load-signs");
}
@@ -681,6 +687,9 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
addIfNoDWARFIssues("precompute");
}
addIfNoDWARFIssues("optimize-instructions");
+ if (wasm->features.hasGC()) {
+ addIfNoDWARFIssues("heap-store-optimization");
+ }
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) {
addIfNoDWARFIssues(
"rse"); // after all coalesce-locals, and before a final vacuum
diff --git a/src/passes/passes.h b/src/passes/passes.h
index a95365676..e4de733da 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -62,6 +62,7 @@ Pass* createGUFAPass();
Pass* createGUFACastAllPass();
Pass* createGUFAOptimizingPass();
Pass* createHeap2LocalPass();
+Pass* createHeapStoreOptimizationPass();
Pass* createI64ToI32LoweringPass();
Pass* createInlineMainPass();
Pass* createInliningPass();
diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test
index 665d78746..41a43bf1b 100644
--- a/test/lit/help/wasm-metadce.test
+++ b/test/lit/help/wasm-metadce.test
@@ -190,6 +190,8 @@
;; CHECK-NEXT: --gufa-optimizing GUFA plus local optimizations in
;; CHECK-NEXT: functions we modified
;; CHECK-NEXT:
+;; CHECK-NEXT: --heap-store-optimization optimize heap (GC) stores
+;; CHECK-NEXT:
;; CHECK-NEXT: --heap2local replace GC allocations with
;; CHECK-NEXT: locals
;; CHECK-NEXT:
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index 4eea3c431..e5b30e304 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -199,6 +199,8 @@
;; CHECK-NEXT: --gufa-optimizing GUFA plus local optimizations in
;; CHECK-NEXT: functions we modified
;; CHECK-NEXT:
+;; CHECK-NEXT: --heap-store-optimization optimize heap (GC) stores
+;; CHECK-NEXT:
;; CHECK-NEXT: --heap2local replace GC allocations with
;; CHECK-NEXT: locals
;; CHECK-NEXT:
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index 501d1d3f1..2d933984e 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -153,6 +153,8 @@
;; CHECK-NEXT: --gufa-optimizing GUFA plus local optimizations in
;; CHECK-NEXT: functions we modified
;; CHECK-NEXT:
+;; CHECK-NEXT: --heap-store-optimization optimize heap (GC) stores
+;; CHECK-NEXT:
;; CHECK-NEXT: --heap2local replace GC allocations with
;; CHECK-NEXT: locals
;; CHECK-NEXT:
diff --git a/test/lit/passes/O1.wast b/test/lit/passes/O1.wast
index 8ffc29301..49271699d 100644
--- a/test/lit/passes/O1.wast
+++ b/test/lit/passes/O1.wast
@@ -40,4 +40,3 @@
)
)
-
diff --git a/test/lit/passes/optimize-instructions-gc-heap.wast b/test/lit/passes/heap-store-optimization.wast
index 2988b1bd8..635963523 100644
--- a/test/lit/passes/optimize-instructions-gc-heap.wast
+++ b/test/lit/passes/heap-store-optimization.wast
@@ -1,5 +1,5 @@
;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
-;; RUN: wasm-opt %s --remove-unused-names --optimize-instructions -all -S -o - \
+;; RUN: wasm-opt %s --remove-unused-names --heap-store-optimization -all -S -o - \
;; RUN: | filecheck %s
;;
;; --remove-unused-names allows the optimizer to see that the blocks have no
@@ -822,4 +822,59 @@
(func $helper-i32 (param $x i32) (result i32)
(i32.const 42)
)
+
+ ;; CHECK: (func $control-flow-in-set-value (type $5) (result i32)
+ ;; CHECK-NEXT: (local $ref (ref null $struct))
+ ;; CHECK-NEXT: (block $label
+ ;; CHECK-NEXT: (local.set $ref
+ ;; CHECK-NEXT: (struct.new $struct
+ ;; CHECK-NEXT: (if (result i32)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (then
+ ;; CHECK-NEXT: (br $label)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (else
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (local.get $ref)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $control-flow-in-set-value (result i32)
+ ;; Test that we handle control flow in the struct.set's value when we
+ ;; combine a struct.set with a struct.new.
+ ;; XXX The output above is wrong at the moment, and the pass needs fixing!
+ (local $ref (ref null $struct))
+ (block $label
+ (struct.set $struct 0
+ (local.tee $ref
+ (struct.new $struct
+ (i32.const 1)
+ )
+ )
+ (if (result i32)
+ (i32.const 1)
+ (then
+ ;; This conditional break happens *after* the local.tee of $ref. We
+ ;; must not move code around in a way that reorders it, since there is
+ ;; a use of the local below that could observe the change.
+ (br $label)
+ )
+ (else
+ (i32.const 42)
+ )
+ )
+ )
+ )
+ ;; We did not reach the struct.set, but we did reach the local.tee, so this
+ ;; reads the initial value of 1 (and does not trap on a nullref).
+ (struct.get $struct 0
+ (local.get $ref)
+ )
+ )
)
+