-rwxr-xr-x  scripts/fuzz_opt.py                          |   1 +
-rw-r--r--  src/passes/CMakeLists.txt                    |   1 +
-rw-r--r--  src/passes/HeapStoreOptimization.cpp         | 239 +
-rw-r--r--  src/passes/OptimizeInstructions.cpp          | 193 -
-rw-r--r--  src/passes/pass.cpp                          |   9 +
-rw-r--r--  src/passes/passes.h                          |   1 +
-rw-r--r--  test/lit/help/wasm-metadce.test              |   2 +
-rw-r--r--  test/lit/help/wasm-opt.test                  |   2 +
-rw-r--r--  test/lit/help/wasm2js.test                   |   2 +
-rw-r--r--  test/lit/passes/O1.wast                      |   1 -
-rw-r--r--  test/lit/passes/heap-store-optimization.wast |  57 +-
            (renamed from test/lit/passes/optimize-instructions-gc-heap.wast)
11 files changed, 313 insertions(+), 195 deletions(-)
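
To orient review: the new pass folds a struct.set that immediately follows a
struct.new into the allocation itself. A minimal before/after sketch (a
hypothetical module written only for illustration; the type $T and function
$make are not part of this diff):

  (module
    (type $T (struct (field (mut i32)) (field (mut i32))))
    (func $make (result (ref $T))
      (local $x (ref null $T))
      ;; Store 42 into field 0 right after allocating.
      (struct.set $T 0
        (local.tee $x
          (struct.new $T
            (i32.const 1)
            (i32.const 2)
          )
        )
        (i32.const 42)
      )
      (ref.as_non_null (local.get $x))
    )
  )

  ;; After --heap-store-optimization the set is folded into the new:
  ;;   (local.set $x (struct.new $T (i32.const 42) (i32.const 2)))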
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index 665ae7cbd..2bf4086db 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -1559,6 +1559,7 @@ opt_choices = [
     ("--local-cse",),
     ("--heap2local",),
     ("--remove-unused-names", "--heap2local",),
+    ("--heap-store-optimization",),
     ("--generate-stack-ir",),
     ("--licm",),
     ("--local-subtyping",),
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 54f044057..b95017bac 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -47,6 +47,7 @@ set(passes_SOURCES
   hash-stringify-walker.cpp
   Outlining.cpp
   Heap2Local.cpp
+  HeapStoreOptimization.cpp
   I64ToI32Lowering.cpp
   Inlining.cpp
   InstrumentLocals.cpp
diff --git a/src/passes/HeapStoreOptimization.cpp b/src/passes/HeapStoreOptimization.cpp
new file mode 100644
index 000000000..566290c36
--- /dev/null
+++ b/src/passes/HeapStoreOptimization.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2024 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Optimizes heap (GC) stores.
+//
+// TODO: Add dead store elimination / load forwarding here.
+//
+
+#include "ir/effects.h"
+#include "pass.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+struct HeapStoreOptimization
+  : public WalkerPass<PostWalker<HeapStoreOptimization>> {
+  bool isFunctionParallel() override { return true; }
+
+  // Locals are not modified here.
+  bool requiresNonNullableLocalFixups() override { return false; }
+
+  std::unique_ptr<Pass> create() override {
+    return std::make_unique<HeapStoreOptimization>();
+  }
+
+  void visitStructSet(StructSet* curr) {
+    // If our reference is a tee of a struct.new, we may be able to fold the
+    // stored value into the new itself:
+    //
+    //  (struct.set (local.tee $x (struct.new X Y Z)) X')
+    // =>
+    //  (local.set $x (struct.new X' Y Z))
+    //
+    if (auto* tee = curr->ref->dynCast<LocalSet>()) {
+      if (auto* new_ = tee->value->dynCast<StructNew>()) {
+        if (optimizeSubsequentStructSet(new_, curr, tee->index)) {
+          // Success, so we do not need the struct.set any more, and the tee
+          // can just be a set instead of us.
+          tee->makeSet();
+          replaceCurrent(tee);
+        }
+      }
+    }
+  }
+
+  // Similar to the above, where a struct.set's reference is a tee of a new,
+  // we can do the same for subsequent sets in a list:
+  //
+  //  (local.set $x (struct.new X Y Z))
+  //  (struct.set (local.get $x) X')
+  // =>
+  //  (local.set $x (struct.new X' Y Z))
+  //
+  // We also handle other struct.sets immediately after this one. If the
+  // instruction following the new is not a struct.set, we push the new down
+  // if possible.
+  void visitBlock(Block* curr) { optimizeHeapStores(curr->list); }
+
+  void optimizeHeapStores(ExpressionList& list) {
+    for (Index i = 0; i < list.size(); i++) {
+      auto* localSet = list[i]->dynCast<LocalSet>();
+      if (!localSet) {
+        continue;
+      }
+      auto* new_ = localSet->value->dynCast<StructNew>();
+      if (!new_) {
+        continue;
+      }
+
+      // This local.set of a struct.new looks good.
+      // Find struct.sets after it to optimize.
+      Index localSetIndex = i;
+      for (Index j = localSetIndex + 1; j < list.size(); j++) {
+
+        // Check that the next instruction is a struct.set on the same local
+        // as the struct.new.
+        auto* structSet = list[j]->dynCast<StructSet>();
+        auto* localGet =
+          structSet ? structSet->ref->dynCast<LocalGet>() : nullptr;
+        if (!structSet || !localGet || localGet->index != localSet->index) {
+          // Any time the pattern no longer matches, we try to push the
+          // struct.new further down, but if that is not possible we stop
+          // optimizing possible struct.sets for this struct.new.
+          if (trySwap(list, localSetIndex, j)) {
+            // Update the index and continue to try again.
+            localSetIndex = j;
+            continue;
+          }
+          break;
+        }
+
+        // The pattern matches, try to optimize.
+        if (!optimizeSubsequentStructSet(new_, structSet, localGet->index)) {
+          break;
+        } else {
+          // Success. Replace the set with a nop, and continue to perhaps
+          // optimize more.
+          ExpressionManipulator::nop(structSet);
+        }
+      }
+    }
+  }
+
+  // Helper function for optimizeHeapStores. Tries pushing the struct.new at
+  // index i down to index j, swapping it with the instruction already at j,
+  // so that it is closer to (potential) later struct.sets.
+  bool trySwap(ExpressionList& list, Index i, Index j) {
+    if (j == list.size() - 1) {
+      // There is no reason to swap with the last element of the list, as it
+      // won't match the pattern because there won't be anything after it.
+      // This also avoids swapping an instruction that leaves nothing on the
+      // stack with one that could leave something, which would be incorrect.
+      return false;
+    }
+
+    if (list[j]->is<LocalSet>() &&
+        list[j]->dynCast<LocalSet>()->value->is<StructNew>()) {
+      // Don't swap two struct.new instructions, to avoid going back and
+      // forth.
+      return false;
+    }
+    // Check if the two expressions can be swapped safely considering their
+    // effects.
+    auto firstEffects = effects(list[i]);
+    auto secondEffects = effects(list[j]);
+    if (secondEffects.invalidates(firstEffects)) {
+      return false;
+    }
+
+    std::swap(list[i], list[j]);
+    return true;
+  }
+
+  // Given a struct.new and a struct.set that occurs right after it, and that
+  // applies to the same data, try to apply the set during the new. This can
+  // be either with a nested tee:
+  //
+  //  (struct.set
+  //    (local.tee $x (struct.new X Y Z))
+  //    X'
+  //  )
+  // =>
+  //  (local.set $x (struct.new X' Y Z))
+  //
+  // or without:
+  //
+  //  (local.set $x (struct.new X Y Z))
+  //  (struct.set (local.get $x) X')
+  // =>
+  //  (local.set $x (struct.new X' Y Z))
+  //
+  // Returns true if we succeeded.
+  bool optimizeSubsequentStructSet(StructNew* new_,
+                                   StructSet* set,
+                                   Index refLocalIndex) {
+    // Leave unreachable code for DCE, to avoid updating types here.
+    if (new_->type == Type::unreachable || set->type == Type::unreachable) {
+      return false;
+    }
+
+    auto index = set->index;
+    auto& operands = new_->operands;
+
+    // Check for effects that prevent us from moving the struct.set's value
+    // (X' in the function comment) into its new position in the struct.new.
+    // First, it must be ok to move it past the local.set (otherwise, it
+    // might read from memory using that local, and depend on the struct.new
+    // having already occurred; or, if it writes to that local, then it would
+    // cross another write).
+    auto setValueEffects = effects(set->value);
+    if (setValueEffects.localsRead.count(refLocalIndex) ||
+        setValueEffects.localsWritten.count(refLocalIndex)) {
+      return false;
+    }
+
+    // We must move the set's value past indexes greater than it (Y and Z in
+    // the example in the comment on this function). If this is not
+    // with_default then we must check for effects.
+    // TODO When this function is called repeatedly in a sequence this can
+    //      become quadratic - perhaps we should memoize (though, struct
+    //      sizes tend to not be ridiculously large).
+    if (!new_->isWithDefault()) {
+      for (Index i = index + 1; i < operands.size(); i++) {
+        auto operandEffects = effects(operands[i]);
+        if (operandEffects.invalidates(setValueEffects)) {
+          // TODO: we could use locals to reorder everything
+          return false;
+        }
+      }
+    }
+
+    // We can optimize here!
+    Builder builder(*getModule());
+
+    // If this was with_default then we add default values now. That does
+    // increase code size in some cases (if there are many values, and few
+    // sets that get removed), but in general this optimization is worth it.
+    if (new_->isWithDefault()) {
+      auto& fields = new_->type.getHeapType().getStruct().fields;
+      for (auto& field : fields) {
+        auto zero = Literal::makeZero(field.type);
+        operands.push_back(builder.makeConstantExpression(zero));
+      }
+    }
+
+    // See if we need to keep the old value.
+    if (effects(operands[index]).hasUnremovableSideEffects()) {
+      operands[index] =
+        builder.makeSequence(builder.makeDrop(operands[index]), set->value);
+    } else {
+      operands[index] = set->value;
+    }
+
+    return true;
+  }
+
+  EffectAnalyzer effects(Expression* expr) {
+    return EffectAnalyzer(getPassOptions(), *getModule(), expr);
+  }
+};
+
+Pass* createHeapStoreOptimizationPass() { return new HeapStoreOptimization(); }
+
+} // namespace wasm
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index c470fa060..21bd0c5ea 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -1108,12 +1108,6 @@ struct OptimizeInstructions
     }
   }
 
-  void visitBlock(Block* curr) {
-    if (getModule()->features.hasGC()) {
-      optimizeHeapStores(curr->list);
-    }
-  }
-
   void visitIf(If* curr) {
     curr->condition = optimizeBoolean(curr->condition);
     if (curr->ifFalse) {
@@ -1836,193 +1830,6 @@ struct OptimizeInstructions
       optimizeStoredValue(curr->value, fields[curr->index].getByteSize());
     }
   }
-
-    // If our reference is a tee of a struct.new, we may be able to fold the
-    // stored value into the new itself:
-    //
-    //  (struct.set (local.tee $x (struct.new X Y Z)) X')
-    // =>
-    //  (local.set $x (struct.new X' Y Z))
-    //
-    if (auto* tee = curr->ref->dynCast<LocalSet>()) {
-      if (auto* new_ = tee->value->dynCast<StructNew>()) {
-        if (optimizeSubsequentStructSet(new_, curr, tee->index)) {
-          // Success, so we do not need the struct.set any more, and the tee
-          // can just be a set instead of us.
-          tee->makeSet();
-          replaceCurrent(tee);
-        }
-      }
-    }
-  }
-
-  // Similar to the above with struct.set whose reference is a tee of a new, we
-  // can do the same for subsequent sets in a list:
-  //
-  //  (local.set $x (struct.new X Y Z))
-  //  (struct.set (local.get $x) X')
-  // =>
-  //  (local.set $x (struct.new X' Y Z))
-  //
-  // We also handle other struct.sets immediately after this one. If the
-  // instruction following the new is not a struct.set we push the new down if
-  // possible.
-  void optimizeHeapStores(ExpressionList& list) {
-    for (Index i = 0; i < list.size(); i++) {
-      auto* localSet = list[i]->dynCast<LocalSet>();
-      if (!localSet) {
-        continue;
-      }
-      auto* new_ = localSet->value->dynCast<StructNew>();
-      if (!new_) {
-        continue;
-      }
-
-      // This local.set of a struct.new looks good. Find struct.sets after it
-      // to optimize.
-      Index localSetIndex = i;
-      for (Index j = localSetIndex + 1; j < list.size(); j++) {
-
-        // Check that the next instruction is a struct.set on the same local as
-        // the struct.new.
-        auto* structSet = list[j]->dynCast<StructSet>();
-        auto* localGet =
-          structSet ? structSet->ref->dynCast<LocalGet>() : nullptr;
-        if (!structSet || !localGet || localGet->index != localSet->index) {
-          // Any time the pattern no longer matches, we try to push the
-          // struct.new further down but if it is not possible we stop
-          // optimizing possible struct.sets for this struct.new.
-          if (trySwap(list, localSetIndex, j)) {
-            // Update the index and continue to try again.
-            localSetIndex = j;
-            continue;
-          }
-          break;
-        }
-
-        // The pattern matches, try to optimize.
-        if (!optimizeSubsequentStructSet(new_, structSet, localGet->index)) {
-          break;
-        } else {
-          // Success. Replace the set with a nop, and continue to perhaps
-          // optimize more.
-          ExpressionManipulator::nop(structSet);
-        }
-      }
-    }
-  }
-
-  // Helper function for optimizeHeapStores. Tries pushing the struct.new at
-  // index i down to index j, swapping it with the instruction already at j, so
-  // that it is closer to (potential) later struct.sets.
-  bool trySwap(ExpressionList& list, Index i, Index j) {
-    if (j == list.size() - 1) {
-      // There is no reason to swap with the last element of the list as it
-      // won't match the pattern because there wont be anything after. This
-      // also avoids swapping an instruction that does not leave anything in
-      // the stack by one that could leave something, and that which would be
-      // incorrect.
-      return false;
-    }
-
-    if (list[j]->is<LocalSet>() &&
-        list[j]->dynCast<LocalSet>()->value->is<StructNew>()) {
-      // Don't swap two struct.new instructions to avoid going back and forth.
-      return false;
-    }
-    // Check if the two expressions can be swapped safely considering their
-    // effects.
-    auto firstEffects = effects(list[i]);
-    auto secondEffects = effects(list[j]);
-    if (secondEffects.invalidates(firstEffects)) {
-      return false;
-    }
-
-    std::swap(list[i], list[j]);
-    return true;
-  }
-
-  // Given a struct.new and a struct.set that occurs right after it, and that
-  // applies to the same data, try to apply the set during the new. This can be
-  // either with a nested tee:
-  //
-  //  (struct.set
-  //    (local.tee $x (struct.new X Y Z))
-  //    X'
-  //  )
-  // =>
-  //  (local.set $x (struct.new X' Y Z))
-  //
-  // or without:
-  //
-  //  (local.set $x (struct.new X Y Z))
-  //  (struct.set (local.get $x) X')
-  // =>
-  //  (local.set $x (struct.new X' Y Z))
-  //
-  // Returns true if we succeeded.
-  bool optimizeSubsequentStructSet(StructNew* new_,
-                                   StructSet* set,
-                                   Index refLocalIndex) {
-    // Leave unreachable code for DCE, to avoid updating types here.
-    if (new_->type == Type::unreachable || set->type == Type::unreachable) {
-      return false;
-    }
-
-    auto index = set->index;
-    auto& operands = new_->operands;
-
-    // Check for effects that prevent us moving the struct.set's value (X' in
-    // the function comment) into its new position in the struct.new.
-    // First, it must be ok to move it past the local.set (otherwise, it might
-    // read from memory using that local, and depend on the struct.new having
-    // already occurred; or, if it writes to that local, then it would cross
-    // another write).
-    auto setValueEffects = effects(set->value);
-    if (setValueEffects.localsRead.count(refLocalIndex) ||
-        setValueEffects.localsWritten.count(refLocalIndex)) {
-      return false;
-    }
-
-    // We must move the set's value past indexes greater than it (Y and Z in
-    // the example in the comment on this function). If this is not
-    // with_default then we must check for effects.
-    // TODO When this function is called repeatedly in a sequence this can
-    //      become quadratic - perhaps we should memoize (though, struct sizes
-    //      tend to not be ridiculously large).
-    if (!new_->isWithDefault()) {
-      for (Index i = index + 1; i < operands.size(); i++) {
-        auto operandEffects = effects(operands[i]);
-        if (operandEffects.invalidates(setValueEffects)) {
-          // TODO: we could use locals to reorder everything
-          return false;
-        }
-      }
-    }
-
-    // We can optimize here!
-    Builder builder(*getModule());
-
-    // If this was with_default then we add default values now. That does
-    // increase code size in some cases (if there are many values, and few sets
-    // that get removed), but in general this optimization is worth it.
-    if (new_->isWithDefault()) {
-      auto& fields = new_->type.getHeapType().getStruct().fields;
-      for (auto& field : fields) {
-        auto zero = Literal::makeZero(field.type);
-        operands.push_back(builder.makeConstantExpression(zero));
-      }
-    }
-
-    // See if we need to keep the old value.
-    if (effects(operands[index]).hasUnremovableSideEffects()) {
-      operands[index] =
-        builder.makeSequence(builder.makeDrop(operands[index]), set->value);
-    } else {
-      operands[index] = set->value;
-    }
-
-    return true;
   }
 
   void visitArrayNew(ArrayNew* curr) {
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 35d2f6779..eb499a4ad 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -209,6 +209,9 @@ void PassRegistry::registerPasses() {
                createTypeRefiningPass);
   registerPass(
     "heap2local", "replace GC allocations with locals", createHeap2LocalPass);
+  registerPass("heap-store-optimization",
+               "optimize heap (GC) stores",
+               createHeapStoreOptimizationPass);
   registerPass(
     "inline-main", "inline __original_main into main", createInlineMainPass);
   registerPass("inlining",
@@ -609,6 +612,9 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
   addIfNoDWARFIssues("remove-unused-brs");
   addIfNoDWARFIssues("remove-unused-names");
   addIfNoDWARFIssues("optimize-instructions");
+  if (wasm->features.hasGC()) {
+    addIfNoDWARFIssues("heap-store-optimization");
+  }
   if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) {
     addIfNoDWARFIssues("pick-load-signs");
   }
@@ -681,6 +687,9 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
     addIfNoDWARFIssues("precompute");
   }
   addIfNoDWARFIssues("optimize-instructions");
+  if (wasm->features.hasGC()) {
+    addIfNoDWARFIssues("heap-store-optimization");
+  }
   if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) {
     addIfNoDWARFIssues(
       "rse"); // after all coalesce-locals, and before a final vacuum
diff --git a/src/passes/passes.h b/src/passes/passes.h
index a95365676..e4de733da 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -62,6 +62,7 @@ Pass* createGUFAPass();
 Pass* createGUFACastAllPass();
 Pass* createGUFAOptimizingPass();
 Pass* createHeap2LocalPass();
+Pass* createHeapStoreOptimizationPass();
 Pass* createI64ToI32LoweringPass();
 Pass* createInlineMainPass();
 Pass* createInliningPass();
diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test
index 665d78746..41a43bf1b 100644
--- a/test/lit/help/wasm-metadce.test
+++ b/test/lit/help/wasm-metadce.test
@@ -190,6 +190,8 @@
 ;; CHECK-NEXT: --gufa-optimizing            GUFA plus local optimizations in
 ;; CHECK-NEXT:                              functions we modified
 ;; CHECK-NEXT:
+;; CHECK-NEXT: --heap-store-optimization    optimize heap (GC) stores
+;; CHECK-NEXT:
 ;; CHECK-NEXT: --heap2local                 replace GC allocations with
 ;; CHECK-NEXT:                              locals
 ;; CHECK-NEXT:
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index 4eea3c431..e5b30e304 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -199,6 +199,8 @@
 ;; CHECK-NEXT: --gufa-optimizing            GUFA plus local optimizations in
 ;; CHECK-NEXT:                              functions we modified
 ;; CHECK-NEXT:
+;; CHECK-NEXT: --heap-store-optimization    optimize heap (GC) stores
+;; CHECK-NEXT:
 ;; CHECK-NEXT: --heap2local                 replace GC allocations with
 ;; CHECK-NEXT:                              locals
 ;; CHECK-NEXT:
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index 501d1d3f1..2d933984e 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -153,6 +153,8 @@
 ;; CHECK-NEXT: --gufa-optimizing            GUFA plus local optimizations in
 ;; CHECK-NEXT:                              functions we modified
 ;; CHECK-NEXT:
+;; CHECK-NEXT: --heap-store-optimization    optimize heap (GC) stores
+;; CHECK-NEXT:
 ;; CHECK-NEXT: --heap2local                 replace GC allocations with
 ;; CHECK-NEXT:                              locals
 ;; CHECK-NEXT:
diff --git a/test/lit/passes/O1.wast b/test/lit/passes/O1.wast
index 8ffc29301..49271699d 100644
--- a/test/lit/passes/O1.wast
+++ b/test/lit/passes/O1.wast
@@ -40,4 +40,3 @@
     )
   )
 )
-
diff --git a/test/lit/passes/optimize-instructions-gc-heap.wast b/test/lit/passes/heap-store-optimization.wast
index 2988b1bd8..635963523 100644
--- a/test/lit/passes/optimize-instructions-gc-heap.wast
+++ b/test/lit/passes/heap-store-optimization.wast
@@ -1,5 +1,5 @@
 ;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
-;; RUN: wasm-opt %s --remove-unused-names --optimize-instructions -all -S -o - \
+;; RUN: wasm-opt %s --remove-unused-names --heap-store-optimization -all -S -o - \
 ;; RUN: | filecheck %s
 ;;
 ;; --remove-unused-names allows the optimizer to see that the blocks have no
@@ -822,4 +822,59 @@
   (func $helper-i32 (param $x i32) (result i32)
     (i32.const 42)
   )
+
+  ;; CHECK:      (func $control-flow-in-set-value (type $5) (result i32)
+  ;; CHECK-NEXT:  (local $ref (ref null $struct))
+  ;; CHECK-NEXT:  (block $label
+  ;; CHECK-NEXT:   (local.set $ref
+  ;; CHECK-NEXT:    (struct.new $struct
+  ;; CHECK-NEXT:     (if (result i32)
+  ;; CHECK-NEXT:      (i32.const 1)
+  ;; CHECK-NEXT:      (then
+  ;; CHECK-NEXT:       (br $label)
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:      (else
+  ;; CHECK-NEXT:       (i32.const 42)
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (struct.get $struct 0
+  ;; CHECK-NEXT:   (local.get $ref)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $control-flow-in-set-value (result i32)
+    ;; Test that we handle control flow in the struct.set's value when we
+    ;; combine a struct.set with a struct.new.
+    ;; XXX The output above is wrong atm, and the pass needs fixing!
+    (local $ref (ref null $struct))
+    (block $label
+      (struct.set $struct 0
+        (local.tee $ref
+          (struct.new $struct
+            (i32.const 1)
+          )
+        )
+        (if (result i32)
+          (i32.const 1)
+          (then
+            ;; This conditional break happens *after* the local.tee of $ref.
We + ;; must not move code around that reorders it, since there is a use + ;; of the local below that could notice changes. + (br $label) + ) + (else + (i32.const 42) + ) + ) + ) + ) + ;; We did not reach the struct.set, but we did reach the local.tee, so this + ;; reads the initial value of 1 (and does not trap on a nullref). + (struct.get $struct 0 + (local.get $ref) + ) + ) ) + |