Diffstat (limited to 'src')
-rw-r--r--  src/passes/CMakeLists.txt    |   1 +
-rw-r--r--  src/passes/Monomorphize.cpp  |   3 +
-rw-r--r--  src/passes/OptimizeCasts.cpp | 229 +
-rw-r--r--  src/passes/pass.cpp          |   3 +
-rw-r--r--  src/passes/passes.h          |   1 +
5 files changed, 237 insertions(+), 0 deletions(-)
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 8204f1070..c19f19473 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -68,6 +68,7 @@ set(passes_SOURCES
NameTypes.cpp
OnceReduction.cpp
OptimizeAddedConstants.cpp
+ OptimizeCasts.cpp
OptimizeInstructions.cpp
OptimizeForJS.cpp
PickLoadSigns.cpp
diff --git a/src/passes/Monomorphize.cpp b/src/passes/Monomorphize.cpp
index 80e908a83..f8ee4a6e6 100644
--- a/src/passes/Monomorphize.cpp
+++ b/src/passes/Monomorphize.cpp
@@ -214,6 +214,9 @@ struct Monomorphize : public Pass {
// expect to help. That would be faster, but we'd always run the risk of
// missing things, especially as new passes are added later and we don't
// think to add them here.
+ // Alternatively, perhaps we should have a mode that does use -O1 or
+ // even -O2 or above, as in theory any optimization could end up
+ // mattering a lot here.
void doMinimalOpts(Function* func) {
PassRunner runner(getPassRunner());
runner.options.optimizeLevel = 1;
diff --git a/src/passes/OptimizeCasts.cpp b/src/passes/OptimizeCasts.cpp
new file mode 100644
index 000000000..854110157
--- /dev/null
+++ b/src/passes/OptimizeCasts.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Refine uses of locals where possible. For example, consider this:
+//
+// (some.operation
+// (ref.cast .. (local.get $ref))
+// (local.get $ref)
+// )
+//
+// The second use might as well use the refined/cast value as well:
+//
+// (some.operation
+// (local.tee $temp
+// (ref.cast .. (local.get $ref))
+// )
+// (local.get $temp)
+// )
+//
+// This change adds a local but it switches some local.gets to use a local of a
+// more refined type. That can help other optimizations later.
+//
+// An example of an important pattern this handles is itable calls:
+//
+// (call_ref
+// (ref.cast $actual.type
+// (local.get $object)
+// )
+// (struct.get $vtable ..
+// (ref.cast $vtable
+// (struct.get $itable ..
+// (local.get $object)
+// )
+// )
+// )
+// )
+//
+// We cast to the actual type for the |this| parameter, but we technically do
+// not need to do so in order to read the itable: the itable may be of a
+// generic type, and we cast the vtable afterwards anyhow. But since we have
+// already cast |this|, we can reuse that cast value for the itable get, which
+// may then let us remove the vtable cast after we refine the itable type. And
+// that can lead to devirtualization later.
+//
+// Closely related things appear in other passes:
+//
+// * SimplifyLocals will find locals already containing a more refined type and
+// switch to them. RedundantSetElimination does the same across basic blocks.
+// In theory one of them could be extended to also add new locals, and then
+// they would be doing something similar to this pass.
+// * LocalCSE finds repeated expressions and stores them in locals for use
+// later. In theory that pass could be extended to look not for exact copies
+// but for equivalent things through a cast, and then it would be doing
+// something similar to this pass.
+//
+// However, while those other passes could be extended to cover what this pass
+// does, we will have further cast-specific optimizations to add, which make
+// sense in a new pass anyhow, and it should be simpler overall to keep all
+// such cast optimizations in one pass, here.
+//
+// TODO: Move casts earlier in a basic block as well, at least in traps-never-
+// happen mode where we can assume they never fail.
+// TODO: Look past individual basic blocks?
+// TODO: Look at LocalSet as well and not just Get. That would add some overlap
+// with the other passes mentioned above, but once we do things like
+// moving casts earlier as in the other TODO, we'd be doing uniquely
+// useful things with LocalSet here.
+//
+
+#include "ir/linear-execution.h"
+#include "ir/properties.h"
+#include "ir/utils.h"
+#include "pass.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace {
+
+// Find the best cast versions of local.gets: other local.gets with the same
+// value, but cast to a more refined type.
+struct BestCastFinder : public LinearExecutionWalker<BestCastFinder> {
+
+ PassOptions options;
+
+  // Map each local index to the most refined downcast of a local.get from
+  // that index.
+ //
+ // This is tracked in each basic block, and cleared between them.
+ std::unordered_map<Index, Expression*> mostCastedGets;
+
+  // For each such most-refined cast, a vector of other local.gets that could
+  // be replaced with gets of the cast value.
+ //
+ // This is tracked until the end of the entire function, and contains the
+ // information we need to optimize later. That is, entries here are things we
+ // want to apply.
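+  //
+  // A hypothetical illustration (the names here are invented for this
+  // comment): given
+  //
+  //   (ref.cast $refined (local.get $x))  ;; cast A
+  //   ..
+  //   (local.get $x)                      ;; get B
+  //
+  // in a single basic block, mostCastedGets[$x] ends up pointing at the cast
+  // A, and lessCastedGets[A] contains the plain get B, which we will later
+  // rewrite to read the cast value.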
+ std::unordered_map<Expression*, std::vector<LocalGet*>> lessCastedGets;
+
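+  // The casts we track are only valid while execution proceeds linearly, so
+  // forget them whenever we reach a point where control flow merges or
+  // branches.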
+ static void doNoteNonLinear(BestCastFinder* self, Expression** currp) {
+ self->mostCastedGets.clear();
+ }
+
+ void visitLocalSet(LocalSet* curr) {
+ // Clear any information about this local; it has a new value here.
+ mostCastedGets.erase(curr->index);
+ }
+
+ void visitLocalGet(LocalGet* curr) {
+ auto iter = mostCastedGets.find(curr->index);
+ if (iter != mostCastedGets.end()) {
+ auto* bestCast = iter->second;
+ if (curr->type != bestCast->type &&
+ Type::isSubType(bestCast->type, curr->type)) {
+        // The best cast has a more refined type; note that we want to use it.
+ lessCastedGets[bestCast].push_back(curr);
+ }
+ }
+ }
+
+ void visitRefAs(RefAs* curr) { handleRefinement(curr); }
+
+ void visitRefCast(RefCast* curr) { handleRefinement(curr); }
+
+ void handleRefinement(Expression* curr) {
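+    // Look through the cast itself, and anything else a value falls through
+    // (such as blocks and local.tees), to see if a local.get flows into it.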
+ auto* fallthrough = Properties::getFallthrough(curr, options, *getModule());
+ if (auto* get = fallthrough->dynCast<LocalGet>()) {
+ auto*& bestCast = mostCastedGets[get->index];
+ if (!bestCast) {
+      // This is the first cast of this local that we have seen.
+ bestCast = curr;
+ return;
+ }
+
+ // See if we are better than the current best.
+ if (curr->type != bestCast->type &&
+ Type::isSubType(curr->type, bestCast->type)) {
+ bestCast = curr;
+ }
+ }
+ }
+};
+
+// Given a set of best casts, apply them: save each best cast in a new local
+// and use that local in the places that want the cast value.
+//
+// It is simpler to do this in another pass after BestCastFinder so that we do
+// not need to worry about corner cases with invalidation of pointers in things
+// we've already walked past.
+struct FindingApplier : public PostWalker<FindingApplier> {
+ BestCastFinder& finder;
+
+ FindingApplier(BestCastFinder& finder) : finder(finder) {}
+
+ void visitRefAs(RefAs* curr) { handleRefinement(curr); }
+
+ void visitRefCast(RefCast* curr) { handleRefinement(curr); }
+
+ void handleRefinement(Expression* curr) {
+ auto iter = finder.lessCastedGets.find(curr);
+ if (iter == finder.lessCastedGets.end()) {
+ return;
+ }
+
+ // This expression was the best cast for some gets. Add a new local to
+ // store this value, then use it for the gets.
+ auto var = Builder::addVar(getFunction(), curr->type);
+ auto& gets = iter->second;
+ for (auto* get : gets) {
+ get->index = var;
+ get->type = curr->type;
+ }
+
+ // Replace ourselves with a tee.
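+    // The tee stores the cast value in the new local while providing the
+    // same type as before to our parent.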
+ replaceCurrent(Builder(*getModule()).makeLocalTee(var, curr, curr->type));
+ }
+};
+
+} // anonymous namespace
+
+struct OptimizeCasts : public WalkerPass<PostWalker<OptimizeCasts>> {
+ bool isFunctionParallel() override { return true; }
+
+ std::unique_ptr<Pass> create() override {
+ return std::make_unique<OptimizeCasts>();
+ }
+
+ void doWalkFunction(Function* func) {
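+    // The reference casts this pass optimizes only exist when GC is enabled.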
+ if (!getModule()->features.hasGC()) {
+ return;
+ }
+
+ // First, find the best casts that we want to use.
+ BestCastFinder finder;
+ finder.options = getPassOptions();
+ finder.walkFunctionInModule(func, getModule());
+
+ if (finder.lessCastedGets.empty()) {
+ // Nothing to do.
+ return;
+ }
+
+ // Apply the requests: use the best casts.
+ FindingApplier applier(finder);
+ applier.walkFunctionInModule(func, getModule());
+
+ // LocalGet type changes must be propagated.
+ ReFinalize().walkFunctionInModule(func, getModule());
+ }
+};
+
+Pass* createOptimizeCastsPass() { return new OptimizeCasts(); }
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 7f79313e2..68561f7ee 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -293,6 +293,8 @@ void PassRegistry::registerPasses() {
"optimizes added constants into load/store offsets, propagating "
"them across locals too",
createOptimizeAddedConstantsPropagatePass);
+ registerPass(
+ "optimize-casts", "eliminate and reuse casts", createOptimizeCastsPass);
registerPass("optimize-instructions",
"optimizes instruction combinations",
createOptimizeInstructionsPass);
@@ -546,6 +548,7 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
addIfNoDWARFIssues("merge-locals"); // very slow on e.g. sqlite
}
if (options.optimizeLevel > 1 && wasm->features.hasGC()) {
+ addIfNoDWARFIssues("optimize-casts");
// Coalescing may prevent subtyping (as a coalesced local must have the
// supertype of all those combined into it), so subtype first.
// TODO: when optimizing for size, maybe the order should reverse?
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 65923cd25..ae0cc62e2 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -93,6 +93,7 @@ Pass* createOnceReductionPass();
Pass* createOptimizeAddedConstantsPass();
Pass* createOptimizeAddedConstantsPropagatePass();
Pass* createOptimizeInstructionsPass();
+Pass* createOptimizeCastsPass();
Pass* createOptimizeForJSPass();
Pass* createOptimizeStackIRPass();
Pass* createPickLoadSignsPass();
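
With this change the new pass can be run directly by its registered name (a
usage sketch; the file names are placeholders):

  wasm-opt --enable-gc --optimize-casts in.wasm -o out.wasm

It also runs automatically in the default pipeline at -O2 and above when GC is
enabled, as added in pass.cpp above.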