summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xscripts/fuzz_opt.py3
-rw-r--r--src/ir/CMakeLists.txt1
-rw-r--r--src/ir/hashed.h17
-rw-r--r--src/ir/properties.cpp42
-rw-r--r--src/ir/properties.h45
-rw-r--r--src/passes/LocalCSE.cpp660
-rw-r--r--src/passes/OptimizeInstructions.cpp27
-rw-r--r--src/passes/Precompute.cpp5
-rw-r--r--src/passes/pass.cpp6
-rw-r--r--src/tools/wasm-reduce.cpp3
-rw-r--r--test/lit/passes/O4_disable-bulk-memory.wast142
-rw-r--r--test/lit/passes/flatten_local-cse_Os.wast71
-rw-r--r--test/lit/passes/flatten_local-cse_all-features.wast1139
-rw-r--r--test/lit/passes/local-cse.wast433
-rw-r--r--test/lit/passes/local-cse_all-features.wast272
15 files changed, 1354 insertions, 1512 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index cdce33ff5..fc2192137 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -1039,7 +1039,8 @@ opt_choices = [
# ["--fpcast-emu"], # removes indirect call failures as it makes them go through regardless of type
["--inlining"],
["--inlining-optimizing"],
- ["--flatten", "--local-cse"],
+ ["--flatten", "--simplify-locals-notee-nostructure", "--local-cse"],
+ ["--local-cse"],
["--heap2local"],
["--remove-unused-names", "--heap2local"],
["--generate-stack-ir"],
diff --git a/src/ir/CMakeLists.txt b/src/ir/CMakeLists.txt
index f98f7b7aa..c17a48ac0 100644
--- a/src/ir/CMakeLists.txt
+++ b/src/ir/CMakeLists.txt
@@ -3,6 +3,7 @@ set(ir_SOURCES
ExpressionAnalyzer.cpp
ExpressionManipulator.cpp
names.cpp
+ properties.cpp
LocalGraph.cpp
ReFinalize.cpp
stack-utils.cpp
diff --git a/src/ir/hashed.h b/src/ir/hashed.h
index 4a6e1647e..99488dcfb 100644
--- a/src/ir/hashed.h
+++ b/src/ir/hashed.h
@@ -23,23 +23,6 @@
namespace wasm {
-// An expression with a cached hash value
-struct HashedExpression {
- Expression* expr;
- size_t digest;
-
- HashedExpression(Expression* expr) : expr(expr) {
- if (expr) {
- digest = ExpressionAnalyzer::hash(expr);
- } else {
- digest = hash(0);
- }
- }
-
- HashedExpression(const HashedExpression& other)
- : expr(other.expr), digest(other.digest) {}
-};
-
// A pass that hashes all functions
struct FunctionHasher : public WalkerPass<PostWalker<FunctionHasher>> {
diff --git a/src/ir/properties.cpp b/src/ir/properties.cpp
new file mode 100644
index 000000000..889ece57b
--- /dev/null
+++ b/src/ir/properties.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2021 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/properties.h"
+#include "wasm-traversal.h"
+
+namespace wasm {
+
+namespace Properties {
+
+bool isIntrinsicallyNondeterministic(Expression* curr, FeatureSet features) {
+ // Practically no wasm instructions are intrinsically nondeterministic.
+ // Exceptions occur only in GC atm.
+ if (!features.hasGC()) {
+ return false;
+ }
+
+ struct Scanner : public PostWalker<Scanner> {
+ bool deterministic = true;
+ void visitStructNew(StructNew* curr) { deterministic = false; }
+ void visitArrayNew(ArrayNew* curr) { deterministic = false; }
+ } scanner;
+ scanner.walk(curr);
+ return !scanner.deterministic;
+}
+
+} // namespace Properties
+
+} // namespace wasm
diff --git a/src/ir/properties.h b/src/ir/properties.h
index d477cb11c..4909dfbc8 100644
--- a/src/ir/properties.h
+++ b/src/ir/properties.h
@@ -335,6 +335,51 @@ inline bool canEmitSelectWithArms(Expression* ifTrue, Expression* ifFalse) {
return ifTrue->type.isSingle() && ifFalse->type.isSingle();
}
+// An intrinsically-nondeterministic expression is one that can return different
+// results for the same inputs, and that difference is *not* explained by
+// other expressions that interact with this one. Hence the cause of
+// nondeterminism can be said to be "intrinsic" - it is internal and inherent in
+// the expression.
+//
+// To see the issue more concretely, consider these:
+//
+// x = load(100);
+// ..
+// y = load(100);
+//
+// versus
+//
+// x = struct.new();
+// ..
+// y = struct.new();
+//
+// Are x and y identical in both cases? For loads, we can look at the code
+// in ".." to see: if there are no possible stores to memory, then the
+// result is identical (and we have EffectAnalyzer for that). For the GC
+// allocations, though, it doesn't matter what is in "..": there is nothing
+// in the wasm that we can check to find out if the results are the same or
+// not. (In fact, in this case they are always not the same.) So the
+// nondeterminism is "intrinsic."
+//
+// Thus, loads are nondeterministic but not intrinsically so, while GC
+// allocations are actual examples of intrinsically nondeterministic
+// instructions. If wasm were to add "get current time" or "get a random number"
+// instructions then those would also be intrinsically nondeterministic.
+//
+// * Note that NaN nondeterminism is ignored here. Technically that allows e.g.
+// an f32.add to be nondeterministic, but it is a valid wasm implementation
+// to have deterministic NaN behavior, and we optimize under that assumption.
+// So NaN nondeterminism does not cause anything to be intrinsically
+// nondeterministic.
+// * Note that calls are ignored here. In theory this concept could be defined
+// either way for them (that is, we could potentially define them as either
+// intrinsically nondeterministic, or not, and each could make sense in its
+// own way). It is simpler to ignore them here, which means we only consider
+// the behavior of the expression provided here (including its chldren), and
+// not external code.
+//
+bool isIntrinsicallyNondeterministic(Expression* curr, FeatureSet features);
+
} // namespace Properties
} // namespace wasm
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp
index 7e6d9cb41..b55da976c 100644
--- a/src/passes/LocalCSE.cpp
+++ b/src/passes/LocalCSE.cpp
@@ -17,32 +17,112 @@
//
// Local CSE
//
-// This requires --flatten to be run before in order to be effective,
-// and preserves flatness. The reason flatness is required is that
-// this pass assumes everything is stored in a local, and all it does
-// is alter local.sets to do local.gets of an existing value when
-// possible, replacing a recomputing of that value. That design means that
-// if there are block and if return values, nested expressions not stored
-// to a local, etc., then it can't operate on them (and will just not
-// do anything for them).
+// This finds common sub-expressions and saves them to a local to avoid
+// recomputing them. It runs in each basic block separately, and uses a simple
+// algorithm, where we track "requests" to reuse a value. That is, if we see
+// an add operation appear twice, and the inputs must be identical in both
+// cases, then the second one requests to reuse the computed value from the
+// first. The first one to appear is the "original" expression that will remain
+// in the code; we will save it's value to a local, and get it from that local
+// later:
//
-// In each linear area of execution,
-// * track each relevant (big enough) expression
-// * if already seen, write to a local if not already, and reuse
-// * invalidate the list as we see effects
+// (i32.add (A) (B))
+// (i32.add (A) (B))
//
-// TODO: global, inter-block gvn etc.
+// =>
+//
+// (local.tee $temp (i32.add (A) (B)))
+// (local.get $temp)
+//
+// The algorithm used here is as follows:
+//
+// * Scan: Hash each expression and see if it repeats later.
+// If it does:
+// * Note that the original appearance is requested to be reused
+// an additional time.
+// * Link the first expression as the original appearance of the
+// later one.
+// * Scan the children of the repeat and undo their requests to be
+// replaced (as if we will reuse the parent, we don't need to do
+// anything for the children, see below).
+//
+// * Check: Check if effects prevent some of the requests from being done. For
+// example, if the value contains a load from memory, we cannot
+// optimize around a store, as the value before the store might be
+// different (see below).
+//
+// * Apply: Go through the basic block again, this time adding a tee on an
+// expression whose value we want to use later, and replacing the
+// uses with gets.
+//
+// For example, here is what the algorithm would do on
+//
+// (i32.eqz (A))
+// ..
+// (i32.eqz (A))
+//
+// Assuming A does not have side effects that interfere, this will happen:
+//
+// 1. Scan A and add it to the hash map of things we have seen.
+// 2. Scan the eqz, and do the same for it.
+// 3. Scan the second A. Increment the first A's requests counter, and mark the
+// second A as intended to be replaced by the original A.
+// 4. Scan the second eqz, and do similar things for it: increment the requests
+// for the original eqz, and point to the original from the repeat.
+// * Then also scan its children, in this case A, and decrement the first
+// A's reuse counter, and unmark the second A's note that it is intended
+// to be replaced.
+// 5. After that, the second eqz requests to be replaced by the first, and
+// there is no request on A.
+// 6. Run through the block again, adding a tee and replacing the second eqz,
+// resulting in:
+//
+// (local.tee $temp
+// (i32.eqz
+// (A)
+// )
+// )
+// (local.get $temp)
+//
+// Note how the scanning of children avoids us adding a local for A: when we
+// reuse the parent, we don't need to also try to reuse the child.
+//
+// Effects must be considered carefully by the Checker phase. E.g.:
+//
+// x = load(a);
+// store(..)
+// y = load(a);
+//
+// Even though the load looks identical, the store means we may load a
+// different value, so we will invalidate the request to optimize here.
+//
+// This pass only finds entire expression trees, and not parts of them, so we
+// will not optimize this:
+//
+// (A (B (C (D1))))
+// (A (B (C (D2))))
+//
+// The innermost child is different, so the trees are not identical. However,
+// running flatten before running this pass would allow those to be optimized as
+// well (we would also need to simplify locals somewhat to allow the locals to
+// be identified as identical, see pass.cpp).
+//
+// TODO: Global, inter-block gvn etc. However, note that atm the cost of our
+// adding new locals here is low because their lifetimes are all within a
+// single basic block. A global optimization here could add long-lived
+// locals with register allocation costs in the entire function.
//
#include <algorithm>
#include <memory>
-#include "ir/flat.h"
#include <ir/cost.h>
#include <ir/effects.h>
-#include <ir/equivalent_sets.h>
-#include <ir/hashed.h>
+#include <ir/iteration.h>
#include <ir/linear-execution.h>
+#include <ir/properties.h>
+#include <ir/type-updating.h>
+#include <ir/utils.h>
#include <pass.h>
#include <wasm-builder.h>
#include <wasm-traversal.h>
@@ -50,224 +130,420 @@
namespace wasm {
-struct LocalCSE : public WalkerPass<LinearExecutionWalker<LocalCSE>> {
- bool isFunctionParallel() override { return true; }
+namespace {
- // CSE adds and reuses locals.
- // FIXME DWARF updating does not handle local changes yet.
- bool invalidatesDWARF() override { return true; }
+// An expression with a cached hash value.
+struct HashedExpression {
+ Expression* expr;
+ size_t digest;
- Pass* create() override { return new LocalCSE(); }
+ HashedExpression(Expression* expr, size_t digest)
+ : expr(expr), digest(digest) {}
- struct Usable {
- HashedExpression hashed;
- Type localType;
- Usable(HashedExpression hashed, Type localType)
- : hashed(hashed), localType(localType) {}
- };
+ HashedExpression(const HashedExpression& other)
+ : expr(other.expr), digest(other.digest) {}
+};
- struct UsableHasher {
- size_t operator()(const Usable value) const {
- auto digest = value.hashed.digest;
- wasm::rehash(digest, value.localType.getID());
- return digest;
- }
- };
+struct HEHasher {
+ size_t operator()(const HashedExpression hashed) const {
+ return hashed.digest;
+ }
+};
- struct UsableComparer {
- bool operator()(const Usable a, const Usable b) const {
- if (a.hashed.digest != b.hashed.digest || a.localType != b.localType) {
- return false;
- }
- return ExpressionAnalyzer::equal(a.hashed.expr, b.hashed.expr);
+// A full equality check for HashedExpressions. The hash is used as a speedup,
+// but if it matches we still verify the contents are identical.
+struct HEComparer {
+ bool operator()(const HashedExpression a, const HashedExpression b) const {
+ if (a.digest != b.digest) {
+ return false;
}
- };
-
- // information for an expression we can reuse
- struct UsableInfo {
- Expression* value; // the value we can reuse
- Index index; // the local we are assigned to, local.get that to reuse us
- EffectAnalyzer effects;
-
- UsableInfo(Expression* value,
- Index index,
- PassOptions& passOptions,
- FeatureSet features)
- : value(value), index(index), effects(passOptions, features, value) {}
- };
-
- // a list of usables in a linear execution trace
- class Usables
- : public std::
- unordered_map<Usable, UsableInfo, UsableHasher, UsableComparer> {};
-
- // locals in current linear execution trace, which we try to sink
- Usables usables;
-
- // We track locals containing the same value - the value is what matters, not
- // the index.
- EquivalentSets equivalences;
-
- bool anotherPass;
+ return ExpressionAnalyzer::equal(a.expr, b.expr);
+ }
+};
- void doWalkFunction(Function* func) {
- Flat::verifyFlatness(func);
- anotherPass = true;
- // we may need multiple rounds
- while (anotherPass) {
- anotherPass = false;
- clear();
- super::doWalkFunction(func);
- }
+// Maps hashed expressions to the list of expressions that match. That is, all
+// expressions that are equivalent (same hash, and also compare equal) will
+// be in a vector for the corresponding entry in this map.
+using HashedExprs = std::unordered_map<HashedExpression,
+ SmallVector<Expression*, 1>,
+ HEHasher,
+ HEComparer>;
+
+// Request information about an expression: whether it requests to be replaced,
+// or it is an original expression whose value can be used to replace others
+// later.
+struct RequestInfo {
+ // The number of other expressions that would like to reuse this value.
+ Index requests = 0;
+
+ // If this is a repeat value, this points to the original. (And we have
+ // incremented the original's |requests|.)
+ Expression* original = nullptr;
+
+ void validate() const {
+ // An expression cannot both be requested and make requests. If we see A
+ // appear three times, both repeats must request from the very first.
+ assert(!(requests && original));
+
+ // When we encounter a requestInfo (after its initial creation) then it
+ // must either request or be requested - otherwise, it should not exist,
+ // as we remove unneeded things from our tracking.
+ assert(requests || original);
}
+};
- static void doNoteNonLinear(LocalCSE* self, Expression** currp) {
- self->clear();
+// A map of expressions to their request info.
+struct RequestInfoMap : public std::unordered_map<Expression*, RequestInfo> {
+ void dump(std::ostream& o) {
+ for (auto& kv : *this) {
+ auto* curr = kv.first;
+ auto& info = kv.second;
+ o << *curr << " has " << info.requests << " reqs, orig: " << info.original
+ << '\n';
+ }
}
+};
- void clear() {
- usables.clear();
- equivalences.clear();
+struct Scanner
+ : public LinearExecutionWalker<Scanner, UnifiedExpressionVisitor<Scanner>> {
+ PassOptions options;
+
+ // Request info for all expressions ever seen.
+ RequestInfoMap& requestInfos;
+
+ Scanner(PassOptions options, RequestInfoMap& requestInfos)
+ : options(options), requestInfos(requestInfos) {}
+
+ // Currently active hashed expressions in the current basic block. If we see
+ // an active expression before us that is identical to us, then it becomes our
+ // original expression that we request from.
+ HashedExprs activeExprs;
+
+ // Hash values of all active expressions. We store these so that we do not end
+ // up recomputing hashes of children in an N^2 manner.
+ std::unordered_map<Expression*, size_t> activeHashes;
+
+ static void doNoteNonLinear(Scanner* self, Expression** currp) {
+ // We are starting a new basic block. Forget all the currently-hashed
+ // expressions, as we no longer want to make connections to anything from
+ // another block.
+ self->activeExprs.clear();
+ self->activeHashes.clear();
+ // Note that we do not clear requestInfos - that is information we will use
+ // later in the Applier class. That is, we've cleared all the active
+ // information, leaving the things we need later.
}
- // Checks invalidations due to a set of effects. Also optionally receive
- // an expression that was just post-visited, and that also needs to be
- // taken into account.
- void checkInvalidations(EffectAnalyzer& effects, Expression* curr = nullptr) {
- // TODO: this is O(bad)
- std::vector<Usable> invalidated;
- for (auto& sinkable : usables) {
- // Check invalidations of the values we may want to use.
- if (effects.invalidates(sinkable.second.effects)) {
- invalidated.push_back(sinkable.first);
+ void visitExpression(Expression* curr) {
+ // Compute the hash, using the pre-computed hashes of the children, which
+ // are saved. This allows us to hash everything in linear time.
+ //
+ // Note that we must compute the hash first, as we need it even for things
+ // that are not isRelevant() (if they are the children of a relevant thing).
+ auto hash = ExpressionAnalyzer::shallowHash(curr);
+ for (auto* child : ChildIterator(curr)) {
+ auto iter = activeHashes.find(child);
+ if (iter == activeHashes.end()) {
+ // The child was in another block, so this expression cannot be
+ // optimized.
+ return;
}
+ hash_combine(hash, iter->second);
+ }
+ activeHashes[curr] = hash;
+
+ // Check if this is something relevant for optimization.
+ if (!isRelevant(curr)) {
+ return;
}
- if (curr) {
- // If we are a set, we have more to check: each of the usable
- // values was from a set, and we did not consider the set in
- // the loop above - just the values. So here we must check that
- // sets do not interfere. (Note that due to flattening we
- // have no risk of tees etc.)
- if (auto* set = curr->dynCast<LocalSet>()) {
- for (auto& sinkable : usables) {
- // Check if the index is the same. Make sure to ignore
- // our own value, which we may have just added!
- if (sinkable.second.index == set->index &&
- sinkable.second.value != set->value) {
- invalidated.push_back(sinkable.first);
- }
+
+ auto& vec = activeExprs[HashedExpression(curr, hash)];
+ vec.push_back(curr);
+ if (vec.size() > 1) {
+ // This is a repeat expression. Add a request for it.
+ auto& info = requestInfos[curr];
+ auto* original = vec[0];
+ info.original = original;
+
+ // Mark the request on the original. Note that this may create the
+ // requestInfo for it, if it is the first request (this avoids us creating
+ // requests eagerly).
+ requestInfos[original].requests++;
+
+ // Remove any requests from the expression's children, as we will replace
+ // the entire thing (see explanation earlier). Note that we just need to
+ // go over our direct children, as grandchildren etc. have already been
+ // processed. That is, if we have
+ //
+ // (A (B (C))
+ // (A (B (C))
+ //
+ // Then when we see the second B we will mark the second C as no longer
+ // requesting replacement. And then when we see the second A, all it needs
+ // to update is the second B.
+ for (auto* child : ChildIterator(curr)) {
+ if (!requestInfos.count(child)) {
+ // The child never had a request. While it repeated (since the parent
+ // repeats), it was not relevant for the optimization so we never
+ // created a requestInfo for it.
+ continue;
+ }
+
+ // Remove the child.
+ auto& childInfo = requestInfos[child];
+ auto* childOriginal = childInfo.original;
+ requestInfos.erase(child);
+
+ // Update the child's original, potentially erasing it too if no
+ // requests remain.
+ assert(childOriginal);
+ auto& childOriginalRequests = requestInfos[childOriginal].requests;
+ assert(childOriginalRequests > 0);
+ childOriginalRequests--;
+ if (childOriginalRequests == 0) {
+ requestInfos.erase(childOriginal);
}
}
}
- for (auto index : invalidated) {
- usables.erase(index);
- }
}
- std::vector<Expression*> expressionStack;
+ // Only some values are relevant to be optimized.
+ bool isRelevant(Expression* curr) {
+ // * Ignore anything that is not a concrete type, as we are looking for
+ // computed values to reuse, and so none and unreachable are irrelevant.
+ // * Ignore local.get and set, as those are the things we optimize to.
+ // * Ignore constants so that we don't undo the effects of constant
+ // propagation.
+ // * Ignore things we cannot put in a local, as then we can't do this
+ // optimization at all.
+ //
+ // More things matter here, like having side effects or not, but computing
+ // them is not cheap, so leave them for later, after we know if there
+ // actually are any requests for reuse of this value (which is rare).
+ if (!curr->type.isConcrete() || curr->is<LocalGet>() ||
+ curr->is<LocalSet>() || Properties::isConstantExpression(curr) ||
+ !TypeUpdating::canHandleAsLocal(curr->type)) {
+ return false;
+ }
- static void visitPre(LocalCSE* self, Expression** currp) {
- // pre operations
- Expression* curr = *currp;
+ // If the size is at least 3, then if we have two of them we have 6,
+ // and so adding one set+one get and removing one of the items itself
+ // is not detrimental, and may be beneficial.
+ // TODO: investigate size 2
+ if (options.shrinkLevel > 0 && Measurer::measure(curr) >= 3) {
+ return true;
+ }
- EffectAnalyzer effects(self->getPassOptions(), self->getModule()->features);
- if (effects.checkPre(curr)) {
- self->checkInvalidations(effects);
+ // If we focus on speed, any reduction in cost is beneficial, as the
+ // cost of a get is essentially free.
+ if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0) {
+ return true;
}
- self->expressionStack.push_back(curr);
+ return false;
}
+};
- static void visitPost(LocalCSE* self, Expression** currp) {
- auto* curr = *currp;
+// Check for invalidations due to effects. We do this after scanning as effect
+// computation is not cheap, and there are usually not many identical fragments
+// of code.
+//
+// This updates the RequestInfos of things it sees are invalidated, which will
+// make Applier ignore them.
+struct Checker
+ : public LinearExecutionWalker<Checker, UnifiedExpressionVisitor<Checker>> {
+ PassOptions options;
+ RequestInfoMap& requestInfos;
+
+ Checker(PassOptions options, RequestInfoMap& requestInfos)
+ : options(options), requestInfos(requestInfos) {}
+
+ struct ActiveOriginalInfo {
+ // How many of the requests remain to be seen during our walk. When this
+ // reaches 0, we know that the original is no longer requested from later in
+ // the block.
+ Index requestsLeft;
+
+ // The effects in the expression.
+ EffectAnalyzer effects;
+ };
- // main operations
- self->handle(curr);
+ // The currently relevant original expressions, that is, the ones that may be
+ // optimized in the current basic block.
+ std::unordered_map<Expression*, ActiveOriginalInfo> activeOriginals;
+
+ void visitExpression(Expression* curr) {
+ // This is the first time we encounter this expression.
+ assert(!activeOriginals.count(curr));
+
+ // Given the current expression, see what it invalidates of the currently-
+ // hashed expressions, if there are any.
+ if (!activeOriginals.empty()) {
+ EffectAnalyzer effects(options, getModule()->features);
+ // We only need to visit this node itself, as we have already visited its
+ // children by the time we get here.
+ effects.visit(curr);
+
+ std::vector<Expression*> invalidated;
+ for (auto& kv : activeOriginals) {
+ auto* original = kv.first;
+ auto& originalInfo = kv.second;
+ if (effects.invalidates(originalInfo.effects)) {
+ invalidated.push_back(original);
+ }
+ }
- // post operations
+ for (auto* original : invalidated) {
+ // Remove all requests after this expression, as we cannot optimize to
+ // them.
+ requestInfos[original].requests -=
+ activeOriginals.at(original).requestsLeft;
+
+ // If no requests remain at all (that is, there were no requests we
+ // could provide before we ran into this invalidation) then we do not
+ // need this original at all.
+ if (requestInfos[original].requests == 0) {
+ requestInfos.erase(original);
+ }
- EffectAnalyzer effects(self->getPassOptions(), self->getModule()->features);
- if (effects.checkPost(curr)) {
- self->checkInvalidations(effects, curr);
+ activeOriginals.erase(original);
+ }
}
- self->expressionStack.pop_back();
- }
+ auto iter = requestInfos.find(curr);
+ if (iter == requestInfos.end()) {
+ return;
+ }
+ auto& info = iter->second;
+ info.validate();
+
+ if (info.requests > 0) {
+ // This is an original. Compute its side effects, as we cannot optimize
+ // away repeated apperances if it has any.
+ EffectAnalyzer effects(options, getModule()->features, curr);
+
+ // We also cannot optimize away something that is intrinsically
+ // nondeterministic: even if it has no side effects, if it may return a
+ // different result each time, then we cannot optimize away repeats.
+ if (effects.hasSideEffects() ||
+ Properties::isIntrinsicallyNondeterministic(curr,
+ getModule()->features)) {
+ requestInfos.erase(curr);
+ } else {
+ activeOriginals.emplace(
+ curr, ActiveOriginalInfo{info.requests, std::move(effects)});
+ }
+ } else if (info.original) {
+ // The original may have already been invalidated. If so, remove our info
+ // as well.
+ auto originalIter = activeOriginals.find(info.original);
+ if (originalIter == activeOriginals.end()) {
+ requestInfos.erase(iter);
+ return;
+ }
- // override scan to add a pre and a post check task to all nodes
- static void scan(LocalCSE* self, Expression** currp) {
- self->pushTask(visitPost, currp);
+ // After visiting this expression, we have one less request for its
+ // original, and perhaps none are left.
+ auto& originalInfo = originalIter->second;
+ if (originalInfo.requestsLeft == 1) {
+ activeOriginals.erase(info.original);
+ } else {
+ originalInfo.requestsLeft--;
+ }
+ }
+ }
- super::scan(self, currp);
+ static void doNoteNonLinear(Checker* self, Expression** currp) {
+ // Between basic blocks there can be no active originals.
+ assert(self->activeOriginals.empty());
+ }
- self->pushTask(visitPre, currp);
+ void visitFunction(Function* curr) {
+ // At the end of the function there can be no active originals.
+ assert(activeOriginals.empty());
}
+};
- void handle(Expression* curr) {
- if (auto* set = curr->dynCast<LocalSet>()) {
- // Calculate equivalences
- auto* func = getFunction();
- equivalences.reset(set->index);
- if (auto* get = set->value->dynCast<LocalGet>()) {
- if (func->getLocalType(set->index) == func->getLocalType(get->index)) {
- equivalences.add(set->index, get->index);
- }
- }
- // consider the value
- auto* value = set->value;
- if (isRelevant(value)) {
- Usable usable(value, func->getLocalType(set->index));
- auto iter = usables.find(usable);
- if (iter != usables.end()) {
- // already exists in the table, this is good to reuse
- auto& info = iter->second;
- Type localType = func->getLocalType(info.index);
- set->value =
- Builder(*getModule()).makeLocalGet(info.index, localType);
- anotherPass = true;
- } else {
- // not in table, add this, maybe we can help others later
- usables.emplace(std::make_pair(
- usable,
- UsableInfo(
- value, set->index, getPassOptions(), getModule()->features)));
- }
- }
- } else if (auto* get = curr->dynCast<LocalGet>()) {
- if (auto* set = equivalences.getEquivalents(get->index)) {
- // Canonicalize to the lowest index. This lets hashing and comparisons
- // "just work".
- get->index = *std::min_element(set->begin(), set->end());
+// Applies the optimization now that we know which requests are valid.
+struct Applier
+ : public LinearExecutionWalker<Applier, UnifiedExpressionVisitor<Applier>> {
+ RequestInfoMap requestInfos;
+
+ Applier(RequestInfoMap& requestInfos) : requestInfos(requestInfos) {}
+
+ // Maps the original expressions that we save to locals to the local indexes
+ // for them.
+ std::unordered_map<Expression*, Index> originalLocalMap;
+
+ void visitExpression(Expression* curr) {
+ auto iter = requestInfos.find(curr);
+ if (iter == requestInfos.end()) {
+ return;
+ }
+
+ const auto& info = iter->second;
+ info.validate();
+
+ if (info.requests) {
+ // We have requests for this value. Add a local and tee the value to
+ // there.
+ Index local = originalLocalMap[curr] =
+ Builder::addVar(getFunction(), curr->type);
+ replaceCurrent(
+ Builder(*getModule()).makeLocalTee(local, curr, curr->type));
+ } else if (info.original) {
+ auto& originalInfo = requestInfos.at(info.original);
+ if (originalInfo.requests) {
+ // This is a valid request of an original value. Get the value from the
+ // local.
+ assert(originalLocalMap.count(info.original));
+ replaceCurrent(
+ Builder(*getModule())
+ .makeLocalGet(originalLocalMap[info.original], curr->type));
+ originalInfo.requests--;
}
}
}
- // A relevant value is a non-trivial one, something we may want to reuse
- // and are able to.
- bool isRelevant(Expression* value) {
- if (value->is<LocalGet>()) {
- return false; // trivial, this is what we optimize to!
- }
- if (!value->type.isConcrete()) {
- return false; // don't bother with unreachable etc.
- }
- if (EffectAnalyzer(getPassOptions(), getModule()->features, value)
- .hasSideEffects()) {
- return false; // we can't combine things with side effects
- }
- auto& options = getPassRunner()->options;
- // If the size is at least 3, then if we have two of them we have 6,
- // and so adding one set+two gets and removing one of the items itself
- // is not detrimental, and may be beneficial.
- if (options.shrinkLevel > 0 && Measurer::measure(value) >= 3) {
- return true;
+ static void doNoteNonLinear(Applier* self, Expression** currp) {
+ // Clear the state between blocks.
+ self->originalLocalMap.clear();
+ }
+};
+
+} // anonymous namespace
+
+struct LocalCSE : public WalkerPass<PostWalker<LocalCSE>> {
+ bool isFunctionParallel() override { return true; }
+
+ // FIXME DWARF updating does not handle local changes yet.
+ bool invalidatesDWARF() override { return true; }
+
+ Pass* create() override { return new LocalCSE(); }
+
+ void doWalkFunction(Function* func) {
+ auto options = getPassOptions();
+
+ RequestInfoMap requestInfos;
+
+ Scanner scanner(options, requestInfos);
+ scanner.walkFunctionInModule(func, getModule());
+ if (requestInfos.empty()) {
+ // We did not find any repeated expressions at all.
+ return;
}
- // If we focus on speed, any reduction in cost is beneficial, as the
- // cost of a get is essentially free.
- if (options.shrinkLevel == 0 && CostAnalyzer(value).cost > 0) {
- return true;
+
+ Checker checker(options, requestInfos);
+ checker.walkFunctionInModule(func, getModule());
+ if (requestInfos.empty()) {
+ // No repeated expressions remain after checking for effects.
+ return;
}
- return false;
+
+ Applier applier(requestInfos);
+ applier.walkFunctionInModule(func, getModule());
+
+ TypeUpdating::handleNonDefaultableLocals(func, *getModule());
}
};
diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 18fbf6d83..43d0d912f 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -1470,30 +1470,21 @@ private:
// First, check for side effects. If there are any, then we can't even
// assume things like local.get's of the same index being identical.
auto passOptions = getPassOptions();
- if (EffectAnalyzer(passOptions, getModule()->features, left)
- .hasSideEffects() ||
- EffectAnalyzer(passOptions, getModule()->features, right)
- .hasSideEffects()) {
+ auto features = getModule()->features;
+ if (EffectAnalyzer(passOptions, features, left).hasSideEffects() ||
+ EffectAnalyzer(passOptions, features, right).hasSideEffects()) {
return false;
}
// Ignore extraneous things and compare them structurally.
- left = Properties::getFallthrough(left, passOptions, getModule()->features);
- right =
- Properties::getFallthrough(right, passOptions, getModule()->features);
+ left = Properties::getFallthrough(left, passOptions, features);
+ right = Properties::getFallthrough(right, passOptions, features);
if (!ExpressionAnalyzer::equal(left, right)) {
return false;
}
- // Reference equality also needs to check for allocation, which is *not* a
- // side effect, but which results in different references:
- // repeatedly calling (struct.new $foo)'s output will return different
- // results (while i32.const etc. of course does not).
- // Note that allocations may have appeared in the original inputs to this
- // function, and skipped when we focused on what falls through; since there
- // are no side effects, any allocations there cannot reach the fallthrough.
- if (left->type.isRef()) {
- if (FindAll<StructNew>(left).has() || FindAll<ArrayNew>(left).has()) {
- return false;
- }
+ // To be equal, they must also be known to return the same result
+ // deterministically.
+ if (Properties::isIntrinsicallyNondeterministic(left, features)) {
+ return false;
}
return true;
}
diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp
index 99741ebe3..82504bff5 100644
--- a/src/passes/Precompute.cpp
+++ b/src/passes/Precompute.cpp
@@ -307,7 +307,10 @@ private:
// value*. A case where that can happen is GC data (each struct.new
// creates a new, unique struct, even if the data is equal), and so
// PrecomputingExpressionRunner will return a nonconstant flow for all
- // GC heap operations.
+ // GC heap operations. (We could also have used
+ // Properties::isIntrinsicallyNondeterministic here, but that would be
+ // less efficient to re-scan the entire expression.)
+ //
// (Other side effects are fine; if an expression does a call and we
// somehow know the entire expression precomputes to a 42, then we can
// propagate that 42 along to the users, regardless of whatever the call
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index fa706dd10..595b431ca 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -423,7 +423,13 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
// that depend on flat IR
if (options.optimizeLevel >= 4) {
addIfNoDWARFIssues("flatten");
+ // LocalCSE is particularly useful after flatten (see comment in the pass
+ // itself), but we must simplify locals a little first (as flatten adds many
+ // new and redundant ones, which make things seem different if we do not
+ // run some amount of simplify-locals first).
+ addIfNoDWARFIssues("simplify-locals-notee-nostructure");
addIfNoDWARFIssues("local-cse");
+ // TODO: add rereloop etc. here
}
addIfNoDWARFIssues("dce");
addIfNoDWARFIssues("remove-unused-names");
diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp
index 878233c84..6a442aeba 100644
--- a/src/tools/wasm-reduce.cpp
+++ b/src/tools/wasm-reduce.cpp
@@ -257,13 +257,14 @@ struct Reducer
"-O4",
"--flatten -Os",
"--flatten -O3",
- "--flatten --local-cse -Os",
+ "--flatten --simplify-locals-notee-nostructure --local-cse -Os",
"--coalesce-locals --vacuum",
"--dce",
"--duplicate-function-elimination",
"--inlining",
"--inlining-optimizing",
"--optimize-level=3 --inlining-optimizing",
+ "--local-cse",
"--memory-packing",
"--remove-unused-names --merge-blocks --vacuum",
"--optimize-instructions",
diff --git a/test/lit/passes/O4_disable-bulk-memory.wast b/test/lit/passes/O4_disable-bulk-memory.wast
index aec1030bb..4ab403159 100644
--- a/test/lit/passes/O4_disable-bulk-memory.wast
+++ b/test/lit/passes/O4_disable-bulk-memory.wast
@@ -231,13 +231,10 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: (if
;; CHECK-NEXT: (i32.gt_u
- ;; CHECK-NEXT: (local.tee $0
+ ;; CHECK-NEXT: (local.tee $2
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (i32.add
;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.tee $1
- ;; CHECK-NEXT: (global.get $global$1)
- ;; CHECK-NEXT: )
;; CHECK-NEXT: (select
;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (i32.const 1)
@@ -246,6 +243,9 @@
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.tee $0
+ ;; CHECK-NEXT: (global.get $global$1)
+ ;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 7)
;; CHECK-NEXT: )
@@ -253,7 +253,7 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.shl
- ;; CHECK-NEXT: (local.tee $2
+ ;; CHECK-NEXT: (local.tee $1
;; CHECK-NEXT: (memory.size)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 16)
@@ -263,14 +263,14 @@
;; CHECK-NEXT: (i32.lt_s
;; CHECK-NEXT: (memory.grow
;; CHECK-NEXT: (select
- ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (local.tee $3
;; CHECK-NEXT: (i32.shr_u
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (i32.add
;; CHECK-NEXT: (i32.sub
+ ;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: (local.get $0)
- ;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 65535)
;; CHECK-NEXT: )
@@ -280,7 +280,7 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.gt_s
- ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (local.get $3)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -299,9 +299,9 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (global.set $global$1
- ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: )
(func $~lib/allocator/arena/__memory_allocate (; 6 ;) (type $3) (param $0 i32) (result i32)
(local $1 i32)
@@ -1920,56 +1920,67 @@
;; CHECK-NEXT: (local.get $13)
;; CHECK-NEXT: )
;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (local.set $11
- ;; CHECK-NEXT: (f64.sqrt
+ ;; CHECK-NEXT: (local.set $2
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (local.get $17)
;; CHECK-NEXT: (local.tee $8
- ;; CHECK-NEXT: (f64.add
- ;; CHECK-NEXT: (f64.add
- ;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.tee $2
- ;; CHECK-NEXT: (f64.sub
- ;; CHECK-NEXT: (local.get $14)
- ;; CHECK-NEXT: (f64.load
- ;; CHECK-NEXT: (local.tee $1
- ;; CHECK-NEXT: (i32.load offset=8
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (local.get $12)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (i32.shl
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: (f64.div
+ ;; CHECK-NEXT: (f64.const 0.01)
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (local.tee $2
+ ;; CHECK-NEXT: (f64.add
+ ;; CHECK-NEXT: (f64.add
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (local.tee $9
+ ;; CHECK-NEXT: (f64.sub
+ ;; CHECK-NEXT: (local.get $14)
+ ;; CHECK-NEXT: (f64.load
+ ;; CHECK-NEXT: (local.tee $1
+ ;; CHECK-NEXT: (i32.load offset=8
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.load
+ ;; CHECK-NEXT: (local.get $12)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.shl
+ ;; CHECK-NEXT: (local.get $7)
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $9)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (local.tee $10
+ ;; CHECK-NEXT: (f64.sub
+ ;; CHECK-NEXT: (local.get $15)
+ ;; CHECK-NEXT: (f64.load offset=8
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $10)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.tee $9
- ;; CHECK-NEXT: (f64.sub
- ;; CHECK-NEXT: (local.get $15)
- ;; CHECK-NEXT: (f64.load offset=8
- ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (local.tee $11
+ ;; CHECK-NEXT: (f64.sub
+ ;; CHECK-NEXT: (local.get $16)
+ ;; CHECK-NEXT: (f64.load offset=16
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $11)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $9)
;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.tee $10
- ;; CHECK-NEXT: (f64.sub
- ;; CHECK-NEXT: (local.get $16)
- ;; CHECK-NEXT: (f64.load offset=16
- ;; CHECK-NEXT: (local.get $1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (f64.sqrt
+ ;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $10)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -1979,21 +1990,13 @@
;; CHECK-NEXT: (f64.sub
;; CHECK-NEXT: (local.get $4)
;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: (local.get $9)
;; CHECK-NEXT: (local.tee $8
;; CHECK-NEXT: (f64.mul
;; CHECK-NEXT: (f64.load offset=48
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.tee $11
- ;; CHECK-NEXT: (f64.div
- ;; CHECK-NEXT: (f64.const 0.01)
- ;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.get $8)
- ;; CHECK-NEXT: (local.get $11)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $8)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -2003,7 +2006,7 @@
;; CHECK-NEXT: (f64.sub
;; CHECK-NEXT: (local.get $5)
;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.get $9)
+ ;; CHECK-NEXT: (local.get $10)
;; CHECK-NEXT: (local.get $8)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -2012,7 +2015,7 @@
;; CHECK-NEXT: (f64.sub
;; CHECK-NEXT: (local.get $6)
;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.get $10)
+ ;; CHECK-NEXT: (local.get $11)
;; CHECK-NEXT: (local.get $8)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -2024,13 +2027,8 @@
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (f64.mul
+ ;; CHECK-NEXT: (local.get $9)
;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: (local.tee $2
- ;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.get $17)
- ;; CHECK-NEXT: (local.get $11)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -2041,7 +2039,7 @@
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.get $9)
+ ;; CHECK-NEXT: (local.get $10)
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -2053,7 +2051,7 @@
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (f64.mul
- ;; CHECK-NEXT: (local.get $10)
+ ;; CHECK-NEXT: (local.get $11)
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
@@ -2943,7 +2941,8 @@
;; CHECK: (func $assembly/index/getBody (; has Stack IR ;) (param $0 i32) (result i32)
;; CHECK-NEXT: (local $1 i32)
;; CHECK-NEXT: (if (result i32)
- ;; CHECK-NEXT: (i32.gt_u
+ ;; CHECK-NEXT: (i32.lt_u
+ ;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (i32.load offset=4
;; CHECK-NEXT: (local.tee $1
;; CHECK-NEXT: (i32.load
@@ -2951,10 +2950,10 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: )
;; CHECK-NEXT: (if (result i32)
- ;; CHECK-NEXT: (i32.gt_u
+ ;; CHECK-NEXT: (i32.lt_u
+ ;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (i32.shr_u
;; CHECK-NEXT: (i32.load
;; CHECK-NEXT: (local.tee $1
@@ -2965,7 +2964,6 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.load offset=8
;; CHECK-NEXT: (i32.add
diff --git a/test/lit/passes/flatten_local-cse_Os.wast b/test/lit/passes/flatten_local-cse_Os.wast
deleted file mode 100644
index 1f315cd2c..000000000
--- a/test/lit/passes/flatten_local-cse_Os.wast
+++ /dev/null
@@ -1,71 +0,0 @@
-;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
-;; NOTE: This test was ported using port_test.py and could be cleaned up.
-
-;; RUN: foreach %s %t wasm-opt --flatten --local-cse -Os -S -o - | filecheck %s
-
-(module
- ;; testcase from AssemblyScript
- (func "div16_internal" (param $0 i32) (param $1 i32) (result i32)
- (i32.add
- (i32.xor
- (i32.shr_s
- (i32.shl
- (local.get $0)
- (i32.const 16)
- )
- (i32.const 16)
- )
- (i32.shr_s
- (i32.shl
- (local.get $1)
- (i32.const 16)
- )
- (i32.const 16)
- )
- )
- (i32.xor
- (i32.shr_s
- (i32.shl
- (local.get $0)
- (i32.const 16)
- )
- (i32.const 16)
- )
- (i32.shr_s
- (i32.shl
- (local.get $1)
- (i32.const 16)
- )
- (i32.const 16)
- )
- )
- )
- )
-)
-;; CHECK: (type $i32_i32_=>_i32 (func (param i32 i32) (result i32)))
-
-;; CHECK: (export "div16_internal" (func $0))
-
-;; CHECK: (func $0 (; has Stack IR ;) (param $0 i32) (param $1 i32) (result i32)
-;; CHECK-NEXT: (i32.add
-;; CHECK-NEXT: (local.tee $0
-;; CHECK-NEXT: (i32.xor
-;; CHECK-NEXT: (i32.shr_s
-;; CHECK-NEXT: (i32.shl
-;; CHECK-NEXT: (local.get $0)
-;; CHECK-NEXT: (i32.const 16)
-;; CHECK-NEXT: )
-;; CHECK-NEXT: (i32.const 16)
-;; CHECK-NEXT: )
-;; CHECK-NEXT: (i32.shr_s
-;; CHECK-NEXT: (i32.shl
-;; CHECK-NEXT: (local.get $1)
-;; CHECK-NEXT: (i32.const 16)
-;; CHECK-NEXT: )
-;; CHECK-NEXT: (i32.const 16)
-;; CHECK-NEXT: )
-;; CHECK-NEXT: )
-;; CHECK-NEXT: )
-;; CHECK-NEXT: (local.get $0)
-;; CHECK-NEXT: )
-;; CHECK-NEXT: )
diff --git a/test/lit/passes/flatten_local-cse_all-features.wast b/test/lit/passes/flatten_local-cse_all-features.wast
deleted file mode 100644
index 243c8a96c..000000000
--- a/test/lit/passes/flatten_local-cse_all-features.wast
+++ /dev/null
@@ -1,1139 +0,0 @@
-;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
-;; NOTE: This test was ported using port_test.py and could be cleaned up.
-
-;; RUN: foreach %s %t wasm-opt --flatten --local-cse --all-features -S -o - | filecheck %s
-
-(module
- (memory 100 100)
- ;; CHECK: (type $none_=>_none (func))
-
- ;; CHECK: (type $i32_i32_i32_=>_i32 (func (param i32 i32 i32) (result i32)))
-
- ;; CHECK: (type $i32_=>_i32 (func (param i32) (result i32)))
-
- ;; CHECK: (type $i32_i32_=>_i32 (func (param i32 i32) (result i32)))
-
- ;; CHECK: (type $f64_f64_i32_=>_f32 (func (param f64 f64 i32) (result f32)))
-
- ;; CHECK: (memory $0 100 100)
-
- ;; CHECK: (func $basics
- ;; CHECK-NEXT: (local $x i32)
- ;; CHECK-NEXT: (local $y i32)
- ;; CHECK-NEXT: (local $2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 i32)
- ;; CHECK-NEXT: (local $7 i32)
- ;; CHECK-NEXT: (local $8 i32)
- ;; CHECK-NEXT: (local $9 i32)
- ;; CHECK-NEXT: (local $10 i32)
- ;; CHECK-NEXT: (local $11 i32)
- ;; CHECK-NEXT: (local $12 i32)
- ;; CHECK-NEXT: (local $13 i32)
- ;; CHECK-NEXT: (local $14 i32)
- ;; CHECK-NEXT: (local $15 i32)
- ;; CHECK-NEXT: (local $16 i32)
- ;; CHECK-NEXT: (local $17 i32)
- ;; CHECK-NEXT: (local $18 i32)
- ;; CHECK-NEXT: (local $19 i32)
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (i32.const 1)
- ;; CHECK-NEXT: (i32.const 2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (if
- ;; CHECK-NEXT: (i32.const 0)
- ;; CHECK-NEXT: (nop)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (i32.const 1)
- ;; CHECK-NEXT: (i32.const 2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $4)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $7
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $8
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $9
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $10
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $11
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $12
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $13
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (call $basics)
- ;; CHECK-NEXT: (local.set $14
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $15
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $16
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $x
- ;; CHECK-NEXT: (i32.const 100)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $17
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $18
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $19
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $19)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $basics
- (local $x i32)
- (local $y i32)
- (drop
- (i32.add (i32.const 1) (i32.const 2))
- )
- (drop
- (i32.add (i32.const 1) (i32.const 2))
- )
- (if (i32.const 0) (nop))
- (drop ;; we can't do this yet, non-linear
- (i32.add (i32.const 1) (i32.const 2))
- )
- (drop
- (i32.add (local.get $x) (local.get $y))
- )
- (drop
- (i32.add (local.get $x) (local.get $y))
- )
- (drop
- (i32.add (local.get $x) (local.get $y))
- )
- (call $basics) ;; side effects, but no matter for our locals
- (drop
- (i32.add (local.get $x) (local.get $y))
- )
- (local.set $x (i32.const 100))
- (drop ;; x was changed!
- (i32.add (local.get $x) (local.get $y))
- )
- )
- ;; CHECK: (func $recursive1
- ;; CHECK-NEXT: (local $x i32)
- ;; CHECK-NEXT: (local $y i32)
- ;; CHECK-NEXT: (local $2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 i32)
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (i32.const 2)
- ;; CHECK-NEXT: (i32.const 3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (i32.const 1)
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $recursive1
- (local $x i32)
- (local $y i32)
- (drop
- (i32.add
- (i32.const 1)
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- )
- )
- (drop
- (i32.add
- (i32.const 1)
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- )
- )
- (drop
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- )
- )
- ;; CHECK: (func $recursive2
- ;; CHECK-NEXT: (local $x i32)
- ;; CHECK-NEXT: (local $y i32)
- ;; CHECK-NEXT: (local $2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 i32)
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (i32.const 2)
- ;; CHECK-NEXT: (i32.const 3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (i32.const 1)
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $recursive2
- (local $x i32)
- (local $y i32)
- (drop
- (i32.add
- (i32.const 1)
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- )
- )
- (drop
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- )
- (drop
- (i32.add
- (i32.const 1)
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- )
- )
- )
- ;; CHECK: (func $self
- ;; CHECK-NEXT: (local $x i32)
- ;; CHECK-NEXT: (local $y i32)
- ;; CHECK-NEXT: (local $2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (i32.const 2)
- ;; CHECK-NEXT: (i32.const 3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $4)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $self
- (local $x i32)
- (local $y i32)
- (drop
- (i32.add
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- )
- )
- (drop
- (i32.add
- (i32.const 2)
- (i32.const 3)
- )
- )
- )
- ;; CHECK: (func $loads
- ;; CHECK-NEXT: (local $0 i32)
- ;; CHECK-NEXT: (local $1 i32)
- ;; CHECK-NEXT: (local.set $0
- ;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (i32.const 10)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $1
- ;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (i32.const 10)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $loads
- (drop
- (i32.load (i32.const 10))
- )
- (drop
- (i32.load (i32.const 10)) ;; implicit traps, sad
- )
- )
- ;; CHECK: (func $8 (param $var$0 i32) (result i32)
- ;; CHECK-NEXT: (local $var$1 i32)
- ;; CHECK-NEXT: (local $var$2 i32)
- ;; CHECK-NEXT: (local $var$3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 i32)
- ;; CHECK-NEXT: (local $7 i32)
- ;; CHECK-NEXT: (local $8 i32)
- ;; CHECK-NEXT: (local $9 i32)
- ;; CHECK-NEXT: (local $10 i32)
- ;; CHECK-NEXT: (local $11 i32)
- ;; CHECK-NEXT: (local $12 i32)
- ;; CHECK-NEXT: (local $13 i32)
- ;; CHECK-NEXT: (local $14 i32)
- ;; CHECK-NEXT: (local $15 i32)
- ;; CHECK-NEXT: (local $16 i32)
- ;; CHECK-NEXT: (local $17 i32)
- ;; CHECK-NEXT: (local $18 i32)
- ;; CHECK-NEXT: (local $19 i32)
- ;; CHECK-NEXT: (local $20 i32)
- ;; CHECK-NEXT: (local $21 i32)
- ;; CHECK-NEXT: (block $label$0
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $var$1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (i32.add
- ;; CHECK-NEXT: (local.get $var$1)
- ;; CHECK-NEXT: (i32.const 4)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $var$2
- ;; CHECK-NEXT: (local.get $5)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $7
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $8
- ;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $var$2
- ;; CHECK-NEXT: (i32.const 74)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $9
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $10
- ;; CHECK-NEXT: (i32.xor
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: (i32.const -1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $11
- ;; CHECK-NEXT: (i32.and
- ;; CHECK-NEXT: (local.get $8)
- ;; CHECK-NEXT: (local.get $10)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (i32.store
- ;; CHECK-NEXT: (local.get $5)
- ;; CHECK-NEXT: (local.get $11)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $12
- ;; CHECK-NEXT: (local.get $var$1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $13
- ;; CHECK-NEXT: (local.get $5)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $var$1
- ;; CHECK-NEXT: (local.get $5)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $14
- ;; CHECK-NEXT: (local.get $var$1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $15
- ;; CHECK-NEXT: (local.get $var$1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $16
- ;; CHECK-NEXT: (i32.load
- ;; CHECK-NEXT: (local.get $var$1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $17
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $18
- ;; CHECK-NEXT: (i32.and
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: (i32.const 8)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $19
- ;; CHECK-NEXT: (i32.or
- ;; CHECK-NEXT: (local.get $16)
- ;; CHECK-NEXT: (local.get $18)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (i32.store
- ;; CHECK-NEXT: (local.get $var$1)
- ;; CHECK-NEXT: (local.get $19)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $20
- ;; CHECK-NEXT: (i32.const 0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $21
- ;; CHECK-NEXT: (local.get $20)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $20)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $8 (param $var$0 i32) (result i32)
- (local $var$1 i32)
- (local $var$2 i32)
- (local $var$3 i32)
- (block $label$0 (result i32)
- (i32.store
- (local.tee $var$2
- (i32.add
- (local.get $var$1)
- (i32.const 4)
- )
- )
- (i32.and
- (i32.load
- (local.get $var$2)
- )
- (i32.xor
- (local.tee $var$2
- (i32.const 74)
- )
- (i32.const -1)
- )
- )
- )
- (i32.store
- (local.tee $var$1
- (i32.add
- (local.get $var$1)
- (i32.const 4)
- )
- )
- (i32.or
- (i32.load
- (local.get $var$1)
- )
- (i32.and
- (local.get $var$2)
- (i32.const 8)
- )
- )
- )
- (i32.const 0)
- )
- )
- ;; CHECK: (func $loop1 (param $x i32) (param $y i32) (result i32)
- ;; CHECK-NEXT: (local $2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 i32)
- ;; CHECK-NEXT: (local $7 i32)
- ;; CHECK-NEXT: (local $8 i32)
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $x
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $y
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $x
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $y
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (unreachable)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $8
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $7)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $loop1 (param $x i32) (param $y i32) (result i32)
- (local.set $x (local.get $y))
- (local.set $y (local.get $x))
- (local.set $x (local.get $y))
- (local.set $y (local.get $x))
- (return (local.get $y))
- )
- ;; CHECK: (func $loop2 (param $x i32) (param $y i32) (param $z i32) (result i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 i32)
- ;; CHECK-NEXT: (local $7 i32)
- ;; CHECK-NEXT: (local $8 i32)
- ;; CHECK-NEXT: (local $9 i32)
- ;; CHECK-NEXT: (local $10 i32)
- ;; CHECK-NEXT: (local $11 i32)
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $x
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $z)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $y
- ;; CHECK-NEXT: (local.get $z)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $z
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $x
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $7
- ;; CHECK-NEXT: (local.get $z)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $y
- ;; CHECK-NEXT: (local.get $z)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $8
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $z
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $9
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (unreachable)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $11
- ;; CHECK-NEXT: (local.get $10)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $10)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $loop2 (param $x i32) (param $y i32) (param $z i32) (result i32)
- (local.set $x (local.get $y))
- (local.set $y (local.get $z))
- (local.set $z (local.get $x))
- (local.set $x (local.get $y))
- (local.set $y (local.get $z))
- (local.set $z (local.get $x))
- (return (local.get $x))
- )
- ;; CHECK: (func $loop3 (param $x i32) (param $y i32) (param $z i32) (result i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 i32)
- ;; CHECK-NEXT: (local $7 i32)
- ;; CHECK-NEXT: (local $8 i32)
- ;; CHECK-NEXT: (local $9 i32)
- ;; CHECK-NEXT: (local $10 i32)
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $x
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $z)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $y
- ;; CHECK-NEXT: (local.get $z)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $z
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $y
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $7
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $z
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $8
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $y)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (unreachable)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $10
- ;; CHECK-NEXT: (local.get $9)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $9)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $loop3 (param $x i32) (param $y i32) (param $z i32) (result i32)
- (local.set $x (local.get $y))
- (local.set $y (local.get $z))
- (local.set $z (local.get $y))
- (local.set $y (local.get $z))
- (local.set $z (local.get $y))
- (return (local.get $y))
- )
- ;; CHECK: (func $handle-removing (param $var$0 f64) (param $var$1 f64) (param $var$2 i32) (result f32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 f32)
- ;; CHECK-NEXT: (local $7 f32)
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (local.set $var$2
- ;; CHECK-NEXT: (i32.const 32767)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $var$2
- ;; CHECK-NEXT: (i32.const 1024)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (select
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: (local.get $var$2)
- ;; CHECK-NEXT: (i32.const -2147483648)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $var$2
- ;; CHECK-NEXT: (local.get $5)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (f32.const 1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $7
- ;; CHECK-NEXT: (local.get $6)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $6)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $handle-removing (param $var$0 f64) (param $var$1 f64) (param $var$2 i32) (result f32)
- (local.set $var$2
- (select
- (local.tee $var$2
- (i32.const 32767)
- )
- (local.tee $var$2
- (i32.const 1024)
- )
- (i32.const -2147483648)
- )
- )
- (f32.const 1)
- )
-)
-;; a testcase that fails if we don't handle equivalent local canonicalization properly
-(module
- ;; CHECK: (type $2 (func (param i64 f32 i32)))
-
- ;; CHECK: (type $0 (func))
- (type $0 (func))
- ;; CHECK: (type $1 (func (param i32 f64) (result i32)))
- (type $1 (func (param i32 f64) (result i32)))
- (type $2 (func (param i64 f32 i32)))
- ;; CHECK: (global $global$0 (mut i32) (i32.const 10))
- (global $global$0 (mut i32) (i32.const 10))
- (table 23 23 funcref)
- ;; CHECK: (table $0 23 23 funcref)
-
- ;; CHECK: (export "func_1_invoker" (func $1))
- (export "func_1_invoker" (func $1))
- ;; CHECK: (export "func_6" (func $2))
- (export "func_6" (func $2))
- ;; CHECK: (func $0 (param $var$0 i64) (param $var$1 f32) (param $var$2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local $5 i32)
- ;; CHECK-NEXT: (local $6 i32)
- ;; CHECK-NEXT: (block $label$1
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (i32.const 128)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (br_if $label$1
- ;; CHECK-NEXT: (i32.const 0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (i32.const 0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (br_if $label$1
- ;; CHECK-NEXT: (local.get $4)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (i32.const -14051)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $6
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (if
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: (global.set $global$0
- ;; CHECK-NEXT: (i32.const 0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $0 (; 0 ;) (type $2) (param $var$0 i64) (param $var$1 f32) (param $var$2 i32)
- (if
- (block $label$1 (result i32)
- (drop
- (br_if $label$1
- (i32.const 0)
- (br_if $label$1
- (i32.const 128)
- (i32.const 0)
- )
- )
- )
- (i32.const -14051)
- )
- (global.set $global$0
- (i32.const 0)
- )
- )
- )
- ;; CHECK: (func $1
- ;; CHECK-NEXT: (call $0
- ;; CHECK-NEXT: (i64.const 1125899906842624)
- ;; CHECK-NEXT: (f32.const -nan:0x7fc91a)
- ;; CHECK-NEXT: (i32.const -46)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $1 (; 1 ;) (type $0)
- (call $0
- (i64.const 1125899906842624)
- (f32.const -nan:0x7fc91a)
- (i32.const -46)
- )
- )
- ;; CHECK: (func $2 (param $var$0 i32) (param $var$1 f64) (result i32)
- ;; CHECK-NEXT: (local $2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (global.get $global$0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (if
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (unreachable)
- ;; CHECK-NEXT: (unreachable)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (i32.const 0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $3)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $2 (; 2 ;) (type $1) (param $var$0 i32) (param $var$1 f64) (result i32)
- (if
- (global.get $global$0)
- (unreachable)
- )
- (i32.const 0)
- )
-)
-(module
- ;; CHECK: (type $i32_=>_none (func (param i32)))
-
- ;; CHECK: (import "env" "out" (func $out (param i32)))
- (import "env" "out" (func $out (param i32)))
- ;; CHECK: (func $each-pass-must-clear (param $var$0 i32)
- ;; CHECK-NEXT: (local $1 i32)
- ;; CHECK-NEXT: (local $2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local.set $1
- ;; CHECK-NEXT: (local.get $var$0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (i32.eqz
- ;; CHECK-NEXT: (local.get $var$0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (call $out
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $var$0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (call $out
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $each-pass-must-clear (param $var$0 i32)
- (call $out
- (i32.eqz
- (local.get $var$0)
- )
- )
- (call $out
- (i32.eqz
- (local.get $var$0)
- )
- )
- )
-)
-(module
- ;; CHECK: (type $none_=>_i64 (func (result i64)))
-
- ;; CHECK: (type $none_=>_none (func))
-
- ;; CHECK: (global $glob (mut i32) (i32.const 1))
- (global $glob (mut i32) (i32.const 1))
- ;; CHECK: (func $i64-shifts (result i64)
- ;; CHECK-NEXT: (local $temp i64)
- ;; CHECK-NEXT: (local $1 i64)
- ;; CHECK-NEXT: (local $2 i64)
- ;; CHECK-NEXT: (local $3 i64)
- ;; CHECK-NEXT: (local $4 i64)
- ;; CHECK-NEXT: (local $5 i64)
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (local.set $1
- ;; CHECK-NEXT: (i64.add
- ;; CHECK-NEXT: (i64.const 1)
- ;; CHECK-NEXT: (i64.const 2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $temp
- ;; CHECK-NEXT: (local.get $1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $temp
- ;; CHECK-NEXT: (i64.const 9999)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (local.get $1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $temp
- ;; CHECK-NEXT: (local.get $1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $temp)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $temp)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $5
- ;; CHECK-NEXT: (local.get $temp)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $temp)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $i64-shifts (result i64)
- (local $temp i64)
- (local.set $temp
- (i64.add
- (i64.const 1)
- (i64.const 2)
- )
- )
- (local.set $temp
- (i64.const 9999)
- )
- (local.set $temp
- (i64.add
- (i64.const 1)
- (i64.const 2)
- )
- )
- (local.get $temp)
- )
- ;; CHECK: (func $global
- ;; CHECK-NEXT: (local $x i32)
- ;; CHECK-NEXT: (local $y i32)
- ;; CHECK-NEXT: (local $2 i32)
- ;; CHECK-NEXT: (local $3 i32)
- ;; CHECK-NEXT: (local $4 i32)
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (global.get $glob)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $x
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $y
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $4
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $y
- ;; CHECK-NEXT: (local.get $x)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $global
- (local $x i32)
- (local $y i32)
- (local.set $x (global.get $glob))
- (local.set $y (global.get $glob))
- (local.set $y (global.get $glob))
- )
-)
-
-(module
- ;; After --flatten, there will be a series of chain copies between multiple
- ;; locals, but some of the locals will be funcref type and others anyref
- ;; type. We cannot make locals of different types a common subexpression.
- ;; CHECK: (type $none_=>_anyref (func (result anyref)))
-
- ;; CHECK: (type $none_=>_none (func))
-
- ;; CHECK: (func $subtype-test (result anyref)
- ;; CHECK-NEXT: (local $0 funcref)
- ;; CHECK-NEXT: (local $1 funcref)
- ;; CHECK-NEXT: (local $2 anyref)
- ;; CHECK-NEXT: (local $3 anyref)
- ;; CHECK-NEXT: (block
- ;; CHECK-NEXT: (nop)
- ;; CHECK-NEXT: (loop $label$1
- ;; CHECK-NEXT: (local.set $0
- ;; CHECK-NEXT: (ref.null func)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $1
- ;; CHECK-NEXT: (local.get $0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (local.get $0)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $3
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (return
- ;; CHECK-NEXT: (local.get $2)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $subtype-test (result anyref)
- (nop)
- (loop $label$1 (result funcref)
- (ref.null func)
- )
- )
-
- ;; CHECK: (func $test
- ;; CHECK-NEXT: (local $0 anyref)
- ;; CHECK-NEXT: (local $1 funcref)
- ;; CHECK-NEXT: (local $2 funcref)
- ;; CHECK-NEXT: (block $label$1
- ;; CHECK-NEXT: (local.set $0
- ;; CHECK-NEXT: (ref.null func)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $1
- ;; CHECK-NEXT: (ref.null func)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (local.set $2
- ;; CHECK-NEXT: (local.get $1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: (drop
- ;; CHECK-NEXT: (local.get $1)
- ;; CHECK-NEXT: )
- ;; CHECK-NEXT: )
- (func $test
- (local $0 anyref)
- (drop
- (block $label$1 (result funcref)
- (local.set $0
- (ref.null func)
- )
- ;; After --flatten, this will be assigned to a local of funcref type. After
- ;; --local-cse, even if we set (ref.null func) to local $0 above, this
- ;; should not be replaced with $0, because it is of type anyref.
- (ref.null func)
- )
- )
- )
-)
diff --git a/test/lit/passes/local-cse.wast b/test/lit/passes/local-cse.wast
new file mode 100644
index 000000000..5cdf712f8
--- /dev/null
+++ b/test/lit/passes/local-cse.wast
@@ -0,0 +1,433 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+;; NOTE: This test was ported using port_test.py and could be cleaned up.
+
+;; RUN: foreach %s %t wasm-opt --local-cse -S -o - | filecheck %s
+
+(module
+ (memory 100 100)
+ ;; CHECK: (type $none_=>_none (func))
+
+ ;; CHECK: (type $i32_=>_i32 (func (param i32) (result i32)))
+
+ ;; CHECK: (type $none_=>_i64 (func (result i64)))
+
+ ;; CHECK: (memory $0 100 100)
+
+ ;; CHECK: (func $basics
+ ;; CHECK-NEXT: (local $x i32)
+ ;; CHECK-NEXT: (local $y i32)
+ ;; CHECK-NEXT: (local $2 i32)
+ ;; CHECK-NEXT: (local $3 i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.tee $2
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (if
+ ;; CHECK-NEXT: (i32.const 0)
+ ;; CHECK-NEXT: (nop)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.tee $3
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (local.get $y)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (call $basics)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $x
+ ;; CHECK-NEXT: (i32.const 100)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (local.get $y)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $basics
+ (local $x i32)
+ (local $y i32)
+ ;; These two adds can be optimized.
+ (drop
+ (i32.add (i32.const 1) (i32.const 2))
+ )
+ (drop
+ (i32.add (i32.const 1) (i32.const 2))
+ )
+ (if (i32.const 0) (nop))
+ ;; This add is after an if, which means we are no longer in the same basic
+ ;; block - which means we cannot optimize it with the previous identical
+ ;; adds.
+ (drop
+ (i32.add (i32.const 1) (i32.const 2))
+ )
+ ;; More adds with different contents than the previous, but all three are
+ ;; identical.
+ (drop
+ (i32.add (local.get $x) (local.get $y))
+ )
+ (drop
+ (i32.add (local.get $x) (local.get $y))
+ )
+ (drop
+ (i32.add (local.get $x) (local.get $y))
+ )
+ ;; Calls have side effects, but that is not a problem for these adds which
+ ;; only use locals, so we can optimize the add after the call.
+ (call $basics)
+ (drop
+ (i32.add (local.get $x) (local.get $y))
+ )
+ ;; Modify $x, which means we cannot optimize the add after the set.
+ (local.set $x (i32.const 100))
+ (drop
+ (i32.add (local.get $x) (local.get $y))
+ )
+ )
+
+ ;; CHECK: (func $recursive1
+ ;; CHECK-NEXT: (local $x i32)
+ ;; CHECK-NEXT: (local $y i32)
+ ;; CHECK-NEXT: (local $2 i32)
+ ;; CHECK-NEXT: (local $3 i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.tee $3
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (local.tee $2
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: (i32.const 3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $recursive1
+ (local $x i32)
+ (local $y i32)
+ ;; The first two dropped things are identical and can be optimized.
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ ;; The last thing here appears inside the previous pattern, and can still
+ ;; be optimized, with another local.
+ (drop
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+
+ ;; CHECK: (func $recursive2
+ ;; CHECK-NEXT: (local $x i32)
+ ;; CHECK-NEXT: (local $y i32)
+ ;; CHECK-NEXT: (local $2 i32)
+ ;; CHECK-NEXT: (local $3 i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.tee $3
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (local.tee $2
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: (i32.const 3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $recursive2
+ (local $x i32)
+ (local $y i32)
+ ;; As before, but the order is different, with the sub-pattern in the
+ ;; middle.
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 1)
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ )
+
+ ;; CHECK: (func $self
+ ;; CHECK-NEXT: (local $x i32)
+ ;; CHECK-NEXT: (local $y i32)
+ ;; CHECK-NEXT: (local $2 i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (local.tee $2
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.const 2)
+ ;; CHECK-NEXT: (i32.const 3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $self
+ (local $x i32)
+ (local $y i32)
+ ;; As before, with just the large pattern and the sub pattern, but no
+ ;; repeats of the large pattern.
+ (drop
+ (i32.add
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+ (drop
+ (i32.add
+ (i32.const 2)
+ (i32.const 3)
+ )
+ )
+ )
+
+ ;; CHECK: (func $loads
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.load
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (i32.load
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $loads
+ ;; The possible trap on loads prevents optimization.
+ ;; TODO: optimize that too
+ (drop
+ (i32.load (i32.const 10))
+ )
+ (drop
+ (i32.load (i32.const 10))
+ )
+ )
+
+ ;; CHECK: (func $calls (param $x i32) (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (call $calls
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (call $calls
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: )
+ (func $calls (param $x i32) (result i32)
+ ;; The side effects of calls prevent optimization.
+ (drop
+ (call $calls (i32.const 10))
+ )
+ (drop
+ (call $calls (i32.const 10))
+ )
+ (i32.const 10)
+ )
+
+ ;; CHECK: (func $many-sets (result i64)
+ ;; CHECK-NEXT: (local $temp i64)
+ ;; CHECK-NEXT: (local $1 i64)
+ ;; CHECK-NEXT: (local.set $temp
+ ;; CHECK-NEXT: (local.tee $1
+ ;; CHECK-NEXT: (i64.add
+ ;; CHECK-NEXT: (i64.const 1)
+ ;; CHECK-NEXT: (i64.const 2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $temp
+ ;; CHECK-NEXT: (i64.const 9999)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $temp
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $temp)
+ ;; CHECK-NEXT: )
+ (func $many-sets (result i64)
+ (local $temp i64)
+ ;; Assign to $temp three times here. We can optimize the add regardless of
+ ;; that, and should not be confused by the sets themselves having effects
+ ;; that are in conflict (the value is what matters).
+ (local.set $temp
+ (i64.add
+ (i64.const 1)
+ (i64.const 2)
+ )
+ )
+ (local.set $temp
+ (i64.const 9999)
+ )
+ (local.set $temp
+ (i64.add
+ (i64.const 1)
+ (i64.const 2)
+ )
+ )
+ (local.get $temp)
+ )
+
+ ;; CHECK: (func $switch-children (param $x i32) (result i32)
+ ;; CHECK-NEXT: (local $1 i32)
+ ;; CHECK-NEXT: (block $label$1 (result i32)
+ ;; CHECK-NEXT: (br_table $label$1 $label$1
+ ;; CHECK-NEXT: (local.tee $1
+ ;; CHECK-NEXT: (i32.and
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (i32.const 3)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $switch-children (param $x i32) (result i32)
+ (block $label$1 (result i32)
+ ;; We can optimize the two children of this switch. This test verifies
+ ;; that we do so properly and do not hit an assertion involving the
+ ;; ordering of the switch's children, which was incorrect in the past.
+ (br_table $label$1 $label$1
+ (i32.and
+ (local.get $x)
+ (i32.const 3)
+ )
+ (i32.and
+ (local.get $x)
+ (i32.const 3)
+ )
+ )
+ )
+ )
+)
+
+(module
+ ;; CHECK: (type $none_=>_none (func))
+
+ ;; CHECK: (global $glob (mut i32) (i32.const 1))
+ (global $glob (mut i32) (i32.const 1))
+
+ ;; CHECK: (global $other-glob (mut i32) (i32.const 1))
+ (global $other-glob (mut i32) (i32.const 1))
+
+ ;; CHECK: (func $global
+ ;; CHECK-NEXT: (local $0 i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.tee $0
+ ;; CHECK-NEXT: (global.get $glob)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.set $other-glob
+ ;; CHECK-NEXT: (i32.const 100)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.set $glob
+ ;; CHECK-NEXT: (i32.const 200)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $glob)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $global
+ ;; We should optimize redundant global.get operations.
+ (drop (global.get $glob))
+ (drop (global.get $glob))
+ ;; We can do it past a write to another global
+ (global.set $other-glob (i32.const 100))
+ (drop (global.get $glob))
+ ;; But we can't do it past a write to our global.
+ (global.set $glob (i32.const 200))
+ (drop (global.get $glob))
+ )
+)
diff --git a/test/lit/passes/local-cse_all-features.wast b/test/lit/passes/local-cse_all-features.wast
new file mode 100644
index 000000000..b51240c50
--- /dev/null
+++ b/test/lit/passes/local-cse_all-features.wast
@@ -0,0 +1,272 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+;; NOTE: This test was ported using port_test.py and could be cleaned up.
+
+;; RUN: foreach %s %t wasm-opt --local-cse --all-features -S -o - | filecheck %s
+
+(module
+ ;; CHECK: (type $i32_=>_i32 (func (param i32) (result i32)))
+
+ ;; CHECK: (type $none_=>_none (func))
+
+ ;; CHECK: (elem declare func $calls $ref.func)
+
+ ;; CHECK: (func $calls (param $x i32) (result i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (call_ref
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: (ref.func $calls)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (call_ref
+ ;; CHECK-NEXT: (i32.const 10)
+ ;; CHECK-NEXT: (ref.func $calls)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 20)
+ ;; CHECK-NEXT: )
+ (func $calls (param $x i32) (result i32)
+ ;; The side effects of calls prevent optimization.
+ (drop
+ (call_ref (i32.const 10) (ref.func $calls))
+ )
+ (drop
+ (call_ref (i32.const 10) (ref.func $calls))
+ )
+ (i32.const 20)
+ )
+
+ ;; CHECK: (func $ref.func
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.func $ref.func)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.func $ref.func)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $ref.func
+ ;; RefFunc and other constants should be ignored - don't undo the effects
+ ;; of constant propagation.
+ (drop
+ (ref.func $ref.func)
+ )
+ (drop
+ (ref.func $ref.func)
+ )
+ )
+)
+
+(module
+ ;; CHECK: (type $A (struct (field i32)))
+ (type $A (struct (field i32)))
+
+ ;; CHECK: (type $ref|$A|_=>_none (func (param (ref $A))))
+
+ ;; CHECK: (type $B (array (mut i32)))
+ (type $B (array (mut i32)))
+
+ ;; CHECK: (type $none_=>_none (func))
+
+ ;; CHECK: (func $struct-gets (param $ref (ref $A))
+ ;; CHECK-NEXT: (local $1 i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.tee $1
+ ;; CHECK-NEXT: (struct.get $A 0
+ ;; CHECK-NEXT: (local.get $ref)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $struct-gets (param $ref (ref $A))
+ ;; Repeated loads from a struct can be optimized.
+ ;;
+ ;; Note that these struct.gets cannot trap as the reference is non-nullable,
+ ;; so there are no side effects here, and we can optimize.
+ (drop
+ (struct.get $A 0
+ (local.get $ref)
+ )
+ )
+ (drop
+ (struct.get $A 0
+ (local.get $ref)
+ )
+ )
+ (drop
+ (struct.get $A 0
+ (local.get $ref)
+ )
+ )
+ )
+
+ ;; CHECK: (func $non-nullable-value (param $ref (ref $A))
+ ;; CHECK-NEXT: (local $1 (ref null $A))
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.tee $1
+ ;; CHECK-NEXT: (select (result (ref $A))
+ ;; CHECK-NEXT: (local.get $ref)
+ ;; CHECK-NEXT: (local.get $ref)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $non-nullable-value (param $ref (ref $A))
+ ;; The value that is repeated is non-nullable, which we must do some fixups
+ ;; for after creating a local of that type.
+ (drop
+ (select (result (ref $A))
+ (local.get $ref)
+ (local.get $ref)
+ (i32.const 1)
+ )
+ )
+ (drop
+ (select (result (ref $A))
+ (local.get $ref)
+ (local.get $ref)
+ (i32.const 1)
+ )
+ )
+ )
+
+ ;; CHECK: (func $creations
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.new_with_rtt $A
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (rtt.canon $A)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.new_with_rtt $A
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (rtt.canon $A)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (array.new_with_rtt $B
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (rtt.canon $B)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (array.new_with_rtt $B
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: (rtt.canon $B)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $creations
+ ;; Allocating GC data has no side effects, but each allocation is unique
+ ;; and so we cannot replace separate allocations with a single one.
+ (drop
+ (struct.new_with_rtt $A
+ (i32.const 1)
+ (rtt.canon $A)
+ )
+ )
+ (drop
+ (struct.new_with_rtt $A
+ (i32.const 1)
+ (rtt.canon $A)
+ )
+ )
+ (drop
+ (array.new_with_rtt $B
+ (i32.const 1)
+ (i32.const 1)
+ (rtt.canon $B)
+ )
+ )
+ (drop
+ (array.new_with_rtt $B
+ (i32.const 1)
+ (i32.const 1)
+ (rtt.canon $B)
+ )
+ )
+ )
+)
+
+(module
+ ;; Real-world testcase from AssemblyScript, containing multiple nested things
+ ;; that can be optimized. The inputs to the add (the xors) are identical, and
+ ;; we can avoid repeating them.
+ ;; CHECK: (type $i32_i32_=>_i32 (func (param i32 i32) (result i32)))
+
+ ;; CHECK: (func $div16_internal (param $0 i32) (param $1 i32) (result i32)
+ ;; CHECK-NEXT: (local $2 i32)
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (local.tee $2
+ ;; CHECK-NEXT: (i32.xor
+ ;; CHECK-NEXT: (i32.shr_s
+ ;; CHECK-NEXT: (i32.shl
+ ;; CHECK-NEXT: (local.get $0)
+ ;; CHECK-NEXT: (i32.const 16)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 16)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.shr_s
+ ;; CHECK-NEXT: (i32.shl
+ ;; CHECK-NEXT: (local.get $1)
+ ;; CHECK-NEXT: (i32.const 16)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (i32.const 16)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.get $2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $div16_internal (param $0 i32) (param $1 i32) (result i32)
+ (i32.add
+ (i32.xor
+ (i32.shr_s
+ (i32.shl
+ (local.get $0)
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ (i32.shr_s
+ (i32.shl
+ (local.get $1)
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ )
+ (i32.xor
+ (i32.shr_s
+ (i32.shl
+ (local.get $0)
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ (i32.shr_s
+ (i32.shl
+ (local.get $1)
+ (i32.const 16)
+ )
+ (i32.const 16)
+ )
+ )
+ )
+ )
+)