summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/passes/CMakeLists.txt1
-rw-r--r--src/passes/LocalSubtyping.cpp142
-rw-r--r--src/passes/pass.cpp9
-rw-r--r--src/passes/passes.h1
-rw-r--r--test/lit/help/optimization-opts.test3
-rw-r--r--test/lit/passes/local-subtyping-nn.wast37
-rw-r--r--test/lit/passes/local-subtyping.wast205
-rw-r--r--test/passes/Oz_fuzz-exec_all-features.txt6
8 files changed, 401 insertions, 3 deletions
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 2b8790a4f..783a3df87 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -37,6 +37,7 @@ set(passes_SOURCES
LegalizeJSInterface.cpp
LimitSegments.cpp
LocalCSE.cpp
+ LocalSubtyping.cpp
LogExecution.cpp
LoopInvariantCodeMotion.cpp
Memory64Lowering.cpp
diff --git a/src/passes/LocalSubtyping.cpp b/src/passes/LocalSubtyping.cpp
new file mode 100644
index 000000000..f435e53d4
--- /dev/null
+++ b/src/passes/LocalSubtyping.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2021 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Refines the types of locals where possible. That is, if a local is assigned
+// types that are more specific than the local's declared type, refine the
+// declared type. This can then potentially unlock optimizations later when the
+// local is used, as we have more type info. (However, it may also increase code
+// size in theory, if we end up declaring more types - TODO investigate.)
+//
+
+#include <ir/find_all.h>
+#include <ir/linear-execution.h>
+#include <ir/utils.h>
+#include <pass.h>
+#include <wasm.h>
+
+namespace wasm {
+
+struct LocalSubtyping : public WalkerPass<PostWalker<LocalSubtyping>> {
+ bool isFunctionParallel() override { return true; }
+
+ Pass* create() override { return new LocalSubtyping(); }
+
+ // Shared code to find all sets or gets for each local index. Returns a vector
+ // that maps local indexes => vector of LocalGet*|LocalSet* expressions.
+ template<typename T>
+ std::vector<std::vector<T*>> getLocalOperations(Function* func) {
+ std::vector<std::vector<T*>> ret;
+ ret.resize(func->getNumLocals());
+ FindAll<T> operations(func->body);
+ for (auto* operation : operations.list) {
+ ret[operation->index].push_back(operation);
+ }
+ return ret;
+ }
+
+ void doWalkFunction(Function* func) {
+ if (!getModule()->features.hasGC()) {
+ return;
+ }
+
+ auto varBase = func->getVarIndexBase();
+ auto numLocals = func->getNumLocals();
+
+ auto setsForLocal = getLocalOperations<LocalSet>(func);
+ auto getsForLocal = getLocalOperations<LocalGet>(func);
+
+ // Keep iterating while we find things to change. There can be chains like
+ // X -> Y -> Z where one change enables more. Note that we are O(N^2) on
+ // that atm, but it is a rare pattern as general optimizations
+ // (SimplifyLocals and CoalesceLocals) break up such things; also, even if
+ // we tracked changes more carefully we'd have the case of nested tees
+ // where we could still be O(N^2), so we'd need something more complex here
+ // involving topological sorting. Leave that for if the need arises.
+
+ // TODO: handle cycles of X -> Y -> X etc.
+
+ bool more;
+
+ do {
+ more = false;
+
+ // First, refinalize which will recompute least upper bounds on ifs and
+ // blocks, etc., potentially finding a more specific type. Note that
+ // that utility does not tell us if it changed anything, so we depend on
+ // the next step for knowing if there is more work to do.
+ ReFinalize().walkFunctionInModule(func, getModule());
+
+ // Second, find vars whose actual applied values allow a more specific
+ // type.
+
+ for (Index i = varBase; i < numLocals; i++) {
+ // Find all the types assigned to the var, and compute the optimal LUB.
+ // Note that we do not need to take into account the initial value of
+ // zero or null that locals have: that value has the type of the local,
+ // which is a supertype of all the assigned values anyhow. It will never
+ // be able to tell us of a more specific subtype that is possible.
+ std::unordered_set<Type> types;
+ for (auto* set : setsForLocal[i]) {
+ types.insert(set->value->type);
+ }
+ if (types.empty()) {
+ // Nothing is assigned to this local (other opts will remove it).
+ continue;
+ }
+
+ auto oldType = func->getLocalType(i);
+ auto newType = Type::getLeastUpperBound(types);
+ assert(newType != Type::none); // in valid wasm there must be a LUB
+
+ // Remove non-nullability if we disallow that in locals.
+ if (!getModule()->features.hasGCNNLocals() && newType.isNonNullable()) {
+ newType = Type(newType.getHeapType(), Nullable);
+ // Note that the old type must have been nullable as well, as non-
+ // nullable types cannot be locals without that feature being enabled,
+ // which means that we will not have to do any extra work to handle
+ // non-nullability if we update the type: we are just updating the
+ // heap type, and leaving the type nullable as it was.
+ assert(oldType.isNullable());
+ }
+
+ if (newType != oldType) {
+ // We found a more specific type!
+ assert(Type::isSubType(newType, oldType));
+ func->vars[i - varBase] = newType;
+ more = true;
+
+ // Update gets and tees.
+ for (auto* get : getsForLocal[i]) {
+ get->type = newType;
+ }
+
+ // NB: These tee updates will not be needed if the type of tees
+ // becomes that of their value, in the spec.
+ for (auto* set : setsForLocal[i]) {
+ if (set->isTee() && set->type != Type::unreachable) {
+ set->type = newType;
+ }
+ }
+ }
+ }
+ } while (more);
+ }
+};
+
+Pass* createLocalSubtypingPass() { return new LocalSubtyping(); }
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 10a688313..d21464c2b 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -171,6 +171,9 @@ void PassRegistry::registerPasses() {
registerPass("local-cse",
"common subexpression elimination inside basic blocks",
createLocalCSEPass);
+ registerPass("local-subtyping",
+ "apply more specific subtypes to locals where possible",
+ createLocalSubtypingPass);
registerPass("log-execution",
"instrument the build with logging of where execution goes",
createLogExecutionPass);
@@ -454,6 +457,12 @@ void PassRunner::addDefaultFunctionOptimizationPasses() {
if (options.optimizeLevel >= 3 || options.shrinkLevel >= 2) {
addIfNoDWARFIssues("merge-locals"); // very slow on e.g. sqlite
}
+ if (options.optimizeLevel > 1 && wasm->features.hasGC()) {
+ // Coalescing may prevent subtyping (as a coalesced local must have the
+ // supertype of all those combined into it), so subtype first.
+ // TODO: when optimizing for size, maybe the order should reverse?
+ addIfNoDWARFIssues("local-subtyping");
+ }
addIfNoDWARFIssues("coalesce-locals");
addIfNoDWARFIssues("simplify-locals");
addIfNoDWARFIssues("vacuum");
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 5f265470d..004f8f319 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -59,6 +59,7 @@ Pass* createLegalizeJSInterfacePass();
Pass* createLegalizeJSInterfaceMinimallyPass();
Pass* createLimitSegmentsPass();
Pass* createLocalCSEPass();
+Pass* createLocalSubtypingPass();
Pass* createLogExecutionPass();
Pass* createInstrumentLocalsPass();
Pass* createInstrumentMemoryPass();
diff --git a/test/lit/help/optimization-opts.test b/test/lit/help/optimization-opts.test
index 3350015a1..0d4ed48a8 100644
--- a/test/lit/help/optimization-opts.test
+++ b/test/lit/help/optimization-opts.test
@@ -292,6 +292,9 @@
;; CHECK-NEXT: --local-cse common subexpression elimination
;; CHECK-NEXT: inside basic blocks
;; CHECK-NEXT:
+;; CHECK-NEXT: --local-subtyping apply more specific subtypes to
+;; CHECK-NEXT: locals where possible
+;; CHECK-NEXT:
;; CHECK-NEXT: --log-execution instrument the build with
;; CHECK-NEXT: logging of where execution goes
;; CHECK-NEXT:
diff --git a/test/lit/passes/local-subtyping-nn.wast b/test/lit/passes/local-subtyping-nn.wast
new file mode 100644
index 000000000..7eca6558a
--- /dev/null
+++ b/test/lit/passes/local-subtyping-nn.wast
@@ -0,0 +1,37 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+;; RUN: wasm-opt %s --local-subtyping -all --enable-gc-nn-locals -S -o - \
+;; RUN: | filecheck %s
+
+(module
+ ;; CHECK: (type $struct (struct ))
+ (type $struct (struct))
+
+ ;; CHECK: (import "out" "i32" (func $i32 (result i32)))
+ (import "out" "i32" (func $i32 (result i32)))
+
+ ;; CHECK: (func $non-nullable
+ ;; CHECK-NEXT: (local $x (ref $struct))
+ ;; CHECK-NEXT: (local $y (ref $none_=>_i32))
+ ;; CHECK-NEXT: (local.set $x
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $y
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $non-nullable
+ (local $x (ref null $struct))
+ (local $y anyref)
+ ;; x is assigned a value that is non-nullable.
+ (local.set $x
+ (ref.as_non_null (ref.null $struct))
+ )
+ ;; x is assigned a value that is non-nullable, and also allows a more
+ ;; specific heap type.
+ (local.set $y
+ (ref.func $i32)
+ )
+ )
+)
diff --git a/test/lit/passes/local-subtyping.wast b/test/lit/passes/local-subtyping.wast
new file mode 100644
index 000000000..59f93ce43
--- /dev/null
+++ b/test/lit/passes/local-subtyping.wast
@@ -0,0 +1,205 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+;; RUN: wasm-opt %s --local-subtyping -all -S -o - \
+;; RUN: | filecheck %s
+
+(module
+ ;; CHECK: (import "out" "i32" (func $i32 (result i32)))
+ (import "out" "i32" (func $i32 (result i32)))
+ ;; CHECK: (import "out" "i64" (func $i64 (result i64)))
+ (import "out" "i64" (func $i64 (result i64)))
+
+ ;; Refinalization can find a more specific type, where the declared type was
+ ;; not the optimal LUB.
+ ;; CHECK: (func $refinalize (param $x i32)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (if (result (ref func))
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: (ref.func $i64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (block $block (result (ref func))
+ ;; CHECK-NEXT: (br $block
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (ref.func $i64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $refinalize (param $x i32)
+ (drop
+ (if (result anyref)
+ (local.get $x)
+ (ref.func $i32)
+ (ref.func $i64)
+ )
+ )
+ (drop
+ (block $block (result anyref)
+ (br $block
+ (ref.func $i32)
+ )
+ (ref.func $i64)
+ )
+ )
+ )
+
+ ;; A simple case where a local has a single assignment that we can use as a
+ ;; more specific type. A similar thing with a parameter, however, is not a
+ ;; thing we can optimize. Also, ignore a local with zero assignments.
+ ;; CHECK: (func $simple-local-but-not-param (param $x anyref)
+ ;; CHECK-NEXT: (local $y (ref null $none_=>_i32))
+ ;; CHECK-NEXT: (local $unused anyref)
+ ;; CHECK-NEXT: (local.set $x
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $y
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $simple-local-but-not-param (param $x anyref)
+ (local $y anyref)
+ (local $unused anyref)
+ (local.set $x
+ (ref.func $i32)
+ )
+ (local.set $y
+ (ref.func $i32)
+ )
+ )
+
+ ;; CHECK: (func $locals-with-multiple-assignments
+ ;; CHECK-NEXT: (local $x funcref)
+ ;; CHECK-NEXT: (local $y (ref null $none_=>_i32))
+ ;; CHECK-NEXT: (local $z (ref null $none_=>_i64))
+ ;; CHECK-NEXT: (local $w funcref)
+ ;; CHECK-NEXT: (local.set $x
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $x
+ ;; CHECK-NEXT: (ref.func $i64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $y
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $y
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $z
+ ;; CHECK-NEXT: (ref.func $i64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $z
+ ;; CHECK-NEXT: (ref.func $i64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $w
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $w
+ ;; CHECK-NEXT: (ref.func $i64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $locals-with-multiple-assignments
+ (local $x anyref)
+ (local $y anyref)
+ (local $z anyref)
+ (local $w funcref)
+ ;; x is assigned two different types with a new LUB possible
+ (local.set $x
+ (ref.func $i32)
+ )
+ (local.set $x
+ (ref.func $i64)
+ )
+ ;; y and z are assigned the same more specific type twice
+ (local.set $y
+ (ref.func $i32)
+ )
+ (local.set $y
+ (ref.func $i32)
+ )
+ (local.set $z
+ (ref.func $i64)
+ )
+ (local.set $z
+ (ref.func $i64)
+ )
+ ;; w is assigned two different types *without* a new LUB possible, as it
+ ;; already had the optimal LUB
+ (local.set $w
+ (ref.func $i32)
+ )
+ (local.set $w
+ (ref.func $i64)
+ )
+ )
+
+ ;; In some cases multiple iterations are necessary, as one inferred new type
+ ;; applies to a get which then allows another inference.
+ ;; CHECK: (func $multiple-iterations
+ ;; CHECK-NEXT: (local $x (ref null $none_=>_i32))
+ ;; CHECK-NEXT: (local $y (ref null $none_=>_i32))
+ ;; CHECK-NEXT: (local $z (ref null $none_=>_i32))
+ ;; CHECK-NEXT: (local.set $x
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $y
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $z
+ ;; CHECK-NEXT: (local.get $y)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $multiple-iterations
+ (local $x anyref)
+ (local $y anyref)
+ (local $z anyref)
+ (local.set $x
+ (ref.func $i32)
+ )
+ (local.set $y
+ (local.get $x)
+ )
+ (local.set $z
+ (local.get $y)
+ )
+ )
+
+ ;; Sometimes a refinalize is necessary in between the iterations.
+ ;; CHECK: (func $multiple-iterations-refinalize (param $i i32)
+ ;; CHECK-NEXT: (local $x (ref null $none_=>_i32))
+ ;; CHECK-NEXT: (local $y (ref null $none_=>_i64))
+ ;; CHECK-NEXT: (local $z funcref)
+ ;; CHECK-NEXT: (local.set $x
+ ;; CHECK-NEXT: (ref.func $i32)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $y
+ ;; CHECK-NEXT: (ref.func $i64)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (local.set $z
+ ;; CHECK-NEXT: (select (result funcref)
+ ;; CHECK-NEXT: (local.get $x)
+ ;; CHECK-NEXT: (local.get $y)
+ ;; CHECK-NEXT: (local.get $i)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $multiple-iterations-refinalize (param $i i32)
+ (local $x anyref)
+ (local $y anyref)
+ (local $z anyref)
+ (local.set $x
+ (ref.func $i32)
+ )
+ (local.set $y
+ (ref.func $i64)
+ )
+ (local.set $z
+ (select
+ (local.get $x)
+ (local.get $y)
+ (local.get $i)
+ )
+ )
+ )
+)
diff --git a/test/passes/Oz_fuzz-exec_all-features.txt b/test/passes/Oz_fuzz-exec_all-features.txt
index 6d342196e..b0f4c6448 100644
--- a/test/passes/Oz_fuzz-exec_all-features.txt
+++ b/test/passes/Oz_fuzz-exec_all-features.txt
@@ -153,7 +153,7 @@
)
)
(func $2 (; has Stack IR ;)
- (local $0 anyref)
+ (local $0 (ref null $extendedstruct))
(call $log
(i32.const 1)
)
@@ -247,7 +247,7 @@
(unreachable)
)
(func $4 (; has Stack IR ;)
- (local $0 anyref)
+ (local $0 (ref null $struct))
(local.set $0
(struct.new_default_with_rtt $struct
(rtt.canon $struct)
@@ -272,7 +272,7 @@
)
)
(func $5 (; has Stack IR ;)
- (local $0 anyref)
+ (local $0 (ref null $extendedstruct))
(local.set $0
(struct.new_default_with_rtt $extendedstruct
(rtt.canon $extendedstruct)