diff options
-rw-r--r-- | src/passes/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/passes/LocalSubtyping.cpp | 142 | ||||
-rw-r--r-- | src/passes/pass.cpp | 9 | ||||
-rw-r--r-- | src/passes/passes.h | 1 | ||||
-rw-r--r-- | test/lit/help/optimization-opts.test | 3 | ||||
-rw-r--r-- | test/lit/passes/local-subtyping-nn.wast | 37 | ||||
-rw-r--r-- | test/lit/passes/local-subtyping.wast | 205 | ||||
-rw-r--r-- | test/passes/Oz_fuzz-exec_all-features.txt | 6 |
8 files changed, 401 insertions, 3 deletions
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 2b8790a4f..783a3df87 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -37,6 +37,7 @@ set(passes_SOURCES LegalizeJSInterface.cpp LimitSegments.cpp LocalCSE.cpp + LocalSubtyping.cpp LogExecution.cpp LoopInvariantCodeMotion.cpp Memory64Lowering.cpp diff --git a/src/passes/LocalSubtyping.cpp b/src/passes/LocalSubtyping.cpp new file mode 100644 index 000000000..f435e53d4 --- /dev/null +++ b/src/passes/LocalSubtyping.cpp @@ -0,0 +1,142 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Refines the types of locals where possible. That is, if a local is assigned +// types that are more specific than the local's declared type, refine the +// declared type. This can then potentially unlock optimizations later when the +// local is used, as we have more type info. (However, it may also increase code +// size in theory, if we end up declaring more types - TODO investigate.) +// + +#include <ir/find_all.h> +#include <ir/linear-execution.h> +#include <ir/utils.h> +#include <pass.h> +#include <wasm.h> + +namespace wasm { + +struct LocalSubtyping : public WalkerPass<PostWalker<LocalSubtyping>> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new LocalSubtyping(); } + + // Shared code to find all sets or gets for each local index. Returns a vector + // that maps local indexes => vector of LocalGet*|LocalSet* expressions. + template<typename T> + std::vector<std::vector<T*>> getLocalOperations(Function* func) { + std::vector<std::vector<T*>> ret; + ret.resize(func->getNumLocals()); + FindAll<T> operations(func->body); + for (auto* operation : operations.list) { + ret[operation->index].push_back(operation); + } + return ret; + } + + void doWalkFunction(Function* func) { + if (!getModule()->features.hasGC()) { + return; + } + + auto varBase = func->getVarIndexBase(); + auto numLocals = func->getNumLocals(); + + auto setsForLocal = getLocalOperations<LocalSet>(func); + auto getsForLocal = getLocalOperations<LocalGet>(func); + + // Keep iterating while we find things to change. There can be chains like + // X -> Y -> Z where one change enables more. Note that we are O(N^2) on + // that atm, but it is a rare pattern as general optimizations + // (SimplifyLocals and CoalesceLocals) break up such things; also, even if + // we tracked changes more carefully we'd have the case of nested tees + // where we could still be O(N^2), so we'd need something more complex here + // involving topological sorting. Leave that for if the need arises. + + // TODO: handle cycles of X -> Y -> X etc. + + bool more; + + do { + more = false; + + // First, refinalize which will recompute least upper bounds on ifs and + // blocks, etc., potentially finding a more specific type. Note that + // that utility does not tell us if it changed anything, so we depend on + // the next step for knowing if there is more work to do. + ReFinalize().walkFunctionInModule(func, getModule()); + + // Second, find vars whose actual applied values allow a more specific + // type. + + for (Index i = varBase; i < numLocals; i++) { + // Find all the types assigned to the var, and compute the optimal LUB. + // Note that we do not need to take into account the initial value of + // zero or null that locals have: that value has the type of the local, + // which is a supertype of all the assigned values anyhow. It will never + // be able to tell us of a more specific subtype that is possible. + std::unordered_set<Type> types; + for (auto* set : setsForLocal[i]) { + types.insert(set->value->type); + } + if (types.empty()) { + // Nothing is assigned to this local (other opts will remove it). + continue; + } + + auto oldType = func->getLocalType(i); + auto newType = Type::getLeastUpperBound(types); + assert(newType != Type::none); // in valid wasm there must be a LUB + + // Remove non-nullability if we disallow that in locals. + if (!getModule()->features.hasGCNNLocals() && newType.isNonNullable()) { + newType = Type(newType.getHeapType(), Nullable); + // Note that the old type must have been nullable as well, as non- + // nullable types cannot be locals without that feature being enabled, + // which means that we will not have to do any extra work to handle + // non-nullability if we update the type: we are just updating the + // heap type, and leaving the type nullable as it was. + assert(oldType.isNullable()); + } + + if (newType != oldType) { + // We found a more specific type! + assert(Type::isSubType(newType, oldType)); + func->vars[i - varBase] = newType; + more = true; + + // Update gets and tees. + for (auto* get : getsForLocal[i]) { + get->type = newType; + } + + // NB: These tee updates will not be needed if the type of tees + // becomes that of their value, in the spec. + for (auto* set : setsForLocal[i]) { + if (set->isTee() && set->type != Type::unreachable) { + set->type = newType; + } + } + } + } + } while (more); + } +}; + +Pass* createLocalSubtypingPass() { return new LocalSubtyping(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 10a688313..d21464c2b 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -171,6 +171,9 @@ void PassRegistry::registerPasses() { registerPass("local-cse", "common subexpression elimination inside basic blocks", createLocalCSEPass); + registerPass("local-subtyping", + "apply more specific subtypes to locals where possible", + createLocalSubtypingPass); registerPass("log-execution", "instrument the build with logging of where execution goes", createLogExecutionPass); @@ -454,6 +457,12 @@ void PassRunner::addDefaultFunctionOptimizationPasses() { if (options.optimizeLevel >= 3 || options.shrinkLevel >= 2) { addIfNoDWARFIssues("merge-locals"); // very slow on e.g. sqlite } + if (options.optimizeLevel > 1 && wasm->features.hasGC()) { + // Coalescing may prevent subtyping (as a coalesced local must have the + // supertype of all those combined into it), so subtype first. + // TODO: when optimizing for size, maybe the order should reverse? + addIfNoDWARFIssues("local-subtyping"); + } addIfNoDWARFIssues("coalesce-locals"); addIfNoDWARFIssues("simplify-locals"); addIfNoDWARFIssues("vacuum"); diff --git a/src/passes/passes.h b/src/passes/passes.h index 5f265470d..004f8f319 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -59,6 +59,7 @@ Pass* createLegalizeJSInterfacePass(); Pass* createLegalizeJSInterfaceMinimallyPass(); Pass* createLimitSegmentsPass(); Pass* createLocalCSEPass(); +Pass* createLocalSubtypingPass(); Pass* createLogExecutionPass(); Pass* createInstrumentLocalsPass(); Pass* createInstrumentMemoryPass(); diff --git a/test/lit/help/optimization-opts.test b/test/lit/help/optimization-opts.test index 3350015a1..0d4ed48a8 100644 --- a/test/lit/help/optimization-opts.test +++ b/test/lit/help/optimization-opts.test @@ -292,6 +292,9 @@ ;; CHECK-NEXT: --local-cse common subexpression elimination ;; CHECK-NEXT: inside basic blocks ;; CHECK-NEXT: +;; CHECK-NEXT: --local-subtyping apply more specific subtypes to +;; CHECK-NEXT: locals where possible +;; CHECK-NEXT: ;; CHECK-NEXT: --log-execution instrument the build with ;; CHECK-NEXT: logging of where execution goes ;; CHECK-NEXT: diff --git a/test/lit/passes/local-subtyping-nn.wast b/test/lit/passes/local-subtyping-nn.wast new file mode 100644 index 000000000..7eca6558a --- /dev/null +++ b/test/lit/passes/local-subtyping-nn.wast @@ -0,0 +1,37 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --local-subtyping -all --enable-gc-nn-locals -S -o - \ +;; RUN: | filecheck %s + +(module + ;; CHECK: (type $struct (struct )) + (type $struct (struct)) + + ;; CHECK: (import "out" "i32" (func $i32 (result i32))) + (import "out" "i32" (func $i32 (result i32))) + + ;; CHECK: (func $non-nullable + ;; CHECK-NEXT: (local $x (ref $struct)) + ;; CHECK-NEXT: (local $y (ref $none_=>_i32)) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (ref.null $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $non-nullable + (local $x (ref null $struct)) + (local $y anyref) + ;; x is assigned a value that is non-nullable. + (local.set $x + (ref.as_non_null (ref.null $struct)) + ) + ;; x is assigned a value that is non-nullable, and also allows a more + ;; specific heap type. + (local.set $y + (ref.func $i32) + ) + ) +) diff --git a/test/lit/passes/local-subtyping.wast b/test/lit/passes/local-subtyping.wast new file mode 100644 index 000000000..59f93ce43 --- /dev/null +++ b/test/lit/passes/local-subtyping.wast @@ -0,0 +1,205 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --local-subtyping -all -S -o - \ +;; RUN: | filecheck %s + +(module + ;; CHECK: (import "out" "i32" (func $i32 (result i32))) + (import "out" "i32" (func $i32 (result i32))) + ;; CHECK: (import "out" "i64" (func $i64 (result i64))) + (import "out" "i64" (func $i64 (result i64))) + + ;; Refinalization can find a more specific type, where the declared type was + ;; not the optimal LUB. + ;; CHECK: (func $refinalize (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (if (result (ref func)) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: (ref.func $i64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block $block (result (ref func)) + ;; CHECK-NEXT: (br $block + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.func $i64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $refinalize (param $x i32) + (drop + (if (result anyref) + (local.get $x) + (ref.func $i32) + (ref.func $i64) + ) + ) + (drop + (block $block (result anyref) + (br $block + (ref.func $i32) + ) + (ref.func $i64) + ) + ) + ) + + ;; A simple case where a local has a single assignment that we can use as a + ;; more specific type. A similar thing with a parameter, however, is not a + ;; thing we can optimize. Also, ignore a local with zero assignments. + ;; CHECK: (func $simple-local-but-not-param (param $x anyref) + ;; CHECK-NEXT: (local $y (ref null $none_=>_i32)) + ;; CHECK-NEXT: (local $unused anyref) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-local-but-not-param (param $x anyref) + (local $y anyref) + (local $unused anyref) + (local.set $x + (ref.func $i32) + ) + (local.set $y + (ref.func $i32) + ) + ) + + ;; CHECK: (func $locals-with-multiple-assignments + ;; CHECK-NEXT: (local $x funcref) + ;; CHECK-NEXT: (local $y (ref null $none_=>_i32)) + ;; CHECK-NEXT: (local $z (ref null $none_=>_i64)) + ;; CHECK-NEXT: (local $w funcref) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (ref.func $i64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $z + ;; CHECK-NEXT: (ref.func $i64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $z + ;; CHECK-NEXT: (ref.func $i64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $w + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $w + ;; CHECK-NEXT: (ref.func $i64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $locals-with-multiple-assignments + (local $x anyref) + (local $y anyref) + (local $z anyref) + (local $w funcref) + ;; x is assigned two different types with a new LUB possible + (local.set $x + (ref.func $i32) + ) + (local.set $x + (ref.func $i64) + ) + ;; y and z are assigned the same more specific type twice + (local.set $y + (ref.func $i32) + ) + (local.set $y + (ref.func $i32) + ) + (local.set $z + (ref.func $i64) + ) + (local.set $z + (ref.func $i64) + ) + ;; w is assigned two different types *without* a new LUB possible, as it + ;; already had the optimal LUB + (local.set $w + (ref.func $i32) + ) + (local.set $w + (ref.func $i64) + ) + ) + + ;; In some cases multiple iterations are necessary, as one inferred new type + ;; applies to a get which then allows another inference. + ;; CHECK: (func $multiple-iterations + ;; CHECK-NEXT: (local $x (ref null $none_=>_i32)) + ;; CHECK-NEXT: (local $y (ref null $none_=>_i32)) + ;; CHECK-NEXT: (local $z (ref null $none_=>_i32)) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $z + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $multiple-iterations + (local $x anyref) + (local $y anyref) + (local $z anyref) + (local.set $x + (ref.func $i32) + ) + (local.set $y + (local.get $x) + ) + (local.set $z + (local.get $y) + ) + ) + + ;; Sometimes a refinalize is necessary in between the iterations. + ;; CHECK: (func $multiple-iterations-refinalize (param $i i32) + ;; CHECK-NEXT: (local $x (ref null $none_=>_i32)) + ;; CHECK-NEXT: (local $y (ref null $none_=>_i64)) + ;; CHECK-NEXT: (local $z funcref) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (ref.func $i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $y + ;; CHECK-NEXT: (ref.func $i64) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $z + ;; CHECK-NEXT: (select (result funcref) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (local.get $i) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $multiple-iterations-refinalize (param $i i32) + (local $x anyref) + (local $y anyref) + (local $z anyref) + (local.set $x + (ref.func $i32) + ) + (local.set $y + (ref.func $i64) + ) + (local.set $z + (select + (local.get $x) + (local.get $y) + (local.get $i) + ) + ) + ) +) diff --git a/test/passes/Oz_fuzz-exec_all-features.txt b/test/passes/Oz_fuzz-exec_all-features.txt index 6d342196e..b0f4c6448 100644 --- a/test/passes/Oz_fuzz-exec_all-features.txt +++ b/test/passes/Oz_fuzz-exec_all-features.txt @@ -153,7 +153,7 @@ ) ) (func $2 (; has Stack IR ;) - (local $0 anyref) + (local $0 (ref null $extendedstruct)) (call $log (i32.const 1) ) @@ -247,7 +247,7 @@ (unreachable) ) (func $4 (; has Stack IR ;) - (local $0 anyref) + (local $0 (ref null $struct)) (local.set $0 (struct.new_default_with_rtt $struct (rtt.canon $struct) @@ -272,7 +272,7 @@ ) ) (func $5 (; has Stack IR ;) - (local $0 anyref) + (local $0 (ref null $extendedstruct)) (local.set $0 (struct.new_default_with_rtt $extendedstruct (rtt.canon $extendedstruct) |