summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2022-06-01 07:17:21 -0700
committerGitHub <noreply@github.com>2022-06-01 14:17:21 +0000
commit623e08e88db3ebc913fe76e7f60e89fa030f884d (patch)
tree3e3ccd5c1b45ade4d7f65066dced5991cd294a71
parent49763aa9a7fb0f07588a9d19db6896356e52c5f8 (diff)
downloadbinaryen-623e08e88db3ebc913fe76e7f60e89fa030f884d.tar.gz
binaryen-623e08e88db3ebc913fe76e7f60e89fa030f884d.tar.bz2
binaryen-623e08e88db3ebc913fe76e7f60e89fa030f884d.zip
Global Struct Inference pass: Infer two constants in struct.get (#4659)
This optimizes constants in the megamorphic case of two: when we know two function references are possible, we could in theory emit this:

  (select
    (ref.func A)
    (ref.func B)
    (ref.eq
      (..ref value..) ;; globally, only 2 things are possible here, and one has
                      ;; ref.func A as its value, and the other ref.func B
      (ref.func A)))

That is, compare to one of the values, and emit the two possible values there. Other optimizations can then turn a call_ref on this select into an if over two direct calls, leading to devirtualization.

We cannot compare a ref.func directly (since function references are not comparable), and so instead we look at immutable global structs. If we find a struct type that has only two possible values in some field, and the structs are in immutable globals (which happens in the vtable case in j2wasm for example), then we can compare the references of the struct to decide between the two values in the field.
-rwxr-xr-xscripts/fuzz_opt.py1
-rw-r--r--src/passes/CMakeLists.txt1
-rw-r--r--src/passes/GlobalStructInference.cpp244
-rw-r--r--src/passes/pass.cpp2
-rw-r--r--src/passes/passes.h1
-rw-r--r--test/lit/help/wasm-opt.test2
-rw-r--r--test/lit/help/wasm2js.test2
-rw-r--r--test/lit/passes/gsi.wast806
8 files changed, 1059 insertions, 0 deletions
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index 083d476fb..54c24be86 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -1154,6 +1154,7 @@ opt_choices = [
["--inlining-optimizing"],
["--flatten", "--simplify-locals-notee-nostructure", "--local-cse"],
["--global-refining"],
+ ["--gsi"],
["--gto"],
["--local-cse"],
["--heap2local"],
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index c83e95d9e..f74dd4f0b 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -38,6 +38,7 @@ set(passes_SOURCES
FuncCastEmulation.cpp
GenerateDynCalls.cpp
GlobalRefining.cpp
+ GlobalStructInference.cpp
GlobalTypeOptimization.cpp
Heap2Local.cpp
I64ToI32Lowering.cpp
diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp
new file mode 100644
index 000000000..42fadf295
--- /dev/null
+++ b/src/passes/GlobalStructInference.cpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Finds types which are only created in assignments to immutable globals. For
+// such types we can replace a struct.get with this pattern:
+//
+// (struct.get $foo i
+// (..ref..))
+// =>
+// (select
+// (value1)
+// (value2)
+// (ref.eq
+// (..ref..)
+// (global.get $global1)))
+//
+// That is a valid transformation if there are only two struct.news of $foo, it
+// is created in two immutable globals $global1 and $global2, the field is
+// immutable, the values of field |i| in them are value1 and value2
+// respectively, and $foo has no subtypes. In that situation, the reference must
+// be one of those two, so we can compare the reference to the globals and pick
+// the right value there. (We can also handle subtypes, if we look at their
+// values as well, see below.)
+//
+// The benefit of this optimization is primarily in the case of constant values
+// that we can heavily optimize, like function references (constant function
+// refs let us inline, etc.). Function references cannot be directly compared,
+// so we cannot use ConstantFieldPropagation or such with an extension to
+// multiple values, as the select pattern shown above can't be used - it needs a
+// comparison. But we can compare structs, so if the function references are in
+// vtables, and the vtables follow the above pattern, then we can optimize.
+//
+
+#include "ir/find_all.h"
+#include "ir/module-utils.h"
+#include "ir/subtypes.h"
+#include "pass.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace {
+
+// A whole-module pass that replaces a struct.get of an immutable field with a
+// select between two constants, when the only objects the reference can
+// possibly point to are two structs created at the top level of immutable
+// globals. See the file-level comment for the emitted pattern.
+struct GlobalStructInference : public Pass {
+  // Maps optimizable struct types to the globals whose init is a struct.new of
+  // that type or, after propagation in run(), of one of its subtypes. If a
+  // type is not present here, a struct.get of it cannot be optimized.
+  std::unordered_map<HeapType, std::vector<Name>> typeGlobals;
+
+  // Analyzes the module to fill in |typeGlobals| and then rewrites every
+  // qualifying struct.get in all functions.
+  //
+  // Aborts via Fatal() if the type system is not nominal.
+  void run(PassRunner* runner, Module* module) override {
+    if (getTypeSystem() != TypeSystem::Nominal) {
+      Fatal() << "GlobalStructInference requires nominal typing";
+    }
+
+    // First, find all the information we need. We need to know which struct
+    // types are created in functions, because we will not be able to optimize
+    // those.
+
+    using HeapTypes = std::unordered_set<HeapType>;
+
+    ModuleUtils::ParallelFunctionAnalysis<HeapTypes> analysis(
+      *module, [&](Function* func, HeapTypes& types) {
+        if (func->imported()) {
+          return;
+        }
+
+        for (auto* structNew : FindAll<StructNew>(func->body).list) {
+          auto type = structNew->type;
+          // Skip non-reference types (e.g. an unreachable struct.new), which
+          // have no heap type to record.
+          if (type.isRef()) {
+            types.insert(type.getHeapType());
+          }
+        }
+      });
+
+    // We cannot optimize types that appear in a struct.new in a function,
+    // which we just collected and merge now.
+    HeapTypes unoptimizable;
+
+    for (auto& [func, types] : analysis.map) {
+      for (auto type : types) {
+        unoptimizable.insert(type);
+      }
+    }
+
+    // Process the globals.
+    for (auto& global : module->globals) {
+      if (global->imported()) {
+        continue;
+      }
+
+      // We cannot optimize a type that appears in a non-toplevel location in
+      // a global init.
+      for (auto* structNew : FindAll<StructNew>(global->init).list) {
+        auto type = structNew->type;
+        if (type.isRef() && structNew != global->init) {
+          unoptimizable.insert(type.getHeapType());
+        }
+      }
+
+      if (!global->init->type.isRef()) {
+        continue;
+      }
+
+      auto type = global->init->type.getHeapType();
+
+      // We cannot optimize mutable globals.
+      if (global->mutable_) {
+        unoptimizable.insert(type);
+        continue;
+      }
+
+      // Finally, if this is a struct.new then it is one we can optimize; note
+      // it.
+      if (global->init->is<StructNew>()) {
+        typeGlobals[type].push_back(global->name);
+      }
+    }
+
+    // A struct.get might also read from any of the subtypes. As a result, an
+    // unoptimizable type makes all its supertypes unoptimizable as well. We
+    // record the supertypes in |unoptimizable| too (and not only erase them
+    // from |typeGlobals|), so that the propagation below cannot re-add them.
+    // TODO: this could be specific per field (and not all supers have all
+    //       fields)
+    //
+    // Iterate on a copy, as we insert into the set while walking.
+    auto unoptimizableCopy = unoptimizable;
+    for (auto type : unoptimizableCopy) {
+      while (1) {
+        unoptimizable.insert(type);
+        typeGlobals.erase(type);
+        auto super = type.getSuperType();
+        if (!super) {
+          break;
+        }
+        type = *super;
+      }
+    }
+
+    // Similarly, propagate global names: if one type has [global1], then a
+    // get of any supertype might access that, so propagate to them. Iterate
+    // on a copy, as we add entries to the map while walking.
+    auto typeGlobalsCopy = typeGlobals;
+    for (auto& [type, globals] : typeGlobalsCopy) {
+      auto curr = type;
+      while (1) {
+        auto super = curr.getSuperType();
+        if (!super) {
+          break;
+        }
+        curr = *super;
+
+        // Never add globals to an unoptimizable type: doing so would create
+        // an entry for it again, and a struct.get of it could then be
+        // replaced even though it may read an object that is not in any of
+        // these globals. The set is closed over supertypes (see above), so
+        // everything further up is unoptimizable as well and we can stop.
+        if (unoptimizable.count(curr)) {
+          break;
+        }
+
+        for (auto global : globals) {
+          typeGlobals[curr].push_back(global);
+        }
+      }
+    }
+
+    if (typeGlobals.empty()) {
+      // We found nothing we can optimize.
+      return;
+    }
+
+    // Optimize based on the above.
+    struct FunctionOptimizer
+      : public WalkerPass<PostWalker<FunctionOptimizer>> {
+      bool isFunctionParallel() override { return true; }
+
+      Pass* create() override { return new FunctionOptimizer(parent); }
+
+      FunctionOptimizer(GlobalStructInference& parent) : parent(parent) {}
+
+      // Replaces |curr| with a select between two constants when the
+      // reference's type has exactly two candidate globals, the field is
+      // immutable, and the field's value is constant in both globals.
+      void visitStructGet(StructGet* curr) {
+        auto type = curr->ref->type;
+        if (type == Type::unreachable) {
+          return;
+        }
+
+        auto iter = parent.typeGlobals.find(type.getHeapType());
+        if (iter == parent.typeGlobals.end()) {
+          return;
+        }
+
+        auto& globals = iter->second;
+
+        // TODO: more sizes
+        if (globals.size() != 2) {
+          return;
+        }
+
+        // Check if the relevant fields contain constants, and are immutable.
+        auto& wasm = *getModule();
+        auto fieldIndex = curr->index;
+        auto& field = type.getHeapType().getStruct().fields[fieldIndex];
+        if (field.mutable_ == Mutable) {
+          return;
+        }
+        auto fieldType = field.type;
+        std::vector<Literal> values;
+        for (auto& name : globals) {
+          auto* structNew = wasm.getGlobal(name)->init->cast<StructNew>();
+          if (structNew->isWithDefault()) {
+            // A struct.new with defaults has no operands; use a zero of the
+            // field's type as its value.
+            values.push_back(Literal::makeZero(fieldType));
+          } else {
+            auto* init = structNew->operands[fieldIndex];
+            if (!Properties::isConstantExpression(init)) {
+              // Non-constant; give up entirely.
+              return;
+            }
+            values.push_back(Properties::getLiteral(init));
+          }
+        }
+
+        // Excellent, we can optimize here! Emit a select that yields the
+        // first global's value if the reference is equal to the first global,
+        // and the second global's value otherwise.
+        //
+        // Note that we must trap on null, so add a ref.as_non_null here.
+        Builder builder(wasm);
+        replaceCurrent(builder.makeSelect(
+          builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref),
+                            builder.makeGlobalGet(
+                              globals[0], wasm.getGlobal(globals[0])->type)),
+          builder.makeConstantExpression(values[0]),
+          builder.makeConstantExpression(values[1])));
+      }
+
+    private:
+      // The pass whose |typeGlobals| we read; it is not modified here.
+      GlobalStructInference& parent;
+    };
+
+    FunctionOptimizer(*this).run(runner, module);
+  }
+};
+
+} // anonymous namespace
+
+// Factory for the pass: returns a newly allocated GlobalStructInference
+// instance.
+Pass* createGlobalStructInferencePass() {
+  Pass* pass = new GlobalStructInference();
+  return pass;
+}
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 04947ae07..85a240dc3 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -170,6 +170,8 @@ void PassRegistry::registerPasses() {
"global-refining", "refine the types of globals", createGlobalRefiningPass);
registerPass(
"gto", "globally optimize GC types", createGlobalTypeOptimizationPass);
+ registerPass(
+ "gsi", "globally optimize struct values", createGlobalStructInferencePass);
registerPass("type-refining",
"apply more specific subtypes to type fields where possible",
createTypeRefiningPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index d7a6f9989..2c73ed91e 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -52,6 +52,7 @@ Pass* createGenerateDynCallsPass();
Pass* createGenerateI64DynCallsPass();
Pass* createGenerateStackIRPass();
Pass* createGlobalRefiningPass();
+Pass* createGlobalStructInferencePass();
Pass* createGlobalTypeOptimizationPass();
Pass* createHeap2LocalPass();
Pass* createI64ToI32LoweringPass();
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index d786bfdfc..1985d70ec 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -166,6 +166,8 @@
;; CHECK-NEXT:
;; CHECK-NEXT: --global-refining refine the types of globals
;; CHECK-NEXT:
+;; CHECK-NEXT: --gsi globally optimize struct values
+;; CHECK-NEXT:
;; CHECK-NEXT: --gto globally optimize GC types
;; CHECK-NEXT:
;; CHECK-NEXT: --heap2local replace GC allocations with
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index 504e08726..5c0f249e3 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -128,6 +128,8 @@
;; CHECK-NEXT:
;; CHECK-NEXT: --global-refining refine the types of globals
;; CHECK-NEXT:
+;; CHECK-NEXT: --gsi globally optimize struct values
+;; CHECK-NEXT:
;; CHECK-NEXT: --gto globally optimize GC types
;; CHECK-NEXT:
;; CHECK-NEXT: --heap2local replace GC allocations with
diff --git a/test/lit/passes/gsi.wast b/test/lit/passes/gsi.wast
new file mode 100644
index 000000000..9b54a98ca
--- /dev/null
+++ b/test/lit/passes/gsi.wast
@@ -0,0 +1,806 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+;; RUN: foreach %s %t wasm-opt --nominal --gsi -all -S -o - | filecheck %s
+
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; A non-reference global does not confuse us.
+ ;; CHECK: (global $global-other i32 (i32.const 123456))
+ (global $global-other i32 (i32.const 123456))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $global1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ ;; We can infer that this get can reference either $global1 or $global2,
+ ;; and nothing else (aside from a null), and can emit a select between
+ ;; those values.
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; As above, but now the field is mutable, so we cannot optimize.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field (mut i32)) data))
+ (type $struct (struct (mut i32)))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; Just one global. We do not optimize here - we let other passes do that.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; Three globals. For now, we do not optimize here.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (global $global3 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 99999)
+ ;; CHECK-NEXT: ))
+ (global $global3 (ref $struct) (struct.new $struct
+ (i32.const 99999)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; A struct.new inside a function stops us from optimizing.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.new $struct
+ (i32.const 1)
+ )
+ )
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; We ignore imports, as we assume a closed world, but that might change in the
+;; future. For now, we will optimize here.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (import "a" "b" (global $global-import (ref $struct)))
+ (import "a" "b" (global $global-import (ref $struct)))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $global1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; A struct.new in a non-toplevel position in a global stops us from
+;; optimizing.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $tuple (struct_subtype (field anyref) (field anyref) data))
+ (type $tuple (struct anyref anyref))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (global $global-tuple (ref $tuple) (struct.new $tuple
+ ;; CHECK-NEXT: (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 999999)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (ref.null any)
+ ;; CHECK-NEXT: ))
+ (global $global-tuple (ref $tuple) (struct.new $tuple
+ (struct.new $struct
+ (i32.const 999999)
+ )
+ (ref.null any)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; As above, but remove the struct.new in a nested position, while keeping all
+;; the other stuff in the above test. Now we should optimize.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $tuple (struct_subtype (field anyref) (field anyref) data))
+ (type $tuple (struct anyref anyref))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (global $global-tuple (ref $tuple) (struct.new $tuple
+ ;; CHECK-NEXT: (ref.null any)
+ ;; CHECK-NEXT: (ref.null any)
+ ;; CHECK-NEXT: ))
+ (global $global-tuple (ref $tuple) (struct.new $tuple
+ (ref.null any)
+ (ref.null any)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $global1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; When one of the globals is mutable, we cannot optimize.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (mut (ref $struct)) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (mut (ref $struct)) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; A subtype is not optimizable, which prevents $struct from being optimized.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct_subtype i32 data))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (type $sub-struct (struct_subtype (field i32) $struct))
+ (type $sub-struct (struct_subtype i32 $struct))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.new $sub-struct
+ ;; CHECK-NEXT: (i32.const 999999)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.new $sub-struct
+ (i32.const 999999)
+ )
+ )
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; A *super*-type is not optimizable, but that does not block us, and we can
+;; optimize.
+(module
+ ;; CHECK: (type $super-struct (struct_subtype (field i32) data))
+ (type $super-struct (struct_subtype i32 data))
+
+ ;; CHECK: (type $struct (struct_subtype (field i32) $super-struct))
+ (type $struct (struct_subtype i32 $super-struct))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.new $super-struct
+ ;; CHECK-NEXT: (i32.const 999999)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $global1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.new $super-struct
+ (i32.const 999999)
+ )
+ )
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; One global for each of the type and the subtype. The optimization will pick
+;; between their 2 values.
+(module
+ ;; CHECK: (type $super-struct (struct_subtype (field i32) data))
+ (type $super-struct (struct_subtype i32 data))
+
+ ;; CHECK: (type $struct (struct_subtype (field i32) $super-struct))
+ (type $struct (struct_subtype i32 $super-struct))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $super-struct) (struct.new $super-struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $super-struct) (struct.new $super-struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (ref.null $super-struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $global1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ ;; We cannot optimize the first - it has just one global - but the second
+ ;; will consider the struct and sub-struct, find 2 possible values, and
+ ;; optimize.
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ (drop
+ (struct.get $super-struct 0
+ (ref.null $super-struct)
+ )
+ )
+ )
+)
+
+;; One global has a non-constant field, so we cannot optimize.
+(module
+ ;; CHECK: (type $struct (struct_subtype (field i32) data))
+ (type $struct (struct i32))
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global1 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.add
+ ;; CHECK-NEXT: (i32.const 41)
+ ;; CHECK-NEXT: (i32.const 1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct) (struct.new $struct
+ (i32.add
+ (i32.const 41)
+ (i32.const 1)
+ )
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct) (struct.new $struct
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct) (struct.new $struct
+ (i32.const 1337)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct 0
+ ;; CHECK-NEXT: (ref.null $struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ (drop
+ (struct.get $struct 0
+ (ref.null $struct)
+ )
+ )
+ )
+)
+
+;; One global each for two subtypes of a common supertype, and one for the
+;; supertype.
+(module
+ ;; CHECK: (type $super-struct (struct_subtype (field i32) data))
+ (type $super-struct (struct_subtype i32 data))
+
+ ;; CHECK: (type $struct1 (struct_subtype (field i32) (field f32) $super-struct))
+ (type $struct1 (struct_subtype i32 f32 $super-struct))
+
+ ;; CHECK: (type $struct2 (struct_subtype (field i32) (field f64) $super-struct))
+ (type $struct2 (struct_subtype i32 f64 $super-struct))
+
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global0 (ref $super-struct) (struct.new $super-struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global0 (ref $super-struct) (struct.new $super-struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global1 (ref $struct1) (struct.new $struct1
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (f32.const 3.141590118408203)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct1) (struct.new $struct1
+ (i32.const 1337)
+ (f32.const 3.14159)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct2) (struct.new $struct2
+ ;; CHECK-NEXT: (i32.const 99999)
+ ;; CHECK-NEXT: (f64.const 2.71828)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct2) (struct.new $struct2
+ (i32.const 99999)
+ (f64.const 2.71828)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $super-struct 0
+ ;; CHECK-NEXT: (ref.null $super-struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct1 0
+ ;; CHECK-NEXT: (ref.null $struct1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $struct2 0
+ ;; CHECK-NEXT: (ref.null $struct2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ ;; This has three possible values due to the two children, so we do not
+ ;; optimize.
+ (drop
+ (struct.get $super-struct 0
+ (ref.null $super-struct)
+ )
+ )
+ ;; These each have one possible value, so we also do not optimize.
+ (drop
+ (struct.get $struct1 0
+ (ref.null $struct1)
+ )
+ )
+ (drop
+ (struct.get $struct2 0
+ (ref.null $struct2)
+ )
+ )
+ )
+)
+
+;; As above, but now the subtypes each have 2 values, and we can optimize.
+(module
+ ;; CHECK: (type $super-struct (struct_subtype (field i32) data))
+ (type $super-struct (struct_subtype i32 data))
+
+ ;; CHECK: (type $struct1 (struct_subtype (field i32) (field f32) $super-struct))
+ (type $struct1 (struct_subtype i32 f32 $super-struct))
+
+ ;; CHECK: (type $struct2 (struct_subtype (field i32) (field f64) $super-struct))
+ (type $struct2 (struct_subtype i32 f64 $super-struct))
+
+
+ ;; CHECK: (type $none_=>_none (func_subtype func))
+
+ ;; CHECK: (global $global0 (ref $super-struct) (struct.new $super-struct
+ ;; CHECK-NEXT: (i32.const 42)
+ ;; CHECK-NEXT: ))
+ (global $global0 (ref $super-struct) (struct.new $super-struct
+ (i32.const 42)
+ ))
+
+ ;; CHECK: (global $global1 (ref $struct1) (struct.new $struct1
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (f32.const 3.141590118408203)
+ ;; CHECK-NEXT: ))
+ (global $global1 (ref $struct1) (struct.new $struct1
+ (i32.const 1337)
+ (f32.const 3.14159)
+ ))
+
+ ;; CHECK: (global $global1b (ref $struct1) (struct.new $struct1
+ ;; CHECK-NEXT: (i32.const 1338)
+ ;; CHECK-NEXT: (f32.const 3.141590118408203)
+ ;; CHECK-NEXT: ))
+ (global $global1b (ref $struct1) (struct.new $struct1
+ (i32.const 1338)
+ (f32.const 3.14159)
+ ))
+
+ ;; CHECK: (global $global2 (ref $struct2) (struct.new $struct2
+ ;; CHECK-NEXT: (i32.const 99999)
+ ;; CHECK-NEXT: (f64.const 2.71828)
+ ;; CHECK-NEXT: ))
+ (global $global2 (ref $struct2) (struct.new $struct2
+ (i32.const 99999)
+ (f64.const 2.71828)
+ ))
+
+ ;; CHECK: (global $global2b (ref $struct2) (struct.new $struct2
+ ;; CHECK-NEXT: (i32.const 99998)
+ ;; CHECK-NEXT: (f64.const 2.71828)
+ ;; CHECK-NEXT: ))
+ (global $global2b (ref $struct2) (struct.new $struct2
+ (i32.const 99998)
+ (f64.const 2.71828)
+ ))
+
+ ;; CHECK: (func $test (type $none_=>_none)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (struct.get $super-struct 0
+ ;; CHECK-NEXT: (ref.null $super-struct)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 1337)
+ ;; CHECK-NEXT: (i32.const 1338)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (ref.null $struct1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $global1)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (select
+ ;; CHECK-NEXT: (i32.const 99999)
+ ;; CHECK-NEXT: (i32.const 99998)
+ ;; CHECK-NEXT: (ref.eq
+ ;; CHECK-NEXT: (ref.as_non_null
+ ;; CHECK-NEXT: (ref.null $struct2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (global.get $global2)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $test
+ ;; This still cannot be optimized.
+ (drop
+ (struct.get $super-struct 0
+ (ref.null $super-struct)
+ )
+ )
+ ;; These can be optimized, and will be different from one another.
+ (drop
+ (struct.get $struct1 0
+ (ref.null $struct1)
+ )
+ )
+ (drop
+ (struct.get $struct2 0
+ (ref.null $struct2)
+ )
+ )
+ )
+)