diff options
author | Alon Zakai <azakai@google.com> | 2022-09-26 13:49:50 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-26 20:49:50 +0000 |
commit | b23b47d54a2fb02b048e7eb8de8043358599ec8b (patch) | |
tree | a0df6c52cae239318893b069172674145903598f | |
parent | b40fa4a885f219d8f317f125880aa0ee9f46b62f (diff) | |
download | binaryen-b23b47d54a2fb02b048e7eb8de8043358599ec8b.tar.gz binaryen-b23b47d54a2fb02b048e7eb8de8043358599ec8b.tar.bz2 binaryen-b23b47d54a2fb02b048e7eb8de8043358599ec8b.zip |
[GUFA] Infer a RefEq value of 0 when possible (#5081)
If the PossibleContents for the two sides have no possible intersection then the
result must be 0.
-rw-r--r-- | src/ir/possible-contents.cpp | 59 | ||||
-rw-r--r-- | src/ir/possible-contents.h | 14 | ||||
-rw-r--r-- | src/passes/GUFA.cpp | 53 | ||||
-rw-r--r-- | test/gtest/possible-contents.cpp | 50 | ||||
-rw-r--r-- | test/lit/passes/gufa-refs.wast | 291 |
5 files changed, 456 insertions, 11 deletions
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp index 83d55e6f9..4755c9904 100644 --- a/src/ir/possible-contents.cpp +++ b/src/ir/possible-contents.cpp @@ -86,7 +86,7 @@ void PossibleContents::combine(const PossibleContents& other) { // combination here is if they have the same type (since we've already ruled // out the case of them being equal). If they have the same type then // neither is a reference and we can emit an exact type (since subtyping is - // not relevant for non-references. + // not relevant for non-references). if (type == otherType) { value = ExactType(type); } else { @@ -132,6 +132,48 @@ void PossibleContents::combine(const PossibleContents& other) { value = Many(); } +bool PossibleContents::haveIntersection(const PossibleContents& a, + const PossibleContents& b) { + if (a.isNone() || b.isNone()) { + // One is the empty set, so nothing can intersect here. + return false; + } + + if (a.isMany() || b.isMany()) { + // One is the set of all things, so definitely something can intersect since + // we've ruled out an empty set for both. + return true; + } + + auto aType = a.getType(); + auto bType = b.getType(); + + if (aType.isNullable() && bType.isNullable()) { + // Null is possible on both sides. Assume that an intersection can exist, + // but we could be more precise here and check if the types belong to + // different hierarchies, in which case the nulls would differ TODO. For + // now we only use this API from the RefEq logic, so this is fully precise. + return true; + } + + if (a.hasExactType() && b.hasExactType() && a.getType() != b.getType()) { + // The values must be different since their types are different. + return false; + } + + if (!Type::isSubType(aType, bType) && !Type::isSubType(bType, aType)) { + // No type can appear in both a and b, so the types differ, so the values + // differ. + return false; + } + + // TODO: we can also optimize things like different Literals, but existing + // passes do such things already so it is low priority. + + // It appears they can intersect. + return true; +} + namespace { // We are going to do a very large flow operation, potentially, as we create @@ -378,9 +420,6 @@ struct InfoCollector PossibleContents::literal(Literal(curr->func, curr->type.getHeapType()))); } void visitRefEq(RefEq* curr) { - // TODO: optimize when possible (e.g. when both sides must contain the same - // global, or if we infer exact types that are different then the - // result must be 0) addRoot(curr); } void visitTableGet(TableGet* curr) { @@ -416,12 +455,9 @@ struct InfoCollector } void visitRefCast(RefCast* curr) { - // We will handle this in a special way later during the flow, as ref.cast - // only allows valid values to flow through. addChildParentLink(curr->ref, curr); } void visitRefTest(RefTest* curr) { - // We will handle this similarly to RefCast. addChildParentLink(curr->ref, curr); } void visitBrOn(BrOn* curr) { @@ -1114,7 +1150,11 @@ private: // values to flow through it. void flowRefCast(const PossibleContents& contents, RefCast* cast); - // The possible contents may allow us to infer an outcome, like with RefCast. + // The possible contents may allow us to infer an outcome in various + // instructions. If the expression has a single child, that is what is + // updated by the new |contents| (which we pass in to avoid doing an extra + // lookup); if there is more than one child, then to keep the code simple we + // expect the function to look up the children's effects manually. void flowRefTest(const PossibleContents& contents, RefTest* test); // We will need subtypes during the flow, so compute them once ahead of time. @@ -1734,6 +1774,7 @@ void Flower::flowRefCast(const PossibleContents& contents, RefCast* cast) { } void Flower::flowRefTest(const PossibleContents& contents, RefTest* test) { + // TODO move to gufa pass; this must happen at the end PossibleContents filtered; if (contents.isMany()) { // Just pass the Many through. @@ -1792,8 +1833,6 @@ void Flower::dump(Location location) { std::cout << " sigresultloc " << '\n'; } else if (auto* loc = std::get_if<NullLocation>(&location)) { std::cout << " Nullloc " << loc->type << '\n'; - } else if (auto* loc = std::get_if<UniqueLocation>(&location)) { - std::cout << " Specialloc " << loc->index << '\n'; } else { std::cout << " (other)\n"; } diff --git a/src/ir/possible-contents.h b/src/ir/possible-contents.h index 38bc263e9..f3ad37d92 100644 --- a/src/ir/possible-contents.h +++ b/src/ir/possible-contents.h @@ -167,6 +167,13 @@ public: // This returns false for None and Many, for whom it is not well-defined. bool hasExactType() const { return isExactType() || isLiteral(); } + // Returns whether the given contents have any intersection, that is, whether + // some value exists that can appear in both |a| and |b|. For example, if + // either is None, or if they are both ExactTypes but of different types, then + // they have no intersection. + static bool haveIntersection(const PossibleContents& a, + const PossibleContents& b); + // Whether we can make an Expression* for this containing the proper contents. // We can do that for a Literal (emitting a Const or RefFunc etc.) or a // Global (emitting a GlobalGet), but not for anything else yet. @@ -529,6 +536,13 @@ public: return iter->second; } + // Helper for the common case of an expression location that is not a + // multivalue. + PossibleContents getContents(Expression* curr) { + assert(curr->type.size() == 1); + return getContents(ExpressionLocation{curr, 0}); + } + private: std::unordered_map<Location, PossibleContents> locationContents; }; diff --git a/src/passes/GUFA.cpp b/src/passes/GUFA.cpp index 1467b1312..53693420e 100644 --- a/src/passes/GUFA.cpp +++ b/src/passes/GUFA.cpp @@ -69,6 +69,34 @@ struct GUFAOptimizer bool optimized = false; + // As we optimize, we replace expressions and create new ones. For new ones + // we can infer their contents based on what they replaced, e.g., if we + // replaced a local.get with a const, then the PossibleContents of the const + // are the same as the local.get (in this simple example, we could also just + // infer them from the const itself, of course). Rather than update the + // ContentOracle with new contents, which is a shared object among threads, + // each function-parallel worker stores a map of new things it created to the + // contents for them. + std::unordered_map<Expression*, PossibleContents> newContents; + + Expression* replaceCurrent(Expression* rep) { + newContents[rep] = oracle.getContents(getCurrent()); + + return WalkerPass< + PostWalker<GUFAOptimizer, + UnifiedExpressionVisitor<GUFAOptimizer>>>::replaceCurrent(rep); + } + + const PossibleContents getContents(Expression* curr) { + // If this is something we added ourselves, use that; otherwise the info is + // in the oracle. + if (auto iter = newContents.find(curr); iter != newContents.end()) { + return iter->second; + } + + return oracle.getContents(curr); + } + void visitExpression(Expression* curr) { // Skip things we can't improve in any way. auto type = curr->type; @@ -91,7 +119,7 @@ struct GUFAOptimizer // Ok, this is an interesting location that we might optimize. See what the // oracle says is possible there. - auto contents = oracle.getContents(ExpressionLocation{curr, 0}); + auto contents = getContents(curr); auto& options = getPassOptions(); auto& wasm = *getModule(); @@ -182,6 +210,29 @@ struct GUFAOptimizer } } + void visitRefEq(RefEq* curr) { + if (curr->type == Type::unreachable) { + // Leave this for DCE. + return; + } + + auto leftContents = getContents(curr->left); + auto rightContents = getContents(curr->right); + + if (!PossibleContents::haveIntersection(leftContents, rightContents)) { + // The contents prove the two sides cannot contain the same reference, so + // we infer 0. + // + // Note that this is fine even if one of the sides is None. In that case, + // no value is possible there, and the intersection is empty, so we will + // get here and emit a 0. That 0 will never be reached as the None child + // will be turned into an unreachable, so it does not cause any problem. + auto* result = Builder(*getModule()).makeConst(Literal(int32_t(0))); + replaceCurrent(getDroppedChildrenAndAppend( + curr, *getModule(), getPassOptions(), result)); + } + } + // TODO: If an instruction would trap on null, like struct.get, we could // remove it here if it has no possible contents and if we are in // traps-never-happen mode (that is, we'd have proven it can only trap, diff --git a/test/gtest/possible-contents.cpp b/test/gtest/possible-contents.cpp index 27ab5b8fb..1e1f0e5c9 100644 --- a/test/gtest/possible-contents.cpp +++ b/test/gtest/possible-contents.cpp @@ -85,6 +85,9 @@ protected: PossibleContents::global("i32Global2", Type::i32); PossibleContents f64Global = PossibleContents::global("f64Global", Type::f64); PossibleContents anyGlobal = PossibleContents::global("anyGlobal", anyref); + PossibleContents funcGlobal = PossibleContents::global("funcGlobal", funcref); + PossibleContents nonNullFuncGlobal = + PossibleContents::global("funcGlobal", Type(HeapType::func, NonNullable)); PossibleContents nonNullFunc = PossibleContents::literal( Literal("func", Signature(Type::none, Type::none))); @@ -242,6 +245,53 @@ TEST_F(PossibleContentsTest, TestOracleMinimal) { Literal(int32_t(42))); } +// Asserts a and b have an intersection (or do not), and checks both orderings. +void assertHaveIntersection(PossibleContents a, PossibleContents b) { + EXPECT_TRUE(PossibleContents::haveIntersection(a, b)); + EXPECT_TRUE(PossibleContents::haveIntersection(b, a)); +} +void assertLackIntersection(PossibleContents a, PossibleContents b) { + EXPECT_FALSE(PossibleContents::haveIntersection(a, b)); + EXPECT_FALSE(PossibleContents::haveIntersection(b, a)); +} + +TEST_F(PossibleContentsTest, TestIntersection) { + // None has no contents, so nothing to intersect. + assertLackIntersection(none, none); + assertLackIntersection(none, i32Zero); + assertLackIntersection(none, many); + + // Many intersects with anything (but none). + assertHaveIntersection(many, many); + assertHaveIntersection(many, i32Zero); + + // Different exact types cannot intersect. + assertLackIntersection(exactI32, exactAnyref); + assertLackIntersection(i32Zero, exactAnyref); + + // But nullable ones can - the null can be the intersection. + assertHaveIntersection(exactFuncSignatureType, exactAnyref); + assertHaveIntersection(exactFuncSignatureType, funcNull); + assertHaveIntersection(anyNull, funcNull); + + // Identical types might. + assertHaveIntersection(exactI32, exactI32); + assertHaveIntersection(i32Zero, i32Zero); + assertHaveIntersection(exactFuncSignatureType, exactFuncSignatureType); + assertHaveIntersection(i32Zero, i32One); // TODO: this could be inferred false + + // Due to subtyping, an intersection might exist. + assertHaveIntersection(funcGlobal, funcGlobal); + assertHaveIntersection(funcGlobal, exactFuncSignatureType); + + // Neither is a subtype of the other, but nulls are possible, so a null can be + // the intersection. + assertHaveIntersection(funcGlobal, anyGlobal); + + // Without null on one side, we cannot intersect. + assertLackIntersection(nonNullFuncGlobal, anyGlobal); +} + TEST_F(PossibleContentsTest, TestOracleManyTypes) { // Test for a node with many possible types. The pass limits how many it // notices to not use excessive memory, so even though 4 are possible here, diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast index 42149855f..d8d12e9f0 100644 --- a/test/lit/passes/gufa-refs.wast +++ b/test/lit/passes/gufa-refs.wast @@ -2448,15 +2448,26 @@ ;; CHECK: (type $subsubstruct (struct_subtype (field i32) (field i32) (field i32) $substruct)) (type $subsubstruct (struct_subtype (field i32) (field i32) (field i32) $substruct)) + ;; CHECK: (type $other (struct_subtype data)) + (type $other (struct_subtype data)) + ;; CHECK: (type $none_=>_i32 (func_subtype (result i32) func)) ;; CHECK: (type $i32_=>_none (func_subtype (param i32) func)) + ;; CHECK: (type $i32_ref?|$struct|_ref?|$struct|_ref?|$other|_ref|$struct|_ref|$struct|_ref|$other|_=>_none (func_subtype (param i32 (ref null $struct) (ref null $struct) (ref null $other) (ref $struct) (ref $struct) (ref $other)) func)) + + ;; CHECK: (type $none_=>_ref|eq| (func_subtype (result (ref eq)) func)) + ;; CHECK: (import "a" "b" (func $import (result i32))) (import "a" "b" (func $import (result i32))) ;; CHECK: (export "ref.test-inexact" (func $ref.test-inexact)) + ;; CHECK: (export "ref.eq-zero" (func $ref.eq-zero)) + + ;; CHECK: (export "ref.eq-unknown" (func $ref.eq-unknown)) + ;; CHECK: (func $test (type $none_=>_none) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (unreachable) @@ -2747,6 +2758,286 @@ ) ) ) + + ;; CHECK: (func $ref.eq-zero (type $none_=>_none) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $ref.eq-zero (export "ref.eq-zero") + ;; We do not track specific references, so only the types can be used here. + ;; Using the types, we can infer that two different ExactTypes cannot + ;; contain the same reference, so we infer a 0. + (drop + (ref.eq + (struct.new $struct + (i32.const 1) + ) + (struct.new $substruct + (i32.const 2) + (i32.const 3) + ) + ) + ) + ;; A null and a non-null reference cannot be identical, so we infer 0. + (drop + (ref.eq + (ref.null $struct) + (struct.new $struct + (i32.const 5) + ) + ) + ) + ) + + ;; CHECK: (func $ref.eq-unknown (type $i32_ref?|$struct|_ref?|$struct|_ref?|$other|_ref|$struct|_ref|$struct|_ref|$other|_=>_none) (param $x i32) (param $struct (ref null $struct)) (param $struct2 (ref null $struct)) (param $other (ref null $other)) (param $nn-struct (ref $struct)) (param $nn-struct2 (ref $struct)) (param $nn-other (ref $other)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (struct.new $struct + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.new $struct + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (ref.null $struct) + ;; CHECK-NEXT: (ref.null $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (struct.new $struct + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (select (result (ref $substruct)) + ;; CHECK-NEXT: (struct.new $substruct + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.new $subsubstruct + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.get $other) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.get $struct2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (local.get $nn-struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (local.get $nn-struct) + ;; CHECK-NEXT: (local.get $nn-struct2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (local.get $nn-struct) + ;; CHECK-NEXT: (local.get $nn-other) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (ref.null $struct) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $ref.eq-unknown (export "ref.eq-unknown") (param $x i32) (param $struct (ref null $struct)) (param $struct2 (ref null $struct)) (param $other (ref null $other)) (param $nn-struct (ref $struct)) (param $nn-struct2 (ref $struct)) (param $nn-other (ref $other)) + ;; Here we cannot infer as the type is identical. (Though, if we used more + ;; than the type, we could see they cannot be identical.) + (drop + (ref.eq + (struct.new $struct + (i32.const 4) + ) + (struct.new $struct + (i32.const 5) + ) + ) + ) + ;; These nulls are identical, so we could infer 1, but we leave that for + ;; other passes, and do not infer here. + (drop + (ref.eq + (ref.null $struct) + (ref.null $struct) + ) + ) + ;; One side has two possible types, which we see as Many, and so we cannot + ;; infer anything here. With a cone type, however, we could infer a 0. + ;; TODO: add more tests for cone types here when we get them + (drop + (ref.eq + (struct.new $struct + (i32.const 1) + ) + (select + (struct.new $substruct + (i32.const 2) + (i32.const 3) + ) + (struct.new $subsubstruct + (i32.const 4) + (i32.const 5) + (i32.const 6) + ) + (local.get $x) + ) + ) + ) + ;; When nulls are possible, we cannot infer anything (with or without the + ;; same type on both sides). + (drop + (ref.eq + (local.get $struct) + (local.get $other) + ) + ) + (drop + (ref.eq + (local.get $struct) + (local.get $struct2) + ) + ) + ;; A null is only possible on one side, but the same non-null value could be + ;; on both. + (drop + (ref.eq + (local.get $struct) + (local.get $nn-struct) + ) + ) + ;; The type is identical, and non-null, but we don't know if the value is + ;; the same or not. + (drop + (ref.eq + (local.get $nn-struct) + (local.get $nn-struct2) + ) + ) + ;; Non-null on both sides, and incompatible types, so we should be able to + ;; infer 0, but we need cone types for that. Until we have them, these are + ;; Many and so we infer nothing. TODO + (drop + (ref.eq + (local.get $nn-struct) + (local.get $nn-other) + ) + ) + ;; We can ignore unreachable code. + (drop + (ref.eq + (ref.null $struct) + (unreachable) + ) + ) + ;; The called function here traps and never returns an actual value, which + ;; will lead to an unreachable emitted right after the call. We should not + ;; prevent that from happening: an unreachable must be emitted (we will also + ;; emit an i32.const 0, which will never be reached, and not cause issues). + (drop + (ref.eq + (ref.null $struct) + (call $unreachable) + ) + ) + ) + + ;; CHECK: (func $unreachable (type $none_=>_ref|eq|) (result (ref eq)) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $unreachable (result (ref eq)) + (unreachable) + ) + + ;; CHECK: (func $ref.eq-updates (type $none_=>_none) + ;; CHECK-NEXT: (local $x eqref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (ref.null eq) + ;; CHECK-NEXT: (ref.null eq) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (block (result (ref null $struct)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $import) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $ref.eq-updates + (local $x (ref null eq)) + ;; The local.get will be optimized to a ref.null. After that we will leave + ;; the ref.eq as it is. This guards against a possible bug of us not + ;; setting the contents of the new ref.null expression just created: the + ;; parent ref.eq will query the contents right after adding that expression, + ;; and the contents must be set or else we'll think nothing is possible + ;; there. + ;; + ;; (We could optimize ref.eq of two nulls to 1, but we leave that for other + ;; passes.) + (drop + (ref.eq + (ref.null eq) + (local.get $x) + ) + ) + ;; Another situation we need to be careful with effects of updates. Here + ;; we have a block whose result we can infer to a null, but that does not + ;; let us optimize the ref.eq, and we also must be careful to not drop side + ;; effects - the call must remain. + (drop + (ref.eq + (block (result eqref) + (drop + (call $import) + ) + (ref.null $struct) + ) + (ref.null $struct) + ) + ) + ) ) (module |