3 files changed, 239 insertions, 19 deletions
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp
index 2efd61391..209028f25 100644
--- a/src/ir/possible-contents.cpp
+++ b/src/ir/possible-contents.cpp
@@ -1359,8 +1359,13 @@ private:
   // exists it is not added.
   void connectDuringFlow(Location from, Location to);
 
-  // Contents sent to a global location can be filtered in a special way during
-  // the flow, which is handled in this helper.
+  // Contents sent to certain locations can be filtered in a special way during
+  // the flow, which is handled in these helpers. These may update
+  // |worthSendingMore| which is whether it is worth sending any more content to
+  // this location in the future.
+  void filterExpressionContents(PossibleContents& contents,
+                                const ExpressionLocation& exprLoc,
+                                bool& worthSendingMore);
   void filterGlobalContents(PossibleContents& contents,
                             const GlobalLocation& globalLoc);
 
@@ -1384,6 +1389,30 @@ private:
   // We will need subtypes during the flow, so compute them once ahead of time.
   std::unique_ptr<SubTypes> subTypes;
 
+  // The depth of children for each type. This is 0 if the type has no
+  // subtypes, 1 if it has subtypes but none of those have subtypes themselves,
+  // and so forth.
+  std::unordered_map<HeapType, Index> maxDepths;
+
+  // Given a ConeType, return the normalized depth, that is, the canonical depth
+  // given the actual children it has. If this is a full cone, then we can
+  // always pick the actual maximal depth and use that instead of FullDepth==-1.
+  // For a non-full cone, we also reduce the depth as much as possible, so it is
+  // equal to the maximum depth of an existing subtype.
+  Index getNormalizedConeDepth(Type type, Index depth) {
+    return std::min(depth, maxDepths[type.getHeapType()]);
+  }
+
+  void normalizeConeType(PossibleContents& cone) {
+    assert(cone.isConeType());
+    auto type = cone.getType();
+    auto before = cone.getCone().depth;
+    auto normalized = getNormalizedConeDepth(type, before);
+    if (normalized != before) {
+      cone = PossibleContents::coneType(type, normalized);
+    }
+  }
+
 #if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
   // Dump out a location for debug purposes.
   void dump(Location location);
@@ -1530,6 +1559,7 @@ Flower::Flower(Module& wasm) : wasm(wasm) {
   if (getTypeSystem() == TypeSystem::Nominal ||
       getTypeSystem() == TypeSystem::Isorecursive) {
     subTypes = std::make_unique<SubTypes>(wasm);
+    maxDepths = subTypes->getMaxDepths();
   }
 
 #ifdef POSSIBLE_CONTENTS_DEBUG
@@ -1616,14 +1646,23 @@ bool Flower::updateContents(LocationIndex locationIndex,
 
   // It is not worth sending any more to this location if we are now in the
   // worst possible case, as no future value could cause any change.
-  //
-  // Many is always the worst possible case. A cone type of a non-reference is
-  // also the worst case, since subtyping is not relevant there, and so if we
-  // only know something about the type then we already know nothing beyond what
-  // the type in the wasm tells us (and from there we can only go to Many).
-  bool worthSendingMore = !contents.isMany();
-  if (!contents.getType().isRef() && contents.isConeType()) {
-    worthSendingMore = false;
+  bool worthSendingMore = true;
+  if (contents.isConeType()) {
+    if (!contents.getType().isRef()) {
+      // A cone type of a non-reference is the worst case, since subtyping is
+      // not relevant there, and so if we only know something about the type
+      // then we already know nothing beyond what the type in the wasm tells us
+      // (and from there we can only go to Many).
+      worthSendingMore = false;
+    } else {
+      // Normalize all reference cones. There is never a point to flow around
+      // anything non-normalized, which might lead to extra work. For example,
+      // if A has no subtypes, then a full cone for A is really the same as one
+      // with depth 0 (an exact type). And we don't want to see the full cone
+      // arrive and think it was an improvement over the one with depth 0 and do
+      // more flowing based on that.
+      normalizeConeType(contents);
+    }
   }
 
   // Check if anything changed. Note that we check not just the content but
@@ -1639,16 +1678,30 @@ bool Flower::updateContents(LocationIndex locationIndex,
 
   // Handle special cases: Some locations can only contain certain contents, so
   // filter accordingly.
-  if (auto* globalLoc =
-        std::get_if<GlobalLocation>(&getLocation(locationIndex))) {
+  auto location = getLocation(locationIndex);
+  bool filtered = false;
+  if (auto* exprLoc = std::get_if<ExpressionLocation>(&location)) {
+    // TODO: Replace this with specific filterFoo or flowBar methods like we
+    //       have for flowRefCast and filterGlobalContents. That could save a
+    //       little wasted work here. Might be best to do that after the spec is
+    //       fully stable.
+    filterExpressionContents(contents, *exprLoc, worthSendingMore);
+    filtered = true;
+  } else if (auto* globalLoc = std::get_if<GlobalLocation>(&location)) {
     filterGlobalContents(contents, *globalLoc);
-    if (contents == oldContents &&
-        contents.getType() == oldContents.getType()) {
-      // Nothing actually changed after filtering, so just return.
-      return worthSendingMore;
-    }
+    filtered = true;
   }
 
+  if (filtered && contents == oldContents &&
+      contents.getType() == oldContents.getType()) {
+    // Nothing actually changed after filtering, so just return.
+    return worthSendingMore;
+  }
+
+  // After filtering we should always have more precise information than "many"
+  // - in the worst case, we can have the type declared in the wasm.
+  assert(!contents.isMany());
+
 #if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
   std::cout << "  updateContents has something new\n";
   contents.dump(std::cout, &wasm);
@@ -1776,6 +1829,68 @@ void Flower::connectDuringFlow(Location from, Location to) {
   }
 }
 
+void Flower::filterExpressionContents(PossibleContents& contents,
+                                      const ExpressionLocation& exprLoc,
+                                      bool& worthSendingMore) {
+  auto type = exprLoc.expr->type;
+  if (!type.isRef()) {
+    return;
+  }
+
+  // The caller cannot know of a situation where it might not be worth sending
+  // more to a reference - all that logic is in here. That is, the rest of this
+  // function is the only place we can mark |worthSendingMore| as false for a
+  // reference.
+  assert(worthSendingMore);
+
+  // The maximal contents here are the declared type and all subtypes. Nothing
+  // else can pass through, so filter such things out.
+  auto maximalContents = PossibleContents::fullConeType(type);
+  contents.intersectWithFullCone(maximalContents);
+  if (contents.isNone()) {
+    // Nothing was left here at all.
+    return;
+  }
+
+  // Normalize the intersection. We want to check later if any more content can
+  // arrive here, and also we want to avoid flowing around anything non-
+  // normalized, as explained earlier.
+  //
+  // Note that this normalization is necessary even though |contents| was
+  // normalized before the intersection, e.g.:
+  /*
+  //      A
+  //     / \
+  //    B   C
+  //        |
+  //        D
+  */
+  // Consider the case where |maximalContents| is Cone(B, Infinity) and the
+  // original |contents| was Cone(A, 2) (which is normalized). The naive
+  // intersection is Cone(B, 1), since the core intersection logic makes no
+  // assumptions about the rest of the types. That is then normalized to
+  // Cone(B, 0) since there happens to be no subtypes for B.
+  //
+  // Note that the intersection may also not be a cone type, if it is a global
+  // or literal. In that case we don't have anything more to do here.
+  if (!contents.isConeType()) {
+    return;
+  }
+
+  normalizeConeType(contents);
+
+  // There is a chance that the intersection is equal to the maximal contents,
+  // which would mean nothing more can arrive here. (Note that we can't
+  // normalize |maximalContents| before the intersection as
+  // intersectWithFullCone assumes a full/infinite cone.)
+  normalizeConeType(maximalContents);
+
+  if (contents == maximalContents) {
+    // We already contain everything possible, so this is the worst case.
+    worthSendingMore = false;
+  }
+}
+
 void Flower::filterGlobalContents(PossibleContents& contents,
                                   const GlobalLocation& globalLoc) {
   auto* global = wasm.getGlobal(globalLoc.name);
diff --git a/test/gtest/possible-contents.cpp b/test/gtest/possible-contents.cpp
index 7b8f0c4a8..802d08c18 100644
--- a/test/gtest/possible-contents.cpp
+++ b/test/gtest/possible-contents.cpp
@@ -854,6 +854,33 @@ TEST_F(PossibleContentsTest, TestOracleManyTypes) {
   auto bodyContents =
     oracle.getContents(ResultLocation{wasm->getFunction("foo"), 0});
   ASSERT_TRUE(bodyContents.isConeType());
-  EXPECT_TRUE(bodyContents.getType().getHeapType() == HeapType::data);
-  EXPECT_TRUE(bodyContents.getCone().depth == 1);
+  EXPECT_EQ(bodyContents.getType().getHeapType(), HeapType::data);
+  EXPECT_EQ(bodyContents.getCone().depth, Index(1));
+}
+
+TEST_F(PossibleContentsTest, TestOracleNoFullCones) {
+  // PossibleContents should be normalized, so we never have full cones (depth
+  // infinity).
+  auto wasm = parse(R"(
+    (module
+      (type $A (struct_subtype (field i32) data))
+      (type $B (struct_subtype (field i32) $A))
+      (type $C (struct_subtype (field i32) $B))
+      (func $foo (export "foo")
+        ;; Note we must declare $C so that $B and $C have uses and are not
+        ;; removed automatically from consideration.
+        (param $a (ref $A)) (param $c (ref $C))
+        (result (ref any))
+        (local.get $a)
+      )
+    )
+  )");
+  ContentOracle oracle(*wasm);
+  // The function is exported, and all we know about the parameter $a is that it
+  // is some subtype of $A. This is normalized to depth 2 because that is the
+  // actual depth of subtypes.
+  auto bodyContents =
+    oracle.getContents(ResultLocation{wasm->getFunction("foo"), 0});
+  ASSERT_TRUE(bodyContents.isConeType());
+  EXPECT_EQ(bodyContents.getCone().depth, Index(2));
 }
diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast
index ec58e10a0..a5ef2735a 100644
--- a/test/lit/passes/gufa-refs.wast
+++ b/test/lit/passes/gufa-refs.wast
@@ -5209,6 +5209,8 @@
 
   ;; CHECK:      (type $none_=>_ref|$A| (func_subtype (result (ref $A)) func))
 
+  ;; CHECK:      (type $none_=>_none (func_subtype func))
+
   ;; CHECK:      (import "a" "b" (global $A (ref $A)))
   (import "a" "b" (global $A (ref $A)))
 
@@ -5324,4 +5326,80 @@
       )
     )
   )
+
+  ;; CHECK:      (func $filtering (type $none_=>_none)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (block $B (result (ref $B))
+  ;; CHECK-NEXT:      (drop
+  ;; CHECK-NEXT:       (br_on_cast_static $B $B
+  ;; CHECK-NEXT:        (struct.new $A
+  ;; CHECK-NEXT:         (i32.const 100)
+  ;; CHECK-NEXT:        )
+  ;; CHECK-NEXT:       )
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:      (unreachable)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (block $A (result (ref $A))
+  ;; CHECK-NEXT:      (drop
+  ;; CHECK-NEXT:       (br_on_cast_static $A $A
+  ;; CHECK-NEXT:        (struct.new $A
+  ;; CHECK-NEXT:         (i32.const 200)
+  ;; CHECK-NEXT:        )
+  ;; CHECK-NEXT:       )
+  ;; CHECK-NEXT:      )
+  ;; CHECK-NEXT:      (unreachable)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $filtering
+    ;; Check for filtering of values by the declared type in the wasm. We do not
+    ;; have specific filtering or flowing for br_on_* yet, so it will always
+    ;; send the value to the branch target. But the target has a declared type
+    ;; of $B, which means the exact $A gets filtered out, and nothing remains,
+    ;; so we can append an unreachable.
+    ;;
+    ;; When we add filtering/flowing for br_on_* this test should continue to
+    ;; pass and only the comment will need to be updated, so if you are reading
+    ;; this and it is stale, please fix that :)
+    (drop
+      (block $B (result (ref $B))
+        (drop
+          (br_on_cast_static $B $B
+            (struct.new $A
+              (i32.const 100)
+            )
+          )
+        )
+        (unreachable)
+      )
+    )
+    ;; But casting to $A will succeed, so the block is reachable, and also the
+    ;; cast will return 1.
+    (drop
+      (ref.test_static $A
+        (block $A (result (ref $A))
+          (drop
+            (br_on_cast_static $A $A
+              (struct.new $A
+                (i32.const 200)
+              )
+            )
+          )
+          (unreachable)
+        )
+      )
+    )
+  )
 )