[Wasm GC] Filter GUFA expression locations by their type (#5149)

Now that we have a cone type, we are able to represent in PossibleContents the natural content of a wasm location: a type or any of its subtypes. This allows us to enforce the wasm typing rules, that is, to filter the data arriving at a location by the wasm type of the location. Technically this could be unnecessary if we had full implementations of flowFoo and so forth, that is, tailored code for each wasm expression that makes sure we only contain and flow content that fits in the wasm type. Atm we don't have that, and until the wasm spec stabilizes it's probably not worth the effort. Instead, simply filter based on the type, which gives the same result (though it does take a little more work; I measured it at 3% or so of runtime). While doing so normalize cones to their actual maximum depth, which simplifies things and will help more later as well.
author: Alon Zakai <azakai@google.com> 2022-10-18 14:46:06 -0700
committer: GitHub <noreply@github.com> 2022-10-18 14:46:06 -0700
commit: 6a60b9e1d9fbaeeda20ac31b289fda4daa48d209 (patch)
tree: 6cef1e1282e2d50b334a086c115ea095cf204f74 /src/ir/possible-contents.cpp
parent: e4fd739f72eae49aa47bef06af9f38625a3a9f33 (diff)
download: binaryen-6a60b9e1d9fbaeeda20ac31b289fda4daa48d209.tar.gz
binaryen-6a60b9e1d9fbaeeda20ac31b289fda4daa48d209.tar.bz2
binaryen-6a60b9e1d9fbaeeda20ac31b289fda4daa48d209.zip
1 files changed, 132 insertions, 17 deletions
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp
index 2efd61391..209028f25 100644
--- a/src/ir/possible-contents.cpp
+++ b/src/ir/possible-contents.cpp
@@ -1359,8 +1359,13 @@ private:
   // exists it is not added.
   void connectDuringFlow(Location from, Location to);
 
-  // Contents sent to a global location can be filtered in a special way during
-  // the flow, which is handled in this helper.
+  // Contents sent to certain locations can be filtered in a special way during
+  // the flow, which is handled in these helpers. These may update
+  // |worthSendingMore| which is whether it is worth sending any more content to
+  // this location in the future.
+  void filterExpressionContents(PossibleContents& contents,
+                                const ExpressionLocation& exprLoc,
+                                bool& worthSendingMore);
   void filterGlobalContents(PossibleContents& contents,
                             const GlobalLocation& globalLoc);
 
@@ -1384,6 +1389,30 @@ private:
   // We will need subtypes during the flow, so compute them once ahead of time.
   std::unique_ptr<SubTypes> subTypes;
 
+  // The depth of children for each type. This is 0 if the type has no
+  // subtypes, 1 if it has subtypes but none of those have subtypes themselves,
+  // and so forth.
+  std::unordered_map<HeapType, Index> maxDepths;
+
+  // Given a ConeType, return the normalized depth, that is, the canonical depth
+  // given the actual children it has. If this is a full cone, then we can
+  // always pick the actual maximal depth and use that instead of FullDepth==-1.
+  // For a non-full cone, we also reduce the depth as much as possible, so it is
+  // equal to the maximum depth of an existing subtype.
+  Index getNormalizedConeDepth(Type type, Index depth) {
+    return std::min(depth, maxDepths[type.getHeapType()]);
+  }
+
+  void normalizeConeType(PossibleContents& cone) {
+    assert(cone.isConeType());
+    auto type = cone.getType();
+    auto before = cone.getCone().depth;
+    auto normalized = getNormalizedConeDepth(type, before);
+    if (normalized != before) {
+      cone = PossibleContents::coneType(type, normalized);
+    }
+  }
+
 #if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
   // Dump out a location for debug purposes.
   void dump(Location location);
@@ -1530,6 +1559,7 @@ Flower::Flower(Module& wasm) : wasm(wasm) {
   if (getTypeSystem() == TypeSystem::Nominal ||
       getTypeSystem() == TypeSystem::Isorecursive) {
     subTypes = std::make_unique<SubTypes>(wasm);
+    maxDepths = subTypes->getMaxDepths();
   }
 
 #ifdef POSSIBLE_CONTENTS_DEBUG
@@ -1616,14 +1646,23 @@ bool Flower::updateContents(LocationIndex locationIndex,
 
   // It is not worth sending any more to this location if we are now in the
   // worst possible case, as no future value could cause any change.
-  //
-  // Many is always the worst possible case. A cone type of a non-reference is
-  // also the worst case, since subtyping is not relevant there, and so if we
-  // only know something about the type then we already know nothing beyond what
-  // the type in the wasm tells us (and from there we can only go to Many).
-  bool worthSendingMore = !contents.isMany();
-  if (!contents.getType().isRef() && contents.isConeType()) {
-    worthSendingMore = false;
+  bool worthSendingMore = true;
+  if (contents.isConeType()) {
+    if (!contents.getType().isRef()) {
+      // A cone type of a non-reference is the worst case, since subtyping is
+      // not relevant there, and so if we only know something about the type
+      // then we already know nothing beyond what the type in the wasm tells us
+      // (and from there we can only go to Many).
+      worthSendingMore = false;
+    } else {
+      // Normalize all reference cones. There is never a point to flow around
+      // anything non-normalized, which might lead to extra work. For example,
+      // if A has no subtypes, then a full cone for A is really the same as one
+      // with depth 0 (an exact type). And we don't want to see the full cone
+      // arrive and think it was an improvement over the one with depth 0 and do
+      // more flowing based on that.
+      normalizeConeType(contents);
+    }
   }
 
   // Check if anything changed. Note that we check not just the content but
@@ -1639,16 +1678,30 @@ bool Flower::updateContents(LocationIndex locationIndex,
 
   // Handle special cases: Some locations can only contain certain contents, so
   // filter accordingly.
-  if (auto* globalLoc =
-        std::get_if<GlobalLocation>(&getLocation(locationIndex))) {
+  auto location = getLocation(locationIndex);
+  bool filtered = false;
+  if (auto* exprLoc = std::get_if<ExpressionLocation>(&location)) {
+    // TODO: Replace this with specific filterFoo or flowBar methods like we
+    //       have for flowRefCast and filterGlobalContents. That could save a
+    //       little wasted work here. Might be best to do that after the spec is
+    //       fully stable.
+    filterExpressionContents(contents, *exprLoc, worthSendingMore);
+    filtered = true;
+  } else if (auto* globalLoc = std::get_if<GlobalLocation>(&location)) {
     filterGlobalContents(contents, *globalLoc);
-    if (contents == oldContents &&
-        contents.getType() == oldContents.getType()) {
-      // Nothing actually changed after filtering, so just return.
-      return worthSendingMore;
-    }
+    filtered = true;
   }
 
+  if (filtered && contents == oldContents &&
+      contents.getType() == oldContents.getType()) {
+    // Nothing actually changed after filtering, so just return.
+    return worthSendingMore;
+  }
+
+  // After filtering we should always have more precise information than "many"
+  // - in the worst case, we can have the type declared in the wasm.
+  assert(!contents.isMany());
+
 #if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
   std::cout << "  updateContents has something new\n";
   contents.dump(std::cout, &wasm);
@@ -1776,6 +1829,68 @@ void Flower::connectDuringFlow(Location from, Location to) {
   }
 }
 
+void Flower::filterExpressionContents(PossibleContents& contents,
+                                      const ExpressionLocation& exprLoc,
+                                      bool& worthSendingMore) {
+  auto type = exprLoc.expr->type;
+  if (!type.isRef()) {
+    return;
+  }
+
+  // The caller cannot know of a situation where it might not be worth sending
+  // more to a reference - all that logic is in here. That is, the rest of this
+  // function is the only place we can mark |worthSendingMore| as false for a
+  // reference.
+  assert(worthSendingMore);
+
+  // The maximal contents here are the declared type and all subtypes. Nothing
+  // else can pass through, so filter such things out.
+  auto maximalContents = PossibleContents::fullConeType(type);
+  contents.intersectWithFullCone(maximalContents);
+  if (contents.isNone()) {
+    // Nothing was left here at all.
+    return;
+  }
+
+  // Normalize the intersection. We want to check later if any more content can
+  // arrive here, and also we want to avoid flowing around anything non-
+  // normalized, as explained earlier.
+  //
+  // Note that this normalization is necessary even though |contents| was
+  // normalized before the intersection, e.g.:
+  /*
+  //      A
+  //     / \
+  //    B   C
+  //        |
+  //        D
+  */
+  // Consider the case where |maximalContents| is Cone(B, Infinity) and the
+  // original |contents| was Cone(A, 2) (which is normalized). The naive
+  // intersection is Cone(B, 1), since the core intersection logic makes no
+  // assumptions about the rest of the types. That is then normalized to
+  // Cone(B, 0) since there happens to be no subtypes for B.
+  //
+  // Note that the intersection may also not be a cone type, if it is a global
+  // or literal. In that case we don't have anything more to do here.
+  if (!contents.isConeType()) {
+    return;
+  }
+
+  normalizeConeType(contents);
+
+  // There is a chance that the intersection is equal to the maximal contents,
+  // which would mean nothing more can arrive here. (Note that we can't
+  // normalize |maximalContents| before the intersection as
+  // intersectWithFullCone assumes a full/infinite cone.)
+  normalizeConeType(maximalContents);
+
+  if (contents == maximalContents) {
+    // We already contain everything possible, so this is the worst case.
+    worthSendingMore = false;
+  }
+}
+
 void Flower::filterGlobalContents(PossibleContents& contents,
                                   const GlobalLocation& globalLoc) {
   auto* global = wasm.getGlobal(globalLoc.name);
author	Alon Zakai <azakai@google.com>	2022-10-18 14:46:06 -0700
committer	GitHub <noreply@github.com>	2022-10-18 14:46:06 -0700
commit	6a60b9e1d9fbaeeda20ac31b289fda4daa48d209 (patch)
tree	6cef1e1282e2d50b334a086c115ea095cf204f74 /src/ir/possible-contents.cpp
parent	e4fd739f72eae49aa47bef06af9f38625a3a9f33 (diff)
download	binaryen-6a60b9e1d9fbaeeda20ac31b289fda4daa48d209.tar.gz binaryen-6a60b9e1d9fbaeeda20ac31b289fda4daa48d209.tar.bz2 binaryen-6a60b9e1d9fbaeeda20ac31b289fda4daa48d209.zip