2 files changed, 105 insertions, 6 deletions
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp
index d63830c40..6c6c1b97e 100644
--- a/src/ir/possible-contents.cpp
+++ b/src/ir/possible-contents.cpp
@@ -1695,6 +1695,34 @@ bool Flower::updateContents(LocationIndex locationIndex,
   std::cout << '\n';
 #endif
 
+  auto location = getLocation(locationIndex);
+
+  // Handle special cases: Some locations can only contain certain contents, so
+  // filter accordingly. In principle we need to filter both before and after
+  // combining with existing content; filtering afterwards is obviously
+  // necessary as combining two things will create something larger than both,
+  // and our representation has limitations (e.g. two different ref types will
+  // result in a cone, potentially a very large one). Filtering beforehand is
+  // necessary for a more subtle reason: consider a location that contains
+  // an i8 which is sent a 0 and then 0x100. If we filter only after, then we'd
+  // combine 0 and 0x100 first and get "unknown integer"; only by filtering
+  // 0x100 to 0 beforehand (since 0x100 & 0xff => 0) will we combine 0 and 0 and
+  // not change anything, which is correct.
+  //
+  // For efficiency reasons we aim to only filter once, depending on the type of
+  // filtering. Most can be filtered a single time afterwards, while for data
+  // locations, where the issue is packed integer fields, it's necessary to do
+  // it before as we've mentioned, and also sufficient (see details in
+  // filterDataContents).
+  if (auto* dataLoc = std::get_if<DataLocation>(&location)) {
+    filterDataContents(newContents, *dataLoc);
+#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
+    std::cout << "  pre-filtered contents:\n";
+    newContents.dump(std::cout, &wasm);
+    std::cout << '\n';
+#endif
+  }
+
   contents.combine(newContents);
 
   if (contents.isNone()) {
@@ -1730,9 +1758,7 @@ bool Flower::updateContents(LocationIndex locationIndex,
     return worthSendingMore;
   }
 
-  // Handle special cases: Some locations can only contain certain contents, so
-  // filter accordingly.
-  auto location = getLocation(locationIndex);
+  // Handle filtering (see comment earlier, this is the later filtering stage).
   bool filtered = false;
   if (auto* exprLoc = std::get_if<ExpressionLocation>(&location)) {
     // TODO: Replace this with specific filterFoo or flowBar methods like we
@@ -1743,9 +1769,6 @@ bool Flower::updateContents(LocationIndex locationIndex,
   } else if (auto* globalLoc = std::get_if<GlobalLocation>(&location)) {
     filterGlobalContents(contents, *globalLoc);
     filtered = true;
-  } else if (auto* dataLoc = std::get_if<DataLocation>(&location)) {
-    filterDataContents(contents, *dataLoc);
-    filtered = true;
   }
 
   // Check if anything changed after filtering, if we did so.
@@ -1999,6 +2022,18 @@ void Flower::filterDataContents(PossibleContents& contents,
       //      value reach.
       contents = PossibleContents::fromType(contents.getType());
     }
+    // Given that the above only (1) turns an i32 into a masked i32 or (2) turns
+    // anything else into an unknown i32, this is safe to run as pre-filtering,
+    // that is, before we combine contents, since
+    //
+    //  (a) two constants are ok as masking is distributive,
+    //        (x & M) U (y & M)  ==  (x U y) & M
+    //  (b) if one is a constant and the other is not then
+    //        (x & M) U ?  ==  ?  ==  (x U ?)  ==  (x U ?) & M
+    //      (where ? is an unknown i32)
+    //  (c) and if both are not constants then likewise we always end up as an
+    //      unknown i32
+    //
   }
 }
 
diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast
index fe3eb70fc..a1652c7d3 100644
--- a/test/lit/passes/gufa-refs.wast
+++ b/test/lit/passes/gufa-refs.wast
@@ -5525,3 +5525,67 @@
     )
   )
 )
+
+;; Packed field combination.
+(module
+  ;; CHECK:      (type $A (struct (field i8)))
+  (type $A (struct (field i8)))
+  (type $B (struct (field i8)))
+
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (func $A (type $none_=>_none)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get_u $A 0
+  ;; CHECK-NEXT:    (struct.new $A
+  ;; CHECK-NEXT:     (i32.const 305419896)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get_u $A 0
+  ;; CHECK-NEXT:    (struct.new_default $A)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $A
+    ;; We write two values to $A, which are different, so we cannot infer.
+    (drop
+      (struct.get_u $A 0
+        (struct.new $A
+          (i32.const 0x12345678)
+        )
+      )
+    )
+    (drop
+      (struct.get_u $A 0
+        (struct.new_default $A)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $B (type $none_=>_none)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $B
+    ;; We write two values to $B, which *seem* different, but given the field is
+    ;; packed they are both actually 0, so we can optimize here.
+    (drop
+      (struct.get_u $B 0
+        (struct.new $B
+          (i32.const 0x12345600) ;; only this changed compared to func $A
+        )
+      )
+    )
+    (drop
+      (struct.get_u $B 0
+        (struct.new_default $B)
+      )
+    )
+  )
+)