4 files changed, 228 insertions, 20 deletions
diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h
index f68692363..bc2eb5485 100644
--- a/src/ir/struct-utils.h
+++ b/src/ir/struct-utils.h
@@ -221,15 +221,21 @@ public:
   SubTypes subTypes;
 
   void propagateToSuperTypes(StructValuesMap<T>& infos) {
-    propagate(infos, false);
+    propagate(infos, false, true);
+  }
+
+  void propagateToSubTypes(StructValuesMap<T>& infos) {
+    propagate(infos, true, false);
   }
 
   void propagateToSuperAndSubTypes(StructValuesMap<T>& infos) {
-    propagate(infos, true);
+    propagate(infos, true, true);
   }
 
 private:
-  void propagate(StructValuesMap<T>& combinedInfos, bool toSubTypes) {
+  void propagate(StructValuesMap<T>& combinedInfos,
+                 bool toSubTypes,
+                 bool toSuperTypes) {
     UniqueDeferredQueue<HeapType> work;
     for (auto& [type, _] : combinedInfos) {
       work.push(type);
@@ -238,13 +244,15 @@ private:
       auto type = work.pop();
       auto& infos = combinedInfos[type];
 
-      // Propagate shared fields to the supertype.
-      if (auto superType = type.getSuperType()) {
-        auto& superInfos = combinedInfos[*superType];
-        auto& superFields = superType->getStruct().fields;
-        for (Index i = 0; i < superFields.size(); i++) {
-          if (superInfos[i].combine(infos[i])) {
-            work.push(*superType);
+      if (toSuperTypes) {
+        // Propagate shared fields to the supertype.
+        if (auto superType = type.getSuperType()) {
+          auto& superInfos = combinedInfos[*superType];
+          auto& superFields = superType->getStruct().fields;
+          for (Index i = 0; i < superFields.size(); i++) {
+            if (superInfos[i].combine(infos[i])) {
+              work.push(*superType);
+            }
           }
         }
       }
diff --git a/src/ir/type-updating.cpp b/src/ir/type-updating.cpp
index aff67b4df..ff154dd2f 100644
--- a/src/ir/type-updating.cpp
+++ b/src/ir/type-updating.cpp
@@ -75,6 +75,12 @@ void GlobalTypeRewriter::update() {
   }
 
   auto buildResults = typeBuilder.build();
+#ifndef NDEBUG
+  if (auto* err = buildResults.getError()) {
+    Fatal() << "Internal GlobalTypeRewriter build error: " << err->reason
+            << " at index " << err->index;
+  }
+#endif
   auto& newTypes = *buildResults;
 
   // Map the old types to the new ones. This uses the fact that type indices
diff --git a/src/passes/GlobalTypeOptimization.cpp b/src/passes/GlobalTypeOptimization.cpp
index e8cee2b10..7e1ebf04e 100644
--- a/src/passes/GlobalTypeOptimization.cpp
+++ b/src/passes/GlobalTypeOptimization.cpp
@@ -31,7 +31,6 @@
 #include "ir/type-updating.h"
 #include "ir/utils.h"
 #include "pass.h"
-#include "support/small_set.h"
 #include "wasm-builder.h"
 #include "wasm-type.h"
 #include "wasm.h"
@@ -137,6 +136,17 @@ struct GlobalTypeOptimization : public Pass {
     //    read in any sub or supertype, as such a read may alias any of those
     //    types (where the field is present).
     //
+    //    Note that we *can* propagate reads only to supertypes, but we are
+    //    limited in what we optimize. If type A has fields {a, b}, and its
+    //    subtype B has the same fields, and if field a is only used in reads of
+    //    type B, then we still cannot remove it. If we removed it then A would
+    //    have fields {b}, that is, field b would be at index 0, while type B
+    //    would still be {a, b} which has field b at index 1, which is not
+    //    compatible. The only case in which we can optimize is to remove a
+    //    field from the end, that is, we could remove field b from A.
+    //    Otherwise, as mentioned before we can only remove a field if we also
+    //    remove it from all sub- and super-types.
+    //
     //  * For immutability, this is necessary because we cannot have a
     //    supertype's field be immutable while a subtype's is not - they must
     //    match for us to preserve subtyping.
@@ -147,7 +157,10 @@ struct GlobalTypeOptimization : public Pass {
     //    immutable). Note that by making more things immutable we therefore
     //    make it possible to apply more specific subtypes in subtype fields.
     StructUtils::TypeHierarchyPropagator<FieldInfo> propagator(*module);
-    propagator.propagateToSuperAndSubTypes(combinedSetGetInfos);
+    auto subSupers = combinedSetGetInfos;
+    propagator.propagateToSuperAndSubTypes(subSupers);
+    auto subs = std::move(combinedSetGetInfos);
+    propagator.propagateToSubTypes(subs);
 
     // Process the propagated info.
     for (auto type : propagator.subTypes.types) {
@@ -155,7 +168,8 @@ struct GlobalTypeOptimization : public Pass {
         continue;
       }
       auto& fields = type.getStruct().fields;
-      auto& infos = combinedSetGetInfos[type];
+      auto& subSuper = subSupers[type];
+      auto& sub = subs[type];
 
       // Process immutability.
       for (Index i = 0; i < fields.size(); i++) {
@@ -164,7 +178,7 @@ struct GlobalTypeOptimization : public Pass {
           continue;
         }
 
-        if (infos[i].hasWrite) {
+        if (subSuper[i].hasWrite) {
           // A set exists.
           continue;
         }
@@ -175,16 +189,42 @@ struct GlobalTypeOptimization : public Pass {
         vec[i] = true;
       }
 
-      // Process removability. First, see if we can remove anything before we
-      // start to allocate info for that.
-      if (std::any_of(infos.begin(), infos.end(), [&](const FieldInfo& info) {
-            return !info.hasRead;
-          })) {
+      // Process removability. We check separately for the ability to
+      // remove in a general way based on sub+super-propagated info (that is,
+      // fields that are not used in sub- or super-types, and so we can
+      // definitely remove them from all the relevant types) and also in the
+      // specific way that only works for removing at the end, which as
+      // mentioned above only looks at super-types.
+      std::set<Index> removableIndexes;
+      for (Index i = 0; i < fields.size(); i++) {
+        if (!subSuper[i].hasRead) {
+          removableIndexes.insert(i);
+        }
+      }
+      for (int i = int(fields.size()) - 1; i >= 0; i--) {
+        // Unlike above, a write would stop us here: above we propagated to both
+        // sub- and super-types, which means if we see no reads then there is no
+        // possible read of the data at all. But here we just propagated to
+        // subtypes, and so we need to care about the case where the parent
+        // writes to a field but does not read from it - we still need those
+        // writes to happen as children may read them. (Note that if no child
+        // reads this field, and since we check for reads in parents here, that
+        // means the field is not read anywhere at all, and we would have
+        // handled that case in the previous loop anyhow.)
+        if (!sub[i].hasRead && !sub[i].hasWrite) {
+          removableIndexes.insert(i);
+        } else {
+          // Once we see something we can't remove, we must stop, as we can only
+          // remove from the end in this case.
+          break;
+        }
+      }
+      if (!removableIndexes.empty()) {
         auto& indexesAfterRemoval = indexesAfterRemovals[type];
         indexesAfterRemoval.resize(fields.size());
         Index skip = 0;
         for (Index i = 0; i < fields.size(); i++) {
-          if (infos[i].hasRead) {
+          if (!removableIndexes.count(i)) {
             indexesAfterRemoval[i] = i - skip;
           } else {
             indexesAfterRemoval[i] = RemovedField;
diff --git a/test/lit/passes/gto-removals.wast b/test/lit/passes/gto-removals.wast
index fc5ed90bf..92e98ee2a 100644
--- a/test/lit/passes/gto-removals.wast
+++ b/test/lit/passes/gto-removals.wast
@@ -635,3 +635,157 @@
     (unreachable)
   )
 )
+
+;; We can remove fields from the end if they are only used in subtypes, because
+;; the subtypes can always add fields at the end (and only at the end).
+(module
+  ;; CHECK:      (type $child (struct_subtype (field i32) (field i64) (field f32) (field f64) (field anyref) $parent))
+  (type $child (struct_subtype (field i32) (field i64) (field f32) (field f64) (field anyref) $parent))
+
+  ;; CHECK:      (type $parent (struct_subtype (field i32) (field i64) data))
+  (type $parent (struct_subtype (field i32) (field i64) (field f32) (field f64) data))
+
+  ;; CHECK:      (type $ref|$parent|_ref|$child|_=>_none (func_subtype (param (ref $parent) (ref $child)) func))
+
+  ;; CHECK:      (func $func (type $ref|$parent|_ref|$child|_=>_none) (param $x (ref $parent)) (param $y (ref $child))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $parent 1
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child 0
+  ;; CHECK-NEXT:    (local.get $y)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child 2
+  ;; CHECK-NEXT:    (local.get $y)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child 3
+  ;; CHECK-NEXT:    (local.get $y)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child 4
+  ;; CHECK-NEXT:    (local.get $y)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $func (param $x (ref $parent)) (param $y (ref $child))
+    ;; The parent has fields 0, 1, 2, 3 and the child adds 4.
+    ;; Use fields only 1 in the parent, and all the rest in the child. We can
+    ;; only remove from the end in the child, which means we can remove 2 and 3
+    ;; in the parent, but not 0.
+    (drop (struct.get $parent 1 (local.get $x)))
+    (drop (struct.get $child  0 (local.get $y)))
+    (drop (struct.get $child  2 (local.get $y)))
+    (drop (struct.get $child  3 (local.get $y)))
+    (drop (struct.get $child  4 (local.get $y)))
+  )
+)
+
+(module
+  ;; CHECK:      (type $child (struct_subtype (field i32) (field i64) (field (mut f32)) (field f64) (field anyref) $parent))
+  (type $child (struct_subtype (field (mut i32)) (field (mut i64)) (field (mut f32)) (field (mut f64)) (field (mut anyref)) $parent))
+
+  ;; CHECK:      (type $parent (struct_subtype (field i32) (field i64) (field (mut f32)) data))
+  (type $parent (struct_subtype (field (mut i32)) (field (mut i64)) (field (mut f32)) (field (mut f64)) data))
+
+  ;; CHECK:      (type $ref|$parent|_ref|$child|_=>_none (func_subtype (param (ref $parent) (ref $child)) func))
+
+  ;; CHECK:      (func $func (type $ref|$parent|_ref|$child|_=>_none) (param $x (ref $parent)) (param $y (ref $child))
+  ;; CHECK-NEXT:  (struct.set $parent 2
+  ;; CHECK-NEXT:   (local.get $x)
+  ;; CHECK-NEXT:   (f32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $parent 1
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child 0
+  ;; CHECK-NEXT:    (local.get $y)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child 2
+  ;; CHECK-NEXT:    (local.get $y)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child 3
+  ;; CHECK-NEXT:    (local.get $y)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child 4
+  ;; CHECK-NEXT:    (local.get $y)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $func (param $x (ref $parent)) (param $y (ref $child))
+    ;; As above, but add a write in the parent of field 2. That prevents us from
+    ;; removing it from the parent.
+    (struct.set $parent 2 (local.get $x) (f32.const 0))
+
+    (drop (struct.get $parent 1 (local.get $x)))
+    (drop (struct.get $child  0 (local.get $y)))
+    (drop (struct.get $child  2 (local.get $y)))
+    (drop (struct.get $child  3 (local.get $y)))
+    (drop (struct.get $child  4 (local.get $y)))
+  )
+)
+
+;; A parent with two children, and there are only reads of the parent. Those
+;; reads might be of data of either child, of course (as a refernce to the
+;; parent might point to them), so we cannot optimize here.
+(module
+  ;; CHECK:      (type $parent (struct_subtype (field i32) data))
+  (type $parent (struct_subtype (field i32) data))
+  ;; CHECK:      (type $ref|$parent|_ref|$child1|_ref|$child2|_=>_none (func_subtype (param (ref $parent) (ref $child1) (ref $child2)) func))
+
+  ;; CHECK:      (type $child1 (struct_subtype (field i32) $parent))
+  (type $child1 (struct_subtype (field i32) $parent))
+  ;; CHECK:      (type $child2 (struct_subtype (field i32) $parent))
+  (type $child2 (struct_subtype (field i32) $parent))
+
+  ;; CHECK:      (func $func (type $ref|$parent|_ref|$child1|_ref|$child2|_=>_none) (param $parent (ref $parent)) (param $child1 (ref $child1)) (param $child2 (ref $child2))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $parent 0
+  ;; CHECK-NEXT:    (local.get $parent)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $func (param $parent (ref $parent)) (param $child1 (ref $child1)) (param $child2 (ref $child2))
+    (drop (struct.get $parent 0 (local.get $parent)))
+  )
+)
+
+;; As above, but now the read is just of one child. We can remove the field
+;; from the parent and the other child.
+(module
+  ;; CHECK:      (type $child1 (struct_subtype (field i32) $parent))
+  (type $child1 (struct_subtype (field i32) $parent))
+
+  ;; CHECK:      (type $ref|$parent|_ref|$child1|_ref|$child2|_=>_none (func_subtype (param (ref $parent) (ref $child1) (ref $child2)) func))
+
+  ;; CHECK:      (type $parent (struct_subtype  data))
+  (type $parent (struct_subtype (field i32) data))
+  ;; CHECK:      (type $child2 (struct_subtype  $parent))
+  (type $child2 (struct_subtype (field i32) $parent))
+
+  ;; CHECK:      (func $func (type $ref|$parent|_ref|$child1|_ref|$child2|_=>_none) (param $parent (ref $parent)) (param $child1 (ref $child1)) (param $child2 (ref $child2))
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $child1 0
+  ;; CHECK-NEXT:    (local.get $child1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $func (param $parent (ref $parent)) (param $child1 (ref $child1)) (param $child2 (ref $child2))
+    (drop (struct.get $child1 0 (local.get $child1)))
+  )
+)