2 files changed, 354 insertions, 82 deletions
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp
index 621ceec2a..f73a9b380 100644
--- a/src/passes/ConstantFieldPropagation.cpp
+++ b/src/passes/ConstantFieldPropagation.cpp
@@ -169,6 +169,11 @@ struct StructValues : public std::vector<PossibleConstantValues> {
     assert(index < size());
     return std::vector<PossibleConstantValues>::operator[](index);
   }
+
+  const PossibleConstantValues& operator[](size_t index) const {
+    assert(index < size());
+    return std::vector<PossibleConstantValues>::operator[](index);
+  }
 };
 
 // Map of types to information about the values their fields can take.
@@ -206,13 +211,21 @@ struct StructValuesMap : public std::unordered_map<HeapType, StructValues> {
 using FunctionStructValuesMap = std::unordered_map<Function*, StructValuesMap>;
 
 // Scan each function to note all its writes to struct fields.
+//
+// We track information from struct.new and struct.set separately, because in
+// struct.new we know more about the type - we know the actual exact type being
+// written to, and not just that it is of a subtype of the instruction's type,
+// which helps later.
 struct Scanner : public WalkerPass<PostWalker<Scanner>> {
   bool isFunctionParallel() override { return true; }
 
-  Pass* create() override { return new Scanner(functionInfos); }
+  Pass* create() override {
+    return new Scanner(functionNewInfos, functionSetInfos);
+  }
 
-  Scanner(FunctionStructValuesMap& functionInfos)
-    : functionInfos(functionInfos) {}
+  Scanner(FunctionStructValuesMap& functionNewInfos,
+          FunctionStructValuesMap& functionSetInfos)
+    : functionNewInfos(functionNewInfos), functionSetInfos(functionSetInfos) {}
 
   void visitStructNew(StructNew* curr) {
     auto type = curr->type;
@@ -222,7 +235,7 @@ struct Scanner : public WalkerPass<PostWalker<Scanner>> {
 
     // Note writes to all the fields of the struct.
     auto heapType = type.getHeapType();
-    auto& values = getStructValues(heapType);
+    auto& values = functionNewInfos[getFunction()][heapType];
     auto& fields = heapType.getStruct().fields;
     for (Index i = 0; i < fields.size(); i++) {
       auto& fieldValues = values[i];
@@ -242,15 +255,13 @@ struct Scanner : public WalkerPass<PostWalker<Scanner>> {
 
     // Note a write to this field of the struct.
     auto heapType = type.getHeapType();
-    noteExpression(curr->value, getStructValues(heapType)[curr->index]);
+    auto& values = functionSetInfos[getFunction()][heapType];
+    noteExpression(curr->value, values[curr->index]);
   }
 
 private:
-  FunctionStructValuesMap& functionInfos;
-
-  StructValues& getStructValues(HeapType type) {
-    return functionInfos[getFunction()][type];
-  }
+  FunctionStructValuesMap& functionNewInfos;
+  FunctionStructValuesMap& functionSetInfos;
 
   // Note a value, checking whether it is a constant or not.
   void noteExpression(Expression* expr, PossibleConstantValues& info) {
@@ -348,28 +359,34 @@ struct ConstantFieldPropagation : public Pass {
     }
 
     // Find and analyze all writes inside each function.
-    FunctionStructValuesMap functionInfos;
+    FunctionStructValuesMap functionNewInfos, functionSetInfos;
     for (auto& func : module->functions) {
       // Initialize the data for each function, so that we can operate on this
       // structure in parallel without modifying it.
-      functionInfos[func.get()];
+      functionNewInfos[func.get()];
+      functionSetInfos[func.get()];
     }
-    Scanner scanner(functionInfos);
+    Scanner scanner(functionNewInfos, functionSetInfos);
     scanner.run(runner, module);
     scanner.walkModuleCode(module);
 
     // Combine the data from the functions.
-    StructValuesMap combinedInfos;
-    for (auto& kv : functionInfos) {
-      StructValuesMap& infos = kv.second;
-      for (auto& kv : infos) {
-        auto type = kv.first;
-        auto& info = kv.second;
-        for (Index i = 0; i < info.size(); i++) {
-          combinedInfos[type][i].combine(info[i]);
+    auto combine = [](const FunctionStructValuesMap& functionInfos,
+                      StructValuesMap& combinedInfos) {
+      for (auto& kv : functionInfos) {
+        const StructValuesMap& infos = kv.second;
+        for (auto& kv : infos) {
+          auto type = kv.first;
+          auto& info = kv.second;
+          for (Index i = 0; i < info.size(); i++) {
+            combinedInfos[type][i].combine(info[i]);
+          }
         }
       }
-    }
+    };
+    StructValuesMap combinedNewInfos, combinedSetInfos;
+    combine(functionNewInfos, combinedNewInfos);
+    combine(functionSetInfos, combinedSetInfos);
 
     // Handle subtyping. |combinedInfo| so far contains data that represents
     // each struct.new and struct.set's operation on the struct type used in
@@ -398,44 +415,64 @@ struct ConstantFieldPropagation : public Pass {
     // efficient, we therefore propagate information about the possible values
     // in each field to both subtypes and supertypes.
     //
-    // TODO: Model struct.new separately from struct.set. With new we actually
-    //       do know the specific type being written to, which means a get is
-    //       only relevant for a new if the get is of a subtype. That means we
-    //       only need to propagate values from new to subtypes.
+    // struct.new on the other hand knows exactly what type is being written to,
+    // and so given a get of $A and a new of $B, the new is relevant for the get
+    // iff $A is a subtype of $B, so we only need to propagate in one direction
+    // there, to supertypes.
     //
     // TODO: A topological sort could avoid repeated work here perhaps.
+
     SubTypes subTypes(*module);
-    UniqueDeferredQueue<HeapType> work;
-    for (auto& kv : combinedInfos) {
-      auto type = kv.first;
-      work.push(type);
-    }
-    while (!work.empty()) {
-      auto type = work.pop();
-      auto& infos = combinedInfos[type];
-
-      // Propagate shared fields to the supertype.
-      HeapType superType;
-      if (type.getSuperType(superType)) {
-        auto& superInfos = combinedInfos[superType];
-        auto& superFields = superType.getStruct().fields;
-        for (Index i = 0; i < superFields.size(); i++) {
-          if (superInfos[i].combine(infos[i])) {
-            work.push(superType);
+
+    auto propagate = [&subTypes](StructValuesMap& combinedInfos,
+                                 bool toSubTypes) {
+      UniqueDeferredQueue<HeapType> work;
+      for (auto& kv : combinedInfos) {
+        auto type = kv.first;
+        work.push(type);
+      }
+      while (!work.empty()) {
+        auto type = work.pop();
+        auto& infos = combinedInfos[type];
+
+        // Propagate shared fields to the supertype.
+        HeapType superType;
+        if (type.getSuperType(superType)) {
+          auto& superInfos = combinedInfos[superType];
+          auto& superFields = superType.getStruct().fields;
+          for (Index i = 0; i < superFields.size(); i++) {
+            if (superInfos[i].combine(infos[i])) {
+              work.push(superType);
+            }
           }
         }
-      }
 
-      // Propagate shared fields to the subtypes.
-      auto numFields = type.getStruct().fields.size();
-      for (auto subType : subTypes.getSubTypes(type)) {
-        auto& subInfos = combinedInfos[subType];
-        for (Index i = 0; i < numFields; i++) {
-          if (subInfos[i].combine(infos[i])) {
-            work.push(subType);
+        if (toSubTypes) {
+          // Propagate shared fields to the subtypes.
+          auto numFields = type.getStruct().fields.size();
+          for (auto subType : subTypes.getSubTypes(type)) {
+            auto& subInfos = combinedInfos[subType];
+            for (Index i = 0; i < numFields; i++) {
+              if (subInfos[i].combine(infos[i])) {
+                work.push(subType);
+              }
+            }
           }
         }
       }
+    };
+    propagate(combinedNewInfos, false);
+    propagate(combinedSetInfos, true);
+
+    // Combine both sources of information to the final information that gets
+    // care about.
+    StructValuesMap combinedInfos = std::move(combinedNewInfos);
+    for (auto& kv : combinedSetInfos) {
+      auto type = kv.first;
+      auto& info = kv.second;
+      for (Index i = 0; i < info.size(); i++) {
+        combinedInfos[type][i].combine(info[i]);
+      }
     }
 
     // Optimize.
diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast
index 684e5cf98..fce78d126 100644
--- a/test/lit/passes/cfp.wast
+++ b/test/lit/passes/cfp.wast
@@ -553,9 +553,7 @@
 
 ;; Subtyping: Create a supertype and get a subtype. As we never create a
 ;;            subtype, the get must trap anyhow (the reference it receives can
-;;            only by null in this closed world). Since the only possible writes
-;;            to the field are of the value 10, we will optimize to that. (But
-;;            as it will trap anyhow, that doesn't really matter.)
+;;            only be null in this closed world).
 (module
   ;; CHECK:      (type $none_=>_none (func))
 
@@ -582,6 +580,61 @@
   )
   ;; CHECK:      (func $get
   ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.null $substruct)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (unreachable)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $get
+    (drop
+      (struct.get $substruct 0
+        (ref.null $substruct)
+      )
+    )
+  )
+)
+
+;; As above, but in addition to a new of $struct also add a set. The set could
+;; in principle be relevant for the get as far as this pass knows, and so we
+;; will optimize the result to the only possible value. (In practice, though,
+;; it will trap anyhow.)
+(module
+  ;; CHECK:      (type $struct (struct (field (mut i32))))
+  (type $struct (struct (mut i32)))
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (type $substruct (struct (field (mut i32))) (extends $struct))
+  (type $substruct (struct (mut i32)) (extends $struct))
+
+  ;; CHECK:      (func $create
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_with_rtt $struct
+  ;; CHECK-NEXT:    (i32.const 10)
+  ;; CHECK-NEXT:    (rtt.canon $struct)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (struct.set $struct 0
+  ;; CHECK-NEXT:   (ref.null $struct)
+  ;; CHECK-NEXT:   (i32.const 10)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $create
+    (drop
+      (struct.new_with_rtt $struct
+        (i32.const 10)
+        (rtt.canon $struct)
+      )
+    )
+    (struct.set $struct 0
+      (ref.null $struct)
+      (i32.const 10)
+    )
+  )
+  ;; CHECK:      (func $get
+  ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (block (result i32)
   ;; CHECK-NEXT:    (drop
   ;; CHECK-NEXT:     (ref.as_non_null
@@ -771,17 +824,15 @@
 )
 
 ;; Subtyping: Create both a subtype and a supertype, with different constants
-;;            for the shared field, but get from the subtype. As the field is
-;;            shared between the types, we cannot optimize here: we model the
-;;            struct.new as a set, and if there is a set of the supertype, then
-;;            it might be to a reference of the subtype.
-;;            TODO: model new and set separately
+;;            for the shared field, but get from the subtype. The field is
+;;            shared between the types, but we only create the substruct with
+;;            one value, so we can optimize.
 (module
+  ;; CHECK:      (type $none_=>_none (func))
+
   ;; CHECK:      (type $substruct (struct (field i32) (field f64)) (extends $struct))
   (type $substruct (struct i32 f64) (extends $struct))
 
-  ;; CHECK:      (type $none_=>_none (func))
-
   ;; CHECK:      (type $struct (struct (field i32)))
   (type $struct (struct i32))
 
@@ -817,6 +868,75 @@
   )
   ;; CHECK:      (func $get
   ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (ref.null $substruct)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 20)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $get
+    (drop
+      (struct.get $substruct 0
+        (ref.null $substruct)
+      )
+    )
+  )
+)
+
+;; As above, but add a set of $struct. The set prevents the optimization.
+(module
+  ;; CHECK:      (type $struct (struct (field (mut i32))))
+  (type $struct (struct (mut i32)))
+
+  ;; CHECK:      (type $substruct (struct (field (mut i32)) (field f64)) (extends $struct))
+  (type $substruct (struct (mut i32) f64) (extends $struct))
+
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (func $create
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_with_rtt $struct
+  ;; CHECK-NEXT:    (i32.const 10)
+  ;; CHECK-NEXT:    (rtt.canon $struct)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (struct.set $struct 0
+  ;; CHECK-NEXT:   (ref.null $struct)
+  ;; CHECK-NEXT:   (i32.const 10)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_with_rtt $substruct
+  ;; CHECK-NEXT:    (i32.const 20)
+  ;; CHECK-NEXT:    (f64.const 3.14159)
+  ;; CHECK-NEXT:    (rtt.canon $substruct)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $create
+    (drop
+      (struct.new_with_rtt $struct
+        (i32.const 10)
+        (rtt.canon $struct)
+      )
+    )
+    (struct.set $struct 0
+      (ref.null $struct)
+      (i32.const 10)
+    )
+    (drop
+      (struct.new_with_rtt $substruct
+        (i32.const 20)
+        (f64.const 3.14159)
+        (rtt.canon $substruct)
+      )
+    )
+  )
+  ;; CHECK:      (func $get
+  ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (struct.get $substruct 0
   ;; CHECK-NEXT:    (ref.null $substruct)
   ;; CHECK-NEXT:   )
@@ -967,22 +1087,17 @@
 
 ;; Multi-level subtyping with conflicts. The even-numbered fields will get
 ;; different values in the sub-most type. Create the top and bottom types, but
-;; not the middle one. Any field which receives more than one value, no matter
-;; where, cannot be optimized, which leaves the odd fields as optimizable, as
-;; well as fields only present in the sub-most class anyhow (if they have a
-;; constant value in the single construction there).
-;; (See notes earlier on the limitations of our modeling new and set in the same
-;; way.)
+;; not the middle one.
 (module
   ;; CHECK:      (type $struct3 (struct (field i32) (field i32) (field f64) (field f64) (field anyref) (field anyref)) (extends $struct2))
   (type $struct3 (struct i32 i32 f64 f64 anyref anyref) (extends $struct2))
 
-  ;; CHECK:      (type $struct2 (struct (field i32) (field i32) (field f64) (field f64)) (extends $struct1))
-  (type $struct2 (struct i32 i32 f64 f64) (extends $struct1))
-
   ;; CHECK:      (type $struct1 (struct (field i32) (field i32)))
   (type $struct1 (struct i32 i32))
 
+  ;; CHECK:      (type $struct2 (struct (field i32) (field i32) (field f64) (field f64)) (extends $struct1))
+  (type $struct2 (struct i32 i32 f64 f64) (extends $struct1))
+
   ;; CHECK:      (type $anyref_=>_none (func (param anyref)))
 
   ;; CHECK:      (type $none_=>_none (func))
@@ -1055,8 +1170,13 @@
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
-  ;; CHECK-NEXT:   (struct.get $struct2 1
-  ;; CHECK-NEXT:    (ref.null $struct2)
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (ref.null $struct2)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 999)
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
@@ -1090,8 +1210,13 @@
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
-  ;; CHECK-NEXT:   (struct.get $struct3 1
-  ;; CHECK-NEXT:    (ref.null $struct3)
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (ref.null $struct3)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 999)
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
@@ -1196,15 +1321,116 @@
 )
 
 ;; Multi-level subtyping with a different value in the middle of the chain. We
-;; cannot optimize any of these (for the very last field in the subtype, if we
-;; modeled new separately from set then we could, as mentioned earlier).
+;; can only optimize $struct3.
 (module
-  ;; CHECK:      (type $struct1 (struct (field i32)))
-  (type $struct1 (struct i32))
-  ;; CHECK:      (type $struct2 (struct (field i32) (field f64)) (extends $struct1))
-  (type $struct2 (struct i32 f64) (extends $struct1))
-  ;; CHECK:      (type $struct3 (struct (field i32) (field f64) (field anyref)) (extends $struct2))
-  (type $struct3 (struct i32 f64 anyref) (extends $struct2))
+  ;; CHECK:      (type $struct1 (struct (field (mut i32))))
+  (type $struct1 (struct (mut i32)))
+  ;; CHECK:      (type $struct2 (struct (field (mut i32)) (field f64)) (extends $struct1))
+  (type $struct2 (struct (mut i32) f64) (extends $struct1))
+  ;; CHECK:      (type $none_=>_none (func))
+
+  ;; CHECK:      (type $struct3 (struct (field (mut i32)) (field f64) (field anyref)) (extends $struct2))
+  (type $struct3 (struct (mut i32) f64 anyref) (extends $struct2))
+
+  ;; CHECK:      (func $create
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_with_rtt $struct1
+  ;; CHECK-NEXT:    (i32.const 10)
+  ;; CHECK-NEXT:    (rtt.canon $struct1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_with_rtt $struct2
+  ;; CHECK-NEXT:    (i32.const 9999)
+  ;; CHECK-NEXT:    (f64.const 0)
+  ;; CHECK-NEXT:    (rtt.canon $struct2)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new_with_rtt $struct3
+  ;; CHECK-NEXT:    (i32.const 10)
+  ;; CHECK-NEXT:    (f64.const 0)
+  ;; CHECK-NEXT:    (ref.null any)
+  ;; CHECK-NEXT:    (rtt.canon $struct3)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $create
+    (drop
+      (struct.new_with_rtt $struct1
+        (i32.const 10)
+        (rtt.canon $struct1)
+      )
+    )
+    (drop
+      (struct.new_with_rtt $struct2
+        (i32.const 9999) ;; use a different value here
+        (f64.const 0)
+        (rtt.canon $struct2)
+      )
+    )
+    (drop
+      (struct.new_with_rtt $struct3
+        (i32.const 10)
+        (f64.const 0)
+        (ref.null any)
+        (rtt.canon $struct3)
+      )
+    )
+  )
+  ;; CHECK:      (func $get
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $struct1 0
+  ;; CHECK-NEXT:    (ref.null $struct1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.get $struct2 0
+  ;; CHECK-NEXT:    (ref.null $struct2)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (block (result i32)
+  ;; CHECK-NEXT:    (drop
+  ;; CHECK-NEXT:     (ref.as_non_null
+  ;; CHECK-NEXT:      (ref.null $struct3)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:    (i32.const 10)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $get
+    ;; Get field 0 in all the types.
+    (drop
+      (struct.get $struct1 0
+        (ref.null $struct1)
+      )
+    )
+    (drop
+      (struct.get $struct2 0
+        (ref.null $struct2)
+      )
+    )
+    (drop
+      (struct.get $struct3 0
+        (ref.null $struct3)
+      )
+    )
+  )
+)
+
+;; As above, but add not just a new of the middle class with a different value
+;; but also a set. That prevents all optimizations.
+(module
+  ;; CHECK:      (type $struct2 (struct (field (mut i32)) (field f64)) (extends $struct1))
+  (type $struct2 (struct (mut i32) f64) (extends $struct1))
+
+  ;; CHECK:      (type $struct1 (struct (field (mut i32))))
+  (type $struct1 (struct (mut i32)))
+
+  ;; CHECK:      (type $struct3 (struct (field (mut i32)) (field f64) (field anyref)) (extends $struct2))
+  (type $struct3 (struct (mut i32) f64 anyref) (extends $struct2))
 
   ;; CHECK:      (type $none_=>_none (func))
 
@@ -1222,6 +1448,10 @@
   ;; CHECK-NEXT:    (rtt.canon $struct2)
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (struct.set $struct2 0
+  ;; CHECK-NEXT:   (ref.null $struct2)
+  ;; CHECK-NEXT:   (i32.const 9999)
+  ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (struct.new_with_rtt $struct3
   ;; CHECK-NEXT:    (i32.const 10)
@@ -1245,6 +1475,11 @@
         (rtt.canon $struct2)
       )
     )
+    (struct.set $struct2 0
+      (ref.null $struct2)
+      (i32.const 9999) ;; use a different value here
+      (f64.const 0)
+    )
     (drop
       (struct.new_with_rtt $struct3
         (i32.const 10)