1 files changed, 161 insertions, 3 deletions
diff --git a/src/passes/RemoveUnusedModuleElements.cpp b/src/passes/RemoveUnusedModuleElements.cpp
index 6070d8bce..95318acd2 100644
--- a/src/passes/RemoveUnusedModuleElements.cpp
+++ b/src/passes/RemoveUnusedModuleElements.cpp
@@ -40,6 +40,7 @@
 #include "ir/element-utils.h"
 #include "ir/intrinsics.h"
 #include "ir/module-utils.h"
+#include "ir/subtypes.h"
 #include "ir/utils.h"
 #include "pass.h"
 #include "wasm-builder.h"
@@ -56,6 +57,10 @@ enum class ModuleElementKind { Function, Global, Tag, Table, ElementSegment };
 // name of the particular element.
 using ModuleElement = std::pair<ModuleElementKind, Name>;
 
+// A pair of a struct type and a field index, together defining a field in a
+// particular type.
+using StructField = std::pair<HeapType, Index>;
+
 // Visit or walk an expression to find what things are referenced.
 struct ReferenceFinder : public PostWalker<ReferenceFinder> {
   // Our findings are placed in these data structures, which the user of this
@@ -63,12 +68,14 @@ struct ReferenceFinder : public PostWalker<ReferenceFinder> {
   std::vector<ModuleElement> elements;
   std::vector<HeapType> callRefTypes;
   std::vector<Name> refFuncs;
+  std::vector<StructField> structFields;
   bool usesMemory = false;
 
   // Add an item to the output data structures.
   void note(ModuleElement element) { elements.push_back(element); }
   void noteCallRef(HeapType type) { callRefTypes.push_back(type); }
   void noteRefFunc(Name refFunc) { refFuncs.push_back(refFunc); }
+  void note(StructField structField) { structFields.push_back(structField); }
 
   // Visitors
 
@@ -161,6 +168,13 @@ struct ReferenceFinder : public PostWalker<ReferenceFinder> {
       note(ModuleElement(ModuleElementKind::Tag, tag));
     }
   }
+  void visitStructGet(StructGet* curr) {
+    if (curr->ref->type == Type::unreachable || curr->ref->type.isNull()) {
+      return;
+    }
+    auto type = curr->ref->type.getHeapType();
+    note(StructField{type, curr->index});
+  }
   void visitArrayNewSeg(ArrayNewSeg* curr) {
     switch (curr->op) {
       case NewData:
@@ -207,7 +221,6 @@ struct Analyzer {
   // If we walked the child immediately then we would make $bar used. But that
   // global is only used if we actually read that field from the struct. We
   // perform that analysis in readStructFields  unreadStructFieldExprMap, below.
-  // TODO: this is not implemented yet
   std::vector<Expression*> expressionQueue;
 
   bool usesMemory = false;
@@ -234,6 +247,17 @@ struct Analyzer {
   // imports.
   std::unordered_map<HeapType, std::unordered_set<Name>> uncalledRefFuncMap;
 
+  // Similar to calledSignatures/uncalledRefFuncMap, we store the StructFields
+  // we've seen reads from, and also expressions stored in such fields that
+  // could be read if ever we see a read of that field in the future. That is,
+  // for an expression stored into a struct field to be read, we need to both
+  // see that expression written to the field, and see some other place read
+  // that field (similar to with functions that we need to see the RefFunc and
+  // also a CallRef that can actually call it).
+  std::unordered_set<StructField> readStructFields;
+  std::unordered_map<StructField, std::vector<Expression*>>
+    unreadStructFieldExprMap;
+
   Analyzer(Module* module,
            const PassOptions& options,
            const std::vector<ModuleElement>& roots)
@@ -261,6 +285,9 @@ struct Analyzer {
     }
   }
 
+  // We'll compute SubTypes if we need them.
+  std::optional<SubTypes> subTypes;
+
   // Process expressions in the expression queue while we have any, visiting
   // them (using their contents) and adding children. Returns whether we did any
   // work.
@@ -286,6 +313,9 @@ struct Analyzer {
       for (auto func : finder.refFuncs) {
         useRefFunc(func);
       }
+      for (auto structField : finder.structFields) {
+        useStructField(structField);
+      }
       if (finder.usesMemory) {
         usesMemory = true;
       }
@@ -345,6 +375,36 @@ struct Analyzer {
     }
   }
 
+  void useStructField(StructField structField) {
+    if (!readStructFields.count(structField)) {
+      // Avoid a structured binding as the C++ spec does not allow capturing
+      // them in lambdas, which we need below.
+      auto type = structField.first;
+      auto index = structField.second;
+
+      // This is the first time we see a read of this data. Note that it is
+      // read, and also all subtypes since we might be reading from them as
+      // well.
+      if (!subTypes) {
+        subTypes = SubTypes(*module);
+      }
+      subTypes->iterSubTypes(type, [&](HeapType subType, Index depth) {
+        auto subStructField = StructField{subType, index};
+        readStructFields.insert(subStructField);
+
+        // Walk all the unread data we've queued: we queued it for the
+        // possibility of it ever being read, which just happened.
+        auto iter = unreadStructFieldExprMap.find(subStructField);
+        if (iter != unreadStructFieldExprMap.end()) {
+          for (auto* expr : iter->second) {
+            use(expr);
+          }
+        }
+        unreadStructFieldExprMap.erase(subStructField);
+      });
+    }
+  }
+
   // As processExpressions, but for module elements.
   bool processModule() {
     bool worked = false;
@@ -399,9 +459,106 @@ struct Analyzer {
 
   // Add the children of a used expression to be walked, if we should do so.
   void scanChildren(Expression* curr) {
-    for (auto* child : ChildIterator(curr)) {
-      use(child);
+    // For now, the only special handling we have is fields of struct.new, which
+    // we defer walking of to when we know there is a read that can actually
+    // read them, see comments above on |expressionQueue|. We can only do that
+    // optimization in closed world (as otherwise the field might be read
+    // outside of the code we can see), and when it is reached (if it's
+    // unreachable then we don't know the type, and can defer that to DCE to
+    // remove).
+    if (!options.closedWorld || curr->type == Type::unreachable ||
+        !curr->is<StructNew>()) {
+      for (auto* child : ChildIterator(curr)) {
+        use(child);
+      }
+      return;
     }
+
+    auto* new_ = curr->cast<StructNew>();
+    auto type = new_->type.getHeapType();
+
+    for (Index i = 0; i < new_->operands.size(); i++) {
+      auto* operand = new_->operands[i];
+      auto structField = StructField{type, i};
+      if (readStructFields.count(structField) ||
+          EffectAnalyzer(options, *module, operand).hasSideEffects()) {
+        // This data can be read, so just walk it. Or, this has side effects,
+        // which is tricky to reason about - the side effects must happen even
+        // if we never read the struct field - so give up and consider it used.
+        use(operand);
+      } else {
+        // This data does not need to be read now, but might be read later. Note
+        // it as unread.
+        unreadStructFieldExprMap[structField].push_back(operand);
+
+        // We also must note that anything in this operand is referenced, even
+        // if it never ends up used, so the IR remains valid.
+        addReferences(operand);
+      }
+    }
+  }
+
+  // Add references to all things appearing in an expression. This is called
+  // when we know an expression will appear in the output, which means it must
+  // remain valid IR and not refer to nonexistent things.
+  //
+  // This is only called on things without side effects (if there are such
+  // effects then we would have had to assume the worst earlier, and not get
+  // here).
+  void addReferences(Expression* curr) {
+    // Find references anywhere in this expression so we can apply them.
+    ReferenceFinder finder;
+    finder.setModule(module);
+    finder.walk(curr);
+
+    for (auto element : finder.elements) {
+      referenced.insert(element);
+
+      auto& [kind, value] = element;
+      if (kind == ModuleElementKind::Global) {
+        // Like functions, (non-imported) globals have contents. For functions,
+        // things are simple: if a function ends up with references but no uses
+        // then we can simply empty out the function (by setting its body to an
+        // unreachable). We don't have a simple way to do the same for globals,
+        // unfortunately. For now, scan the global's contents and add references
+        // as needed.
+        // TODO: We could try to empty the global out, for example, replace it
+        //       with a null if it is nullable, or replace all gets of it with
+        //       something else, but that is not trivial.
+        auto* global = module->getGlobal(value);
+        if (!global->imported()) {
+          // Note that infinite recursion is not a danger here since a global
+          // can only refer to previous globals.
+          addReferences(global->init);
+        }
+      }
+    }
+
+    for (auto func : finder.refFuncs) {
+      // If a function ends up referenced but not used then later down we will
+      // empty it out by replacing its body with an unreachable, which always
+      // validates. For that reason all we need to do here is mark the function
+      // as referenced - we don't need to do anything with the body.
+      //
+      // Note that it is crucial that we do not call useRefFunc() here: we are
+      // just adding a reference to the function, and not actually using the
+      // RefFunc. (Only useRefFunc() + a CallRef of the proper type are enough
+      // to make a function itself used.)
+      referenced.insert(ModuleElement(ModuleElementKind::Function, func));
+    }
+
+    if (finder.usesMemory) {
+      // TODO: We could do better here, but leave that for the full refactor
+      //       here that will also add multimemory. Then this will be as simple
+      //       as supporting tables here (which are just more module elements).
+      usesMemory = true;
+    }
+
+    // Note: nothing to do with |callRefTypes| and |structFields|, which only
+    // involve types. This function only cares about references to module
+    // elements like functions, globals, and tables. (References to types are
+    // handled in an entirely different way in Binaryen IR, and we don't need to
+    // worry about it.)
   }
 };
 
@@ -505,6 +662,7 @@ struct RemoveUnusedModuleElements : public Pass {
       return true;
     });
     module->removeGlobals([&](Global* curr) {
+      // See TODO in addReferences - we may be able to do better here.
       return !needed(ModuleElement(ModuleElementKind::Global, curr->name));
     });
     module->removeTags([&](Tag* curr) {