Diffstat (limited to 'src/passes/RemoveUnusedModuleElements.cpp')
-rw-r--r--  src/passes/RemoveUnusedModuleElements.cpp | 164
1 file changed, 161 insertions, 3 deletions
diff --git a/src/passes/RemoveUnusedModuleElements.cpp b/src/passes/RemoveUnusedModuleElements.cpp
index 6070d8bce..95318acd2 100644
--- a/src/passes/RemoveUnusedModuleElements.cpp
+++ b/src/passes/RemoveUnusedModuleElements.cpp
@@ -40,6 +40,7 @@
#include "ir/element-utils.h"
#include "ir/intrinsics.h"
#include "ir/module-utils.h"
+#include "ir/subtypes.h"
#include "ir/utils.h"
#include "pass.h"
#include "wasm-builder.h"
@@ -56,6 +57,10 @@ enum class ModuleElementKind { Function, Global, Tag, Table, ElementSegment };
// name of the particular element.
using ModuleElement = std::pair<ModuleElementKind, Name>;
+// A pair of a struct type and a field index, together defining a field in a
+// particular type.
+using StructField = std::pair<HeapType, Index>;
+
// Visit or walk an expression to find what things are referenced.
struct ReferenceFinder : public PostWalker<ReferenceFinder> {
// Our findings are placed in these data structures, which the user of this
@@ -63,12 +68,14 @@ struct ReferenceFinder : public PostWalker<ReferenceFinder> {
std::vector<ModuleElement> elements;
std::vector<HeapType> callRefTypes;
std::vector<Name> refFuncs;
+ std::vector<StructField> structFields;
bool usesMemory = false;
// Add an item to the output data structures.
void note(ModuleElement element) { elements.push_back(element); }
void noteCallRef(HeapType type) { callRefTypes.push_back(type); }
void noteRefFunc(Name refFunc) { refFuncs.push_back(refFunc); }
+ void note(StructField structField) { structFields.push_back(structField); }
// Visitors
@@ -161,6 +168,13 @@ struct ReferenceFinder : public PostWalker<ReferenceFinder> {
note(ModuleElement(ModuleElementKind::Tag, tag));
}
}
+ void visitStructGet(StructGet* curr) {
+ if (curr->ref->type == Type::unreachable || curr->ref->type.isNull()) {
+ return;
+ }
+ auto type = curr->ref->type.getHeapType();
+ note(StructField{type, curr->index});
+ }
void visitArrayNewSeg(ArrayNewSeg* curr) {
switch (curr->op) {
case NewData:
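
A note on how these findings are consumed: the finder is driven as a PostWalker and its result vectors are then read back by the analyzer, as the addReferences hunk further down shows. A rough sketch of that driving pattern follows; the "inspect" wrapper name is made up purely for illustration.

    // Hypothetical driver (the name "inspect" is invented), mirroring how
    // Analyzer::addReferences further down in this diff consumes the finder's
    // results: set the module, walk an expression, then read the findings.
    void inspect(Module* module, Expression* expr) {
      ReferenceFinder finder;
      finder.setModule(module);
      finder.walk(expr);
      for (auto element : finder.elements) {
        (void)element; // a referenced module element (function, global, ...)
      }
      for (auto structField : finder.structFields) {
        (void)structField; // a (heap type, field index) read by a struct.get
      }
      if (finder.usesMemory) {
        // the expression touches memory
      }
    }
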
@@ -207,7 +221,6 @@ struct Analyzer {
// If we walked the child immediately then we would make $bar used. But that
// global is only used if we actually read that field from the struct. We
// perform that analysis in readStructFields and unreadStructFieldExprMap, below.
- // TODO: this is not implemented yet
std::vector<Expression*> expressionQueue;
bool usesMemory = false;
@@ -234,6 +247,17 @@ struct Analyzer {
// imports.
std::unordered_map<HeapType, std::unordered_set<Name>> uncalledRefFuncMap;
+ // Similar to calledSignatures/uncalledRefFuncMap, we store the StructFields
+ // we've seen reads from, and also expressions stored in such fields that
+ // could be read if we ever see a read of that field in the future. That is,
+ // for an expression stored into a struct field to be read, we need both to
+ // see that expression written to the field and to see some other place read
+ // that field (similar to functions, where we need to see both the RefFunc
+ // and a CallRef that can actually call it).
+ std::unordered_set<StructField> readStructFields;
+ std::unordered_map<StructField, std::vector<Expression*>>
+ unreadStructFieldExprMap;
+
Analyzer(Module* module,
const PassOptions& options,
const std::vector<ModuleElement>& roots)
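
The interplay of readStructFields and unreadStructFieldExprMap described above can be shown with a small self-contained sketch. All names here are hypothetical and std containers stand in for Binaryen's types: a value written into a field is parked until a read of that field is seen, at which point it is flushed and becomes used.

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    using Field = std::pair<std::string, uint32_t>; // (type name, field index)

    struct DeferredFieldUses {
      std::set<Field> readFields;                        // fields seen to be read
      std::map<Field, std::vector<std::string>> pending; // values parked per field
      std::function<void(const std::string&)> use;       // called when a value is live

      // A value written into a field is live only if that field is ever read.
      void noteWrite(const Field& field, const std::string& value) {
        if (readFields.count(field)) {
          use(value);
        } else {
          pending[field].push_back(value);
        }
      }

      // A read of a field: mark it as read and flush anything parked on it.
      void noteRead(const Field& field) {
        if (!readFields.insert(field).second) {
          return; // already known to be read
        }
        auto iter = pending.find(field);
        if (iter != pending.end()) {
          for (auto& value : iter->second) {
            use(value);
          }
          pending.erase(iter);
        }
      }
    };

    int main() {
      DeferredFieldUses deferred;
      deferred.use = [](const std::string& value) {
        std::cout << "now used: " << value << '\n';
      };
      deferred.noteWrite({"$A", 0}, "ref to global $bar"); // parked, no read yet
      deferred.noteRead({"$A", 0});                        // flushes the write
    }
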
@@ -261,6 +285,9 @@ struct Analyzer {
}
}
+ // We'll compute SubTypes if we need them.
+ std::optional<SubTypes> subTypes;
+
// Process expressions in the expression queue while we have any, visiting
// them (using their contents) and adding children. Returns whether we did any
// work.
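
The std::optional<SubTypes> member added above is plain lazy initialization: SubTypes scans the whole module, so it is only built the first time a struct field read actually needs subtype information. A minimal sketch of that pattern, with ExpensiveAnalysis as a made-up stand-in:

    #include <cstddef>
    #include <optional>
    #include <vector>

    // Stand-in for an analysis that is costly to build (like SubTypes, which
    // scans all the types in the module).
    struct ExpensiveAnalysis {
      explicit ExpensiveAnalysis(const std::vector<int>& module)
        : count(module.size()) {}
      std::size_t count;
    };

    struct LazyUser {
      const std::vector<int>& module;
      std::optional<ExpensiveAnalysis> analysis; // empty until first needed

      explicit LazyUser(const std::vector<int>& module) : module(module) {}

      const ExpensiveAnalysis& get() {
        if (!analysis) {
          analysis.emplace(module); // built at most once, on demand
        }
        return *analysis;
      }
    };
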
@@ -286,6 +313,9 @@ struct Analyzer {
for (auto func : finder.refFuncs) {
useRefFunc(func);
}
+ for (auto structField : finder.structFields) {
+ useStructField(structField);
+ }
if (finder.usesMemory) {
usesMemory = true;
}
@@ -345,6 +375,36 @@ struct Analyzer {
}
}
+ void useStructField(StructField structField) {
+ if (!readStructFields.count(structField)) {
+ // Avoid a structured binding, as the C++ spec does not allow capturing
+ // structured bindings in lambdas, which we need to do below.
+ auto type = structField.first;
+ auto index = structField.second;
+
+ // This is the first time we see a read of this data. Note that it is
+ // read, and do the same for all subtypes, since the read may actually
+ // be happening on an instance of one of them.
+ if (!subTypes) {
+ subTypes = SubTypes(*module);
+ }
+ subTypes->iterSubTypes(type, [&](HeapType subType, Index depth) {
+ auto subStructField = StructField{subType, index};
+ readStructFields.insert(subStructField);
+
+ // Walk all the unread data we've queued: we queued it for the
+ // possibility of it ever being read, which just happened.
+ auto iter = unreadStructFieldExprMap.find(subStructField);
+ if (iter != unreadStructFieldExprMap.end()) {
+ for (auto* expr : iter->second) {
+ use(expr);
+ }
+ }
+ unreadStructFieldExprMap.erase(subStructField);
+ });
+ }
+ }
+
// As processExpressions, but for module elements.
bool processModule() {
bool worked = false;
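
The reason useStructField above marks all subtypes as well is that a struct.get typed against a supertype may, at runtime, read from an instance of any subtype, so a read of (T, i) must also count as a read of (Sub, i) for every subtype Sub of T. A standalone sketch of that propagation follows; the type names and subtype map are invented, and Binaryen's SubTypes/iterSubTypes does the real work in the hunk above.

    #include <cstdint>
    #include <map>
    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    using Field = std::pair<std::string, uint32_t>; // (type name, field index)

    // Immediate subtypes of each type (hypothetical hierarchy).
    std::map<std::string, std::vector<std::string>> subTypeMap = {
      {"$Super", {"$Sub"}},
      {"$Sub", {}},
    };

    // Mark field |index| as read in |type| and in every type below it.
    void markReadIncludingSubTypes(const std::string& type,
                                   uint32_t index,
                                   std::set<Field>& readFields) {
      readFields.insert({type, index});
      for (auto& sub : subTypeMap[type]) {
        markReadIncludingSubTypes(sub, index, readFields);
      }
    }

    int main() {
      std::set<Field> readFields;
      markReadIncludingSubTypes("$Super", 0, readFields);
      // Now both {$Super, 0} and {$Sub, 0} are considered read.
      return readFields.count({"$Sub", 0}) ? 0 : 1;
    }
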
@@ -399,9 +459,106 @@ struct Analyzer {
// Add the children of a used expression to be walked, if we should do so.
void scanChildren(Expression* curr) {
- for (auto* child : ChildIterator(curr)) {
- use(child);
+ // For now, the only special handling we have is for the fields of a
+ // struct.new: we defer walking them until we know there is a read that can
+ // actually read them (see the comments above on |expressionQueue|). We can
+ // only do this optimization in a closed world (as otherwise the field might
+ // be read outside of the code we can see), and only when the expression is
+ // reachable (if it is unreachable then we don't know the type, and we can
+ // leave it for DCE to remove).
+ if (!options.closedWorld || curr->type == Type::unreachable ||
+ !curr->is<StructNew>()) {
+ for (auto* child : ChildIterator(curr)) {
+ use(child);
+ }
+ return;
}
+
+ auto* new_ = curr->cast<StructNew>();
+ auto type = new_->type.getHeapType();
+
+ for (Index i = 0; i < new_->operands.size(); i++) {
+ auto* operand = new_->operands[i];
+ auto structField = StructField{type, i};
+ if (readStructFields.count(structField) ||
+ EffectAnalyzer(options, *module, operand).hasSideEffects()) {
+ // This data can be read, so just walk it. Or, this has side effects,
+ // which is tricky to reason about - the side effects must happen even
+ // if we never read the struct field - so give up and consider it used.
+ use(operand);
+ } else {
+ // This data is not yet known to be read, but might be read later. Note
+ // it as unread.
+ unreadStructFieldExprMap[structField].push_back(operand);
+
+ // We must also note that anything in this operand is referenced, even
+ // if it never ends up used, so that the IR remains valid.
+ addReferences(operand);
+ }
+ }
+ }
+
+ // Add references to all things appearing in an expression. This is called
+ // when we know an expression will appear in the output, which means it must
+ // remain valid IR and not refer to nonexistent things.
+ //
+ // This is only called on things without side effects (if there are such
+ // effects then we would have had to assume the worst earlier, and not get
+ // here).
+ void addReferences(Expression* curr) {
+ // Find references anywhere in this expression so we can apply them.
+ ReferenceFinder finder;
+ finder.setModule(module);
+ finder.walk(curr);
+
+ for (auto element : finder.elements) {
+ referenced.insert(element);
+
+ auto& [kind, value] = element;
+ if (kind == ModuleElementKind::Global) {
+ // Like functions, (non-imported) globals have contents. For functions,
+ // things are simple: if a function ends up with references but no uses
+ // then we can simply empty out the function (by setting its body to an
+ // unreachable). We don't have a simple way to do the same for globals,
+ // unfortunately. For now, scan the global's contents and add references
+ // as needed.
+ // TODO: We could try to empty the global out, for example, replace it
+ // with a null if it is nullable, or replace all gets of it with
+ // something else, but that is not trivial.
+ auto* global = module->getGlobal(value);
+ if (!global->imported()) {
+ // Note that infinite recursion is not a danger here since a global
+ // can only refer to previous globals.
+ addReferences(global->init);
+ }
+ }
+ }
+
+ for (auto func : finder.refFuncs) {
+ // If a function ends up referenced but not used then later on we will
+ // empty it out by replacing its body with an unreachable, which always
+ // validates. For that reason all we need to do here is mark the function
+ // as referenced - we don't need to do anything with the body.
+ //
+ // Note that it is crucial that we do not call useRefFunc() here: we are
+ // just adding a reference to the function, and not actually using the
+ // RefFunc. (Only useRefFunc() + a CallRef of the proper type are enough
+ // to make a function itself used.)
+ referenced.insert(ModuleElement(ModuleElementKind::Function, func));
+ }
+
+ if (finder.usesMemory) {
+ // TODO: We could do better here, but leave that for the full refactor
+ // that will also add multimemory support. Then this will be as simple
+ // as supporting tables here (which are just more module elements).
+ usesMemory = true;
+ }
+
+ // Note: there is nothing to do for |callRefTypes| and |structFields|, which
+ // only involve types. This function only cares about references to module
+ // elements like functions, globals, and tables. (References to types are
+ // handled in an entirely different way in Binaryen IR, so we do not need to
+ // worry about them here.)
}
};
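
As the comments above note, a function that ends up referenced but never used does not need its body: the pass can later replace the body with an unreachable, which validates for any signature. A hedged sketch of that emptying step using Binaryen's Builder follows; it is an illustration of the idea, not the pass's actual code for doing so, and the helper name is invented.

    #include "wasm-builder.h"
    #include "wasm.h"

    namespace wasm {

    // Replace the body of a referenced-but-unused function with an unreachable.
    // An unreachable body always validates, whatever the function's results are.
    inline void emptyOutFunction(Module& module, Function* func) {
      if (func->imported()) {
        return; // imported functions have no body
      }
      Builder builder(module);
      func->body = builder.makeUnreachable();
    }

    } // namespace wasm
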
@@ -505,6 +662,7 @@ struct RemoveUnusedModuleElements : public Pass {
return true;
});
module->removeGlobals([&](Global* curr) {
+ // See TODO in addReferences - we may be able to do better here.
return !needed(ModuleElement(ModuleElementKind::Global, curr->name));
});
module->removeTags([&](Tag* curr) {