diff options
-rw-r--r-- | src/ir/struct-utils.h | 254 | ||||
-rw-r--r-- | src/ir/subtypes.h | 86 | ||||
-rw-r--r-- | src/passes/ConstantFieldPropagation.cpp | 318 |
3 files changed, 409 insertions, 249 deletions
diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h new file mode 100644 index 000000000..b052a15f7 --- /dev/null +++ b/src/ir/struct-utils.h @@ -0,0 +1,254 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ir_struct_utils_h +#define wasm_ir_struct_utils_h + +#include "ir/subtypes.h" +#include "wasm.h" + +namespace wasm { + +// A vector of a template type's values. One such vector will be used per struct +// type, where each element in the vector represents a field. We always assume +// that the vectors are pre-initialized to the right length before accessing any +// data, which this class enforces using assertions, and which is implemented in +// StructValuesMap. +template<typename T> struct StructValues : public std::vector<T> { + T& operator[](size_t index) { + assert(index < this->size()); + return std::vector<T>::operator[](index); + } + + const T& operator[](size_t index) const { + assert(index < this->size()); + return std::vector<T>::operator[](index); + } +}; + +// Maps heap types to a StructValues for that heap type. +// +// Also provides a combineInto() helper that combines one map into another. This +// depends on the underlying T defining a combine() method. +template<typename T> +struct StructValuesMap : public std::unordered_map<HeapType, StructValues<T>> { + // When we access an item, if it does not already exist, create it with a + // vector of the right length for that type. + StructValues<T>& operator[](HeapType type) { + auto inserted = this->insert({type, {}}); + auto& values = inserted.first->second; + if (inserted.second) { + values.resize(type.getStruct().fields.size()); + } + return values; + } + + void combineInto(StructValuesMap<T>& combinedInfos) const { + for (auto& kv : *this) { + auto type = kv.first; + auto& info = kv.second; + for (Index i = 0; i < info.size(); i++) { + combinedInfos[type][i].combine(info[i]); + } + } + } + + void dump(std::ostream& o) { + o << "dump " << this << '\n'; + for (auto& kv : (*this)) { + auto type = kv.first; + auto& vec = kv.second; + o << "dump " << type << " " << &vec << ' '; + for (auto x : vec) { + x.dump(o); + o << " "; + }; + o << '\n'; + } + } +}; + +// Map of functions to StructValuesMap. This lets us compute in parallel while +// we walk the module, and afterwards we will merge them all. +template<typename T> +struct FunctionStructValuesMap + : public std::unordered_map<Function*, StructValuesMap<T>> { + FunctionStructValuesMap(Module& wasm) { + // Initialize the data for each function in preparation for parallel + // computation. + for (auto& func : wasm.functions) { + (*this)[func.get()]; + } + } + + // Combine information across functions. + void combineInto(StructValuesMap<T>& combinedInfos) const { + for (auto& kv : *this) { + const StructValuesMap<T>& infos = kv.second; + infos.combineInto(combinedInfos); + } + } +}; + +// A generic scanner that finds struct operations and calls hooks to update +// information. Subclasses must define these methods: +// +// * Note an expression written into a field. +// +// void noteExpression(Expression* expr, HeapType type, Index index, T& info); +// +// * Note a default value written during creation. +// +// void noteDefault(Type fieldType, HeapType type, Index index, T& info); +// +// * Note a copied value (read from this field and written to the same, possibly +// in another object). +// +// void noteCopy(HeapType type, Index index, T& info); +// +// We track information from struct.new and struct.set separately, because in +// struct.new we know more about the type - we know the actual exact type being +// written to, and not just that it is of a subtype of the instruction's type, +// which helps later. +template<typename T, typename SubType> +struct Scanner : public WalkerPass<PostWalker<Scanner<T, SubType>>> { + bool isFunctionParallel() override { return true; } + + Scanner(FunctionStructValuesMap<T>& functionNewInfos, + FunctionStructValuesMap<T>& functionSetInfos) + : functionNewInfos(functionNewInfos), functionSetInfos(functionSetInfos) {} + + void visitStructNew(StructNew* curr) { + auto type = curr->type; + if (type == Type::unreachable) { + return; + } + + // Note writes to all the fields of the struct. + auto heapType = type.getHeapType(); + auto& fields = heapType.getStruct().fields; + auto& infos = functionNewInfos[this->getFunction()][heapType]; + for (Index i = 0; i < fields.size(); i++) { + if (curr->isWithDefault()) { + static_cast<SubType*>(this)->noteDefault( + fields[i].type, heapType, i, infos[i]); + } else { + noteExpressionOrCopy(curr->operands[i], heapType, i, infos[i]); + } + } + } + + void visitStructSet(StructSet* curr) { + auto type = curr->ref->type; + if (type == Type::unreachable) { + return; + } + + // Note a write to this field of the struct. + noteExpressionOrCopy( + curr->value, + type.getHeapType(), + curr->index, + functionSetInfos[this->getFunction()][type.getHeapType()][curr->index]); + } + + void + noteExpressionOrCopy(Expression* expr, HeapType type, Index index, T& info) { + // Look at the value falling through, if it has the exact same type + // (otherwise, we'd need to consider both the type actually written and the + // type of the fallthrough, somehow). + auto* fallthrough = Properties::getFallthrough( + expr, this->getPassOptions(), *this->getModule()); + if (fallthrough->type == expr->type) { + expr = fallthrough; + } + if (auto* get = expr->dynCast<StructGet>()) { + if (get->index == index && get->ref->type != Type::unreachable && + get->ref->type.getHeapType() == type) { + static_cast<SubType*>(this)->noteCopy(type, index, info); + return; + } + } + static_cast<SubType*>(this)->noteExpression(expr, type, index, info); + } + + FunctionStructValuesMap<T>& functionNewInfos; + FunctionStructValuesMap<T>& functionSetInfos; +}; + +// Helper class to propagate information about fields to sub- and/or super- +// classes in the type hierarchy. While propagating it calls a method +// +// to.combine(from) +// +// which combines the information from |from| into |to|, and should return true +// if we changed something. +template<typename T> class TypeHierarchyPropagator { +public: + TypeHierarchyPropagator(Module& wasm) : subTypes(wasm) {} + + SubTypes subTypes; + + void propagateToSuperTypes(StructValuesMap<T>& infos) { + propagate(infos, false); + } + + void propagateToSuperAndSubTypes(StructValuesMap<T>& infos) { + propagate(infos, true); + } + +private: + void propagate(StructValuesMap<T>& combinedInfos, bool toSubTypes) { + UniqueDeferredQueue<HeapType> work; + for (auto& kv : combinedInfos) { + auto type = kv.first; + work.push(type); + } + while (!work.empty()) { + auto type = work.pop(); + auto& infos = combinedInfos[type]; + + // Propagate shared fields to the supertype. + HeapType superType; + if (type.getSuperType(superType)) { + auto& superInfos = combinedInfos[superType]; + auto& superFields = superType.getStruct().fields; + for (Index i = 0; i < superFields.size(); i++) { + if (superInfos[i].combine(infos[i])) { + work.push(superType); + } + } + } + + if (toSubTypes) { + // Propagate shared fields to the subtypes. + auto numFields = type.getStruct().fields.size(); + for (auto subType : subTypes.getSubTypes(type)) { + auto& subInfos = combinedInfos[subType]; + for (Index i = 0; i < numFields; i++) { + if (subInfos[i].combine(infos[i])) { + work.push(subType); + } + } + } + } + } + } +}; + +} // namespace wasm + +#endif // wasm_ir_struct_utils_h diff --git a/src/ir/subtypes.h b/src/ir/subtypes.h new file mode 100644 index 000000000..661eaf119 --- /dev/null +++ b/src/ir/subtypes.h @@ -0,0 +1,86 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_ir_subtypes_h +#define wasm_ir_subtypes_h + +#include "ir/module-utils.h" +#include "wasm.h" + +namespace wasm { + +// Analyze subtyping relationships and provide useful interfaces to discover +// them. +struct SubTypes { + SubTypes(Module& wasm) { + std::unordered_map<HeapType, Index> typeIndices; + ModuleUtils::collectHeapTypes(wasm, types, typeIndices); + for (auto type : types) { + note(type); + } + } + + const std::vector<HeapType>& getSubTypes(HeapType type) { + return typeSubTypes[type]; + } + + // Get all subtypes of a type, and their subtypes and so forth, recursively. + std::vector<HeapType> getAllSubTypes(HeapType type) { + std::vector<HeapType> ret, work; + work.push_back(type); + while (!work.empty()) { + auto curr = work.back(); + work.pop_back(); + for (auto sub : getSubTypes(curr)) { + ret.push_back(sub); + work.push_back(sub); + } + } + return ret; + } + + // Get all supertypes of a type. The order in the output vector is with the + // immediate supertype first, then its supertype, and so forth. + std::vector<HeapType> getAllSuperTypes(HeapType type) { + std::vector<HeapType> ret; + while (1) { + HeapType super; + if (!type.getSuperType(super)) { + return ret; + } + ret.push_back(super); + type = super; + } + } + + std::vector<HeapType> types; + +private: + // Add a type to the graph. + void note(HeapType type) { + HeapType super; + if (type.getSuperType(super)) { + typeSubTypes[super].push_back(type); + } + } + + // Maps a type to its subtypes. + std::unordered_map<HeapType, std::vector<HeapType>> typeSubTypes; +}; + +} // namespace wasm + +#endif // wasm_ir_subtypes_h diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp index d00e61ca5..0894a822a 100644 --- a/src/passes/ConstantFieldPropagation.cpp +++ b/src/passes/ConstantFieldPropagation.cpp @@ -29,6 +29,7 @@ #include "ir/module-utils.h" #include "ir/properties.h" +#include "ir/struct-utils.h" #include "ir/utils.h" #include "pass.h" #include "support/unique_deferring_queue.h" @@ -40,35 +41,6 @@ namespace wasm { namespace { -// A nominal type always knows who its supertype is, if there is one; this class -// provides the list of immediate subtypes. -struct SubTypes { - SubTypes(Module& wasm) { - std::vector<HeapType> types; - std::unordered_map<HeapType, Index> typeIndices; - ModuleUtils::collectHeapTypes(wasm, types, typeIndices); - for (auto type : types) { - note(type); - } - } - - const std::unordered_set<HeapType>& getSubTypes(HeapType type) { - return typeSubTypes[type]; - } - -private: - // Add a type to the graph. - void note(HeapType type) { - HeapType super; - if (type.getSuperType(super)) { - typeSubTypes[super].insert(type); - } - } - - // Maps a type to its subtypes. - std::unordered_map<HeapType, std::unordered_set<HeapType>> typeSubTypes; -}; - // Represents data about what constant values are possible in a particular // place. There may be no values, or one, or many, or if a non-constant value is // possible, then all we can say is that the value is "unknown" - it can be @@ -159,148 +131,9 @@ private: Literal value; }; -// A vector of PossibleConstantValues. One such vector will be used per struct -// type, where each element in the vector represents a field. We always assume -// that the vectors are pre-initialized to the right length before accessing any -// data, which this class enforces using assertions, and which is implemented in -// StructValuesMap. -struct StructValues : public std::vector<PossibleConstantValues> { - PossibleConstantValues& operator[](size_t index) { - assert(index < size()); - return std::vector<PossibleConstantValues>::operator[](index); - } - - const PossibleConstantValues& operator[](size_t index) const { - assert(index < size()); - return std::vector<PossibleConstantValues>::operator[](index); - } -}; - -// Map of types to information about the values their fields can take. -// Concretely, this maps a type to a StructValues which has one element per -// field. -struct StructValuesMap : public std::unordered_map<HeapType, StructValues> { - // When we access an item, if it does not already exist, create it with a - // vector of the right length for that type. - StructValues& operator[](HeapType type) { - auto inserted = insert({type, {}}); - auto& values = inserted.first->second; - if (inserted.second) { - values.resize(type.getStruct().fields.size()); - } - return values; - } - - void dump(std::ostream& o) { - o << "dump " << this << '\n'; - for (auto& kv : (*this)) { - auto type = kv.first; - auto& vec = kv.second; - o << "dump " << type << " " << &vec << ' '; - for (auto x : vec) { - x.dump(o); - o << " "; - }; - o << '\n'; - } - } -}; - -// Map of functions to their field value infos. We compute those in parallel, -// then later we will merge them all. -using FunctionStructValuesMap = std::unordered_map<Function*, StructValuesMap>; - -// Scan each function to note all its writes to struct fields. -// -// We track information from struct.new and struct.set separately, because in -// struct.new we know more about the type - we know the actual exact type being -// written to, and not just that it is of a subtype of the instruction's type, -// which helps later. -struct Scanner : public WalkerPass<PostWalker<Scanner>> { - bool isFunctionParallel() override { return true; } - - Pass* create() override { - return new Scanner(functionNewInfos, functionSetInfos); - } - - Scanner(FunctionStructValuesMap& functionNewInfos, - FunctionStructValuesMap& functionSetInfos) - : functionNewInfos(functionNewInfos), functionSetInfos(functionSetInfos) {} - - void visitStructNew(StructNew* curr) { - auto type = curr->type; - if (type == Type::unreachable) { - return; - } - - // Note writes to all the fields of the struct. - auto heapType = type.getHeapType(); - auto& values = functionNewInfos[getFunction()][heapType]; - auto& fields = heapType.getStruct().fields; - for (Index i = 0; i < fields.size(); i++) { - if (curr->isWithDefault()) { - values[i].note(Literal::makeZero(fields[i].type)); - } else { - noteExpression(curr->operands[i], heapType, i, functionNewInfos); - } - } - } - - void visitStructSet(StructSet* curr) { - auto type = curr->ref->type; - if (type == Type::unreachable) { - return; - } - - // Note a write to this field of the struct. - noteExpression( - curr->value, type.getHeapType(), curr->index, functionSetInfos); - } - -private: - FunctionStructValuesMap& functionNewInfos; - FunctionStructValuesMap& functionSetInfos; - - // Note a value, checking whether it is a constant or not. - void noteExpression(Expression* expr, - HeapType type, - Index index, - FunctionStructValuesMap& valuesMap) { - expr = Properties::getFallthrough(expr, getPassOptions(), *getModule()); - - // Ignore copies: when we set a value to a field from that same field, no - // new values are actually introduced. - // - // Note that this is only sound by virtue of the overall analysis in this - // pass: the object read from may be of a subclass, and so subclass values - // may be actually written here. But as our analysis considers subclass - // values too (as it must) then that is safe. That is, if a subclass of $A - // adds a value X that can be loaded from (struct.get $A $b), then consider - // a copy - // - // (struct.set $A $b (struct.get $A $b)) - // - // Our analysis will figure out that X can appear in that copy's get, and so - // the copy itself does not add any information about values. - // - // TODO: This may be extensible to a copy from a subtype by the above - // analysis (but this is already entering the realm of diminishing - // returns). - if (auto* get = expr->dynCast<StructGet>()) { - if (get->index == index && get->ref->type != Type::unreachable && - get->ref->type.getHeapType() == type) { - return; - } - } - - auto& info = valuesMap[getFunction()][type][index]; - if (!Properties::isConstantExpression(expr)) { - info.noteUnknown(); - } else { - info.note(Properties::getLiteral(expr)); - } - } -}; +using PCVStructValuesMap = StructValuesMap<PossibleConstantValues>; +using PCVFunctionStructValuesMap = + FunctionStructValuesMap<PossibleConstantValues>; // Optimize struct gets based on what we've learned about writes. // @@ -312,7 +145,7 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> { Pass* create() override { return new FunctionOptimizer(infos); } - FunctionOptimizer(StructValuesMap& infos) : infos(infos) {} + FunctionOptimizer(PCVStructValuesMap& infos) : infos(infos) {} void visitStructGet(StructGet* curr) { auto type = curr->ref->type; @@ -374,11 +207,62 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> { } private: - StructValuesMap& infos; + PCVStructValuesMap& infos; bool changed = false; }; +struct PCVScanner : public Scanner<PossibleConstantValues, PCVScanner> { + Pass* create() override { + return new PCVScanner(functionNewInfos, functionSetInfos); + } + + PCVScanner(FunctionStructValuesMap<PossibleConstantValues>& functionNewInfos, + FunctionStructValuesMap<PossibleConstantValues>& functionSetInfos) + : Scanner<PossibleConstantValues, PCVScanner>(functionNewInfos, + functionSetInfos) {} + + void noteExpression(Expression* expr, + HeapType type, + Index index, + PossibleConstantValues& info) { + + if (!Properties::isConstantExpression(expr)) { + info.noteUnknown(); + } else { + info.note(Properties::getLiteral(expr)); + } + } + + void noteDefault(Type fieldType, + HeapType type, + Index index, + PossibleConstantValues& info) { + info.note(Literal::makeZero(fieldType)); + } + + void noteCopy(HeapType type, Index index, PossibleConstantValues& info) { + // Ignore copies: when we set a value to a field from that same field, no + // new values are actually introduced. + // + // Note that this is only sound by virtue of the overall analysis in this + // pass: the object read from may be of a subclass, and so subclass values + // may be actually written here. But as our analysis considers subclass + // values too (as it must) then that is safe. That is, if a subclass of $A + // adds a value X that can be loaded from (struct.get $A $b), then consider + // a copy + // + // (struct.set $A $b (struct.get $A $b)) + // + // Our analysis will figure out that X can appear in that copy's get, and so + // the copy itself does not add any information about values. + // + // TODO: This may be extensible to a copy from a subtype by the above + // analysis (but this is already entering the realm of diminishing + // returns). + } +}; + struct ConstantFieldPropagation : public Pass { void run(PassRunner* runner, Module* module) override { if (getTypeSystem() != TypeSystem::Nominal) { @@ -386,34 +270,16 @@ struct ConstantFieldPropagation : public Pass { } // Find and analyze all writes inside each function. - FunctionStructValuesMap functionNewInfos, functionSetInfos; - for (auto& func : module->functions) { - // Initialize the data for each function, so that we can operate on this - // structure in parallel without modifying it. - functionNewInfos[func.get()]; - functionSetInfos[func.get()]; - } - Scanner scanner(functionNewInfos, functionSetInfos); + PCVFunctionStructValuesMap functionNewInfos(*module), + functionSetInfos(*module); + PCVScanner scanner(functionNewInfos, functionSetInfos); scanner.run(runner, module); scanner.walkModuleCode(module); // Combine the data from the functions. - auto combine = [](const FunctionStructValuesMap& functionInfos, - StructValuesMap& combinedInfos) { - for (auto& kv : functionInfos) { - const StructValuesMap& infos = kv.second; - for (auto& kv : infos) { - auto type = kv.first; - auto& info = kv.second; - for (Index i = 0; i < info.size(); i++) { - combinedInfos[type][i].combine(info[i]); - } - } - } - }; - StructValuesMap combinedNewInfos, combinedSetInfos; - combine(functionNewInfos, combinedNewInfos); - combine(functionSetInfos, combinedSetInfos); + PCVStructValuesMap combinedNewInfos, combinedSetInfos; + functionNewInfos.combineInto(combinedNewInfos); + functionSetInfos.combineInto(combinedSetInfos); // Handle subtyping. |combinedInfo| so far contains data that represents // each struct.new and struct.set's operation on the struct type used in @@ -446,61 +312,15 @@ struct ConstantFieldPropagation : public Pass { // and so given a get of $A and a new of $B, the new is relevant for the get // iff $A is a subtype of $B, so we only need to propagate in one direction // there, to supertypes. - // - // TODO: A topological sort could avoid repeated work here perhaps. - - SubTypes subTypes(*module); - - auto propagate = [&subTypes](StructValuesMap& combinedInfos, - bool toSubTypes) { - UniqueDeferredQueue<HeapType> work; - for (auto& kv : combinedInfos) { - auto type = kv.first; - work.push(type); - } - while (!work.empty()) { - auto type = work.pop(); - auto& infos = combinedInfos[type]; - - // Propagate shared fields to the supertype. - HeapType superType; - if (type.getSuperType(superType)) { - auto& superInfos = combinedInfos[superType]; - auto& superFields = superType.getStruct().fields; - for (Index i = 0; i < superFields.size(); i++) { - if (superInfos[i].combine(infos[i])) { - work.push(superType); - } - } - } - - if (toSubTypes) { - // Propagate shared fields to the subtypes. - auto numFields = type.getStruct().fields.size(); - for (auto subType : subTypes.getSubTypes(type)) { - auto& subInfos = combinedInfos[subType]; - for (Index i = 0; i < numFields; i++) { - if (subInfos[i].combine(infos[i])) { - work.push(subType); - } - } - } - } - } - }; - propagate(combinedNewInfos, false); - propagate(combinedSetInfos, true); + + TypeHierarchyPropagator<PossibleConstantValues> propagator(*module); + propagator.propagateToSuperTypes(combinedNewInfos); + propagator.propagateToSuperAndSubTypes(combinedSetInfos); // Combine both sources of information to the final information that gets // care about. - StructValuesMap combinedInfos = std::move(combinedNewInfos); - for (auto& kv : combinedSetInfos) { - auto type = kv.first; - auto& info = kv.second; - for (Index i = 0; i < info.size(); i++) { - combinedInfos[type][i].combine(info[i]); - } - } + PCVStructValuesMap combinedInfos = std::move(combinedNewInfos); + combinedSetInfos.combineInto(combinedInfos); // Optimize. // TODO: Skip this if we cannot optimize anything |