/* * Copyright 2022 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef wasm_ir_possible_contents_h #define wasm_ir_possible_contents_h #include #include "ir/possible-constant.h" #include "ir/subtypes.h" #include "support/hash.h" #include "support/small_vector.h" #include "wasm-builder.h" #include "wasm.h" namespace wasm { // // PossibleContents represents the possible contents at a particular location // (such as in a local or in a function parameter). This is a little similar to // PossibleConstantValues, but considers more types of contents than constant // values - in particular, it can track types to some extent. // // The specific contents this can vary over are: // // * None: No possible value. // // * Literal: One possible constant value like an i32 of 42. // // * Global: The name of a global whose value is here. We do not know // the actual value at compile time, but we know it is equal // to that global. Typically we can only infer this for // immutable globals. // // * ConeType: Any possible value of a particular type, and a possible // "cone" of a certain depth below it. If the depth is 0 // then only the exact type is possible; if the depth is 1 // then either that type or its immediate subtypes, and so // forth. // A depth of -1 means unlimited: all subtypes are allowed. // If the type here is nullable then null is also allowed. // TODO: Add ConeTypePlusContents or such, which would be // used on e.g. a struct.new with an immutable field // to which we assign a constant: not only do we know // the type, but also certain field's values. // // * Many: Anything else. Many things are possible here, and we do // not track what they might be, so we must assume the worst // in the calling code. // class PossibleContents { struct None : public std::monostate {}; struct GlobalInfo { Name name; // The type of contents. Note that this may not match the type of the // global, if we were filtered. For example: // // (ref.as_non_null // (global.get $nullable-global) // ) // // The contents flowing out will be a Global, but of a non-nullable type, // unlike the original global. Type type; // TODO: Consider adding a depth here, or merging this with ConeType in some // way. In principle, not having depth info can lead to loss of // precision. bool operator==(const GlobalInfo& other) const { return name == other.name && type == other.type; } }; struct ConeType { Type type; Index depth; bool operator==(const ConeType& other) const { return type == other.type && depth == other.depth; } }; struct Many : public std::monostate {}; // TODO: This is similar to the variant in PossibleConstantValues, and perhaps // we could share code, but extending a variant using template magic may // not be worthwhile. Another option might be to make PCV inherit from // this and disallow ConeType etc., but PCV might get slower. using Variant = std::variant; Variant value; // Internal convenience for creating a cone type with depth 0, i.e,, an exact // type. static ConeType ExactType(Type type) { return ConeType{type, 0}; } static constexpr Index FullDepth = -1; // Internal convenience for creating a cone type of unbounded depth, i.e., the // full cone of all subtypes for that type. static ConeType FullConeType(Type type) { return ConeType{type, FullDepth}; } template PossibleContents(T value) : value(value) {} public: PossibleContents() : value(None()) {} PossibleContents(const PossibleContents& other) = default; // Most users will use one of the following static functions to construct a // new instance: static PossibleContents none() { return PossibleContents{None()}; } static PossibleContents literal(Literal c) { return PossibleContents{c}; } static PossibleContents global(Name name, Type type) { return PossibleContents{GlobalInfo{name, type}}; } // Helper for a cone type with depth 0, i.e., an exact type. static PossibleContents exactType(Type type) { return PossibleContents{ExactType(type)}; } // Helper for a cone with unbounded depth, i.e., the full cone of all subtypes // for that type. static PossibleContents fullConeType(Type type) { return PossibleContents{FullConeType(type)}; } static PossibleContents coneType(Type type, Index depth) { return PossibleContents{ConeType{type, depth}}; } static PossibleContents many() { return PossibleContents{Many()}; } // Helper for creating a PossibleContents based on a wasm type, that is, where // all we know is the wasm type. static PossibleContents fromType(Type type) { assert(type != Type::none); if (type.isRef()) { // For a reference, subtyping matters. return fullConeType(type); } if (type == Type::unreachable) { // Nothing is possible here. return none(); } // Otherwise, this is a concrete MVP type. assert(type.isConcrete()); return exactType(type); } PossibleContents& operator=(const PossibleContents& other) = default; bool operator==(const PossibleContents& other) const { return value == other.value; } bool operator!=(const PossibleContents& other) const { return !(*this == other); } // Combine the information in a given PossibleContents to this one. The // contents here will then include whatever content was possible in |other|. [[nodiscard]] static PossibleContents combine(const PossibleContents& a, const PossibleContents& b); void combine(const PossibleContents& other) { *this = PossibleContents::combine(*this, other); } // Removes anything not in |other| from this object, so that it ends up with // only their intersection. void intersect(const PossibleContents& other); bool isNone() const { return std::get_if(&value); } bool isLiteral() const { return std::get_if(&value); } bool isGlobal() const { return std::get_if(&value); } bool isConeType() const { return std::get_if(&value); } bool isMany() const { return std::get_if(&value); } Literal getLiteral() const { assert(isLiteral()); return std::get(value); } Name getGlobal() const { assert(isGlobal()); return std::get(value).name; } bool isNull() const { return isLiteral() && getLiteral().isNull(); } // Return the relevant type here. Note that the *meaning* of the type varies // by the contents: type $foo of a global means that type or any subtype, as a // subtype might be written to it, while type $foo of a Literal or a ConeType // with depth zero means that type and nothing else, etc. (see also // hasExactType). // // If no type is possible, return unreachable; if many types are, return none. Type getType() const { if (auto* literal = std::get_if(&value)) { return literal->type; } else if (auto* global = std::get_if(&value)) { return global->type; } else if (auto* coneType = std::get_if(&value)) { return coneType->type; } else if (std::get_if(&value)) { return Type::unreachable; } else if (std::get_if(&value)) { return Type::none; } else { WASM_UNREACHABLE("bad value"); } } // Returns cone type info. This can be called on non-cone types as well, and // it returns a cone that best describes them. That is, this is like getType() // but it also provides an indication about the depth, if relevant. (If cone // info is not relevant, like when getType() returns none or unreachable, the // depth is set to 0.) ConeType getCone() const { if (auto* literal = std::get_if(&value)) { return ExactType(literal->type); } else if (auto* global = std::get_if(&value)) { return FullConeType(global->type); } else if (auto* coneType = std::get_if(&value)) { return *coneType; } else if (std::get_if(&value)) { return ExactType(Type::unreachable); } else if (std::get_if(&value)) { return ExactType(Type::none); } else { WASM_UNREACHABLE("bad value"); } } // Returns whether the relevant cone for this, as computed by getCone(), is of // full size, that is, includes all subtypes. bool hasFullCone() const { return getCone().depth == FullDepth; } // Returns whether this is a cone type and also is of full size. This differs // from hasFullCone() in that the former can return true for a global, for // example, while this cannot (a global is not a cone type, but the // information we have about its cone is that it is full). bool isFullConeType() const { return isConeType() && hasFullCone(); } // Returns whether the type we can report here is exact, that is, nothing of a // strict subtype might show up - the contents here have an exact type. // // This returns false for None and Many, for whom it is not well-defined. bool hasExactType() const { if (isLiteral()) { return true; } if (auto* coneType = std::get_if(&value)) { return coneType->depth == 0; } return false; } // Returns whether the given contents have any intersection, that is, whether // some value exists that can appear in both |a| and |b|. For example, if // either is None, or if they are different literals, then they have no // intersection. static bool haveIntersection(const PossibleContents& a, const PossibleContents& b); // Returns whether |a| is a subset of |b|, that is, all possible contents of // |a| are also possible in |b|. static bool isSubContents(const PossibleContents& a, const PossibleContents& b); // Whether we can make an Expression* for this containing the proper contents. // We can do that for a Literal (emitting a Const or RefFunc etc.) or a // Global (emitting a GlobalGet), but not for anything else yet. bool canMakeExpression() const { return isLiteral() || isGlobal(); } Expression* makeExpression(Module& wasm) { assert(canMakeExpression()); Builder builder(wasm); if (isLiteral()) { return builder.makeConstantExpression(getLiteral()); } else { auto name = getGlobal(); // Note that we load the type from the module, rather than use the type // in the GlobalInfo, as that type may not match the global (see comment // in the GlobalInfo declaration above). return builder.makeGlobalGet(name, wasm.getGlobal(name)->type); } } size_t hash() const { // First hash the index of the variant, then add the internals for each. size_t ret = std::hash()(value.index()); if (isNone() || isMany()) { // Nothing to add. } else if (isLiteral()) { rehash(ret, getLiteral()); } else if (auto* global = std::get_if(&value)) { rehash(ret, global->name); rehash(ret, global->type); } else if (auto* coneType = std::get_if(&value)) { rehash(ret, coneType->type); rehash(ret, coneType->depth); } else { WASM_UNREACHABLE("bad variant"); } return ret; } void dump(std::ostream& o, Module* wasm = nullptr) const { o << '['; if (isNone()) { o << "None"; } else if (isLiteral()) { o << "Literal " << getLiteral(); auto t = getType(); if (t.isRef()) { auto h = t.getHeapType(); o << " HT: " << h; } } else if (isGlobal()) { o << "GlobalInfo $" << getGlobal() << " T: " << getType(); } else if (auto* coneType = std::get_if(&value)) { auto t = coneType->type; o << "ConeType " << t; if (coneType->depth == 0) { o << " exact"; } else { o << " depth=" << coneType->depth; } if (t.isRef()) { auto h = t.getHeapType(); o << " HT: " << h; if (wasm && wasm->typeNames.count(h)) { o << " $" << wasm->typeNames[h].name; } if (t.isNullable()) { o << " null"; } } } else if (isMany()) { o << "Many"; } else { WASM_UNREACHABLE("bad variant"); } o << ']'; } }; // The various *Location structs (ExpressionLocation, ResultLocation, etc.) // describe particular locations where content can appear. // The location of a specific IR expression. struct ExpressionLocation { Expression* expr; // If this expression contains a tuple then each index in the tuple will have // its own location with a corresponding tupleIndex. If this is not a tuple // then we only use tupleIndex 0. Index tupleIndex; bool operator==(const ExpressionLocation& other) const { return expr == other.expr && tupleIndex == other.tupleIndex; } }; // The location of one of the parameters of a function. struct ParamLocation { Function* func; Index index; bool operator==(const ParamLocation& other) const { return func == other.func && index == other.index; } }; // The location of a value in a local. struct LocalLocation { Function* func; Index index; bool operator==(const LocalLocation& other) const { return func == other.func && index == other.index; } }; // The location of one of the results of a function. struct ResultLocation { Function* func; Index index; bool operator==(const ResultLocation& other) const { return func == other.func && index == other.index; } }; // The location of a global in the module. struct GlobalLocation { Name name; bool operator==(const GlobalLocation& other) const { return name == other.name; } }; // The location of one of the parameters in a function signature. struct SignatureParamLocation { HeapType type; Index index; bool operator==(const SignatureParamLocation& other) const { return type == other.type && index == other.index; } }; // The location of one of the results in a function signature. struct SignatureResultLocation { HeapType type; Index index; bool operator==(const SignatureResultLocation& other) const { return type == other.type && index == other.index; } }; // The location of contents in a struct or array (i.e., things that can fit in a // dataref). Note that this is specific to this type - it does not include data // about subtypes or supertypes. // // We store the truncated bits here when the field is packed. That is, if -1 is // written to an i8 then the value here will be 0xff. StructGet/ArrayGet // operations that read a signed value must then perform a sign-extend // operation. struct DataLocation { HeapType type; // The index of the field in a struct, or 0 for an array (where we do not // attempt to differentiate by index). Index index; bool operator==(const DataLocation& other) const { return type == other.type && index == other.index; } }; // The location of anything written to a particular tag. struct TagLocation { Name tag; // If the tag has more than one element, we'll have a separate TagLocation for // each, with corresponding indexes. If the tag has just one element we'll // only have one TagLocation with index 0. Index tupleIndex; bool operator==(const TagLocation& other) const { return tag == other.tag && tupleIndex == other.tupleIndex; } }; // The location of an exnref materialized by a catch_ref or catch_all_ref clause // of a try_table. No data is stored here. exnrefs contain a tag and a payload // at run-time, as well as potential metadata such as stack traces, but we don't // track that. So this is the same as NullLocation in a way: we just need *a* // source of contents for places that receive an exnref. struct CaughtExnRefLocation { bool operator==(const CaughtExnRefLocation& other) const { return true; } }; // A null value. This is used as the location of the default value of a var in a // function, a null written to a struct field in struct.new_with_default, etc. struct NullLocation { Type type; bool operator==(const NullLocation& other) const { return type == other.type; } }; // A special type of location that does not refer to something concrete in the // wasm, but is used to optimize the graph. A "cone read" is a struct.get or // array.get of a type that is not exact, so it can read from either that type // of some of the subtypes (up to a particular subtype depth). // // In general a read of a cone type + depth (as opposed to an exact type) will // require N incoming links, from each of the N subtypes - and we need that // for each struct.get of a cone. If there are M such gets then we have N * M // edges for this. Instead, we make a single canonical "cone read" location, and // add a single link to it from each get, which is only N + M (plus the cost // of adding "latency" in requiring an additional step along the way for the // data to flow along). struct ConeReadLocation { HeapType type; // As in PossibleContents, this represents the how deep we go with subtypes. // 0 means an exact type, 1 means immediate subtypes, etc. (Note that 0 is not // needed since that is what DataLocation already is.) Index depth; // The index of the field in a struct, or 0 for an array (where we do not // attempt to differentiate by index). Index index; bool operator==(const ConeReadLocation& other) const { return type == other.type && depth == other.depth && index == other.index; } }; // A location is a variant over all the possible flavors of locations that we // have. using Location = std::variant; } // namespace wasm namespace std { std::ostream& operator<<(std::ostream& stream, const wasm::PossibleContents& contents); template<> struct hash { size_t operator()(const wasm::PossibleContents& contents) const { return contents.hash(); } }; // Define hashes of all the *Location flavors so that Location itself is // hashable and we can use it in unordered maps and sets. template<> struct hash { size_t operator()(const wasm::ExpressionLocation& loc) const { return std::hash>{}( {size_t(loc.expr), loc.tupleIndex}); } }; template<> struct hash { size_t operator()(const wasm::ParamLocation& loc) const { return std::hash>{}( {size_t(loc.func), loc.index}); } }; template<> struct hash { size_t operator()(const wasm::LocalLocation& loc) const { return std::hash>{}( {size_t(loc.func), loc.index}); } }; template<> struct hash { size_t operator()(const wasm::ResultLocation& loc) const { return std::hash>{}( {size_t(loc.func), loc.index}); } }; template<> struct hash { size_t operator()(const wasm::GlobalLocation& loc) const { return std::hash{}(loc.name); } }; template<> struct hash { size_t operator()(const wasm::SignatureParamLocation& loc) const { return std::hash>{}( {loc.type, loc.index}); } }; template<> struct hash { size_t operator()(const wasm::SignatureResultLocation& loc) const { return std::hash>{}( {loc.type, loc.index}); } }; template<> struct hash { size_t operator()(const wasm::DataLocation& loc) const { return std::hash>{}( {loc.type, loc.index}); } }; template<> struct hash { size_t operator()(const wasm::TagLocation& loc) const { return std::hash>{}( {loc.tag, loc.tupleIndex}); } }; template<> struct hash { size_t operator()(const wasm::CaughtExnRefLocation& loc) const { return std::hash()("caught-exnref-location"); } }; template<> struct hash { size_t operator()(const wasm::NullLocation& loc) const { return std::hash{}(loc.type); } }; template<> struct hash { size_t operator()(const wasm::ConeReadLocation& loc) const { return std::hash>{}( {loc.type, loc.depth, loc.index}); } }; } // namespace std namespace wasm { // Analyze the entire wasm file to find which contents are possible in which // locations. This assumes a closed world and starts from roots - newly created // values - and propagates them to the locations they reach. After the // analysis the user of this class can ask which contents are possible at any // location. // // This focuses on useful information for the typical user of this API. // Specifically, we find out: // // 1. What locations have no content reaching them at all. That means the code // is unreachable. (Other passes may handle this, but ContentOracle does it // for all things, so it might catch situations other passes do not cover; // and, it takes no effort to support this here). // 2. For all locations, we try to find when they must contain a constant value // like i32(42) or ref.func(foo). // 3. For locations that contain references, information about the subtypes // possible there. For example, if something has wasm type anyref in the IR, // we might find it must contain an exact type of something specific. // // Note that there is not much use in providing type info for locations that are // *not* references. If a local is i32, for example, then it cannot contain any // subtype anyhow, since i32 is not a reference and has no subtypes. And we know // the type i32 from the wasm anyhow, that is, the caller will know it. // Therefore the only useful information we can provide on top of the info // already in the wasm is either that nothing can be there (1, above), or that a // constant must be there (2, above), and so we do not make an effort to track // non-reference types here. This makes the internals of ContentOracle simpler // and faster. A noticeable outcome of that is that querying the contents of an // i32 local will return Many and not ConeType{i32, 0} (assuming we could not // infer either that there must be nothing there, or a constant). Again, the // caller is assumed to know the wasm IR type anyhow, and also other // optimization passes work on the types in the IR, so we do not focus on that // here. class ContentOracle { Module& wasm; const PassOptions& options; void analyze(); public: ContentOracle(Module& wasm, const PassOptions& options) : wasm(wasm), options(options) { analyze(); } // Get the contents possible at a location. PossibleContents getContents(Location location) { auto iter = locationContents.find(location); if (iter == locationContents.end()) { // We know of no possible contents here. return PossibleContents::none(); } return iter->second; } // Helper for the common case of an expression location that is not a // multivalue. PossibleContents getContents(Expression* curr) { assert(curr->type.size() == 1); return getContents(ExpressionLocation{curr, 0}); } private: std::unordered_map locationContents; }; } // namespace wasm #endif // wasm_ir_possible_contents_h