/*
 * Copyright 2022 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <optional>
#include <variant>

#include "analysis/cfg.h"
#include "ir/bits.h"
#include "ir/branch-utils.h"
#include "ir/eh-utils.h"
#include "ir/gc-type-utils.h"
#include "ir/linear-execution.h"
#include "ir/local-graph.h"
#include "ir/module-utils.h"
#include "ir/possible-contents.h"
#include "support/insert_ordered.h"
#include "wasm.h"

namespace std {

std::ostream& operator<<(std::ostream& stream,
                         const wasm::PossibleContents& contents) {
  contents.dump(stream);
  return stream;
}

} // namespace std

namespace wasm {

PossibleContents PossibleContents::combine(const PossibleContents& a,
                                           const PossibleContents& b) {
  auto aType = a.getType();
  auto bType = b.getType();
  // First handle the trivial cases of them being equal, or one of them being
  // None or Many.
  if (a == b) {
    return a;
  }
  if (b.isNone()) {
    return a;
  }
  if (a.isNone()) {
    return b;
  }
  if (a.isMany()) {
    return a;
  }
  if (b.isMany()) {
    return b;
  }

  if (!aType.isRef() || !bType.isRef()) {
    // At least one is not a reference. The only possibility left for a useful
    // combination here is if they have the same type (since we've already
    // ruled out the case of them being equal). If they have the same type
    // then neither is a reference and we can emit an exact type (since
    // subtyping is not relevant for non-references).
    if (aType == bType) {
      return ExactType(aType);
    } else {
      return Many();
    }
  }

  // Special handling for references from here.

  if (a.isNull() && b.isNull()) {
    // These must be nulls in different hierarchies, otherwise a would have
    // been handled by the `a == b` case above.
    assert(aType != bType);
    return Many();
  }

  auto lub = Type::getLeastUpperBound(aType, bType);
  if (lub == Type::none) {
    // The types are not in the same hierarchy.
    return Many();
  }

  // From here we can assume there is a useful LUB.

  // Nulls can be combined in by just adding nullability to a type.
  if (a.isNull() || b.isNull()) {
    // Only one of them can be null here, since we already handled the case
    // where they were both null.
    assert(!a.isNull() || !b.isNull());

    // If only one is a null then we can use the type info from the non-null
    // one, and just add in nullability. For example, a literal of type T and
    // a null becomes an exact type of T that allows nulls, and so forth.
    auto mixInNull = [](ConeType cone) {
      cone.type = Type(cone.type.getHeapType(), Nullable);
      return cone;
    };
    if (!a.isNull()) {
      return mixInNull(a.getCone());
    } else if (!b.isNull()) {
      return mixInNull(b.getCone());
    }
  }

  // Find a ConeType that describes both inputs, using the shared ancestor
  // which is the LUB. We need to find how big a cone we need: the cone must
  // be big enough to contain both the inputs.
  auto aDepth = a.getCone().depth;
  auto bDepth = b.getCone().depth;
  Index newDepth;
  if (aDepth == FullDepth || bDepth == FullDepth) {
    // At least one has full (infinite) depth, so we know the new depth must
    // be the same.
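    // (No finite cone depth on the LUB could contain that full cone, so the
    // result must itself be a full cone.)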
newDepth = FullDepth; } else { // The depth we need under the lub is how far from the lub we are, plus // the depth of our cone. // TODO: we could make a single loop that also does the LUB, at the same // time, and also avoids calling getDepth() which loops once more? auto aDepthFromRoot = aType.getHeapType().getDepth(); auto bDepthFromRoot = bType.getHeapType().getDepth(); auto lubDepthFromRoot = lub.getHeapType().getDepth(); assert(lubDepthFromRoot <= aDepthFromRoot); assert(lubDepthFromRoot <= bDepthFromRoot); Index aDepthUnderLub = aDepthFromRoot - lubDepthFromRoot + aDepth; Index bDepthUnderLub = bDepthFromRoot - lubDepthFromRoot + bDepth; // The total cone must be big enough to contain all the above. newDepth = std::max(aDepthUnderLub, bDepthUnderLub); } return ConeType{lub, newDepth}; } void PossibleContents::intersect(const PossibleContents& other) { // This does not yet handle all possible content. assert(other.isFullConeType() || other.isLiteral() || other.isNone()); if (*this == other) { // Nothing changes. return; } if (!haveIntersection(*this, other)) { // There is no intersection at all. // Note that this code path handles |this| or |other| being None. value = None(); return; } if (isSubContents(other, *this)) { // The intersection is just |other|. // Note that this code path handles |this| being Many. value = other.value; return; } if (isSubContents(*this, other)) { // The intersection is just |this|. return; } if (isLiteral() || other.isLiteral()) { // We've ruled out either being a subcontents of the other. A literal has // no other intersection possibility. value = None(); return; } auto type = getType(); auto otherType = other.getType(); auto heapType = type.getHeapType(); auto otherHeapType = otherType.getHeapType(); // If both inputs are nullable then the intersection is nullable as well. auto nullability = type.isNullable() && otherType.isNullable() ? Nullable : NonNullable; auto setNoneOrNull = [&]() { if (nullability == Nullable) { value = Literal::makeNull(heapType); } else { value = None(); } }; // If the heap types are not compatible then they are in separate hierarchies // and there is no intersection, aside from possibly a null of the bottom // type. auto isSubType = HeapType::isSubType(heapType, otherHeapType); auto otherIsSubType = HeapType::isSubType(otherHeapType, heapType); if (!isSubType && !otherIsSubType) { if (heapType.getBottom() == otherHeapType.getBottom()) { setNoneOrNull(); } else { value = None(); } return; } // The heap types are compatible, so intersect the cones. auto depthFromRoot = heapType.getDepth(); auto otherDepthFromRoot = otherHeapType.getDepth(); // To compute the new cone, find the new heap type for it, and to compute its // depth, consider the adjustments to the existing depths that stem from the // choice of new heap type. HeapType newHeapType; if (depthFromRoot < otherDepthFromRoot) { newHeapType = otherHeapType; } else { newHeapType = heapType; } // Note the global's information, if we started as a global. In that case, the // code below will refine our type but we can remain a global, which we will // accomplish by restoring our global status at the end. std::optional globalName; if (isGlobal()) { globalName = getGlobal(); } auto newType = Type(newHeapType, nullability); // By assumption |other| has full depth. Consider the other cone in |this|. if (hasFullCone()) { // Both are full cones, so the result is as well. value = FullConeType(newType); } else { // The result is a partial cone. 
If the cone starts in |otherHeapType| then // we need to adjust the depth down, since it will be smaller than the // original cone: /* // .. // / // otherHeapType // / \ // heapType .. // \ */ // E.g. if |this| is a cone of depth 10, and |otherHeapType| is an immediate // subtype of |this|, then the new cone must be of depth 9. auto newDepth = getCone().depth; if (newHeapType == otherHeapType) { assert(depthFromRoot <= otherDepthFromRoot); auto reduction = otherDepthFromRoot - depthFromRoot; if (reduction > newDepth) { // The cone on heapType does not even reach the cone on otherHeapType, // so the result is not a cone. setNoneOrNull(); return; } newDepth -= reduction; } value = ConeType{newType, newDepth}; } if (globalName) { // Restore the global but keep the new and refined type. value = GlobalInfo{*globalName, getType()}; } } bool PossibleContents::haveIntersection(const PossibleContents& a, const PossibleContents& b) { if (a.isNone() || b.isNone()) { // One is the empty set, so nothing can intersect here. return false; } if (a.isMany() || b.isMany()) { // One is the set of all things, so definitely something can intersect since // we've ruled out an empty set for both. return true; } if (a == b) { // The intersection is equal to them. return true; } auto aType = a.getType(); auto bType = b.getType(); if (!aType.isRef() || !bType.isRef()) { // At least one is not a reference. The only way they can intersect is if // the type is identical, and they are not both literals (we've already // ruled out them being identical earlier). return aType == bType && (!a.isLiteral() || !b.isLiteral()); } // From here on we focus on references. auto aHeapType = aType.getHeapType(); auto bHeapType = bType.getHeapType(); if (aType.isNullable() && bType.isNullable() && aHeapType.getBottom() == bHeapType.getBottom()) { // A compatible null is possible on both sides. return true; } // We ruled out having a compatible null on both sides. If one is simply a // null then no chance for an intersection remains. if (a.isNull() || b.isNull()) { return false; } auto aSubB = HeapType::isSubType(aHeapType, bHeapType); auto bSubA = HeapType::isSubType(bHeapType, aHeapType); if (!aSubB && !bSubA) { // No type can appear in both a and b, so the types differ, so the values // do not overlap. return false; } // From here on we focus on references and can ignore the case of null - any // intersection must be of a non-null value, so we can focus on the heap // types. auto aDepthFromRoot = aHeapType.getDepth(); auto bDepthFromRoot = bHeapType.getDepth(); if (aSubB) { // A is a subtype of B. For there to be an intersection we need their cones // to intersect, that is, to rule out the case where the cone from B is not // deep enough to reach A. assert(aDepthFromRoot >= bDepthFromRoot); return aDepthFromRoot - bDepthFromRoot <= b.getCone().depth; } else if (bSubA) { assert(bDepthFromRoot >= aDepthFromRoot); return bDepthFromRoot - aDepthFromRoot <= a.getCone().depth; } else { WASM_UNREACHABLE("we ruled out no subtyping before"); } // TODO: we can also optimize things like different Literals, but existing // passes do such things already so it is low priority. } bool PossibleContents::isSubContents(const PossibleContents& a, const PossibleContents& b) { if (a == b) { return true; } if (a.isNone()) { return true; } if (b.isNone()) { return false; } if (a.isMany()) { return false; } if (b.isMany()) { return true; } if (a.isLiteral()) { // Note we already checked for |a == b| above. 
We need b to be a set that // contains the literal a. return !b.isLiteral() && Type::isSubType(a.getType(), b.getType()); } if (b.isLiteral()) { return false; } if (b.isFullConeType()) { if (a.isNull()) { return b.getType().isNullable(); } return Type::isSubType(a.getType(), b.getType()); } if (a.isFullConeType()) { // We've already ruled out b being a full cone type before. return false; } WASM_UNREACHABLE("unhandled case of isSubContents"); } namespace { // We are going to do a very large flow operation, potentially, as we create // a Location for every interesting part in the entire wasm, and some of those // places will have lots of links (like a struct field may link out to every // single struct.get of that type), so we must make the data structures here // as efficient as possible. Towards that goal, we work with location // *indexes* where possible, which are small (32 bits) and do not require any // complex hashing when we use them in sets or maps. // // Note that we do not use indexes everywhere, since the initial analysis is // done in parallel, and we do not have a fixed indexing of locations yet. When // we merge the parallel data we create that indexing, and use indexes from then // on. using LocationIndex = uint32_t; #ifndef NDEBUG // Assert on not having duplicates in a vector. template void disallowDuplicates(const T& targets) { #if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2 std::unordered_set uniqueTargets; for (const auto& target : targets) { uniqueTargets.insert(target); } assert(uniqueTargets.size() == targets.size()); #endif } #endif // A link indicates a flow of content from one location to another. For // example, if we do a local.get and return that value from a function, then // we have a link from the ExpressionLocation of that local.get to a // ResultLocation. template struct Link { T from; T to; bool operator==(const Link& other) const { return from == other.from && to == other.to; } }; using LocationLink = Link; using IndexLink = Link; } // anonymous namespace } // namespace wasm namespace std { template<> struct hash { size_t operator()(const wasm::LocationLink& loc) const { return std::hash>{}( {loc.from, loc.to}); } }; template<> struct hash { size_t operator()(const wasm::IndexLink& loc) const { return std::hash>{}( {loc.from, loc.to}); } }; } // namespace std namespace wasm { namespace { // The data we gather from each function, as we process them in parallel. Later // this will be merged into a single big graph. struct CollectedFuncInfo { // All the links we found in this function. Rarely are there duplicates // in this list (say when writing to the same global location from another // global location), and we do not try to deduplicate here, just store them in // a plain array for now, which is faster (later, when we merge all the info // from the functions, we need to deduplicate anyhow). std::vector links; // All the roots of the graph, that is, places that begin by containing some // particular content. That includes i32.const, ref.func, struct.new, etc. All // possible contents in the rest of the graph flow from such places. // // The vector here is of the location of the root and then its contents. std::vector> roots; // In some cases we need to know the parent of the expression. Consider this: // // (struct.set $A k // (local.get $ref) // (local.get $value) // ) // // Imagine that the first local.get, for $ref, receives a new value. 
That can // affect where the struct.set sends values: if previously that local.get had // no possible contents, and now it does, then we have DataLocations to // update. Likewise, when the second local.get is updated we must do the same, // but again which DataLocations we update depends on the ref passed to the // struct.set. To handle such things, we set add a childParent link, and then // when we update the child we can find the parent and handle any special // behavior we need there. std::unordered_map childParents; // All functions that might be called from the outside. Any RefFunc suggests // that, in open world. (We could be more precise and use our flow analysis to // see which, in fact, flow outside, but it is unclear how useful that would // be. Anyhow, closed-world is more important to optimize, and avoids this.) std::unordered_set calledFromOutside; }; // Does a walk while maintaining a map of names of branch targets to those // expressions, so they can be found by their name. // TODO: can this replace ControlFlowWalker in other places? template> struct BreakTargetWalker : public PostWalker { std::unordered_map breakTargets; Expression* findBreakTarget(Name name) { return breakTargets[name]; } static void scan(SubType* self, Expression** currp) { auto* curr = *currp; BranchUtils::operateOnScopeNameDefs( curr, [&](Name name) { self->breakTargets[name] = curr; }); PostWalker::scan(self, currp); } }; // Walk the wasm and find all the links we need to care about, and the locations // and roots related to them. This builds up a CollectedFuncInfo data structure. // After all InfoCollectors run, those data structures will be merged and the // main flow will begin. struct InfoCollector : public BreakTargetWalker> { CollectedFuncInfo& info; const PassOptions& options; InfoCollector(CollectedFuncInfo& info, const PassOptions& options) : info(info), options(options) {} // Check if a type is relevant for us. If not, we can ignore it entirely. bool isRelevant(Type type) { if (type == Type::unreachable || type == Type::none) { return false; } if (type.isTuple()) { for (auto t : type) { if (isRelevant(t)) { return true; } } } return true; } bool isRelevant(Signature sig) { return isRelevant(sig.params) || isRelevant(sig.results); } bool isRelevant(Expression* curr) { return curr && isRelevant(curr->type); } template bool isRelevant(const T& vec) { for (auto* expr : vec) { if (isRelevant(expr->type)) { return true; } } return false; } // Each visit*() call is responsible for connecting the children of a node to // that node. Responsibility for connecting the node's output to anywhere // else (another expression or the function itself, if we are at the top // level) is the responsibility of the outside. void visitBlock(Block* curr) { if (curr->list.empty()) { return; } // The final item in the block can flow a value to here as well. receiveChildValue(curr->list.back(), curr); } void visitIf(If* curr) { // Each arm may flow out a value. receiveChildValue(curr->ifTrue, curr); receiveChildValue(curr->ifFalse, curr); } void visitLoop(Loop* curr) { receiveChildValue(curr->body, curr); } void visitBreak(Break* curr) { // Connect the value (if present) to the break target. handleBreakValue(curr); // The value may also flow through in a br_if (the type will indicate that, // which receiveChildValue will notice). 
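    // For example,
    //
    //  (br_if $target
    //    (X)
    //    (condition)
    //  )
    //
    // sends the contents of X to $target, and also flows them out as the
    // br_if's own value when the branch is not taken.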
receiveChildValue(curr->value, curr); } void visitSwitch(Switch* curr) { handleBreakValue(curr); } void visitLoad(Load* curr) { // We could infer the exact type here, but as no subtyping is possible, it // would have no benefit, so just add a generic root (which will be "Many"). // See the comment on the ContentOracle class. addRoot(curr); } void visitStore(Store* curr) {} void visitAtomicRMW(AtomicRMW* curr) { addRoot(curr); } void visitAtomicCmpxchg(AtomicCmpxchg* curr) { addRoot(curr); } void visitAtomicWait(AtomicWait* curr) { addRoot(curr); } void visitAtomicNotify(AtomicNotify* curr) { addRoot(curr); } void visitAtomicFence(AtomicFence* curr) {} void visitSIMDExtract(SIMDExtract* curr) { addRoot(curr); } void visitSIMDReplace(SIMDReplace* curr) { addRoot(curr); } void visitSIMDShuffle(SIMDShuffle* curr) { addRoot(curr); } void visitSIMDTernary(SIMDTernary* curr) { addRoot(curr); } void visitSIMDShift(SIMDShift* curr) { addRoot(curr); } void visitSIMDLoad(SIMDLoad* curr) { addRoot(curr); } void visitSIMDLoadStoreLane(SIMDLoadStoreLane* curr) { addRoot(curr); } void visitMemoryInit(MemoryInit* curr) {} void visitDataDrop(DataDrop* curr) {} void visitMemoryCopy(MemoryCopy* curr) {} void visitMemoryFill(MemoryFill* curr) {} void visitConst(Const* curr) { addRoot(curr, PossibleContents::literal(curr->value)); } void visitUnary(Unary* curr) { // We could optimize cases like this using interpreter integration: if the // input is a Literal, we could interpret the Literal result. However, if // the input is a literal then the GUFA pass will emit a Const there, and // the Precompute pass can use that later to interpret a result. That is, // the input we need here, a constant, is already something GUFA can emit as // an output. As a result, integrating the interpreter here would perhaps // make compilation require fewer steps, but it wouldn't let us optimize // more than we could before. addRoot(curr); } void visitBinary(Binary* curr) { addRoot(curr); } void visitSelect(Select* curr) { receiveChildValue(curr->ifTrue, curr); receiveChildValue(curr->ifFalse, curr); } void visitDrop(Drop* curr) {} void visitMemorySize(MemorySize* curr) { addRoot(curr); } void visitMemoryGrow(MemoryGrow* curr) { addRoot(curr); } void visitRefNull(RefNull* curr) { addRoot( curr, PossibleContents::literal(Literal::makeNull(curr->type.getHeapType()))); } void visitRefIsNull(RefIsNull* curr) { // TODO: Optimize when possible. For example, if we can infer an exact type // here which allows us to know the result then we should do so. This // is unlike the case in visitUnary, above: the information that lets // us optimize *cannot* be written into Binaryen IR (unlike a Literal) // so using it during this pass allows us to optimize new things. addRoot(curr); } void visitRefFunc(RefFunc* curr) { addRoot( curr, PossibleContents::literal(Literal(curr->func, curr->type.getHeapType()))); // The presence of a RefFunc indicates the function may be called // indirectly, so add the relevant connections for this particular function. // We do so here in the RefFunc so that we only do it for functions that // actually have a RefFunc. 
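    // That is, anything sent to a call with this signature (a call_ref, or a
    // call_indirect of the proper type) may reach this function's parameters,
    // and this function's results may flow back out to such calls.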
auto* func = getModule()->getFunction(curr->func); for (Index i = 0; i < func->getParams().size(); i++) { info.links.push_back( {SignatureParamLocation{func->type, i}, ParamLocation{func, i}}); } for (Index i = 0; i < func->getResults().size(); i++) { info.links.push_back( {ResultLocation{func, i}, SignatureResultLocation{func->type, i}}); } if (!options.closedWorld) { info.calledFromOutside.insert(curr->func); } } void visitRefEq(RefEq* curr) { addRoot(curr); } void visitTableGet(TableGet* curr) { // TODO: be more precise addRoot(curr); } void visitTableSet(TableSet* curr) {} void visitTableSize(TableSize* curr) { addRoot(curr); } void visitTableGrow(TableGrow* curr) { addRoot(curr); } void visitTableFill(TableFill* curr) { addRoot(curr); } void visitTableCopy(TableCopy* curr) { addRoot(curr); } void visitTableInit(TableInit* curr) {} void visitNop(Nop* curr) {} void visitUnreachable(Unreachable* curr) {} #ifndef NDEBUG // For now we only handle pops in a catch body, see visitTry(). To check for // errors, use counter of the pops we handled and all the pops; those sums // must agree at the end, or else we've seen something we can't handle. Index totalPops = 0; Index handledPops = 0; #endif void visitPop(Pop* curr) { #ifndef NDEBUG totalPops++; #endif } void visitRefI31(RefI31* curr) { // TODO: optimize like struct references addRoot(curr); } void visitI31Get(I31Get* curr) { // TODO: optimize like struct references addRoot(curr); } void visitRefCast(RefCast* curr) { receiveChildValue(curr->ref, curr); } void visitRefTest(RefTest* curr) { addRoot(curr); } void visitBrOn(BrOn* curr) { // TODO: optimize when possible handleBreakValue(curr); receiveChildValue(curr->ref, curr); } void visitRefAs(RefAs* curr) { if (curr->op == ExternConvertAny || curr->op == AnyConvertExtern) { // The external conversion ops emit something of a completely different // type, which we must mark as a root. addRoot(curr); return; } // All other RefAs operations flow values through while refining them (the // filterExpressionContents method will handle the refinement // automatically). receiveChildValue(curr->value, curr); } void visitLocalSet(LocalSet* curr) { if (!isRelevant(curr->value->type)) { return; } // Tees flow out the value (receiveChildValue will see if this is a tee // based on the type, automatically). receiveChildValue(curr->value, curr); // We handle connecting local.gets to local.sets below, in visitFunction. } void visitLocalGet(LocalGet* curr) { // We handle connecting local.gets to local.sets below, in visitFunction. } // Globals read and write from their location. void visitGlobalGet(GlobalGet* curr) { if (isRelevant(curr->type)) { // FIXME: we allow tuples in globals, so GlobalLocation needs a tupleIndex // and we should loop here. assert(!curr->type.isTuple()); info.links.push_back( {GlobalLocation{curr->name}, ExpressionLocation{curr, 0}}); } } void visitGlobalSet(GlobalSet* curr) { if (isRelevant(curr->value->type)) { info.links.push_back( {ExpressionLocation{curr->value, 0}, GlobalLocation{curr->name}}); } } // Iterates over a list of children and adds links to parameters and results // as needed. The param/result functions receive the index and create the // proper location for it. template void handleCall(T* curr, std::function makeParamLocation, std::function makeResultLocation) { Index i = 0; for (auto* operand : curr->operands) { if (isRelevant(operand->type)) { info.links.push_back( {ExpressionLocation{operand, 0}, makeParamLocation(i)}); } i++; } // Add results, if anything flows out. 
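    // For example, if the call's type is a tuple (i32 i64) then result
    // locations 0 and 1 (of the target function or signature) flow into the
    // call's own tuple locations 0 and 1.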
for (Index i = 0; i < curr->type.size(); i++) { if (isRelevant(curr->type[i])) { info.links.push_back( {makeResultLocation(i), ExpressionLocation{curr, i}}); } } // If this is a return call then send the result to the function return as // well. if (curr->isReturn) { auto results = getFunction()->getResults(); for (Index i = 0; i < results.size(); i++) { auto result = results[i]; if (isRelevant(result)) { info.links.push_back( {makeResultLocation(i), ResultLocation{getFunction(), i}}); } } } } // Calls send values to params in their possible targets, and receive // results. template void handleDirectCall(T* curr, Name targetName) { auto* target = getModule()->getFunction(targetName); handleCall( curr, [&](Index i) { assert(i <= target->getParams().size()); return ParamLocation{target, i}; }, [&](Index i) { assert(i <= target->getResults().size()); return ResultLocation{target, i}; }); } template void handleIndirectCall(T* curr, HeapType targetType) { // If the heap type is not a signature, which is the case for a bottom type // (null) then nothing can be called. if (!targetType.isSignature()) { assert(targetType.isBottom()); return; } handleCall( curr, [&](Index i) { assert(i <= targetType.getSignature().params.size()); return SignatureParamLocation{targetType, i}; }, [&](Index i) { assert(i <= targetType.getSignature().results.size()); return SignatureResultLocation{targetType, i}; }); } template void handleIndirectCall(T* curr, Type targetType) { // If the type is unreachable, nothing can be called (and there is no heap // type to get). if (targetType != Type::unreachable) { handleIndirectCall(curr, targetType.getHeapType()); } } void visitCall(Call* curr) { Name targetName; if (!Intrinsics(*getModule()).isCallWithoutEffects(curr)) { // This is just a normal call. handleDirectCall(curr, curr->target); return; } // A call-without-effects receives a function reference and calls it, the // same as a CallRef. When we have a flag for non-closed-world, we should // handle this automatically by the reference flowing out to an import, // which is what binaryen intrinsics look like. For now, to support use // cases of a closed world but that also use this intrinsic, handle the // intrinsic specifically here. (Without that, the closed world assumption // makes us ignore the function ref that flows to an import, so we are not // aware that it is actually called.) auto* target = curr->operands.back(); // We must ignore the last element when handling the call - the target is // used to perform the call, and not sent during the call. curr->operands.pop_back(); if (auto* refFunc = target->dynCast()) { // We can see exactly where this goes. handleDirectCall(curr, refFunc->func); } else { // We can't see where this goes. We must be pessimistic and assume it // can call anything of the proper type, the same as a CallRef. (We could // look at the possible contents of |target| during the flow, but that // would require special logic like we have for StructGet etc., and the // intrinsics will be lowered away anyhow, so just running after that is // a workaround.) handleIndirectCall(curr, target->type); } // Restore the target. curr->operands.push_back(target); } void visitCallIndirect(CallIndirect* curr) { // TODO: the table identity could also be used here // TODO: optimize the call target like CallRef handleIndirectCall(curr, curr->heapType); } void visitCallRef(CallRef* curr) { handleIndirectCall(curr, curr->target->type); } // Creates a location for a null of a particular type and adds a root for it. 
// Such roots are where the default value of an i32 local comes from, or the // value in a ref.null. Location getNullLocation(Type type) { auto location = NullLocation{type}; addRoot(location, PossibleContents::literal(Literal::makeZero(type))); return location; } // Iterates over a list of children and adds links from them. The target of // those link is created using a function that is passed in, which receives // the index of the child. void linkChildList(ExpressionList& operands, std::function makeTarget) { Index i = 0; for (auto* operand : operands) { // This helper is not used from places that allow a tuple (hence we can // hardcode the index 0 a few lines down). assert(!operand->type.isTuple()); if (isRelevant(operand->type)) { info.links.push_back({ExpressionLocation{operand, 0}, makeTarget(i)}); } i++; } } void visitStructNew(StructNew* curr) { if (curr->type == Type::unreachable) { return; } auto type = curr->type.getHeapType(); if (curr->isWithDefault()) { // Link the default values to the struct's fields. auto& fields = type.getStruct().fields; for (Index i = 0; i < fields.size(); i++) { info.links.push_back( {getNullLocation(fields[i].type), DataLocation{type, i}}); } } else { // Link the operands to the struct's fields. linkChildList(curr->operands, [&](Index i) { return DataLocation{type, i}; }); } addRoot(curr, PossibleContents::exactType(curr->type)); } void visitArrayNew(ArrayNew* curr) { if (curr->type == Type::unreachable) { return; } auto type = curr->type.getHeapType(); if (curr->init) { info.links.push_back( {ExpressionLocation{curr->init, 0}, DataLocation{type, 0}}); } else { info.links.push_back( {getNullLocation(type.getArray().element.type), DataLocation{type, 0}}); } addRoot(curr, PossibleContents::exactType(curr->type)); } void visitArrayNewData(ArrayNewData* curr) { if (curr->type == Type::unreachable) { return; } addRoot(curr, PossibleContents::exactType(curr->type)); auto heapType = curr->type.getHeapType(); Type elemType = heapType.getArray().element.type; addRoot(DataLocation{heapType, 0}, PossibleContents::fromType(elemType)); } void visitArrayNewElem(ArrayNewElem* curr) { if (curr->type == Type::unreachable) { return; } addRoot(curr, PossibleContents::exactType(curr->type)); auto heapType = curr->type.getHeapType(); Type segType = getModule()->getElementSegment(curr->segment)->type; addRoot(DataLocation{heapType, 0}, PossibleContents::fromType(segType)); return; } void visitArrayNewFixed(ArrayNewFixed* curr) { if (curr->type == Type::unreachable) { return; } if (!curr->values.empty()) { auto type = curr->type.getHeapType(); linkChildList(curr->values, [&](Index i) { // The index i is ignored, as we do not track indexes in Arrays - // everything is modeled as if at index 0. return DataLocation{type, 0}; }); } addRoot(curr, PossibleContents::exactType(curr->type)); } // Struct operations access the struct fields' locations. void visitStructGet(StructGet* curr) { if (!isRelevant(curr->ref)) { // If references are irrelevant then we will ignore them, and we won't // have information about this struct.get's reference, which means we // won't have information to compute relevant values for this struct.get. // Instead, just mark this as an unknown value (root). addRoot(curr); return; } // The struct.get will receive different values depending on the contents // in the reference, so mark us as the parent of the ref, and we will // handle all of this in a special way during the flow. 
Note that we do // not even create a DataLocation here; anything that we need will be // added during the flow. addChildParentLink(curr->ref, curr); } void visitStructSet(StructSet* curr) { if (curr->ref->type == Type::unreachable) { return; } // See comment on visitStructGet. Here we also connect the value. addChildParentLink(curr->ref, curr); addChildParentLink(curr->value, curr); } // Array operations access the array's location, parallel to how structs work. void visitArrayGet(ArrayGet* curr) { if (!isRelevant(curr->ref)) { addRoot(curr); return; } addChildParentLink(curr->ref, curr); } void visitArraySet(ArraySet* curr) { if (curr->ref->type == Type::unreachable) { return; } addChildParentLink(curr->ref, curr); addChildParentLink(curr->value, curr); } void visitArrayLen(ArrayLen* curr) { // TODO: optimize when possible (perhaps we can infer a Literal for the // length) addRoot(curr); } void visitArrayCopy(ArrayCopy* curr) { if (curr->type == Type::unreachable) { return; } // Our flow handling of GC data is not simple: we have special code for each // read and write instruction. Therefore, to avoid adding special code for // ArrayCopy, model it as a combination of an ArrayRead and ArrayWrite, by // just emitting fake expressions for those. The fake expressions are not // part of the main IR, which is potentially confusing during debugging, // however, which is a downside. Builder builder(*getModule()); auto* get = builder.makeArrayGet(curr->srcRef, curr->srcIndex, curr->srcRef->type); visitArrayGet(get); auto* set = builder.makeArraySet(curr->destRef, curr->destIndex, get); visitArraySet(set); } void visitArrayFill(ArrayFill* curr) { if (curr->type == Type::unreachable) { return; } // See ArrayCopy, above. Builder builder(*getModule()); auto* set = builder.makeArraySet(curr->ref, curr->index, curr->value); visitArraySet(set); } template void visitArrayInit(ArrayInit* curr) { // Check for both unreachability and a bottom type. In either case we have // no work to do, and would error on an assertion below in finding the array // type. auto field = GCTypeUtils::getField(curr->ref->type); if (!field) { return; } // See ArrayCopy, above. Here an additional complexity is that we need to // model the read from the segment. As in TableGet, for now we just assume // any value is possible there (a root in the graph), which we set up // manually here as a fake unknown value, using a fake local.get that we // root. 
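    // As with ArrayCopy above, the fake expression is never added to the
    // actual IR; it only gives us a location to attach to in the graph.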
// TODO: be more precise about what is in the table auto valueType = field->type; Builder builder(*getModule()); auto* get = builder.makeLocalGet(-1, valueType); addRoot(get); auto* set = builder.makeArraySet(curr->ref, curr->index, get); visitArraySet(set); } void visitArrayInitData(ArrayInitData* curr) { visitArrayInit(curr); } void visitArrayInitElem(ArrayInitElem* curr) { visitArrayInit(curr); } void visitStringNew(StringNew* curr) { if (curr->type == Type::unreachable) { return; } addRoot(curr, PossibleContents::exactType(curr->type)); } void visitStringConst(StringConst* curr) { addRoot(curr, PossibleContents::literal(Literal(std::string(curr->string.str)))); } void visitStringMeasure(StringMeasure* curr) { // TODO: optimize when possible addRoot(curr); } void visitStringEncode(StringEncode* curr) { // TODO: optimize when possible addRoot(curr); } void visitStringConcat(StringConcat* curr) { // TODO: optimize when possible addRoot(curr); } void visitStringEq(StringEq* curr) { // TODO: optimize when possible addRoot(curr); } void visitStringWTF16Get(StringWTF16Get* curr) { // TODO: optimize when possible addRoot(curr); } void visitStringSliceWTF(StringSliceWTF* curr) { // TODO: optimize when possible addRoot(curr); } // TODO: Model which throws can go to which catches. For now, anything thrown // is sent to the location of that tag, and any catch of that tag can // read them. void visitTry(Try* curr) { receiveChildValue(curr->body, curr); for (auto* catchBody : curr->catchBodies) { receiveChildValue(catchBody, curr); } auto numTags = curr->catchTags.size(); for (Index tagIndex = 0; tagIndex < numTags; tagIndex++) { auto tag = curr->catchTags[tagIndex]; auto* body = curr->catchBodies[tagIndex]; auto params = getModule()->getTag(tag)->sig.params; if (params.size() == 0) { continue; } // Find the pop of the tag's contents. The body must start with such a // pop, which might be of a tuple. auto* pop = EHUtils::findPop(body); // There must be a pop since we checked earlier if it was an empty tag, // and would not reach here. assert(pop); assert(pop->type.size() == params.size()); for (Index i = 0; i < params.size(); i++) { if (isRelevant(params[i])) { info.links.push_back( {TagLocation{tag, i}, ExpressionLocation{pop, i}}); } } #ifndef NDEBUG // This pop was in the position we can handle, note that (see visitPop // for details). handledPops++; #endif } } void visitTryTable(TryTable* curr) { receiveChildValue(curr->body, curr); // Connect caught tags with their branch targets, and materialize non-null // exnref values. 
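    // For example, a (catch_ref $tag $label) where $tag has params (i32 i64)
    // sends those two values to $label's tuple locations 0 and 1, and a
    // non-null exnref to location 2.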
auto numTags = curr->catchTags.size(); for (Index tagIndex = 0; tagIndex < numTags; tagIndex++) { auto tag = curr->catchTags[tagIndex]; auto target = curr->catchDests[tagIndex]; Index exnrefIndex = 0; if (tag.is()) { auto params = getModule()->getTag(tag)->sig.params; for (Index i = 0; i < params.size(); i++) { if (isRelevant(params[i])) { info.links.push_back( {TagLocation{tag, i}, getBreakTargetLocation(target, i)}); } } exnrefIndex = params.size(); } if (curr->catchRefs[tagIndex]) { auto location = CaughtExnRefLocation{}; addRoot(location, PossibleContents::fromType(Type(HeapType::exn, NonNullable))); info.links.push_back( {location, getBreakTargetLocation(target, exnrefIndex)}); } } } void visitThrow(Throw* curr) { auto& operands = curr->operands; if (!isRelevant(operands)) { return; } auto tag = curr->tag; for (Index i = 0; i < curr->operands.size(); i++) { info.links.push_back( {ExpressionLocation{operands[i], 0}, TagLocation{tag, i}}); } } void visitRethrow(Rethrow* curr) {} void visitThrowRef(ThrowRef* curr) {} void visitTupleMake(TupleMake* curr) { if (isRelevant(curr->type)) { for (Index i = 0; i < curr->operands.size(); i++) { info.links.push_back({ExpressionLocation{curr->operands[i], 0}, ExpressionLocation{curr, i}}); } } } void visitTupleExtract(TupleExtract* curr) { if (isRelevant(curr->type)) { info.links.push_back({ExpressionLocation{curr->tuple, curr->index}, ExpressionLocation{curr, 0}}); } } // Adds a result to the current function, such as from a return or the value // that flows out. void addResult(Expression* value) { if (value && isRelevant(value->type)) { for (Index i = 0; i < value->type.size(); i++) { info.links.push_back( {ExpressionLocation{value, i}, ResultLocation{getFunction(), i}}); } } } void visitReturn(Return* curr) { addResult(curr->value); } void visitContBind(ContBind* curr) { // TODO: optimize when possible addRoot(curr); } void visitContNew(ContNew* curr) { // TODO: optimize when possible addRoot(curr); } void visitResume(Resume* curr) { // TODO: optimize when possible addRoot(curr); } void visitSuspend(Suspend* curr) { // TODO: optimize when possible addRoot(curr); } void visitFunction(Function* func) { // Functions with a result can flow a value out from their body. addResult(func->body); // See visitPop(). assert(handledPops == totalPops); // Handle local.get/sets: each set must write to the proper gets. // // Note that we do not use LocalLocation because LocalGraph gives us more // precise information: we generate direct links from sets to relevant gets // rather than consider each local index a single location, which // LocalLocation does. (LocalLocation is useful in cases where we do need a // single location, such as when we consider what type to give the local; // the type must be the same for all gets of that local.) LocalGraph localGraph(func, getModule()); for (auto& [curr, _] : localGraph.locations) { auto* get = curr->dynCast(); if (!get) { continue; } auto index = get->index; auto type = func->getLocalType(index); if (!isRelevant(type)) { continue; } // Each get reads from its relevant sets. for (auto* set : localGraph.getSets(get)) { for (Index i = 0; i < type.size(); i++) { Location source; if (set) { // This is a normal local.set. source = ExpressionLocation{set->value, i}; } else if (getFunction()->isParam(index)) { // This is a parameter. source = ParamLocation{getFunction(), index}; } else { // This is the default value from the function entry, a null. 
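            // (For a numeric local the default is simply a zero;
            // getNullLocation roots Literal::makeZero of the proper type.)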
source = getNullLocation(type[i]); } info.links.push_back({source, ExpressionLocation{get, i}}); } } } } // Helpers // Returns the location of a break target by the name (e.g. returns the // location of a block, if the name is the name of a block). Also receives the // index in a tuple, if this is part of a tuple value. Location getBreakTargetLocation(Name target, Index i) { return ExpressionLocation{findBreakTarget(target), i}; } // Handles the value sent in a break instruction. Does not handle anything // else like the condition etc. void handleBreakValue(Expression* curr) { BranchUtils::operateOnScopeNameUsesAndSentValues( curr, [&](Name target, Expression* value) { if (value && isRelevant(value->type)) { for (Index i = 0; i < value->type.size(); i++) { // Breaks send the contents of the break value to the branch target // that the break goes to. info.links.push_back({ExpressionLocation{value, i}, getBreakTargetLocation(target, i)}); } } }); } // Connect a child's value to the parent, that is, all content in the child is // now considered possible in the parent as well. void receiveChildValue(Expression* child, Expression* parent) { if (isRelevant(parent) && isRelevant(child)) { // The tuple sizes must match (or, if not a tuple, the size should be 1 in // both cases). assert(child->type.size() == parent->type.size()); for (Index i = 0; i < child->type.size(); i++) { info.links.push_back( {ExpressionLocation{child, i}, ExpressionLocation{parent, i}}); } } } // See the comment on CollectedFuncInfo::childParents. void addChildParentLink(Expression* child, Expression* parent) { if (isRelevant(child->type)) { info.childParents[child] = parent; } } // Adds a root, if the expression is relevant. If the value is not specified, // mark the root as containing Many (which is the common case, so avoid // verbose code). void addRoot(Expression* curr, PossibleContents contents = PossibleContents::many()) { // TODO Use a cone type here when relevant if (isRelevant(curr)) { if (contents.isMany()) { contents = PossibleContents::fromType(curr->type); } addRoot(ExpressionLocation{curr, 0}, contents); } } // As above, but given an arbitrary location and not just an expression. void addRoot(Location loc, PossibleContents contents = PossibleContents::many()) { info.roots.emplace_back(loc, contents); } }; // TrapsNeverHappen Oracle. This makes inferences *backwards* from traps that we // know will not happen due to the TNH assumption. For example, // // (local.get $a) // (ref.cast $B (local.get $a)) // // The cast happens right after the first local.get, and we assume it does not // fail, so the local must contain a B, even though the IR only has A. // // This analysis complements ContentOracle, which uses this analysis internally. // ContentOracle does a forward flow analysis (as content moves from place to // place) which increases from "nothing", while this does a backwards analysis // that decreases from "everything" (or rather, from the type declared in the // IR), so the two cannot be done at once. // // TODO: We could cycle between this and ContentOracle for repeated // improvements. // TODO: This pass itself could benefit from internal cycles. // // This analysis mainly focuses on information across calls, as simple backwards // inference is done in OptimizeCasts. Note that it is not needed if a call is // inlined, obviously, and so it mostly helps cases like functions too large to // inline, or when optimizing for size, or with indirect calls. 
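// For example, if a function's body begins with
//
//  (ref.cast $B
//    (local.get $0)  ;; a parameter whose declared type is $A
//  )
//
// then, since we assume the cast does not trap, every caller must be passing
// a $B in that parameter, even though the signature only requires an $A.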
// // We track cast parameters by mapping an index to the type it is definitely // cast to if the function is entered. From that information we can infer things // about the values being sent to the function (which we can assume must have // the right type so that the casts do not trap). using CastParams = std::unordered_map; // The information we collect and utilize as we operate in parallel in each // function. struct TNHInfo { CastParams castParams; // TODO: Returns as well: when we see (ref.cast (call $foo)) in all callers // then we can refine inside $foo (in closed world). // We gather calls in parallel in order to process them later. std::vector calls; std::vector callRefs; // Note if a function body definitely traps. bool traps = false; // We gather inferences in parallel and combine them at the end. std::unordered_map inferences; }; class TNHOracle : public ModuleUtils::ParallelFunctionAnalysis { const PassOptions& options; public: using Parent = ModuleUtils::ParallelFunctionAnalysis; TNHOracle(Module& wasm, const PassOptions& options) : Parent(wasm, [this, &options](Function* func, TNHInfo& info) { scan(func, info, options); }), options(options) { // After the scanning phase that we run in the constructor, continue to the // second phase of analysis: inference. infer(); } // Get the type we inferred was possible at a location. PossibleContents getContents(Expression* curr) { auto naiveContents = PossibleContents::fullConeType(curr->type); // If we inferred nothing, use the naive type. auto iter = inferences.find(curr); if (iter == inferences.end()) { return naiveContents; } auto& contents = iter->second; // We only store useful contents that improve on the naive estimate that // uses the type in the IR. assert(contents != naiveContents); return contents; } private: // Maps expressions to the content we inferred there. If an expression is not // here then expression->type (the type in Binaryen IR) is all we have. std::unordered_map inferences; // Phase 1: Scan to find cast parameters and calls. This operates on a single // function, and is called in parallel. void scan(Function* func, TNHInfo& info, const PassOptions& options); // Phase 2: Infer contents based on what we scanned. void infer(); // Optimize one specific call (or call_ref). void optimizeCallCasts(Expression* call, const ExpressionList& operands, const CastParams& targetCastParams, const analysis::CFGBlockIndexes& blockIndexes, TNHInfo& info); }; void TNHOracle::scan(Function* func, TNHInfo& info, const PassOptions& options) { if (func->imported()) { return; } // Gather parameters that are definitely cast in the function entry. struct EntryScanner : public LinearExecutionWalker { Module& wasm; const PassOptions& options; TNHInfo& info; EntryScanner(Module& wasm, const PassOptions& options, TNHInfo& info) : wasm(wasm), options(options), info(info) {} // Note while we are still in the entry (first) block. bool inEntryBlock = true; static void doNoteNonLinear(EntryScanner* self, Expression** currp) { // This is the end of the first basic block. self->inEntryBlock = false; } void visitCall(Call* curr) { info.calls.push_back(curr); } void visitCallRef(CallRef* curr) { // We can only optimize call_ref in closed world, as otherwise the // call can go somewhere we can't see. 
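      // (In an open world the target reference may come from, or escape to,
      // code outside the module that we cannot see, so we cannot enumerate
      // the possible callees.)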
if (options.closedWorld) { info.callRefs.push_back(curr); } } void visitRefAs(RefAs* curr) { if (curr->op == RefAsNonNull) { noteCast(curr->value, curr->type); } } void visitRefCast(RefCast* curr) { noteCast(curr->ref, curr->type); } // Note a cast of an expression to a particular type. void noteCast(Expression* expr, Type type) { if (!inEntryBlock) { return; } auto* fallthrough = Properties::getFallthrough(expr, options, wasm); if (auto* get = fallthrough->dynCast()) { // To optimize, this needs to be a param, and of a useful type. // // Note that if we see more than one cast we keep the first one. This is // not important in optimized code, as the most refined cast would be // the only one to exist there, so it's ok to keep things simple here. if (getFunction()->isParam(get->index) && type != get->type && info.castParams.count(get->index) == 0) { info.castParams[get->index] = type; } } } // Operations that trap on null are equivalent to casts to non-null, in that // they imply that their input is non-null if traps never happen. // // We only look at them if the input is actually nullable, since if they // are non-nullable then we can add no information. (This is equivalent // to the handling of RefAsNonNull above, in the sense that in optimized // code the RefAs will not appear if the input is already non-nullable). // This function is called with the reference that will be trapped on, // if it is null. void notePossibleTrap(Expression* expr) { if (!expr->type.isRef() || expr->type.isNonNullable()) { return; } noteCast(expr, Type(expr->type.getHeapType(), NonNullable)); } void visitStructGet(StructGet* curr) { notePossibleTrap(curr->ref); } void visitStructSet(StructSet* curr) { notePossibleTrap(curr->ref); } void visitArrayGet(ArrayGet* curr) { notePossibleTrap(curr->ref); } void visitArraySet(ArraySet* curr) { notePossibleTrap(curr->ref); } void visitArrayLen(ArrayLen* curr) { notePossibleTrap(curr->ref); } void visitArrayCopy(ArrayCopy* curr) { notePossibleTrap(curr->srcRef); notePossibleTrap(curr->destRef); } void visitArrayFill(ArrayFill* curr) { notePossibleTrap(curr->ref); } void visitArrayInitData(ArrayInitData* curr) { notePossibleTrap(curr->ref); } void visitArrayInitElem(ArrayInitElem* curr) { notePossibleTrap(curr->ref); } void visitFunction(Function* curr) { // In optimized TNH code, a function that always traps will be turned // into a singleton unreachable instruction, so it is enough to check // for that. if (curr->body->is()) { info.traps = true; } } } scanner(wasm, options, info); scanner.walkFunction(func); } void TNHOracle::infer() { // Phase 2: Inside each function, optimize calls based on the cast params of // the called function (which we noted during phase 1). // // Specifically, each time we call a target that will cast a param, we can // infer that the param must have that type (or else we'd trap, but we are // assuming traps never happen). // // While doing so we must be careful of control flow transfers right before // the call: // // (call $target // (A) // (br_if ..) // (B) // ) // // If we branch in the br_if then we might execute A and then something else // entirely, and not reach B or the call. In that case we can't infer anything // about A (perhaps, for example, we branch away exactly when A would fail the // cast). Therefore in the optimization below we only optimize code that, if // reached, will definitely reach the call, like B. 
// // TODO: Some control flow transfers are ok, so long as we must reach the // call, like if we replace the br_if with an if with two arms (and no // branches in either). // TODO: We can also infer backwards past basic blocks from casts, even // without calls. Any cast tells us something about the uses of that // value that must reach the cast. // TODO: We can do a whole-program flow of this information. // For call_ref, we need to know which functions belong to each type. Gather // that first. This map will map each heap type to each function that is of // that type or a subtype, i.e., might be called when that type is seen in a // call_ref target. std::unordered_map> typeFunctions; if (options.closedWorld) { for (auto& func : wasm.functions) { auto type = func->type; auto& info = map[wasm.getFunction(func->name)]; if (info.traps) { // This function definitely traps, so we can assume it is never called, // and don't need to even bother putting it in |typeFunctions|. continue; } while (1) { typeFunctions[type].push_back(func.get()); if (auto super = type.getDeclaredSuperType()) { type = *super; } else { break; } } } } doAnalysis([&](Function* func, TNHInfo& info) { // We will need some CFG information below. Computing this is expensive, so // only do it if we find optimization opportunities. std::optional blockIndexes; auto ensureCFG = [&]() { if (!blockIndexes) { auto cfg = analysis::CFG::fromFunction(func); blockIndexes = analysis::CFGBlockIndexes(cfg); } }; for (auto* call : info.calls) { auto& targetInfo = map[wasm.getFunction(call->target)]; auto& targetCastParams = targetInfo.castParams; if (targetCastParams.empty()) { continue; } // This looks promising, create the CFG if we haven't already, and // optimize. ensureCFG(); optimizeCallCasts( call, call->operands, targetCastParams, *blockIndexes, info); // Note that we don't need to do anything for targetInfo.traps for a // direct call: the inliner will inline the singleton unreachable in the // target function anyhow. } for (auto* call : info.callRefs) { auto targetType = call->target->type; if (!targetType.isRef()) { // This is unreachable or null, and other passes will optimize that. continue; } // We should only get here in a closed world, in which we know which // functions might be called (the scan phase only notes callRefs if we are // in fact in a closed world). assert(options.closedWorld); auto iter = typeFunctions.find(targetType.getHeapType()); if (iter == typeFunctions.end()) { // No function exists of this type, so the call_ref will trap. We can // mark the target as empty, which has the identical effect. info.inferences[call->target] = PossibleContents::none(); continue; } // Go through the targets and ignore any that will trap. That will leave // us with the actually possible targets. // // Note that we did not even add functions that certainly trap to // |typeFunctions| at all, so those are already excluded. const auto& targets = iter->second; std::vector possibleTargets; for (Function* target : targets) { auto& targetInfo = map[target]; // If any of our operands will fail a cast, then we will trap. bool traps = false; for (auto& [castIndex, castType] : targetInfo.castParams) { auto operandType = call->operands[castIndex]->type; auto result = GCTypeUtils::evaluateCastCheck(operandType, castType); if (result == GCTypeUtils::Failure) { traps = true; break; } } if (!traps) { possibleTargets.push_back(target); } } if (possibleTargets.empty()) { // No target is possible. 
info.inferences[call->target] = PossibleContents::none(); continue; } if (possibleTargets.size() == 1) { // There is exactly one possible call target, which means we can // actually infer what the call_ref is calling. Add that as an // inference. // TODO: We could also optimizeCallCasts() here, but it is low priority // as other opts will make this call direct later, after which a // lot of other optimizations become possible anyhow. auto target = possibleTargets[0]->name; info.inferences[call->target] = PossibleContents::literal( Literal(target, wasm.getFunction(target)->type)); continue; } // More than one target exists: apply the intersection of their // constraints. That is, if they all cast the k-th parameter to type T (or // more) than we can apply that here. auto numParams = call->operands.size(); std::vector sharedCastParamsVec(numParams, Type::unreachable); for (auto* target : possibleTargets) { auto& targetInfo = map[target]; auto& targetCastParams = targetInfo.castParams; for (Index i = 0; i < numParams; i++) { auto iter = targetCastParams.find(i); if (iter == targetCastParams.end()) { // If the target does not cast, we cannot do anything with this // parameter; mark it as unoptimizable with an impossible type. sharedCastParamsVec[i] = Type::none; continue; } // This function casts this param. Combine this with existing info. auto castType = iter->second; sharedCastParamsVec[i] = Type::getLeastUpperBound(sharedCastParamsVec[i], castType); } } // Build a map of the interesting cast params we found, and if there are // any, optimize using them. CastParams sharedCastParams; for (Index i = 0; i < numParams; i++) { auto type = sharedCastParamsVec[i]; if (type != Type::none) { sharedCastParams[i] = type; } } if (!sharedCastParams.empty()) { ensureCFG(); optimizeCallCasts( call, call->operands, sharedCastParams, *blockIndexes, info); } } }); // Combine all of our inferences from the parallel phase above us into the // final list of inferences. for (auto& [_, info] : map) { for (auto& [expr, contents] : info.inferences) { inferences[expr] = contents; } } } void TNHOracle::optimizeCallCasts(Expression* call, const ExpressionList& operands, const CastParams& targetCastParams, const analysis::CFGBlockIndexes& blockIndexes, TNHInfo& info) { // Optimize in the same basic block as the call: all instructions still in // that block will definitely execute if the call is reached. We will do that // by going backwards through the call's operands and fallthrough values, and // optimizing while we are still in the same basic block. auto callBlockIndex = blockIndexes.get(call); // Operands must exist since there is a cast param, so a param exists. assert(operands.size() > 0); for (int i = int(operands.size() - 1); i >= 0; i--) { auto* operand = operands[i]; if (blockIndexes.get(operand) != callBlockIndex) { // Control flow might transfer; stop. break; } auto iter = targetCastParams.find(i); if (iter == targetCastParams.end()) { // This param is not cast, so skip it. continue; } // If the call executes then this parameter is definitely reached (since it // is in the same basic block), and we know that it will be cast to a more // refined type. auto castType = iter->second; // Apply what we found to the operand and also to its fallthrough // values. // // At the loop entry |curr| has been checked for a possible control flow // transfer (and that problem ruled out). auto* curr = operand; while (1) { // Note the type if it is useful. 
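      // For example, if the operand's declared type is (ref null $A) and the
      // target casts that parameter to (ref $B), where $B is a subtype of $A,
      // then only a (ref $B) can actually flow here without trapping, so we
      // can note that more refined type.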
if (castType != curr->type) { // There are two constraints on this location: any value there must // be of the declared type (curr->type) and also the cast type, so // we know only their intersection can appear here. auto declared = PossibleContents::fullConeType(curr->type); auto intersection = PossibleContents::fullConeType(castType); intersection.intersect(declared); if (intersection.isConeType()) { auto intersectionType = intersection.getType(); if (intersectionType != curr->type) { // We inferred a more refined type. info.inferences[curr] = intersection; } } else { // Otherwise, the intersection can be a null (if the heap types are // incompatible, but a null is allowed), or empty. We can apply // either. assert(intersection.isNull() || intersection.isNone()); info.inferences[curr] = intersection; } } auto* next = Properties::getImmediateFallthrough(curr, options, wasm); if (next == curr) { // No fallthrough, we're done with this param. break; } // There is a fallthrough. Check for a control flow transfer. if (blockIndexes.get(next) != callBlockIndex) { // Control flow might transfer; stop. We also cannot look at any further // operands (if a child of this operand is in another basic block from // the call, so are previous operands), so return from the entire // function. return; } // Continue to the fallthrough. curr = next; } } } // Main logic for building data for the flow analysis and then performing that // analysis. struct Flower { Module& wasm; const PassOptions& options; Flower(Module& wasm, const PassOptions& options); // Each LocationIndex will have one LocationInfo that contains the relevant // information we need for each location. struct LocationInfo { // The location at this index. Location location; // The possible contents in that location. PossibleContents contents; // A list of the target locations to which this location sends content. // TODO: benchmark SmallVector<1> here, as commonly there may be a single // target (an expression has one parent) std::vector targets; LocationInfo(Location location) : location(location) {} }; // Maps location indexes to the info stored there, as just described above. std::vector locations; // Reverse mapping of locations to their indexes. std::unordered_map locationIndexes; const Location& getLocation(LocationIndex index) { assert(index < locations.size()); return locations[index].location; } PossibleContents& getContents(LocationIndex index) { assert(index < locations.size()); return locations[index].contents; } // Check what we know about the type of an expression, using static // information from a TrapsNeverHappen oracle (see TNHOracle), if we have one. PossibleContents getTNHContents(Expression* curr) { if (!tnhOracle) { // No oracle; just use the type in the IR. return PossibleContents::fullConeType(curr->type); } return tnhOracle->getContents(curr); } private: std::unique_ptr tnhOracle; std::vector& getTargets(LocationIndex index) { assert(index < locations.size()); return locations[index].targets; } // Convert the data into the efficient LocationIndex form we will use during // the flow analysis. This method returns the index of a location, allocating // one if this is the first time we see it. LocationIndex getIndex(const Location& location) { auto iter = locationIndexes.find(location); if (iter != locationIndexes.end()) { return iter->second; } // Allocate a new index here. 
    size_t index = locations.size();
#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
    std::cout << "  new index " << index << " for ";
    dump(location);
#endif
    if (index >= std::numeric_limits<LocationIndex>::max()) {
      // 32 bits should be enough since each location takes at least one byte
      // in the binary, and we don't have 4GB wasm binaries yet... do we?
      Fatal() << "Too many locations for 32 bits";
    }
    locations.emplace_back(location);
    locationIndexes[location] = index;

    return index;
  }

  bool hasIndex(const Location& location) {
    return locationIndexes.find(location) != locationIndexes.end();
  }

  IndexLink getIndexes(const LocationLink& link) {
    return {getIndex(link.from), getIndex(link.to)};
  }

  // See the comment on CollectedFuncInfo::childParents. This is the merged
  // info from all the functions and the global scope.
  std::unordered_map<LocationIndex, LocationIndex> childParents;

  // The work remaining to do during the flow: locations that we need to flow
  // content from, after new content reached them.
  //
  // Using a set here is efficient as multiple updates may arrive to a location
  // before we get to processing it.
  //
  // The items here could be {location, newContents}, but it is more efficient
  // to have already written the new contents to the main data structure. That
  // avoids larger data here, and also, updating the contents as early as
  // possible is helpful as anything reading them meanwhile (before we get to
  // their work item in the queue) will see the newer value, possibly avoiding
  // flowing an old value that would later be overwritten.
  //
  // This must be ordered to avoid nondeterminism. The problem is that our
  // operations are imprecise, so the order of evaluation matters: (AvB)vC may
  // differ from Av(BvC), and likewise (AvB)^C may differ from (A^C)v(B^C). An
  // example of the latter is if a location is sent a null func and an i31,
  // and the location can only contain funcref. If the null func arrives
  // first, then later we'd merge null func + i31 which ends up as Many, and
  // then we filter that to funcref and get funcref. But if the i31 arrived
  // first, we'd filter it into nothing, and then the null func that arrives
  // later would be the final result. This would not happen if our operations
  // were precise, but we only make approximations here to avoid unacceptable
  // overhead, such as cone types but not arbitrary unions, etc.
  InsertOrderedSet<LocationIndex> workQueue;

  // All existing links in the graph. We keep this to know when a link we want
  // to add is new or not.
  std::unordered_set<IndexLink> links;

  // Update a location with new contents that are added to everything already
  // present there. If the update changes the contents at that location (if
  // there was anything new) then we also need to flow from there, which we
  // will do by adding the location to the work queue, and eventually
  // flowAfterUpdate will be called on this location.
  //
  // Returns whether it is worth sending new contents to this location in the
  // future. If we return false, the sending location never needs to do that
  // ever again.
  bool updateContents(LocationIndex locationIndex,
                      PossibleContents newContents);

  // Slow helper that converts a Location to a LocationIndex. This should be
  // avoided. TODO: remove the remaining uses of this.
  bool updateContents(const Location& location,
                      const PossibleContents& newContents) {
    return updateContents(getIndex(location), newContents);
  }

  // Flow contents from a location where a change occurred.
  // This sends the new contents to all the normal targets of this location
  // (using flowToTargetsAfterUpdate), and also handles special cases of flow
  // after.
  void flowAfterUpdate(LocationIndex locationIndex);

  // Internal part of flowAfterUpdate that handles sending new values to the
  // given location index's normal targets (that is, the ones listed in the
  // |targets| vector).
  void flowToTargetsAfterUpdate(LocationIndex locationIndex,
                                const PossibleContents& contents);

  // Add a new connection while the flow is happening. If the link already
  // exists it is not added.
  void connectDuringFlow(Location from, Location to);

  // Contents sent to certain locations can be filtered in a special way during
  // the flow, which is handled in these helpers. These may update
  // |worthSendingMore| which is whether it is worth sending any more content
  // to this location in the future.
  void filterExpressionContents(PossibleContents& contents,
                                const ExpressionLocation& exprLoc,
                                bool& worthSendingMore);
  void filterGlobalContents(PossibleContents& contents,
                            const GlobalLocation& globalLoc);
  void filterDataContents(PossibleContents& contents,
                          const DataLocation& dataLoc);
  void filterPackedDataReads(PossibleContents& contents,
                             const ExpressionLocation& exprLoc);

  // Reads from GC data: a struct.get or array.get. This is given the type of
  // the read operation, the field that is read on that type, the known
  // contents in the reference the read receives, and the read instruction
  // itself. We compute where we need to read from based on the type and the
  // ref contents and get that data, adding new links in the graph as needed.
  void readFromData(Type declaredType,
                    Index fieldIndex,
                    const PossibleContents& refContents,
                    Expression* read);

  // Similar to readFromData, but does a write for a struct.set or array.set.
  void writeToData(Expression* ref, Expression* value, Index fieldIndex);

  // We will need subtypes during the flow, so compute them once ahead of time.
  std::unique_ptr<SubTypes> subTypes;

  // The depth of children for each type. This is 0 if the type has no
  // subtypes, 1 if it has subtypes but none of those have subtypes themselves,
  // and so forth.
  std::unordered_map<HeapType, Index> maxDepths;

  // Given a ConeType, return the normalized depth, that is, the canonical
  // depth given the actual children it has. If this is a full cone, then we
  // can always pick the actual maximal depth and use that instead of
  // FullDepth==-1. For a non-full cone, we also reduce the depth as much as
  // possible, so it is equal to the maximum depth of an existing subtype.
  Index getNormalizedConeDepth(Type type, Index depth) {
    return std::min(depth, maxDepths[type.getHeapType()]);
  }

  void normalizeConeType(PossibleContents& cone) {
    assert(cone.isConeType());
    auto type = cone.getType();
    auto before = cone.getCone().depth;
    auto normalized = getNormalizedConeDepth(type, before);
    if (normalized != before) {
      cone = PossibleContents::coneType(type, normalized);
    }
  }

#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
  // Dump out a location for debug purposes.
  void dump(Location location);
#endif
};

Flower::Flower(Module& wasm, const PassOptions& options)
  : wasm(wasm), options(options) {
  // If traps never happen, create a TNH oracle.
  //
  // Atm this oracle only helps on GC content, so disable it without GC.
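  // (Added note: for example, under traps-never-happen a cast such as
  // (ref.cast $A (local.get $x)) that always executes lets the oracle assume
  // the local's value is a $A at that point; getTNHContents() reports such
  // facts later during the flow.)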
  if (options.trapsNeverHappen && wasm.features.hasGC()) {
#ifdef POSSIBLE_CONTENTS_DEBUG
    std::cout << "tnh phase\n";
#endif
    tnhOracle = std::make_unique<TNHOracle>(wasm, options);
  }

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "parallel phase\n";
#endif

  // First, collect information from each function.
  ModuleUtils::ParallelFunctionAnalysis<CollectedFuncInfo> analysis(
    wasm, [&](Function* func, CollectedFuncInfo& info) {
      InfoCollector finder(info, options);

      if (func->imported()) {
        // Imports return unknown values.
        auto results = func->getResults();
        for (Index i = 0; i < results.size(); i++) {
          finder.addRoot(ResultLocation{func, i},
                         PossibleContents::fromType(results[i]));
        }
        return;
      }

      finder.walkFunctionInModule(func, &wasm);
    });

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "single phase\n";
#endif

  // Also walk the global module code (for simplicity, also add it to the
  // function map, using a "function" key of nullptr).
  auto& globalInfo = analysis.map[nullptr];
  InfoCollector finder(globalInfo, options);
  finder.walkModuleCode(&wasm);

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "global init phase\n";
#endif

  // Connect global init values (which we've just processed, as part of the
  // module code) to the globals they initialize.
  for (auto& global : wasm.globals) {
    if (global->imported()) {
      // Imports are unknown values.
      finder.addRoot(GlobalLocation{global->name},
                     PossibleContents::fromType(global->type));
      continue;
    }
    auto* init = global->init;
    if (finder.isRelevant(init->type)) {
      globalInfo.links.push_back(
        {ExpressionLocation{init, 0}, GlobalLocation{global->name}});
    }
  }

  // Merge the function information into a single large graph that represents
  // the entire program all at once, indexing and deduplicating everything as
  // we go.

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "merging+indexing phase\n";
#endif

  // The merged roots. (Note that all other forms of merged data are declared
  // at the class level, since we need them during the flow, but the roots are
  // only needed to start the flow, so we can declare them here.)
  //
  // This must be insert-ordered for the same reason as |workQueue| is, see
  // above.
  InsertOrderedMap<Location, PossibleContents> roots;

  // Any function that may be called from the outside, like an export, is a
  // root, since they can be called with unknown parameters.
  auto calledFromOutside = [&](Name funcName) {
    auto* func = wasm.getFunction(funcName);
    auto params = func->getParams();
    for (Index i = 0; i < params.size(); i++) {
      roots[ParamLocation{func, i}] = PossibleContents::fromType(params[i]);
    }
  };

  for (auto& [func, info] : analysis.map) {
    for (auto& link : info.links) {
      links.insert(getIndexes(link));
    }
    for (auto& [root, value] : info.roots) {
      roots[root] = value;

      // Ensure an index even for a root with no links to it - everything
      // needs an index.
      getIndex(root);
    }
    for (auto [child, parent] : info.childParents) {
      // In practice we do not have any childParent connections with a tuple;
      // assert on that just to be safe.
      assert(!child->type.isTuple());
      childParents[getIndex(ExpressionLocation{child, 0})] =
        getIndex(ExpressionLocation{parent, 0});
    }
    for (auto func : info.calledFromOutside) {
      calledFromOutside(func);
    }
  }

  // We no longer need the function-level info.
  analysis.map.clear();

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "external phase\n";
#endif

  // Exports can be modified from the outside.
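  // (Added note: e.g. an exported function may be called by the host with any
  // arguments valid for its signature, and an exported mutable global may be
  // set by the host to any value of its type, so both are rooted below with
  // the full contents of their types.)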
  for (auto& ex : wasm.exports) {
    if (ex->kind == ExternalKind::Function) {
      calledFromOutside(ex->value);
    } else if (ex->kind == ExternalKind::Table) {
      // If any table is exported, assume any function in any table (including
      // other tables) can be called from the outside.
      // TODO: This could be more precise about which tables are exported and
      //       which are not: perhaps one table is exported but we can optimize
      //       the functions in another table, which is not exported. However,
      //       it is simpler to treat them all the same, and this handles the
      //       common case of no tables being exported at all.
      // TODO: This does not handle table.get/table.set, or call_ref, for which
      //       we'd need to see which references are used and which escape etc.
      //       For now, assume a closed world for such advanced use cases /
      //       assume this pass won't be run in them anyhow.
      // TODO: do this only once if multiple tables are exported
      for (auto& elementSegment : wasm.elementSegments) {
        for (auto* curr : elementSegment->data) {
          if (auto* refFunc = curr->dynCast<RefFunc>()) {
            calledFromOutside(refFunc->func);
          }
        }
      }
    } else if (ex->kind == ExternalKind::Global) {
      // Exported mutable globals are roots, since the outside may write any
      // value to them.
      auto name = ex->value;
      auto* global = wasm.getGlobal(name);
      if (global->mutable_) {
        roots[GlobalLocation{name}] = PossibleContents::fromType(global->type);
      }
    }
  }

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "struct phase\n";
#endif

  subTypes = std::make_unique<SubTypes>(wasm);
  maxDepths = subTypes->getMaxDepths();

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "link-targets phase\n";
#endif

  // Add all links to the targets vectors of the source locations, which we
  // will use during the flow.
  for (auto& link : links) {
    getTargets(link.from).push_back(link.to);
  }

#ifndef NDEBUG
  // Each vector of targets (which is a vector for efficiency) must have no
  // duplicates.
  for (auto& info : locations) {
    disallowDuplicates(info.targets);
  }
#endif

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "roots phase\n";
#endif

  // Set up the roots, which are the starting state for the flow analysis: send
  // their initial content to them to start the flow.
  for (const auto& [location, value] : roots) {
#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
    std::cout << "  init root\n";
    dump(location);
    value.dump(std::cout, &wasm);
    std::cout << '\n';
#endif
    updateContents(location, value);
  }

#ifdef POSSIBLE_CONTENTS_DEBUG
  std::cout << "flow phase\n";
  size_t iters = 0;
#endif

  // Flow the data while there is still stuff flowing.
  while (!workQueue.empty()) {
#ifdef POSSIBLE_CONTENTS_DEBUG
    iters++;
    if ((iters & 255) == 0) {
      std::cout << iters << " iters, work left: " << workQueue.size() << '\n';
    }
#endif
    auto iter = workQueue.begin();
    auto locationIndex = *iter;
    workQueue.erase(iter);

    flowAfterUpdate(locationIndex);
  }

  // TODO: Add analysis and retrieval logic for fields of immutable globals,
  //       including multiple levels of depth (necessary for itables in
  //       j2wasm).
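  // (Added note on termination: the worklist loop above ends because contents
  // at each location only ever grow - combine() never removes possibilities -
  // and PossibleContents can only grow a bounded number of times before
  // reaching Many; a location is only re-queued when its contents actually
  // changed.)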
}

bool Flower::updateContents(LocationIndex locationIndex,
                            PossibleContents newContents) {
  auto& contents = getContents(locationIndex);
  auto oldContents = contents;

#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
  std::cout << "\nupdateContents\n";
  dump(getLocation(locationIndex));
  contents.dump(std::cout, &wasm);
  std::cout << "\n with new contents \n";
  newContents.dump(std::cout, &wasm);
  std::cout << '\n';
#endif

  auto location = getLocation(locationIndex);

  // Handle special cases: some locations can only contain certain contents, so
  // filter accordingly. In principle we need to filter both before and after
  // combining with existing content; filtering afterwards is obviously
  // necessary as combining two things will create something larger than both,
  // and our representation has limitations (e.g. two different ref types will
  // result in a cone, potentially a very large one). Filtering beforehand is
  // necessary for a more subtle reason: consider a location that contains an
  // i8 which is sent a 0 and then 0x100. If we filter only after, then we'd
  // combine 0 and 0x100 first and get "unknown integer"; only by filtering
  // 0x100 to 0 beforehand (since 0x100 & 0xff => 0) will we combine 0 and 0
  // and not change anything, which is correct.
  //
  // For efficiency reasons we aim to only filter once, depending on the type
  // of filtering. Most can be filtered a single time afterwards, while for
  // data locations, where the issue is packed integer fields, it's necessary
  // to do it before as we've mentioned, and also sufficient (see details in
  // filterDataContents).
  if (auto* dataLoc = std::get_if<DataLocation>(&location)) {
    filterDataContents(newContents, *dataLoc);
#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
    std::cout << "  pre-filtered data contents:\n";
    newContents.dump(std::cout, &wasm);
    std::cout << '\n';
#endif
  } else if (auto* exprLoc = std::get_if<ExpressionLocation>(&location)) {
    if (exprLoc->expr->is<StructGet>() || exprLoc->expr->is<ArrayGet>()) {
      // Packed data reads must be filtered before the combine() operation, as
      // we must only combine the filtered contents (e.g. if 0xff arrives which
      // as a signed read is truly 0xffffffff then we cannot first combine the
      // existing 0xffffffff with the new 0xff, as they are different, and the
      // result will no longer be a constant).
      filterPackedDataReads(newContents, *exprLoc);
#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
      std::cout << "  pre-filtered packed read contents:\n";
      newContents.dump(std::cout, &wasm);
      std::cout << '\n';
#endif
    }
  }

  contents.combine(newContents);

  if (contents.isNone()) {
    // There is still nothing here, so there is nothing more to do but return
    // that it is worth sending more.
    return true;
  }

  // It is not worth sending any more to this location if we are now in the
  // worst possible case, as no future value could cause any change.
  bool worthSendingMore = true;
  if (contents.isConeType()) {
    if (!contents.getType().isRef()) {
      // A cone type of a non-reference is the worst case, since subtyping is
      // not relevant there, and so if we only know something about the type
      // then we already know nothing beyond what the type in the wasm tells
      // us (and from there we can only go to Many).
      worthSendingMore = false;
    } else {
      // Normalize all reference cones. There is never a point to flow around
      // anything non-normalized, which might lead to extra work. For example,
      // if A has no subtypes, then a full cone for A is really the same as
      // one with depth 0 (an exact type).
      // And we don't want to see the full cone arrive and think it was an
      // improvement over the one with depth 0 and do more flowing based on
      // that.
      normalizeConeType(contents);
    }
  }

  // Check if anything changed.
  if (contents == oldContents) {
    // Nothing actually changed, so just return.
    return worthSendingMore;
  }

  // Handle filtering (see comment earlier, this is the later filtering stage).
  bool filtered = false;
  if (auto* exprLoc = std::get_if<ExpressionLocation>(&location)) {
    // TODO: Replace this with specific filterFoo or flowBar methods like we
    //       have for filterGlobalContents. That could save a little wasted
    //       work here. Might be best to do that after the spec is fully
    //       stable.
    filterExpressionContents(contents, *exprLoc, worthSendingMore);
    filtered = true;
  } else if (auto* globalLoc = std::get_if<GlobalLocation>(&location)) {
    filterGlobalContents(contents, *globalLoc);
    filtered = true;
  }

  // Check if anything changed after filtering, if we did so.
  if (filtered) {
#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
    std::cout << "  filtered contents:\n";
    contents.dump(std::cout, &wasm);
    std::cout << '\n';
#endif
    if (contents == oldContents) {
      return worthSendingMore;
    }
  }

#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
  std::cout << "  updateContents has something new\n";
  contents.dump(std::cout, &wasm);
  std::cout << '\n';
#endif

  // After filtering we should always have more precise information than
  // "many" - in the worst case, we can have the type declared in the wasm.
  assert(!contents.isMany());

  // Add a work item if there isn't one already.
  workQueue.insert(locationIndex);

  return worthSendingMore;
}

void Flower::flowAfterUpdate(LocationIndex locationIndex) {
  const auto location = getLocation(locationIndex);
  auto& contents = getContents(locationIndex);

  // We are called after a change at a location. A change means that some
  // content has arrived, since we never send empty values around. Assert on
  // that.
  assert(!contents.isNone());

#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
  std::cout << "\nflowAfterUpdate to:\n";
  dump(location);
  std::cout << "  arriving:\n";
  contents.dump(std::cout, &wasm);
  std::cout << '\n';
#endif

  // Flow the contents to the normal targets of this location.
  flowToTargetsAfterUpdate(locationIndex, contents);

  // We are mostly done, except for handling interesting/special cases in the
  // flow, additional operations that we need to do aside from sending the new
  // contents to the normal (statically linked) targets.

  if (auto* exprLoc = std::get_if<ExpressionLocation>(&location)) {
    auto iter = childParents.find(locationIndex);
    if (iter == childParents.end()) {
      return;
    }

    // This is indeed one of the special cases where it is the child of a
    // parent, and we need to do some special handling because of that child-
    // parent connection.
    [[maybe_unused]] auto* child = exprLoc->expr;
    auto parentIndex = iter->second;
    auto* parent = std::get<ExpressionLocation>(getLocation(parentIndex)).expr;

#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
    std::cout << "  special, parent:\n" << *parent << '\n';
#endif

    if (auto* get = parent->dynCast<StructGet>()) {
      // |child| is the reference child of a struct.get.
      assert(get->ref == child);
      readFromData(get->ref->type, get->index, contents, get);
    } else if (auto* set = parent->dynCast<StructSet>()) {
      // |child| is either the reference or the value child of a struct.set.
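      // (Added note: e.g. for (struct.set $T k (ref) (value)), both the ref
      // child and the value child were linked to this parent, so an update to
      // either of them brings us here and triggers writeToData below.)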
      assert(set->ref == child || set->value == child);
      writeToData(set->ref, set->value, set->index);
    } else if (auto* get = parent->dynCast<ArrayGet>()) {
      assert(get->ref == child);
      readFromData(get->ref->type, 0, contents, get);
    } else if (auto* set = parent->dynCast<ArraySet>()) {
      assert(set->ref == child || set->value == child);
      writeToData(set->ref, set->value, 0);
    } else {
      // TODO: ref.test and all other casts can be optimized (see the cast
      //       helper code used in OptimizeInstructions and RemoveUnusedBrs)
      WASM_UNREACHABLE("bad childParents content");
    }
  }
}

void Flower::flowToTargetsAfterUpdate(LocationIndex locationIndex,
                                      const PossibleContents& contents) {
  // Send the new contents to all the targets of this location. As we do so,
  // prune any targets that we do not need to bother sending content to in the
  // future, to save space and work later.
  auto& targets = getTargets(locationIndex);
  targets.erase(std::remove_if(targets.begin(),
                               targets.end(),
                               [&](LocationIndex targetIndex) {
#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
                                 std::cout << "  send to target\n";
                                 dump(getLocation(targetIndex));
#endif
                                 return !updateContents(targetIndex, contents);
                               }),
                targets.end());

  if (contents.isMany()) {
    // We contain Many, and just called updateContents on our targets to send
    // that value to them. We'll never need to send anything from here ever
    // again, since we sent the worst case possible already, so we can just
    // clear our targets vector. But we should have already removed all the
    // targets in the above remove_if operation, since they should have all
    // notified us that we do not need to send them any more updates.
    assert(targets.empty());
  }
}

void Flower::connectDuringFlow(Location from, Location to) {
  auto newLink = LocationLink{from, to};
  auto newIndexLink = getIndexes(newLink);
  if (links.count(newIndexLink) == 0) {
    // This is a new link. Add it to the known links.
    links.insert(newIndexLink);

    // Add it to the |targets| vector.
    auto& targets = getTargets(newIndexLink.from);
    targets.push_back(newIndexLink.to);
#ifndef NDEBUG
    disallowDuplicates(targets);
#endif

    // In addition to adding the link, which will ensure new contents appearing
    // later will be sent along, we also update with the current contents.
    updateContents(to, getContents(getIndex(from)));
  }
}

void Flower::filterExpressionContents(PossibleContents& contents,
                                      const ExpressionLocation& exprLoc,
                                      bool& worthSendingMore) {
  auto type = exprLoc.expr->type;
  if (type.isTuple()) {
    // TODO: Optimize tuples here as well. We would need to take into account
    //       exprLoc.tupleIndex for that in all the below.
    return;
  }

  // The caller cannot know of a situation where it might not be worth sending
  // more to a reference - all that logic is in here. That is, the rest of this
  // function is the only place we can mark |worthSendingMore| as false for a
  // reference.
  bool isRef = type.isRef();
  assert(!isRef || worthSendingMore);

  // The TNH oracle informs us of the maximal contents possible here.
  auto maximalContents = getTNHContents(exprLoc.expr);
#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
  std::cout << "TNHOracle informs us that " << *exprLoc.expr << " contains "
            << maximalContents << "\n";
#endif
  contents.intersect(maximalContents);
  if (contents.isNone()) {
    // Nothing was left here at all.
    return;
  }

  // For references we need to normalize the intersection, see below. For non-
  // references, we are done (we did all the relevant work in the intersect()
  // call).
  if (!isRef) {
    return;
  }

  // Normalize the intersection.
  // We want to check later if any more content can arrive here, and also we
  // want to avoid flowing around anything non-normalized, as explained
  // earlier.
  //
  // Note that this normalization is necessary even though |contents| was
  // normalized before the intersection, e.g.:
  /*
  //      A
  //     / \
  //    B   C
  //        |
  //        D
  */
  // Consider the case where |maximalContents| is Cone(B, Infinity) and the
  // original |contents| was Cone(A, 2) (which is normalized). The naive
  // intersection is Cone(B, 1), since the core intersection logic makes no
  // assumptions about the rest of the types. That is then normalized to
  // Cone(B, 0) since there happen to be no subtypes for B.
  //
  // Note that the intersection may also not be a cone type, if it is a global
  // or literal. In that case we don't have anything more to do here.
  if (!contents.isConeType()) {
    return;
  }

  normalizeConeType(contents);

  // There is a chance that the intersection is equal to the maximal contents,
  // which would mean nothing more can arrive here.
  normalizeConeType(maximalContents);

  if (contents == maximalContents) {
    // We already contain everything possible, so this is the worst case.
    worthSendingMore = false;
  }
}

void Flower::filterGlobalContents(PossibleContents& contents,
                                  const GlobalLocation& globalLoc) {
  auto* global = wasm.getGlobal(globalLoc.name);
  if (global->mutable_ == Immutable) {
    // This is an immutable global. We never need to consider this value as
    // "Many", since in the worst case we can just use the immutable value.
    // That is, we can always replace this value with (global.get $name) which
    // will get the right value. Likewise, using the immutable global value is
    // often better than a cone type (even an exact one), but TODO: we could
    // note both a cone/exact type *and* that something is equal to a global,
    // in some cases. See https://github.com/WebAssembly/binaryen/pull/5083
    if (contents.isMany() || contents.isConeType()) {
      contents = PossibleContents::global(global->name, global->type);

      // TODO: We could do better here, to set global->init->type instead of
      //       global->type, or even the contents.getType() - either of those
      //       may be more refined. But other passes will handle that in
      //       general (by refining the global's type).

#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
      std::cout << "  setting immglobal to ImmutableGlobal\n";
      contents.dump(std::cout, &wasm);
      std::cout << '\n';
#endif
    }
  }
}

void Flower::filterDataContents(PossibleContents& contents,
                                const DataLocation& dataLoc) {
  auto field = GCTypeUtils::getField(dataLoc.type, dataLoc.index);
  if (!field) {
    // This is a bottom type; nothing will be written here.
    assert(dataLoc.type.isBottom());
    contents = PossibleContents::none();
    return;
  }

  if (field->isPacked()) {
    // We must handle packed fields carefully.
    if (contents.isLiteral()) {
      // This is a constant. We can truncate it and use that value.
      auto mask = Literal(int32_t(Bits::lowBitMask(field->getByteSize() * 8)));
      contents = PossibleContents::literal(contents.getLiteral().and_(mask));
    } else {
      // This is not a constant. We can't even handle a global here, as we'd
      // need to track that this global's value must be truncated before it is
      // used, and we don't do that atm. Leave only the type.
      // TODO: Consider tracking packing on GlobalInfo alongside the type.
      //       Another option is to make GUFA.cpp apply packing on the read,
      //       like CFP does - but that can only be done when replacing a
      //       StructGet of a packed field, and not anywhere else we saw that
      //       value reach.
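      // (Added example: if a Global of type i32 flows into an i8 field, we
      // cannot represent "that global, truncated to 8 bits", so all we keep
      // below is "some i32". In the constant case above, masking would turn
      // e.g. 0x1ff into 0xff.)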
      contents = PossibleContents::fromType(contents.getType());
    }
    // Given that the above only (1) turns an i32 into a masked i32 or (2)
    // turns anything else into an unknown i32, this is safe to run as
    // pre-filtering, that is, before we combine contents, since
    //
    //  (a) two constants are ok as masking is distributive,
    //        (x & M) U (y & M) == (x U y) & M
    //  (b) if one is a constant and the other is not then
    //        (x & M) U ? == ? == (x U ?) == (x U ?) & M
    //      (where ? is an unknown i32)
    //  (c) and if both are not constants then likewise we always end up as an
    //      unknown i32
    //
  }
}

void Flower::filterPackedDataReads(PossibleContents& contents,
                                   const ExpressionLocation& exprLoc) {
  auto* expr = exprLoc.expr;

  // Packed fields are stored as the truncated bits (see comment on
  // DataLocation; the actual truncation is done in filterDataContents), which
  // means that unsigned gets just work but signed ones need fixing (and we
  // only know how to do that here, when we reach the get and see if it is
  // signed).
  auto signed_ = false;
  Expression* ref;
  Index index;
  if (auto* get = expr->dynCast<StructGet>()) {
    signed_ = get->signed_;
    ref = get->ref;
    index = get->index;
  } else if (auto* get = expr->dynCast<ArrayGet>()) {
    signed_ = get->signed_;
    ref = get->ref;
    // Arrays are treated as having a single field.
    index = 0;
  } else {
    WASM_UNREACHABLE("bad packed read");
  }
  if (!signed_) {
    return;
  }

  // We are reading data here, so the reference must be a valid struct or
  // array, otherwise we would never have gotten here.
  assert(ref->type.isRef());
  auto field = GCTypeUtils::getField(ref->type.getHeapType(), index);
  assert(field);
  if (!field->isPacked()) {
    return;
  }

  if (contents.isLiteral()) {
    // This is a constant. We can sign-extend it and use that value.
    auto shifts = Literal(int32_t(32 - field->getByteSize() * 8));
    auto lit = contents.getLiteral();
    lit = lit.shl(shifts);
    lit = lit.shrS(shifts);
    contents = PossibleContents::literal(lit);
  } else {
    // This is not a constant. As in filterDataContents, give up and leave
    // only the type, since we have no way to track the sign-extension on top
    // of whatever this is.
    contents = PossibleContents::fromType(contents.getType());
  }
}

void Flower::readFromData(Type declaredType,
                          Index fieldIndex,
                          const PossibleContents& refContents,
                          Expression* read) {
#ifndef NDEBUG
  // We must not have anything in the reference that is invalid for the wasm
  // type there.
  auto maximalContents = PossibleContents::fullConeType(declaredType);
  assert(PossibleContents::isSubContents(refContents, maximalContents));
#endif

  // The data that a struct.get reads depends on two things: the reference
  // that we read from, and the relevant DataLocations. The reference
  // determines which DataLocations are relevant: if it is an exact type then
  // we have a single DataLocation to read from, the one type that can be read
  // from there. Otherwise, we might read from any subtype, and so all their
  // DataLocations are relevant.
  //
  // What can be confusing is that the information about the reference is also
  // inferred during the flow. That is, we use our current information about
  // the reference to decide what to do here. But the flow is not finished
  // yet! To keep things valid, we must therefore react to changes in either
  // the reference - when we see that more types might be read from here - or
  // the DataLocations - when new things are written to the data we can read
  // from.
  // Specifically, at every point in time we want to preserve the property
  // that we've read from all relevant types based on the current reference,
  // and we've read the very latest possible contents from those types. And
  // then since we preserve that property until the end of the flow, it is
  // also valid then. At the end of the flow, the current reference's contents
  // are the final and correct contents for that location, which means we've
  // ended up with the proper result: the struct.get reads everything it
  // should.
  //
  // To implement what was just described, we call this function when the
  // reference is updated. This function will then set up connections in the
  // graph so that updates to the relevant DataLocations will reach us in the
  // future.

  if (refContents.isNull() || refContents.isNone()) {
    // Nothing is read here as this is either a null or unreachable code. (Note
    // that the contents must be a subtype of the wasm type, which rules out
    // other possibilities like a non-null literal such as an integer or a
    // function reference.)
    return;
  }

#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
  std::cout << "  add special reads\n";
#endif

  // The only possibilities left are a cone type (the worst case is where the
  // cone matches the wasm type), or a global.
  //
  // TODO: The Global case may have a different cone type than the heapType,
  //       which we could use here.
  // TODO: A Global may refer to an immutable global, whose field we could
  //       potentially read (reading it from the struct.new/array.new in the
  //       definition of it, if it is not imported; or, we could track the
  //       contents of immutable fields of allocated objects, and not just
  //       represent them as an exact type).
  //       See the test TODO with text "We optimize some of this, but stop at
  //       reading from the immutable global"
  assert(refContents.isGlobal() || refContents.isConeType());

  // Just look at the cone here, discarding information about this being a
  // global, if it was one. All that matters from now on is the cone. We also
  // normalize the cone to avoid wasted work later.
  auto cone = refContents.getCone();
  auto normalizedDepth = getNormalizedConeDepth(cone.type, cone.depth);

  // We create a ConeReadLocation for the canonical cone of this type, to
  // avoid bloating the graph; see the comment on ConeReadLocation.
  auto coneReadLocation =
    ConeReadLocation{cone.type.getHeapType(), normalizedDepth, fieldIndex};
  if (!hasIndex(coneReadLocation)) {
    // This is the first time we use this location, so create the links for it
    // in the graph.
    subTypes->iterSubTypes(
      cone.type.getHeapType(),
      normalizedDepth,
      [&](HeapType type, Index depth) {
        connectDuringFlow(DataLocation{type, fieldIndex}, coneReadLocation);
      });

    // TODO: we can end up with redundant links here if we see one cone first
    //       and then a larger one later. But removing links is not efficient,
    //       so for now just leave that.
  }

  // Link to the canonical location.
  connectDuringFlow(coneReadLocation, ExpressionLocation{read, 0});
}

void Flower::writeToData(Expression* ref, Expression* value, Index fieldIndex) {
#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
  std::cout << "  add special writes\n";
#endif

  auto refContents = getContents(getIndex(ExpressionLocation{ref, 0}));

#ifndef NDEBUG
  // We must not have anything in the reference that is invalid for the wasm
  // type there.
  auto maximalContents = PossibleContents::fullConeType(ref->type);
  assert(PossibleContents::isSubContents(refContents, maximalContents));
#endif

  // We could set up links here as we do for reads, but as we get to this code
  // in any case, we can just flow the values forward directly. This avoids
  // adding any links (edges) to the graph (and edges are what we want to
  // avoid adding, as there can be a quadratic number of them). In other
  // words, we'll loop over the places we need to send info to, which we can
  // figure out in a simple way, and by doing so we avoid materializing edges
  // into the graph.
  //
  // Note that this is different from readFromData, above, which does add
  // edges to the graph (and works hard to add as few as possible, see the
  // "canonical cone reads" logic). The difference is because readFromData
  // must "subscribe" to get notifications from the relevant DataLocations.
  // But when writing, that is not a problem: whenever a change happens in the
  // reference or the value of a struct.set then this function will get
  // called, and those are the only things we care about. We can then just
  // compute the values we are sending (based on the current contents of the
  // reference and the value), and where we should send them to, and do that
  // right here. (And as commented in readFromData, that is guaranteed to give
  // us the right result in the end: at every point in time we send the right
  // data, so when the flow is finished we've sent data based on the final and
  // correct information about our reference and value.)
  auto valueContents = getContents(getIndex(ExpressionLocation{value, 0}));

  // See the related comment in readFromData() as to why these are the only
  // things we need to check, and why the assertion afterwards contains the
  // only things possible.
  if (refContents.isNone() || refContents.isNull()) {
    return;
  }
  assert(refContents.isGlobal() || refContents.isConeType());

  // As in readFromData, normalize to the proper cone.
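  // (Added example: a cone of depth 2 over a type whose deepest existing
  // subtype chain is only 1 level is normalized to depth 1, so the iteration
  // below only visits DataLocations of types that actually exist.)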
  auto cone = refContents.getCone();
  auto normalizedDepth = getNormalizedConeDepth(cone.type, cone.depth);

  subTypes->iterSubTypes(
    cone.type.getHeapType(),
    normalizedDepth,
    [&](HeapType type, Index depth) {
      auto heapLoc = DataLocation{type, fieldIndex};
      updateContents(heapLoc, valueContents);
    });
}

#if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2
void Flower::dump(Location location) {
  if (auto* loc = std::get_if<ExpressionLocation>(&location)) {
    std::cout << "  exprloc \n"
              << *loc->expr << " : " << loc->tupleIndex << '\n';
  } else if (auto* loc = std::get_if<DataLocation>(&location)) {
    std::cout << "  dataloc ";
    if (wasm.typeNames.count(loc->type)) {
      std::cout << '$' << wasm.typeNames[loc->type].name;
    } else {
      std::cout << loc->type << '\n';
    }
    std::cout << " : " << loc->index << '\n';
  } else if (auto* loc = std::get_if<TagLocation>(&location)) {
    std::cout << "  tagloc " << loc->tag << " : " << loc->tupleIndex << '\n';
  } else if (auto* loc = std::get_if<ParamLocation>(&location)) {
    std::cout << "  paramloc " << loc->func->name << " : " << loc->index
              << '\n';
  } else if (auto* loc = std::get_if<LocalLocation>(&location)) {
    std::cout << "  localloc " << loc->func->name << " : " << loc->index
              << '\n';
  } else if (auto* loc = std::get_if<ResultLocation>(&location)) {
    std::cout << "  resultloc $" << loc->func->name << " : " << loc->index
              << '\n';
  } else if (auto* loc = std::get_if<GlobalLocation>(&location)) {
    std::cout << "  globalloc " << loc->name << '\n';
  } else if (std::get_if<SignatureParamLocation>(&location)) {
    std::cout << "  sigparamloc " << '\n';
  } else if (std::get_if<SignatureResultLocation>(&location)) {
    std::cout << "  sigresultloc " << '\n';
  } else if (auto* loc = std::get_if<NullLocation>(&location)) {
    std::cout << "  Nullloc " << loc->type << '\n';
  } else {
    std::cout << "  (other)\n";
  }
}
#endif

} // anonymous namespace

void ContentOracle::analyze() {
  Flower flower(wasm, options);
  for (LocationIndex i = 0; i < flower.locations.size(); i++) {
    locationContents[flower.getLocation(i)] = flower.getContents(i);
  }
}

} // namespace wasm