/*
 * Copyright 2016 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Computes code at compile time where possible, replacing it with the
// computed constant.
//
// The "propagate" variant of this pass also propagates constants across
// sets and gets, which implements a standard constant propagation.
//
// Possible nondeterminism: WebAssembly NaN signs are nondeterministic,
// and this pass may optimize e.g. a float 0 / 0 into +nan while a VM may
// emit -nan, which can be a noticeable difference if the bits are
// looked at.
//

#include "ir/iteration.h"
#include "ir/literal-utils.h"
#include "ir/local-graph.h"
#include "ir/manipulation.h"
#include "ir/properties.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "support/small_vector.h"
#include "wasm-builder.h"
#include "wasm-interpreter.h"
#include "wasm.h"

namespace wasm {

// A map of gets to their constant values. If a get does not have a constant
// value then it does not appear in the map (that avoids allocating for the
// majority of gets).
using GetValues = std::unordered_map<LocalGet*, Literals>;

// A map of values on the heap. This maps the expressions that create the
// heap data (struct.new, array.new, etc.) to the data they are created with.
// Each such expression gets its own GCData created for it. This allows
// computing identity between locals referring to the same GCData, by seeing
// if they point to the same thing.
//
// Note that a source expression may create different data each time it is
// reached in a loop,
//
//  (loop
//   (if ..
//    (local.set $x
//     (struct.new ..
//    )
//   )
//   ..compare $x to something..
//  )
//
// Just like in SSA form, this is not a problem because the loop entry must
// have a merge, if a value entering the loop might be noticed. In SSA form
// that means a phi is created, and identity is set there. In our
// representation, the merge will cause a local.get of $x to have more
// possible input values than that struct.new, which means we will not infer
// a value for it, and not attempt to say anything about comparisons of $x.
using HeapValues = std::unordered_map<Expression*, std::shared_ptr<GCData>>;

// Precomputes an expression. Errors if we hit anything that can't be
// precomputed. Inherits most of its functionality from
// ConstantExpressionRunner, which it shares with the C-API, but adds handling
// of GetValues computed during the precompute pass.
class PrecomputingExpressionRunner
  : public ConstantExpressionRunner<PrecomputingExpressionRunner> {

  using Super = ConstantExpressionRunner<PrecomputingExpressionRunner>;

  // Concrete values of gets computed during the pass, which the runner does
  // not know about since it only records values of sets it visits.
  const GetValues& getValues;

  HeapValues& heapValues;

  // Limit evaluation depth for 2 reasons: first, it is highly unlikely
  // that we can do anything useful to precompute a hugely nested expression
  // (we should succeed at smaller parts of it first).
  // Second, a low limit is helpful to avoid platform differences in native
  // stack sizes.
  static const Index MAX_DEPTH = 50;

  // Limit loop iterations since loops might be infinite. Since we are going
  // to replace the expression and must preserve side effects, we limit this
  // to the very first iteration because a side effect would be necessary to
  // achieve more than one iteration before becoming concrete.
  static const Index MAX_LOOP_ITERATIONS = 1;

public:
  PrecomputingExpressionRunner(Module* module,
                               const GetValues& getValues,
                               HeapValues& heapValues,
                               bool replaceExpression)
    : ConstantExpressionRunner<PrecomputingExpressionRunner>(
        module,
        replaceExpression ? FlagValues::PRESERVE_SIDEEFFECTS
                          : FlagValues::DEFAULT,
        MAX_DEPTH,
        MAX_LOOP_ITERATIONS),
      getValues(getValues), heapValues(heapValues) {}

  Flow visitLocalGet(LocalGet* curr) {
    auto iter = getValues.find(curr);
    if (iter != getValues.end()) {
      auto values = iter->second;
      assert(values.isConcrete());
      return Flow(values);
    }
    return ConstantExpressionRunner<
      PrecomputingExpressionRunner>::visitLocalGet(curr);
  }

  // TODO: Use immutability for values
  Flow visitStructNew(StructNew* curr) {
    auto flow = Super::visitStructNew(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  Flow visitStructSet(StructSet* curr) { return Flow(NONCONSTANT_FLOW); }
  Flow visitStructGet(StructGet* curr) {
    if (curr->ref->type != Type::unreachable && !curr->ref->type.isNull()) {
      // If this field is immutable then we may be able to precompute this, as
      // if we also created the data in this function (or it was created in an
      // immutable global) then we know the value in the field. If it is
      // immutable, call the super method which will do the rest here. That
      // includes checking for the data being properly created, as if it was
      // not then we will not have a constant value for it, which means the
      // local.get of that value will stop us.
      auto& field =
        curr->ref->type.getHeapType().getStruct().fields[curr->index];
      if (field.mutable_ == Immutable) {
        return Super::visitStructGet(curr);
      }
    }

    // Otherwise, we've failed to precompute.
    return Flow(NONCONSTANT_FLOW);
  }
  Flow visitArrayNew(ArrayNew* curr) {
    auto flow = Super::visitArrayNew(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  Flow visitArrayNewFixed(ArrayNewFixed* curr) {
    auto flow = Super::visitArrayNewFixed(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  Flow visitArraySet(ArraySet* curr) { return Flow(NONCONSTANT_FLOW); }
  Flow visitArrayGet(ArrayGet* curr) {
    if (curr->ref->type != Type::unreachable && !curr->ref->type.isNull()) {
      // See above with struct.get
      auto element = curr->ref->type.getHeapType().getArray().element;
      if (element.mutable_ == Immutable) {
        return Super::visitArrayGet(curr);
      }
    }

    // Otherwise, we've failed to precompute.
    return Flow(NONCONSTANT_FLOW);
  }
  // ArrayLen is not disallowed here as it is an immutable property.
  Flow visitArrayCopy(ArrayCopy* curr) { return Flow(NONCONSTANT_FLOW); }

  // Generates heap info for a heap-allocating expression.
  template<typename T> Flow getHeapCreationFlow(Flow flow, T* curr) {
    // We must return a literal that refers to the canonical location for this
    // source expression, so that each time we compute a specific struct.new
    // we get the same identity.
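    //
    // Note that when a canonical GCData already exists for this expression we
    // refresh its contents in place rather than allocating a new one, so that
    // any Literal already holding a reference to it keeps pointing at the
    // same data, preserving reference identity across repeated evaluations.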
    std::shared_ptr<GCData>& canonical = heapValues[curr];
    std::shared_ptr<GCData> newGCData = flow.getSingleValue().getGCData();
    if (!canonical) {
      canonical = std::make_shared<GCData>(*newGCData);
    } else {
      *canonical = *newGCData;
    }
    return Literal(canonical, curr->type.getHeapType());
  }

  Flow visitStringNew(StringNew* curr) {
    if (curr->op != StringNewWTF16Array) {
      // TODO: handle other string ops. For now we focus on JS-like strings.
      return Flow(NONCONSTANT_FLOW);
    }

    // string.encode_wtf16_array is effectively an Array read operation, so
    // just like ArrayGet above we must check for immutability.
    auto refType = curr->ref->type;
    if (refType.isRef()) {
      auto heapType = refType.getHeapType();
      if (heapType.isArray()) {
        if (heapType.getArray().element.mutable_ == Immutable) {
          return Super::visitStringNew(curr);
        }
      }
    }

    // Otherwise, this is mutable or unreachable or otherwise uninteresting.
    return Flow(NONCONSTANT_FLOW);
  }

  Flow visitStringEncode(StringEncode* curr) {
    // string.encode_wtf16_array is effectively an Array write operation, so
    // just like ArraySet and ArrayCopy above we must mark it as disallowed
    // (due to side effects). (And we do not support other operations than
    // string.encode_wtf16_array anyhow.)
    return Flow(NONCONSTANT_FLOW);
  }
};

struct Precompute
  : public WalkerPass<
      PostWalker<Precompute, UnifiedExpressionVisitor<Precompute>>> {
  // Shorthand for the walker base class, used by doWalkFunction below.
  using Super =
    WalkerPass<PostWalker<Precompute, UnifiedExpressionVisitor<Precompute>>>;

  bool isFunctionParallel() override { return true; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<Precompute>(propagate);
  }

  bool propagate = false;

  Precompute(bool propagate) : propagate(propagate) {}

  GetValues getValues;
  HeapValues heapValues;

  bool canPartiallyPrecompute;

  void doWalkFunction(Function* func) {
    // Perform partial precomputing only when the optimization level is non-
    // trivial, as it is slower and less likely to help.
    canPartiallyPrecompute = getPassOptions().optimizeLevel >= 2;

    // Walk the function and precompute things.
    Super::doWalkFunction(func);
    partiallyPrecompute(func);
    if (!propagate) {
      return;
    }
    // When propagating, we can utilize the graph of local operations to
    // precompute the values from a local.set to a local.get. This populates
    // getValues which is then used by a subsequent walk that applies those
    // values.
    bool propagated = propagateLocals(func);
    if (propagated) {
      // We found constants to propagate and entered them in getValues. Do
      // another walk to apply them and perhaps other optimizations that are
      // unlocked.
      Super::doWalkFunction(func);
      // We could also try to partially precompute again, but that is a
      // somewhat heavy operation, so we only do it the first time, and leave
      // such things for later runs of this pass and for --converge.
    }
    // Note that in principle even more cycles could find further work here,
    // in very rare cases. To avoid constructing a LocalGraph again just for
    // that unlikely chance, we leave such things for later.
  }

  template<typename T> void reuseConstantNode(T* curr, Flow flow) {
    if (flow.values.isConcrete()) {
      // reuse a const / ref.null / ref.func node if there is one
      if (curr->value && flow.values.size() == 1) {
        Literal singleValue = flow.getSingleValue();
        if (singleValue.type.isNumber()) {
          if (auto* c = curr->value->template dynCast<Const>()) {
            c->value = singleValue;
            c->finalize();
            curr->finalize();
            return;
          }
        } else if (singleValue.isNull()) {
          if (auto* n = curr->value->template dynCast<RefNull>()) {
            n->finalize(singleValue.type);
            curr->finalize();
            return;
          }
        } else if (singleValue.type.isRef() &&
                   singleValue.type.getHeapType().isSignature()) {
          if (auto* r = curr->value->template dynCast<RefFunc>()) {
            r->func = singleValue.getFunc();
            auto heapType = getModule()->getFunction(r->func)->type;
            r->finalize(Type(heapType, NonNullable));
            curr->finalize();
            return;
          }
        }
      }
      curr->value = flow.getConstExpression(*getModule());
    } else {
      curr->value = nullptr;
    }
    curr->finalize();
  }

  void visitExpression(Expression* curr) {
    // TODO: if local.get, only replace with a constant if we don't care about
    // size...?
    if (Properties::isConstantExpression(curr) || curr->is<Nop>()) {
      return;
    }
    // try to evaluate this into a const
    Flow flow = precomputeExpression(curr);
    if (!canEmitConstantFor(flow.values)) {
      return;
    }
    if (flow.breaking()) {
      if (flow.breakTo == NONCONSTANT_FLOW) {
        // This cannot be turned into a constant, but perhaps we can partially
        // precompute it.
        considerPartiallyPrecomputing(curr);
        return;
      }
      if (flow.breakTo == RETURN_FLOW) {
        // this expression causes a return. if it's already a return, reuse
        // the node
        if (auto* ret = curr->dynCast<Return>()) {
          reuseConstantNode(ret, flow);
        } else {
          Builder builder(*getModule());
          replaceCurrent(builder.makeReturn(
            flow.values.isConcrete() ? flow.getConstExpression(*getModule())
                                     : nullptr));
        }
        return;
      }
      // this expression causes a break, emit it directly. if it's already a
      // br, reuse the node.
      if (auto* br = curr->dynCast<Break>()) {
        br->name = flow.breakTo;
        br->condition = nullptr;
        reuseConstantNode(br, flow);
      } else {
        Builder builder(*getModule());
        replaceCurrent(builder.makeBreak(
          flow.breakTo,
          flow.values.isConcrete() ? flow.getConstExpression(*getModule())
                                   : nullptr));
      }
      return;
    }
    // this was precomputed
    if (flow.values.isConcrete()) {
      replaceCurrent(flow.getConstExpression(*getModule()));
    } else {
      ExpressionManipulator::nop(curr);
    }
  }

  void visitBlock(Block* curr) {
    // When block precomputation fails, it can lead to quadratic slowness due
    // to the "tower of blocks" pattern used to implement switches:
    //
    //  (block
    //   (block
    //    ...
    //     (block
    //      (br_table ..
    //
    // If we try to precompute each block here, and fail on each, then we end
    // up doing quadratic work. This is also wasted work as once a nested
    // block fails to precompute there is not really a chance to succeed on
    // the parent. If we do *not* fail to precompute, however, then we do want
    // to precompute such nested blocks, e.g.:
    //
    //  (block $out
    //   (block
    //    (br $out)
    //   )
    //  )
    //
    // Here we *can* precompute the inner block, so when we get to the outer
    // one we see this:
    //
    //  (block $out
    //   (br $out)
    //  )
    //
    // And that precomputes to nothing. Therefore when we see a child of the
    // block that is another block (it failed to precompute to something
    // simpler) then we leave early here.
    //
    // Note that in theory we could still precompute here if wasm had
    // instructions that allow such things, e.g.:
    //
    //  (block $out
    //   (block
    //    (cause side effect1)
    //    (cause side effect2)
    //   )
    //   (undo those side effects exactly)
    //  )
    //
    // We are forced to invent a side effect that we can precisely undo
    // (unlike, say, locals - a local.set would persist outside of the block,
    // and even if we did another set to the original value, this pass doesn't
    // track values that way). Only with that can we make the inner block
    // un-precomputable (because there are side effects) but the outer one
    // precomputable (because those effects are undone). Note that it is
    // critical that we have two things in the block, so that we can't
    // precompute it to one of them (which is what we did to the br in the
    // previous example). Note also that this is still optimizable using other
    // passes, as merge-blocks will fold the two blocks together.
    if (!curr->list.empty() && curr->list[0]->is<Block>()) {
      // The first child is a block, that is, it could not be simplified, so
      // this looks like the "tower of blocks" pattern. Avoid quadratic time
      // here as explained above. (We could also look at other children of the
      // block, but the only real-world pattern identified so far is on the
      // first child, so keep things simple here.)
      return;
    }

    // Otherwise, precompute normally like all other expressions.
    visitExpression(curr);
  }

  // If we failed to precompute a constant, perhaps we can still precompute
  // part of an expression. Specifically, consider this case:
  //
  //  (A
  //    (select
  //      (B)
  //      (C)
  //      (condition)
  //    )
  //  )
  //
  // Perhaps we can compute A(B) and A(C). If so, we can emit a better select:
  //
  //  (select
  //    (constant result of A(B))
  //    (constant result of A(C))
  //    (condition)
  //  )
  //
  // Note that in general for code size we want to move operations *out* of
  // selects and ifs (OptimizeInstructions does that), but here we are
  // computing two constants which replace three expressions, so it is
  // worthwhile.
  //
  // To do such partial precomputing, in the main pass we note selects that
  // look promising. If we find any then we do a second pass later just for
  // that (as doing so requires walking up the stack in a manner that we want
  // to avoid in the main pass for overhead reasons; see below).
  //
  // Note that selects are all we really need here: Other passes would turn an
  // if into a select if the arms are simple enough, and only in those cases
  // (simple arms) do we have a chance at partially precomputing. (For
  // example, if an arm is a constant then we can, but if it is a call then we
  // can't.) However, there are cases like an if with arms with side effects
  // that end in precomputable things, that are missed atm TODO
  std::unordered_set<Select*> partiallyPrecomputable;

  void considerPartiallyPrecomputing(Expression* curr) {
    if (!canPartiallyPrecompute) {
      return;
    }
    if (auto* select = curr->dynCast<Select>()) {
      // We only have a reasonable hope of success if the select arms are
      // things like constants or global gets. At a first approximation, allow
      // the set of things we allow in constant initializers (but we can
      // probably allow more here TODO).
      //
      // We also ignore selects with no parent (that are the entire function
      // body) as then there is nothing to optimize into their arms.
      auto& wasm = *getModule();
      if (Properties::isValidConstantExpression(wasm, select->ifTrue) &&
          Properties::isValidConstantExpression(wasm, select->ifFalse) &&
          getFunction()->body != select) {
        partiallyPrecomputable.insert(select);
      }
    }
  }

  // To partially precompute selects we walk up the stack from them, like
  // this:
  //
  //  (A
  //    (B
  //      (select
  //        (C)
  //        (D)
  //        (condition)
  //      )
  //    )
  //  )
  //
  // First we try to apply B to C and D. If that works, we arrive at this:
  //
  //  (A
  //    (select
  //      (constant result of B(C))
  //      (constant result of B(D))
  //      (condition)
  //    )
  //  )
  //
  // We can then proceed to perhaps apply A. However, even if we failed to
  // apply B then we can try to apply A and B together, because that
  // combination may succeed where incremental work fails, for example:
  //
  //  (global $C
  //    (struct.new    ;; outer
  //      (struct.new  ;; inner
  //        (i32.const 10)
  //      )
  //    )
  //  )
  //
  //  (struct.get    ;; outer
  //    (struct.get  ;; inner
  //      (select
  //        (global.get $C)
  //        (global.get $D)
  //        (condition)
  //      )
  //    )
  //  )
  //
  // Applying the inner struct.get to $C leads us to the inner struct.new, but
  // that is an interior pointer in the global - it is not something we can
  // refer to using a global.get, so precomputing it fails. However, when we
  // apply both struct.gets at once we arrive at the outer struct.new, which
  // is in fact the global $C, and we succeed.
  void partiallyPrecompute(Function* func) {
    if (!canPartiallyPrecompute || partiallyPrecomputable.empty()) {
      // Nothing to do.
      return;
    }

    // Walk the function to find the parent stacks of the promising selects.
    // We copy the stacks and process them later. We do it like this because
    // if we wanted to process stacks as we reached them then we'd trip over
    // ourselves: when we optimize we replace a parent, but that parent is an
    // expression we'll reach later in the walk, so modifying it is unsafe.
    struct StackFinder : public ExpressionStackWalker<StackFinder> {
      Precompute& parent;

      StackFinder(Precompute& parent) : parent(parent) {}

      // We will later iterate on this in the order of insertion, which keeps
      // things deterministic, and also usually lets us do consecutive work
      // like a select nested in another select's condition, simply because we
      // will traverse the selects in postorder (however, because we cannot
      // always succeed in an incremental manner - see the comment on this
      // function - it is possible in theory that some work can happen only in
      // a later execution of the pass).
      InsertOrderedMap<Select*, ExpressionStack> stackMap;

      void visitSelect(Select* curr) {
        if (parent.partiallyPrecomputable.count(curr)) {
          stackMap[curr] = expressionStack;
        }
      }
    } stackFinder(*this);
    stackFinder.walkFunction(func);

    // Note which expressions we've modified as we go, as it is invalid to
    // modify more than once. This could happen in theory in a situation like
    // this:
    //
    //  (ternary.f32.max  ;; fictional instruction for explanatory purposes
    //    (select ..)
    //    (select ..)
    //    (f32.infinity)
    //  )
    //
    // When we consider the first select we can see that the computation
    // result is always infinity, so we can optimize here and replace the
    // ternary. Then the same thing happens with the second select, causing
    // the ternary to be replaced again, which is unsafe because it no longer
    // exists after we precomputed it the first time. (Note that in this
    // example the result is the same either way, but at least in theory an
    // instruction could exist for which there was a difference.)
    // In practice it does not seem that wasm has instructions capable of this
    // atm but this code is still useful to guard against future problems, and
    // as a minor speedup (quickly skip code if it was already modified).
    std::unordered_set<Expression*> modified;

    for (auto& [select, stack] : stackFinder.stackMap) {
      // Each stack ends in the select itself, and contains more than the
      // select itself (otherwise we'd have ignored the select), i.e., the
      // select has a parent that we can try to optimize into the arms.
      assert(stack.back() == select);
      assert(stack.size() >= 2);
      Index selectIndex = stack.size() - 1;
      assert(selectIndex >= 1);

      if (modified.count(select)) {
        // This select was modified; go to the next one.
        continue;
      }

      // Go up through the parents, until we can't do any more work. At each
      // parent we'll try to execute it and all intermediate parents into the
      // select arms.
      for (Index parentIndex = selectIndex - 1; parentIndex != Index(-1);
           parentIndex--) {
        auto* parent = stack[parentIndex];
        if (modified.count(parent)) {
          // This parent was modified; exit the loop on parents as no upper
          // parent is valid to try either.
          break;
        }

        // If the parent lacks a concrete type then we can't move it into the
        // select: the select needs a concrete (and non-tuple) type. For
        // example, if the parent is a drop or is unreachable, those are
        // things we don't want to handle, and we stop here (once we see one
        // such parent we can't expect to make any more progress).
        if (!parent->type.isConcrete() || parent->type.isTuple()) {
          break;
        }

        // We are precomputing the select arms, but leaving the condition
        // as-is. If the condition breaks to the parent, then we can't move
        // the parent into the select arms:
        //
        //  (block $name ;; this must stay outside of the select
        //    (select
        //      (B)
        //      (C)
        //      (block ;; condition
        //        (br_if $name
        //
        // Ignore all control flow for simplicity, as it isn't interesting for
        // us, and other passes should have removed it anyhow.
        if (Properties::isControlFlowStructure(parent)) {
          break;
        }

        // This looks promising, so try to precompute here. What we do is
        // precompute twice, once with the select replaced with the left arm,
        // and once with the right. If both succeed then we can create a new
        // select (with the same condition as before) whose arms are the
        // precomputed values.
        auto isValidPrecomputation = [&](const Flow& flow) {
          // For now we handle simple concrete values. We could also handle
          // breaks in principle TODO
          return canEmitConstantFor(flow.values) && !flow.breaking() &&
                 flow.values.isConcrete();
        };

        // Find the pointer to the select in its immediate parent so that we
        // can replace it first with one arm and then the other.
        auto** pointerToSelect =
          getChildPointerInImmediateParent(stack, selectIndex, func);
        *pointerToSelect = select->ifTrue;
        auto ifTrue = precomputeExpression(parent);
        if (isValidPrecomputation(ifTrue)) {
          *pointerToSelect = select->ifFalse;
          auto ifFalse = precomputeExpression(parent);
          if (isValidPrecomputation(ifFalse)) {
            // Wonderful, we can precompute here! The select can now contain
            // the computed values in its arms.
            select->ifTrue = ifTrue.getConstExpression(*getModule());
            select->ifFalse = ifFalse.getConstExpression(*getModule());
            select->finalize();

            // The parent of the select is now replaced by the select.
            auto** pointerToParent =
              getChildPointerInImmediateParent(stack, parentIndex, func);
            *pointerToParent = select;

            // Update state for further iterations: Mark everything modified
            // and move the select to the parent's location.
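            // The expressions from the parent down to (and including) the
            // select have either been removed from the tree or moved, so mark
            // them all as modified. Then shrink the stack so that it ends at
            // the select in its new, shallower position, ready for further
            // iterations of the loop on parents.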
            for (Index i = parentIndex; i <= selectIndex; i++) {
              modified.insert(stack[i]);
            }
            selectIndex = parentIndex;
            stack[selectIndex] = select;
            stack.resize(selectIndex + 1);
          }
        }

        // Whether we succeeded in precomputing here or not, restore the
        // parent's pointer to its original state (if we precomputed, the
        // parent is no longer in use, but there is no harm in modifying it).
        *pointerToSelect = select;
      }
    }
  }

  void visitFunction(Function* curr) {
    // removing breaks can alter types
    ReFinalize().walkFunctionInModule(curr, getModule());
  }

private:
  // Precompute an expression, returning a flow, which may be a constant
  // (that we can replace the expression with if replaceExpression is set).
  Flow precomputeExpression(Expression* curr, bool replaceExpression = true) {
    Flow flow;
    try {
      flow = PrecomputingExpressionRunner(
               getModule(), getValues, heapValues, replaceExpression)
               .visit(curr);
    } catch (PrecomputingExpressionRunner::NonconstantException&) {
      return Flow(NONCONSTANT_FLOW);
    }
    // If we are replacing the expression, then the resulting value must be of
    // a type we can emit a constant for.
    if (!flow.breaking() && replaceExpression &&
        !canEmitConstantFor(flow.values)) {
      return Flow(NONCONSTANT_FLOW);
    }
    return flow;
  }

  // Precomputes the value of an expression, as opposed to the expression
  // itself. This differs from precomputeExpression in that we care about
  // the value the expression will have, which we cannot necessarily replace
  // the expression with. For example,
  //  (local.tee (i32.const 1))
  // will have value 1 which we can optimize here, but in precomputeExpression
  // we could not do anything.
  Literals precomputeValue(Expression* curr) {
    // Note that we set replaceExpression to false, as we just care about
    // the value here.
    Flow flow = precomputeExpression(curr, false /* replaceExpression */);
    if (flow.breaking()) {
      return {};
    }
    return flow.values;
  }

  // Propagates values around. Returns whether we propagated.
  bool propagateLocals(Function* func) {
    bool propagated = false;

    // Using the graph of get-set interactions, do a constant-propagation type
    // operation: note which sets are assigned constants, then see if that
    // lets us compute other sets as constants (since some of the gets they
    // read may be constant). We do this lazily as most locals do not end up
    // with constant values that we can propagate.
    LazyLocalGraph localGraph(func, getModule());

    // A map of sets to their constant values. If a set does not appear here
    // then it is not constant, like |getValues|.
    std::unordered_map<LocalSet*, Literals> setValues;

    // The work list, which will contain sets and gets that have just been
    // found to have a constant value. As we only add them to the list when
    // they are found to be constant, each can only be added once, and a
    // simple vector is enough here (which we can make a small vector to avoid
    // any allocation in small-enough functions).
    SmallVector<Expression*, 10> work;

    // Given a set, see if it has a constant value. If so, note that on
    // setValues and add to the work list.
    auto checkConstantSet = [&](LocalSet* set) {
      if (setValues.count(set)) {
        // Already known to be constant.
        return;
      }

      // Precompute the value. Note that this executes the code from scratch
      // each time we reach this point, and so we need to be careful about
      // repeating side effects if those side effects are expressed *in the
      // value*.
      // A case where that can happen is GC data (each struct.new creates a
      // new, unique struct, even if the data is equal), and so
      // PrecomputingExpressionRunner has special logic to make sure that
      // reference identity is preserved properly.
      //
      // (Other side effects are fine; if an expression does a call and we
      // somehow know the entire expression precomputes to a 42, then we can
      // propagate that 42 along to the users, regardless of whatever the call
      // did globally.)
      auto values = precomputeValue(Properties::getFallthrough(
        set->value, getPassOptions(), *getModule()));

      // We precomputed the *fallthrough* value (which allows us to look
      // through some things that would otherwise block us). But in some
      // cases, like a ref.cast, the fallthrough value can have an
      // incompatible type for the entire expression, which would be invalid
      // for us to propagate, e.g.:
      //
      //  (ref.cast (ref struct)
      //    (ref.null any)
      //  )
      //
      // In such a case the value cannot actually fall through. Ignore such
      // cases (which other optimizations can handle) by making sure that we
      // only propagate a valid subtype.
      if (values.isConcrete() &&
          Type::isSubType(values.getType(), set->value->type)) {
        setValues[set] = values;
        work.push_back(set);
      }
    };

    // The same, for a get.
    auto checkConstantGet = [&](LocalGet* get) {
      if (getValues.count(get)) {
        // Already known to be constant.
        return;
      }

      // For this get to have a constant value, all sets must agree on a
      // constant.
      Literals values;
      bool first = true;
      for (auto* set : localGraph.getSets(get)) {
        Literals curr;
        if (set == nullptr) {
          if (getFunction()->isVar(get->index)) {
            auto localType = getFunction()->getLocalType(get->index);
            if (!localType.isDefaultable()) {
              // This is a nondefaultable local that seems to read the default
              // value at the function entry. This is either an internal error
              // or a case of unreachable code; the latter is possible as
              // LocalGraph is not precise in unreachable code. Give up.
              return;
            } else {
              curr = Literal::makeZeros(localType);
            }
          } else {
            // It's a param, so the value is non-constant. Give up.
            return;
          }
        } else {
          // If there is an entry for the set, use that constant. Otherwise,
          // the set is not constant, and we give up.
          auto iter = setValues.find(set);
          if (iter == setValues.end()) {
            return;
          }
          curr = iter->second;
        }

        // We found a concrete value, so there is a chance, if it matches all
        // the rest.
        assert(curr.isConcrete());
        if (first) {
          // This is the first ever value we see. All later ones must match
          // it.
          values = curr;
          first = false;
        } else if (values != curr) {
          // This later value is not the same as before, give up.
          return;
        }
      }

      if (values.isConcrete()) {
        // We found a constant value!
        getValues[get] = values;
        work.push_back(get);
        propagated = true;
      } else {
        // If it is not concrete then, since we early-exited before on any
        // possible problem, there must be no sets for this get, which means
        // it is in unreachable code. In that case, we never switched |first|
        // from true to false.
        assert(first == true);
        // We could optimize using unreachability here, but we leave that for
        // other passes.
      }
    };

    // Check all gets and sets to find which are constant, mark them as such,
    // and add propagation work based on that.
    for (auto& [curr, _] : localGraph.getLocations()) {
      if (auto* set = curr->dynCast<LocalSet>()) {
        checkConstantSet(set);
      } else {
        checkConstantGet(curr->cast<LocalGet>());
      }
    }

    // Propagate constant values while work remains.
    while (!work.empty()) {
      auto* curr = work.back();
      work.pop_back();

      // This get or set is a constant value.
      // Check if the things it influences become constant.
      if (auto* set = curr->dynCast<LocalSet>()) {
        for (auto* get : localGraph.getSetInfluences(set)) {
          checkConstantGet(get);
        }
      } else {
        auto* get = curr->cast<LocalGet>();
        for (auto* set : localGraph.getGetInfluences(get)) {
          checkConstantSet(set);
        }
      }
    }

    return propagated;
  }

  bool canEmitConstantFor(const Literals& values) {
    for (auto& value : values) {
      if (!canEmitConstantFor(value)) {
        return false;
      }
    }
    return true;
  }

  bool canEmitConstantFor(const Literal& value) {
    // A null is fine to emit a constant for - we'll emit a RefNull.
    // Otherwise, see below about references to GC data.
    if (value.isNull()) {
      return true;
    }
    return canEmitConstantFor(value.type);
  }

  bool canEmitConstantFor(Type type) {
    // A function is fine to emit a constant for - we'll emit a RefFunc, which
    // is compact and immutable, so there can't be a problem.
    if (type.isFunction()) {
      return true;
    }
    // We can emit a StringConst for a string constant.
    if (type.isString()) {
      return true;
    }
    // All other reference types cannot be precomputed. Even an immutable GC
    // reference is not currently something this pass can handle, as it will
    // evaluate and reevaluate code multiple times in e.g. propagateLocals,
    // see the comment above.
    if (type.isRef()) {
      return false;
    }
    return true;
  }

  // Helpers for partial precomputing.

  // Given a stack of expressions and the index of an expression in it, find
  // the pointer to that expression in the parent. This gives us a pointer
  // that allows us to replace the expression.
  Expression** getChildPointerInImmediateParent(const ExpressionStack& stack,
                                                Index index,
                                                Function* func) {
    if (index == 0) {
      // There is nothing above this expression, so the pointer referring to
      // it is the function's body.
      return &func->body;
    }
    auto* child = stack[index];
    auto childIterator = ChildIterator(stack[index - 1]);
    for (auto** currChild : childIterator.children) {
      if (*currChild == child) {
        return currChild;
      }
    }
    WASM_UNREACHABLE("child not found in parent");
  }
};

Pass* createPrecomputePass() { return new Precompute(false); }

Pass* createPrecomputePropagatePass() { return new Precompute(true); }

} // namespace wasm