/*
 * Copyright 2018 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Operations on Stack IR.
//

#include "ir/branch-utils.h"
#include "ir/iteration.h"
#include "ir/local-graph.h"
#include "pass.h"
#include "wasm-stack.h"
#include "wasm.h"

namespace wasm {

StackIROptimizer::StackIROptimizer(Function* func,
                                   StackIR& insts,
                                   const PassOptions& passOptions,
                                   FeatureSet features)
  : func(func), insts(insts), passOptions(passOptions), features(features) {}

void StackIROptimizer::run() {
  dce();
  // FIXME: local2Stack is currently rather slow (due to localGraph),
  //        so for now run it only when really optimizing.
  if (passOptions.optimizeLevel >= 3 || passOptions.shrinkLevel >= 1) {
    local2Stack();
  }
  removeUnneededBlocks();
  dce();
  vacuum();
}
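// An illustrative sketch (hypothetical input, not from the test suite) of
// what dce() below removes:
//
//  (unreachable)
//  (i32.const 1)  ;; dead, removed
//  (drop)         ;; dead, removed
//  end            ;; a control flow barrier: removal stops here
//
// Everything between the unreachable and the next barrier is dead code.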
void StackIROptimizer::dce() {
  // Remove code after an unreachable instruction: anything after it, up to
  // the next control flow barrier, can simply be removed.
  bool inUnreachableCode = false;
  for (Index i = 0; i < insts.size(); i++) {
    auto* inst = insts[i];
    if (!inst) {
      continue;
    }
    if (inUnreachableCode) {
      // Does the unreachable code end here?
      if (isControlFlowBarrier(inst)) {
        inUnreachableCode = false;
      } else {
        // We can remove this.
        removeAt(i);
      }
    } else if (inst->type == Type::unreachable) {
      inUnreachableCode = true;
    }
  }

  // Remove code before an Unreachable. Consider this:
  //
  //  (drop
  //   ..
  //  )
  //  (unreachable)
  //
  // The drop is not needed, as the unreachable puts the stack in the
  // polymorphic state anyhow. Note that we don't need to optimize anything
  // other than a drop here, as in general the Binaryen IR DCE pass will
  // handle everything else. A drop followed by an unreachable is the only
  // thing that pass cannot handle, as the structured form of Binaryen IR
  // does not allow removing such a drop, and so we can only do it here in
  // StackIR.
  //
  // TODO: We can look even further back, say if there is another drop of
  //       something before, then we can remove that drop as well. To do
  //       that we'd need to inspect the stack going backwards.
  for (Index i = 1; i < insts.size(); i++) {
    auto* inst = insts[i];
    if (!inst || inst->op != StackInst::Basic ||
        !inst->origin->is<Unreachable>()) {
      continue;
    }
    // Look back past nulls.
    Index j = i - 1;
    while (j > 0 && !insts[j]) {
      j--;
    }
    auto*& prev = insts[j];
    if (prev && prev->op == StackInst::Basic && prev->origin->is<Drop>()) {
      prev = nullptr;
    }
  }
}

// Remove obviously-unneeded code.
void StackIROptimizer::vacuum() {
  // In the wasm binary format a nop is never needed. (In Binaryen IR, in
  // comparison, it is necessary e.g. in a function body or an if arm.)
  //
  // It is especially important to remove nops because we add nops when we
  // read wasm into Binaryen IR. That is, this avoids a potential increase
  // in code size.
  for (Index i = 0; i < insts.size(); i++) {
    auto*& inst = insts[i];
    if (inst && inst->origin->is<Nop>()) {
      inst = nullptr;
    }
  }
}

// If ordered properly, we can avoid a local.set/local.get pair,
// and use the value directly from the stack, for example
//    [..produce a value on the stack..]
//    local.set $x
//    [..much code..]
//    local.get $x
//    call $foo ;; use the value, foo(value)
// As long as the code in between does not modify $x, and has
// no control flow branching out, we can remove both the set
// and the get.
void StackIROptimizer::local2Stack() {
  // We use the localGraph to tell us if a get-set pair is indeed a set that
  // is read by that get, and only that get. Note that we run this on
  // Binaryen IR, so we are assuming that no previous opt has changed the
  // interaction of local operations.
  //
  // We use a lazy graph here as we only query in the rare case when we find
  // a set/get pair that looks optimizable.
  LazyLocalGraph localGraph(func);

  // The binary writing of StringWTF16Get and StringSliceWTF is optimized to
  // use fewer scratch locals when their operands are already LocalGets. To
  // avoid interfering with that optimization, we have to avoid removing
  // such LocalGets.
  auto deferredGets = findStringViewDeferredGets();

  // We maintain a stack of relevant values. This contains:
  //  * a null for each actual value that the value stack would have
  //  * an index of each LocalSet that *could* be on the value
  //    stack at that location.
  const Index null = -1;
  std::vector<Index> values;

  // We also maintain a stack of values vectors for control flow,
  // saving the stack as we enter and restoring it when we exit.
  std::vector<std::vector<Index>> savedValues;
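  // For example (an illustrative sketch; $x is hypothetical), the values
  // stack evolves like this:
  //
  //  i32.const 10 ;; a regular value           -> values: [null]
  //  local.set $x ;; consumes it, a candidate  -> values: [the set's index]
  //  local.get $x ;; set's index is on top: the pair can be stackified
  //
  // If instead another regular value were pushed between the set and the
  // get, the null above the set's index would block the optimization.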
#ifdef STACK_OPT_DEBUG
  std::cout << "func: " << func->name << '\n' << insts << '\n';
#endif
  for (Index instIndex = 0; instIndex < insts.size(); instIndex++) {
    auto* inst = insts[instIndex];
    if (!inst) {
      continue;
    }
    // First, consume values from the stack as required.
    auto consumed = getNumConsumedValues(inst);
#ifdef STACK_OPT_DEBUG
    std::cout << "  " << instIndex << " : " << *inst << ", " << values.size()
              << " on stack, will consume " << consumed << "\n    ";
    for (auto s : values)
      std::cout << s << ' ';
    std::cout << '\n';
#endif
    // TODO: currently we run dce before this, but if we didn't, we'd need
    //       to handle unreachable code here - it's ok to pop multiple
    //       values there even if the stack is at size 0.
    while (consumed > 0) {
      assert(values.size() > 0);
      // Whenever we hit a possible stack value, kill it - it would
      // be consumed here, so we can never optimize to it.
      while (values.back() != null) {
        values.pop_back();
        assert(values.size() > 0);
      }
      // Finally, consume the actual value that is consumed here.
      values.pop_back();
      consumed--;
    }
    // After consuming, we can see what to do with this. First, handle
    // control flow.
    if (isControlFlowBegin(inst)) {
      // Save the stack for when we end this control flow.
      savedValues.push_back(values); // TODO: optimize copies
      values.clear();
    } else if (isControlFlowEnd(inst)) {
      assert(!savedValues.empty());
      values = savedValues.back();
      savedValues.pop_back();
    } else if (isControlFlow(inst)) {
      // Otherwise, in the middle of control flow, just clear it.
      values.clear();
    }
    // This is something we should handle, look into it.
    if (inst->type.isConcrete()) {
      bool optimized = false;
      // Do not optimize multivalue locals, since those will be better
      // optimized when they are visited in the binary writer and this
      // optimization would interfere with that one.
      if (auto* get = inst->origin->dynCast<LocalGet>();
          get && inst->type.isSingle() && !deferredGets.count(get)) {
        // Use another local to clarify what instIndex means in this scope.
        auto getIndex = instIndex;
        // This is a potential optimization opportunity! See if we
        // can reach the set.
        if (values.size() > 0) {
          Index j = values.size() - 1;
          while (1) {
            // If there's an actual value in the way, we've failed.
            auto setIndex = values[j];
            if (setIndex == null) {
              break;
            }
            auto* set = insts[setIndex]->origin->cast<LocalSet>();
            if (set->index == get->index) {
              // This might be a proper set-get pair, where the set is
              // used by this get and nothing else, check that.
              auto& sets = localGraph.getSets(get);
              if (sets.size() == 1 && *sets.begin() == set) {
                auto& setInfluences = localGraph.getSetInfluences(set);
                // If this has the proper value of 1, also do the
                // potentially-expensive check of whether we can remove
                // this pair at all.
                if (setInfluences.size() == 1 &&
                    canRemoveSetGetPair(setIndex, getIndex)) {
                  assert(*setInfluences.begin() == get);
                  // Do it! The set and the get can go away, the proper
                  // value is on the stack.
#ifdef STACK_OPT_DEBUG
                  std::cout << "  stackify the get\n";
#endif
                  insts[setIndex] = nullptr;
                  insts[getIndex] = nullptr;
                  // Continuing on from here, replace this on the stack
                  // with a null, representing a regular value. We
                  // keep possible values above us active - they may
                  // be optimized later, as they would be pushed after
                  // us, and used before us, so there is no conflict.
                  values[j] = null;
                  optimized = true;
                  break;
                }
              }
            }
            // We failed here. Can we look some more?
            if (j == 0) {
              break;
            }
            j--;
          }
        }
      }
      if (!optimized) {
        // This is an actual regular value on the value stack.
        values.push_back(null);
      }
    } else if (inst->origin->is<LocalSet>() && inst->type == Type::none) {
      // This set is potentially optimizable later, add to stack.
      values.push_back(instIndex);
    }
  }
}

// There may be unnecessary blocks we can remove: blocks without arriving
// branches are always ok to remove.
// TODO: A branch to a block in an if body can become a branch to that if
//       body.
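//
// For example (an illustrative sketch; $b is hypothetical):
//
//  (block $b  ;; no br/br_if/br_table targets $b
//    ..
//  )
//
// Both the block's begin and end markers share the Block as their origin,
// so both are nulled out below, and nothing is emitted for the block.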
void StackIROptimizer::removeUnneededBlocks() {
  // First, find all branch targets.
  std::unordered_set<Name> targets;
  for (auto*& inst : insts) {
    if (inst) {
      BranchUtils::operateOnScopeNameUses(
        inst->origin, [&](Name& name) { targets.insert(name); });
    }
  }
  // Remove untargeted blocks.
  for (auto*& inst : insts) {
    if (!inst) {
      continue;
    }
    if (auto* block = inst->origin->dynCast<Block>()) {
      if (!block->name.is() || !targets.count(block->name)) {
        // TODO: optimize; maybe run remove-unused-names.
        inst = nullptr;
      }
    }
  }
}

// A control flow "barrier" - a point where stack machine
// unreachability ends.
bool StackIROptimizer::isControlFlowBarrier(StackInst* inst) {
  switch (inst->op) {
    case StackInst::BlockEnd:
    case StackInst::IfElse:
    case StackInst::IfEnd:
    case StackInst::LoopEnd:
    case StackInst::Catch:
    case StackInst::CatchAll:
    case StackInst::Delegate:
    case StackInst::TryEnd:
    case StackInst::TryTableEnd: {
      return true;
    }
    default: {
      return false;
    }
  }
}

// A control flow beginning.
bool StackIROptimizer::isControlFlowBegin(StackInst* inst) {
  switch (inst->op) {
    case StackInst::BlockBegin:
    case StackInst::IfBegin:
    case StackInst::LoopBegin:
    case StackInst::TryBegin:
    case StackInst::TryTableBegin: {
      return true;
    }
    default: {
      return false;
    }
  }
}

// A control flow ending.
bool StackIROptimizer::isControlFlowEnd(StackInst* inst) {
  switch (inst->op) {
    case StackInst::BlockEnd:
    case StackInst::IfEnd:
    case StackInst::LoopEnd:
    case StackInst::TryEnd:
    case StackInst::Delegate:
    case StackInst::TryTableEnd: {
      return true;
    }
    default: {
      return false;
    }
  }
}

bool StackIROptimizer::isControlFlow(StackInst* inst) {
  return inst->op != StackInst::Basic;
}

// Remove the instruction at index i. If the instruction is control flow,
// and so has been expanded to multiple instructions, remove them as well.
void StackIROptimizer::removeAt(Index i) {
  auto* inst = insts[i];
  insts[i] = nullptr;
  if (inst->op == StackInst::Basic) {
    return; // that was it
  }
  auto* origin = inst->origin;
  while (1) {
    i++;
    assert(i < insts.size());
    inst = insts[i];
    insts[i] = nullptr;
    if (inst && inst->origin == origin && isControlFlowEnd(inst)) {
      return; // that's it, we removed it all
    }
  }
}

Index StackIROptimizer::getNumConsumedValues(StackInst* inst) {
  if (isControlFlow(inst)) {
    // An IfBegin consumes 1 (the if condition); no other control flow
    // marker consumes values.
    if (inst->op == StackInst::IfBegin) {
      return 1;
    }
    return 0;
  }
  // Otherwise, for basic instructions, just count the expression children.
  return ChildIterator(inst->origin).children.size();
}

// Given a pair of a local.set and local.get, see if we can remove them
// without breaking validation. Specifically, we must keep sets of non-
// nullable locals that dominate a get until the end of the block, such as
// here:
//
//  local.set 0 ;; Can we remove
//  local.get 0 ;; this pair?
//  if
//    local.set 0
//  else
//    local.set 0
//  end
//  local.get 0 ;; This get poses a problem.
//
// Logically the 2nd&3rd sets ensure a value is applied to the local before
// we read it, but the validation rules only track each set until the end
// of its scope, so the 1st set (before the if, in the pair) is necessary.
//
// The logic below is related to LocalStructuralDominance, but sharing code
// with it is difficult as this uses StackIR and not Binaryen IR, and it
// checks a particular set/get pair.
//
// We are given the indexes of the set and get instructions in |insts|.
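//
// A further illustrative sketch of the "cover" tracking below (hypothetical
// input, value-producing instructions omitted as above):
//
//  local.set 0 ;; the pair's set
//  local.get 0 ;; the pair's get
//  local.set 0 ;; a covering set at our depth
//  local.get 0 ;; validates even without the pair's set
//
// The second set covers everything from there on at this depth, so the
// pair is removable.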
bool StackIROptimizer::canRemoveSetGetPair(Index setIndex, Index getIndex) {
  // The set must be before the get.
  assert(setIndex < getIndex);

  auto* set = insts[setIndex]->origin->cast<LocalSet>();
  auto localType = func->getLocalType(set->index);
  // Note we do not need to handle tuples here, as the parent ignores them
  // anyhow (hence we can check non-nullability instead of non-
  // defaultability).
  assert(localType.isSingle());
  if (func->isParam(set->index) || !localType.isNonNullable()) {
    // This local cannot pose a problem for validation (params are always
    // initialized, and it is ok if nullable locals are uninitialized).
    return true;
  }

  // Track the depth (in block/if/loop/etc. scopes) relative to our starting
  // point. Anything less deep than that is not interesting, as we can only
  // help things at our depth or deeper to validate.
  Index currDepth = 0;

  // Look for a different get than the one in getIndex (since that one is
  // being removed) which would stop validating without the set. While doing
  // so, note other sets that ensure validation even if our set is removed.
  // We track those in this stack of booleans, one for each scope, which is
  // true if another set covers us and ours is not needed.
  //
  // We begin in the current scope and with no other set covering us.
  std::vector<bool> coverStack = {false};

  // Track the total number of covers as well, for quick checking below.
  Index covers = 0;

  // TODO: We could look before us as well, but then we might end up
  //       scanning much of the function every time.
  for (Index i = setIndex + 1; i < insts.size(); i++) {
    auto* inst = insts[i];
    if (!inst) {
      continue;
    }
    if (isControlFlowBegin(inst)) {
      // A new scope begins.
      currDepth++;
      coverStack.push_back(false);
    } else if (isControlFlowEnd(inst)) {
      if (currDepth == 0) {
        // Less deep than the start, so we found no problem.
        return true;
      }
      currDepth--;

      if (coverStack.back()) {
        // A cover existed in the scope which ended.
        covers--;
      }
      coverStack.pop_back();
    } else if (isControlFlowBarrier(inst)) {
      // A barrier, like the else in an if-else, not only ends a scope but
      // opens a new one.
      if (currDepth == 0) {
        // Another scope with the same depth begins, but ours ended, so
        // stop.
        return true;
      }

      if (coverStack.back()) {
        // A cover existed in the scope which ended.
        covers--;
      }
      coverStack.back() = false;
    } else if (auto* otherSet = inst->origin->dynCast<LocalSet>()) {
      // We are covered in this scope henceforth.
      if (otherSet->index == set->index) {
        if (!coverStack.back()) {
          covers++;
          if (currDepth == 0) {
            // We have a cover at depth 0, so everything from here on out
            // will be covered.
            return true;
          }
          coverStack.back() = true;
        }
      }
    } else if (auto* otherGet = inst->origin->dynCast<LocalGet>()) {
      if (otherGet->index == set->index && i != getIndex && !covers) {
        // We found a get that might be a problem: it uses the same index,
        // but is not the get we were told about, and no other set covers
        // us.
        return false;
      }
    }
  }

  // No problem.
  return true;
}

std::unordered_set<LocalGet*> StackIROptimizer::findStringViewDeferredGets() {
  std::unordered_set<LocalGet*> deferred;
  auto note = [&](Expression* e) {
    if (auto* get = e->dynCast<LocalGet>()) {
      deferred.insert(get);
    }
  };
  for (auto* inst : insts) {
    if (!inst) {
      continue;
    }
    if (auto* curr = inst->origin->dynCast<StringWTF16Get>()) {
      note(curr->pos);
    } else if (auto* curr = inst->origin->dynCast<StringSliceWTF>()) {
      note(curr->start);
      note(curr->end);
    }
  }
  return deferred;
}

} // namespace wasm