/*
 * Copyright 2018 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef wasm_stack_h
#define wasm_stack_h

#include "ir/branch-utils.h"
#include "ir/iteration.h"
#include "ir/module-utils.h"
#include "ir/properties.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "wasm-binary.h"
#include "wasm-traversal.h"
#include "wasm.h"

namespace wasm {

// Stack IR: an IR that represents code at the wasm binary format level,
// that is, as a stack machine. Binaryen IR is *almost* identical to this,
// but as documented in README.md, there are a few differences, intended
// to make Binaryen IR fast and flexible for maximal optimization. Stack
// IR, on the other hand, is designed to optimize a few final things that
// can only really be done when modeling the stack machine format
// precisely.

// Currently the benefits of Stack IR are minor, less than 1% reduction in
// code size. For that reason it is just a secondary IR, run optionally
// after the main IR has been optimized. However, if we improve Stack IR
// optimizations to a point where they have a significant impact, it's
// possible that could motivate investigating replacing the main IR with
// Stack IR (so that we have just a single IR).

// A StackIR instance (see wasm.h) contains a linear sequence of stack
// instructions. This representation is very simple: just a single vector of
// all instructions, in order.
//  * nullptr is allowed in the vector, representing something to skip.
//    This is useful because a common thing optimizations do is remove
//    instructions, and this way we can do so without compacting the vector
//    each time.

// Writing Binaryen IR to binary directly is fast. Alternatively,
// StackIRGenerator lets you optimize the Stack IR before emitting it to
// binary (but the cost is that the extra IR in the middle makes things 20%
// slower than emitting Binaryen IR to binary directly).

// A Stack IR instruction. Most just directly reflect a Binaryen IR node,
// but we need extra ones for certain things.
class StackInst {
public:
  StackInst(MixedArena&) {}

  enum Op {
    Basic,         // an instruction directly corresponding to a
                   // non-control-flow Binaryen IR node
    BlockBegin,    // the beginning of a block
    BlockEnd,      // the ending of a block
    IfBegin,       // the beginning of an if
    IfElse,        // the else of an if
    IfEnd,         // the ending of an if
    LoopBegin,     // the beginning of a loop
    LoopEnd,       // the ending of a loop
    TryBegin,      // the beginning of a try
    Catch,         // the catch within a try
    CatchAll,      // the catch_all within a try
    Delegate,      // the delegate within a try
    TryEnd,        // the ending of a try
    TryTableBegin, // the beginning of a try_table
    TryTableEnd    // the ending of a try_table
  } op;

  Expression* origin; // the expression this originates from

  // The type - usually identical to the origin type, but e.g. wasm has no
  // unreachable blocks, so there it must be none.
  Type type;
};

using StackIR = std::vector<StackInst*>;
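// An illustrative sketch (a hypothetical helper, not part of Binaryen's API):
// counting the live instructions in a StackIR, honoring the convention above
// that nullptr entries represent removed instructions and must be skipped.
inline Index countLiveInsts(const StackIR& insts) {
  Index count = 0;
  for (auto* inst : insts) {
    if (inst) { // nullptr marks an instruction an optimization removed
      count++;
    }
  }
  return count;
}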
class BinaryInstWriter : public OverriddenVisitor<BinaryInstWriter> {
public:
  BinaryInstWriter(WasmBinaryWriter& parent,
                   BufferWithRandomAccess& o,
                   Function* func,
                   bool sourceMap,
                   bool DWARF)
    : parent(parent), o(o), func(func), sourceMap(sourceMap), DWARF(DWARF) {}

  void visit(Expression* curr) {
    if (func && !sourceMap) {
      parent.writeDebugLocation(curr, func);
    }
    OverriddenVisitor<BinaryInstWriter>::visit(curr);
    if (func && !sourceMap) {
      parent.writeDebugLocationEnd(curr, func);
    }
  }

#define DELEGATE(CLASS_TO_VISIT)                                               \
  void visit##CLASS_TO_VISIT(CLASS_TO_VISIT* curr);

#include "wasm-delegations.def"

  void emitResultType(Type type);
  void emitIfElse(If* curr);
  void emitCatch(Try* curr, Index i);
  void emitCatchAll(Try* curr);
  void emitDelegate(Try* curr);
  // emit an end at the end of a block/loop/if/try
  void emitScopeEnd(Expression* curr);
  // emit an end at the end of a function
  void emitFunctionEnd();
  void emitUnreachable();
  void mapLocalsAndEmitHeader();

  MappedLocals mappedLocals;

private:
  void emitMemoryAccess(size_t alignment,
                        size_t bytes,
                        uint64_t offset,
                        Name memory);
  int32_t getBreakIndex(Name name);

  WasmBinaryWriter& parent;
  BufferWithRandomAccess& o;
  Function* func = nullptr;
  bool sourceMap;
  bool DWARF;

  std::vector<Name> breakStack;

  // The types of locals in the compact form, in order.
  std::vector<Type> localTypes;
  // type => number of locals of that type in the compact form
  std::unordered_map<Type, size_t> numLocalsByType;

  void noteLocalType(Type type, Index count = 1);

  // Keeps track of the binary index of the scratch locals used to lower
  // tuple.extract. If there are multiple scratch locals of the same type, they
  // are contiguous and this map holds the index of the first.
  InsertOrderedMap<Type, Index> scratchLocals;
  // Return the type and number of required scratch locals.
  InsertOrderedMap<Type, Index> countScratchLocals();

  // local.get, local.tee, and global.get expressions that will be followed by
  // tuple.extracts. We can optimize these by getting only the local for the
  // extracted index.
  std::unordered_map<Expression*, Index> extractedGets;

  // As an optimization, we do not need to use scratch locals for
  // StringWTF16Get and StringSliceWTF if their non-string operands are already
  // LocalGets. Record those LocalGets here.
  std::unordered_set<LocalGet*> deferredGets;

  // Track which br_ifs need handling of their output values, which is the case
  // when they have a value that is more refined than the wasm type system
  // allows atm (and they are not dropped, in which case the type would not
  // matter). See https://github.com/WebAssembly/binaryen/pull/6390 for more on
  // the difference. As a result of the difference, we will insert extra casts
  // to ensure validation in the wasm spec. The wasm spec will hopefully
  // improve to use the more refined type as well, which would remove the need
  // for this hack.
  //
  // Each br_if present as a key here is mapped to the unrefined type for it.
  // That is, the br_if has a type in Binaryen IR that is too refined, and the
  // map contains the unrefined one (which we need in order to know the local
  // types, as we'll stash the unrefined values and then cast them).
  std::unordered_map<Break*, Type> brIfsNeedingHandling;
};
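// To illustrate brIfsNeedingHandling (a sketch; the types $A and $B are
// hypothetical, with $B a subtype of $A):
//
//   (block $l (result (ref $A))
//     (br_if $l
//       (..something of type (ref $B)..)
//       (i32.const 0)))
//
// In Binaryen IR the br_if's type can be the refined (ref $B), while the wasm
// spec types the br_if's output as the target's declared (ref $A). When that
// happens and the value is used, the map above records (ref $A) as the
// unrefined type: the writer can then stash the value in a scratch local of
// that type and emit a cast back to the refined type so the binary validates.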
// Takes Binaryen IR and converts it to something else (binary or Stack IR).
template<typename SubType>
class BinaryenIRWriter : public Visitor<BinaryenIRWriter<SubType>> {
public:
  BinaryenIRWriter(Function* func) : func(func) {}

  void write();

  // visits a node, emitting the proper code for it
  void visit(Expression* curr);

  void visitBlock(Block* curr);
  void visitIf(If* curr);
  void visitLoop(Loop* curr);
  void visitTry(Try* curr);
  void visitTryTable(TryTable* curr);

protected:
  Function* func = nullptr;

private:
  void emit(Expression* curr) { static_cast<SubType*>(this)->emit(curr); }
  void emitHeader() { static_cast<SubType*>(this)->emitHeader(); }
  void emitIfElse(If* curr) { static_cast<SubType*>(this)->emitIfElse(curr); }
  void emitCatch(Try* curr, Index i) {
    static_cast<SubType*>(this)->emitCatch(curr, i);
  }
  void emitCatchAll(Try* curr) {
    static_cast<SubType*>(this)->emitCatchAll(curr);
  }
  void emitDelegate(Try* curr) {
    static_cast<SubType*>(this)->emitDelegate(curr);
  }
  void emitScopeEnd(Expression* curr) {
    static_cast<SubType*>(this)->emitScopeEnd(curr);
  }
  void emitFunctionEnd() { static_cast<SubType*>(this)->emitFunctionEnd(); }
  void emitUnreachable() { static_cast<SubType*>(this)->emitUnreachable(); }
  void emitDebugLocation(Expression* curr) {
    static_cast<SubType*>(this)->emitDebugLocation(curr);
  }
  void visitPossibleBlockContents(Expression* curr);
};
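// A sketch of how the CRTP interface above is meant to be used (the subclass
// here is hypothetical, for illustration only): a subclass passes itself as
// SubType and provides the emit* methods, which BinaryenIRWriter invokes via
// static_cast rather than virtual dispatch.
//
//   class NopWriter : public BinaryenIRWriter<NopWriter> {
//   public:
//     NopWriter(Function* func) : BinaryenIRWriter<NopWriter>(func) {}
//     void emit(Expression* curr) {}   // handle one non-control-flow node
//     void emitHeader() {}             // the function header
//     void emitIfElse(If* curr) {}
//     void emitCatch(Try* curr, Index i) {}
//     void emitCatchAll(Try* curr) {}
//     void emitDelegate(Try* curr) {}
//     void emitScopeEnd(Expression* curr) {}
//     void emitFunctionEnd() {}
//     void emitUnreachable() {}
//     void emitDebugLocation(Expression* curr) {}
//   };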
template<typename SubType> void BinaryenIRWriter<SubType>::write() {
  assert(func && "BinaryenIRWriter: function is not set");
  emitHeader();
  visitPossibleBlockContents(func->body);
  emitFunctionEnd();
}

// Emits a node in a position that can contain a list of contents, like an if
// arm. This will emit the node, but if it is a block with no name, just emit
// its contents. This is ok to do because a list of contents is ok in the wasm
// binary format in such positions anyhow. When we read such code in Binaryen
// we will end up creating a block for it (note that while doing so we create a
// block without a name, since nothing branches to it, which makes it easy to
// handle in optimization passes and when writing the binary out again).
template<typename SubType>
void BinaryenIRWriter<SubType>::visitPossibleBlockContents(Expression* curr) {
  auto* block = curr->dynCast<Block>();
  // Even if the block has a name, check if the name is necessary (if it has no
  // uses, it is equivalent to not having one). Scanning the children of the
  // block means that this takes quadratic time, but it will be N^2 for a
  // fairly small N since the number of nested non-block control flow
  // structures tends to be very reasonable.
  if (!block || BranchUtils::BranchSeeker::has(block, block->name)) {
    visit(curr);
    return;
  }
  for (auto* child : block->list) {
    visit(child);
    // If this child was unreachable, either the child itself or one of its
    // descendants was a source of unreachability that was actually emitted.
    // Subsequent children won't be reachable, so skip them.
    if (child->type == Type::unreachable) {
      break;
    }
  }
}

template<typename SubType>
void BinaryenIRWriter<SubType>::visit(Expression* curr) {
  // We emit unreachable instructions that create unreachability, but not
  // unreachable instructions that just inherit unreachability from their
  // children, since the latter won't be reached. This (together with logic in
  // the control flow visitors) also ensures that the final instruction in each
  // unreachable block is a source of unreachability, which means we don't need
  // to emit an extra `unreachable` before the end of the block to prevent type
  // errors.
  bool hasUnreachableChild = false;
  for (auto* child : ValueChildIterator(curr)) {
    visit(child);
    if (child->type == Type::unreachable) {
      hasUnreachableChild = true;
      break;
    }
  }
  if (hasUnreachableChild) {
    // `curr` is not reachable, so don't emit it.
    return;
  }
  emitDebugLocation(curr);
  // Control flow requires special handling, but most instructions can be
  // emitted directly after their children.
  if (Properties::isControlFlowStructure(curr)) {
    Visitor<BinaryenIRWriter>::visit(curr);
  } else {
    emit(curr);
  }
}

template<typename SubType>
void BinaryenIRWriter<SubType>::visitBlock(Block* curr) {
  auto visitChildren = [this](Block* curr, Index from) {
    auto& list = curr->list;
    while (from < list.size()) {
      auto* child = list[from];
      visit(child);
      if (child->type == Type::unreachable) {
        break;
      }
      ++from;
    }
  };

  // A block with no name never needs to be emitted: we can just emit its
  // contents. In some cases that will end up as "stacky" code, which is valid
  // in wasm but not in Binaryen IR. This is similar to what we do in
  // visitPossibleBlockContents(), and like there, when we reload such a binary
  // we'll end up creating a block for it then.
  //
  // Note that in visitPossibleBlockContents() we also optimize the case of a
  // block with a name where the name actually has no uses - that handles more
  // cases, but it requires more work. It is reasonable to do it in
  // visitPossibleBlockContents(), which handles the common cases of blocks
  // that are children of control flow structures (like an if arm); doing it
  // here would affect every block, including highly-nested block stacks, which
  // would end up as quadratic time. In optimized code the name will not exist
  // if it's not used anyhow, so a minor optimization for the unoptimized case
  // that leads to potential quadratic behavior is not worth it here.
  if (!curr->name.is()) {
    visitChildren(curr, 0);
    return;
  }

  auto afterChildren = [this](Block* curr) {
    emitScopeEnd(curr);
    if (curr->type == Type::unreachable) {
      // Since this block is unreachable, no instructions will be emitted after
      // it in its enclosing scope. That means that this block will be the last
      // instruction before the end of its parent scope, so its type must match
      // the type of its parent. But we don't have a concrete type for this
      // block and we don't know what type its parent expects, so we can't
      // ensure the types match. To work around this, we insert an
      // `unreachable` instruction after every unreachable control flow
      // structure and depend on its polymorphic behavior to paper over any
      // type mismatches.
      emitUnreachable();
    }
  };

  // Handle very deeply nested blocks in the first position efficiently,
  // avoiding heavy recursion. We only start to do this if we see it will help
  // us (to avoid allocation of the vector).
  if (!curr->list.empty() && curr->list[0]->is<Block>()) {
    std::vector<Block*> parents;
    Block* child;
    while (!curr->list.empty() &&
           (child = curr->list[0]->dynCast<Block>())) {
      parents.push_back(curr);
      emit(curr);
      curr = child;
      emitDebugLocation(curr);
    }
    // Emit the current block, which does not have a block as a child in the
    // first position.
    emit(curr);
    visitChildren(curr, 0);
    afterChildren(curr);
    bool childUnreachable = curr->type == Type::unreachable;
    // Finish the later parts of all the parent blocks.
    while (!parents.empty()) {
      auto* parent = parents.back();
      parents.pop_back();
      if (!childUnreachable) {
        visitChildren(parent, 1);
      }
      afterChildren(parent);
      childUnreachable = parent->type == Type::unreachable;
    }
    return;
  }

  // Simple case of not having a nested block in the first position.
  emit(curr);
  visitChildren(curr, 0);
  afterChildren(curr);
}
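// A worked illustration of the iterative path above (a sketch, not code in
// this header): given nested blocks like
//
//   (block $a
//     (block $b
//       (block $c (nop))
//       (nop))
//     (nop))
//
// the loop emits the headers of $a, $b, and $c without recursing. Then $c's
// children and its end are emitted, then $b's children from index 1 onward
// and its end, and finally the same for $a, walking back up via the `parents`
// vector rather than the C++ call stack.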
template<typename SubType> void BinaryenIRWriter<SubType>::visitIf(If* curr) {
  emit(curr);
  visitPossibleBlockContents(curr->ifTrue);

  if (curr->ifFalse) {
    emitIfElse(curr);
    visitPossibleBlockContents(curr->ifFalse);
  }

  emitScopeEnd(curr);
  if (curr->type == Type::unreachable) {
    // We already handled the case of the condition being unreachable in
    // `visit`. Otherwise, we may still be unreachable, if we are an if-else
    // with both sides unreachable. Just like with blocks, we emit an extra
    // `unreachable` to work around potential type mismatches.
    assert(curr->ifFalse);
    emitUnreachable();
  }
}

template<typename SubType>
void BinaryenIRWriter<SubType>::visitLoop(Loop* curr) {
  emit(curr);
  visitPossibleBlockContents(curr->body);
  emitScopeEnd(curr);
  if (curr->type == Type::unreachable) {
    // we emitted a loop without a return type, so it must not be consumed
    emitUnreachable();
  }
}

template<typename SubType>
void BinaryenIRWriter<SubType>::visitTry(Try* curr) {
  emit(curr);
  visitPossibleBlockContents(curr->body);
  for (Index i = 0; i < curr->catchTags.size(); i++) {
    emitCatch(curr, i);
    visitPossibleBlockContents(curr->catchBodies[i]);
  }
  if (curr->hasCatchAll()) {
    emitCatchAll(curr);
    visitPossibleBlockContents(curr->catchBodies.back());
  }
  if (curr->isDelegate()) {
    emitDelegate(curr);
    // Note that when we emit a delegate we do not need to also emit a scope
    // ending, as the delegate ends the scope.
  } else {
    emitScopeEnd(curr);
  }
  if (curr->type == Type::unreachable) {
    emitUnreachable();
  }
}

template<typename SubType>
void BinaryenIRWriter<SubType>::visitTryTable(TryTable* curr) {
  emit(curr);
  visitPossibleBlockContents(curr->body);
  emitScopeEnd(curr);
  if (curr->type == Type::unreachable) {
    emitUnreachable();
  }
}

// Binaryen IR to binary writer
class BinaryenIRToBinaryWriter
  : public BinaryenIRWriter<BinaryenIRToBinaryWriter> {
public:
  BinaryenIRToBinaryWriter(WasmBinaryWriter& parent,
                           BufferWithRandomAccess& o,
                           Function* func = nullptr,
                           bool sourceMap = false,
                           bool DWARF = false)
    : BinaryenIRWriter<BinaryenIRToBinaryWriter>(func), parent(parent),
      writer(parent, o, func, sourceMap, DWARF), sourceMap(sourceMap) {}

  void emit(Expression* curr) { writer.visit(curr); }
  void emitHeader() {
    if (func->prologLocation) {
      parent.writeDebugLocation(*func->prologLocation);
    }
    writer.mapLocalsAndEmitHeader();
  }
  void emitIfElse(If* curr) { writer.emitIfElse(curr); }
  void emitCatch(Try* curr, Index i) { writer.emitCatch(curr, i); }
  void emitCatchAll(Try* curr) { writer.emitCatchAll(curr); }
  void emitDelegate(Try* curr) { writer.emitDelegate(curr); }
  void emitScopeEnd(Expression* curr) { writer.emitScopeEnd(curr); }
  void emitFunctionEnd() {
    // Indicate the debug location corresponding to the end opcode that
    // terminates the function code.
    if (func->epilogLocation) {
      parent.writeDebugLocation(*func->epilogLocation);
    } else {
      // The end opcode has no debug location.
      parent.writeNoDebugLocation();
    }
    writer.emitFunctionEnd();
  }
  void emitUnreachable() { writer.emitUnreachable(); }
  void emitDebugLocation(Expression* curr) {
    if (sourceMap) {
      parent.writeDebugLocation(curr, func);
    }
  }

  MappedLocals& getMappedLocals() { return writer.mappedLocals; }

private:
  WasmBinaryWriter& parent;
  BinaryInstWriter writer;
  bool sourceMap;
};

// Binaryen IR to Stack IR converter for an entire module. Generates all the
// StackIR in parallel, and then allows querying for the StackIR of individual
// functions.
class ModuleStackIR {
  ModuleUtils::ParallelFunctionAnalysis<StackIR> analysis;

public:
  ModuleStackIR(Module& wasm, const PassOptions& options);

  // Get StackIR for a function, if it exists. (This allows some functions to
  // have it and others not, if we add such capability in the future.)
  StackIR* getStackIROrNull(Function* func) {
    auto iter = analysis.map.find(func);
    if (iter == analysis.map.end()) {
      return nullptr;
    }
    return &iter->second;
  }
};

// Stack IR to binary writer
class StackIRToBinaryWriter {
public:
  StackIRToBinaryWriter(WasmBinaryWriter& parent,
                        BufferWithRandomAccess& o,
                        Function* func,
                        StackIR& stackIR,
                        bool sourceMap = false,
                        bool DWARF = false)
    : parent(parent), writer(parent, o, func, sourceMap, DWARF), func(func),
      stackIR(stackIR), sourceMap(sourceMap) {}

  void write();

  MappedLocals& getMappedLocals() { return writer.mappedLocals; }

private:
  WasmBinaryWriter& parent;
  BinaryInstWriter writer;
  Function* func;
  StackIR& stackIR;
  bool sourceMap;
};
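// Usage sketch (hypothetical caller code, for illustration only; `wasm`,
// `options`, `parent`, and `o` are assumed to exist in the surrounding
// writer):
//
//   ModuleStackIR moduleStackIR(wasm, options);
//   for (auto& func : wasm.functions) {
//     if (auto* stackIR = moduleStackIR.getStackIROrNull(func.get())) {
//       StackIRToBinaryWriter(parent, o, func.get(), *stackIR).write();
//     } else {
//       BinaryenIRToBinaryWriter(parent, o, func.get()).write();
//     }
//   }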
// Stack IR optimizer
class StackIROptimizer {
  Function* func;
  StackIR& insts;
  const PassOptions& passOptions;
  FeatureSet features;

public:
  StackIROptimizer(Function* func,
                   StackIR& insts,
                   const PassOptions& passOptions,
                   FeatureSet features);

  void run();

private:
  void dce();
  void vacuum();
  void local2Stack();
  void removeUnneededBlocks();
  bool isControlFlowBarrier(StackInst* inst);
  bool isControlFlowBegin(StackInst* inst);
  bool isControlFlowEnd(StackInst* inst);
  bool isControlFlow(StackInst* inst);
  void removeAt(Index i);
  Index getNumConsumedValues(StackInst* inst);
  bool canRemoveSetGetPair(Index setIndex, Index getIndex);
  std::unordered_set<LocalGet*> findStringViewDeferredGets();
};

// Generate and emit StackIR.
std::ostream&
printStackIR(std::ostream& o, Module* module, const PassOptions& options);

} // namespace wasm

namespace std {
std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst);
} // namespace std

#endif // wasm_stack_h