Diffstat (limited to 'src')
32 files changed, 703 insertions, 780 deletions
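In brief: this change removes Function::stackIR entirely. StackIR is now generated transiently during binary writing, controlled by three new PassOptions fields (generateStackIR, optimizeStackIR, printStackIR). A minimal sketch of the new write-time flow, assuming an already-populated module (the helper name is illustrative):

#include <string>
#include "pass.h"
#include "wasm-io.h"

// Sketch: StackIR is now a write-time concern. Setting these options makes
// the binary writer generate (and optionally optimize) StackIR on the fly;
// nothing is stored on wasm::Function anymore.
void writeWithStackIR(wasm::Module& wasm, const std::string& filename) {
  wasm::PassOptions options;
  options.generateStackIR = true; // on by default at -O2+ in the tools
  options.optimizeStackIR = true;
  wasm::ModuleWriter writer(options); // writer keeps a reference to options
  writer.setBinary(true);
  writer.write(wasm, filename);
}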
diff --git a/src/binaryen-c.cpp b/src/binaryen-c.cpp
index f27f6e2c8..1c9a5be0e 100644
--- a/src/binaryen-c.cpp
+++ b/src/binaryen-c.cpp
@@ -5590,8 +5590,8 @@ void BinaryenModulePrint(BinaryenModuleRef module) {
   std::cout << *(Module*)module;
 }
 
-void BinaryenModulePrintStackIR(BinaryenModuleRef module, bool optimize) {
-  wasm::printStackIR(std::cout, (Module*)module, optimize);
+void BinaryenModulePrintStackIR(BinaryenModuleRef module) {
+  wasm::printStackIR(std::cout, (Module*)module, globalPassOptions);
 }
 
 void BinaryenModulePrintAsmjs(BinaryenModuleRef module) {
@@ -5737,7 +5737,7 @@ static BinaryenBufferSizes writeModule(BinaryenModuleRef module,
                                        char* sourceMap,
                                        size_t sourceMapSize) {
   BufferWithRandomAccess buffer;
-  WasmBinaryWriter writer((Module*)module, buffer);
+  WasmBinaryWriter writer((Module*)module, buffer, globalPassOptions);
   writer.setNamesSection(globalPassOptions.debugInfo);
   std::ostringstream os;
   if (sourceMapUrl) {
@@ -5778,12 +5778,11 @@ size_t BinaryenModuleWriteText(BinaryenModuleRef module,
 
 size_t BinaryenModuleWriteStackIR(BinaryenModuleRef module,
                                   char* output,
-                                  size_t outputSize,
-                                  bool optimize) {
+                                  size_t outputSize) {
   // use a stringstream as an std::ostream. Extract the std::string
   // representation, and then store in the output.
   std::stringstream ss;
-  wasm::printStackIR(ss, (Module*)module, optimize);
+  wasm::printStackIR(ss, (Module*)module, globalPassOptions);
 
   const auto temp = ss.str();
   const auto ctemp = temp.c_str();
@@ -5808,7 +5807,7 @@ BinaryenModuleAllocateAndWriteResult
 BinaryenModuleAllocateAndWrite(BinaryenModuleRef module,
                                const char* sourceMapUrl) {
   BufferWithRandomAccess buffer;
-  WasmBinaryWriter writer((Module*)module, buffer);
+  WasmBinaryWriter writer((Module*)module, buffer, globalPassOptions);
   writer.setNamesSection(globalPassOptions.debugInfo);
   std::ostringstream os;
   if (sourceMapUrl) {
@@ -5842,13 +5841,12 @@ char* BinaryenModuleAllocateAndWriteText(BinaryenModuleRef module) {
   return output;
 }
 
-char* BinaryenModuleAllocateAndWriteStackIR(BinaryenModuleRef module,
-                                            bool optimize) {
+char* BinaryenModuleAllocateAndWriteStackIR(BinaryenModuleRef module) {
   std::ostringstream os;
   bool colors = Colors::isEnabled();
   Colors::setEnabled(false); // do not use colors for writing
-  wasm::printStackIR(os, (Module*)module, optimize);
+  wasm::printStackIR(os, (Module*)module, globalPassOptions);
   Colors::setEnabled(colors); // restore colors state
 
   auto str = os.str();
diff --git a/src/binaryen-c.h b/src/binaryen-c.h
index bc63e963a..595086d6a 100644
--- a/src/binaryen-c.h
+++ b/src/binaryen-c.h
@@ -2984,8 +2984,7 @@ BINARYEN_API BinaryenModuleRef BinaryenModuleParse(const char* text);
 BINARYEN_API void BinaryenModulePrint(BinaryenModuleRef module);
 
 // Print a module to stdout in stack IR text format. Useful for debugging.
-BINARYEN_API void BinaryenModulePrintStackIR(BinaryenModuleRef module,
-                                             bool optimize);
+BINARYEN_API void BinaryenModulePrintStackIR(BinaryenModuleRef module);
 
 // Print a module to stdout in asm.js syntax.
 BINARYEN_API void BinaryenModulePrintAsmjs(BinaryenModuleRef module);
@@ -3126,8 +3125,7 @@ BINARYEN_API size_t BinaryenModuleWriteText(BinaryenModuleRef module,
 //         outputSize
 BINARYEN_API size_t BinaryenModuleWriteStackIR(BinaryenModuleRef module,
                                                char* output,
-                                               size_t outputSize,
-                                               bool optimize);
+                                               size_t outputSize);
 
 typedef struct BinaryenBufferSizes {
   size_t outputBytes;
@@ -3173,7 +3171,7 @@ BINARYEN_API char* BinaryenModuleAllocateAndWriteText(BinaryenModuleRef module);
 // char* with malloc(), and expects the user to free() them manually
 // once not needed anymore.
 BINARYEN_API char*
-BinaryenModuleAllocateAndWriteStackIR(BinaryenModuleRef module, bool optimize);
+BinaryenModuleAllocateAndWriteStackIR(BinaryenModuleRef module);
 
 // Deserialize a module from binary form, assuming the MVP feature set.
 BINARYEN_API BinaryenModuleRef BinaryenModuleRead(char* input,
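For C API users, the per-call `optimize` flag is gone; the printed and emitted Stack IR now follows the module's global pass options. A hedged migration sketch, assuming the existing BinaryenSetOptimizeLevel setter (which is not part of this diff):

#include "binaryen-c.h"

// Sketch: print optimized Stack IR under the new one-argument API.
void printOptimizedStackIR(BinaryenModuleRef module) {
  BinaryenSetOptimizeLevel(2);        // replaces the removed `optimize` flag
  BinaryenModulePrintStackIR(module); // optimization now follows global options
}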
diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp
index 8d9e88d18..e169f0ff1 100644
--- a/src/ir/module-utils.cpp
+++ b/src/ir/module-utils.cpp
@@ -69,9 +69,6 @@ Function* copyFunction(Function* func,
   ret->base = func->base;
   ret->noFullInline = func->noFullInline;
   ret->noPartialInline = func->noPartialInline;
-
-  // TODO: copy Stack IR
-  assert(!func->stackIR);
   return out.addFunction(std::move(ret));
 }
diff --git a/src/js/binaryen.js-post.js b/src/js/binaryen.js-post.js
index 1461420bb..dfde2d7ef 100644
--- a/src/js/binaryen.js-post.js
+++ b/src/js/binaryen.js-post.js
@@ -2688,8 +2688,8 @@ function wrapModule(module, self = {}) {
     if (textPtr) _free(textPtr);
     return text;
   };
-  self['emitStackIR'] = function(optimize) {
-    let textPtr = Module['_BinaryenModuleAllocateAndWriteStackIR'](module, optimize);
+  self['emitStackIR'] = function() {
+    let textPtr = Module['_BinaryenModuleAllocateAndWriteStackIR'](module);
     let text = UTF8ToString(textPtr);
     if (textPtr) _free(textPtr);
     return text;
diff --git a/src/pass.h b/src/pass.h
index 2c2fa0619..ab060309b 100644
--- a/src/pass.h
+++ b/src/pass.h
@@ -219,6 +219,16 @@ struct PassOptions {
   bool closedWorld = false;
   // Whether to try to preserve debug info through, which are special calls.
   bool debugInfo = false;
+  // Whether to generate StackIR during binary writing. This is on by default
+  // in -O2 and above.
+  bool generateStackIR = false;
+  // Whether to optimize StackIR during binary writing. How we optimize depends
+  // on other optimization flags like optimizeLevel. This is on by default in
+  // -O2 and above.
+  bool optimizeStackIR = false;
+  // Whether to print StackIR during binary writing, and if so to what stream.
+  // This is mainly useful for debugging.
+  std::optional<std::ostream*> printStackIR;
   // Whether we are targeting JS. In that case we want to avoid emitting things
   // in the optimizer that do not translate well to JS, or that could cause us
   // to need extra lowering work or even a loop (where we optimize to something
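These three PassOptions fields are now the only knobs for StackIR. A sketch of driving them directly from C++, using the printStackIR free function that this diff declares in wasm-stack.h (the module is assumed to be already populated):

#include <iostream>
#include "pass.h"
#include "wasm-stack.h"

// Sketch: request StackIR generation and optimization, then print the result.
void emitStackIRText(wasm::Module& wasm) {
  wasm::PassOptions options;
  options.generateStackIR = true; // produce StackIR at write time
  options.optimizeStackIR = true; // presumably also optimize it when generated
  wasm::printStackIR(std::cout, &wasm, options);
}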
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 1e5540ab0..72fa02b62 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -87,7 +87,6 @@ set(passes_SOURCES
   PrintFunctionMap.cpp
   RoundTrip.cpp
   SetGlobals.cpp
-  StackIR.cpp
   SignaturePruning.cpp
   SignatureRefining.cpp
   SignExtLowering.cpp
diff --git a/src/passes/Metrics.cpp b/src/passes/Metrics.cpp
index d2e072a6f..1778eb9bd 100644
--- a/src/passes/Metrics.cpp
+++ b/src/passes/Metrics.cpp
@@ -94,7 +94,7 @@ struct Metrics
     printCounts("global");
     // compute binary info, so we know function sizes
     BufferWithRandomAccess buffer;
-    WasmBinaryWriter writer(module, buffer);
+    WasmBinaryWriter writer(module, buffer, getPassOptions());
     writer.write();
     // print for each function
     Index binaryIndex = 0;
@@ -108,14 +108,14 @@ struct Metrics
     });
     // print for each export how much code size is due to it, i.e.,
     // how much the module could shrink without it.
-    auto sizeAfterGlobalCleanup = [](Module* module) {
+    auto sizeAfterGlobalCleanup = [&](Module* module) {
      PassRunner runner(module,
                        PassOptions::getWithDefaultOptimizationOptions());
      runner.setIsNested(true);
      runner.addDefaultGlobalOptimizationPostPasses(); // remove stuff
      runner.run();
      BufferWithRandomAccess buffer;
-      WasmBinaryWriter writer(module, buffer);
+      WasmBinaryWriter writer(module, buffer, getPassOptions());
      writer.write();
      return buffer.size();
    };
diff --git a/src/passes/Monomorphize.cpp b/src/passes/Monomorphize.cpp
index f8ee4a6e6..012f24750 100644
--- a/src/passes/Monomorphize.cpp
+++ b/src/passes/Monomorphize.cpp
@@ -151,11 +151,6 @@ struct Monomorphize : public Pass {
     // monomorphizing.
 
     // Create a new function with refined parameters as a copy of the original.
-    // (Note we must clear stack IR on the original: atm we do not have the
-    // ability to copy stack IR, so we'd hit an internal error. But as we will
-    // be optimizing the function anyhow, we'd be throwing away stack IR later
-    // so this isn't a problem.)
-    func->stackIR.reset();
     auto refinedTarget = Names::getValidFunctionName(*module, target);
     auto* refinedFunc = ModuleUtils::copyFunction(func, *module, refinedTarget);
     TypeUpdating::updateParamTypes(refinedFunc, refinedTypes, *module);
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 1ecf382d9..99c86cec4 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -109,11 +109,11 @@ struct PrintSExpression : public UnifiedExpressionVisitor<PrintSExpression> {
   const char* maybeSpace;
   const char* maybeNewLine;
 
-  bool full = false;    // whether to not elide nodes in output when possible
-                        // (like implicit blocks) and to emit types
-  bool stackIR = false; // whether to print stack IR if it is present
-                        // (if false, and Stack IR is there, we just
-                        // note it exists)
+  // Whether to not elide nodes in output when possible (like implicit blocks)
+  // and to emit types.
+  bool full = false;
+  // If present, it contains StackIR that we will print.
+  std::optional<ModuleStackIR> moduleStackIR;
 
   Module* currModule = nullptr;
   Function* currFunction = nullptr;
@@ -268,7 +268,9 @@ struct PrintSExpression : public UnifiedExpressionVisitor<PrintSExpression> {
 
   void setFull(bool full_) { full = full_; }
 
-  void setStackIR(bool stackIR_) { stackIR = stackIR_; }
+  void generateStackIR(const PassOptions& options) {
+    moduleStackIR.emplace(*currModule, options);
+  }
 
   void setDebugInfo(bool debugInfo_) { debugInfo = debugInfo_; }
 
@@ -2978,9 +2980,6 @@ void PrintSExpression::visitDefinedFunction(Function* curr) {
     o << " (type ";
     printHeapType(curr->type) << ')';
   }
-  if (!stackIR && curr->stackIR && !minify) {
-    o << " (; has Stack IR ;)";
-  }
   if (curr->getParams().size() > 0) {
     Index i = 0;
     for (const auto& param : curr->getParams()) {
@@ -3007,7 +3006,13 @@ void PrintSExpression::visitDefinedFunction(Function* curr) {
     o << maybeNewLine;
   }
   // Print the body.
-  if (!stackIR || !curr->stackIR) {
+  StackIR* stackIR = nullptr;
+  if (moduleStackIR) {
+    stackIR = moduleStackIR->getStackIROrNull(curr);
+  }
+  if (stackIR) {
+    printStackIR(stackIR, *this);
+  } else {
     // It is ok to emit a block here, as a function can directly contain a
     // list, even if our ast avoids that for simplicity. We can just do that
     // optimization here..
@@ -3021,9 +3026,6 @@ void PrintSExpression::visitDefinedFunction(Function* curr) {
       printFullLine(curr->body);
     }
     assert(controlFlowDepth == 0);
-  } else {
-    // Print the stack IR.
-    printStackIR(curr->stackIR.get(), *this);
   }
   if (currFunction->epilogLocation.size()) {
     // Print last debug location: mix of decIndent and printDebugLocation
@@ -3430,14 +3432,12 @@ public:
   void run(Module* module) override {
     PrintSExpression print(o);
     print.setDebugInfo(getPassOptions().debugInfo);
-    print.setStackIR(true);
     print.currModule = module;
+    print.generateStackIR(getPassOptions());
     print.visitModule(module);
   }
 };
 
-Pass* createPrintStackIRPass() { return new PrintStackIR(); }
-
 static std::ostream& printExpression(Expression* expression,
                                      std::ostream& o,
                                      bool minify,
@@ -3602,12 +3602,9 @@ static std::ostream& printStackIR(StackIR* ir, PrintSExpression& printer) {
   return o;
 }
 
-std::ostream& printStackIR(std::ostream& o, Module* module, bool optimize) {
-  wasm::PassRunner runner(module);
-  runner.add("generate-stack-ir");
-  if (optimize) {
-    runner.add("optimize-stack-ir");
-  }
+std::ostream&
+printStackIR(std::ostream& o, Module* module, const PassOptions& options) {
+  wasm::PassRunner runner(module, options);
   runner.add(std::make_unique<PrintStackIR>(&o));
   runner.run();
   return o;
@@ -3659,9 +3656,4 @@ std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst) {
   return wasm::printStackInst(&inst, o);
 }
 
-std::ostream& operator<<(std::ostream& o, wasm::StackIR& ir) {
-  wasm::PrintSExpression printer(o);
-  return wasm::printStackIR(&ir, printer);
-}
-
 } // namespace std
diff --git a/src/passes/RoundTrip.cpp b/src/passes/RoundTrip.cpp
index b930b195c..129b89ac8 100644
--- a/src/passes/RoundTrip.cpp
+++ b/src/passes/RoundTrip.cpp
@@ -41,7 +41,7 @@ struct RoundTrip : public Pass {
     // to tell the builder which features to build with.
     auto features = module->features;
     // Write, clear, and read the module
-    WasmBinaryWriter(module, buffer).write();
+    WasmBinaryWriter(module, buffer, getPassOptions()).write();
     ModuleUtils::clearModule(*module);
     auto input = buffer.getAsChars();
     WasmBinaryReader parser(*module, features, input);
diff --git a/src/passes/StackIR.cpp b/src/passes/StackIR.cpp
deleted file mode 100644
index 24b4fcbe8..000000000
--- a/src/passes/StackIR.cpp
+++ /dev/null
@@ -1,505 +0,0 @@
-/*
- * Copyright 2018 WebAssembly Community Group participants
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// Operations on Stack IR.
-//
-
-#include "ir/iteration.h"
-#include "ir/local-graph.h"
-#include "pass.h"
-#include "wasm-stack.h"
-#include "wasm.h"
-
-namespace wasm {
-
-// Generate Stack IR from Binaryen IR
-
-struct GenerateStackIR : public WalkerPass<PostWalker<GenerateStackIR>> {
-  bool isFunctionParallel() override { return true; }
-
-  std::unique_ptr<Pass> create() override {
-    return std::make_unique<GenerateStackIR>();
-  }
-
-  bool modifiesBinaryenIR() override { return false; }
-
-  void doWalkFunction(Function* func) {
-    StackIRGenerator stackIRGen(*getModule(), func);
-    stackIRGen.write();
-    func->stackIR = std::make_unique<StackIR>();
-    func->stackIR->swap(stackIRGen.getStackIR());
-  }
-};
-
-Pass* createGenerateStackIRPass() { return new GenerateStackIR(); }
-
-// Optimize
-
-class StackIROptimizer {
-  Function* func;
-  PassOptions& passOptions;
-  StackIR& insts;
-  FeatureSet features;
-
-public:
-  StackIROptimizer(Function* func,
-                   PassOptions& passOptions,
-                   FeatureSet features)
-    : func(func), passOptions(passOptions), insts(*func->stackIR.get()),
-      features(features) {
-    assert(func->stackIR);
-  }
-
-  void run() {
-    dce();
-    // FIXME: local2Stack is currently rather slow (due to localGraph),
-    //        so for now run it only when really optimizing
-    if (passOptions.optimizeLevel >= 3 || passOptions.shrinkLevel >= 1) {
-      local2Stack();
-    }
-    removeUnneededBlocks();
-    dce();
-    vacuum();
-  }
-
-private:
-  // Passes.
-
-  // Remove unreachable code.
-  void dce() {
-    bool inUnreachableCode = false;
-    for (Index i = 0; i < insts.size(); i++) {
-      auto* inst = insts[i];
-      if (!inst) {
-        continue;
-      }
-      if (inUnreachableCode) {
-        // Does the unreachable code end here?
-        if (isControlFlowBarrier(inst)) {
-          inUnreachableCode = false;
-        } else {
-          // We can remove this.
-          removeAt(i);
-        }
-      } else if (inst->type == Type::unreachable) {
-        inUnreachableCode = true;
-      }
-    }
-  }
-
-  // Remove obviously-unneeded code.
-  void vacuum() {
-    // In the wasm binary format a nop is never needed. (In Binaryen IR, in
-    // comparison, it is necessary e.g. in a function body or an if arm.)
-    //
-    // It is especially important to remove nops because we add nops when we
-    // read wasm into Binaryen IR. That is, this avoids a potential increase in
-    // code size.
-    for (Index i = 0; i < insts.size(); i++) {
-      auto*& inst = insts[i];
-      if (inst && inst->origin->is<Nop>()) {
-        inst = nullptr;
-      }
-    }
-  }
-
-  // If ordered properly, we can avoid a local.set/local.get pair,
-  // and use the value directly from the stack, for example
-  //    [..produce a value on the stack..]
-  //    local.set $x
-  //    [..much code..]
-  //    local.get $x
-  //    call $foo ;; use the value, foo(value)
-  // As long as the code in between does not modify $x, and has
-  // no control flow branching out, we can remove both the set
-  // and the get.
-  void local2Stack() {
-    // We use the localGraph to tell us if a get-set pair is indeed
-    // a set that is read by that get, and only that get. Note that we run
-    // this on the Binaryen IR, so we are assuming that no previous opt
-    // has changed the interaction of local operations.
-    // TODO: we can do this a lot faster, as we just care about linear
-    //       control flow.
-    LocalGraph localGraph(func);
-    localGraph.computeSetInfluences();
-    // We maintain a stack of relevant values. This contains:
-    //  * a null for each actual value that the value stack would have
-    //  * an index of each LocalSet that *could* be on the value
-    //    stack at that location.
-    const Index null = -1;
-    std::vector<Index> values;
-    // We also maintain a stack of values vectors for control flow,
-    // saving the stack as we enter and restoring it when we exit.
-    std::vector<std::vector<Index>> savedValues;
-#ifdef STACK_OPT_DEBUG
-    std::cout << "func: " << func->name << '\n' << insts << '\n';
-#endif
-    for (Index instIndex = 0; instIndex < insts.size(); instIndex++) {
-      auto* inst = insts[instIndex];
-      if (!inst) {
-        continue;
-      }
-      // First, consume values from the stack as required.
-      auto consumed = getNumConsumedValues(inst);
-#ifdef STACK_OPT_DEBUG
-      std::cout << " " << instIndex << " : " << *inst << ", " << values.size()
-                << " on stack, will consume " << consumed << "\n ";
-      for (auto s : values)
-        std::cout << s << ' ';
-      std::cout << '\n';
-#endif
-      // TODO: currently we run dce before this, but if we didn't, we'd need
-      //       to handle unreachable code here - it's ok to pop multiple values
-      //       there even if the stack is at size 0.
-      while (consumed > 0) {
-        assert(values.size() > 0);
-        // Whenever we hit a possible stack value, kill it - it would
-        // be consumed here, so we can never optimize to it.
-        while (values.back() != null) {
-          values.pop_back();
-          assert(values.size() > 0);
-        }
-        // Finally, consume the actual value that is consumed here.
-        values.pop_back();
-        consumed--;
-      }
-      // After consuming, we can see what to do with this. First, handle
-      // control flow.
-      if (isControlFlowBegin(inst)) {
-        // Save the stack for when we end this control flow.
-        savedValues.push_back(values); // TODO: optimize copies
-        values.clear();
-      } else if (isControlFlowEnd(inst)) {
-        assert(!savedValues.empty());
-        values = savedValues.back();
-        savedValues.pop_back();
-      } else if (isControlFlow(inst)) {
-        // Otherwise, in the middle of control flow, just clear it
-        values.clear();
-      }
-      // This is something we should handle, look into it.
-      if (inst->type.isConcrete()) {
-        bool optimized = false;
-        // Do not optimize multivalue locals, since those will be better
-        // optimized when they are visited in the binary writer and this
-        // optimization would intefere with that one.
-        if (auto* get = inst->origin->dynCast<LocalGet>();
-            get && inst->type.isSingle()) {
-          // Use another local to clarify what instIndex means in this scope.
-          auto getIndex = instIndex;
-
-          // This is a potential optimization opportunity! See if we
-          // can reach the set.
-          if (values.size() > 0) {
-            Index j = values.size() - 1;
-            while (1) {
-              // If there's an actual value in the way, we've failed.
-              auto setIndex = values[j];
-              if (setIndex == null) {
-                break;
-              }
-              auto* set = insts[setIndex]->origin->cast<LocalSet>();
-              if (set->index == get->index) {
-                // This might be a proper set-get pair, where the set is
-                // used by this get and nothing else, check that.
-                auto& sets = localGraph.getSetses[get];
-                if (sets.size() == 1 && *sets.begin() == set) {
-                  auto& setInfluences = localGraph.setInfluences[set];
-                  // If this has the proper value of 1, also do the potentially-
-                  // expensive check of whether we can remove this pair at all.
-                  if (setInfluences.size() == 1 &&
-                      canRemoveSetGetPair(setIndex, getIndex)) {
-                    assert(*setInfluences.begin() == get);
-                    // Do it! The set and the get can go away, the proper
-                    // value is on the stack.
-#ifdef STACK_OPT_DEBUG
-                    std::cout << "  stackify the get\n";
-#endif
-                    insts[setIndex] = nullptr;
-                    insts[getIndex] = nullptr;
-                    // Continuing on from here, replace this on the stack
-                    // with a null, representing a regular value. We
-                    // keep possible values above us active - they may
-                    // be optimized later, as they would be pushed after
-                    // us, and used before us, so there is no conflict.
-                    values[j] = null;
-                    optimized = true;
-                    break;
-                  }
-                }
-              }
-              // We failed here. Can we look some more?
-              if (j == 0) {
-                break;
-              }
-              j--;
-            }
-          }
-        }
-        if (!optimized) {
-          // This is an actual regular value on the value stack.
-          values.push_back(null);
-        }
-      } else if (inst->origin->is<LocalSet>() && inst->type == Type::none) {
-        // This set is potentially optimizable later, add to stack.
-        values.push_back(instIndex);
-      }
-    }
-  }
-
-  // There may be unnecessary blocks we can remove: blocks
-  // without branches to them are always ok to remove.
-  // TODO: a branch to a block in an if body can become
-  //       a branch to that if body
-  void removeUnneededBlocks() {
-    for (auto*& inst : insts) {
-      if (!inst) {
-        continue;
-      }
-      if (auto* block = inst->origin->dynCast<Block>()) {
-        if (!BranchUtils::BranchSeeker::has(block, block->name)) {
-          // TODO optimize, maybe run remove-unused-names
-          inst = nullptr;
-        }
-      }
-    }
-  }
-
-  // Utilities.
-
-  // A control flow "barrier" - a point where stack machine
-  // unreachability ends.
-  bool isControlFlowBarrier(StackInst* inst) {
-    switch (inst->op) {
-      case StackInst::BlockEnd:
-      case StackInst::IfElse:
-      case StackInst::IfEnd:
-      case StackInst::LoopEnd:
-      case StackInst::Catch:
-      case StackInst::CatchAll:
-      case StackInst::Delegate:
-      case StackInst::TryEnd:
-      case StackInst::TryTableEnd: {
-        return true;
-      }
-      default: { return false; }
-    }
-  }
-
-  // A control flow beginning.
-  bool isControlFlowBegin(StackInst* inst) {
-    switch (inst->op) {
-      case StackInst::BlockBegin:
-      case StackInst::IfBegin:
-      case StackInst::LoopBegin:
-      case StackInst::TryBegin:
-      case StackInst::TryTableBegin: {
-        return true;
-      }
-      default: { return false; }
-    }
-  }
-
-  // A control flow ending.
-  bool isControlFlowEnd(StackInst* inst) {
-    switch (inst->op) {
-      case StackInst::BlockEnd:
-      case StackInst::IfEnd:
-      case StackInst::LoopEnd:
-      case StackInst::TryEnd:
-      case StackInst::Delegate:
-      case StackInst::TryTableEnd: {
-        return true;
-      }
-      default: { return false; }
-    }
-  }
-
-  bool isControlFlow(StackInst* inst) { return inst->op != StackInst::Basic; }
-
-  // Remove the instruction at index i. If the instruction
-  // is control flow, and so has been expanded to multiple
-  // instructions, remove them as well.
-  void removeAt(Index i) {
-    auto* inst = insts[i];
-    insts[i] = nullptr;
-    if (inst->op == StackInst::Basic) {
-      return; // that was it
-    }
-    auto* origin = inst->origin;
-    while (1) {
-      i++;
-      assert(i < insts.size());
-      inst = insts[i];
-      insts[i] = nullptr;
-      if (inst && inst->origin == origin && isControlFlowEnd(inst)) {
-        return; // that's it, we removed it all
-      }
-    }
-  }
-
-  Index getNumConsumedValues(StackInst* inst) {
-    if (isControlFlow(inst)) {
-      // If consumes 1; that's it.
-      if (inst->op == StackInst::IfBegin) {
-        return 1;
-      }
-      return 0;
-    }
-    // Otherwise, for basic instructions, just count the expression children.
-    return ChildIterator(inst->origin).children.size();
-  }
-
-  // Given a pair of a local.set and local.get, see if we can remove them
-  // without breaking validation. Specifically, we must keep sets of non-
-  // nullable locals that dominate a get until the end of the block, such as
-  // here:
-  //
-  //   local.set 0    ;; Can we remove
-  //   local.get 0    ;; this pair?
-  //   if
-  //     local.set 0
-  //   else
-  //     local.set 0
-  //   end
-  //   local.get 0    ;; This get poses a problem.
-  //
-  // Logically the 2nd&3rd sets ensure a value is applied to the local before we
-  // read it, but the validation rules only track each set until the end of its
-  // scope, so the 1st set (before the if, in the pair) is necessary.
-  //
-  // The logic below is related to LocalStructuralDominance, but sharing code
-  // with it is difficult as this uses StackIR and not BinaryenIR, and it checks
-  // a particular set/get pair.
-  //
-  // We are given the indexes of the set and get instructions in |insts|.
-  bool canRemoveSetGetPair(Index setIndex, Index getIndex) {
-    // The set must be before the get.
-    assert(setIndex < getIndex);
-
-    auto* set = insts[setIndex]->origin->cast<LocalSet>();
-    auto localType = func->getLocalType(set->index);
-    // Note we do not need to handle tuples here, as the parent ignores them
-    // anyhow (hence we can check non-nullability instead of non-
-    // defaultability).
-    assert(localType.isSingle());
-    if (func->isParam(set->index) || !localType.isNonNullable()) {
-      // This local cannot pose a problem for validation (params are always
-      // initialized, and it is ok if nullable locals are uninitialized).
-      return true;
-    }
-
-    // Track the depth (in block/if/loop/etc. scopes) relative to our starting
-    // point. Anything less deep than that is not interesting, as we can only
-    // help things at our depth or deeper to validate.
-    Index currDepth = 0;
-
-    // Look for a different get than the one in getIndex (since that one is
-    // being removed) which would stop validating without the set. While doing
-    // so, note other sets that ensure validation even if our set is removed. We
-    // track those in this stack of booleans, one for each scope, which is true
-    // if another sets covers us and ours is not needed.
-    //
-    // We begin in the current scope and with no other set covering us.
-    std::vector<bool> coverStack = {false};
-
-    // Track the total number of covers as well, for quick checking below.
-    Index covers = 0;
-
-    // TODO: We could look before us as well, but then we might end up scanning
-    //       much of the function every time.
-    for (Index i = setIndex + 1; i < insts.size(); i++) {
-      auto* inst = insts[i];
-      if (!inst) {
-        continue;
-      }
-      if (isControlFlowBegin(inst)) {
-        // A new scope begins.
-        currDepth++;
-        coverStack.push_back(false);
-      } else if (isControlFlowEnd(inst)) {
-        if (currDepth == 0) {
-          // Less deep than the start, so we found no problem.
-          return true;
-        }
-        currDepth--;
-
-        if (coverStack.back()) {
-          // A cover existed in the scope which ended.
-          covers--;
-        }
-        coverStack.pop_back();
-      } else if (isControlFlowBarrier(inst)) {
-        // A barrier, like the else in an if-else, not only ends a scope but
-        // opens a new one.
-        if (currDepth == 0) {
-          // Another scope with the same depth begins, but ours ended, so stop.
-          return true;
-        }
-
-        if (coverStack.back()) {
-          // A cover existed in the scope which ended.
-          covers--;
-        }
-        coverStack.back() = false;
-      } else if (auto* otherSet = inst->origin->dynCast<LocalSet>()) {
-        // We are covered in this scope henceforth.
-        if (otherSet->index == set->index) {
-          if (!coverStack.back()) {
-            covers++;
-            if (currDepth == 0) {
-              // We have a cover at depth 0, so everything from here on out
-              // will be covered.
-              return true;
-            }
-            coverStack.back() = true;
-          }
-        }
-      } else if (auto* otherGet = inst->origin->dynCast<LocalGet>()) {
-        if (otherGet->index == set->index && i != getIndex && !covers) {
-          // We found a get that might be a problem: it uses the same index, but
-          // is not the get we were told about, and no other set covers us.
-          return false;
-        }
-      }
-    }
-
-    // No problem.
-    return true;
-  }
-};
-
-struct OptimizeStackIR : public WalkerPass<PostWalker<OptimizeStackIR>> {
-  bool isFunctionParallel() override { return true; }
-
-  std::unique_ptr<Pass> create() override {
-    return std::make_unique<OptimizeStackIR>();
-  }
-
-  bool modifiesBinaryenIR() override { return false; }
-
-  void doWalkFunction(Function* func) {
-    if (!func->stackIR) {
-      return;
-    }
-    StackIROptimizer(func, getPassOptions(), getModule()->features).run();
-  }
-};
-
-Pass* createOptimizeStackIRPass() { return new OptimizeStackIR(); }
-
-} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 4a4a9c558..c5a2bcc55 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -179,8 +179,6 @@ void PassRegistry::registerPasses() {
                "generate global effect info (helps later passes)",
                createGenerateGlobalEffectsPass);
   registerPass(
-    "generate-stack-ir", "generate Stack IR", createGenerateStackIRPass);
-  registerPass(
     "global-refining", "refine the types of globals", createGlobalRefiningPass);
   registerPass(
     "gsi", "globally optimize struct values", createGlobalStructInferencePass);
@@ -321,8 +319,6 @@ void PassRegistry::registerPasses() {
   registerPass("optimize-instructions",
                "optimizes instruction combinations",
                createOptimizeInstructionsPass);
-  registerPass(
-    "optimize-stack-ir", "optimize Stack IR", createOptimizeStackIRPass);
   // Outlining currently relies on LLVM's SuffixTree, which we can't rely upon
   // when building Binaryen for Emscripten.
 #ifndef SKIP_OUTLINING
@@ -369,9 +365,6 @@ void PassRegistry::registerPasses() {
   registerPass(
     "symbolmap", "(alias for print-function-map)", createPrintFunctionMapPass);
-  registerPass("print-stack-ir",
-               "print out Stack IR (useful for internal debugging)",
-               createPrintStackIRPass);
   registerPass("propagate-globals-globally",
                "propagate global values to other globals (useful for tests)",
                createPropagateGlobalsGloballyPass);
@@ -736,15 +729,9 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() {
   }
   // may allow more inlining/dae/etc., need --converge for that
   addIfNoDWARFIssues("directize");
-  // perform Stack IR optimizations here, at the very end of the
-  // optimization pipeline
-  if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) {
-    addIfNoDWARFIssues("generate-stack-ir");
-    addIfNoDWARFIssues("optimize-stack-ir");
-  }
 }
 
-static void dumpWasm(Name name, Module* wasm) {
+static void dumpWasm(Name name, Module* wasm, const PassOptions& options) {
   static int counter = 0;
   std::string numstr = std::to_string(counter++);
   while (numstr.size() < 3) {
@@ -757,7 +744,7 @@ static void dumpWasm(Name name, Module* wasm) {
 #endif
   fullName += numstr + "-" + name.toString();
   Colors::setEnabled(false);
-  ModuleWriter writer;
+  ModuleWriter writer(options);
   writer.setDebugInfo(true);
   writer.writeBinary(*wasm, fullName + ".wasm");
 }
@@ -780,7 +767,7 @@ void PassRunner::run() {
       padding = std::max(padding, pass->name.size());
     }
     if (passDebug >= 3 && !isNested) {
-      dumpWasm("before", wasm);
+      dumpWasm("before", wasm, options);
     }
     for (auto& pass : passes) {
       // ignoring the time, save a printout of the module before, in case this
@@ -824,7 +811,7 @@ void PassRunner::run() {
         }
       }
       if (passDebug >= 3) {
-        dumpWasm(pass->name, wasm);
+        dumpWasm(pass->name, wasm, options);
      }
    }
    std::cerr << "[PassRunner] " << what << " took " << totalTime.count()
@@ -908,100 +895,6 @@ void PassRunner::doAdd(std::unique_ptr<Pass> pass) {
 
 void PassRunner::clear() { passes.clear(); }
 
-// Checks that the state is valid before and after a
-// pass runs on a function. We run these extra checks when
-// pass-debug mode is enabled.
-struct AfterEffectFunctionChecker {
-  Function* func;
-  Name name;
-
-  // Check Stack IR state: if the main IR changes, there should be no
-  // stack IR, as the stack IR would be wrong.
-  bool beganWithStackIR;
-  size_t originalFunctionHash;
-
-  // In the creator we can scan the state of the module and function before the
-  // pass runs.
-  AfterEffectFunctionChecker(Function* func) : func(func), name(func->name) {
-    beganWithStackIR = func->stackIR != nullptr;
-    if (beganWithStackIR) {
-      originalFunctionHash = FunctionHasher::hashFunction(func);
-    }
-  }
-
-  // This is called after the pass is run, at which time we can check things.
-  void check() {
-    assert(func->name == name); // no global module changes should have occurred
-    if (beganWithStackIR && func->stackIR) {
-      auto after = FunctionHasher::hashFunction(func);
-      if (after != originalFunctionHash) {
-        Fatal() << "[PassRunner] PASS_DEBUG check failed: had Stack IR before "
-                   "and after the pass ran, and the pass modified the main IR, "
-                   "which invalidates Stack IR - pass should have been marked "
-                   "'modifiesBinaryenIR'";
-      }
-    }
-  }
-};
-
-// Runs checks on the entire module, in a non-function-parallel pass.
-// In particular, in such a pass functions may be removed or renamed, track
-// that.
-struct AfterEffectModuleChecker {
-  Module* module;
-
-  std::vector<AfterEffectFunctionChecker> checkers;
-
-  bool beganWithAnyStackIR;
-
-  AfterEffectModuleChecker(Module* module) : module(module) {
-    for (auto& func : module->functions) {
-      checkers.emplace_back(func.get());
-    }
-    beganWithAnyStackIR = hasAnyStackIR();
-  }
-
-  void check() {
-    if (beganWithAnyStackIR && hasAnyStackIR()) {
-      // If anything changed to the functions, that's not good.
-      if (checkers.size() != module->functions.size()) {
-        error();
-      }
-      for (Index i = 0; i < checkers.size(); i++) {
-        // Did a pointer change? (a deallocated function could cause that)
-        if (module->functions[i].get() != checkers[i].func ||
-            module->functions[i]->body != checkers[i].func->body) {
-          error();
-        }
-        // Did a name change?
-        if (module->functions[i]->name != checkers[i].name) {
-          error();
-        }
-      }
-      // Global function state appears to not have been changed: the same
-      // functions are there. Look into their contents.
-      for (auto& checker : checkers) {
-        checker.check();
-      }
-    }
-  }
-
-  void error() {
-    Fatal() << "[PassRunner] PASS_DEBUG check failed: had Stack IR before and "
-               "after the pass ran, and the pass modified global function "
-               "state - pass should have been marked 'modifiesBinaryenIR'";
-  }
-
-  bool hasAnyStackIR() {
-    for (auto& func : module->functions) {
-      if (func->stackIR) {
-        return true;
-      }
-    }
-    return false;
-  }
-};
-
 void PassRunner::runPass(Pass* pass) {
   assert(!pass->isFunctionParallel());
 
@@ -1009,20 +902,12 @@ void PassRunner::runPass(Pass* pass) {
     return;
   }
 
-  std::unique_ptr<AfterEffectModuleChecker> checker;
-  if (getPassDebug()) {
-    checker = std::unique_ptr<AfterEffectModuleChecker>(
-      new AfterEffectModuleChecker(wasm));
-  }
   // Passes can only be run once and we deliberately do not clear the pass
   // runner after running the pass, so there must not already be a runner here.
   assert(!pass->getPassRunner());
   pass->setPassRunner(this);
   pass->run(wasm);
   handleAfterEffects(pass);
-  if (getPassDebug()) {
-    checker->check();
-  }
 }
 
 void PassRunner::runPassOnFunction(Pass* pass, Function* func) {
@@ -1051,21 +936,12 @@ void PassRunner::runPassOnFunction(Pass* pass, Function* func) {
     bodyBefore << *func->body << '\n';
   }
 
-  std::unique_ptr<AfterEffectFunctionChecker> checker;
-  if (passDebug) {
-    checker = std::make_unique<AfterEffectFunctionChecker>(func);
-  }
-
   // Function-parallel passes get a new instance per function
   auto instance = pass->create();
   instance->setPassRunner(this);
   instance->runOnFunction(wasm, func);
   handleAfterEffects(pass, func);
 
-  if (passDebug) {
-    checker->check();
-  }
-
   if (extraFunctionValidation) {
     if (!WasmValidator().validate(func, *wasm, WasmValidator::Minimal)) {
       Fatal() << "Last nested function-parallel pass (" << pass->name
@@ -1095,10 +971,6 @@ void PassRunner::handleAfterEffects(Pass* pass, Function* func) {
     return;
   }
 
-  // If Binaryen IR is modified, Stack IR must be cleared - it would
-  // be out of sync in a potentially dangerous way.
-  func->stackIR.reset(nullptr);
-
   if (pass->requiresNonNullableLocalFixups()) {
     TypeUpdating::handleNonDefaultableLocals(func, *wasm);
   }
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 8695539b3..ac3e1ef29 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -54,7 +54,6 @@ Pass* createFunctionMetricsPass();
 Pass* createGenerateDynCallsPass();
 Pass* createGenerateI64DynCallsPass();
 Pass* createGenerateGlobalEffectsPass();
-Pass* createGenerateStackIRPass();
 Pass* createGlobalRefiningPass();
 Pass* createGlobalStructInferencePass();
 Pass* createGlobalTypeOptimizationPass();
@@ -103,7 +102,6 @@ Pass* createOptimizeAddedConstantsPropagatePass();
 Pass* createOptimizeInstructionsPass();
 Pass* createOptimizeCastsPass();
 Pass* createOptimizeForJSPass();
-Pass* createOptimizeStackIRPass();
 // Outlining currently relies on LLVM's SuffixTree, which we can't rely upon
 // when building Binaryen for Emscripten.
 #ifdef __EMSCRIPTEN__
@@ -123,7 +121,6 @@ Pass* createPrinterPass();
 Pass* createPrintCallGraphPass();
 Pass* createPrintFeaturesPass();
 Pass* createPrintFunctionMapPass();
-Pass* createPrintStackIRPass();
 Pass* createPropagateGlobalsGloballyPass();
 Pass* createRemoveNonJSOpsPass();
 Pass* createRemoveImportsPass();
diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h
index 0a47d9f70..8772edd29 100644
--- a/src/tools/optimization-options.h
+++ b/src/tools/optimization-options.h
@@ -26,6 +26,17 @@ namespace wasm {
 
 struct OptimizationOptions : public ToolOptions {
+  void parse(int argc, const char* argv[]) {
+    ToolOptions::parse(argc, argv);
+
+    // After parsing the arguments, update defaults based on the
+    // optimize/shrink levels.
+    if (passOptions.optimizeLevel >= 2 || passOptions.shrinkLevel >= 1) {
+      passOptions.generateStackIR = true;
+      passOptions.optimizeStackIR = true;
+    }
+  }
+
   static constexpr const char* DEFAULT_OPT_PASSES = "O";
   static constexpr const int OS_OPTIMIZE_LEVEL = 2;
   static constexpr const int OS_SHRINK_LEVEL = 1;
diff --git a/src/tools/tool-options.h b/src/tools/tool-options.h
index 6d68ff3c1..599b3b22c 100644
--- a/src/tools/tool-options.h
+++ b/src/tools/tool-options.h
@@ -151,7 +151,35 @@ struct ToolOptions : public Options {
            Options::Arguments::Zero,
            [this](Options*, const std::string&) {
              passOptions.closedWorld = true;
-           });
+           })
+      .add("--generate-stack-ir",
+           "",
+           "generate StackIR during writing",
+           ToolOptionsCategory,
+           Options::Arguments::Zero,
+           [&](Options* o, const std::string& arguments) {
+             passOptions.generateStackIR = true;
+           })
+      .add("--optimize-stack-ir",
+           "",
+           "optimize StackIR during writing",
+           ToolOptionsCategory,
+           Options::Arguments::Zero,
+           [&](Options* o, const std::string& arguments) {
+             // Also generate StackIR, to have something to optimize.
+             passOptions.generateStackIR = true;
+             passOptions.optimizeStackIR = true;
+           })
+      .add("--print-stack-ir",
+           "",
+           "print StackIR during writing",
+           ToolOptionsCategory,
+           Options::Arguments::Zero,
+           [&](Options* o, const std::string& arguments) {
+             // Also generate StackIR, to have something to print.
+             passOptions.generateStackIR = true;
+             passOptions.printStackIR = &std::cout;
+           });
   }
 
   ToolOptions& addFeature(FeatureSet::Feature feature,
diff --git a/src/tools/wasm-as.cpp b/src/tools/wasm-as.cpp
index 311605326..a767e6908 100644
--- a/src/tools/wasm-as.cpp
+++ b/src/tools/wasm-as.cpp
@@ -129,7 +129,7 @@ int main(int argc, const char* argv[]) {
   if (options.debug) {
     std::cerr << "writing..." << std::endl;
   }
-  ModuleWriter writer;
+  ModuleWriter writer(options.passOptions);
   writer.setBinary(true);
   writer.setDebugInfo(debugInfo);
   if (sourceMapFilename.size()) {
diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp
index 4018be0e7..d3f798084 100644
--- a/src/tools/wasm-ctor-eval.cpp
+++ b/src/tools/wasm-ctor-eval.cpp
@@ -1491,7 +1491,7 @@ int main(int argc, const char* argv[]) {
   if (options.debug) {
     std::cout << "writing..." << std::endl;
   }
-  ModuleWriter writer;
+  ModuleWriter writer(options.passOptions);
   writer.setBinary(emitBinary);
   writer.setDebugInfo(debugInfo);
   writer.write(wasm, options.extra["output"]);
diff --git a/src/tools/wasm-emscripten-finalize.cpp b/src/tools/wasm-emscripten-finalize.cpp
index a19d27328..6b4e994ac 100644
--- a/src/tools/wasm-emscripten-finalize.cpp
+++ b/src/tools/wasm-emscripten-finalize.cpp
@@ -279,7 +279,7 @@ int main(int argc, const char* argv[]) {
 
   if (writeOutput) {
     Output output(outfile, emitBinary ? Flags::Binary : Flags::Text);
-    ModuleWriter writer;
+    ModuleWriter writer(options.passOptions);
     writer.setDebugInfo(debugInfo);
     // writer.setSymbolMap(symbolMap);
     writer.setBinary(emitBinary);
diff --git a/src/tools/wasm-merge.cpp b/src/tools/wasm-merge.cpp
index 8e8e2d80d..449f0cfdb 100644
--- a/src/tools/wasm-merge.cpp
+++ b/src/tools/wasm-merge.cpp
@@ -752,7 +752,7 @@ Input source maps can be specified by adding an -ism option right after the modu
 
   // Output.
   if (options.extra.count("output") > 0) {
-    ModuleWriter writer;
+    ModuleWriter writer(options.passOptions);
     writer.setBinary(emitBinary);
     writer.setDebugInfo(debugInfo);
     if (outputSourceMapFilename.size()) {
diff --git a/src/tools/wasm-metadce.cpp b/src/tools/wasm-metadce.cpp
index 1b429a723..301470206 100644
--- a/src/tools/wasm-metadce.cpp
+++ b/src/tools/wasm-metadce.cpp
@@ -600,7 +600,7 @@ int main(int argc, const char* argv[]) {
   graph.apply();
 
   if (options.extra.count("output") > 0) {
-    ModuleWriter writer;
+    ModuleWriter writer(options.passOptions);
     writer.setBinary(emitBinary);
     writer.setDebugInfo(debugInfo);
     if (outputSourceMapFilename.size()) {
diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp
index 32f9f1aad..cb6d0bcec 100644
--- a/src/tools/wasm-opt.cpp
+++ b/src/tools/wasm-opt.cpp
@@ -35,6 +35,7 @@
 #include "wasm-interpreter.h"
 #include "wasm-io.h"
 #include "wasm-s-parser.h"
+#include "wasm-stack.h"
 #include "wasm-validator.h"
 #include "wasm2c-wrapper.h"
 
@@ -347,7 +348,7 @@ int main(int argc, const char* argv[]) {
     if (extraFuzzCommand.size() > 0 && options.extra.count("output") > 0) {
       BYN_TRACE("writing binary before opts, for extra fuzz command...\n");
-      ModuleWriter writer;
+      ModuleWriter writer(options.passOptions);
       writer.setBinary(emitBinary);
       writer.setDebugInfo(options.passOptions.debugInfo);
       writer.write(wasm, options.extra["output"]);
@@ -379,7 +380,7 @@ int main(int argc, const char* argv[]) {
       // size no longer decreasing.
       auto getSize = [&]() {
         BufferWithRandomAccess buffer;
-        WasmBinaryWriter writer(&wasm, buffer);
+        WasmBinaryWriter writer(&wasm, buffer, options.passOptions);
         writer.write();
         return buffer.size();
       };
@@ -402,11 +403,6 @@ int main(int argc, const char* argv[]) {
       runner.add("translate-to-new-eh");
       // Perform Stack IR optimizations here, at the very end of the
       // optimization pipeline.
-      if (options.passOptions.optimizeLevel >= 2 ||
-          options.passOptions.shrinkLevel >= 1) {
-        runner.addIfNoDWARFIssues("generate-stack-ir");
-        runner.addIfNoDWARFIssues("optimize-stack-ir");
-      }
       runner.run();
     if (options.passOptions.validate) {
       bool valid = WasmValidator().validate(wasm, options.passOptions);
@@ -420,6 +416,10 @@ int main(int argc, const char* argv[]) {
     results.check(wasm);
   }
 
+  if (options.passOptions.printStackIR) {
+    printStackIR(std::cout, &wasm, options.passOptions);
+  }
+
   if (options.extra.count("output") == 0) {
     if (!options.quiet) {
       std::cerr << "warning: no output file specified, not emitting output\n";
@@ -428,7 +428,7 @@ int main(int argc, const char* argv[]) {
 
   BYN_TRACE("writing...\n");
-  ModuleWriter writer;
+  ModuleWriter writer(options.passOptions);
   writer.setBinary(emitBinary);
   writer.setDebugInfo(options.passOptions.debugInfo);
   if (outputSourceMapFilename.size()) {
diff --git a/src/tools/wasm-reduce.cpp b/src/tools/wasm-reduce.cpp
index 094338bea..ac5b7722b 100644
--- a/src/tools/wasm-reduce.cpp
+++ b/src/tools/wasm-reduce.cpp
@@ -401,7 +401,7 @@ struct Reducer
   bool writeAndTestReduction(ProgramResult& out) {
     // write the module out
-    ModuleWriter writer;
+    ModuleWriter writer(toolOptions.passOptions);
     writer.setBinary(binary);
     writer.setDebugInfo(debugInfo);
     writer.write(*getModule(), test);
@@ -1368,7 +1368,7 @@ int main(int argc, const char* argv[]) {
   if (resultOnInvalid == expected) {
     // Try it on a valid input. Module emptyModule;
-    ModuleWriter writer;
+    ModuleWriter writer(options.passOptions);
     writer.setBinary(true);
     writer.write(emptyModule, test);
     ProgramResult resultOnValid(command);
diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp
index bea3ddce7..d7dc19d67 100644
--- a/src/tools/wasm-split/wasm-split.cpp
+++ b/src/tools/wasm-split/wasm-split.cpp
@@ -91,7 +91,7 @@ void adjustTableSize(Module& wasm, int initialSize) {
 void writeModule(Module& wasm,
                  std::string filename,
                  const WasmSplitOptions& options) {
-  ModuleWriter writer;
+  ModuleWriter writer(options.passOptions);
   writer.setBinary(options.emitBinary);
   writer.setDebugInfo(options.passOptions.debugInfo);
   if (options.emitModuleNames) {
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 6c1230b7b..b5aa83960 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1270,8 +1270,10 @@ class WasmBinaryWriter {
   };
 
 public:
-  WasmBinaryWriter(Module* input, BufferWithRandomAccess& o)
-    : wasm(input), o(o), indexes(*input) {
+  WasmBinaryWriter(Module* input,
+                   BufferWithRandomAccess& o,
+                   const PassOptions& options)
+    : wasm(input), o(o), options(options), indexes(*input) {
     prepare();
   }
@@ -1384,6 +1386,8 @@ public:
 private:
   Module* wasm;
   BufferWithRandomAccess& o;
+  const PassOptions& options;
+
   BinaryIndexes indexes;
   ModuleUtils::IndexedHeapTypes indexedTypes;
   std::unordered_map<Signature, uint32_t> signatureIndexes;
diff --git a/src/wasm-io.h b/src/wasm-io.h
index ae66c3932..e5cad9a23 100644
--- a/src/wasm-io.h
+++ b/src/wasm-io.h
@@ -22,6 +22,7 @@
 #define wasm_wasm_io_h
 
 #include "parsing.h"
+#include "pass.h"
 #include "support/file.h"
 #include "wasm.h"
 
@@ -85,6 +86,8 @@ private:
 };
 
 class ModuleWriter : public ModuleIOBase {
+  const PassOptions& options;
+
   bool binary = true;
 
   // TODO: Remove `emitModuleName`. See the comment in wasm-binary.h
@@ -97,7 +100,9 @@ class ModuleWriter : public ModuleIOBase {
 public:
   // Writing defaults to not storing the names section. Storing it is a user-
   // observable fact that must be opted into.
-  ModuleWriter() { setDebugInfo(false); }
+  ModuleWriter(const PassOptions& options) : options(options) {
+    setDebugInfo(false);
+  }
 
   void setBinary(bool binary_) { binary = binary_; }
   void setSymbolMap(std::string symbolMap_) { symbolMap = symbolMap_; }
diff --git a/src/wasm-stack.h b/src/wasm-stack.h
index 5858d6f88..ae453f9de 100644
--- a/src/wasm-stack.h
+++ b/src/wasm-stack.h
@@ -18,6 +18,7 @@
 #define wasm_stack_h
 
 #include "ir/branch-utils.h"
+#include "ir/module-utils.h"
 #include "ir/properties.h"
 #include "pass.h"
 #include "support/insert_ordered.h"
@@ -85,6 +86,8 @@ public:
   Type type;
 };
 
+using StackIR = std::vector<StackInst*>;
+
 class BinaryInstWriter : public OverriddenVisitor<BinaryInstWriter> {
 public:
   BinaryInstWriter(WasmBinaryWriter& parent,
@@ -468,44 +471,24 @@ private:
   bool sourceMap;
 };
 
-// Binaryen IR to stack IR converter
-// Queues the expressions linearly in Stack IR (SIR)
-class StackIRGenerator : public BinaryenIRWriter<StackIRGenerator> {
-public:
-  StackIRGenerator(Module& module, Function* func)
-    : BinaryenIRWriter<StackIRGenerator>(func), module(module) {}
-
-  void emit(Expression* curr);
-  void emitScopeEnd(Expression* curr);
-  void emitHeader() {}
-  void emitIfElse(If* curr) {
-    stackIR.push_back(makeStackInst(StackInst::IfElse, curr));
-  }
-  void emitCatch(Try* curr, Index i) {
-    stackIR.push_back(makeStackInst(StackInst::Catch, curr));
-  }
-  void emitCatchAll(Try* curr) {
-    stackIR.push_back(makeStackInst(StackInst::CatchAll, curr));
-  }
-  void emitDelegate(Try* curr) {
-    stackIR.push_back(makeStackInst(StackInst::Delegate, curr));
-  }
-  void emitFunctionEnd() {}
-  void emitUnreachable() {
-    stackIR.push_back(makeStackInst(Builder(module).makeUnreachable()));
-  }
-  void emitDebugLocation(Expression* curr) {}
-
-  StackIR& getStackIR() { return stackIR; }
+// Binaryen IR to stack IR converter for an entire module. Generates all the
+// StackIR in parallel, and then allows querying for the StackIR of individual
+// functions.
+class ModuleStackIR {
+  ModuleUtils::ParallelFunctionAnalysis<StackIR> analysis;
 
-private:
-  StackInst* makeStackInst(StackInst::Op op, Expression* origin);
-  StackInst* makeStackInst(Expression* origin) {
-    return makeStackInst(StackInst::Basic, origin);
+public:
+  ModuleStackIR(Module& wasm, const PassOptions& options);
+
+  // Get StackIR for a function, if it exists. (This allows some functions to
+  // have it and others not, if we add such capability in the future.)
+  StackIR* getStackIROrNull(Function* func) {
+    auto iter = analysis.map.find(func);
+    if (iter == analysis.map.end()) {
+      return nullptr;
+    }
+    return &iter->second;
   }
-
-  Module& module;
-  StackIR stackIR; // filled in write()
 };
 
 // Stack IR to binary writer
 class StackIRToBinaryWriter {
 public:
   StackIRToBinaryWriter(WasmBinaryWriter& parent,
                         BufferWithRandomAccess& o,
                         Function* func,
+                        StackIR& stackIR,
                         bool sourceMap = false,
                         bool DWARF = false)
     : parent(parent), writer(parent, o, func, sourceMap, DWARF), func(func),
-      sourceMap(sourceMap) {}
+      stackIR(stackIR), sourceMap(sourceMap) {}
 
   void write();
 
@@ -527,11 +511,47 @@ private:
   WasmBinaryWriter& parent;
   BinaryInstWriter writer;
   Function* func;
+  StackIR& stackIR;
   bool sourceMap;
 };
 
-std::ostream& printStackIR(std::ostream& o, Module* module, bool optimize);
+// Stack IR optimizer
+class StackIROptimizer {
+  Function* func;
+  StackIR& insts;
+  const PassOptions& passOptions;
+  FeatureSet features;
+
+public:
+  StackIROptimizer(Function* func,
+                   StackIR& insts,
+                   const PassOptions& passOptions,
+                   FeatureSet features);
+
+  void run();
+
+private:
+  void dce();
+  void vacuum();
+  void local2Stack();
+  void removeUnneededBlocks();
+  bool isControlFlowBarrier(StackInst* inst);
+  bool isControlFlowBegin(StackInst* inst);
+  bool isControlFlowEnd(StackInst* inst);
+  bool isControlFlow(StackInst* inst);
+  void removeAt(Index i);
+  Index getNumConsumedValues(StackInst* inst);
+  bool canRemoveSetGetPair(Index setIndex, Index getIndex);
+};
+
+// Generate and emit StackIR.
+std::ostream&
+printStackIR(std::ostream& o, Module* module, const PassOptions& options);
 
 } // namespace wasm
 
+namespace std {
+std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst);
+} // namespace std
+
 #endif // wasm_stack_h
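The key new type above is ModuleStackIR, which computes StackIR for all functions in parallel and is then queried per function by the binary writer and printer. A hedged sketch of the usage pattern (the loop body is illustrative):

#include "wasm-stack.h"

// Sketch: generate StackIR for a whole module up front, then query it per
// function, as this diff's writer and printer do. Imports have no StackIR,
// so the null check matters.
void forEachStackIR(wasm::Module& wasm, const wasm::PassOptions& options) {
  wasm::ModuleStackIR moduleStackIR(wasm, options);
  for (auto& func : wasm.functions) {
    if (wasm::StackIR* stackIR = moduleStackIR.getStackIROrNull(func.get())) {
      // ... emit or inspect *stackIR here ...
    }
  }
}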
+ StackIR* getStackIROrNull(Function* func) { + auto iter = analysis.map.find(func); + if (iter == analysis.map.end()) { + return nullptr; + } + return &iter->second; } - - Module& module; - StackIR stackIR; // filled in write() }; // Stack IR to binary writer @@ -514,10 +497,11 @@ public: StackIRToBinaryWriter(WasmBinaryWriter& parent, BufferWithRandomAccess& o, Function* func, + StackIR& stackIR, bool sourceMap = false, bool DWARF = false) : parent(parent), writer(parent, o, func, sourceMap, DWARF), func(func), - sourceMap(sourceMap) {} + stackIR(stackIR), sourceMap(sourceMap) {} void write(); @@ -527,11 +511,47 @@ private: WasmBinaryWriter& parent; BinaryInstWriter writer; Function* func; + StackIR& stackIR; bool sourceMap; }; -std::ostream& printStackIR(std::ostream& o, Module* module, bool optimize); +// Stack IR optimizer +class StackIROptimizer { + Function* func; + StackIR& insts; + const PassOptions& passOptions; + FeatureSet features; + +public: + StackIROptimizer(Function* func, + StackIR& insts, + const PassOptions& passOptions, + FeatureSet features); + + void run(); + +private: + void dce(); + void vacuum(); + void local2Stack(); + void removeUnneededBlocks(); + bool isControlFlowBarrier(StackInst* inst); + bool isControlFlowBegin(StackInst* inst); + bool isControlFlowEnd(StackInst* inst); + bool isControlFlow(StackInst* inst); + void removeAt(Index i); + Index getNumConsumedValues(StackInst* inst); + bool canRemoveSetGetPair(Index setIndex, Index getIndex); +}; + +// Generate and emit StackIR. +std::ostream& +printStackIR(std::ostream& o, Module* module, const PassOptions& options); } // namespace wasm +namespace std { +std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst); +} // namespace std + #endif // wasm_stack_h diff --git a/src/wasm.h b/src/wasm.h index d852950e8..ebc7b908d 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2136,14 +2136,6 @@ struct BinaryLocations { std::unordered_map<Function*, FunctionLocations> functions; }; -// Forward declarations of Stack IR, as functions can contain it, see -// the stackIR property. -// Stack IR is a secondary IR to the main IR defined in this file (Binaryen -// IR). See wasm-stack.h. -class StackInst; - -using StackIR = std::vector<StackInst*>; - class Function : public Importable { public: HeapType type = HeapType(Signature()); // parameters and return value @@ -2153,16 +2145,6 @@ public: // The body of the function Expression* body = nullptr; - // If present, this stack IR was generated from the main Binaryen IR body, - // and possibly optimized. If it is present when writing to wasm binary, - // it will be emitted instead of the main Binaryen IR. - // - // Note that no special care is taken to synchronize the two IRs - if you - // emit stack IR and then optimize the main IR, you need to recompute the - // stack IR. The Pass system will throw away Stack IR if a pass is run - // that declares it may modify Binaryen IR. - std::unique_ptr<StackIR> stackIR; - // local names. these are optional. 
std::unordered_map<Index, Name> localNames; std::unordered_map<Name, Index> localIndices; @@ -2511,8 +2493,6 @@ std::ostream& operator<<(std::ostream& o, wasm::Function& func); std::ostream& operator<<(std::ostream& o, wasm::Expression& expression); std::ostream& operator<<(std::ostream& o, wasm::ModuleExpression pair); std::ostream& operator<<(std::ostream& o, wasm::ShallowExpression expression); -std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst); -std::ostream& operator<<(std::ostream& o, wasm::StackIR& ir); } // namespace std diff --git a/src/wasm/CMakeLists.txt b/src/wasm/CMakeLists.txt index d5b4f6747..e01be3b6a 100644 --- a/src/wasm/CMakeLists.txt +++ b/src/wasm/CMakeLists.txt @@ -11,6 +11,7 @@ set(wasm_SOURCES wasm-ir-builder.cpp wasm-s-parser.cpp wasm-stack.cpp + wasm-stack-opts.cpp wasm-type.cpp wasm-validator.cpp ${wasm_HEADERS} diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index d4fda5915..7e90dfde4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -392,6 +392,12 @@ void WasmBinaryWriter::writeFunctions() { if (importInfo->getNumDefinedFunctions() == 0) { return; } + + std::optional<ModuleStackIR> moduleStackIR; + if (options.generateStackIR) { + moduleStackIR.emplace(*wasm, options); + } + BYN_TRACE("== writeFunctions\n"); auto sectionStart = startSection(BinaryConsts::Section::Code); o << U32LEB(importInfo->getNumDefinedFunctions()); @@ -405,10 +411,14 @@ void WasmBinaryWriter::writeFunctions() { size_t sizePos = writeU32LEBPlaceholder(); size_t start = o.size(); BYN_TRACE("writing" << func->name << std::endl); - // Emit Stack IR if present, and if we can - if (func->stackIR) { + // Emit Stack IR if present. + StackIR* stackIR = nullptr; + if (moduleStackIR) { + stackIR = moduleStackIR->getStackIROrNull(func); + } + if (stackIR) { BYN_TRACE("write Stack IR\n"); - StackIRToBinaryWriter writer(*this, o, func, sourceMap, DWARF); + StackIRToBinaryWriter writer(*this, o, func, *stackIR, sourceMap, DWARF); writer.write(); if (debugInfo) { funcMappedLocals[func->name] = std::move(writer.getMappedLocals()); diff --git a/src/wasm/wasm-io.cpp b/src/wasm/wasm-io.cpp index a24122bd8..10b84bb4d 100644 --- a/src/wasm/wasm-io.cpp +++ b/src/wasm/wasm-io.cpp @@ -149,7 +149,7 @@ void ModuleWriter::writeText(Module& wasm, std::string filename) { void ModuleWriter::writeBinary(Module& wasm, Output& output) { BufferWithRandomAccess buffer; - WasmBinaryWriter writer(&wasm, buffer); + WasmBinaryWriter writer(&wasm, buffer, options); // if debug info is used, then we want to emit the names section writer.setNamesSection(debugInfo); if (emitModuleName) { diff --git a/src/wasm/wasm-stack-opts.cpp b/src/wasm/wasm-stack-opts.cpp new file mode 100644 index 000000000..eac423fbd --- /dev/null +++ b/src/wasm/wasm-stack-opts.cpp @@ -0,0 +1,456 @@ +/* + * Copyright 2018 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Operations on Stack IR. 
+// + +#include "ir/iteration.h" +#include "ir/local-graph.h" +#include "pass.h" +#include "wasm-stack.h" +#include "wasm.h" + +namespace wasm { + +StackIROptimizer::StackIROptimizer(Function* func, + StackIR& insts, + const PassOptions& passOptions, + FeatureSet features) + : func(func), insts(insts), passOptions(passOptions), features(features) {} + +void StackIROptimizer::run() { + dce(); + // FIXME: local2Stack is currently rather slow (due to localGraph), + // so for now run it only when really optimizing + if (passOptions.optimizeLevel >= 3 || passOptions.shrinkLevel >= 1) { + local2Stack(); + } + removeUnneededBlocks(); + dce(); + vacuum(); +} + +// Remove unreachable code. +void StackIROptimizer::dce() { + bool inUnreachableCode = false; + for (Index i = 0; i < insts.size(); i++) { + auto* inst = insts[i]; + if (!inst) { + continue; + } + if (inUnreachableCode) { + // Does the unreachable code end here? + if (isControlFlowBarrier(inst)) { + inUnreachableCode = false; + } else { + // We can remove this. + removeAt(i); + } + } else if (inst->type == Type::unreachable) { + inUnreachableCode = true; + } + } +} + +// Remove obviously-unneeded code. +void StackIROptimizer::vacuum() { + // In the wasm binary format a nop is never needed. (In Binaryen IR, in + // comparison, it is necessary e.g. in a function body or an if arm.) + // + // It is especially important to remove nops because we add nops when we + // read wasm into Binaryen IR. That is, this avoids a potential increase in + // code size. + for (Index i = 0; i < insts.size(); i++) { + auto*& inst = insts[i]; + if (inst && inst->origin->is<Nop>()) { + inst = nullptr; + } + } +} + +// If ordered properly, we can avoid a local.set/local.get pair, +// and use the value directly from the stack, for example +// [..produce a value on the stack..] +// local.set $x +// [..much code..] +// local.get $x +// call $foo ;; use the value, foo(value) +// As long as the code in between does not modify $x, and has +// no control flow branching out, we can remove both the set +// and the get. +void StackIROptimizer::local2Stack() { + // We use the localGraph to tell us if a get-set pair is indeed + // a set that is read by that get, and only that get. Note that we run + // this on the Binaryen IR, so we are assuming that no previous opt + // has changed the interaction of local operations. + // TODO: we can do this a lot faster, as we just care about linear + // control flow. + LocalGraph localGraph(func); + localGraph.computeSetInfluences(); + // We maintain a stack of relevant values. This contains: + // * a null for each actual value that the value stack would have + // * an index of each LocalSet that *could* be on the value + // stack at that location. + const Index null = -1; + std::vector<Index> values; + // We also maintain a stack of values vectors for control flow, + // saving the stack as we enter and restoring it when we exit. + std::vector<std::vector<Index>> savedValues; +#ifdef STACK_OPT_DEBUG + std::cout << "func: " << func->name << '\n' << insts << '\n'; +#endif + for (Index instIndex = 0; instIndex < insts.size(); instIndex++) { + auto* inst = insts[instIndex]; + if (!inst) { + continue; + } + // First, consume values from the stack as required. 
+    auto consumed = getNumConsumedValues(inst);
+#ifdef STACK_OPT_DEBUG
+    std::cout << "  " << instIndex << " : " << *inst << ", " << values.size()
+              << " on stack, will consume " << consumed << "\n    ";
+    for (auto s : values)
+      std::cout << s << ' ';
+    std::cout << '\n';
+#endif
+    // TODO: currently we run dce before this, but if we didn't, we'd need
+    //       to handle unreachable code here - it's ok to pop multiple values
+    //       there even if the stack is at size 0.
+    while (consumed > 0) {
+      assert(values.size() > 0);
+      // Whenever we hit a possible stack value, kill it - it would
+      // be consumed here, so we can never optimize to it.
+      while (values.back() != null) {
+        values.pop_back();
+        assert(values.size() > 0);
+      }
+      // Finally, consume the actual value that is consumed here.
+      values.pop_back();
+      consumed--;
+    }
+    // After consuming, we can see what to do with this. First, handle
+    // control flow.
+    if (isControlFlowBegin(inst)) {
+      // Save the stack for when we end this control flow.
+      savedValues.push_back(values); // TODO: optimize copies
+      values.clear();
+    } else if (isControlFlowEnd(inst)) {
+      assert(!savedValues.empty());
+      values = savedValues.back();
+      savedValues.pop_back();
+    } else if (isControlFlow(inst)) {
+      // Otherwise, in the middle of control flow, just clear it.
+      values.clear();
+    }
+    // Now see what this instruction pushes: a concrete value may be a
+    // local.get we can supply directly from the stack, and a local.set may
+    // become optimizable later.
+    if (inst->type.isConcrete()) {
+      bool optimized = false;
+      // Do not optimize multivalue locals, since those will be better
+      // optimized when they are visited in the binary writer and this
+      // optimization would interfere with that one.
+      if (auto* get = inst->origin->dynCast<LocalGet>();
+          get && inst->type.isSingle()) {
+        // Use another local to clarify what instIndex means in this scope.
+        auto getIndex = instIndex;
+
+        // This is a potential optimization opportunity! See if we
+        // can reach the set.
+        if (values.size() > 0) {
+          Index j = values.size() - 1;
+          while (1) {
+            // If there's an actual value in the way, we've failed.
+            auto setIndex = values[j];
+            if (setIndex == null) {
+              break;
+            }
+            auto* set = insts[setIndex]->origin->cast<LocalSet>();
+            if (set->index == get->index) {
+              // This might be a proper set-get pair, where the set is
+              // used by this get and nothing else; check that.
+              auto& sets = localGraph.getSetses[get];
+              if (sets.size() == 1 && *sets.begin() == set) {
+                auto& setInfluences = localGraph.setInfluences[set];
+                // If this has the proper value of 1, also do the potentially-
+                // expensive check of whether we can remove this pair at all.
+                if (setInfluences.size() == 1 &&
+                    canRemoveSetGetPair(setIndex, getIndex)) {
+                  assert(*setInfluences.begin() == get);
+                  // Do it! The set and the get can go away, the proper
+                  // value is on the stack.
+#ifdef STACK_OPT_DEBUG
+                  std::cout << "  stackify the get\n";
+#endif
+                  insts[setIndex] = nullptr;
+                  insts[getIndex] = nullptr;
+                  // Continuing on from here, replace this on the stack
+                  // with a null, representing a regular value. We
+                  // keep possible values above us active - they may
+                  // be optimized later, as they would be pushed after
+                  // us, and used before us, so there is no conflict.
+                  values[j] = null;
+                  optimized = true;
+                  break;
+                }
+              }
+            }
+            // We failed here. Can we look some more?
+            if (j == 0) {
+              break;
+            }
+            j--;
+          }
+        }
+      }
+      if (!optimized) {
+        // This is an actual regular value on the value stack.
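+        // (A null entry can never be stackified away later; only pending
+        // LocalSet indices can.)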
+        values.push_back(null);
+      }
+    } else if (inst->origin->is<LocalSet>() && inst->type == Type::none) {
+      // This set is potentially optimizable later; add it to the stack.
+      values.push_back(instIndex);
+    }
+  }
+}
+
+// There may be unnecessary blocks we can remove: blocks
+// without branches to them are always ok to remove.
+// TODO: a branch to a block in an if body can become
+//       a branch to that if body
+void StackIROptimizer::removeUnneededBlocks() {
+  for (auto*& inst : insts) {
+    if (!inst) {
+      continue;
+    }
+    if (auto* block = inst->origin->dynCast<Block>()) {
+      if (!BranchUtils::BranchSeeker::has(block, block->name)) {
+        // TODO optimize, maybe run remove-unused-names
+        inst = nullptr;
+      }
+    }
+  }
+}
+
+// A control flow "barrier" - a point where stack machine
+// unreachability ends.
+bool StackIROptimizer::isControlFlowBarrier(StackInst* inst) {
+  switch (inst->op) {
+    case StackInst::BlockEnd:
+    case StackInst::IfElse:
+    case StackInst::IfEnd:
+    case StackInst::LoopEnd:
+    case StackInst::Catch:
+    case StackInst::CatchAll:
+    case StackInst::Delegate:
+    case StackInst::TryEnd:
+    case StackInst::TryTableEnd: {
+      return true;
+    }
+    default: {
+      return false;
+    }
+  }
+}
+
+// A control flow beginning.
+bool StackIROptimizer::isControlFlowBegin(StackInst* inst) {
+  switch (inst->op) {
+    case StackInst::BlockBegin:
+    case StackInst::IfBegin:
+    case StackInst::LoopBegin:
+    case StackInst::TryBegin:
+    case StackInst::TryTableBegin: {
+      return true;
+    }
+    default: {
+      return false;
+    }
+  }
+}
+
+// A control flow ending.
+bool StackIROptimizer::isControlFlowEnd(StackInst* inst) {
+  switch (inst->op) {
+    case StackInst::BlockEnd:
+    case StackInst::IfEnd:
+    case StackInst::LoopEnd:
+    case StackInst::TryEnd:
+    case StackInst::Delegate:
+    case StackInst::TryTableEnd: {
+      return true;
+    }
+    default: {
+      return false;
+    }
+  }
+}
+
+bool StackIROptimizer::isControlFlow(StackInst* inst) {
+  return inst->op != StackInst::Basic;
+}
+
+// Remove the instruction at index i. If the instruction
+// is control flow, and so has been expanded to multiple
+// instructions, remove them as well.
+void StackIROptimizer::removeAt(Index i) {
+  auto* inst = insts[i];
+  insts[i] = nullptr;
+  if (inst->op == StackInst::Basic) {
+    return; // that was it
+  }
+  auto* origin = inst->origin;
+  while (1) {
+    i++;
+    assert(i < insts.size());
+    inst = insts[i];
+    insts[i] = nullptr;
+    if (inst && inst->origin == origin && isControlFlowEnd(inst)) {
+      return; // that's it, we removed it all
+    }
+  }
+}
+
+Index StackIROptimizer::getNumConsumedValues(StackInst* inst) {
+  if (isControlFlow(inst)) {
+    // An If consumes 1 (its condition); that's it.
+    if (inst->op == StackInst::IfBegin) {
+      return 1;
+    }
+    return 0;
+  }
+  // Otherwise, for basic instructions, just count the expression children.
+  return ChildIterator(inst->origin).children.size();
+}
+
+// Given a pair of a local.set and local.get, see if we can remove them
+// without breaking validation. Specifically, we must keep sets of non-
+// nullable locals that dominate a get until the end of the block, such as
+// here:
+//
+//   local.set 0 ;; Can we remove
+//   local.get 0 ;; this pair?
+//   if
+//     local.set 0
+//   else
+//     local.set 0
+//   end
+//   local.get 0 ;; This get poses a problem.
+//
+// Logically the 2nd and 3rd sets ensure a value is applied to the local
+// before we read it, but the validation rules only track each set until the
+// end of its scope, so the 1st set (before the if, in the pair) is necessary.
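+//
+// Conversely, if another set of the same local at depth 0 follows the pair
+// (it then covers any later gets), or if no later get of the local exists at
+// all, the pair can be removed safely; that is what the code below checks.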
+//
+// The logic below is related to LocalStructuralDominance, but sharing code
+// with it is difficult as this uses StackIR and not BinaryenIR, and it checks
+// a particular set/get pair.
+//
+// We are given the indexes of the set and get instructions in |insts|.
+bool StackIROptimizer::canRemoveSetGetPair(Index setIndex, Index getIndex) {
+  // The set must be before the get.
+  assert(setIndex < getIndex);
+
+  auto* set = insts[setIndex]->origin->cast<LocalSet>();
+  auto localType = func->getLocalType(set->index);
+  // Note we do not need to handle tuples here, as the parent ignores them
+  // anyhow (hence we can check non-nullability instead of non-
+  // defaultability).
+  assert(localType.isSingle());
+  if (func->isParam(set->index) || !localType.isNonNullable()) {
+    // This local cannot pose a problem for validation (params are always
+    // initialized, and it is ok if nullable locals are uninitialized).
+    return true;
+  }
+
+  // Track the depth (in block/if/loop/etc. scopes) relative to our starting
+  // point. Anything less deep than that is not interesting, as we can only
+  // help things at our depth or deeper to validate.
+  Index currDepth = 0;
+
+  // Look for a different get than the one in getIndex (since that one is
+  // being removed) which would no longer validate without the set. While
+  // doing so, note other sets that ensure validation even if our set is
+  // removed. We track those in this stack of booleans, one for each scope,
+  // which is true if another set covers us and ours is not needed.
+  //
+  // We begin in the current scope and with no other set covering us.
+  std::vector<bool> coverStack = {false};
+
+  // Track the total number of covers as well, for quick checking below.
+  Index covers = 0;
+
+  // TODO: We could look before us as well, but then we might end up scanning
+  //       much of the function every time.
+  for (Index i = setIndex + 1; i < insts.size(); i++) {
+    auto* inst = insts[i];
+    if (!inst) {
+      continue;
+    }
+    if (isControlFlowBegin(inst)) {
+      // A new scope begins.
+      currDepth++;
+      coverStack.push_back(false);
+    } else if (isControlFlowEnd(inst)) {
+      if (currDepth == 0) {
+        // Less deep than the start, so we found no problem.
+        return true;
+      }
+      currDepth--;
+
+      if (coverStack.back()) {
+        // A cover existed in the scope which ended.
+        covers--;
+      }
+      coverStack.pop_back();
+    } else if (isControlFlowBarrier(inst)) {
+      // A barrier, like the else in an if-else, not only ends a scope but
+      // opens a new one.
+      if (currDepth == 0) {
+        // Another scope with the same depth begins, but ours ended, so stop.
+        return true;
+      }
+
+      if (coverStack.back()) {
+        // A cover existed in the scope which ended.
+        covers--;
+      }
+      coverStack.back() = false;
+    } else if (auto* otherSet = inst->origin->dynCast<LocalSet>()) {
+      // We are covered in this scope henceforth.
+      if (otherSet->index == set->index) {
+        if (!coverStack.back()) {
+          covers++;
+          if (currDepth == 0) {
+            // We have a cover at depth 0, so everything from here on out
+            // will be covered.
+            return true;
+          }
+          coverStack.back() = true;
+        }
+      }
+    } else if (auto* otherGet = inst->origin->dynCast<LocalGet>()) {
+      if (otherGet->index == set->index && i != getIndex && !covers) {
+        // We found a get that might be a problem: it uses the same index, but
+        // is not the get we were told about, and no other set covers us.
+        return false;
+      }
+    }
+  }
+
+  // No problem was found; the pair can be removed.
+  return true;
+}
+
+} // namespace wasm
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index 1ed60abed..3cdd1f718 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -2737,6 +2737,45 @@ int32_t BinaryInstWriter::getBreakIndex(Name name) { // -1 if not found
   WASM_UNREACHABLE("break index not found");
 }
 
+// Queues the expressions linearly in Stack IR (SIR)
+class StackIRGenerator : public BinaryenIRWriter<StackIRGenerator> {
+public:
+  StackIRGenerator(Module& module, Function* func)
+    : BinaryenIRWriter<StackIRGenerator>(func), module(module) {}
+
+  void emit(Expression* curr);
+  void emitScopeEnd(Expression* curr);
+  void emitHeader() {}
+  void emitIfElse(If* curr) {
+    stackIR.push_back(makeStackInst(StackInst::IfElse, curr));
+  }
+  void emitCatch(Try* curr, Index i) {
+    stackIR.push_back(makeStackInst(StackInst::Catch, curr));
+  }
+  void emitCatchAll(Try* curr) {
+    stackIR.push_back(makeStackInst(StackInst::CatchAll, curr));
+  }
+  void emitDelegate(Try* curr) {
+    stackIR.push_back(makeStackInst(StackInst::Delegate, curr));
+  }
+  void emitFunctionEnd() {}
+  void emitUnreachable() {
+    stackIR.push_back(makeStackInst(Builder(module).makeUnreachable()));
+  }
+  void emitDebugLocation(Expression* curr) {}
+
+  StackIR& getStackIR() { return stackIR; }
+
+private:
+  StackInst* makeStackInst(StackInst::Op op, Expression* origin);
+  StackInst* makeStackInst(Expression* origin) {
+    return makeStackInst(StackInst::Basic, origin);
+  }
+
+  Module& module;
+  StackIR stackIR; // filled in write()
+};
+
 void StackIRGenerator::emit(Expression* curr) {
   StackInst* stackInst = nullptr;
   if (curr->is<Block>()) {
@@ -2798,6 +2837,22 @@ StackInst* StackIRGenerator::makeStackInst(StackInst::Op op,
   return ret;
 }
 
+ModuleStackIR::ModuleStackIR(Module& wasm, const PassOptions& options)
+  : analysis(wasm, [&](Function* func, StackIR& stackIR) {
+      if (func->imported()) {
+        return;
+      }
+
+      StackIRGenerator stackIRGen(wasm, func);
+      stackIRGen.write();
+      stackIR = std::move(stackIRGen.getStackIR());
+
+      if (options.optimizeStackIR) {
+        StackIROptimizer optimizer(func, stackIR, options, wasm.features);
+        optimizer.run();
+      }
+    }) {}
+
 void StackIRToBinaryWriter::write() {
   if (func->prologLocation.size()) {
     parent.writeDebugLocation(*func->prologLocation.begin());
@@ -2805,7 +2860,7 @@ void StackIRToBinaryWriter::write() {
   writer.mapLocalsAndEmitHeader();
   // Stack to track indices of catches within a try
   SmallVector<Index, 4> catchIndexStack;
-  for (auto* inst : *func->stackIR) {
+  for (auto* inst : stackIR) {
     if (!inst) {
       continue; // a nullptr is just something we can skip
     }
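Taken together, the patch moves Stack IR from a per-Function field to something the binary writer computes on demand from PassOptions. A minimal sketch of how a caller might opt in, assuming ModuleWriter is constructed from the same PassOptions that writeBinary() above reads (the exact constructor call, setter, and output path here are illustrative, not part of the patch):

    #include "wasm.h"
    #include "wasm-io.h"

    // Sketch: emit a module with Stack IR generated and optimized at write
    // time. generateStackIR and optimizeStackIR are the new PassOptions
    // flags; the ModuleWriter setup shown is assumed for illustration.
    void emitWithStackIR(wasm::Module& module) {
      wasm::PassOptions options;
      options.generateStackIR = true;  // build Stack IR in writeFunctions()
      options.optimizeStackIR = true;  // run StackIROptimizer per function
      wasm::ModuleWriter writer(options);
      writer.setBinary(true);
      writer.write(module, "out.wasm");
    }

Since Stack IR is recomputed on each write, toggling these options changes only how the binary is emitted, not the Binaryen IR itself.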