author     Alon Zakai <alonzakai@gmail.com>   2018-01-17 13:11:14 -0800
committer  GitHub <noreply@github.com>        2018-01-17 13:11:14 -0800
commit     0a9ddae715b2cff23a86cf14bbf6a4b870511395 (patch)
tree       ac15dd6da937509b22b6cc5b97f21fa508f48570 /src
parent     01b23987405d8d7b2f13e40ef906169163ac2a5f (diff)
Global optimization fixes (#1360)
* run dfe (duplicate-function-elimination) at the very end, as it may be more effective after inlining
* optimize reorder-functions
* do a final dfe in asm2wasm after all other opts
* make inlining deterministic: std::atomic<T> values are not zero-initialized (see the sketch after this list)
* do global post opts at the end of asm2wasm, and don't also do them in the module builder
* fix function type removal
* don't inline+optimize when preserving debug info
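For context on the determinism fix: before C++20, a default-initialized std::atomic<T> holds an indeterminate value, so counters in a plain struct member must be initialized explicitly. A minimal standalone sketch (not from this commit) of the problem and the fix, mirroring what the Inlining.cpp hunk below does for FunctionInfo:

```cpp
#include <atomic>
#include <cstdio>

struct Info {
  std::atomic<int> calls; // default-initialized: value is indeterminate pre-C++20
  Info() { calls = 0; }   // the fix: start from a known value, as FunctionInfo now does
};

int main() {
  Info info;
  info.calls++;
  std::printf("%d\n", info.calls.load()); // reliably prints 1
  return 0;
}
```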
Diffstat (limited to 'src')

 src/asm2wasm.h                            | 35
 src/passes/Inlining.cpp                   | 11
 src/passes/RemoveUnusedModuleElements.cpp |  1
 src/passes/ReorderFunctions.cpp           | 56
 src/passes/pass.cpp                       |  9
 src/wasm-module-building.h                | 13
 src/wasm.h                                |  1
 src/wasm/wasm.cpp                         | 12

8 files changed, 96 insertions, 42 deletions
diff --git a/src/asm2wasm.h b/src/asm2wasm.h
index b5f701dac..aa8394d3a 100644
--- a/src/asm2wasm.h
+++ b/src/asm2wasm.h
@@ -966,21 +966,6 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
       }
       // optimize relooper label variable usage at the wasm level, where it is easy
       passRunner.add("relooper-jump-threading");
-    }, [&]() {
-      // beforeGlobalOptimizations
-      // if we added any helper functions (like non-trapping i32-div, etc.), then those
-      // have not been optimized (the optimizing builder has just been fed the asm.js
-      // functions). Optimize those now. Typically there are very few, just do it
-      // sequentially.
-      PassRunner passRunner(&wasm, passOptions);
-      passRunner.addDefaultFunctionOptimizationPasses();
-      for (auto& pair : trappingFunctions.getFunctions()) {
-        auto* func = pair.second;
-        passRunner.runOnFunction(func);
-      }
-      for (auto* func : extraSupportFunctions) {
-        passRunner.runOnFunction(func);
-      }
     }, debug, false /* do not validate globally yet */);
   }
@@ -1190,6 +1175,19 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
   if (runOptimizationPasses) {
     optimizingBuilder->finish();
+    // if we added any helper functions (like non-trapping i32-div, etc.), then those
+    // have not been optimized (the optimizing builder has just been fed the asm.js
+    // functions). Optimize those now. Typically there are very few, just do it
+    // sequentially.
+    PassRunner passRunner(&wasm, passOptions);
+    passRunner.addDefaultFunctionOptimizationPasses();
+    for (auto& pair : trappingFunctions.getFunctions()) {
+      auto* func = pair.second;
+      passRunner.runOnFunction(func);
+    }
+    for (auto* func : extraSupportFunctions) {
+      passRunner.runOnFunction(func);
+    }
   }
 
   wasm.debugInfoFileNames = std::move(preprocessor.debugInfoFileNames);
@@ -1382,7 +1380,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
     }
   };
-  PassRunner passRunner(&wasm);
+  PassRunner passRunner(&wasm, passOptions);
   passRunner.setFeatures(passOptions.features);
   if (debug) {
     passRunner.setDebug(true);
@@ -1411,6 +1409,11 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
     passRunner.add<ApplyDebugInfo>();
     passRunner.add("vacuum"); // FIXME maybe just remove the nops that were debuginfo nodes, if not optimizing?
   }
+  if (runOptimizationPasses) {
+    // do final global optimizations after all function work is done
+    // (e.g. duplicate funcs may appear thanks to that work)
+    passRunner.addDefaultGlobalOptimizationPostPasses();
+  }
   passRunner.run();
 
   // remove the debug info intrinsic
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp
index b3e111e5a..79785811c 100644
--- a/src/passes/Inlining.cpp
+++ b/src/passes/Inlining.cpp
@@ -50,8 +50,15 @@ static const int FLEXIBLE_SIZE_LIMIT = 20;
 struct FunctionInfo {
   std::atomic<Index> calls;
   Index size;
-  bool lightweight = true;
-  bool usedGlobally = false; // in a table or export
+  std::atomic<bool> lightweight;
+  bool usedGlobally; // in a table or export
+
+  FunctionInfo() {
+    calls = 0;
+    size = 0;
+    lightweight = true;
+    usedGlobally = false;
+  }
 
   bool worthInlining(PassOptions& options, bool allowMultipleInliningsPerFunction) {
     // if it's big, it's just not worth doing (TODO: investigate more)
diff --git a/src/passes/RemoveUnusedModuleElements.cpp b/src/passes/RemoveUnusedModuleElements.cpp
index 13caaccc9..6cbb001b1 100644
--- a/src/passes/RemoveUnusedModuleElements.cpp
+++ b/src/passes/RemoveUnusedModuleElements.cpp
@@ -268,6 +268,7 @@ struct RemoveUnusedModuleElements : public Pass {
     module->functionTypes.erase(std::remove_if(module->functionTypes.begin(), module->functionTypes.end(),
       [&needed](std::unique_ptr<FunctionType>& type) {
         return needed.count(type.get()) == 0;
       }), module->functionTypes.end());
+    module->updateMaps();
   }
 };
diff --git a/src/passes/ReorderFunctions.cpp b/src/passes/ReorderFunctions.cpp
index c468fd9d3..5312ee90a 100644
--- a/src/passes/ReorderFunctions.cpp
+++ b/src/passes/ReorderFunctions.cpp
@@ -19,6 +19,13 @@
 // binaries because fewer bytes are needed to encode references to frequently
 // used functions.
 //
+// This may incur a tradeoff, though, as while it reduces binary size, it may
+// increase gzip size. This might be because the new order has the functions in
+// a less beneficial position for compression, that is, mutually-compressible
+// functions are no longer together (when they were before, in the original order,
+// there was some natural tendency one way or the other). TODO: investigate
+// similarity ordering here.
+//
 
 #include <memory>
@@ -28,10 +35,41 @@ namespace wasm {
 
-struct ReorderFunctions : public WalkerPass<PostWalker<ReorderFunctions>> {
-  std::map<Name, uint32_t> counts;
+typedef std::unordered_map<Name, std::atomic<Index>> NameCountMap;
+
+struct CallCountScanner : public WalkerPass<PostWalker<CallCountScanner>> {
+  bool isFunctionParallel() override { return true; }
+
+  CallCountScanner(NameCountMap* counts) : counts(counts) {}
+
+  CallCountScanner* create() override {
+    return new CallCountScanner(counts);
+  }
+
+  void visitCall(Call* curr) {
+    assert(counts->count(curr->target) > 0); // can't add a new element in parallel
+    (*counts)[curr->target]++;
+  }
 
-  void visitModule(Module *module) {
+private:
+  NameCountMap* counts;
+};
+
+struct ReorderFunctions : public Pass {
+  void run(PassRunner* runner, Module* module) override {
+    NameCountMap counts;
+    // fill in info, as we operate on it in parallel (each function to its own entry)
+    for (auto& func : module->functions) {
+      counts[func->name];
+    }
+    // find counts on function calls
+    {
+      PassRunner runner(module);
+      runner.setIsNested(true);
+      runner.add<CallCountScanner>(&counts);
+      runner.run();
+    }
+    // find counts on global usages
     if (module->start.is()) {
       counts[module->start]++;
     }
@@ -43,19 +81,15 @@ struct ReorderFunctions : public WalkerPass<PostWalker<ReorderFunctions>> {
         counts[curr]++;
       }
     }
-    std::sort(module->functions.begin(), module->functions.end(), [this](
+    // sort
+    std::sort(module->functions.begin(), module->functions.end(), [&counts](
       const std::unique_ptr<Function>& a,
       const std::unique_ptr<Function>& b) -> bool {
-      if (this->counts[a->name] == this->counts[b->name]) {
+      if (counts[a->name] == counts[b->name]) {
         return strcmp(a->name.str, b->name.str) > 0;
       }
-      return this->counts[a->name] > this->counts[b->name];
+      return counts[a->name] > counts[b->name];
     });
-    counts.clear();
-  }
-
-  void visitCall(Call *curr) {
-    counts[curr->target]++;
   }
 };
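The ReorderFunctions rewrite leans on one idiom worth calling out: every key of the shared map is created up front, so the parallel scanners only increment existing atomic counters and never change the map's structure concurrently. A minimal sketch of that pattern, using plain std::thread in place of Binaryen's pass workers:

```cpp
#include <atomic>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

int main() {
  std::unordered_map<std::string, std::atomic<int>> counts;
  // seed all keys before going parallel; no insert/rehash can occur afterwards
  for (const char* name : {"foo", "bar"}) counts[name];
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; i++) {
    workers.emplace_back([&] {
      counts.at("foo")++; // concurrent increments of an existing entry are safe
    });
  }
  for (auto& t : workers) t.join();
  return counts.at("foo") == 4 ? 0 : 1; // exits 0: all four increments landed
}
```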
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 88734e85a..11575b1df 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -180,11 +180,14 @@ void PassRunner::addDefaultGlobalOptimizationPrePasses() {
 }
 
 void PassRunner::addDefaultGlobalOptimizationPostPasses() {
-  add("duplicate-function-elimination"); // optimizations show more functions as duplicate
-  add("remove-unused-module-elements");
-  if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) {
+  // inline when working hard, and when not preserving debug info
+  // (inlining+optimizing can remove the annotations)
+  if ((options.optimizeLevel >= 2 || options.shrinkLevel >= 2) &&
+      !options.debugInfo) {
     add("inlining-optimizing");
   }
+  add("duplicate-function-elimination"); // optimizations show more functions as duplicate
+  add("remove-unused-module-elements");
   add("memory-packing");
 }
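The reordering here matters: inlining-optimizing now runs first, so duplicate-function-elimination afterwards can catch functions that only become identical once inlining has happened. A small usage sketch (the wrapper function is hypothetical; PassRunner and PassOptions are real, as used in the diffs above):

```cpp
#include "pass.h"
#include "wasm.h"

using namespace wasm;

// Hypothetical driver: run the global post passes on a finished module.
// With options.debugInfo set, inlining-optimizing is skipped even at -O2,
// since inlining+optimizing can strip the debug annotations.
void runPostPasses(Module& module, PassOptions& options) {
  PassRunner runner(&module, options);
  runner.addDefaultGlobalOptimizationPostPasses();
  runner.run();
}
```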
diff --git a/src/wasm-module-building.h b/src/wasm-module-building.h
index 866019328..990ea5191 100644
--- a/src/wasm-module-building.h
+++ b/src/wasm-module-building.h
@@ -35,9 +35,9 @@ static std::mutex debug;
 // Helps build wasm modules efficiently. If you build a module by
 // adding function by function, and you want to optimize them, this class
 // starts optimizing using worker threads *while you are still adding*.
-// It runs function optimization passes at that time, and then at the end
-// it runs global module-level optimization passes. The result is a fully
-// optimized module, optimized while being generated.
+// It runs function optimization passes at that time. This does not
+// run global optimization after that by default, but you can do that
+// by calling optimizeGlobally().
 //
 // This might also be faster than normal module optimization since it
 // runs all passes on each function, then goes on to the next function
@@ -75,7 +75,6 @@ class OptimizingIncrementalModuleBuilder {
   uint32_t numFunctions;
   PassOptions passOptions;
   std::function<void (PassRunner&)> addPrePasses;
-  std::function<void ()> beforeGlobalOptimizations;
   Function* endMarker;
   std::atomic<Function*>* list;
   uint32_t nextFunction; // only used on main thread
@@ -93,10 +92,9 @@ public:
   //  this bounds helps avoid locking.
   OptimizingIncrementalModuleBuilder(Module* wasm, Index numFunctions, PassOptions passOptions,
                                      std::function<void (PassRunner&)> addPrePasses,
-                                     std::function<void ()> beforeGlobalOptimizations,
                                      bool debug, bool validateGlobally)
     : wasm(wasm), numFunctions(numFunctions), passOptions(passOptions),
-      addPrePasses(addPrePasses), beforeGlobalOptimizations(beforeGlobalOptimizations),
+      addPrePasses(addPrePasses),
       endMarker(nullptr), list(nullptr), nextFunction(0),
       numWorkers(0), liveWorkers(0), activeWorkers(0), availableFuncs(0), finishedFuncs(0),
       finishing(false), debug(debug), validateGlobally(validateGlobally) {
@@ -178,7 +176,6 @@ public:
       wakeAllWorkers();
       waitUntilAllFinished();
     }
-    optimizeGlobally();
     // TODO: clear side thread allocators from module allocator, as these threads were transient
   }
@@ -230,11 +227,9 @@ private:
   }
 
   void optimizeGlobally() {
-    beforeGlobalOptimizations();
     PassRunner passRunner(wasm, passOptions);
     passRunner.addDefaultGlobalOptimizationPostPasses();
     passRunner.run();
-  }
 
   // worker code
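Under the new contract, finish() only drains the function-level work; global optimization is the caller's decision. A sketch of a caller under that contract (makeFunction is a hypothetical generator; the builder API names are taken from the header above):

```cpp
#include "wasm-module-building.h"

using namespace wasm;

Function* makeFunction(Index i); // hypothetical: produces the next function

void buildOptimized(Module& wasm, Index numFunctions, PassOptions passOptions) {
  OptimizingIncrementalModuleBuilder builder(
    &wasm, numFunctions, passOptions,
    [](PassRunner& runner) { /* add pre-passes here */ },
    /* debug= */ false, /* validateGlobally= */ false);
  for (Index i = 0; i < numFunctions; i++) {
    builder.addFunction(makeFunction(i)); // optimized on worker threads as we go
  }
  builder.finish();           // waits for workers; no longer runs global passes
  builder.optimizeGlobally(); // explicit opt-in, per the updated comment above
}
```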
diff --git a/src/wasm.h b/src/wasm.h
index 485afade1..4072533a7 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -784,6 +784,7 @@ public:
   void removeImport(Name name);
   void removeExport(Name name);
   void removeFunction(Name name);
+  void removeFunctionType(Name name);
   // TODO: remove* for other elements
 
   void updateMaps();
diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp
index 8739d96f7..32a633926 100644
--- a/src/wasm/wasm.cpp
+++ b/src/wasm/wasm.cpp
@@ -747,7 +747,17 @@ void Module::removeFunction(Name name) {
   functionsMap.erase(name);
 }
 
-// TODO: remove* for other elements
+void Module::removeFunctionType(Name name) {
+  for (size_t i = 0; i < functionTypes.size(); i++) {
+    if (functionTypes[i]->name == name) {
+      functionTypes.erase(functionTypes.begin() + i);
+      break;
+    }
+  }
+  functionTypesMap.erase(name);
+}
+
+// TODO: remove* for other elements
 
 void Module::updateMaps() {
   functionsMap.clear();
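To close, a hypothetical helper showing the new Module::removeFunctionType next to the existing removeFunction; the usage scan is an assumption for illustration, not part of the commit:

```cpp
#include "wasm.h"

using namespace wasm;

// Remove a function, and drop its function type as well if no other
// function still refers to that type (imports are ignored here).
void removeFunctionAndMaybeType(Module& module, Name name) {
  Name type = module.getFunction(name)->type;
  module.removeFunction(name);
  for (auto& func : module.functions) {
    if (func->type == type) return; // type still in use, keep it
  }
  module.removeFunctionType(type); // erases from both the vector and the map
}
```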