summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2018-01-17 13:11:14 -0800
committerGitHub <noreply@github.com>2018-01-17 13:11:14 -0800
commit0a9ddae715b2cff23a86cf14bbf6a4b870511395 (patch)
treeac15dd6da937509b22b6cc5b97f21fa508f48570 /src
parent01b23987405d8d7b2f13e40ef906169163ac2a5f (diff)
downloadbinaryen-0a9ddae715b2cff23a86cf14bbf6a4b870511395.tar.gz
binaryen-0a9ddae715b2cff23a86cf14bbf6a4b870511395.tar.bz2
binaryen-0a9ddae715b2cff23a86cf14bbf6a4b870511395.zip
Global optimization fixes (#1360)
* run dfe at the very end, as it may be more effective after inlining * optimize reorder-functions * do a final dfe in asm2wasm after all other opts * make inlining deterministic: std::atomic<T> values are not zero-initialized * do global post opts at the end of asm2wasm, and don't also do them in the module builder * fix function type removing * don't inline+optimize when preserving debug info
Diffstat (limited to 'src')
-rw-r--r--src/asm2wasm.h35
-rw-r--r--src/passes/Inlining.cpp11
-rw-r--r--src/passes/RemoveUnusedModuleElements.cpp1
-rw-r--r--src/passes/ReorderFunctions.cpp56
-rw-r--r--src/passes/pass.cpp9
-rw-r--r--src/wasm-module-building.h13
-rw-r--r--src/wasm.h1
-rw-r--r--src/wasm/wasm.cpp12
8 files changed, 96 insertions, 42 deletions
diff --git a/src/asm2wasm.h b/src/asm2wasm.h
index b5f701dac..aa8394d3a 100644
--- a/src/asm2wasm.h
+++ b/src/asm2wasm.h
@@ -966,21 +966,6 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
}
// optimize relooper label variable usage at the wasm level, where it is easy
passRunner.add("relooper-jump-threading");
- }, [&]() {
- // beforeGlobalOptimizations
- // if we added any helper functions (like non-trapping i32-div, etc.), then those
- // have not been optimized (the optimizing builder has just been fed the asm.js
- // functions). Optimize those now. Typically there are very few, just do it
- // sequentially.
- PassRunner passRunner(&wasm, passOptions);
- passRunner.addDefaultFunctionOptimizationPasses();
- for (auto& pair : trappingFunctions.getFunctions()) {
- auto* func = pair.second;
- passRunner.runOnFunction(func);
- }
- for (auto* func : extraSupportFunctions) {
- passRunner.runOnFunction(func);
- }
}, debug, false /* do not validate globally yet */);
}
@@ -1190,6 +1175,19 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
if (runOptimizationPasses) {
optimizingBuilder->finish();
+ // if we added any helper functions (like non-trapping i32-div, etc.), then those
+ // have not been optimized (the optimizing builder has just been fed the asm.js
+ // functions). Optimize those now. Typically there are very few, just do it
+ // sequentially.
+ PassRunner passRunner(&wasm, passOptions);
+ passRunner.addDefaultFunctionOptimizationPasses();
+ for (auto& pair : trappingFunctions.getFunctions()) {
+ auto* func = pair.second;
+ passRunner.runOnFunction(func);
+ }
+ for (auto* func : extraSupportFunctions) {
+ passRunner.runOnFunction(func);
+ }
}
wasm.debugInfoFileNames = std::move(preprocessor.debugInfoFileNames);
@@ -1382,7 +1380,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
}
};
- PassRunner passRunner(&wasm);
+ PassRunner passRunner(&wasm, passOptions);
passRunner.setFeatures(passOptions.features);
if (debug) {
passRunner.setDebug(true);
@@ -1411,6 +1409,11 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
passRunner.add<ApplyDebugInfo>();
passRunner.add("vacuum"); // FIXME maybe just remove the nops that were debuginfo nodes, if not optimizing?
}
+ if (runOptimizationPasses) {
+ // do final global optimizations after all function work is done
+ // (e.g. duplicate funcs may appear thanks to that work)
+ passRunner.addDefaultGlobalOptimizationPostPasses();
+ }
passRunner.run();
// remove the debug info intrinsic
diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp
index b3e111e5a..79785811c 100644
--- a/src/passes/Inlining.cpp
+++ b/src/passes/Inlining.cpp
@@ -50,8 +50,15 @@ static const int FLEXIBLE_SIZE_LIMIT = 20;
struct FunctionInfo {
std::atomic<Index> calls;
Index size;
- bool lightweight = true;
- bool usedGlobally = false; // in a table or export
+ std::atomic<bool> lightweight;
+ bool usedGlobally; // in a table or export
+
+ FunctionInfo() {
+ calls = 0;
+ size = 0;
+ lightweight = true;
+ usedGlobally = false;
+ }
bool worthInlining(PassOptions& options, bool allowMultipleInliningsPerFunction) {
// if it's big, it's just not worth doing (TODO: investigate more)
diff --git a/src/passes/RemoveUnusedModuleElements.cpp b/src/passes/RemoveUnusedModuleElements.cpp
index 13caaccc9..6cbb001b1 100644
--- a/src/passes/RemoveUnusedModuleElements.cpp
+++ b/src/passes/RemoveUnusedModuleElements.cpp
@@ -268,6 +268,7 @@ struct RemoveUnusedModuleElements : public Pass {
module->functionTypes.erase(std::remove_if(module->functionTypes.begin(), module->functionTypes.end(), [&needed](std::unique_ptr<FunctionType>& type) {
return needed.count(type.get()) == 0;
}), module->functionTypes.end());
+ module->updateMaps();
}
};
diff --git a/src/passes/ReorderFunctions.cpp b/src/passes/ReorderFunctions.cpp
index c468fd9d3..5312ee90a 100644
--- a/src/passes/ReorderFunctions.cpp
+++ b/src/passes/ReorderFunctions.cpp
@@ -19,6 +19,13 @@
// binaries because fewer bytes are needed to encode references to frequently
// used functions.
//
+// This may incur a tradeoff, though, as while it reduces binary size, it may
+// increase gzip size. This might be because the new order has the functions in
+// a less beneficial position for compression, that is, mutually-compressible
+// functions are no longer together (when they were before, in the original order,
- there was some natural tendency one way or the other). TODO: investigate
+// similarity ordering here.
+//
#include <memory>
@@ -28,10 +35,41 @@
namespace wasm {
-struct ReorderFunctions : public WalkerPass<PostWalker<ReorderFunctions>> {
- std::map<Name, uint32_t> counts;
+typedef std::unordered_map<Name, std::atomic<Index>> NameCountMap;
+
+struct CallCountScanner : public WalkerPass<PostWalker<CallCountScanner>> {
+ bool isFunctionParallel() override { return true; }
+
+ CallCountScanner(NameCountMap* counts) : counts(counts) {}
+
+ CallCountScanner* create() override {
+ return new CallCountScanner(counts);
+ }
+
+ void visitCall(Call* curr) {
+ assert(counts->count(curr->target) > 0); // can't add a new element in parallel
+ (*counts)[curr->target]++;
+ }
- void visitModule(Module *module) {
+private:
+ NameCountMap* counts;
+};
+
+struct ReorderFunctions : public Pass {
+ void run(PassRunner* runner, Module* module) override {
+ NameCountMap counts;
+ // fill in info, as we operate on it in parallel (each function to its own entry)
+ for (auto& func : module->functions) {
+ counts[func->name];
+ }
+ // find counts on function calls
+ {
+ PassRunner runner(module);
+ runner.setIsNested(true);
+ runner.add<CallCountScanner>(&counts);
+ runner.run();
+ }
+ // find counts on global usages
if (module->start.is()) {
counts[module->start]++;
}
@@ -43,19 +81,15 @@ struct ReorderFunctions : public WalkerPass<PostWalker<ReorderFunctions>> {
counts[curr]++;
}
}
- std::sort(module->functions.begin(), module->functions.end(), [this](
+ // sort
+ std::sort(module->functions.begin(), module->functions.end(), [&counts](
const std::unique_ptr<Function>& a,
const std::unique_ptr<Function>& b) -> bool {
- if (this->counts[a->name] == this->counts[b->name]) {
+ if (counts[a->name] == counts[b->name]) {
return strcmp(a->name.str, b->name.str) > 0;
}
- return this->counts[a->name] > this->counts[b->name];
+ return counts[a->name] > counts[b->name];
});
- counts.clear();
- }
-
- void visitCall(Call *curr) {
- counts[curr->target]++;
}
};
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 88734e85a..11575b1df 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -180,11 +180,14 @@ void PassRunner::addDefaultGlobalOptimizationPrePasses() {
}
void PassRunner::addDefaultGlobalOptimizationPostPasses() {
- add("duplicate-function-elimination"); // optimizations show more functions as duplicate
- add("remove-unused-module-elements");
- if (options.optimizeLevel >= 2 || options.shrinkLevel >= 2) {
+ // inline when working hard, and when not preserving debug info
+ // (inlining+optimizing can remove the annotations)
+ if ((options.optimizeLevel >= 2 || options.shrinkLevel >= 2) &&
+ !options.debugInfo) {
add("inlining-optimizing");
}
+ add("duplicate-function-elimination"); // optimizations show more functions as duplicate
+ add("remove-unused-module-elements");
add("memory-packing");
}
diff --git a/src/wasm-module-building.h b/src/wasm-module-building.h
index 866019328..990ea5191 100644
--- a/src/wasm-module-building.h
+++ b/src/wasm-module-building.h
@@ -35,9 +35,9 @@ static std::mutex debug;
// Helps build wasm modules efficiently. If you build a module by
// adding function by function, and you want to optimize them, this class
// starts optimizing using worker threads *while you are still adding*.
-// It runs function optimization passes at that time, and then at the end
-// it runs global module-level optimization passes. The result is a fully
-// optimized module, optimized while being generated.
+// It runs function optimization passes at that time. This does not
+// run global optimization after that by default, but you can do that
+// too, by calling optimizeGlobally().
//
// This might also be faster than normal module optimization since it
// runs all passes on each function, then goes on to the next function
@@ -75,7 +75,6 @@ class OptimizingIncrementalModuleBuilder {
uint32_t numFunctions;
PassOptions passOptions;
std::function<void (PassRunner&)> addPrePasses;
- std::function<void ()> beforeGlobalOptimizations;
Function* endMarker;
std::atomic<Function*>* list;
uint32_t nextFunction; // only used on main thread
@@ -93,10 +92,9 @@ public:
// this bounds helps avoid locking.
OptimizingIncrementalModuleBuilder(Module* wasm, Index numFunctions, PassOptions passOptions,
std::function<void (PassRunner&)> addPrePasses,
- std::function<void ()> beforeGlobalOptimizations,
bool debug, bool validateGlobally)
: wasm(wasm), numFunctions(numFunctions), passOptions(passOptions),
- addPrePasses(addPrePasses), beforeGlobalOptimizations(beforeGlobalOptimizations),
+ addPrePasses(addPrePasses),
endMarker(nullptr), list(nullptr), nextFunction(0),
numWorkers(0), liveWorkers(0), activeWorkers(0), availableFuncs(0), finishedFuncs(0),
finishing(false), debug(debug), validateGlobally(validateGlobally) {
@@ -178,7 +176,6 @@ public:
wakeAllWorkers();
waitUntilAllFinished();
}
- optimizeGlobally();
// TODO: clear side thread allocators from module allocator, as these threads were transient
}
@@ -230,11 +227,9 @@ private:
}
void optimizeGlobally() {
- beforeGlobalOptimizations();
PassRunner passRunner(wasm, passOptions);
passRunner.addDefaultGlobalOptimizationPostPasses();
passRunner.run();
-
}
// worker code
diff --git a/src/wasm.h b/src/wasm.h
index 485afade1..4072533a7 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -784,6 +784,7 @@ public:
void removeImport(Name name);
void removeExport(Name name);
void removeFunction(Name name);
+ void removeFunctionType(Name name);
// TODO: remove* for other elements
void updateMaps();
diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp
index 8739d96f7..32a633926 100644
--- a/src/wasm/wasm.cpp
+++ b/src/wasm/wasm.cpp
@@ -747,7 +747,17 @@ void Module::removeFunction(Name name) {
functionsMap.erase(name);
}
- // TODO: remove* for other elements
+void Module::removeFunctionType(Name name) {
+ for (size_t i = 0; i < functionTypes.size(); i++) {
+ if (functionTypes[i]->name == name) {
+ functionTypes.erase(functionTypes.begin() + i);
+ break;
+ }
+ }
+ functionTypesMap.erase(name);
+}
+
+// TODO: remove* for other elements
void Module::updateMaps() {
functionsMap.clear();