author | Alon Zakai <alonzakai@gmail.com> | 2016-05-30 16:35:11 -0700
---|---|---
committer | Alon Zakai <alonzakai@gmail.com> | 2016-06-02 18:46:37 -0700
commit | e4a81f9ea487441e0c01f399d15026672fdeadf9 (patch) |
tree | 571bf874365e41a50976da0d15604e5d6f1edebb |
parent | bed10e24a9a1818e8583a1f3b30a6dfdd3b57bfa (diff) |
download | binaryen-e4a81f9ea487441e0c01f399d15026672fdeadf9.tar.gz binaryen-e4a81f9ea487441e0c01f399d15026672fdeadf9.tar.bz2 binaryen-e4a81f9ea487441e0c01f399d15026672fdeadf9.zip |
add OptimizingIncrementalModuleBuilder for faster incremental module building + optimizing
-rw-r--r-- | src/asm2wasm.h | 49
-rw-r--r-- | src/tools/asm2wasm.cpp | 7
-rw-r--r-- | src/wasm-module-building.h | 252
-rw-r--r-- | test/memorygrowth.fromasm | 4
-rw-r--r-- | test/memorygrowth.fromasm.imprecise | 4

5 files changed, 290 insertions, 26 deletions
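As context for the patch below, here is a minimal sketch of how a caller is expected to drive the new class, based on the usage comment in src/wasm-module-building.h and the call sites this commit adds to Asm2WasmBuilder. It is an illustration only, not code from the patch: `makeFunction` is a hypothetical stand-in for whatever produces each function.

```cpp
#include "wasm.h"
#include "wasm-module-building.h"

using namespace wasm;

// Hypothetical generator for the next function; not part of this patch.
Function* makeFunction(Index i);

void buildModule(Module& wasm, Index numFunctions) {
  // The builder needs the total function count up front so it can
  // pre-allocate its atomic work list and avoid locking later.
  OptimizingIncrementalModuleBuilder builder(&wasm, numFunctions);
  for (Index i = 0; i < numFunctions; i++) {
    // addFunction() puts the function in the module and queues it for the
    // worker threads, which optimize it while later functions are still
    // being generated.
    builder.addFunction(makeFunction(i));
  }
  // Blocks until every queued function is optimized, then runs the
  // module-level (global) optimization passes.
  builder.finish();
}
```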
```diff
diff --git a/src/asm2wasm.h b/src/asm2wasm.h
index 4231fcc9a..5f6cee558 100644
--- a/src/asm2wasm.h
+++ b/src/asm2wasm.h
@@ -30,7 +30,8 @@
 #include "pass.h"
 #include "ast_utils.h"
 #include "wasm-builder.h"
-#include <wasm-validator.h>
+#include "wasm-validator.h"
+#include "wasm-module-building.h"
 
 namespace wasm {
 
@@ -136,6 +137,8 @@ class Asm2WasmBuilder {
 
   Builder builder;
 
+  std::unique_ptr<OptimizingIncrementalModuleBuilder> optimizingBuilder;
+
   // globals
 
   unsigned nextGlobal; // next place to put a global
@@ -156,6 +159,7 @@ class Asm2WasmBuilder {
   bool memoryGrowth;
   bool debug;
   bool imprecise;
+  bool optimize;
 
 public:
   std::map<IString, MappedGlobal> mappedGlobals;
@@ -267,7 +271,7 @@ private:
   }
 
 public:
-  Asm2WasmBuilder(Module& wasm, bool memoryGrowth, bool debug, bool imprecise)
+  Asm2WasmBuilder(Module& wasm, bool memoryGrowth, bool debug, bool imprecise, bool optimize)
      : wasm(wasm),
        allocator(wasm.allocator),
        builder(wasm),
@@ -275,10 +279,10 @@ public:
        maxGlobal(1000),
        memoryGrowth(memoryGrowth),
        debug(debug),
-       imprecise(imprecise) {}
+       imprecise(imprecise),
+       optimize(optimize) {}
 
   void processAsm(Ref ast);
-  void optimize();
 
 private:
   AsmType detectAsmType(Ref ast, AsmData *data) {
@@ -523,6 +527,16 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
   IString Int8Array, Int16Array, Int32Array, UInt8Array, UInt16Array, UInt32Array, Float32Array, Float64Array;
 
+  // set up optimization
+
+  if (optimize) {
+    Index numFunctions = 0;
+    for (unsigned i = 1; i < body->size(); i++) {
+      if (body[i][0] == DEFUN) numFunctions++;
+    }
+    optimizingBuilder = std::unique_ptr<OptimizingIncrementalModuleBuilder>(new OptimizingIncrementalModuleBuilder(&wasm, numFunctions));
+  }
+
   // first pass - do almost everything, but function imports and indirect calls
 
   for (unsigned i = 1; i < body->size(); i++) {
@@ -655,7 +669,12 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
         }
       }
     } else if (curr[0] == DEFUN) {
       // function
-      wasm.addFunction(processFunction(curr));
+      auto* func = processFunction(curr);
+      if (optimize) {
+        optimizingBuilder->addFunction(func);
+      } else {
+        wasm.addFunction(func);
+      }
     } else if (curr[0] == RETURN) {
       // exports
       Ref object = curr[1];
@@ -690,6 +709,15 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
     }
   }
 
+  if (optimize) {
+    optimizingBuilder->finish();
+    if (maxGlobal < 1024) {
+      PassRunner passRunner(&wasm);
+      passRunner.add("post-emscripten");
+      passRunner.run();
+    }
+  }
+
   // second pass. first, function imports
 
   std::vector<IString> toErase;
@@ -1726,17 +1754,6 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) {
   return function;
 }
 
-void Asm2WasmBuilder::optimize() {
-  PassRunner passRunner(&wasm);
-  passRunner.addDefaultOptimizationPasses();
-  if (maxGlobal < 1024) {
-    passRunner.add("post-emscripten");
-  }
-  passRunner.run();
-
-  assert(WasmValidator().validate(wasm));
-}
-
 } // namespace wasm
 
 #endif // wasm_asm2wasm_h
diff --git a/src/tools/asm2wasm.cpp b/src/tools/asm2wasm.cpp
index e18f78de9..7f1a07d1d 100644
--- a/src/tools/asm2wasm.cpp
+++ b/src/tools/asm2wasm.cpp
@@ -92,14 +92,9 @@ int main(int argc, const char *argv[]) {
   if (options.debug) std::cerr << "wasming..." << std::endl;
   Module wasm;
   wasm.memory.initial = wasm.memory.max = totalMemory / Memory::kPageSize;
-  Asm2WasmBuilder asm2wasm(wasm, pre.memoryGrowth, options.debug, imprecise);
+  Asm2WasmBuilder asm2wasm(wasm, pre.memoryGrowth, options.debug, imprecise, opts);
   asm2wasm.processAsm(asmjs);
 
-  if (opts) {
-    if (options.debug) std::cerr << "optimizing..." << std::endl;
-    asm2wasm.optimize();
-  }
-
   if (options.debug) std::cerr << "printing..." << std::endl;
   Output output(options.extra["output"], Flags::Text, options.debug ? Flags::Debug : Flags::Release);
   WasmPrinter::printModule(&wasm, output.getStream());
diff --git a/src/wasm-module-building.h b/src/wasm-module-building.h
new file mode 100644
index 000000000..3cdebc558
--- /dev/null
+++ b/src/wasm-module-building.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2016 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef optimizing_incremental_module_builder_h
+#define optimizing_incremental_module_builder_h
+
+#include <wasm.h>
+#include <support/threads.h>
+
+namespace wasm {
+
+#ifdef BINARYEN_THREAD_DEBUG
+static std::mutex debug;
+#define DEBUG_THREAD(x) { std::lock_guard<std::mutex> lock(debug); std::cerr << "[OptimizingIncrementalModuleBuilder Threading (thread: " << std::this_thread::get_id() << ")] " << x; std::cerr << '\n'; }
+#else
+#define DEBUG_THREAD(x)
+#endif
+
+//
+// OptimizingIncrementalModuleBuilder
+//
+// Helps build wasm modules efficiently. If you build a module by
+// adding function by function, and you want to optimize them, this class
+// starts optimizing using worker threads *while you are still adding*.
+// It runs function optimization passes at that time, and then at the end
+// it runs global module-level optimization passes. The result is a fully
+// optimized module, optimized while being generated.
+//
+// This might also be faster than normal module optimization since it
+// runs all passes on each function, then goes on to the next function
+// which is better for data locality.
+//
+// Usage: Create an instance, passing it the module and the total
+// number of functions. Then call addFunction as you have
+// new functions to add (this also adds it to the module). Finally,
+// call finish() when all functions have been added.
+//
+// This avoids locking by using atomics. We allocate an array of nullptrs
+// that represent all the functions, and as we add a function, we place it
+// at the next index. Each worker will read from the start to the end,
+// and when a non-nullptr is found, the worker optimizes that function and
+// nulls it. There is also an end marker that is not nullptr nor the address
+// of a valid function, which represents that beyond this point we have not
+// yet filled in. In other words,
+//   * the main thread fills everything with the end marker
+//   * the main thread transforms a marker entry into a function
+//   * workers pause when they see the marker
+//   * workers skip over nullptrs
+//   * workers transform functions into nullptrs, and optimize them
+//   * we keep an atomic count of the number of active workers and
+//     the number of optimized functions.
+//   * after adding a function, the main thread wakes up workers if
+//     it calculates there is work for them.
+//   * a lock is used for going to sleep and waking up.
+//     Locking should be rare, as optimization is
+//     generally slower than generation; in the optimal case, we never
+//     lock beyond the first step, and all further work is lock-free.
+//
+
+class OptimizingIncrementalModuleBuilder {
+  Module* wasm;
+  uint32_t numFunctions;
+  Function* endMarker;
+  std::atomic<Function*>* list;
+  uint32_t nextFunction; // only used on main thread
+  uint32_t numWorkers;
+  std::vector<std::unique_ptr<std::thread>> threads;
+  std::atomic<uint32_t> liveWorkers, activeWorkers, availableFuncs, finishedFuncs;
+  std::mutex mutex;
+  std::condition_variable condition;
+  bool finishing;
+
+public:
+  // numFunctions must be equal to the number of functions allocated, or higher.
+  // Knowing this bounds helps avoid locking.
+  OptimizingIncrementalModuleBuilder(Module* wasm, Index numFunctions) : wasm(wasm), numFunctions(numFunctions), nextFunction(0), finishing(false) {
+    // prepare work list
+    endMarker = new Function();
+    list = new std::atomic<Function*>[numFunctions];
+    for (uint32_t i = 0; i < numFunctions; i++) {
+      list[i].store(endMarker);
+    }
+    // create workers
+    DEBUG_THREAD("creating workers");
+    numWorkers = ThreadPool::getNumCores();
+    assert(numWorkers >= 1);
+    liveWorkers.store(0);
+    activeWorkers.store(0);
+    for (uint32_t i = 0; i < numWorkers; i++) { // TODO: one less, and add it at the very end, to not compete with main thread?
+      createWorker();
+    }
+    waitUntilAllReady();
+    DEBUG_THREAD("workers are ready");
+    // prepare the rest of the initial state
+    availableFuncs.store(0);
+    finishedFuncs.store(0);
+  }
+
+  ~OptimizingIncrementalModuleBuilder() {
+    delete[] list;
+    delete endMarker;
+  }
+
+  // Add a function to the module, and to be optimized
+  void addFunction(Function* func) {
+    wasm->addFunction(func);
+    queueFunction(func);
+    // wake workers if needed
+    auto wake = availableFuncs.load();
+    for (uint32_t i = 0; i < wake; i++) {
+      wakeWorker();
+    }
+  }
+
+  // All functions have been added, block until all are optimized, and then do
+  // global optimizations. When this returns, the module is ready and optimized.
+  void finish() {
+    DEBUG_THREAD("finish()ing");
+    assert(nextFunction == numFunctions);
+    wakeAllWorkers();
+    waitUntilAllFinished();
+    optimizeGlobally();
+    // TODO: clear side thread allocators from module allocator, as these threads were transient
+  }
+
+private:
+  void createWorker() {
+    DEBUG_THREAD("create a worker");
+    threads.emplace_back(std::unique_ptr<std::thread>(new std::thread(workerMain, this)));
+  }
+
+  void wakeWorker() {
+    DEBUG_THREAD("wake a worker");
+    std::lock_guard<std::mutex> lock(mutex);
+    condition.notify_one();
+  }
+
+  void wakeAllWorkers() {
+    DEBUG_THREAD("wake all workers");
+    std::lock_guard<std::mutex> lock(mutex);
+    condition.notify_all();
+  }
+
+  void waitUntilAllReady() {
+    DEBUG_THREAD("wait until all workers are ready");
+    std::unique_lock<std::mutex> lock(mutex);
+    if (liveWorkers.load() < numWorkers) {
+      condition.wait(lock, [this]() { return liveWorkers.load() == numWorkers; });
+    }
+  }
+
+  void waitUntilAllFinished() {
+    DEBUG_THREAD("wait until all workers are finished");
+    {
+      std::unique_lock<std::mutex> lock(mutex);
+      finishing = true;
+      if (liveWorkers.load() > 0) {
+        condition.wait(lock, [this]() { return liveWorkers.load() == 0; });
+      }
+    }
+    DEBUG_THREAD("joining");
+    for (auto& thread : threads) thread->join();
+    DEBUG_THREAD("joined");
+  }
+
+  void queueFunction(Function* func) {
+    DEBUG_THREAD("queue function");
+    assert(nextFunction < numFunctions); // TODO: if we are given more than we expected, use a slower work queue?
+    list[nextFunction++].store(func);
+    availableFuncs++;
+  }
+
+  void optimizeGlobally() {
+    PassRunner passRunner(wasm);
+    passRunner.addDefaultGlobalOptimizationPasses();
+    passRunner.run();
+  }
+
+  // worker code
+
+  void optimizeFunction(Function* func) {
+    PassRunner passRunner(wasm);
+    passRunner.addDefaultFunctionOptimizationPasses();
+    passRunner.runFunction(func);
+  }
+
+  static void workerMain(void* param) {
+    DEBUG_THREAD("workerMain");
+    OptimizingIncrementalModuleBuilder* self = (OptimizingIncrementalModuleBuilder*)param;
+    {
+      std::lock_guard<std::mutex> lock(self->mutex);
+      self->liveWorkers++;
+      self->activeWorkers++;
+      self->condition.notify_all();
+    }
+    for (uint32_t i = 0; i < self->numFunctions; i++) {
+      DEBUG_THREAD("workerMain iteration " << i);
+      if (self->list[i].load() == self->endMarker) {
+        // sleep, this entry isn't ready yet
+        DEBUG_THREAD("workerMain sleep");
+        self->activeWorkers--;
+        {
+          std::unique_lock<std::mutex> lock(self->mutex);
+          if (!self->finishing) { // while waiting for the lock, things may have ended
+            self->condition.wait(lock);
+          }
+        }
+        // continue
+        DEBUG_THREAD("workerMain continue");
+        self->activeWorkers++;
+        i--;
+        continue;
+      }
+      DEBUG_THREAD("workerMain exchange item");
+      auto* func = self->list[i].exchange(nullptr);
+      if (func == nullptr) {
+        DEBUG_THREAD("workerMain sees was already taken");
+        continue; // someone else has taken this one
+      }
+      // we have work to do!
+      DEBUG_THREAD("workerMain work on " << size_t(func));
+      self->availableFuncs--;
+      self->optimizeFunction(func);
+      self->finishedFuncs++;
+    }
+    DEBUG_THREAD("workerMain ready to exit");
+    {
+      std::lock_guard<std::mutex> lock(self->mutex);
+      self->liveWorkers--;
+      self->condition.notify_all();
+    }
+    DEBUG_THREAD("workerMain exiting");
+  }
+};
+
+} // namespace wasm
+
+#endif // optimizing_incremental_module_builder_h
diff --git a/test/memorygrowth.fromasm b/test/memorygrowth.fromasm
index f2acc39d2..07909b855 100644
--- a/test/memorygrowth.fromasm
+++ b/test/memorygrowth.fromasm
@@ -10114,9 +10114,9 @@
  (func $ib (result i32)
   (i32.const 0)
  )
- (func $__growWasmMemory (param $0 i32)
+ (func $__growWasmMemory (param $newSize i32)
   (grow_memory
-   (get_local $0)
+   (get_local $newSize)
   )
  )
 )
diff --git a/test/memorygrowth.fromasm.imprecise b/test/memorygrowth.fromasm.imprecise
index f2acc39d2..07909b855 100644
--- a/test/memorygrowth.fromasm.imprecise
+++ b/test/memorygrowth.fromasm.imprecise
@@ -10114,9 +10114,9 @@
  (func $ib (result i32)
  (i32.const 0)
  )
- (func $__growWasmMemory (param $0 i32)
+ (func $__growWasmMemory (param $newSize i32)
  (grow_memory
-   (get_local $0)
+   (get_local $newSize)
  )
 )
)
```