diff options
-rw-r--r-- | src/tools/wasm-split/instrumenter.cpp | 191 | ||||
-rw-r--r-- | src/tools/wasm-split/instrumenter.h | 8 | ||||
-rw-r--r-- | src/tools/wasm-split/split-options.cpp | 13 | ||||
-rw-r--r-- | src/tools/wasm-split/split-options.h | 6 | ||||
-rw-r--r-- | src/tools/wasm-split/wasm-split.cpp | 2 | ||||
-rw-r--r-- | test/lit/help/wasm-split.test | 13 | ||||
-rw-r--r-- | test/lit/wasm-split/call_exports.mjs | 4 | ||||
-rw-r--r-- | test/lit/wasm-split/instrument-in-memory.wast | 88 | ||||
-rw-r--r-- | test/lit/wasm-split/profile-guided.wast | 66 |
9 files changed, 315 insertions, 76 deletions
diff --git a/src/tools/wasm-split/instrumenter.cpp b/src/tools/wasm-split/instrumenter.cpp index 0c5e96b54..79d6a98a4 100644 --- a/src/tools/wasm-split/instrumenter.cpp +++ b/src/tools/wasm-split/instrumenter.cpp @@ -22,9 +22,8 @@ namespace wasm { -Instrumenter::Instrumenter(const std::string& profileExport, - uint64_t moduleHash) - : profileExport(profileExport), moduleHash(moduleHash) {} +Instrumenter::Instrumenter(const WasmSplitOptions& options, uint64_t moduleHash) + : options(options), moduleHash(moduleHash) {} void Instrumenter::run(PassRunner* runner, Module* wasm) { this->runner = runner; @@ -35,6 +34,10 @@ void Instrumenter::run(PassRunner* runner, Module* wasm) { } void Instrumenter::addGlobals() { + if (options.storageKind != WasmSplitOptions::StorageKind::InGlobals) { + // Don't need globals + return; + } // Create fresh global names (over-reserves, but that's ok) counterGlobal = Names::getValidGlobalName(*wasm, "monotonic_counter"); functionGlobals.reserve(wasm->functions.size()); @@ -60,42 +63,65 @@ void Instrumenter::addGlobals() { } void Instrumenter::instrumentFuncs() { - // Inject the following code at the beginning of each function to advance the - // monotonic counter and set the function's timestamp if it hasn't already - // been set. - // - // (if (i32.eqz (global.get $timestamp)) - // (block - // (global.set $monotonic_counter - // (i32.add - // (global.get $monotonic_counter) - // (i32.const 1) - // ) - // ) - // (global.set $timestamp - // (global.get $monotonic_counter) - // ) - // ) - // ) + // Inject code at the beginning of each function to advance the monotonic + // counter and set the function's timestamp if it hasn't already been set. 
Builder builder(*wasm); - auto globalIt = functionGlobals.begin(); - ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { - func->body = builder.makeSequence( - builder.makeIf( - builder.makeUnary(EqZInt32, - builder.makeGlobalGet(*globalIt, Type::i32)), - builder.makeSequence( - builder.makeGlobalSet( - counterGlobal, - builder.makeBinary(AddInt32, - builder.makeGlobalGet(counterGlobal, Type::i32), - builder.makeConst(Literal::makeOne(Type::i32)))), - builder.makeGlobalSet( - *globalIt, builder.makeGlobalGet(counterGlobal, Type::i32)))), - func->body, - func->body->type); - ++globalIt; - }); + switch (options.storageKind) { + case WasmSplitOptions::StorageKind::InGlobals: { + // (if (i32.eqz (global.get $timestamp)) + // (block + // (global.set $monotonic_counter + // (i32.add + // (global.get $monotonic_counter) + // (i32.const 1) + // ) + // ) + // (global.set $timestamp + // (global.get $monotonic_counter) + // ) + // ) + // ) + auto globalIt = functionGlobals.begin(); + ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { + func->body = builder.makeSequence( + builder.makeIf( + builder.makeUnary(EqZInt32, + builder.makeGlobalGet(*globalIt, Type::i32)), + builder.makeSequence( + builder.makeGlobalSet( + counterGlobal, + builder.makeBinary( + AddInt32, + builder.makeGlobalGet(counterGlobal, Type::i32), + builder.makeConst(Literal::makeOne(Type::i32)))), + builder.makeGlobalSet( + *globalIt, builder.makeGlobalGet(counterGlobal, Type::i32)))), + func->body, + func->body->type); + ++globalIt; + }); + break; + } + case WasmSplitOptions::StorageKind::InMemory: { + if (!wasm->features.hasAtomics()) { + Fatal() << "error: --in-memory requires atomics to be enabled"; + } + // (i32.atomic.store8 offset=funcidx (i32.const 0) (i32.const 1)) + Index funcIdx = 0; + ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { + func->body = builder.makeSequence( + builder.makeAtomicStore(1, + funcIdx, + builder.makeConstPtr(0), + 
builder.makeConst(uint32_t(1)), + Type::i32), + func->body, + func->body->type); + ++funcIdx; + }); + break; + } + } } // wasm-split profile format: @@ -118,17 +144,20 @@ void Instrumenter::addProfileExport() { // buffer. The function takes the available address and buffer size as // arguments and returns the total size of the profile. It only actually // writes the profile if the given space is sufficient to hold it. - auto name = Names::getValidFunctionName(*wasm, profileExport); + auto name = Names::getValidFunctionName(*wasm, options.profileExport); auto writeProfile = Builder::makeFunction( name, Signature({Type::i32, Type::i32}, Type::i32), {}); writeProfile->hasExplicitName = true; writeProfile->setLocalName(0, "addr"); writeProfile->setLocalName(1, "size"); + size_t numFuncs = 0; + ModuleUtils::iterDefinedFunctions(*wasm, [&](Function*) { ++numFuncs; }); + // Calculate the size of the profile: // 8 bytes module hash + // 4 bytes for the timestamp for each function - const size_t profileSize = 8 + 4 * functionGlobals.size(); + const size_t profileSize = 8 + 4 * numFuncs; // Create the function body Builder builder(*wasm); @@ -142,18 +171,76 @@ void Instrumenter::addProfileExport() { // Write the hash followed by all the time stamps Expression* writeData = builder.makeStore(8, 0, 1, getAddr(), hashConst(), Type::i64); - uint32_t offset = 8; - for (const auto& global : functionGlobals) { - writeData = builder.blockify( - writeData, - builder.makeStore(4, - offset, - 1, - getAddr(), - builder.makeGlobalGet(global, Type::i32), - Type::i32)); - offset += 4; + + switch (options.storageKind) { + case WasmSplitOptions::StorageKind::InGlobals: { + for (const auto& global : functionGlobals) { + writeData = builder.blockify( + writeData, + builder.makeStore(4, + offset, + 1, + getAddr(), + builder.makeGlobalGet(global, Type::i32), + Type::i32)); + offset += 4; + } + break; + } + case WasmSplitOptions::StorageKind::InMemory: { + Index funcIdxVar = + 
Builder::addVar(writeProfile.get(), "funcIdx", Type::i32); + auto getFuncIdx = [&]() { + return builder.makeLocalGet(funcIdxVar, Type::i32); + }; + // (block $outer + // (loop $l + // (br_if $outer (i32.eq (local.get $funcIdx) (i32.const numFuncs))) + // (i32.store offset=8 + // (i32.add + // (local.get $addr) + // (i32.mul (local.get $funcIdx) (i32.const 4)) + // ) + // (i32.atomic.load8_u (local.get $funcIdx)) + // ) + // (local.set $funcIdx + // (i32.add (local.get $funcIdx) (i32.const 1)) + // ) + // (br $l) + // ) + // ) + writeData = builder.blockify( + writeData, + builder.makeBlock( + "outer", + builder.makeLoop( + "l", + builder.blockify( + builder.makeBreak( + "outer", + nullptr, + builder.makeBinary(EqInt32, + getFuncIdx(), + builder.makeConst(uint32_t(numFuncs)))), + builder.makeStore( + 4, + offset, + 4, + builder.makeBinary( + AddInt32, + getAddr(), + builder.makeBinary( + MulInt32, getFuncIdx(), builder.makeConst(uint32_t(4)))), + builder.makeAtomicLoad(1, 0, getFuncIdx(), Type::i32), + Type::i32), + builder.makeLocalSet( + funcIdxVar, + builder.makeBinary( + AddInt32, getFuncIdx(), builder.makeConst(uint32_t(1)))), + builder.makeBreak("l"))))); + break; + } } writeProfile->body = builder.makeSequence( @@ -164,7 +251,7 @@ void Instrumenter::addProfileExport() { // Create an export for the function wasm->addFunction(std::move(writeProfile)); wasm->addExport( - Builder::makeExport(profileExport, name, ExternalKind::Function)); + Builder::makeExport(options.profileExport, name, ExternalKind::Function)); // Also make sure there is a memory with enough pages to write into size_t pages = (profileSize + Memory::kPageSize - 1) / Memory::kPageSize; diff --git a/src/tools/wasm-split/instrumenter.h b/src/tools/wasm-split/instrumenter.h index 4f714fde9..7de5a9135 100644 --- a/src/tools/wasm-split/instrumenter.h +++ b/src/tools/wasm-split/instrumenter.h @@ -18,6 +18,8 @@ #define wasm_tools_wasm_split_instrumenter_h #include "pass.h" +#include "split-options.h" 
+#include "wasm.h" namespace wasm { @@ -28,15 +30,17 @@ struct Instrumenter : public Pass { PassRunner* runner = nullptr; Module* wasm = nullptr; - const std::string& profileExport; + const WasmSplitOptions& options; uint64_t moduleHash; Name counterGlobal; std::vector<Name> functionGlobals; - Instrumenter(const std::string& profileExport, uint64_t moduleHash); + Instrumenter(const WasmSplitOptions& options, uint64_t moduleHash); void run(PassRunner* runner, Module* wasm) override; + +private: void addGlobals(); void instrumentFuncs(); void addProfileExport(); diff --git a/src/tools/wasm-split/split-options.cpp b/src/tools/wasm-split/split-options.cpp index 419555f45..8de08d5a1 100644 --- a/src/tools/wasm-split/split-options.cpp +++ b/src/tools/wasm-split/split-options.cpp @@ -197,6 +197,19 @@ WasmSplitOptions::WasmSplitOptions() profileExport = argument; }) .add( + "--in-memory", + "", + "Store profile information in memory (starting at address 0 and taking " + "one byte per function) rather than globals (the default) so that " + "it can be shared between multiple threads. Users are responsible for " + "ensuring that the module does not use the initial memory region for " + "anything else.", + {Mode::Instrument}, + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + storageKind = StorageKind::InMemory; + }) + .add( "--emit-module-names", "", "Emit module names, even if not emitting the rest of the names section. 
" diff --git a/src/tools/wasm-split/split-options.h b/src/tools/wasm-split/split-options.h index 5c811c32c..faa6ee30f 100644 --- a/src/tools/wasm-split/split-options.h +++ b/src/tools/wasm-split/split-options.h @@ -33,6 +33,12 @@ struct WasmSplitOptions : ToolOptions { constexpr static size_t NumModes = static_cast<unsigned>(Mode::MergeProfiles) + 1; + enum class StorageKind : unsigned { + InGlobals, // Store profile data in WebAssembly Globals + InMemory, // Store profile data in memory, accessible from all threads + }; + StorageKind storageKind = StorageKind::InGlobals; + bool verbose = false; bool emitBinary = true; bool symbolMap = false; diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp index bedba8957..b1ae153c9 100644 --- a/src/tools/wasm-split/wasm-split.cpp +++ b/src/tools/wasm-split/wasm-split.cpp @@ -109,7 +109,7 @@ void instrumentModule(const WasmSplitOptions& options) { uint64_t moduleHash = hashFile(options.inputFiles[0]); PassRunner runner(&wasm, options.passOptions); - Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); + Instrumenter(options, moduleHash).run(&runner, &wasm); adjustTableSize(wasm, options.initialTableSize); diff --git a/test/lit/help/wasm-split.test b/test/lit/help/wasm-split.test index 362c20d84..dd2b5f7e1 100644 --- a/test/lit/help/wasm-split.test +++ b/test/lit/help/wasm-split.test @@ -39,7 +39,7 @@ ;; CHECK-NEXT: of any profile. You can also pass a file ;; CHECK-NEXT: with a list of functions separated by new ;; CHECK-NEXT: lines. To do this, prepend @ before -;; CHECK-NEXT: filename (--keep-funcs @myfile) +;; CHECK-NEXT: filename (--keep-funcs @myfile) ;; CHECK-NEXT: ;; CHECK-NEXT: --split-funcs [split] Comma-separated list of functions ;; CHECK-NEXT: to split into the secondary module, @@ -49,7 +49,7 @@ ;; CHECK-NEXT: also pass a file with a list of functions ;; CHECK-NEXT: separated by new lines. 
To do this, ;; CHECK-NEXT: prepend @ before filename (--split-funcs -;; CHECK-NEXT: @myfile) +;; CHECK-NEXT: @myfile) ;; CHECK-NEXT: ;; CHECK-NEXT: --primary-output,-o1 [split] Output file for the primary ;; CHECK-NEXT: module. @@ -80,6 +80,15 @@ ;; CHECK-NEXT: profile into memory. Defaults to ;; CHECK-NEXT: `__write_profile`. ;; CHECK-NEXT: +;; CHECK-NEXT: --in-memory [instrument] Store profile information in +;; CHECK-NEXT: memory (starting at address 0 and taking +;; CHECK-NEXT: one byte per function) rather than +;; CHECK-NEXT: globals (the default) so that it can be +;; CHECK-NEXT: shared between multiple threads. Users +;; CHECK-NEXT: are responsible for ensuring that the +;; CHECK-NEXT: module does not use the initial memory +;; CHECK-NEXT: region for anything else. +;; CHECK-NEXT: ;; CHECK-NEXT: --emit-module-names [split, instrument] Emit module names, ;; CHECK-NEXT: even if not emitting the rest of the ;; CHECK-NEXT: names section. Can help differentiate the diff --git a/test/lit/wasm-split/call_exports.mjs b/test/lit/wasm-split/call_exports.mjs index 546564d2e..592fbad48 100644 --- a/test/lit/wasm-split/call_exports.mjs +++ b/test/lit/wasm-split/call_exports.mjs @@ -20,6 +20,6 @@ for (let i = 4; i < process.argv.length; i++) { } // Create and read the profile -let profileSize = instance.exports['__write_profile'](0, 2**32 - 1); -let profileData = Buffer.from(instance.exports.memory.buffer, 0, profileSize); +let profileSize = instance.exports['__write_profile'](1024, 2**32 - 1024); +let profileData = Buffer.from(instance.exports.memory.buffer, 1024, profileSize); fs.writeFileSync(outFile, profileData); diff --git a/test/lit/wasm-split/instrument-in-memory.wast b/test/lit/wasm-split/instrument-in-memory.wast new file mode 100644 index 000000000..568ce2207 --- /dev/null +++ b/test/lit/wasm-split/instrument-in-memory.wast @@ -0,0 +1,88 @@ +;; RUN: wasm-split %s --instrument --in-memory -all -S -o - | filecheck %s + +;; Check that the output round trips and 
validates as well +;; RUN: wasm-split %s --instrument --in-memory -all -g -o %t.wasm +;; RUN: wasm-opt -all %t.wasm -S -o - + +(module + (import "env" "foo" (func $foo)) + (export "bar" (func $bar)) + (func $bar + (call $foo) + ) + (func $baz (param i32) (result i32) + (local.get 0) + ) +) + +;; Check that a memory has been added +;; CHECK: (memory $0 1 1) + +;; And the profiling function exported +;; CHECK: (export "__write_profile" (func $__write_profile)) + +;; Check that the function instrumentation is correct + +;; CHECK: (func $bar +;; CHECK-NEXT: (i32.atomic.store8 +;; CHECK-NEXT: (i32.const 0) +;; CHECK-NEXT: (i32.const 1) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (call $foo) +;; CHECK-NEXT: ) + +;; CHECK-NEXT: (func $baz (param $0 i32) (result i32) +;; CHECK-NEXT: (i32.atomic.store8 offset=1 +;; CHECK-NEXT: (i32.const 0) +;; CHECK-NEXT: (i32.const 1) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) + +;; Check that the profiling function is correct. + +;; CHECK: (func $__write_profile (param $addr i32) (param $size i32) (result i32) +;; CHECK-NEXT: (local $funcIdx i32) +;; CHECK-NEXT: (if +;; CHECK-NEXT: (i32.ge_u +;; CHECK-NEXT: (local.get $size) +;; CHECK-NEXT: (i32.const 16) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (block +;; CHECK-NEXT: (i64.store align=1 +;; CHECK-NEXT: (local.get $addr) +;; CHECK-NEXT: (i64.const {{.*}}) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (block $outer +;; CHECK-NEXT: (loop $l +;; CHECK-NEXT: (br_if $outer +;; CHECK-NEXT: (i32.eq +;; CHECK-NEXT: (local.get $funcIdx) +;; CHECK-NEXT: (i32.const 2) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (i32.store offset=8 +;; CHECK-NEXT: (i32.add +;; CHECK-NEXT: (local.get $addr) +;; CHECK-NEXT: (i32.mul +;; CHECK-NEXT: (local.get $funcIdx) +;; CHECK-NEXT: (i32.const 4) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (i32.atomic.load8_u +;; CHECK-NEXT: (local.get $funcIdx) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (local.set $funcIdx +;; CHECK-NEXT: (i32.add +;; CHECK-NEXT: 
(local.get $funcIdx) +;; CHECK-NEXT: (i32.const 1) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (br $l) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (i32.const 16) +;; CHECK-NEXT: ) diff --git a/test/lit/wasm-split/profile-guided.wast b/test/lit/wasm-split/profile-guided.wast index f2d79f99c..4a96ef55e 100644 --- a/test/lit/wasm-split/profile-guided.wast +++ b/test/lit/wasm-split/profile-guided.wast @@ -1,43 +1,75 @@ +;; ===================== ;; Instrument the binary +;; ===================== -;; RUN: wasm-split --instrument %s -o %t.instrumented.wasm +;; RUN: wasm-split -all --instrument %s -o %t.instrumented.wasm ;; Create profiles -;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.foo.prof foo -;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.bar.prof bar -;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.both.prof foo bar -;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.none.prof +;; RUN: node --experimental-wasm-threads %S/call_exports.mjs %t.instrumented.wasm %t.foo.prof foo +;; RUN: node --experimental-wasm-threads %S/call_exports.mjs %t.instrumented.wasm %t.bar.prof bar +;; RUN: node --experimental-wasm-threads %S/call_exports.mjs %t.instrumented.wasm %t.both.prof foo bar +;; RUN: node --experimental-wasm-threads %S/call_exports.mjs %t.instrumented.wasm %t.none.prof ;; Create profile-guided splits -;; RUN: wasm-split %s --profile=%t.foo.prof -v -o1 %t.foo.1.wasm -o2 %t.foo.2.wasm \ +;; RUN: wasm-split -all %s --profile=%t.foo.prof -v -o1 %t.foo.1.wasm -o2 %t.foo.2.wasm \ ;; RUN: | filecheck %s --check-prefix FOO -;; FOO: Keeping functions: deep_foo_callee, foo, foo_callee, shared_callee -;; FOO: Splitting out functions: bar, bar_callee, uncalled +;; RUN: wasm-split -all %s --profile=%t.bar.prof -v -o1 %t.bar.1.wasm -o2 %t.bar.2.wasm \ +;; RUN: | filecheck %s --check-prefix BAR + +;; RUN: wasm-split -all %s --profile=%t.both.prof -v -o1 %t.both.1.wasm -o2 %t.both.2.wasm \ +;; 
RUN: | filecheck %s --check-prefix BOTH + +;; RUN: wasm-split -all %s --profile=%t.none.prof -v -o1 %t.none.1.wasm -o2 %t.none.2.wasm \ +;; RUN: | filecheck %s --check-prefix NONE + +;; ================================= +;; Do it all again using --in-memory +;; ================================= -;; RUN: wasm-split %s --profile=%t.bar.prof -v -o1 %t.bar.1.wasm -o2 %t.bar.2.wasm \ +;; RUN: wasm-split -all --instrument --in-memory %s -o %t.instrumented.wasm + +;; Create profiles + +;; RUN: node --experimental-wasm-threads %S/call_exports.mjs %t.instrumented.wasm %t.foo.prof foo +;; RUN: node --experimental-wasm-threads %S/call_exports.mjs %t.instrumented.wasm %t.bar.prof bar +;; RUN: node --experimental-wasm-threads %S/call_exports.mjs %t.instrumented.wasm %t.both.prof foo bar +;; RUN: node --experimental-wasm-threads %S/call_exports.mjs %t.instrumented.wasm %t.none.prof + +;; Create profile-guided splits + +;; RUN: wasm-split -all %s --profile=%t.foo.prof -v -o1 %t.foo.1.wasm -o2 %t.foo.2.wasm \ +;; RUN: | filecheck %s --check-prefix FOO + +;; RUN: wasm-split -all %s --profile=%t.bar.prof -v -o1 %t.bar.1.wasm -o2 %t.bar.2.wasm \ ;; RUN: | filecheck %s --check-prefix BAR +;; RUN: wasm-split -all %s --profile=%t.both.prof -v -o1 %t.both.1.wasm -o2 %t.both.2.wasm \ +;; RUN: | filecheck %s --check-prefix BOTH + +;; RUN: wasm-split -all %s --profile=%t.none.prof -v -o1 %t.none.1.wasm -o2 %t.none.2.wasm \ +;; RUN: | filecheck %s --check-prefix NONE + +;; ======= +;; Results +;; ======= + +;; FOO: Keeping functions: deep_foo_callee, foo, foo_callee, shared_callee +;; FOO: Splitting out functions: bar, bar_callee, uncalled + ;; BAR: Keeping functions: bar, bar_callee, shared_callee ;; BAR: Splitting out functions: deep_foo_callee, foo, foo_callee, uncalled -;; RUN: wasm-split %s --profile=%t.both.prof -v -o1 %t.both.1.wasm -o2 %t.both.2.wasm \ -;; RUN: | filecheck %s --check-prefix BOTH - ;; BOTH: Keeping functions: bar, bar_callee, deep_foo_callee, foo, foo_callee, 
shared_callee ;; BOTH: Splitting out functions: uncalled -;; RUN: wasm-split %s --profile=%t.none.prof -v -o1 %t.none.1.wasm -o2 %t.none.2.wasm \ -;; RUN: | filecheck %s --check-prefix NONE - ;; NONE: Keeping functions: ;; NONE: Splitting out functions: bar, bar_callee, deep_foo_callee, foo, foo_callee, shared_callee, uncalled - (module - (memory $mem 1 1) + (memory $mem (shared 1 1)) (export "memory" (memory $mem)) (export "foo" (func $foo)) (export "bar" (func $bar)) |