diff options
-rw-r--r-- | src/tools/wasm-split.cpp | 236 | ||||
-rw-r--r-- | test/lit/wasm-split/help.test | 14 | ||||
-rw-r--r-- | test/lit/wasm-split/invalid-options.wast | 12 | ||||
-rw-r--r-- | test/lit/wasm-split/merge-profiles.wast | 42 |
4 files changed, 235 insertions, 69 deletions
diff --git a/src/tools/wasm-split.cpp b/src/tools/wasm-split.cpp index f12a6dee6..4ac295d0e 100644 --- a/src/tools/wasm-split.cpp +++ b/src/tools/wasm-split.cpp @@ -26,6 +26,7 @@ #include "support/path.h" #include "support/utilities.h" #include "tool-options.h" +#include "wasm-binary.h" #include "wasm-builder.h" #include "wasm-io.h" #include "wasm-type.h" @@ -51,10 +52,11 @@ struct WasmSplitOptions : ToolOptions { enum class Mode : unsigned { Split, Instrument, + MergeProfiles, }; Mode mode = Mode::Split; constexpr static size_t NumModes = - static_cast<unsigned>(Mode::Instrument) + 1; + static_cast<unsigned>(Mode::MergeProfiles) + 1; bool verbose = false; bool emitBinary = true; @@ -69,7 +71,7 @@ struct WasmSplitOptions : ToolOptions { std::set<Name> keepFuncs; std::set<Name> splitFuncs; - std::string input; + std::vector<std::string> inputFiles; std::string output; std::string primaryOutput; std::string secondaryOutput; @@ -123,6 +125,13 @@ WasmSplitOptions::WasmSplitOptions() " be used to guide splitting.", Options::Arguments::Zero, [&](Options* o, const std::string& argument) { mode = Mode::Instrument; }) + .add("--merge-profiles", + "", + "Merge multiple profiles for the same module into a single profile.", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + mode = Mode::MergeProfiles; + }) .add( "--profile", "", @@ -197,12 +206,6 @@ WasmSplitOptions::WasmSplitOptions() {Mode::Split}, Options::Arguments::One, [&](Options* o, const std::string& argument) { exportPrefix = argument; }) - .add("--output", - "-o", - "Output file.", - {Mode::Instrument}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { output = argument; }) .add("--profile-export", "", "The export name of the function the embedder calls to write the " @@ -233,31 +236,40 @@ WasmSplitOptions::WasmSplitOptions() [&](Options* o, const std::string& argument) { initialTableSize = std::stoi(argument); }) - .add("--verbose", - "-v", - "Verbose output mode. Prints the functions that will be kept " - "and split out when splitting a module.", - Options::Arguments::Zero, - [&](Options* o, const std::string& argument) { - verbose = true; - quiet = false; - }) .add("--emit-text", "-S", "Emit text instead of binary for the output file or files.", + {Mode::Split, Mode::Instrument}, Options::Arguments::Zero, [&](Options* o, const std::string& argument) { emitBinary = false; }) .add("--debuginfo", "-g", "Emit names section in wasm binary (or full debuginfo in wast)", + {Mode::Split, Mode::Instrument}, Options::Arguments::Zero, [&](Options* o, const std::string& arguments) { passOptions.debugInfo = true; }) - .add_positional( - "INFILE", - Options::Arguments::One, - [&](Options* o, const std::string& argument) { input = argument; }); + .add("--output", + "-o", + "Output file.", + {Mode::Instrument, Mode::MergeProfiles}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { output = argument; }) + .add("--verbose", + "-v", + "Verbose output mode. Prints the functions that will be kept " + "and split out when splitting a module.", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + verbose = true; + quiet = false; + }) + .add_positional("INFILES", + Options::Arguments::N, + [&](Options* o, const std::string& argument) { + inputFiles.push_back(argument); + }); } std::ostream& operator<<(std::ostream& o, WasmSplitOptions::Mode& mode) { @@ -268,6 +280,9 @@ std::ostream& operator<<(std::ostream& o, WasmSplitOptions::Mode& mode) { case WasmSplitOptions::Mode::Instrument: o << "instrument"; break; + case WasmSplitOptions::Mode::MergeProfiles: + o << "merge-profiles"; + break; } return o; } @@ -322,9 +337,21 @@ bool WasmSplitOptions::validate() { valid = false; }; - if (!input.size()) { + // Validate the positional arguments. + if (inputFiles.size() == 0) { fail("no input file"); } + switch (mode) { + case Mode::Split: + case Mode::Instrument: + if (inputFiles.size() > 1) { + fail("Cannot have more than one input file."); + } + break; + case Mode::MergeProfiles: + // Any number >= 1 allowed. + break; + } // Validate that all used options are allowed in the current mode. for (std::string& opt : usedOptions) { @@ -364,7 +391,7 @@ void parseInput(Module& wasm, const WasmSplitOptions& options) { ModuleReader reader; reader.setProfile(options.profile); try { - reader.read(options.input, wasm); + reader.read(options.inputFiles[0], wasm); } catch (ParseException& p) { p.dump(std::cerr); std::cerr << '\n'; @@ -374,6 +401,10 @@ void parseInput(Module& wasm, const WasmSplitOptions& options) { "request for silly amounts of memory)"; } options.applyFeatures(wasm); + + if (options.passOptions.validate && !WasmValidator().validate(wasm)) { + Fatal() << "error validating input"; + } } // Add a global monotonic counter and a timestamp global for each function, code @@ -600,13 +631,16 @@ void writeModule(Module& wasm, writer.write(wasm, filename); } -void instrumentModule(Module& wasm, const WasmSplitOptions& options) { +void instrumentModule(const WasmSplitOptions& options) { + Module wasm; + parseInput(wasm, options); + // Check that the profile export name is not already taken if (wasm.getExportOrNull(options.profileExport) != nullptr) { Fatal() << "error: Export " << options.profileExport << " already exists."; } - uint64_t moduleHash = hashFile(options.input); + uint64_t moduleHash = hashFile(options.inputFiles[0]); PassRunner runner(&wasm, options.passOptions); Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); @@ -616,14 +650,18 @@ void instrumentModule(Module& wasm, const WasmSplitOptions& options) { writeModule(wasm, options.output, options); } +struct ProfileData { + uint64_t hash; + std::vector<size_t> timestamps; +}; + // See "wasm-split profile format" above for more information. -std::set<Name> readProfile(Module& wasm, const WasmSplitOptions& options) { - auto profileData = - read_file<std::vector<char>>(options.profileFile, Flags::Binary); +ProfileData readProfile(const std::string& file) { + auto profileData = read_file<std::vector<char>>(file, Flags::Binary); size_t i = 0; auto readi32 = [&]() { if (i + 4 > profileData.size()) { - Fatal() << "Unexpected end of profile data"; + Fatal() << "Unexpected end of profile data in " << file; } uint32_t i32 = 0; i32 |= uint32_t(uint8_t(profileData[i++])); @@ -633,31 +671,15 @@ std::set<Name> readProfile(Module& wasm, const WasmSplitOptions& options) { return i32; }; - // Read and compare the 8-byte module hash. - uint64_t expected = readi32(); - expected |= uint64_t(readi32()) << 32; - if (expected != hashFile(options.input)) { - Fatal() << "error: checksum in profile does not match module checksum. " - << "The split module must be the original module that was " - << "instrumented to generate the profile."; - } - - std::set<Name> keptFuncs; - ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { - uint32_t timestamp = readi32(); - // TODO: provide an option to set the timestamp threshold. For now, kee the - // function if the profile shows it being run at all. - if (timestamp > 0) { - keptFuncs.insert(func->name); - } - }); + uint64_t hash = readi32(); + hash |= uint64_t(readi32()) << 32; - if (i != profileData.size()) { - // TODO: Handle concatenated profile data. - Fatal() << "Unexpected extra profile data"; + std::vector<size_t> timestamps; + while (i < profileData.size()) { + timestamps.push_back(readi32()); } - return keptFuncs; + return {hash, timestamps}; } void writeSymbolMap(Module& wasm, std::string filename) { @@ -668,12 +690,33 @@ void writeSymbolMap(Module& wasm, std::string filename) { runner.run(); } -void splitModule(Module& wasm, const WasmSplitOptions& options) { +void splitModule(const WasmSplitOptions& options) { + Module wasm; + parseInput(wasm, options); + std::set<Name> keepFuncs; if (options.profileFile.size()) { - // Use the profile to initialize `keepFuncs` - keepFuncs = readProfile(wasm, options); + // Use the profile to initialize `keepFuncs`. + uint64_t hash = hashFile(options.inputFiles[0]); + ProfileData profile = readProfile(options.profileFile); + if (profile.hash != hash) { + Fatal() << "error: checksum in profile does not match module checksum. " + << "The split module must be the original module that was " + << "instrumented to generate the profile."; + } + size_t i = 0; + ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { + if (i >= profile.timestamps.size()) { + Fatal() << "Unexpected end of profile data"; + } + if (profile.timestamps[i++] > 0) { + keepFuncs.insert(func->name); + } + }); + if (i != profile.timestamps.size()) { + Fatal() << "Unexpected extra profile data"; + } } // Add in the functions specified with --keep-funcs @@ -777,6 +820,77 @@ void splitModule(Module& wasm, const WasmSplitOptions& options) { writeModule(*secondary, options.secondaryOutput, options); } +void mergeProfiles(const WasmSplitOptions& options) { + // Read the initial profile. We will merge other profiles into this one. + ProfileData data = readProfile(options.inputFiles[0]); + + // In verbose mode, we want to find profiles that don't contribute to the + // merged profile. To do that, keep track of how many profiles each function + // appears in. If any profile contains only functions that appear in multiple + // profiles, it could be dropped. + std::vector<size_t> numProfiles; + if (options.verbose) { + numProfiles.resize(data.timestamps.size()); + for (size_t t = 0; t < data.timestamps.size(); ++t) { + if (data.timestamps[t]) { + numProfiles[t] = 1; + } + } + } + + // Read all the other profiles, taking the minimum nonzero timestamp for each + // function. + for (size_t i = 1; i < options.inputFiles.size(); ++i) { + ProfileData newData = readProfile(options.inputFiles[i]); + if (newData.hash != data.hash) { + Fatal() << "Checksum in profile " << options.inputFiles[i] + << " does not match hash in profile " << options.inputFiles[0]; + } + if (newData.timestamps.size() != data.timestamps.size()) { + Fatal() << "Profile " << options.inputFiles[i] + << " incompatible with profile " << options.inputFiles[0]; + } + for (size_t t = 0; t < data.timestamps.size(); ++t) { + if (data.timestamps[t] && newData.timestamps[t]) { + data.timestamps[t] = + std::min(data.timestamps[t], newData.timestamps[t]); + } else if (newData.timestamps[t]) { + data.timestamps[t] = newData.timestamps[t]; + } + if (options.verbose && newData.timestamps[t]) { + ++numProfiles[t]; + } + } + } + + // Check for useless profiles. + if (options.verbose) { + for (const auto& file : options.inputFiles) { + bool useless = true; + ProfileData newData = readProfile(file); + for (size_t t = 0; t < newData.timestamps.size(); ++t) { + if (newData.timestamps[t] && numProfiles[t] == 1) { + useless = false; + break; + } + } + if (useless) { + std::cout << "Profile " << file + << " only includes functions included in other profiles.\n"; + } + } + } + + // Write the combined profile. + BufferWithRandomAccess buffer; + buffer << data.hash; + for (size_t t = 0; t < data.timestamps.size(); ++t) { + buffer << uint32_t(data.timestamps[t]); + } + Output out(options.output, Flags::Binary); + buffer.writeTo(out.getStream()); +} + } // anonymous namespace int main(int argc, const char* argv[]) { @@ -787,19 +901,15 @@ int main(int argc, const char* argv[]) { Fatal() << "Invalid command line arguments"; } - Module wasm; - parseInput(wasm, options); - - if (options.passOptions.validate && !WasmValidator().validate(wasm)) { - Fatal() << "error validating input"; - } - switch (options.mode) { case WasmSplitOptions::Mode::Split: - splitModule(wasm, options); + splitModule(options); break; case WasmSplitOptions::Mode::Instrument: - instrumentModule(wasm, options); + instrumentModule(options); + break; + case WasmSplitOptions::Mode::MergeProfiles: + mergeProfiles(options); break; } } diff --git a/test/lit/wasm-split/help.test b/test/lit/wasm-split/help.test index c473b28a1..de789a362 100644 --- a/test/lit/wasm-split/help.test +++ b/test/lit/wasm-split/help.test @@ -1,6 +1,6 @@ ;; RUN: wasm-split --help | filecheck %s -CHECK: wasm-split INFILE +CHECK: wasm-split INFILES CHECK-NEXT: CHECK-NEXT: Split a module into a primary module and a secondary module, or instrument a CHECK-NEXT: module to gather a profile that can inform future splitting, or manage such @@ -16,6 +16,8 @@ CHECK-NEXT: modules. The default mode. CHECK-NEXT: --instrument Instrument an input module to allow it to CHECK-NEXT: generate a profile that can be used to CHECK-NEXT: guide splitting. +CHECK-NEXT: --merge-profiles Merge multiple profiles for the same +CHECK-NEXT: module into a single profile. CHECK-NEXT: --profile [split] The profile to use to guide CHECK-NEXT: splitting. CHECK-NEXT: --keep-funcs [split] Comma-separated list of functions @@ -41,7 +43,6 @@ CHECK-NEXT: primary module. CHECK-NEXT: --export-prefix [split] An identifying prefix to prepend CHECK-NEXT: to new export names created by module CHECK-NEXT: splitting. -CHECK-NEXT: --output,-o [instrument] Output file. CHECK-NEXT: --profile-export [instrument] The export name of the CHECK-NEXT: function the embedder calls to write the CHECK-NEXT: profile into memory. Defaults to @@ -59,10 +60,11 @@ CHECK-NEXT: same table size when using Em CHECK-NEXT: SPLIT_MODULE mode with dynamic linking. CHECK-NEXT: TODO: Figure out a more elegant solution CHECK-NEXT: for that use case and remove this. +CHECK-NEXT: --emit-text,-S [split, instrument] Emit text instead of +CHECK-NEXT: binary for the output file or files. +CHECK-NEXT: --debuginfo,-g [split, instrument] Emit names section in +CHECK-NEXT: wasm binary (or full debuginfo in wast) +CHECK-NEXT: --output,-o [instrument, merge-profiles] Output file. CHECK-NEXT: --verbose,-v Verbose output mode. Prints the functions CHECK-NEXT: that will be kept and split out when CHECK-NEXT: splitting a module. -CHECK-NEXT: --emit-text,-S Emit text instead of binary for the -CHECK-NEXT: output file or files. -CHECK-NEXT: --debuginfo,-g Emit names section in wasm binary (or -CHECK-NEXT: full debuginfo in wast) diff --git a/test/lit/wasm-split/invalid-options.wast b/test/lit/wasm-split/invalid-options.wast index 89a68e6aa..c77691ecc 100644 --- a/test/lit/wasm-split/invalid-options.wast +++ b/test/lit/wasm-split/invalid-options.wast @@ -45,6 +45,14 @@ ;; RUN: not wasm-split %s --profile-export=foo 2>&1 \ ;; RUN: | filecheck %s --check-prefix SPLIT-PROFILE-EXPORT +;; -S cannot be used with --merge-profiles +;; RUN: not wasm-split %s --merge-profiles -S 2>&1 \ +;; RUN: | filecheck %s --check-prefix MERGE-EMIT-TEXT + +;; -g cannot be used with --merge-profiles +;; RUN: not wasm-split %s --merge-profiles -g 2>&1 \ +;; RUN: | filecheck %s --check-prefix MERGE-DEBUGINFO + ;; INSTRUMENT-PROFILE: error: Option --profile cannot be used in instrument mode. ;; INSTRUMENT-OUT1: error: Option --primary-output cannot be used in instrument mode. @@ -67,4 +75,8 @@ ;; SPLIT-PROFILE-EXPORT: error: Option --profile-export cannot be used in split mode. +;; MERGE-EMIT-TEXT: error: Option --emit-text cannot be used in merge-profiles mode. + +;; MERGE-DEBUGINFO: error: Option --debuginfo cannot be used in merge-profiles mode. + (module) diff --git a/test/lit/wasm-split/merge-profiles.wast b/test/lit/wasm-split/merge-profiles.wast new file mode 100644 index 000000000..fdaf6678e --- /dev/null +++ b/test/lit/wasm-split/merge-profiles.wast @@ -0,0 +1,42 @@ +;; Instrument the module +;; RUN: wasm-split --instrument %s -o %t.instrumented.wasm -g + +;; Generate profiles +;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.foo.prof foo +;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.foo.bar.prof foo bar +;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.bar.baz.prof bar baz + +;; Merge profiles +;; RUN: wasm-split --merge-profiles -v %t.foo.prof %t.foo.bar.prof %t.bar.baz.prof -o %t.merged.prof 2>&1 \ +;; RUN: | filecheck %s --check-prefix MERGE + +;; Split the module +;; RUN: wasm-split %s --profile %t.merged.prof -o1 %t.1.wasm -o2 %t.2.wasm -g -v \ +;; RUN: | filecheck %s --check-prefix SPLIT + +;; MERGE: Profile {{.*}}foo.prof only includes functions included in other profiles. +;; MERGE: Profile {{.*}}foo.bar.prof only includes functions included in other profiles. +;; MERGE-NOT: Profile {{.*}}bar.baz.prof only includes functions included in other profiles. + +;; SPLIT: Keeping functions: bar, baz, foo{{$}} +;; SPLIT-NEXT: Splitting out functions: qux{{$}} + +(module + (export "memory" (memory 0 0)) + (export "foo" (func $foo)) + (export "bar" (func $bar)) + (export "baz" (func $baz)) + (export "qux" (func $qux)) + (func $foo + (nop) + ) + (func $bar + (nop) + ) + (func $baz + (nop) + ) + (func $qux + (nop) + ) +) |