diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2021-09-03 08:24:08 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-03 08:24:08 -0700 |
commit | 548d1971c3c844e8dab8c0da4e97aa5c339937df (patch) | |
tree | 188cec7d93b7743f987c101e6a8d8999d3c1d62d /src/tools/wasm-split | |
parent | e84980dffb62d672c991960a701a3c7de8f8aa74 (diff) | |
download | binaryen-548d1971c3c844e8dab8c0da4e97aa5c339937df.tar.gz binaryen-548d1971c3c844e8dab8c0da4e97aa5c339937df.tar.bz2 binaryen-548d1971c3c844e8dab8c0da4e97aa5c339937df.zip |
[NFC] Split wasm-split into multiple files (#4119)
As wasm-split has gained new functionality, its implementation file has become
large. In preparation for adding even more functionality, split the existing
implementation across multiple files in a new tools/wasm-split subdirectory.
Diffstat (limited to 'src/tools/wasm-split')
-rw-r--r-- | src/tools/wasm-split/CMakeLists.txt | 10 | ||||
-rw-r--r-- | src/tools/wasm-split/instrumenter.cpp | 185 | ||||
-rw-r--r-- | src/tools/wasm-split/instrumenter.h | 47 | ||||
-rw-r--r-- | src/tools/wasm-split/split-options.cpp | 356 | ||||
-rw-r--r-- | src/tools/wasm-split/split-options.h | 86 | ||||
-rw-r--r-- | src/tools/wasm-split/wasm-split.cpp | 396 |
6 files changed, 1080 insertions, 0 deletions
diff --git a/src/tools/wasm-split/CMakeLists.txt b/src/tools/wasm-split/CMakeLists.txt new file mode 100644 index 000000000..4f4e7d832 --- /dev/null +++ b/src/tools/wasm-split/CMakeLists.txt @@ -0,0 +1,10 @@ +FILE(GLOB wasm_split_HEADERS *h) +set(wasm_split_SOURCES + split-options.cpp + instrumenter.cpp + ${wasm_split_HEADERS} +) +add_library(wasm-split-lib OBJECT ${wasm_split_SOURCES}) + +binaryen_add_executable(wasm-split wasm-split.cpp) +target_link_libraries(wasm-split wasm-split-lib) diff --git a/src/tools/wasm-split/instrumenter.cpp b/src/tools/wasm-split/instrumenter.cpp new file mode 100644 index 000000000..0c5e96b54 --- /dev/null +++ b/src/tools/wasm-split/instrumenter.cpp @@ -0,0 +1,185 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "instrumenter.h" +#include "ir/module-utils.h" +#include "ir/names.h" +#include "support/name.h" +#include "wasm-type.h" + +namespace wasm { + +Instrumenter::Instrumenter(const std::string& profileExport, + uint64_t moduleHash) + : profileExport(profileExport), moduleHash(moduleHash) {} + +void Instrumenter::run(PassRunner* runner, Module* wasm) { + this->runner = runner; + this->wasm = wasm; + addGlobals(); + instrumentFuncs(); + addProfileExport(); +} + +void Instrumenter::addGlobals() { + // Create fresh global names (over-reserves, but that's ok) + counterGlobal = Names::getValidGlobalName(*wasm, "monotonic_counter"); + functionGlobals.reserve(wasm->functions.size()); + ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { + functionGlobals.push_back(Names::getValidGlobalName( + *wasm, std::string(func->name.c_str()) + "_timestamp")); + }); + + // Create and add new globals + auto addGlobal = [&](Name name) { + auto global = Builder::makeGlobal( + name, + Type::i32, + Builder(*wasm).makeConst(Literal::makeZero(Type::i32)), + Builder::Mutable); + global->hasExplicitName = true; + wasm->addGlobal(std::move(global)); + }; + addGlobal(counterGlobal); + for (auto& name : functionGlobals) { + addGlobal(name); + } +} + +void Instrumenter::instrumentFuncs() { + // Inject the following code at the beginning of each function to advance the + // monotonic counter and set the function's timestamp if it hasn't already + // been set. 
+ // + // (if (i32.eqz (global.get $timestamp)) + // (block + // (global.set $monotonic_counter + // (i32.add + // (global.get $monotonic_counter) + // (i32.const 1) + // ) + // ) + // (global.set $timestamp + // (global.get $monotonic_counter) + // ) + // ) + // ) + Builder builder(*wasm); + auto globalIt = functionGlobals.begin(); + ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { + func->body = builder.makeSequence( + builder.makeIf( + builder.makeUnary(EqZInt32, + builder.makeGlobalGet(*globalIt, Type::i32)), + builder.makeSequence( + builder.makeGlobalSet( + counterGlobal, + builder.makeBinary(AddInt32, + builder.makeGlobalGet(counterGlobal, Type::i32), + builder.makeConst(Literal::makeOne(Type::i32)))), + builder.makeGlobalSet( + *globalIt, builder.makeGlobalGet(counterGlobal, Type::i32)))), + func->body, + func->body->type); + ++globalIt; + }); +} + +// wasm-split profile format: +// +// The wasm-split profile is a binary format designed to be simple to produce +// and consume. It is comprised of: +// +// 1. An 8-byte module hash +// +// 2. A 4-byte timestamp for each defined function +// +// The module hash is meant to guard against bugs where the module that was +// instrumented and the module that is being split are different. The timestamps +// are non-zero for functions that were called during the instrumented run and 0 +// otherwise. Functions with smaller non-zero timestamps were called earlier in +// the instrumented run than functions with larger timestamps. + +void Instrumenter::addProfileExport() { + // Create and export a function to dump the profile into a given memory + // buffer. The function takes the available address and buffer size as + // arguments and returns the total size of the profile. It only actually + // writes the profile if the given space is sufficient to hold it. 
+ auto name = Names::getValidFunctionName(*wasm, profileExport); + auto writeProfile = Builder::makeFunction( + name, Signature({Type::i32, Type::i32}, Type::i32), {}); + writeProfile->hasExplicitName = true; + writeProfile->setLocalName(0, "addr"); + writeProfile->setLocalName(1, "size"); + + // Calculate the size of the profile: + // 8 bytes module hash + + // 4 bytes for the timestamp for each function + const size_t profileSize = 8 + 4 * functionGlobals.size(); + + // Create the function body + Builder builder(*wasm); + auto getAddr = [&]() { return builder.makeLocalGet(0, Type::i32); }; + auto getSize = [&]() { return builder.makeLocalGet(1, Type::i32); }; + auto hashConst = [&]() { return builder.makeConst(int64_t(moduleHash)); }; + auto profileSizeConst = [&]() { + return builder.makeConst(int32_t(profileSize)); + }; + + // Write the hash followed by all the time stamps + Expression* writeData = + builder.makeStore(8, 0, 1, getAddr(), hashConst(), Type::i64); + + uint32_t offset = 8; + for (const auto& global : functionGlobals) { + writeData = builder.blockify( + writeData, + builder.makeStore(4, + offset, + 1, + getAddr(), + builder.makeGlobalGet(global, Type::i32), + Type::i32)); + offset += 4; + } + + writeProfile->body = builder.makeSequence( + builder.makeIf(builder.makeBinary(GeUInt32, getSize(), profileSizeConst()), + writeData), + profileSizeConst()); + + // Create an export for the function + wasm->addFunction(std::move(writeProfile)); + wasm->addExport( + Builder::makeExport(profileExport, name, ExternalKind::Function)); + + // Also make sure there is a memory with enough pages to write into + size_t pages = (profileSize + Memory::kPageSize - 1) / Memory::kPageSize; + if (!wasm->memory.exists) { + wasm->memory.exists = true; + wasm->memory.initial = pages; + wasm->memory.max = pages; + } else if (wasm->memory.initial < pages) { + wasm->memory.initial = pages; + if (wasm->memory.max < pages) { + wasm->memory.max = pages; + } + } + + // TODO: export 
the memory if it is not already exported. +} + +} // namespace wasm diff --git a/src/tools/wasm-split/instrumenter.h b/src/tools/wasm-split/instrumenter.h new file mode 100644 index 000000000..4f714fde9 --- /dev/null +++ b/src/tools/wasm-split/instrumenter.h @@ -0,0 +1,47 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_tools_wasm_split_instrumenter_h +#define wasm_tools_wasm_split_instrumenter_h + +#include "pass.h" + +namespace wasm { + +// Add a global monotonic counter and a timestamp global for each function, code +// at the beginning of each function to set its timestamp, and a new exported +// function for dumping the profile data. 
+struct Instrumenter : public Pass { + PassRunner* runner = nullptr; + Module* wasm = nullptr; + + const std::string& profileExport; + uint64_t moduleHash; + + Name counterGlobal; + std::vector<Name> functionGlobals; + + Instrumenter(const std::string& profileExport, uint64_t moduleHash); + + void run(PassRunner* runner, Module* wasm) override; + void addGlobals(); + void instrumentFuncs(); + void addProfileExport(); +}; + +} // namespace wasm + +#endif // wasm_tools_wasm_split_instrumenter_h diff --git a/src/tools/wasm-split/split-options.cpp b/src/tools/wasm-split/split-options.cpp new file mode 100644 index 000000000..419555f45 --- /dev/null +++ b/src/tools/wasm-split/split-options.cpp @@ -0,0 +1,356 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "split-options.h" +#include <fstream> + +namespace wasm { + +namespace { + +std::set<Name> parseNameListFromLine(const std::string& line) { + std::set<Name> names; + std::istringstream stream(line); + for (std::string name; std::getline(stream, name, ',');) { + names.insert(name); + } + return names; +} + +std::set<Name> parseNameListFromFile(const std::string& filename) { + std::ifstream infile(filename); + if (!infile.is_open()) { + std::cerr << "Failed opening '" << filename << "'" << std::endl; + exit(EXIT_FAILURE); + } + + std::set<Name> names; + std::string line; + while (std::getline(infile, line)) { + if (line.length() > 0) { + names.insert(line); + } + } + + return names; +} + +std::set<Name> parseNameList(const std::string& listOrFile) { + if (!listOrFile.empty() && listOrFile[0] == '@') { + return parseNameListFromFile(listOrFile.substr(1)); + } + + return parseNameListFromLine(listOrFile); +} + +std::ostream& operator<<(std::ostream& o, WasmSplitOptions::Mode& mode) { + switch (mode) { + case WasmSplitOptions::Mode::Split: + o << "split"; + break; + case WasmSplitOptions::Mode::Instrument: + o << "instrument"; + break; + case WasmSplitOptions::Mode::MergeProfiles: + o << "merge-profiles"; + break; + } + return o; +} + +} // anonymous namespace + +WasmSplitOptions::WasmSplitOptions() + : ToolOptions("wasm-split", + "Split a module into a primary module and a secondary " + "module, or instrument a module to gather a profile that " + "can inform future splitting, or manage such profiles. Options " + "that are only accepted in particular modes are marked with " + "the accepted \"[<modes>]\" in their descriptions.") { + (*this) + .add("--split", + "", + "Split an input module into two output modules. 
The default mode.", + Options::Arguments::Zero, + [&](Options* o, const std::string& arugment) { mode = Mode::Split; }) + .add( + "--instrument", + "", + "Instrument an input module to allow it to generate a profile that can" + " be used to guide splitting.", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { mode = Mode::Instrument; }) + .add("--merge-profiles", + "", + "Merge multiple profiles for the same module into a single profile.", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + mode = Mode::MergeProfiles; + }) + .add( + "--profile", + "", + "The profile to use to guide splitting.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { profileFile = argument; }) + .add("--keep-funcs", + "", + "Comma-separated list of functions to keep in the primary module, " + "regardless of any profile. " + "You can also pass a file with a list of functions separated by new " + "lines. " + "To do this, prepend @ before filename (--keep-funcs @myfile)", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + keepFuncs = parseNameList(argument); + }) + .add("--split-funcs", + "", + "Comma-separated list of functions to split into the secondary " + "module, regardless of any profile. If there is no profile, then " + "this defaults to all functions defined in the module. " + "You can also pass a file with a list of functions separated by new " + "lines. 
" + "To do this, prepend @ before filename (--split-funcs @myfile)", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + splitFuncs = parseNameList(argument); + }) + .add("--primary-output", + "-o1", + "Output file for the primary module.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + primaryOutput = argument; + }) + .add("--secondary-output", + "-o2", + "Output file for the secondary module.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + secondaryOutput = argument; + }) + .add("--symbolmap", + "", + "Write a symbol map file for each of the output modules.", + {Mode::Split}, + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { symbolMap = true; }) + .add( + "--placeholdermap", + "", + "Write a file mapping placeholder indices to the function names.", + {Mode::Split}, + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { placeholderMap = true; }) + .add("--import-namespace", + "", + "The namespace from which to import objects from the primary " + "module into the secondary module.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + importNamespace = argument; + }) + .add("--placeholder-namespace", + "", + "The namespace from which to import placeholder functions into " + "the primary module.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + placeholderNamespace = argument; + }) + .add( + "--export-prefix", + "", + "An identifying prefix to prepend to new export names created " + "by module splitting.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { exportPrefix = argument; }) + .add("--profile-export", + "", + "The export name of the function the embedder calls to write the " + "profile into memory. 
Defaults to `__write_profile`.", + {Mode::Instrument}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + profileExport = argument; + }) + .add( + "--emit-module-names", + "", + "Emit module names, even if not emitting the rest of the names section. " + "Can help differentiate the modules in stack traces. This option will be " + "removed once simpler ways of naming modules are widely available. See " + "https://bugs.chromium.org/p/v8/issues/detail?id=11808.", + {Mode::Split, Mode::Instrument}, + Options::Arguments::Zero, + [&](Options* o, const std::string& arguments) { emitModuleNames = true; }) + .add("--initial-table", + "", + "A hack to ensure the split and instrumented modules have the same " + "table size when using Emscripten's SPLIT_MODULE mode with dynamic " + "linking. TODO: Figure out a more elegant solution for that use " + "case and remove this.", + {Mode::Split, Mode::Instrument}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + initialTableSize = std::stoi(argument); + }) + .add("--emit-text", + "-S", + "Emit text instead of binary for the output file or files.", + {Mode::Split, Mode::Instrument}, + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { emitBinary = false; }) + .add("--debuginfo", + "-g", + "Emit names section in wasm binary (or full debuginfo in wast)", + {Mode::Split, Mode::Instrument}, + Options::Arguments::Zero, + [&](Options* o, const std::string& arguments) { + passOptions.debugInfo = true; + }) + .add("--output", + "-o", + "Output file.", + {Mode::Instrument, Mode::MergeProfiles}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { output = argument; }) + .add("--verbose", + "-v", + "Verbose output mode. 
Prints the functions that will be kept " + "and split out when splitting a module.", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + verbose = true; + quiet = false; + }) + .add_positional("INFILES", + Options::Arguments::N, + [&](Options* o, const std::string& argument) { + inputFiles.push_back(argument); + }); +} + +WasmSplitOptions& WasmSplitOptions::add(const std::string& longName, + const std::string& shortName, + const std::string& description, + std::vector<Mode>&& modes, + Arguments arguments, + const Action& action) { + // Insert the valid modes at the beginning of the description. + std::stringstream desc; + if (modes.size()) { + desc << '['; + std::string sep = ""; + for (Mode m : modes) { + validOptions[static_cast<unsigned>(m)].insert(longName); + desc << sep << m; + sep = ", "; + } + desc << "] "; + } + desc << description; + ToolOptions::add( + longName, + shortName, + desc.str(), + arguments, + [&, action, longName](Options* o, const std::string& argument) { + usedOptions.push_back(longName); + action(o, argument); + }); + return *this; +} + +WasmSplitOptions& WasmSplitOptions::add(const std::string& longName, + const std::string& shortName, + const std::string& description, + Arguments arguments, + const Action& action) { + // Add an option valid in all modes. + for (unsigned i = 0; i < NumModes; ++i) { + validOptions[i].insert(longName); + } + return add(longName, shortName, description, {}, arguments, action); +} + +bool WasmSplitOptions::validate() { + bool valid = true; + auto fail = [&](auto msg) { + std::cerr << "error: " << msg << "\n"; + valid = false; + }; + + // Validate the positional arguments. + if (inputFiles.size() == 0) { + fail("no input file"); + } + switch (mode) { + case Mode::Split: + case Mode::Instrument: + if (inputFiles.size() > 1) { + fail("Cannot have more than one input file."); + } + break; + case Mode::MergeProfiles: + // Any number >= 1 allowed. 
+ break; + } + + // Validate that all used options are allowed in the current mode. + for (std::string& opt : usedOptions) { + if (!validOptions[static_cast<unsigned>(mode)].count(opt)) { + std::stringstream msg; + msg << "Option " << opt << " cannot be used in " << mode << " mode."; + fail(msg.str()); + } + } + + if (mode == Mode::Split) { + std::vector<Name> impossible; + std::set_intersection(keepFuncs.begin(), + keepFuncs.end(), + splitFuncs.begin(), + splitFuncs.end(), + std::inserter(impossible, impossible.end())); + for (auto& func : impossible) { + fail(std::string("Cannot both keep and split out function ") + + func.c_str()); + } + } + + return valid; +} + +void WasmSplitOptions::parse(int argc, const char* argv[]) { + ToolOptions::parse(argc, argv); + // Since --quiet is defined in ToolOptions but --verbose is defined here, + // --quiet doesn't know to unset --verbose. Fix it up here. + if (quiet && verbose) { + verbose = false; + } +} + +} // namespace wasm diff --git a/src/tools/wasm-split/split-options.h b/src/tools/wasm-split/split-options.h new file mode 100644 index 000000000..5c811c32c --- /dev/null +++ b/src/tools/wasm-split/split-options.h @@ -0,0 +1,86 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef wasm_tools_wasm_split_options_h +#define wasm_tools_wasm_split_options_h + +#include "tools/tool-options.h" + +namespace wasm { + +const std::string DEFAULT_PROFILE_EXPORT("__write_profile"); + +struct WasmSplitOptions : ToolOptions { + enum class Mode : unsigned { + Split, + Instrument, + MergeProfiles, + }; + Mode mode = Mode::Split; + constexpr static size_t NumModes = + static_cast<unsigned>(Mode::MergeProfiles) + 1; + + bool verbose = false; + bool emitBinary = true; + bool symbolMap = false; + bool placeholderMap = false; + + // TODO: Remove this. See the comment in wasm-binary.h. + bool emitModuleNames = false; + + std::string profileFile; + std::string profileExport = DEFAULT_PROFILE_EXPORT; + + std::set<Name> keepFuncs; + std::set<Name> splitFuncs; + + std::vector<std::string> inputFiles; + std::string output; + std::string primaryOutput; + std::string secondaryOutput; + + std::string importNamespace; + std::string placeholderNamespace; + std::string exportPrefix; + + // A hack to ensure the split and instrumented modules have the same table + // size when using Emscripten's SPLIT_MODULE mode with dynamic linking. TODO: + // Figure out a more elegant solution for that use case and remove this. + int initialTableSize = -1; + + // The options that are valid for each mode. 
+ std::array<std::unordered_set<std::string>, NumModes> validOptions; + std::vector<std::string> usedOptions; + + WasmSplitOptions(); + WasmSplitOptions& add(const std::string& longName, + const std::string& shortName, + const std::string& description, + std::vector<Mode>&& modes, + Arguments arguments, + const Action& action); + WasmSplitOptions& add(const std::string& longName, + const std::string& shortName, + const std::string& description, + Arguments arguments, + const Action& action); + bool validate(); + void parse(int argc, const char* argv[]); +}; + +} // namespace wasm + +#endif // wasm_tools_wasm_split_options_h diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp new file mode 100644 index 000000000..bedba8957 --- /dev/null +++ b/src/tools/wasm-split/wasm-split.cpp @@ -0,0 +1,396 @@ +/* + * Copyright 2020 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// wasm-split: Split a module in two or instrument a module to inform future +// splitting. 
+ +#include "ir/module-splitting.h" +#include "ir/names.h" +#include "support/file.h" +#include "support/name.h" +#include "support/path.h" +#include "support/utilities.h" +#include "wasm-binary.h" +#include "wasm-builder.h" +#include "wasm-io.h" +#include "wasm-validator.h" + +#include "instrumenter.h" +#include "split-options.h" + +using namespace wasm; + +namespace { + +void parseInput(Module& wasm, const WasmSplitOptions& options) { + options.applyFeatures(wasm); + ModuleReader reader; + reader.setProfile(options.profile); + try { + reader.read(options.inputFiles[0], wasm); + } catch (ParseException& p) { + p.dump(std::cerr); + std::cerr << '\n'; + Fatal() << "error parsing wasm"; + } catch (std::bad_alloc&) { + Fatal() << "error building module, std::bad_alloc (possibly invalid " + "request for silly amounts of memory)"; + } + + if (options.passOptions.validate && !WasmValidator().validate(wasm)) { + Fatal() << "error validating input"; + } +} + +uint64_t hashFile(const std::string& filename) { + auto contents(read_file<std::vector<char>>(filename, Flags::Binary)); + size_t digest = 0; + // Don't use `hash` or `rehash` - they aren't deterministic between executions + for (char c : contents) { + hash_combine(digest, c); + } + return uint64_t(digest); +} + +void adjustTableSize(Module& wasm, int initialSize) { + if (initialSize < 0) { + return; + } + if (wasm.tables.empty()) { + Fatal() << "--initial-table used but there is no table"; + } + + auto& table = wasm.tables.front(); + + if ((uint64_t)initialSize < table->initial) { + Fatal() << "Specified initial table size too small, should be at least " + << table->initial; + } + if ((uint64_t)initialSize > table->max) { + Fatal() << "Specified initial table size larger than max table size " + << table->max; + } + table->initial = initialSize; +} + +void writeModule(Module& wasm, + std::string filename, + const WasmSplitOptions& options) { + ModuleWriter writer; + writer.setBinary(options.emitBinary); + 
writer.setDebugInfo(options.passOptions.debugInfo); + if (options.emitModuleNames) { + writer.setEmitModuleName(true); + } + writer.write(wasm, filename); +} + +void instrumentModule(const WasmSplitOptions& options) { + Module wasm; + parseInput(wasm, options); + + // Check that the profile export name is not already taken + if (wasm.getExportOrNull(options.profileExport) != nullptr) { + Fatal() << "error: Export " << options.profileExport << " already exists."; + } + + uint64_t moduleHash = hashFile(options.inputFiles[0]); + PassRunner runner(&wasm, options.passOptions); + Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); + + adjustTableSize(wasm, options.initialTableSize); + + // Write the output modules + writeModule(wasm, options.output, options); +} + +struct ProfileData { + uint64_t hash; + std::vector<size_t> timestamps; +}; + +// See "wasm-split profile format" in instrumenter.cpp for more information. +ProfileData readProfile(const std::string& file) { + auto profileData = read_file<std::vector<char>>(file, Flags::Binary); + size_t i = 0; + auto readi32 = [&]() { + if (i + 4 > profileData.size()) { + Fatal() << "Unexpected end of profile data in " << file; + } + uint32_t i32 = 0; + i32 |= uint32_t(uint8_t(profileData[i++])); + i32 |= uint32_t(uint8_t(profileData[i++])) << 8; + i32 |= uint32_t(uint8_t(profileData[i++])) << 16; + i32 |= uint32_t(uint8_t(profileData[i++])) << 24; + return i32; + }; + + uint64_t hash = readi32(); + hash |= uint64_t(readi32()) << 32; + + std::vector<size_t> timestamps; + while (i < profileData.size()) { + timestamps.push_back(readi32()); + } + + return {hash, timestamps}; +} + +void writeSymbolMap(Module& wasm, std::string filename) { + PassOptions options; + options.arguments["symbolmap"] = filename; + PassRunner runner(&wasm, options); + runner.add("symbolmap"); + runner.run(); +} + +void writePlaceholderMap(const std::map<size_t, Name> placeholderMap, + std::string filename) { + Output output(filename, 
Flags::Text); + auto& o = output.getStream(); + for (auto pair : placeholderMap) { + o << pair.first << ':' << pair.second << '\n'; + } +} + +void splitModule(const WasmSplitOptions& options) { + Module wasm; + parseInput(wasm, options); + + std::set<Name> keepFuncs; + + if (options.profileFile.size()) { + // Use the profile to initialize `keepFuncs`. + uint64_t hash = hashFile(options.inputFiles[0]); + ProfileData profile = readProfile(options.profileFile); + if (profile.hash != hash) { + Fatal() << "error: checksum in profile does not match module checksum. " + << "The split module must be the original module that was " + << "instrumented to generate the profile."; + } + size_t i = 0; + ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { + if (i >= profile.timestamps.size()) { + Fatal() << "Unexpected end of profile data"; + } + if (profile.timestamps[i++] > 0) { + keepFuncs.insert(func->name); + } + }); + if (i != profile.timestamps.size()) { + Fatal() << "Unexpected extra profile data"; + } + } + + // Add in the functions specified with --keep-funcs + for (auto& func : options.keepFuncs) { + if (!options.quiet && wasm.getFunctionOrNull(func) == nullptr) { + std::cerr << "warning: function " << func << " does not exist\n"; + } + keepFuncs.insert(func); + } + + // Remove the functions specified with --split-funcs + for (auto& func : options.splitFuncs) { + auto* function = wasm.getFunctionOrNull(func); + if (!options.quiet && function == nullptr) { + std::cerr << "warning: function " << func << " does not exist\n"; + } + if (function && function->imported()) { + if (!options.quiet) { + std::cerr << "warning: cannot split out imported function " << func + << "\n"; + } + } else { + keepFuncs.erase(func); + } + } + + if (!options.quiet && keepFuncs.size() == 0) { + std::cerr << "warning: not keeping any functions in the primary module\n"; + } + + // If warnings are enabled, check that any functions are being split out. 
+ if (!options.quiet) { + std::set<Name> splitFuncs; + ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { + if (keepFuncs.count(func->name) == 0) { + splitFuncs.insert(func->name); + } + }); + + if (splitFuncs.size() == 0) { + std::cerr + << "warning: not splitting any functions out to the secondary module\n"; + } + + // Dump the kept and split functions if we are verbose + if (options.verbose) { + auto printCommaSeparated = [&](auto funcs) { + for (auto it = funcs.begin(); it != funcs.end(); ++it) { + if (it != funcs.begin()) { + std::cout << ", "; + } + std::cout << *it; + } + }; + + std::cout << "Keeping functions: "; + printCommaSeparated(keepFuncs); + std::cout << "\n"; + + std::cout << "Splitting out functions: "; + printCommaSeparated(splitFuncs); + std::cout << "\n"; + } + } + + // Actually perform the splitting + ModuleSplitting::Config config; + config.primaryFuncs = std::move(keepFuncs); + if (options.importNamespace.size()) { + config.importNamespace = options.importNamespace; + } + if (options.placeholderNamespace.size()) { + config.placeholderNamespace = options.placeholderNamespace; + } + if (options.exportPrefix.size()) { + config.newExportPrefix = options.exportPrefix; + } + config.minimizeNewExportNames = !options.passOptions.debugInfo; + auto splitResults = ModuleSplitting::splitFunctions(wasm, config); + auto& secondary = splitResults.secondary; + + adjustTableSize(wasm, options.initialTableSize); + adjustTableSize(*secondary, options.initialTableSize); + + if (options.symbolMap) { + writeSymbolMap(wasm, options.primaryOutput + ".symbols"); + writeSymbolMap(*secondary, options.secondaryOutput + ".symbols"); + } + + if (options.placeholderMap) { + writePlaceholderMap(splitResults.placeholderMap, + options.primaryOutput + ".placeholders"); + } + + // Set the names of the split modules. This can help differentiate them in + // stack traces. 
+ if (options.emitModuleNames) { + if (!wasm.name) { + wasm.name = Path::getBaseName(options.primaryOutput); + } + secondary->name = Path::getBaseName(options.secondaryOutput); + } + + // write the output modules + writeModule(wasm, options.primaryOutput, options); + writeModule(*secondary, options.secondaryOutput, options); +} + +void mergeProfiles(const WasmSplitOptions& options) { + // Read the initial profile. We will merge other profiles into this one. + ProfileData data = readProfile(options.inputFiles[0]); + + // In verbose mode, we want to find profiles that don't contribute to the + // merged profile. To do that, keep track of how many profiles each function + // appears in. If any profile contains only functions that appear in multiple + // profiles, it could be dropped. + std::vector<size_t> numProfiles; + if (options.verbose) { + numProfiles.resize(data.timestamps.size()); + for (size_t t = 0; t < data.timestamps.size(); ++t) { + if (data.timestamps[t]) { + numProfiles[t] = 1; + } + } + } + + // Read all the other profiles, taking the minimum nonzero timestamp for each + // function. + for (size_t i = 1; i < options.inputFiles.size(); ++i) { + ProfileData newData = readProfile(options.inputFiles[i]); + if (newData.hash != data.hash) { + Fatal() << "Checksum in profile " << options.inputFiles[i] + << " does not match hash in profile " << options.inputFiles[0]; + } + if (newData.timestamps.size() != data.timestamps.size()) { + Fatal() << "Profile " << options.inputFiles[i] + << " incompatible with profile " << options.inputFiles[0]; + } + for (size_t t = 0; t < data.timestamps.size(); ++t) { + if (data.timestamps[t] && newData.timestamps[t]) { + data.timestamps[t] = + std::min(data.timestamps[t], newData.timestamps[t]); + } else if (newData.timestamps[t]) { + data.timestamps[t] = newData.timestamps[t]; + } + if (options.verbose && newData.timestamps[t]) { + ++numProfiles[t]; + } + } + } + + // Check for useless profiles. 
+ if (options.verbose) { + for (const auto& file : options.inputFiles) { + bool useless = true; + ProfileData newData = readProfile(file); + for (size_t t = 0; t < newData.timestamps.size(); ++t) { + if (newData.timestamps[t] && numProfiles[t] == 1) { + useless = false; + break; + } + } + if (useless) { + std::cout << "Profile " << file + << " only includes functions included in other profiles.\n"; + } + } + } + + // Write the combined profile. + BufferWithRandomAccess buffer; + buffer << data.hash; + for (size_t t = 0; t < data.timestamps.size(); ++t) { + buffer << uint32_t(data.timestamps[t]); + } + Output out(options.output, Flags::Binary); + buffer.writeTo(out.getStream()); +} + +} // anonymous namespace + +int main(int argc, const char* argv[]) { + WasmSplitOptions options; + options.parse(argc, argv); + + if (!options.validate()) { + Fatal() << "Invalid command line arguments"; + } + + switch (options.mode) { + case WasmSplitOptions::Mode::Split: + splitModule(options); + break; + case WasmSplitOptions::Mode::Instrument: + instrumentModule(options); + break; + case WasmSplitOptions::Mode::MergeProfiles: + mergeProfiles(options); + break; + } +} |