diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2021-09-03 08:24:08 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-03 08:24:08 -0700 |
commit | 548d1971c3c844e8dab8c0da4e97aa5c339937df (patch) | |
tree | 188cec7d93b7743f987c101e6a8d8999d3c1d62d | |
parent | e84980dffb62d672c991960a701a3c7de8f8aa74 (diff) | |
download | binaryen-548d1971c3c844e8dab8c0da4e97aa5c339937df.tar.gz binaryen-548d1971c3c844e8dab8c0da4e97aa5c339937df.tar.bz2 binaryen-548d1971c3c844e8dab8c0da4e97aa5c339937df.zip |
[NFC] Split wasm-split into multiple files (#4119)
As wasm-split has gained new functionality, its implementation file has become
large. In preparation for adding even more functionality, split the existing
implementation across multiple files in a new tools/wasm-split subdirectory.
mode | file | lines changed |
---|---|---|
-rw-r--r-- | CMakeLists.txt | 25 |
-rw-r--r-- | src/tools/CMakeLists.txt | 21 |
-rw-r--r-- | src/tools/wasm-split.cpp | 969 |
-rw-r--r-- | src/tools/wasm-split/CMakeLists.txt | 10 |
-rw-r--r-- | src/tools/wasm-split/instrumenter.cpp | 185 |
-rw-r--r-- | src/tools/wasm-split/instrumenter.h | 47 |
-rw-r--r-- | src/tools/wasm-split/split-options.cpp | 356 |
-rw-r--r-- | src/tools/wasm-split/split-options.h | 86 |
-rw-r--r-- | src/tools/wasm-split/wasm-split.cpp | 396 |
9 files changed, 1103 insertions, 992 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 037ae4a45..69ab81bda 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -286,7 +286,7 @@ endif() # Static libraries # Current (partial) dependency structure is as follows: -# passes -> wasm -> asmjs -> support +# tools -> passes -> wasm -> asmjs -> support # TODO: It's odd that wasm should depend on asmjs, maybe we should fix that. add_subdirectory(src/ir) add_subdirectory(src/asmjs) @@ -295,6 +295,7 @@ add_subdirectory(src/emscripten-optimizer) add_subdirectory(src/passes) add_subdirectory(src/support) add_subdirectory(src/wasm) +add_subdirectory(src/tools) add_subdirectory(third_party) # Configure lit tests @@ -339,28 +340,6 @@ if(NOT BYN_INSTALL_TOOLS_ONLY) install(FILES src/binaryen-c.h src/wasm-delegations.def DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() -function(binaryen_add_executable name sources) - add_executable(${name} ${sources}) - target_link_libraries(${name} ${CMAKE_THREAD_LIBS_INIT}) - target_link_libraries(${name} binaryen) - set_property(TARGET ${name} PROPERTY CXX_STANDARD ${CXX_STANDARD}) - set_property(TARGET ${name} PROPERTY CXX_STANDARD_REQUIRED ON) - binaryen_setup_rpath(${name}) - install(TARGETS ${name} DESTINATION ${CMAKE_INSTALL_BINDIR}) -endfunction() - -binaryen_add_executable(wasm-opt src/tools/wasm-opt.cpp) -binaryen_add_executable(wasm-shell src/tools/wasm-shell.cpp) -binaryen_add_executable(wasm-metadce src/tools/wasm-metadce.cpp) -binaryen_add_executable(wasm2js src/tools/wasm2js.cpp) -binaryen_add_executable(wasm-emscripten-finalize src/tools/wasm-emscripten-finalize.cpp) -binaryen_add_executable(wasm-as src/tools/wasm-as.cpp) -binaryen_add_executable(wasm-dis src/tools/wasm-dis.cpp) -binaryen_add_executable(wasm-ctor-eval src/tools/wasm-ctor-eval.cpp) -binaryen_add_executable(wasm-reduce src/tools/wasm-reduce.cpp) -binaryen_add_executable(wasm-split src/tools/wasm-split.cpp) - - # binaryen.js # # Note that we can't emit binaryen.js directly, as there is libbinaryen already 
diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt new file mode 100644 index 000000000..b770e5a61 --- /dev/null +++ b/src/tools/CMakeLists.txt @@ -0,0 +1,21 @@ +function(binaryen_add_executable name sources) + add_executable(${name} ${sources}) + target_link_libraries(${name} ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(${name} binaryen) + set_property(TARGET ${name} PROPERTY CXX_STANDARD ${CXX_STANDARD}) + set_property(TARGET ${name} PROPERTY CXX_STANDARD_REQUIRED ON) + binaryen_setup_rpath(${name}) + install(TARGETS ${name} DESTINATION ${CMAKE_INSTALL_BINDIR}) +endfunction() + +binaryen_add_executable(wasm-opt wasm-opt.cpp) +binaryen_add_executable(wasm-shell wasm-shell.cpp) +binaryen_add_executable(wasm-metadce wasm-metadce.cpp) +binaryen_add_executable(wasm2js wasm2js.cpp) +binaryen_add_executable(wasm-emscripten-finalize wasm-emscripten-finalize.cpp) +binaryen_add_executable(wasm-as wasm-as.cpp) +binaryen_add_executable(wasm-dis wasm-dis.cpp) +binaryen_add_executable(wasm-ctor-eval wasm-ctor-eval.cpp) +binaryen_add_executable(wasm-reduce wasm-reduce.cpp) + +add_subdirectory(wasm-split) diff --git a/src/tools/wasm-split.cpp b/src/tools/wasm-split.cpp deleted file mode 100644 index 83c170abe..000000000 --- a/src/tools/wasm-split.cpp +++ /dev/null @@ -1,969 +0,0 @@ -/* - * Copyright 2020 WebAssembly Community Group participants - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// wasm-split: Split a module in two or instrument a module to inform future -// splitting. - -#include "ir/module-splitting.h" -#include "ir/module-utils.h" -#include "ir/names.h" -#include "pass.h" -#include "support/file.h" -#include "support/name.h" -#include "support/path.h" -#include "support/utilities.h" -#include "tool-options.h" -#include "wasm-binary.h" -#include "wasm-builder.h" -#include "wasm-io.h" -#include "wasm-type.h" -#include "wasm-validator.h" -#include <sstream> - -using namespace wasm; - -namespace { - -const std::string DEFAULT_PROFILE_EXPORT("__write_profile"); - -std::set<Name> parseNameListFromLine(const std::string& line) { - std::set<Name> names; - std::istringstream stream(line); - for (std::string name; std::getline(stream, name, ',');) { - names.insert(name); - } - return names; -} - -std::set<Name> parseNameListFromFile(const std::string& filename) { - std::ifstream infile(filename); - if (!infile.is_open()) { - std::cerr << "Failed opening '" << filename << "'" << std::endl; - exit(EXIT_FAILURE); - } - - std::set<Name> names; - std::string line; - while (std::getline(infile, line)) { - if (line.length() > 0) { - names.insert(line); - } - } - - return names; -} - -std::set<Name> parseNameList(const std::string& listOrFile) { - if (!listOrFile.empty() && listOrFile[0] == '@') { - return parseNameListFromFile(listOrFile.substr(1)); - } - - return parseNameListFromLine(listOrFile); -} - -struct WasmSplitOptions : ToolOptions { - enum class Mode : unsigned { - Split, - Instrument, - MergeProfiles, - }; - Mode mode = Mode::Split; - constexpr static size_t NumModes = - static_cast<unsigned>(Mode::MergeProfiles) + 1; - - bool verbose = false; - bool emitBinary = true; - bool symbolMap = false; - bool placeholderMap = false; - - // TODO: Remove this. See the comment in wasm-binary.h. 
- bool emitModuleNames = false; - - std::string profileFile; - std::string profileExport = DEFAULT_PROFILE_EXPORT; - - std::set<Name> keepFuncs; - std::set<Name> splitFuncs; - - std::vector<std::string> inputFiles; - std::string output; - std::string primaryOutput; - std::string secondaryOutput; - - std::string importNamespace; - std::string placeholderNamespace; - std::string exportPrefix; - - // A hack to ensure the split and instrumented modules have the same table - // size when using Emscripten's SPLIT_MODULE mode with dynamic linking. TODO: - // Figure out a more elegant solution for that use case and remove this. - int initialTableSize = -1; - - // The options that are valid for each mode. - std::array<std::unordered_set<std::string>, NumModes> validOptions; - std::vector<std::string> usedOptions; - - WasmSplitOptions(); - WasmSplitOptions& add(const std::string& longName, - const std::string& shortName, - const std::string& description, - std::vector<Mode>&& modes, - Arguments arguments, - const Action& action); - WasmSplitOptions& add(const std::string& longName, - const std::string& shortName, - const std::string& description, - Arguments arguments, - const Action& action); - bool validate(); - void parse(int argc, const char* argv[]); -}; - -WasmSplitOptions::WasmSplitOptions() - : ToolOptions("wasm-split", - "Split a module into a primary module and a secondary " - "module, or instrument a module to gather a profile that " - "can inform future splitting, or manage such profiles. Options " - "that are only accepted in particular modes are marked with " - "the accepted \"[<modes>]\" in their descriptions.") { - (*this) - .add("--split", - "", - "Split an input module into two output modules. 
The default mode.", - Options::Arguments::Zero, - [&](Options* o, const std::string& arugment) { mode = Mode::Split; }) - .add( - "--instrument", - "", - "Instrument an input module to allow it to generate a profile that can" - " be used to guide splitting.", - Options::Arguments::Zero, - [&](Options* o, const std::string& argument) { mode = Mode::Instrument; }) - .add("--merge-profiles", - "", - "Merge multiple profiles for the same module into a single profile.", - Options::Arguments::Zero, - [&](Options* o, const std::string& argument) { - mode = Mode::MergeProfiles; - }) - .add( - "--profile", - "", - "The profile to use to guide splitting.", - {Mode::Split}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { profileFile = argument; }) - .add("--keep-funcs", - "", - "Comma-separated list of functions to keep in the primary module, " - "regardless of any profile. " - "You can also pass a file with a list of functions separated by new " - "lines. " - "To do this, prepend @ before filename (--keep-funcs @myfile)", - {Mode::Split}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { - keepFuncs = parseNameList(argument); - }) - .add("--split-funcs", - "", - "Comma-separated list of functions to split into the secondary " - "module, regardless of any profile. If there is no profile, then " - "this defaults to all functions defined in the module. " - "You can also pass a file with a list of functions separated by new " - "lines. 
" - "To do this, prepend @ before filename (--split-funcs @myfile)", - {Mode::Split}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { - splitFuncs = parseNameList(argument); - }) - .add("--primary-output", - "-o1", - "Output file for the primary module.", - {Mode::Split}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { - primaryOutput = argument; - }) - .add("--secondary-output", - "-o2", - "Output file for the secondary module.", - {Mode::Split}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { - secondaryOutput = argument; - }) - .add("--symbolmap", - "", - "Write a symbol map file for each of the output modules.", - {Mode::Split}, - Options::Arguments::Zero, - [&](Options* o, const std::string& argument) { symbolMap = true; }) - .add( - "--placeholdermap", - "", - "Write a file mapping placeholder indices to the function names.", - {Mode::Split}, - Options::Arguments::Zero, - [&](Options* o, const std::string& argument) { placeholderMap = true; }) - .add("--import-namespace", - "", - "The namespace from which to import objects from the primary " - "module into the secondary module.", - {Mode::Split}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { - importNamespace = argument; - }) - .add("--placeholder-namespace", - "", - "The namespace from which to import placeholder functions into " - "the primary module.", - {Mode::Split}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { - placeholderNamespace = argument; - }) - .add( - "--export-prefix", - "", - "An identifying prefix to prepend to new export names created " - "by module splitting.", - {Mode::Split}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { exportPrefix = argument; }) - .add("--profile-export", - "", - "The export name of the function the embedder calls to write the " - "profile into memory. 
Defaults to `__write_profile`.", - {Mode::Instrument}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { - profileExport = argument; - }) - .add( - "--emit-module-names", - "", - "Emit module names, even if not emitting the rest of the names section. " - "Can help differentiate the modules in stack traces. This option will be " - "removed once simpler ways of naming modules are widely available. See " - "https://bugs.chromium.org/p/v8/issues/detail?id=11808.", - {Mode::Split, Mode::Instrument}, - Options::Arguments::Zero, - [&](Options* o, const std::string& arguments) { emitModuleNames = true; }) - .add("--initial-table", - "", - "A hack to ensure the split and instrumented modules have the same " - "table size when using Emscripten's SPLIT_MODULE mode with dynamic " - "linking. TODO: Figure out a more elegant solution for that use " - "case and remove this.", - {Mode::Split, Mode::Instrument}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { - initialTableSize = std::stoi(argument); - }) - .add("--emit-text", - "-S", - "Emit text instead of binary for the output file or files.", - {Mode::Split, Mode::Instrument}, - Options::Arguments::Zero, - [&](Options* o, const std::string& argument) { emitBinary = false; }) - .add("--debuginfo", - "-g", - "Emit names section in wasm binary (or full debuginfo in wast)", - {Mode::Split, Mode::Instrument}, - Options::Arguments::Zero, - [&](Options* o, const std::string& arguments) { - passOptions.debugInfo = true; - }) - .add("--output", - "-o", - "Output file.", - {Mode::Instrument, Mode::MergeProfiles}, - Options::Arguments::One, - [&](Options* o, const std::string& argument) { output = argument; }) - .add("--verbose", - "-v", - "Verbose output mode. 
Prints the functions that will be kept " - "and split out when splitting a module.", - Options::Arguments::Zero, - [&](Options* o, const std::string& argument) { - verbose = true; - quiet = false; - }) - .add_positional("INFILES", - Options::Arguments::N, - [&](Options* o, const std::string& argument) { - inputFiles.push_back(argument); - }); -} - -std::ostream& operator<<(std::ostream& o, WasmSplitOptions::Mode& mode) { - switch (mode) { - case WasmSplitOptions::Mode::Split: - o << "split"; - break; - case WasmSplitOptions::Mode::Instrument: - o << "instrument"; - break; - case WasmSplitOptions::Mode::MergeProfiles: - o << "merge-profiles"; - break; - } - return o; -} - -WasmSplitOptions& WasmSplitOptions::add(const std::string& longName, - const std::string& shortName, - const std::string& description, - std::vector<Mode>&& modes, - Arguments arguments, - const Action& action) { - // Insert the valid modes at the beginning of the description. - std::stringstream desc; - if (modes.size()) { - desc << '['; - std::string sep = ""; - for (Mode m : modes) { - validOptions[static_cast<unsigned>(m)].insert(longName); - desc << sep << m; - sep = ", "; - } - desc << "] "; - } - desc << description; - ToolOptions::add( - longName, - shortName, - desc.str(), - arguments, - [&, action, longName](Options* o, const std::string& argument) { - usedOptions.push_back(longName); - action(o, argument); - }); - return *this; -} - -WasmSplitOptions& WasmSplitOptions::add(const std::string& longName, - const std::string& shortName, - const std::string& description, - Arguments arguments, - const Action& action) { - // Add an option valid in all modes. 
- for (unsigned i = 0; i < NumModes; ++i) { - validOptions[i].insert(longName); - } - return add(longName, shortName, description, {}, arguments, action); -} - -bool WasmSplitOptions::validate() { - bool valid = true; - auto fail = [&](auto msg) { - std::cerr << "error: " << msg << "\n"; - valid = false; - }; - - // Validate the positional arguments. - if (inputFiles.size() == 0) { - fail("no input file"); - } - switch (mode) { - case Mode::Split: - case Mode::Instrument: - if (inputFiles.size() > 1) { - fail("Cannot have more than one input file."); - } - break; - case Mode::MergeProfiles: - // Any number >= 1 allowed. - break; - } - - // Validate that all used options are allowed in the current mode. - for (std::string& opt : usedOptions) { - if (!validOptions[static_cast<unsigned>(mode)].count(opt)) { - std::stringstream msg; - msg << "Option " << opt << " cannot be used in " << mode << " mode."; - fail(msg.str()); - } - } - - if (mode == Mode::Split) { - std::vector<Name> impossible; - std::set_intersection(keepFuncs.begin(), - keepFuncs.end(), - splitFuncs.begin(), - splitFuncs.end(), - std::inserter(impossible, impossible.end())); - for (auto& func : impossible) { - fail(std::string("Cannot both keep and split out function ") + - func.c_str()); - } - } - - return valid; -} - -void WasmSplitOptions::parse(int argc, const char* argv[]) { - ToolOptions::parse(argc, argv); - // Since --quiet is defined in ToolOptions but --verbose is defined here, - // --quiet doesn't know to unset --verbose. Fix it up here. 
- if (quiet && verbose) { - verbose = false; - } -} - -void parseInput(Module& wasm, const WasmSplitOptions& options) { - options.applyFeatures(wasm); - ModuleReader reader; - reader.setProfile(options.profile); - try { - reader.read(options.inputFiles[0], wasm); - } catch (ParseException& p) { - p.dump(std::cerr); - std::cerr << '\n'; - Fatal() << "error parsing wasm"; - } catch (std::bad_alloc&) { - Fatal() << "error building module, std::bad_alloc (possibly invalid " - "request for silly amounts of memory)"; - } - - if (options.passOptions.validate && !WasmValidator().validate(wasm)) { - Fatal() << "error validating input"; - } -} - -// Add a global monotonic counter and a timestamp global for each function, code -// at the beginning of each function to set its timestamp, and a new exported -// function for dumping the profile data. -struct Instrumenter : public Pass { - PassRunner* runner = nullptr; - Module* wasm = nullptr; - - const std::string& profileExport; - uint64_t moduleHash; - - Name counterGlobal; - std::vector<Name> functionGlobals; - - Instrumenter(const std::string& profileExport, uint64_t moduleHash); - - void run(PassRunner* runner, Module* wasm) override; - void addGlobals(); - void instrumentFuncs(); - void addProfileExport(); -}; - -Instrumenter::Instrumenter(const std::string& profileExport, - uint64_t moduleHash) - : profileExport(profileExport), moduleHash(moduleHash) {} - -void Instrumenter::run(PassRunner* runner, Module* wasm) { - this->runner = runner; - this->wasm = wasm; - addGlobals(); - instrumentFuncs(); - addProfileExport(); -} - -void Instrumenter::addGlobals() { - // Create fresh global names (over-reserves, but that's ok) - counterGlobal = Names::getValidGlobalName(*wasm, "monotonic_counter"); - functionGlobals.reserve(wasm->functions.size()); - ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { - functionGlobals.push_back(Names::getValidGlobalName( - *wasm, std::string(func->name.c_str()) + "_timestamp")); - }); - 
- // Create and add new globals - auto addGlobal = [&](Name name) { - auto global = Builder::makeGlobal( - name, - Type::i32, - Builder(*wasm).makeConst(Literal::makeZero(Type::i32)), - Builder::Mutable); - global->hasExplicitName = true; - wasm->addGlobal(std::move(global)); - }; - addGlobal(counterGlobal); - for (auto& name : functionGlobals) { - addGlobal(name); - } -} - -void Instrumenter::instrumentFuncs() { - // Inject the following code at the beginning of each function to advance the - // monotonic counter and set the function's timestamp if it hasn't already - // been set. - // - // (if (i32.eqz (global.get $timestamp)) - // (block - // (global.set $monotonic_counter - // (i32.add - // (global.get $monotonic_counter) - // (i32.const 1) - // ) - // ) - // (global.set $timestamp - // (global.get $monotonic_counter) - // ) - // ) - // ) - Builder builder(*wasm); - auto globalIt = functionGlobals.begin(); - ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { - func->body = builder.makeSequence( - builder.makeIf( - builder.makeUnary(EqZInt32, - builder.makeGlobalGet(*globalIt, Type::i32)), - builder.makeSequence( - builder.makeGlobalSet( - counterGlobal, - builder.makeBinary(AddInt32, - builder.makeGlobalGet(counterGlobal, Type::i32), - builder.makeConst(Literal::makeOne(Type::i32)))), - builder.makeGlobalSet( - *globalIt, builder.makeGlobalGet(counterGlobal, Type::i32)))), - func->body, - func->body->type); - ++globalIt; - }); -} - -// wasm-split profile format: -// -// The wasm-split profile is a binary format designed to be simple to produce -// and consume. It is comprised of: -// -// 1. An 8-byte module hash -// -// 2. A 4-byte timestamp for each defined function -// -// The module hash is meant to guard against bugs where the module that was -// instrumented and the module that is being split are different. The timestamps -// are non-zero for functions that were called during the instrumented run and 0 -// otherwise. 
Functions with smaller non-zero timestamps were called earlier in -// the instrumented run than funtions with larger timestamps. - -void Instrumenter::addProfileExport() { - // Create and export a function to dump the profile into a given memory - // buffer. The function takes the available address and buffer size as - // arguments and returns the total size of the profile. It only actually - // writes the profile if the given space is sufficient to hold it. - auto name = Names::getValidFunctionName(*wasm, profileExport); - auto writeProfile = Builder::makeFunction( - name, Signature({Type::i32, Type::i32}, Type::i32), {}); - writeProfile->hasExplicitName = true; - writeProfile->setLocalName(0, "addr"); - writeProfile->setLocalName(1, "size"); - - // Calculate the size of the profile: - // 8 bytes module hash + - // 4 bytes for the timestamp for each function - const size_t profileSize = 8 + 4 * functionGlobals.size(); - - // Create the function body - Builder builder(*wasm); - auto getAddr = [&]() { return builder.makeLocalGet(0, Type::i32); }; - auto getSize = [&]() { return builder.makeLocalGet(1, Type::i32); }; - auto hashConst = [&]() { return builder.makeConst(int64_t(moduleHash)); }; - auto profileSizeConst = [&]() { - return builder.makeConst(int32_t(profileSize)); - }; - - // Write the hash followed by all the time stamps - Expression* writeData = - builder.makeStore(8, 0, 1, getAddr(), hashConst(), Type::i64); - - uint32_t offset = 8; - for (const auto& global : functionGlobals) { - writeData = builder.blockify( - writeData, - builder.makeStore(4, - offset, - 1, - getAddr(), - builder.makeGlobalGet(global, Type::i32), - Type::i32)); - offset += 4; - } - - writeProfile->body = builder.makeSequence( - builder.makeIf(builder.makeBinary(GeUInt32, getSize(), profileSizeConst()), - writeData), - profileSizeConst()); - - // Create an export for the function - wasm->addFunction(std::move(writeProfile)); - wasm->addExport( - Builder::makeExport(profileExport, 
name, ExternalKind::Function)); - - // Also make sure there is a memory with enough pages to write into - size_t pages = (profileSize + Memory::kPageSize - 1) / Memory::kPageSize; - if (!wasm->memory.exists) { - wasm->memory.exists = true; - wasm->memory.initial = pages; - wasm->memory.max = pages; - } else if (wasm->memory.initial < pages) { - wasm->memory.initial = pages; - if (wasm->memory.max < pages) { - wasm->memory.max = pages; - } - } - - // TODO: export the memory if it is not already exported. -} - -uint64_t hashFile(const std::string& filename) { - auto contents(read_file<std::vector<char>>(filename, Flags::Binary)); - size_t digest = 0; - // Don't use `hash` or `rehash` - they aren't deterministic between executions - for (char c : contents) { - hash_combine(digest, c); - } - return uint64_t(digest); -} - -void adjustTableSize(Module& wasm, int initialSize) { - if (initialSize < 0) { - return; - } - if (wasm.tables.empty()) { - Fatal() << "--initial-table used but there is no table"; - } - - auto& table = wasm.tables.front(); - - if ((uint64_t)initialSize < table->initial) { - Fatal() << "Specified initial table size too small, should be at least " - << table->initial; - } - if ((uint64_t)initialSize > table->max) { - Fatal() << "Specified initial table size larger than max table size " - << table->max; - } - table->initial = initialSize; -} - -void writeModule(Module& wasm, - std::string filename, - const WasmSplitOptions& options) { - ModuleWriter writer; - writer.setBinary(options.emitBinary); - writer.setDebugInfo(options.passOptions.debugInfo); - if (options.emitModuleNames) { - writer.setEmitModuleName(true); - } - writer.write(wasm, filename); -} - -void instrumentModule(const WasmSplitOptions& options) { - Module wasm; - parseInput(wasm, options); - - // Check that the profile export name is not already taken - if (wasm.getExportOrNull(options.profileExport) != nullptr) { - Fatal() << "error: Export " << options.profileExport << " already 
exists."; - } - - uint64_t moduleHash = hashFile(options.inputFiles[0]); - PassRunner runner(&wasm, options.passOptions); - Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); - - adjustTableSize(wasm, options.initialTableSize); - - // Write the output modules - writeModule(wasm, options.output, options); -} - -struct ProfileData { - uint64_t hash; - std::vector<size_t> timestamps; -}; - -// See "wasm-split profile format" above for more information. -ProfileData readProfile(const std::string& file) { - auto profileData = read_file<std::vector<char>>(file, Flags::Binary); - size_t i = 0; - auto readi32 = [&]() { - if (i + 4 > profileData.size()) { - Fatal() << "Unexpected end of profile data in " << file; - } - uint32_t i32 = 0; - i32 |= uint32_t(uint8_t(profileData[i++])); - i32 |= uint32_t(uint8_t(profileData[i++])) << 8; - i32 |= uint32_t(uint8_t(profileData[i++])) << 16; - i32 |= uint32_t(uint8_t(profileData[i++])) << 24; - return i32; - }; - - uint64_t hash = readi32(); - hash |= uint64_t(readi32()) << 32; - - std::vector<size_t> timestamps; - while (i < profileData.size()) { - timestamps.push_back(readi32()); - } - - return {hash, timestamps}; -} - -void writeSymbolMap(Module& wasm, std::string filename) { - PassOptions options; - options.arguments["symbolmap"] = filename; - PassRunner runner(&wasm, options); - runner.add("symbolmap"); - runner.run(); -} - -void writePlaceholderMap(const std::map<size_t, Name> placeholderMap, - std::string filename) { - Output output(filename, Flags::Text); - auto& o = output.getStream(); - for (auto pair : placeholderMap) { - o << pair.first << ':' << pair.second << '\n'; - } -} - -void splitModule(const WasmSplitOptions& options) { - Module wasm; - parseInput(wasm, options); - - std::set<Name> keepFuncs; - - if (options.profileFile.size()) { - // Use the profile to initialize `keepFuncs`. 
- uint64_t hash = hashFile(options.inputFiles[0]); - ProfileData profile = readProfile(options.profileFile); - if (profile.hash != hash) { - Fatal() << "error: checksum in profile does not match module checksum. " - << "The split module must be the original module that was " - << "instrumented to generate the profile."; - } - size_t i = 0; - ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { - if (i >= profile.timestamps.size()) { - Fatal() << "Unexpected end of profile data"; - } - if (profile.timestamps[i++] > 0) { - keepFuncs.insert(func->name); - } - }); - if (i != profile.timestamps.size()) { - Fatal() << "Unexpected extra profile data"; - } - } - - // Add in the functions specified with --keep-funcs - for (auto& func : options.keepFuncs) { - if (!options.quiet && wasm.getFunctionOrNull(func) == nullptr) { - std::cerr << "warning: function " << func << " does not exist\n"; - } - keepFuncs.insert(func); - } - - // Remove the functions specified with --remove-funcs - for (auto& func : options.splitFuncs) { - auto* function = wasm.getFunctionOrNull(func); - if (!options.quiet && function == nullptr) { - std::cerr << "warning: function " << func << " does not exist\n"; - } - if (function && function->imported()) { - if (!options.quiet) { - std::cerr << "warning: cannot split out imported function " << func - << "\n"; - } - } else { - keepFuncs.erase(func); - } - } - - if (!options.quiet && keepFuncs.size() == 0) { - std::cerr << "warning: not keeping any functions in the primary module\n"; - } - - // If warnings are enabled, check that any functions are being split out. 
- if (!options.quiet) { - std::set<Name> splitFuncs; - ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { - if (keepFuncs.count(func->name) == 0) { - splitFuncs.insert(func->name); - } - }); - - if (splitFuncs.size() == 0) { - std::cerr - << "warning: not splitting any functions out to the secondary module\n"; - } - - // Dump the kept and split functions if we are verbose - if (options.verbose) { - auto printCommaSeparated = [&](auto funcs) { - for (auto it = funcs.begin(); it != funcs.end(); ++it) { - if (it != funcs.begin()) { - std::cout << ", "; - } - std::cout << *it; - } - }; - - std::cout << "Keeping functions: "; - printCommaSeparated(keepFuncs); - std::cout << "\n"; - - std::cout << "Splitting out functions: "; - printCommaSeparated(splitFuncs); - std::cout << "\n"; - } - } - - // Actually perform the splitting - ModuleSplitting::Config config; - config.primaryFuncs = std::move(keepFuncs); - if (options.importNamespace.size()) { - config.importNamespace = options.importNamespace; - } - if (options.placeholderNamespace.size()) { - config.placeholderNamespace = options.placeholderNamespace; - } - if (options.exportPrefix.size()) { - config.newExportPrefix = options.exportPrefix; - } - config.minimizeNewExportNames = !options.passOptions.debugInfo; - auto splitResults = ModuleSplitting::splitFunctions(wasm, config); - auto& secondary = splitResults.secondary; - - adjustTableSize(wasm, options.initialTableSize); - adjustTableSize(*secondary, options.initialTableSize); - - if (options.symbolMap) { - writeSymbolMap(wasm, options.primaryOutput + ".symbols"); - writeSymbolMap(*secondary, options.secondaryOutput + ".symbols"); - } - - if (options.placeholderMap) { - writePlaceholderMap(splitResults.placeholderMap, - options.primaryOutput + ".placeholders"); - } - - // Set the names of the split modules. This can help differentiate them in - // stack traces. 
- if (options.emitModuleNames) { - if (!wasm.name) { - wasm.name = Path::getBaseName(options.primaryOutput); - } - secondary->name = Path::getBaseName(options.secondaryOutput); - } - - // write the output modules - writeModule(wasm, options.primaryOutput, options); - writeModule(*secondary, options.secondaryOutput, options); -} - -void mergeProfiles(const WasmSplitOptions& options) { - // Read the initial profile. We will merge other profiles into this one. - ProfileData data = readProfile(options.inputFiles[0]); - - // In verbose mode, we want to find profiles that don't contribute to the - // merged profile. To do that, keep track of how many profiles each function - // appears in. If any profile contains only functions that appear in multiple - // profiles, it could be dropped. - std::vector<size_t> numProfiles; - if (options.verbose) { - numProfiles.resize(data.timestamps.size()); - for (size_t t = 0; t < data.timestamps.size(); ++t) { - if (data.timestamps[t]) { - numProfiles[t] = 1; - } - } - } - - // Read all the other profiles, taking the minimum nonzero timestamp for each - // function. - for (size_t i = 1; i < options.inputFiles.size(); ++i) { - ProfileData newData = readProfile(options.inputFiles[i]); - if (newData.hash != data.hash) { - Fatal() << "Checksum in profile " << options.inputFiles[i] - << " does not match hash in profile " << options.inputFiles[0]; - } - if (newData.timestamps.size() != data.timestamps.size()) { - Fatal() << "Profile " << options.inputFiles[i] - << " incompatible with profile " << options.inputFiles[0]; - } - for (size_t t = 0; t < data.timestamps.size(); ++t) { - if (data.timestamps[t] && newData.timestamps[t]) { - data.timestamps[t] = - std::min(data.timestamps[t], newData.timestamps[t]); - } else if (newData.timestamps[t]) { - data.timestamps[t] = newData.timestamps[t]; - } - if (options.verbose && newData.timestamps[t]) { - ++numProfiles[t]; - } - } - } - - // Check for useless profiles. 
- if (options.verbose) { - for (const auto& file : options.inputFiles) { - bool useless = true; - ProfileData newData = readProfile(file); - for (size_t t = 0; t < newData.timestamps.size(); ++t) { - if (newData.timestamps[t] && numProfiles[t] == 1) { - useless = false; - break; - } - } - if (useless) { - std::cout << "Profile " << file - << " only includes functions included in other profiles.\n"; - } - } - } - - // Write the combined profile. - BufferWithRandomAccess buffer; - buffer << data.hash; - for (size_t t = 0; t < data.timestamps.size(); ++t) { - buffer << uint32_t(data.timestamps[t]); - } - Output out(options.output, Flags::Binary); - buffer.writeTo(out.getStream()); -} - -} // anonymous namespace - -int main(int argc, const char* argv[]) { - WasmSplitOptions options; - options.parse(argc, argv); - - if (!options.validate()) { - Fatal() << "Invalid command line arguments"; - } - - switch (options.mode) { - case WasmSplitOptions::Mode::Split: - splitModule(options); - break; - case WasmSplitOptions::Mode::Instrument: - instrumentModule(options); - break; - case WasmSplitOptions::Mode::MergeProfiles: - mergeProfiles(options); - break; - } -} diff --git a/src/tools/wasm-split/CMakeLists.txt b/src/tools/wasm-split/CMakeLists.txt new file mode 100644 index 000000000..4f4e7d832 --- /dev/null +++ b/src/tools/wasm-split/CMakeLists.txt @@ -0,0 +1,10 @@ +FILE(GLOB wasm_split_HEADERS *h) +set(wasm_split_SOURCES + split-options.cpp + instrumenter.cpp + ${wasm_split_HEADERS} +) +add_library(wasm-split-lib OBJECT ${wasm_split_SOURCES}) + +binaryen_add_executable(wasm-split wasm-split.cpp) +target_link_libraries(wasm-split wasm-split-lib) diff --git a/src/tools/wasm-split/instrumenter.cpp b/src/tools/wasm-split/instrumenter.cpp new file mode 100644 index 000000000..0c5e96b54 --- /dev/null +++ b/src/tools/wasm-split/instrumenter.cpp @@ -0,0 +1,185 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instrumenter.h" +#include "ir/module-utils.h" +#include "ir/names.h" +#include "support/name.h" +#include "wasm-type.h" + +namespace wasm { + +Instrumenter::Instrumenter(const std::string& profileExport, + uint64_t moduleHash) + : profileExport(profileExport), moduleHash(moduleHash) {} + +void Instrumenter::run(PassRunner* runner, Module* wasm) { + this->runner = runner; + this->wasm = wasm; + addGlobals(); + instrumentFuncs(); + addProfileExport(); +} + +void Instrumenter::addGlobals() { + // Create fresh global names (over-reserves, but that's ok) + counterGlobal = Names::getValidGlobalName(*wasm, "monotonic_counter"); + functionGlobals.reserve(wasm->functions.size()); + ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { + functionGlobals.push_back(Names::getValidGlobalName( + *wasm, std::string(func->name.c_str()) + "_timestamp")); + }); + + // Create and add new globals + auto addGlobal = [&](Name name) { + auto global = Builder::makeGlobal( + name, + Type::i32, + Builder(*wasm).makeConst(Literal::makeZero(Type::i32)), + Builder::Mutable); + global->hasExplicitName = true; + wasm->addGlobal(std::move(global)); + }; + addGlobal(counterGlobal); + for (auto& name : functionGlobals) { + addGlobal(name); + } +} + +void Instrumenter::instrumentFuncs() { + // Inject the following code at the beginning of each function to advance the + // monotonic counter and set the function's timestamp if it hasn't already + // been set. 
+ // + // (if (i32.eqz (global.get $timestamp)) + // (block + // (global.set $monotonic_counter + // (i32.add + // (global.get $monotonic_counter) + // (i32.const 1) + // ) + // ) + // (global.set $timestamp + // (global.get $monotonic_counter) + // ) + // ) + // ) + Builder builder(*wasm); + auto globalIt = functionGlobals.begin(); + ModuleUtils::iterDefinedFunctions(*wasm, [&](Function* func) { + func->body = builder.makeSequence( + builder.makeIf( + builder.makeUnary(EqZInt32, + builder.makeGlobalGet(*globalIt, Type::i32)), + builder.makeSequence( + builder.makeGlobalSet( + counterGlobal, + builder.makeBinary(AddInt32, + builder.makeGlobalGet(counterGlobal, Type::i32), + builder.makeConst(Literal::makeOne(Type::i32)))), + builder.makeGlobalSet( + *globalIt, builder.makeGlobalGet(counterGlobal, Type::i32)))), + func->body, + func->body->type); + ++globalIt; + }); +} + +// wasm-split profile format: +// +// The wasm-split profile is a binary format designed to be simple to produce +// and consume. It is comprised of: +// +// 1. An 8-byte module hash +// +// 2. A 4-byte timestamp for each defined function +// +// The module hash is meant to guard against bugs where the module that was +// instrumented and the module that is being split are different. The timestamps +// are non-zero for functions that were called during the instrumented run and 0 +// otherwise. Functions with smaller non-zero timestamps were called earlier in +// the instrumented run than functions with larger timestamps. + +void Instrumenter::addProfileExport() { + // Create and export a function to dump the profile into a given memory + // buffer. The function takes the available address and buffer size as + // arguments and returns the total size of the profile. It only actually + // writes the profile if the given space is sufficient to hold it. 
+ auto name = Names::getValidFunctionName(*wasm, profileExport); + auto writeProfile = Builder::makeFunction( + name, Signature({Type::i32, Type::i32}, Type::i32), {}); + writeProfile->hasExplicitName = true; + writeProfile->setLocalName(0, "addr"); + writeProfile->setLocalName(1, "size"); + + // Calculate the size of the profile: + // 8 bytes module hash + + // 4 bytes for the timestamp for each function + const size_t profileSize = 8 + 4 * functionGlobals.size(); + + // Create the function body + Builder builder(*wasm); + auto getAddr = [&]() { return builder.makeLocalGet(0, Type::i32); }; + auto getSize = [&]() { return builder.makeLocalGet(1, Type::i32); }; + auto hashConst = [&]() { return builder.makeConst(int64_t(moduleHash)); }; + auto profileSizeConst = [&]() { + return builder.makeConst(int32_t(profileSize)); + }; + + // Write the hash followed by all the time stamps + Expression* writeData = + builder.makeStore(8, 0, 1, getAddr(), hashConst(), Type::i64); + + uint32_t offset = 8; + for (const auto& global : functionGlobals) { + writeData = builder.blockify( + writeData, + builder.makeStore(4, + offset, + 1, + getAddr(), + builder.makeGlobalGet(global, Type::i32), + Type::i32)); + offset += 4; + } + + writeProfile->body = builder.makeSequence( + builder.makeIf(builder.makeBinary(GeUInt32, getSize(), profileSizeConst()), + writeData), + profileSizeConst()); + + // Create an export for the function + wasm->addFunction(std::move(writeProfile)); + wasm->addExport( + Builder::makeExport(profileExport, name, ExternalKind::Function)); + + // Also make sure there is a memory with enough pages to write into + size_t pages = (profileSize + Memory::kPageSize - 1) / Memory::kPageSize; + if (!wasm->memory.exists) { + wasm->memory.exists = true; + wasm->memory.initial = pages; + wasm->memory.max = pages; + } else if (wasm->memory.initial < pages) { + wasm->memory.initial = pages; + if (wasm->memory.max < pages) { + wasm->memory.max = pages; + } + } + + // TODO: export 
the memory if it is not already exported. +} + +} // namespace wasm diff --git a/src/tools/wasm-split/instrumenter.h b/src/tools/wasm-split/instrumenter.h new file mode 100644 index 000000000..4f714fde9 --- /dev/null +++ b/src/tools/wasm-split/instrumenter.h @@ -0,0 +1,47 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_tools_wasm_split_instrumenter_h +#define wasm_tools_wasm_split_instrumenter_h + +#include "pass.h" + +namespace wasm { + +// Add a global monotonic counter and a timestamp global for each function, code +// at the beginning of each function to set its timestamp, and a new exported +// function for dumping the profile data. 
+struct Instrumenter : public Pass { + PassRunner* runner = nullptr; + Module* wasm = nullptr; + + const std::string& profileExport; + uint64_t moduleHash; + + Name counterGlobal; + std::vector<Name> functionGlobals; + + Instrumenter(const std::string& profileExport, uint64_t moduleHash); + + void run(PassRunner* runner, Module* wasm) override; + void addGlobals(); + void instrumentFuncs(); + void addProfileExport(); +}; + +} // namespace wasm + +#endif // wasm_tools_wasm_split_instrumenter_h diff --git a/src/tools/wasm-split/split-options.cpp b/src/tools/wasm-split/split-options.cpp new file mode 100644 index 000000000..419555f45 --- /dev/null +++ b/src/tools/wasm-split/split-options.cpp @@ -0,0 +1,356 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "split-options.h" +#include <fstream> + +namespace wasm { + +namespace { + +std::set<Name> parseNameListFromLine(const std::string& line) { + std::set<Name> names; + std::istringstream stream(line); + for (std::string name; std::getline(stream, name, ',');) { + names.insert(name); + } + return names; +} + +std::set<Name> parseNameListFromFile(const std::string& filename) { + std::ifstream infile(filename); + if (!infile.is_open()) { + std::cerr << "Failed opening '" << filename << "'" << std::endl; + exit(EXIT_FAILURE); + } + + std::set<Name> names; + std::string line; + while (std::getline(infile, line)) { + if (line.length() > 0) { + names.insert(line); + } + } + + return names; +} + +std::set<Name> parseNameList(const std::string& listOrFile) { + if (!listOrFile.empty() && listOrFile[0] == '@') { + return parseNameListFromFile(listOrFile.substr(1)); + } + + return parseNameListFromLine(listOrFile); +} + +std::ostream& operator<<(std::ostream& o, WasmSplitOptions::Mode& mode) { + switch (mode) { + case WasmSplitOptions::Mode::Split: + o << "split"; + break; + case WasmSplitOptions::Mode::Instrument: + o << "instrument"; + break; + case WasmSplitOptions::Mode::MergeProfiles: + o << "merge-profiles"; + break; + } + return o; +} + +} // anonymous namespace + +WasmSplitOptions::WasmSplitOptions() + : ToolOptions("wasm-split", + "Split a module into a primary module and a secondary " + "module, or instrument a module to gather a profile that " + "can inform future splitting, or manage such profiles. Options " + "that are only accepted in particular modes are marked with " + "the accepted \"[<modes>]\" in their descriptions.") { + (*this) + .add("--split", + "", + "Split an input module into two output modules. 
The default mode.", + Options::Arguments::Zero, + [&](Options* o, const std::string& arugment) { mode = Mode::Split; }) + .add( + "--instrument", + "", + "Instrument an input module to allow it to generate a profile that can" + " be used to guide splitting.", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { mode = Mode::Instrument; }) + .add("--merge-profiles", + "", + "Merge multiple profiles for the same module into a single profile.", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + mode = Mode::MergeProfiles; + }) + .add( + "--profile", + "", + "The profile to use to guide splitting.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { profileFile = argument; }) + .add("--keep-funcs", + "", + "Comma-separated list of functions to keep in the primary module, " + "regardless of any profile. " + "You can also pass a file with a list of functions separated by new " + "lines. " + "To do this, prepend @ before filename (--keep-funcs @myfile)", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + keepFuncs = parseNameList(argument); + }) + .add("--split-funcs", + "", + "Comma-separated list of functions to split into the secondary " + "module, regardless of any profile. If there is no profile, then " + "this defaults to all functions defined in the module. " + "You can also pass a file with a list of functions separated by new " + "lines. 
" + "To do this, prepend @ before filename (--split-funcs @myfile)", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + splitFuncs = parseNameList(argument); + }) + .add("--primary-output", + "-o1", + "Output file for the primary module.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + primaryOutput = argument; + }) + .add("--secondary-output", + "-o2", + "Output file for the secondary module.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + secondaryOutput = argument; + }) + .add("--symbolmap", + "", + "Write a symbol map file for each of the output modules.", + {Mode::Split}, + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { symbolMap = true; }) + .add( + "--placeholdermap", + "", + "Write a file mapping placeholder indices to the function names.", + {Mode::Split}, + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { placeholderMap = true; }) + .add("--import-namespace", + "", + "The namespace from which to import objects from the primary " + "module into the secondary module.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + importNamespace = argument; + }) + .add("--placeholder-namespace", + "", + "The namespace from which to import placeholder functions into " + "the primary module.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + placeholderNamespace = argument; + }) + .add( + "--export-prefix", + "", + "An identifying prefix to prepend to new export names created " + "by module splitting.", + {Mode::Split}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { exportPrefix = argument; }) + .add("--profile-export", + "", + "The export name of the function the embedder calls to write the " + "profile into memory. 
Defaults to `__write_profile`.", + {Mode::Instrument}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + profileExport = argument; + }) + .add( + "--emit-module-names", + "", + "Emit module names, even if not emitting the rest of the names section. " + "Can help differentiate the modules in stack traces. This option will be " + "removed once simpler ways of naming modules are widely available. See " + "https://bugs.chromium.org/p/v8/issues/detail?id=11808.", + {Mode::Split, Mode::Instrument}, + Options::Arguments::Zero, + [&](Options* o, const std::string& arguments) { emitModuleNames = true; }) + .add("--initial-table", + "", + "A hack to ensure the split and instrumented modules have the same " + "table size when using Emscripten's SPLIT_MODULE mode with dynamic " + "linking. TODO: Figure out a more elegant solution for that use " + "case and remove this.", + {Mode::Split, Mode::Instrument}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { + initialTableSize = std::stoi(argument); + }) + .add("--emit-text", + "-S", + "Emit text instead of binary for the output file or files.", + {Mode::Split, Mode::Instrument}, + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { emitBinary = false; }) + .add("--debuginfo", + "-g", + "Emit names section in wasm binary (or full debuginfo in wast)", + {Mode::Split, Mode::Instrument}, + Options::Arguments::Zero, + [&](Options* o, const std::string& arguments) { + passOptions.debugInfo = true; + }) + .add("--output", + "-o", + "Output file.", + {Mode::Instrument, Mode::MergeProfiles}, + Options::Arguments::One, + [&](Options* o, const std::string& argument) { output = argument; }) + .add("--verbose", + "-v", + "Verbose output mode. 
Prints the functions that will be kept " + "and split out when splitting a module.", + Options::Arguments::Zero, + [&](Options* o, const std::string& argument) { + verbose = true; + quiet = false; + }) + .add_positional("INFILES", + Options::Arguments::N, + [&](Options* o, const std::string& argument) { + inputFiles.push_back(argument); + }); +} + +WasmSplitOptions& WasmSplitOptions::add(const std::string& longName, + const std::string& shortName, + const std::string& description, + std::vector<Mode>&& modes, + Arguments arguments, + const Action& action) { + // Insert the valid modes at the beginning of the description. + std::stringstream desc; + if (modes.size()) { + desc << '['; + std::string sep = ""; + for (Mode m : modes) { + validOptions[static_cast<unsigned>(m)].insert(longName); + desc << sep << m; + sep = ", "; + } + desc << "] "; + } + desc << description; + ToolOptions::add( + longName, + shortName, + desc.str(), + arguments, + [&, action, longName](Options* o, const std::string& argument) { + usedOptions.push_back(longName); + action(o, argument); + }); + return *this; +} + +WasmSplitOptions& WasmSplitOptions::add(const std::string& longName, + const std::string& shortName, + const std::string& description, + Arguments arguments, + const Action& action) { + // Add an option valid in all modes. + for (unsigned i = 0; i < NumModes; ++i) { + validOptions[i].insert(longName); + } + return add(longName, shortName, description, {}, arguments, action); +} + +bool WasmSplitOptions::validate() { + bool valid = true; + auto fail = [&](auto msg) { + std::cerr << "error: " << msg << "\n"; + valid = false; + }; + + // Validate the positional arguments. + if (inputFiles.size() == 0) { + fail("no input file"); + } + switch (mode) { + case Mode::Split: + case Mode::Instrument: + if (inputFiles.size() > 1) { + fail("Cannot have more than one input file."); + } + break; + case Mode::MergeProfiles: + // Any number >= 1 allowed. 
+ break; + } + + // Validate that all used options are allowed in the current mode. + for (std::string& opt : usedOptions) { + if (!validOptions[static_cast<unsigned>(mode)].count(opt)) { + std::stringstream msg; + msg << "Option " << opt << " cannot be used in " << mode << " mode."; + fail(msg.str()); + } + } + + if (mode == Mode::Split) { + std::vector<Name> impossible; + std::set_intersection(keepFuncs.begin(), + keepFuncs.end(), + splitFuncs.begin(), + splitFuncs.end(), + std::inserter(impossible, impossible.end())); + for (auto& func : impossible) { + fail(std::string("Cannot both keep and split out function ") + + func.c_str()); + } + } + + return valid; +} + +void WasmSplitOptions::parse(int argc, const char* argv[]) { + ToolOptions::parse(argc, argv); + // Since --quiet is defined in ToolOptions but --verbose is defined here, + // --quiet doesn't know to unset --verbose. Fix it up here. + if (quiet && verbose) { + verbose = false; + } +} + +} // namespace wasm diff --git a/src/tools/wasm-split/split-options.h b/src/tools/wasm-split/split-options.h new file mode 100644 index 000000000..5c811c32c --- /dev/null +++ b/src/tools/wasm-split/split-options.h @@ -0,0 +1,86 @@ +/* + * Copyright 2021 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef wasm_tools_wasm_split_options_h +#define wasm_tools_wasm_split_options_h + +#include "tools/tool-options.h" + +namespace wasm { + +const std::string DEFAULT_PROFILE_EXPORT("__write_profile"); + +struct WasmSplitOptions : ToolOptions { + enum class Mode : unsigned { + Split, + Instrument, + MergeProfiles, + }; + Mode mode = Mode::Split; + constexpr static size_t NumModes = + static_cast<unsigned>(Mode::MergeProfiles) + 1; + + bool verbose = false; + bool emitBinary = true; + bool symbolMap = false; + bool placeholderMap = false; + + // TODO: Remove this. See the comment in wasm-binary.h. + bool emitModuleNames = false; + + std::string profileFile; + std::string profileExport = DEFAULT_PROFILE_EXPORT; + + std::set<Name> keepFuncs; + std::set<Name> splitFuncs; + + std::vector<std::string> inputFiles; + std::string output; + std::string primaryOutput; + std::string secondaryOutput; + + std::string importNamespace; + std::string placeholderNamespace; + std::string exportPrefix; + + // A hack to ensure the split and instrumented modules have the same table + // size when using Emscripten's SPLIT_MODULE mode with dynamic linking. TODO: + // Figure out a more elegant solution for that use case and remove this. + int initialTableSize = -1; + + // The options that are valid for each mode. 
+ std::array<std::unordered_set<std::string>, NumModes> validOptions; + std::vector<std::string> usedOptions; + + WasmSplitOptions(); + WasmSplitOptions& add(const std::string& longName, + const std::string& shortName, + const std::string& description, + std::vector<Mode>&& modes, + Arguments arguments, + const Action& action); + WasmSplitOptions& add(const std::string& longName, + const std::string& shortName, + const std::string& description, + Arguments arguments, + const Action& action); + bool validate(); + void parse(int argc, const char* argv[]); +}; + +} // namespace wasm + +#endif // wasm_tools_wasm_split_options_h diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp new file mode 100644 index 000000000..bedba8957 --- /dev/null +++ b/src/tools/wasm-split/wasm-split.cpp @@ -0,0 +1,396 @@ +/* + * Copyright 2020 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// wasm-split: Split a module in two or instrument a module to inform future +// splitting. 
+ +#include "ir/module-splitting.h" +#include "ir/names.h" +#include "support/file.h" +#include "support/name.h" +#include "support/path.h" +#include "support/utilities.h" +#include "wasm-binary.h" +#include "wasm-builder.h" +#include "wasm-io.h" +#include "wasm-validator.h" + +#include "instrumenter.h" +#include "split-options.h" + +using namespace wasm; + +namespace { + +void parseInput(Module& wasm, const WasmSplitOptions& options) { + options.applyFeatures(wasm); + ModuleReader reader; + reader.setProfile(options.profile); + try { + reader.read(options.inputFiles[0], wasm); + } catch (ParseException& p) { + p.dump(std::cerr); + std::cerr << '\n'; + Fatal() << "error parsing wasm"; + } catch (std::bad_alloc&) { + Fatal() << "error building module, std::bad_alloc (possibly invalid " + "request for silly amounts of memory)"; + } + + if (options.passOptions.validate && !WasmValidator().validate(wasm)) { + Fatal() << "error validating input"; + } +} + +uint64_t hashFile(const std::string& filename) { + auto contents(read_file<std::vector<char>>(filename, Flags::Binary)); + size_t digest = 0; + // Don't use `hash` or `rehash` - they aren't deterministic between executions + for (char c : contents) { + hash_combine(digest, c); + } + return uint64_t(digest); +} + +void adjustTableSize(Module& wasm, int initialSize) { + if (initialSize < 0) { + return; + } + if (wasm.tables.empty()) { + Fatal() << "--initial-table used but there is no table"; + } + + auto& table = wasm.tables.front(); + + if ((uint64_t)initialSize < table->initial) { + Fatal() << "Specified initial table size too small, should be at least " + << table->initial; + } + if ((uint64_t)initialSize > table->max) { + Fatal() << "Specified initial table size larger than max table size " + << table->max; + } + table->initial = initialSize; +} + +void writeModule(Module& wasm, + std::string filename, + const WasmSplitOptions& options) { + ModuleWriter writer; + writer.setBinary(options.emitBinary); + 
writer.setDebugInfo(options.passOptions.debugInfo); + if (options.emitModuleNames) { + writer.setEmitModuleName(true); + } + writer.write(wasm, filename); +} + +void instrumentModule(const WasmSplitOptions& options) { + Module wasm; + parseInput(wasm, options); + + // Check that the profile export name is not already taken + if (wasm.getExportOrNull(options.profileExport) != nullptr) { + Fatal() << "error: Export " << options.profileExport << " already exists."; + } + + uint64_t moduleHash = hashFile(options.inputFiles[0]); + PassRunner runner(&wasm, options.passOptions); + Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); + + adjustTableSize(wasm, options.initialTableSize); + + // Write the output modules + writeModule(wasm, options.output, options); +} + +struct ProfileData { + uint64_t hash; + std::vector<size_t> timestamps; +}; + +// See "wasm-split profile format" in instrumenter.cpp for more information. +ProfileData readProfile(const std::string& file) { + auto profileData = read_file<std::vector<char>>(file, Flags::Binary); + size_t i = 0; + auto readi32 = [&]() { + if (i + 4 > profileData.size()) { + Fatal() << "Unexpected end of profile data in " << file; + } + uint32_t i32 = 0; + i32 |= uint32_t(uint8_t(profileData[i++])); + i32 |= uint32_t(uint8_t(profileData[i++])) << 8; + i32 |= uint32_t(uint8_t(profileData[i++])) << 16; + i32 |= uint32_t(uint8_t(profileData[i++])) << 24; + return i32; + }; + + uint64_t hash = readi32(); + hash |= uint64_t(readi32()) << 32; + + std::vector<size_t> timestamps; + while (i < profileData.size()) { + timestamps.push_back(readi32()); + } + + return {hash, timestamps}; +} + +void writeSymbolMap(Module& wasm, std::string filename) { + PassOptions options; + options.arguments["symbolmap"] = filename; + PassRunner runner(&wasm, options); + runner.add("symbolmap"); + runner.run(); +} + +void writePlaceholderMap(const std::map<size_t, Name> placeholderMap, + std::string filename) { + Output output(filename, 
Flags::Text); + auto& o = output.getStream(); + for (auto pair : placeholderMap) { + o << pair.first << ':' << pair.second << '\n'; + } +} + +void splitModule(const WasmSplitOptions& options) { + Module wasm; + parseInput(wasm, options); + + std::set<Name> keepFuncs; + + if (options.profileFile.size()) { + // Use the profile to initialize `keepFuncs`. + uint64_t hash = hashFile(options.inputFiles[0]); + ProfileData profile = readProfile(options.profileFile); + if (profile.hash != hash) { + Fatal() << "error: checksum in profile does not match module checksum. " + << "The split module must be the original module that was " + << "instrumented to generate the profile."; + } + size_t i = 0; + ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { + if (i >= profile.timestamps.size()) { + Fatal() << "Unexpected end of profile data"; + } + if (profile.timestamps[i++] > 0) { + keepFuncs.insert(func->name); + } + }); + if (i != profile.timestamps.size()) { + Fatal() << "Unexpected extra profile data"; + } + } + + // Add in the functions specified with --keep-funcs + for (auto& func : options.keepFuncs) { + if (!options.quiet && wasm.getFunctionOrNull(func) == nullptr) { + std::cerr << "warning: function " << func << " does not exist\n"; + } + keepFuncs.insert(func); + } + + // Remove the functions specified with --split-funcs + for (auto& func : options.splitFuncs) { + auto* function = wasm.getFunctionOrNull(func); + if (!options.quiet && function == nullptr) { + std::cerr << "warning: function " << func << " does not exist\n"; + } + if (function && function->imported()) { + if (!options.quiet) { + std::cerr << "warning: cannot split out imported function " << func + << "\n"; + } + } else { + keepFuncs.erase(func); + } + } + + if (!options.quiet && keepFuncs.size() == 0) { + std::cerr << "warning: not keeping any functions in the primary module\n"; + } + + // If warnings are enabled, check that any functions are being split out. 
+ if (!options.quiet) { + std::set<Name> splitFuncs; + ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { + if (keepFuncs.count(func->name) == 0) { + splitFuncs.insert(func->name); + } + }); + + if (splitFuncs.size() == 0) { + std::cerr + << "warning: not splitting any functions out to the secondary module\n"; + } + + // Dump the kept and split functions if we are verbose + if (options.verbose) { + auto printCommaSeparated = [&](auto funcs) { + for (auto it = funcs.begin(); it != funcs.end(); ++it) { + if (it != funcs.begin()) { + std::cout << ", "; + } + std::cout << *it; + } + }; + + std::cout << "Keeping functions: "; + printCommaSeparated(keepFuncs); + std::cout << "\n"; + + std::cout << "Splitting out functions: "; + printCommaSeparated(splitFuncs); + std::cout << "\n"; + } + } + + // Actually perform the splitting + ModuleSplitting::Config config; + config.primaryFuncs = std::move(keepFuncs); + if (options.importNamespace.size()) { + config.importNamespace = options.importNamespace; + } + if (options.placeholderNamespace.size()) { + config.placeholderNamespace = options.placeholderNamespace; + } + if (options.exportPrefix.size()) { + config.newExportPrefix = options.exportPrefix; + } + config.minimizeNewExportNames = !options.passOptions.debugInfo; + auto splitResults = ModuleSplitting::splitFunctions(wasm, config); + auto& secondary = splitResults.secondary; + + adjustTableSize(wasm, options.initialTableSize); + adjustTableSize(*secondary, options.initialTableSize); + + if (options.symbolMap) { + writeSymbolMap(wasm, options.primaryOutput + ".symbols"); + writeSymbolMap(*secondary, options.secondaryOutput + ".symbols"); + } + + if (options.placeholderMap) { + writePlaceholderMap(splitResults.placeholderMap, + options.primaryOutput + ".placeholders"); + } + + // Set the names of the split modules. This can help differentiate them in + // stack traces. 
+ if (options.emitModuleNames) { + if (!wasm.name) { + wasm.name = Path::getBaseName(options.primaryOutput); + } + secondary->name = Path::getBaseName(options.secondaryOutput); + } + + // write the output modules + writeModule(wasm, options.primaryOutput, options); + writeModule(*secondary, options.secondaryOutput, options); +} + +void mergeProfiles(const WasmSplitOptions& options) { + // Read the initial profile. We will merge other profiles into this one. + ProfileData data = readProfile(options.inputFiles[0]); + + // In verbose mode, we want to find profiles that don't contribute to the + // merged profile. To do that, keep track of how many profiles each function + // appears in. If any profile contains only functions that appear in multiple + // profiles, it could be dropped. + std::vector<size_t> numProfiles; + if (options.verbose) { + numProfiles.resize(data.timestamps.size()); + for (size_t t = 0; t < data.timestamps.size(); ++t) { + if (data.timestamps[t]) { + numProfiles[t] = 1; + } + } + } + + // Read all the other profiles, taking the minimum nonzero timestamp for each + // function. + for (size_t i = 1; i < options.inputFiles.size(); ++i) { + ProfileData newData = readProfile(options.inputFiles[i]); + if (newData.hash != data.hash) { + Fatal() << "Checksum in profile " << options.inputFiles[i] + << " does not match hash in profile " << options.inputFiles[0]; + } + if (newData.timestamps.size() != data.timestamps.size()) { + Fatal() << "Profile " << options.inputFiles[i] + << " incompatible with profile " << options.inputFiles[0]; + } + for (size_t t = 0; t < data.timestamps.size(); ++t) { + if (data.timestamps[t] && newData.timestamps[t]) { + data.timestamps[t] = + std::min(data.timestamps[t], newData.timestamps[t]); + } else if (newData.timestamps[t]) { + data.timestamps[t] = newData.timestamps[t]; + } + if (options.verbose && newData.timestamps[t]) { + ++numProfiles[t]; + } + } + } + + // Check for useless profiles. 
+ if (options.verbose) { + for (const auto& file : options.inputFiles) { + bool useless = true; + ProfileData newData = readProfile(file); + for (size_t t = 0; t < newData.timestamps.size(); ++t) { + if (newData.timestamps[t] && numProfiles[t] == 1) { + useless = false; + break; + } + } + if (useless) { + std::cout << "Profile " << file + << " only includes functions included in other profiles.\n"; + } + } + } + + // Write the combined profile. + BufferWithRandomAccess buffer; + buffer << data.hash; + for (size_t t = 0; t < data.timestamps.size(); ++t) { + buffer << uint32_t(data.timestamps[t]); + } + Output out(options.output, Flags::Binary); + buffer.writeTo(out.getStream()); +} + +} // anonymous namespace + +int main(int argc, const char* argv[]) { + WasmSplitOptions options; + options.parse(argc, argv); + + if (!options.validate()) { + Fatal() << "Invalid command line arguments"; + } + + switch (options.mode) { + case WasmSplitOptions::Mode::Split: + splitModule(options); + break; + case WasmSplitOptions::Mode::Instrument: + instrumentModule(options); + break; + case WasmSplitOptions::Mode::MergeProfiles: + mergeProfiles(options); + break; + } +} |