summary | refs | log | tree | commit | diff
path: root/src/tools/wasm-split.cpp
diff options
context:
space:
mode:
author: Thomas Lively <7121787+tlively@users.noreply.github.com> 2020-11-19 21:51:55 -0800
committer: GitHub <noreply@github.com> 2020-11-19 21:51:55 -0800
commit: 171cba44fe6fdaff63fff79d2c660b02d7a79747 (patch)
tree: 509b906a080e318bf4de6cca7d3dd822c41282c2 /src/tools/wasm-split.cpp
parent: de5e7365957b3eaf0d9aa05eea4ee759efb67ca4 (diff)
download: binaryen-171cba44fe6fdaff63fff79d2c660b02d7a79747.tar.gz
binaryen-171cba44fe6fdaff63fff79d2c660b02d7a79747.tar.bz2
binaryen-171cba44fe6fdaff63fff79d2c660b02d7a79747.zip
Initial wasm-split tool (#3359)
Implement an initial version of the wasm-split tool, which splits modules into a primary module and a secondary module that can be instantiated after the primary module. Eventually, this tool will be able to not only split modules, but also instrument modules to collect profiles that will be able to guide later splitting. In this initial version, however, wasm-split can neither perform instrumentation nor consume any kind of profile data. Despite those shortcomings, this initial version of the tool is already able to perform module splitting according to function lists manually provided by the user via the command line. Follow-up PRs will implement the stubbed out instrumentation and profile consumption functionality.
Diffstat (limited to 'src/tools/wasm-split.cpp')
-rw-r--r-- src/tools/wasm-split.cpp 367
1 files changed, 367 insertions, 0 deletions
diff --git a/src/tools/wasm-split.cpp b/src/tools/wasm-split.cpp
new file mode 100644
index 000000000..5f6207e9a
--- /dev/null
+++ b/src/tools/wasm-split.cpp
@@ -0,0 +1,367 @@
+/*
+ * Copyright 2020 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// wasm-split: Split a module in two or instrument a module to inform future
+// splitting.
+
+#include "ir/module-splitting.h"
+#include "ir/module-utils.h"
+#include "support/name.h"
+#include "support/utilities.h"
+#include "tool-options.h"
+#include "wasm-io.h"
+#include "wasm-validator.h"
+#include <sstream>
+
+using namespace wasm;
+
+namespace {
+
+// Export name of the profile-writing function used unless --profile-export
+// supplies a different one.
+const std::string DEFAULT_PROFILE_EXPORT = "__write_profile";
+
+// Splits `list` at every comma and returns the resulting pieces as a set of
+// names.
+std::set<Name> parseNameList(const std::string& list) {
+  std::set<Name> result;
+  std::istringstream input(list);
+  std::string token;
+  while (std::getline(input, token, ',')) {
+    result.insert(token);
+  }
+  return result;
+}
+
+// Command line options understood by wasm-split, added on top of the shared
+// ToolOptions.
+struct WasmSplitOptions : ToolOptions {
+  // Set by --verbose; parse() clears it again when `quiet` is also set.
+  bool verbose{false};
+
+  // Set by --instrument.
+  bool instrument{false};
+
+  // Argument of --profile.
+  std::string profileFile;
+  // Argument of --profile-export.
+  std::string profileExport = DEFAULT_PROFILE_EXPORT;
+
+  // Names parsed from --keep-funcs and --split-funcs respectively.
+  std::set<Name> keepFuncs;
+  std::set<Name> splitFuncs;
+
+  // Positional INFILE argument.
+  std::string input;
+  // Arguments of --output, --primary-output and --secondary-output.
+  std::string output;
+  std::string primaryOutput;
+  std::string secondaryOutput;
+
+  // Arguments of --import-namespace, --placeholder-namespace and
+  // --export-prefix.
+  std::string importNamespace;
+  std::string placeholderNamespace;
+  std::string exportPrefix;
+
+  // Registers all wasm-split specific options.
+  WasmSplitOptions();
+  // Reports inconsistent option combinations; returns false if any was found.
+  bool validate();
+  // Parses the command line and reconciles --verbose with `quiet`.
+  void parse(int argc, const char* argv[]);
+};
+
+// Registers every wasm-split option. Each callback stores its argument (or
+// sets a flag) in the corresponding member of this object.
+WasmSplitOptions::WasmSplitOptions()
+  : ToolOptions("wasm-split",
+                "Split a module into a primary module and a secondary "
+                "module or instrument a module to gather a profile that "
+                "can inform future splitting.") {
+  // Mode selection and profile handling.
+  add("--instrument",
+      "",
+      "Instrument the module to generate a profile that can be used to "
+      "guide splitting",
+      Options::Arguments::Zero,
+      [this](Options*, const std::string&) { instrument = true; });
+  add("--profile",
+      "",
+      "The profile to use to guide splitting. May not be used with "
+      "--instrument.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) { profileFile = value; });
+  add("--profile-export",
+      "",
+      "The export name of the function the embedder calls to write the "
+      "profile into memory. Defaults to `__write_profile`. Must be used "
+      "with --instrument.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) { profileExport = value; });
+
+  // Explicit function lists.
+  add("--keep-funcs",
+      "",
+      "Comma-separated list of functions to keep in the primary module, "
+      "regardless of any profile.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) {
+        keepFuncs = parseNameList(value);
+      });
+  add("--split-funcs",
+      "",
+      "Comma-separated list of functions to split into the secondary "
+      "module, regardless of any profile. If there is no profile, then "
+      "this defaults to all functions defined in the module.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) {
+        splitFuncs = parseNameList(value);
+      });
+
+  // Output files.
+  add("--output",
+      "-o",
+      "Output file. Only usable with --instrument.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) { output = value; });
+  add("--primary-output",
+      "-o1",
+      "Output file for the primary module. Not usable with --instrument.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) { primaryOutput = value; });
+  add("--secondary-output",
+      "-o2",
+      "Output file for the secondary module. Not usable with --instrument.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) { secondaryOutput = value; });
+
+  // Naming of the imports and exports created by splitting.
+  add("--import-namespace",
+      "",
+      "The namespace from which to import objects from the primary "
+      "module into the secondary module.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) { importNamespace = value; });
+  add("--placeholder-namespace",
+      "",
+      "The namespace from which to import placeholder functions into "
+      "the primary module.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) {
+        placeholderNamespace = value;
+      });
+  add("--export-prefix",
+      "",
+      "An identifying prefix to prepend to new export names created "
+      "by module splitting.",
+      Options::Arguments::One,
+      [this](Options*, const std::string& value) { exportPrefix = value; });
+
+  // Diagnostics. Asking for verbose output also turns `quiet` off.
+  add("--verbose",
+      "-v",
+      "Verbose output mode. Prints the functions that will be kept "
+      "and split out when splitting a module.",
+      Options::Arguments::Zero,
+      [this](Options*, const std::string&) {
+        verbose = true;
+        quiet = false;
+      });
+
+  // The module to operate on.
+  add_positional(
+    "INFILE",
+    Options::Arguments::One,
+    [this](Options*, const std::string& value) { input = value; });
+}
+
+// Checks the parsed options for missing or conflicting settings. Every problem
+// found is reported on stderr with an "error: " prefix; checking continues
+// after a problem so that all of them are listed. Returns true only if no
+// problem was found.
+bool WasmSplitOptions::validate() {
+  bool ok = true;
+  auto report = [&](const std::string& message) {
+    std::cerr << "error: " << message << "\n";
+    ok = false;
+  };
+  // Complains when a string-valued option was given together with
+  // --instrument.
+  auto rejectIfSet = [&](const std::string& value, const std::string& what) {
+    if (!value.empty()) {
+      report(what + " cannot be used with --instrument");
+    }
+  };
+
+  if (input.empty()) {
+    report("no input file");
+  }
+
+  if (!instrument) {
+    // Splitting produces two modules, so the single-output flag is rejected.
+    if (!output.empty()) {
+      report(
+        "must provide separate primary and secondary output with -o1 and -o2");
+    }
+    if (profileExport != DEFAULT_PROFILE_EXPORT) {
+      report("--profile-export must be used with --instrument");
+    }
+  } else {
+    rejectIfSet(profileFile, "--profile");
+    rejectIfSet(primaryOutput, "primary output");
+    rejectIfSet(secondaryOutput, "secondary output");
+    rejectIfSet(importNamespace, "--import-namespace");
+    rejectIfSet(placeholderNamespace, "--placeholder-namespace");
+    rejectIfSet(exportPrefix, "--export-prefix");
+    if (!keepFuncs.empty()) {
+      report("--keep-funcs cannot be used with --instrument");
+    }
+    if (!splitFuncs.empty()) {
+      report("--split-funcs cannot be used with --instrument");
+    }
+  }
+
+  // A function cannot be requested both to be kept and to be split out.
+  for (const auto& func : keepFuncs) {
+    if (splitFuncs.count(func) != 0) {
+      report(std::string("Cannot both keep and split out function ") +
+             func.c_str());
+    }
+  }
+
+  return ok;
+}
+
+// Parses the command line through ToolOptions::parse and then reconciles the
+// two verbosity flags.
+void WasmSplitOptions::parse(int argc, const char* argv[]) {
+  ToolOptions::parse(argc, argv);
+  // --quiet is defined in ToolOptions while --verbose is defined here, so
+  // --quiet cannot unset --verbose by itself. When both ended up set, quiet
+  // wins.
+  verbose = verbose && !quiet;
+}
+
+// Reads the module named by options.input into `wasm` and then applies the
+// feature options to it. A parse error is dumped to stderr before a Fatal()
+// message is emitted; an allocation failure gets its own Fatal() message.
+void parseInput(Module& wasm, const WasmSplitOptions& options) {
+  ModuleReader moduleReader;
+  moduleReader.setProfile(options.profile);
+
+  try {
+    moduleReader.read(options.input, wasm);
+  } catch (ParseException& parseError) {
+    parseError.dump(std::cerr);
+    std::cerr << '\n';
+    Fatal() << "error parsing wasm";
+  } catch (std::bad_alloc&) {
+    Fatal() << "error building module, std::bad_alloc (possibly invalid "
+               "request for silly amounts of memory)";
+  }
+
+  options.applyFeatures(wasm);
+}
+
+// Placeholder for module instrumentation. Neither parameter is used yet; the
+// body only emits a Fatal() message saying instrumentation is unimplemented.
+void instrumentModule(Module& wasm, const WasmSplitOptions& options) {
+ Fatal() << "TODO: implement instrumentation\n";
+}
+
+// Splits `wasm` into a primary and a secondary module according to `options`
+// and writes both modules out in binary form to options.primaryOutput and
+// options.secondaryOutput.
+//
+// The set of functions kept in the primary module starts out empty, gains
+// every name given with --keep-funcs and then loses every name given with
+// --split-funcs. Unless options.quiet is set, warnings are printed to stderr
+// for names that do not exist in the module, for imported functions that were
+// asked to be split out, and when either module ends up without functions.
+void splitModule(Module& wasm, const WasmSplitOptions& options) {
+  std::set<Name> keepFuncs;
+
+  if (options.profileFile.size()) {
+    // Use the profile to initialize `keepFuncs`
+    Fatal() << "TODO: implement reading profiles\n";
+  }
+
+  // Add in the functions specified with --keep-funcs
+  for (auto& func : options.keepFuncs) {
+    if (!options.quiet && wasm.getFunctionOrNull(func) == nullptr) {
+      std::cerr << "warning: function " << func << " does not exist\n";
+    }
+    keepFuncs.insert(func);
+  }
+
+  // Remove the functions specified with --split-funcs
+  for (auto& func : options.splitFuncs) {
+    auto* function = wasm.getFunctionOrNull(func);
+    if (!options.quiet && function == nullptr) {
+      std::cerr << "warning: function " << func << " does not exist\n";
+    }
+    if (function && function->imported()) {
+      // Imported functions are left alone; only a warning is printed.
+      if (!options.quiet) {
+        std::cerr << "warning: cannot split out imported function " << func
+                  << "\n";
+      }
+    } else {
+      keepFuncs.erase(func);
+    }
+  }
+
+  if (!options.quiet && keepFuncs.size() == 0) {
+    std::cerr << "warning: not keeping any functions in the primary module\n";
+  }
+
+  // If warnings are enabled, check that any functions are being split out.
+  if (!options.quiet) {
+    // Every defined function that is not kept counts as split out.
+    std::set<Name> splitFuncs;
+    ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) {
+      if (keepFuncs.count(func->name) == 0) {
+        splitFuncs.insert(func->name);
+      }
+    });
+
+    if (splitFuncs.size() == 0) {
+      std::cerr
+        << "warning: not splitting any functions out to the secondary module\n";
+    }
+
+    // Dump the kept and split functions if we are verbose
+    if (options.verbose) {
+      // Takes the set by const reference so that printing does not copy it.
+      auto printCommaSeparated = [](const auto& funcs) {
+        for (auto it = funcs.begin(); it != funcs.end(); ++it) {
+          if (it != funcs.begin()) {
+            std::cout << ", ";
+          }
+          std::cout << *it;
+        }
+      };
+
+      std::cout << "Keeping functions: ";
+      printCommaSeparated(keepFuncs);
+      std::cout << "\n";
+
+      std::cout << "Splitting out functions: ";
+      printCommaSeparated(splitFuncs);
+      std::cout << "\n";
+    }
+  }
+
+  // Actually perform the splitting. Config fields are only overridden for
+  // options that were given a non-empty value.
+  ModuleSplitting::Config config;
+  config.primaryFuncs = std::move(keepFuncs);
+  if (options.importNamespace.size()) {
+    config.importNamespace = options.importNamespace;
+  }
+  if (options.placeholderNamespace.size()) {
+    config.placeholderNamespace = options.placeholderNamespace;
+  }
+  if (options.exportPrefix.size()) {
+    config.newExportPrefix = options.exportPrefix;
+  }
+  std::unique_ptr<Module> secondary =
+    ModuleSplitting::splitFunctions(wasm, config);
+
+  // Write the output modules
+  ModuleWriter writer;
+  writer.setBinary(true);
+  writer.write(wasm, options.primaryOutput);
+  writer.write(*secondary, options.secondaryOutput);
+}
+
+} // anonymous namespace
+
+// Entry point: parses and validates the command line, reads the input module,
+// validates it when pass options request that, and then either instruments
+// the module or splits it.
+int main(int argc, const char* argv[]) {
+  WasmSplitOptions options;
+  options.parse(argc, argv);
+  if (!options.validate()) {
+    Fatal() << "Invalid command line arguments";
+  }
+
+  Module wasm;
+  parseInput(wasm, options);
+
+  // The input is only validated when the pass options ask for it.
+  if (options.passOptions.validate) {
+    if (!WasmValidator().validate(wasm)) {
+      Fatal() << "error validating input";
+    }
+  }
+
+  if (!options.instrument) {
+    splitModule(wasm, options);
+  } else {
+    instrumentModule(wasm, options);
+  }
+}