summary | refs | log | tree | commit | diff
path: root/src/tools/wasm-split/wasm-split.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/wasm-split/wasm-split.cpp')
-rw-r--r-- src/tools/wasm-split/wasm-split.cpp 396
1 files changed, 396 insertions, 0 deletions
diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp
new file mode 100644
index 000000000..bedba8957
--- /dev/null
+++ b/src/tools/wasm-split/wasm-split.cpp
@@ -0,0 +1,396 @@
+/*
+ * Copyright 2020 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// wasm-split: Split a module in two or instrument a module to inform future
+// splitting.
+
+#include "ir/module-splitting.h"
+#include "ir/names.h"
+#include "support/file.h"
+#include "support/name.h"
+#include "support/path.h"
+#include "support/utilities.h"
+#include "wasm-binary.h"
+#include "wasm-builder.h"
+#include "wasm-io.h"
+#include "wasm-validator.h"
+
+#include "instrumenter.h"
+#include "split-options.h"
+
+using namespace wasm;
+
+namespace {
+
+// Loads the first input file named in `options` into `wasm`.
+//
+// The feature set from `options` is applied to the module before reading.
+// Parse errors and allocation failures raised while reading are reported
+// through Fatal(). When `options.passOptions.validate` is set, the freshly
+// read module is validated and a validation failure is also reported through
+// Fatal().
+void parseInput(Module& wasm, const WasmSplitOptions& options) {
+  options.applyFeatures(wasm);
+
+  ModuleReader moduleReader;
+  moduleReader.setProfile(options.profile);
+  const auto& inputPath = options.inputFiles[0];
+  try {
+    moduleReader.read(inputPath, wasm);
+  } catch (ParseException& parseError) {
+    // Print the parser's own diagnostics before the generic failure message.
+    parseError.dump(std::cerr);
+    std::cerr << '\n';
+    Fatal() << "error parsing wasm";
+  } catch (std::bad_alloc&) {
+    Fatal() << "error building module, std::bad_alloc (possibly invalid "
+               "request for silly amounts of memory)";
+  }
+
+  // Validation is optional; skip it entirely when it was not requested.
+  if (!options.passOptions.validate) {
+    return;
+  }
+  if (!WasmValidator().validate(wasm)) {
+    Fatal() << "error validating input";
+  }
+}
+
+// Returns a checksum of the raw bytes of `filename`, read in binary mode.
+//
+// The digest starts at zero and every byte is folded in, in file order, with
+// hash_combine.
+uint64_t hashFile(const std::string& filename) {
+  const auto bytes = read_file<std::vector<char>>(filename, Flags::Binary);
+  size_t digest = 0;
+  // Deliberately not `hash` or `rehash`: those are not deterministic between
+  // executions.
+  for (size_t idx = 0; idx < bytes.size(); ++idx) {
+    char current = bytes[idx];
+    hash_combine(digest, current);
+  }
+  return static_cast<uint64_t>(digest);
+}
+
+// Sets the initial size of the module's first table to `initialSize`.
+//
+// A negative `initialSize` leaves the module untouched. Otherwise the module
+// must have at least one table, and the requested size must lie between the
+// table's current initial size and its maximum size (both inclusive); any
+// violation is reported through Fatal().
+void adjustTableSize(Module& wasm, int initialSize) {
+  if (initialSize < 0) {
+    return;
+  }
+  if (wasm.tables.empty()) {
+    Fatal() << "--initial-table used but there is no table";
+  }
+
+  auto& firstTable = wasm.tables.front();
+  // `initialSize` is known to be non-negative here, so widening is lossless.
+  const auto requested = static_cast<uint64_t>(initialSize);
+
+  if (requested < firstTable->initial) {
+    Fatal() << "Specified initial table size too small, should be at least "
+            << firstTable->initial;
+  }
+  if (requested > firstTable->max) {
+    Fatal() << "Specified initial table size larger than max table size "
+            << firstTable->max;
+  }
+  firstTable->initial = initialSize;
+}
+
+// Writes `wasm` to `filename`.
+//
+// The binary/text choice and the debug-info setting are taken from `options`.
+// Module-name emission is switched on when `options.emitModuleNames` is set
+// and is otherwise left at the writer's own setting.
+void writeModule(Module& wasm,
+                 std::string filename,
+                 const WasmSplitOptions& options) {
+  ModuleWriter moduleWriter;
+
+  moduleWriter.setDebugInfo(options.passOptions.debugInfo);
+  moduleWriter.setBinary(options.emitBinary);
+  if (options.emitModuleNames) {
+    moduleWriter.setEmitModuleName(true);
+  }
+
+  moduleWriter.write(wasm, filename);
+}
+
+// Instrument mode: reads the input module, runs the Instrumenter over it and
+// writes the result to `options.output`.
+//
+// The checksum of the input file is handed to the Instrumenter together with
+// the profile export name. It is reported through Fatal() if the module
+// already has an export with that name.
+void instrumentModule(const WasmSplitOptions& options) {
+  Module wasm;
+  parseInput(wasm, options);
+
+  // The profile export name must still be free in the input module.
+  const auto& exportName = options.profileExport;
+  if (wasm.getExportOrNull(exportName)) {
+    Fatal() << "error: Export " << exportName << " already exists.";
+  }
+
+  const uint64_t moduleHash = hashFile(options.inputFiles[0]);
+  PassRunner runner(&wasm, options.passOptions);
+  Instrumenter instrumenter(options.profileExport, moduleHash);
+  instrumenter.run(&runner, &wasm);
+
+  adjustTableSize(wasm, options.initialTableSize);
+
+  // Write the instrumented module.
+  writeModule(wasm, options.output, options);
+}
+
+struct ProfileData { // Contents of one profile file, as produced by readProfile.
+ uint64_t hash; // Checksum read from the first eight bytes of the profile.
+ std::vector<size_t> timestamps; // One 32-bit value per entry after the hash; splitModule keeps functions whose entry is nonzero.
+};
+
+// See "wasm-split profile format" in instrumenter.cpp for more information.
+//
+// Reads the profile stored in `file`. The file is decoded as a sequence of
+// little-endian 32-bit words: the first two form the 64-bit hash (low word
+// first) and every following word is one timestamp. Running out of bytes in
+// the middle of a word is reported through Fatal().
+ProfileData readProfile(const std::string& file) {
+  const auto profileData = read_file<std::vector<char>>(file, Flags::Binary);
+  size_t i = 0;
+  // Decodes the next four bytes as a little-endian uint32_t and advances `i`.
+  auto readi32 = [&]() -> uint32_t {
+    if (i + 4 > profileData.size()) {
+      Fatal() << "Unexpected end of profile data in " << file;
+    }
+    uint32_t i32 = 0;
+    for (unsigned shift = 0; shift < 32; shift += 8) {
+      // Go through uint8_t first so that negative chars are not sign-extended.
+      i32 |= uint32_t(uint8_t(profileData[i++])) << shift;
+    }
+    return i32;
+  };
+
+  // Build the result in place so the timestamp vector is never copied.
+  ProfileData profile{};
+  profile.hash = readi32();
+  profile.hash |= uint64_t(readi32()) << 32;
+
+  // Each timestamp occupies four bytes, so this is an upper bound on the
+  // number of entries and avoids reallocation while reading.
+  profile.timestamps.reserve(profileData.size() / 4);
+  while (i < profileData.size()) {
+    profile.timestamps.push_back(readi32());
+  }
+
+  return profile;
+}
+
+// Runs the "symbolmap" pass over `wasm`, passing `filename` as the pass's
+// "symbolmap" argument.
+void writeSymbolMap(Module& wasm, std::string filename) {
+  PassOptions symbolMapOptions;
+  symbolMapOptions.arguments["symbolmap"] = filename;
+
+  PassRunner symbolMapRunner(&wasm, symbolMapOptions);
+  symbolMapRunner.add("symbolmap");
+  symbolMapRunner.run();
+}
+
+// Writes `placeholderMap` to `filename` as text, one `index:name` line per
+// entry, in the map's key order.
+//
+// The map is taken by const reference and iterated by reference so that
+// neither the container nor its entries are copied.
+void writePlaceholderMap(const std::map<size_t, Name>& placeholderMap,
+                         std::string filename) {
+  Output output(filename, Flags::Text);
+  auto& o = output.getStream();
+  for (const auto& pair : placeholderMap) {
+    o << pair.first << ':' << pair.second << '\n';
+  }
+}
+
+// Split mode: reads the input module, decides which functions stay in the
+// primary module, splits the rest out into a secondary module and writes both
+// modules (plus the optional symbol and placeholder maps).
+//
+// The set of kept functions is built in three steps:
+//   1. If a profile file was given, every defined function whose profile
+//      entry is nonzero is kept. The profile's hash must equal the checksum
+//      of the input file and it must have exactly one entry per defined
+//      function; otherwise the problem is reported through Fatal().
+//   2. Every name in `options.keepFuncs` is added.
+//   3. Every name in `options.splitFuncs` is removed, except imported
+//      functions, which cannot be split out.
+// Unless `options.quiet` is set, warnings are printed to stderr for unknown
+// function names and for degenerate splits.
+void splitModule(const WasmSplitOptions& options) {
+  Module wasm;
+  parseInput(wasm, options);
+
+  std::set<Name> keepFuncs;
+
+  if (options.profileFile.size()) {
+    // Use the profile to initialize `keepFuncs`.
+    uint64_t hash = hashFile(options.inputFiles[0]);
+    ProfileData profile = readProfile(options.profileFile);
+    if (profile.hash != hash) {
+      Fatal() << "error: checksum in profile does not match module checksum. "
+              << "The split module must be the original module that was "
+              << "instrumented to generate the profile.";
+    }
+    // Profile entries are matched to defined functions by iteration order.
+    size_t i = 0;
+    ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) {
+      if (i >= profile.timestamps.size()) {
+        Fatal() << "Unexpected end of profile data";
+      }
+      if (profile.timestamps[i++] > 0) {
+        keepFuncs.insert(func->name);
+      }
+    });
+    if (i != profile.timestamps.size()) {
+      Fatal() << "Unexpected extra profile data";
+    }
+  }
+
+  // Add in the functions specified with --keep-funcs
+  for (auto& func : options.keepFuncs) {
+    if (!options.quiet && wasm.getFunctionOrNull(func) == nullptr) {
+      std::cerr << "warning: function " << func << " does not exist\n";
+    }
+    keepFuncs.insert(func);
+  }
+
+  // Remove the functions listed in `options.splitFuncs`
+  for (auto& func : options.splitFuncs) {
+    auto* function = wasm.getFunctionOrNull(func);
+    if (!options.quiet && function == nullptr) {
+      std::cerr << "warning: function " << func << " does not exist\n";
+    }
+    if (function && function->imported()) {
+      if (!options.quiet) {
+        std::cerr << "warning: cannot split out imported function " << func
+                  << "\n";
+      }
+    } else {
+      keepFuncs.erase(func);
+    }
+  }
+
+  if (!options.quiet && keepFuncs.size() == 0) {
+    std::cerr << "warning: not keeping any functions in the primary module\n";
+  }
+
+  // If warnings are enabled, check that any functions are being split out.
+  if (!options.quiet) {
+    std::set<Name> splitFuncs;
+    ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) {
+      if (keepFuncs.count(func->name) == 0) {
+        splitFuncs.insert(func->name);
+      }
+    });
+
+    if (splitFuncs.size() == 0) {
+      std::cerr
+        << "warning: not splitting any functions out to the secondary module\n";
+    }
+
+    // Dump the kept and split functions if we are verbose
+    if (options.verbose) {
+      // Takes the container by const reference: printing must not copy the
+      // whole set.
+      auto printCommaSeparated = [](const auto& funcs) {
+        for (auto it = funcs.begin(); it != funcs.end(); ++it) {
+          if (it != funcs.begin()) {
+            std::cout << ", ";
+          }
+          std::cout << *it;
+        }
+      };
+
+      std::cout << "Keeping functions: ";
+      printCommaSeparated(keepFuncs);
+      std::cout << "\n";
+
+      std::cout << "Splitting out functions: ";
+      printCommaSeparated(splitFuncs);
+      std::cout << "\n";
+    }
+  }
+
+  // Actually perform the splitting. `keepFuncs` is moved into the config and
+  // must not be used after this point.
+  ModuleSplitting::Config config;
+  config.primaryFuncs = std::move(keepFuncs);
+  // Only override the config's own values for options that were provided.
+  if (options.importNamespace.size()) {
+    config.importNamespace = options.importNamespace;
+  }
+  if (options.placeholderNamespace.size()) {
+    config.placeholderNamespace = options.placeholderNamespace;
+  }
+  if (options.exportPrefix.size()) {
+    config.newExportPrefix = options.exportPrefix;
+  }
+  config.minimizeNewExportNames = !options.passOptions.debugInfo;
+  auto splitResults = ModuleSplitting::splitFunctions(wasm, config);
+  auto& secondary = splitResults.secondary;
+
+  adjustTableSize(wasm, options.initialTableSize);
+  adjustTableSize(*secondary, options.initialTableSize);
+
+  if (options.symbolMap) {
+    writeSymbolMap(wasm, options.primaryOutput + ".symbols");
+    writeSymbolMap(*secondary, options.secondaryOutput + ".symbols");
+  }
+
+  if (options.placeholderMap) {
+    writePlaceholderMap(splitResults.placeholderMap,
+                        options.primaryOutput + ".placeholders");
+  }
+
+  // Set the names of the split modules. This can help differentiate them in
+  // stack traces. An existing name on the primary module is preserved.
+  if (options.emitModuleNames) {
+    if (!wasm.name) {
+      wasm.name = Path::getBaseName(options.primaryOutput);
+    }
+    secondary->name = Path::getBaseName(options.secondaryOutput);
+  }
+
+  // write the output modules
+  writeModule(wasm, options.primaryOutput, options);
+  writeModule(*secondary, options.secondaryOutput, options);
+}
+
+// Merge-profiles mode: combines every profile in `options.inputFiles` into a
+// single profile written to `options.output`.
+//
+// For each entry the merged value is the smallest nonzero timestamp seen in
+// any input, or zero if every input has zero there. All inputs must carry the
+// same hash and the same number of entries as the first one; a mismatch is
+// reported through Fatal(). In verbose mode, profiles that contribute no
+// entry of their own are listed on stdout.
+void mergeProfiles(const WasmSplitOptions& options) {
+  // The first profile seeds the result; the others are folded into it.
+  ProfileData data = readProfile(options.inputFiles[0]);
+
+  // Verbose bookkeeping: for every entry, the number of input profiles in
+  // which it is nonzero. A profile whose nonzero entries are all shared with
+  // some other profile adds nothing to the merge and could be dropped.
+  std::vector<size_t> numProfiles;
+  if (options.verbose) {
+    for (size_t stamp : data.timestamps) {
+      numProfiles.push_back(stamp ? 1 : 0);
+    }
+  }
+
+  // Fold in the remaining profiles, keeping the minimum nonzero timestamp for
+  // each entry.
+  for (size_t i = 1; i < options.inputFiles.size(); ++i) {
+    ProfileData newData = readProfile(options.inputFiles[i]);
+    if (newData.hash != data.hash) {
+      Fatal() << "Checksum in profile " << options.inputFiles[i]
+              << " does not match hash in profile " << options.inputFiles[0];
+    }
+    if (newData.timestamps.size() != data.timestamps.size()) {
+      Fatal() << "Profile " << options.inputFiles[i]
+              << " incompatible with profile " << options.inputFiles[0];
+    }
+    for (size_t t = 0; t < data.timestamps.size(); ++t) {
+      const size_t incoming = newData.timestamps[t];
+      if (incoming == 0) {
+        // A zero entry neither changes the merged value nor counts.
+        continue;
+      }
+      size_t& merged = data.timestamps[t];
+      merged = merged ? std::min(merged, incoming) : incoming;
+      if (options.verbose) {
+        ++numProfiles[t];
+      }
+    }
+  }
+
+  // Report profiles that have no entry unique to them.
+  if (options.verbose) {
+    for (const auto& file : options.inputFiles) {
+      ProfileData candidate = readProfile(file);
+      bool contributes = false;
+      for (size_t t = 0; !contributes && t < candidate.timestamps.size();
+           ++t) {
+        contributes = candidate.timestamps[t] && numProfiles[t] == 1;
+      }
+      if (!contributes) {
+        std::cout << "Profile " << file
+                  << " only includes functions included in other profiles.\n";
+      }
+    }
+  }
+
+  // Serialize the merged profile: the hash first, then one 32-bit value per
+  // entry.
+  BufferWithRandomAccess buffer;
+  buffer << data.hash;
+  for (size_t stamp : data.timestamps) {
+    buffer << uint32_t(stamp);
+  }
+  Output out(options.output, Flags::Binary);
+  buffer.writeTo(out.getStream());
+}
+
+} // anonymous namespace
+
+// Entry point: parses and validates the command line, then dispatches to the
+// routine for the selected mode.
+int main(int argc, const char* argv[]) {
+  WasmSplitOptions options;
+  options.parse(argc, argv);
+
+  const bool argumentsValid = options.validate();
+  if (!argumentsValid) {
+    Fatal() << "Invalid command line arguments";
+  }
+
+  switch (options.mode) {
+    case WasmSplitOptions::Mode::Instrument:
+      instrumentModule(options);
+      break;
+    case WasmSplitOptions::Mode::MergeProfiles:
+      mergeProfiles(options);
+      break;
+    case WasmSplitOptions::Mode::Split:
+      splitModule(options);
+      break;
+  }
+
+  return 0;
+}