diff options
Diffstat (limited to 'src/tools/wasm-split/wasm-split.cpp')
-rw-r--r-- | src/tools/wasm-split/wasm-split.cpp | 396 |
1 files changed, 396 insertions, 0 deletions
diff --git a/src/tools/wasm-split/wasm-split.cpp b/src/tools/wasm-split/wasm-split.cpp new file mode 100644 index 000000000..bedba8957 --- /dev/null +++ b/src/tools/wasm-split/wasm-split.cpp @@ -0,0 +1,396 @@ +/* + * Copyright 2020 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// wasm-split: Split a module in two or instrument a module to inform future +// splitting. + +#include "ir/module-splitting.h" +#include "ir/names.h" +#include "support/file.h" +#include "support/name.h" +#include "support/path.h" +#include "support/utilities.h" +#include "wasm-binary.h" +#include "wasm-builder.h" +#include "wasm-io.h" +#include "wasm-validator.h" + +#include "instrumenter.h" +#include "split-options.h" + +using namespace wasm; + +namespace { + +void parseInput(Module& wasm, const WasmSplitOptions& options) { + options.applyFeatures(wasm); + ModuleReader reader; + reader.setProfile(options.profile); + try { + reader.read(options.inputFiles[0], wasm); + } catch (ParseException& p) { + p.dump(std::cerr); + std::cerr << '\n'; + Fatal() << "error parsing wasm"; + } catch (std::bad_alloc&) { + Fatal() << "error building module, std::bad_alloc (possibly invalid " + "request for silly amounts of memory)"; + } + + if (options.passOptions.validate && !WasmValidator().validate(wasm)) { + Fatal() << "error validating input"; + } +} + +uint64_t hashFile(const std::string& filename) { + auto contents(read_file<std::vector<char>>(filename, Flags::Binary)); + size_t digest = 0; + // Don't use `hash` or `rehash` - they aren't deterministic between executions + for (char c : contents) { + hash_combine(digest, c); + } + return uint64_t(digest); +} + +void adjustTableSize(Module& wasm, int initialSize) { + if (initialSize < 0) { + return; + } + if (wasm.tables.empty()) { + Fatal() << "--initial-table used but there is no table"; + } + + auto& table = wasm.tables.front(); + + if ((uint64_t)initialSize < table->initial) { + Fatal() << "Specified initial table size too small, should be at least " + << table->initial; + } + if ((uint64_t)initialSize > table->max) { + Fatal() << "Specified initial table size larger than max table size " + << table->max; + } + table->initial = initialSize; +} + +void writeModule(Module& wasm, + std::string filename, + const WasmSplitOptions& options) { + ModuleWriter writer; + writer.setBinary(options.emitBinary); + writer.setDebugInfo(options.passOptions.debugInfo); + if (options.emitModuleNames) { + writer.setEmitModuleName(true); + } + writer.write(wasm, filename); +} + +void instrumentModule(const WasmSplitOptions& options) { + Module wasm; + parseInput(wasm, options); + + // Check that the profile export name is not already taken + if (wasm.getExportOrNull(options.profileExport) != nullptr) { + Fatal() << "error: Export " << options.profileExport << " already exists."; + } + + uint64_t moduleHash = hashFile(options.inputFiles[0]); + PassRunner runner(&wasm, options.passOptions); + Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); + + adjustTableSize(wasm, options.initialTableSize); + + // Write the output modules + writeModule(wasm, options.output, options); +} + +struct ProfileData { + uint64_t hash; + std::vector<size_t> timestamps; +}; + +// See "wasm-split profile format" in instrumenter.cpp for more information. +ProfileData readProfile(const std::string& file) { + auto profileData = read_file<std::vector<char>>(file, Flags::Binary); + size_t i = 0; + auto readi32 = [&]() { + if (i + 4 > profileData.size()) { + Fatal() << "Unexpected end of profile data in " << file; + } + uint32_t i32 = 0; + i32 |= uint32_t(uint8_t(profileData[i++])); + i32 |= uint32_t(uint8_t(profileData[i++])) << 8; + i32 |= uint32_t(uint8_t(profileData[i++])) << 16; + i32 |= uint32_t(uint8_t(profileData[i++])) << 24; + return i32; + }; + + uint64_t hash = readi32(); + hash |= uint64_t(readi32()) << 32; + + std::vector<size_t> timestamps; + while (i < profileData.size()) { + timestamps.push_back(readi32()); + } + + return {hash, timestamps}; +} + +void writeSymbolMap(Module& wasm, std::string filename) { + PassOptions options; + options.arguments["symbolmap"] = filename; + PassRunner runner(&wasm, options); + runner.add("symbolmap"); + runner.run(); +} + +void writePlaceholderMap(const std::map<size_t, Name> placeholderMap, + std::string filename) { + Output output(filename, Flags::Text); + auto& o = output.getStream(); + for (auto pair : placeholderMap) { + o << pair.first << ':' << pair.second << '\n'; + } +} + +void splitModule(const WasmSplitOptions& options) { + Module wasm; + parseInput(wasm, options); + + std::set<Name> keepFuncs; + + if (options.profileFile.size()) { + // Use the profile to initialize `keepFuncs`. + uint64_t hash = hashFile(options.inputFiles[0]); + ProfileData profile = readProfile(options.profileFile); + if (profile.hash != hash) { + Fatal() << "error: checksum in profile does not match module checksum. " + << "The split module must be the original module that was " + << "instrumented to generate the profile."; + } + size_t i = 0; + ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { + if (i >= profile.timestamps.size()) { + Fatal() << "Unexpected end of profile data"; + } + if (profile.timestamps[i++] > 0) { + keepFuncs.insert(func->name); + } + }); + if (i != profile.timestamps.size()) { + Fatal() << "Unexpected extra profile data"; + } + } + + // Add in the functions specified with --keep-funcs + for (auto& func : options.keepFuncs) { + if (!options.quiet && wasm.getFunctionOrNull(func) == nullptr) { + std::cerr << "warning: function " << func << " does not exist\n"; + } + keepFuncs.insert(func); + } + + // Remove the functions specified with --remove-funcs + for (auto& func : options.splitFuncs) { + auto* function = wasm.getFunctionOrNull(func); + if (!options.quiet && function == nullptr) { + std::cerr << "warning: function " << func << " does not exist\n"; + } + if (function && function->imported()) { + if (!options.quiet) { + std::cerr << "warning: cannot split out imported function " << func + << "\n"; + } + } else { + keepFuncs.erase(func); + } + } + + if (!options.quiet && keepFuncs.size() == 0) { + std::cerr << "warning: not keeping any functions in the primary module\n"; + } + + // If warnings are enabled, check that any functions are being split out. + if (!options.quiet) { + std::set<Name> splitFuncs; + ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { + if (keepFuncs.count(func->name) == 0) { + splitFuncs.insert(func->name); + } + }); + + if (splitFuncs.size() == 0) { + std::cerr + << "warning: not splitting any functions out to the secondary module\n"; + } + + // Dump the kept and split functions if we are verbose + if (options.verbose) { + auto printCommaSeparated = [&](auto funcs) { + for (auto it = funcs.begin(); it != funcs.end(); ++it) { + if (it != funcs.begin()) { + std::cout << ", "; + } + std::cout << *it; + } + }; + + std::cout << "Keeping functions: "; + printCommaSeparated(keepFuncs); + std::cout << "\n"; + + std::cout << "Splitting out functions: "; + printCommaSeparated(splitFuncs); + std::cout << "\n"; + } + } + + // Actually perform the splitting + ModuleSplitting::Config config; + config.primaryFuncs = std::move(keepFuncs); + if (options.importNamespace.size()) { + config.importNamespace = options.importNamespace; + } + if (options.placeholderNamespace.size()) { + config.placeholderNamespace = options.placeholderNamespace; + } + if (options.exportPrefix.size()) { + config.newExportPrefix = options.exportPrefix; + } + config.minimizeNewExportNames = !options.passOptions.debugInfo; + auto splitResults = ModuleSplitting::splitFunctions(wasm, config); + auto& secondary = splitResults.secondary; + + adjustTableSize(wasm, options.initialTableSize); + adjustTableSize(*secondary, options.initialTableSize); + + if (options.symbolMap) { + writeSymbolMap(wasm, options.primaryOutput + ".symbols"); + writeSymbolMap(*secondary, options.secondaryOutput + ".symbols"); + } + + if (options.placeholderMap) { + writePlaceholderMap(splitResults.placeholderMap, + options.primaryOutput + ".placeholders"); + } + + // Set the names of the split modules. This can help differentiate them in + // stack traces. + if (options.emitModuleNames) { + if (!wasm.name) { + wasm.name = Path::getBaseName(options.primaryOutput); + } + secondary->name = Path::getBaseName(options.secondaryOutput); + } + + // write the output modules + writeModule(wasm, options.primaryOutput, options); + writeModule(*secondary, options.secondaryOutput, options); +} + +void mergeProfiles(const WasmSplitOptions& options) { + // Read the initial profile. We will merge other profiles into this one. + ProfileData data = readProfile(options.inputFiles[0]); + + // In verbose mode, we want to find profiles that don't contribute to the + // merged profile. To do that, keep track of how many profiles each function + // appears in. If any profile contains only functions that appear in multiple + // profiles, it could be dropped. + std::vector<size_t> numProfiles; + if (options.verbose) { + numProfiles.resize(data.timestamps.size()); + for (size_t t = 0; t < data.timestamps.size(); ++t) { + if (data.timestamps[t]) { + numProfiles[t] = 1; + } + } + } + + // Read all the other profiles, taking the minimum nonzero timestamp for each + // function. + for (size_t i = 1; i < options.inputFiles.size(); ++i) { + ProfileData newData = readProfile(options.inputFiles[i]); + if (newData.hash != data.hash) { + Fatal() << "Checksum in profile " << options.inputFiles[i] + << " does not match hash in profile " << options.inputFiles[0]; + } + if (newData.timestamps.size() != data.timestamps.size()) { + Fatal() << "Profile " << options.inputFiles[i] + << " incompatible with profile " << options.inputFiles[0]; + } + for (size_t t = 0; t < data.timestamps.size(); ++t) { + if (data.timestamps[t] && newData.timestamps[t]) { + data.timestamps[t] = + std::min(data.timestamps[t], newData.timestamps[t]); + } else if (newData.timestamps[t]) { + data.timestamps[t] = newData.timestamps[t]; + } + if (options.verbose && newData.timestamps[t]) { + ++numProfiles[t]; + } + } + } + + // Check for useless profiles. + if (options.verbose) { + for (const auto& file : options.inputFiles) { + bool useless = true; + ProfileData newData = readProfile(file); + for (size_t t = 0; t < newData.timestamps.size(); ++t) { + if (newData.timestamps[t] && numProfiles[t] == 1) { + useless = false; + break; + } + } + if (useless) { + std::cout << "Profile " << file + << " only includes functions included in other profiles.\n"; + } + } + } + + // Write the combined profile. + BufferWithRandomAccess buffer; + buffer << data.hash; + for (size_t t = 0; t < data.timestamps.size(); ++t) { + buffer << uint32_t(data.timestamps[t]); + } + Output out(options.output, Flags::Binary); + buffer.writeTo(out.getStream()); +} + +} // anonymous namespace + +int main(int argc, const char* argv[]) { + WasmSplitOptions options; + options.parse(argc, argv); + + if (!options.validate()) { + Fatal() << "Invalid command line arguments"; + } + + switch (options.mode) { + case WasmSplitOptions::Mode::Split: + splitModule(options); + break; + case WasmSplitOptions::Mode::Instrument: + instrumentModule(options); + break; + case WasmSplitOptions::Mode::MergeProfiles: + mergeProfiles(options); + break; + } +} |