/* * Copyright 2020 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // wasm-split: Split a module in two or instrument a module to inform future // splitting. #include #include "ir/module-splitting.h" #include "ir/names.h" #include "support/file.h" #include "support/name.h" #include "support/path.h" #include "support/utilities.h" #include "wasm-binary.h" #include "wasm-builder.h" #include "wasm-io.h" #include "wasm-validator.h" #include "instrumenter.h" #include "split-options.h" using namespace wasm; namespace { void parseInput(Module& wasm, const WasmSplitOptions& options) { options.applyOptionsBeforeParse(wasm); ModuleReader reader; reader.setProfile(options.profile); try { reader.read(options.inputFiles[0], wasm); } catch (ParseException& p) { p.dump(std::cerr); std::cerr << '\n'; Fatal() << "error parsing wasm"; } catch (std::bad_alloc&) { Fatal() << "error building module, std::bad_alloc (possibly invalid " "request for silly amounts of memory)"; } options.applyOptionsAfterParse(wasm); if (options.passOptions.validate && !WasmValidator().validate(wasm)) { Fatal() << "error validating input"; } } uint64_t hashFile(const std::string& filename) { auto contents(read_file>(filename, Flags::Binary)); size_t digest = 0; // Don't use `hash` or `rehash` - they aren't deterministic between executions for (char c : contents) { hash_combine(digest, c); } return uint64_t(digest); } void adjustTableSize(Module& wasm, int initialSize, bool secondary = false) { if (initialSize < 0) { return; } if (wasm.tables.empty()) { if (secondary) { // It's not a problem if the table is not used in the secondary module. return; } Fatal() << "--initial-table used but there is no table"; } auto& table = wasm.tables.front(); if ((uint64_t)initialSize < table->initial) { Fatal() << "Specified initial table size too small, should be at least " << table->initial; } if ((uint64_t)initialSize > table->max) { Fatal() << "Specified initial table size larger than max table size " << table->max; } table->initial = initialSize; } void writeModule(Module& wasm, std::string filename, const WasmSplitOptions& options) { ModuleWriter writer(options.passOptions); writer.setBinary(options.emitBinary); writer.setDebugInfo(options.passOptions.debugInfo); if (options.emitModuleNames) { writer.setEmitModuleName(true); } writer.write(wasm, filename); } void instrumentModule(const WasmSplitOptions& options) { Module wasm; parseInput(wasm, options); // Check that the profile export name is not already taken if (wasm.getExportOrNull(options.profileExport) != nullptr) { Fatal() << "error: Export " << options.profileExport << " already exists."; } uint64_t moduleHash = hashFile(options.inputFiles[0]); InstrumenterConfig config; if (options.importNamespace.size()) { config.importNamespace = options.importNamespace; } if (options.secondaryMemoryName.size()) { config.secondaryMemoryName = options.secondaryMemoryName; } config.storageKind = options.storageKind; config.profileExport = options.profileExport; PassRunner runner(&wasm, options.passOptions); runner.add(std::make_unique(config, moduleHash)); runner.run(); adjustTableSize(wasm, options.initialTableSize); // Write the output modules writeModule(wasm, options.output, options); } struct ProfileData { uint64_t hash; std::vector timestamps; }; // See "wasm-split profile format" in instrumenter.cpp for more information. ProfileData readProfile(const std::string& file) { auto profileData = read_file>(file, Flags::Binary); size_t i = 0; auto readi32 = [&]() { if (i + 4 > profileData.size()) { Fatal() << "Unexpected end of profile data in " << file; } uint32_t i32 = 0; i32 |= uint32_t(uint8_t(profileData[i++])); i32 |= uint32_t(uint8_t(profileData[i++])) << 8; i32 |= uint32_t(uint8_t(profileData[i++])) << 16; i32 |= uint32_t(uint8_t(profileData[i++])) << 24; return i32; }; uint64_t hash = readi32(); hash |= uint64_t(readi32()) << 32; std::vector timestamps; while (i < profileData.size()) { timestamps.push_back(readi32()); } return {hash, timestamps}; } void getFunctionsToKeepAndSplit(Module& wasm, uint64_t wasmHash, const std::string& profileFile, std::set& keepFuncs, std::set& splitFuncs) { ProfileData profile = readProfile(profileFile); if (profile.hash != wasmHash) { Fatal() << "error: checksum in profile does not match module checksum. " << "The module to split must be the original, uninstrumented " "module, not the module used to generate the profile."; } size_t i = 0; ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { if (i >= profile.timestamps.size()) { Fatal() << "Unexpected end of profile data"; } if (profile.timestamps[i++] > 0) { keepFuncs.insert(func->name); } else { splitFuncs.insert(func->name); } }); if (i != profile.timestamps.size()) { Fatal() << "Unexpected extra profile data"; } } void writeSymbolMap(Module& wasm, std::string filename) { PassOptions options; PassRunner runner(&wasm, options); runner.add("symbolmap", filename); runner.run(); } void writePlaceholderMap(const std::map placeholderMap, std::string filename) { Output output(filename, Flags::Text); auto& o = output.getStream(); for (auto& [index, func] : placeholderMap) { o << index << ':' << func << '\n'; } } void splitModule(const WasmSplitOptions& options) { Module wasm; parseInput(wasm, options); // All defined functions will be in one set or the other. std::set keepFuncs; std::set splitFuncs; if (options.profileFile.size()) { // Use the profile to set `keepFuncs` and `splitFuncs`. uint64_t hash = hashFile(options.inputFiles[0]); getFunctionsToKeepAndSplit( wasm, hash, options.profileFile, keepFuncs, splitFuncs); } else { // Normally the default is to keep each function, but if --keep-funcs is the // only thing specified, then all other functions will be split. bool defaultSplit = options.hasKeepFuncs && !options.hasSplitFuncs; if (defaultSplit) { ModuleUtils::iterDefinedFunctions( wasm, [&](Function* func) { splitFuncs.insert(func->name); }); } else { ModuleUtils::iterDefinedFunctions( wasm, [&](Function* func) { keepFuncs.insert(func->name); }); } } // Use the explicitly provided `keepFuncs`. for (auto& func : options.keepFuncs) { if (!wasm.getFunctionOrNull(func)) { if (!options.quiet) { std::cerr << "warning: function " << func << " does not exist\n"; } continue; } keepFuncs.insert(func); splitFuncs.erase(func); } // Use the explicitly provided `splitFuncs`. for (auto& func : options.splitFuncs) { auto* function = wasm.getFunctionOrNull(func); if (!function) { if (!options.quiet) { std::cerr << "warning: function " << func << " does not exist\n"; } continue; } if (function->imported()) { if (!options.quiet) { std::cerr << "warning: cannot split out imported function " << func << "\n"; } continue; } if (!options.quiet && options.keepFuncs.count(func)) { std::cerr << "warning: function " << func << " was to be both kept and split. It will be split.\n"; } splitFuncs.insert(func); keepFuncs.erase(func); } if (!options.quiet && keepFuncs.size() == 0) { std::cerr << "warning: not keeping any functions in the primary module\n"; } if (options.jspi) { // The load secondary module function must be kept in the main module. keepFuncs.insert(ModuleSplitting::LOAD_SECONDARY_MODULE); splitFuncs.erase(ModuleSplitting::LOAD_SECONDARY_MODULE); } // If warnings are enabled, check that any functions are being split out. if (!options.quiet && splitFuncs.size() == 0) { std::cerr << "warning: not splitting any functions out to the secondary module\n"; } // Dump the kept and split functions if we are verbose. if (options.verbose) { auto printCommaSeparated = [&](auto funcs) { for (auto it = funcs.begin(); it != funcs.end(); ++it) { if (it != funcs.begin()) { std::cout << ", "; } std::cout << *it; } }; std::cout << "Keeping functions: "; printCommaSeparated(keepFuncs); std::cout << "\n"; std::cout << "Splitting out functions: "; printCommaSeparated(splitFuncs); std::cout << "\n"; } #ifndef NDEBUG // Check that all defined functions are in one set or the other. ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { assert(keepFuncs.count(func->name) || splitFuncs.count(func->name)); }); #endif // NDEBUG // Actually perform the splitting ModuleSplitting::Config config; config.secondaryFuncs = std::move(splitFuncs); if (options.importNamespace.size()) { config.importNamespace = options.importNamespace; } if (options.placeholderNamespace.size()) { config.placeholderNamespace = options.placeholderNamespace; } if (options.exportPrefix.size()) { config.newExportPrefix = options.exportPrefix; } config.usePlaceholders = options.usePlaceholders; config.minimizeNewExportNames = !options.passOptions.debugInfo; config.jspi = options.jspi; auto splitResults = ModuleSplitting::splitFunctions(wasm, config); auto& secondary = splitResults.secondary; adjustTableSize(wasm, options.initialTableSize); adjustTableSize(*secondary, options.initialTableSize, /*secondary=*/true); if (options.symbolMap) { writeSymbolMap(wasm, options.primaryOutput + ".symbols"); writeSymbolMap(*secondary, options.secondaryOutput + ".symbols"); } if (options.placeholderMap) { writePlaceholderMap(splitResults.placeholderMap, options.primaryOutput + ".placeholders"); } // Set the names of the split modules. This can help differentiate them in // stack traces. if (options.emitModuleNames) { if (!wasm.name) { wasm.name = Path::getBaseName(options.primaryOutput); } secondary->name = Path::getBaseName(options.secondaryOutput); } // write the output modules writeModule(wasm, options.primaryOutput, options); writeModule(*secondary, options.secondaryOutput, options); } void multiSplitModule(const WasmSplitOptions& options) { if (options.manifestFile.empty()) { Fatal() << "--multi-split requires --manifest"; } if (options.output.empty()) { Fatal() << "--multi-split requires --output"; } std::ifstream manifest(options.manifestFile); if (!manifest.is_open()) { Fatal() << "File not found: " << options.manifestFile; } Module wasm; parseInput(wasm, options); // Map module names to the functions that should be in the modules. std::map> moduleFuncs; // The module for which we are currently parsing a set of functions. std::string currModule; // The set of functions we are currently inserting into. std::unordered_set* currFuncs = nullptr; // Map functions to their modules to ensure no function is assigned to // multiple modules. std::unordered_map funcModules; std::string line; bool newSection = true; while (std::getline(manifest, line)) { if (line.empty()) { newSection = true; continue; } if (newSection) { currModule = line; currFuncs = &moduleFuncs[line]; newSection = false; continue; } assert(currFuncs); currFuncs->insert(line); auto [it, inserted] = funcModules.insert({line, currModule}); if (!inserted && it->second != currModule) { Fatal() << "Function " << line << "cannot be assigned to module " << currModule << "; it is already assigned to module " << it->second << '\n'; } if (inserted && !options.quiet && !wasm.getFunctionOrNull(line)) { std::cerr << "warning: Function " << line << " does not exist\n"; } } ModuleSplitting::Config config; config.usePlaceholders = false; config.importNamespace = ""; config.minimizeNewExportNames = true; for (auto& [mod, funcs] : moduleFuncs) { if (options.verbose) { std::cerr << "Splitting module " << mod << '\n'; } if (!options.quiet && funcs.empty()) { std::cerr << "warning: Module " << mod << " will be empty\n"; } config.secondaryFuncs = std::set(funcs.begin(), funcs.end()); auto splitResults = ModuleSplitting::splitFunctions(wasm, config); // TODO: symbolMap, placeholderMap, emitModuleNames // TODO: Support --emit-text and use .wast in that case. auto moduleName = options.outPrefix + mod + ".wasm"; writeModule(*splitResults.secondary, moduleName, options); } writeModule(wasm, options.output, options); } void mergeProfiles(const WasmSplitOptions& options) { // Read the initial profile. We will merge other profiles into this one. ProfileData data = readProfile(options.inputFiles[0]); // In verbose mode, we want to find profiles that don't contribute to the // merged profile. To do that, keep track of how many profiles each function // appears in. If any profile contains only functions that appear in multiple // profiles, it could be dropped. std::vector numProfiles; if (options.verbose) { numProfiles.resize(data.timestamps.size()); for (size_t t = 0; t < data.timestamps.size(); ++t) { if (data.timestamps[t]) { numProfiles[t] = 1; } } } // Read all the other profiles, taking the minimum nonzero timestamp for each // function. for (size_t i = 1; i < options.inputFiles.size(); ++i) { ProfileData newData = readProfile(options.inputFiles[i]); if (newData.hash != data.hash) { Fatal() << "Checksum in profile " << options.inputFiles[i] << " does not match hash in profile " << options.inputFiles[0]; } if (newData.timestamps.size() != data.timestamps.size()) { Fatal() << "Profile " << options.inputFiles[i] << " incompatible with profile " << options.inputFiles[0]; } for (size_t t = 0; t < data.timestamps.size(); ++t) { if (data.timestamps[t] && newData.timestamps[t]) { data.timestamps[t] = std::min(data.timestamps[t], newData.timestamps[t]); } else if (newData.timestamps[t]) { data.timestamps[t] = newData.timestamps[t]; } if (options.verbose && newData.timestamps[t]) { ++numProfiles[t]; } } } // Check for useless profiles. if (options.verbose) { for (const auto& file : options.inputFiles) { bool useless = true; ProfileData newData = readProfile(file); for (size_t t = 0; t < newData.timestamps.size(); ++t) { if (newData.timestamps[t] && numProfiles[t] == 1) { useless = false; break; } } if (useless) { std::cout << "Profile " << file << " only includes functions included in other profiles.\n"; } } } // Write the combined profile. BufferWithRandomAccess buffer; buffer << data.hash; for (size_t t = 0; t < data.timestamps.size(); ++t) { buffer << uint32_t(data.timestamps[t]); } Output out(options.output, Flags::Binary); buffer.writeTo(out.getStream()); } std::string unescape(std::string input) { std::string output; for (size_t i = 0; i < input.length(); i++) { if ((input[i] == '\\') && (i + 2 < input.length()) && isxdigit(input[i + 1]) && isxdigit(input[i + 2])) { std::string byte = input.substr(i + 1, 2); i += 2; char chr = (char)(int)strtol(byte.c_str(), nullptr, 16); output.push_back(chr); } else { output.push_back(input[i]); } } return output; } void checkExists(const std::string& path) { std::ifstream infile(path); if (!infile.is_open()) { Fatal() << "File not found: " << path; } } void printReadableProfile(const WasmSplitOptions& options) { const std::string wasmFile(options.inputFiles[0]); checkExists(options.profileFile); checkExists(wasmFile); Module wasm; parseInput(wasm, options); std::set keepFuncs; std::set splitFuncs; uint64_t hash = hashFile(wasmFile); getFunctionsToKeepAndSplit( wasm, hash, options.profileFile, keepFuncs, splitFuncs); auto printFnSet = [&](auto funcs, std::string prefix) { for (auto it = funcs.begin(); it != funcs.end(); ++it) { std::cout << prefix << " " << (options.unescape ? unescape(it->toString()) : it->toString()) << std::endl; } }; std::cout << "Keeping functions: " << std::endl; printFnSet(keepFuncs, "+"); std::cout << std::endl; std::cout << "Splitting out functions: " << std::endl; printFnSet(splitFuncs, "-"); std::cout << std::endl; } } // anonymous namespace int main(int argc, const char* argv[]) { WasmSplitOptions options; options.parse(argc, argv); if (!options.validate()) { Fatal() << "Invalid command line arguments"; } switch (options.mode) { case WasmSplitOptions::Mode::Split: splitModule(options); break; case WasmSplitOptions::Mode::MultiSplit: multiSplitModule(options); break; case WasmSplitOptions::Mode::Instrument: instrumentModule(options); break; case WasmSplitOptions::Mode::MergeProfiles: mergeProfiles(options); break; case WasmSplitOptions::Mode::PrintProfile: printReadableProfile(options); break; } }