From 16c1e3caff045666c5807b90998857da6ed8da74 Mon Sep 17 00:00:00 2001 From: Thomas Lively <7121787+tlively@users.noreply.github.com> Date: Wed, 2 Dec 2020 09:07:12 -0800 Subject: [wasm-split] Record checksums in profiles (#3412) Calculate a checksum of the original uninstrumented module and emit it as part of the profile data. When reading the profile, compare the checksum it contains to the checksum of the module that is being split. Error out if the module being split is not the same as the module that was originally instrumented. Also fixes a bug in how the profile data was being read. When `char` is signed, bytes read from the profile were being incorrectly sign extended. We had not noticed this before because the profiles we have tested have contained only small-valued counts. --- src/tools/wasm-split.cpp | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'src/tools/wasm-split.cpp') diff --git a/src/tools/wasm-split.cpp b/src/tools/wasm-split.cpp index a363b825b..32a726791 100644 --- a/src/tools/wasm-split.cpp +++ b/src/tools/wasm-split.cpp @@ -359,7 +359,7 @@ void Instrumenter::instrumentFuncs() { }); } -// wasm-split profile format:: +// wasm-split profile format: // // The wasm-split profile is a binary format designed to be simple to produce // and consume. It is comprised of: @@ -443,14 +443,23 @@ void Instrumenter::addProfileExport() { // TODO: export the memory if it is not already exported. 
} +uint64_t hashFile(const std::string& filename) { + auto contents(read_file<std::vector<char>>(filename, Flags::Binary)); + size_t digest = 0; + // Don't use `hash` or `rehash` - they aren't deterministic between executions + for (char c : contents) { + hash_combine(digest, c); + } + return uint64_t(digest); +} + void instrumentModule(Module& wasm, const WasmSplitOptions& options) { // Check that the profile export name is not already taken if (wasm.getExportOrNull(options.profileExport) != nullptr) { Fatal() << "error: Export " << options.profileExport << " already exists."; } - // TODO: calculate module hash. - uint64_t moduleHash = 0; + uint64_t moduleHash = hashFile(options.input); PassRunner runner(&wasm, options.passOptions); Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); @@ -471,16 +480,21 @@ std::set<Name> readProfile(Module& wasm, const WasmSplitOptions& options) { Fatal() << "Unexpected end of profile data"; } uint32_t i32 = 0; - i32 |= uint32_t(profileData[i++]); - i32 |= uint32_t(profileData[i++]) << 8; - i32 |= uint32_t(profileData[i++]) << 16; - i32 |= uint32_t(profileData[i++]) << 24; + i32 |= uint32_t(uint8_t(profileData[i++])); + i32 |= uint32_t(uint8_t(profileData[i++])) << 8; + i32 |= uint32_t(uint8_t(profileData[i++])) << 16; + i32 |= uint32_t(uint8_t(profileData[i++])) << 24; return i32; }; - // TODO: Read and compare the 8-byte module hash. Just skip it for now. - readi32(); - readi32(); + // Read and compare the 8-byte module hash. + uint64_t expected = readi32(); + expected |= uint64_t(readi32()) << 32; + if (expected != hashFile(options.input)) { + Fatal() << "error: checksum in profile does not match module checksum. " + << "The split module must be the original module that was " + << "instrumented to generate the profile."; + } std::set<Name> keptFuncs; ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { -- cgit v1.2.3