diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2020-12-02 09:07:12 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-02 09:07:12 -0800 |
commit | 16c1e3caff045666c5807b90998857da6ed8da74 (patch) | |
tree | dbede26f800b402a5c5bd7e033b536076f5fab47 /src/tools/wasm-split.cpp | |
parent | 3b1d58aadd37ce8758447c9685d5c3a7bec9c418 (diff) | |
download | binaryen-16c1e3caff045666c5807b90998857da6ed8da74.tar.gz binaryen-16c1e3caff045666c5807b90998857da6ed8da74.tar.bz2 binaryen-16c1e3caff045666c5807b90998857da6ed8da74.zip |
[wasm-split] Record checksums in profiles (#3412)
Calculate a checksum of the original uninstrumented module and emit it as part
of the profile data. When reading the profile, compare the checksum it contains
to the checksum of the module that is being split. Error out if the module being
split is not the same as the module that was originally instrumented.
Also fixes a bug in how the profile data was being read. When `char` is signed,
bytes read from the profile were being incorrectly sign extended. We had not
noticed this before because the profiles we have tested have contained only
small-valued counts.
Diffstat (limited to 'src/tools/wasm-split.cpp')
-rw-r--r-- | src/tools/wasm-split.cpp | 34 |
1 files changed, 24 insertions, 10 deletions
diff --git a/src/tools/wasm-split.cpp b/src/tools/wasm-split.cpp index a363b825b..32a726791 100644 --- a/src/tools/wasm-split.cpp +++ b/src/tools/wasm-split.cpp @@ -359,7 +359,7 @@ void Instrumenter::instrumentFuncs() { }); } -// wasm-split profile format:: +// wasm-split profile format: // // The wasm-split profile is a binary format designed to be simple to produce // and consume. It is comprised of: @@ -443,14 +443,23 @@ void Instrumenter::addProfileExport() { // TODO: export the memory if it is not already exported. } +uint64_t hashFile(const std::string& filename) { + auto contents(read_file<std::vector<char>>(filename, Flags::Binary)); + size_t digest = 0; + // Don't use `hash` or `rehash` - they aren't deterministic between executions + for (char c : contents) { + hash_combine(digest, c); + } + return uint64_t(digest); +} + void instrumentModule(Module& wasm, const WasmSplitOptions& options) { // Check that the profile export name is not already taken if (wasm.getExportOrNull(options.profileExport) != nullptr) { Fatal() << "error: Export " << options.profileExport << " already exists."; } - // TODO: calculate module hash. - uint64_t moduleHash = 0; + uint64_t moduleHash = hashFile(options.input); PassRunner runner(&wasm, options.passOptions); Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); @@ -471,16 +480,21 @@ std::set<Name> readProfile(Module& wasm, const WasmSplitOptions& options) { Fatal() << "Unexpected end of profile data"; } uint32_t i32 = 0; - i32 |= uint32_t(profileData[i++]); - i32 |= uint32_t(profileData[i++]) << 8; - i32 |= uint32_t(profileData[i++]) << 16; - i32 |= uint32_t(profileData[i++]) << 24; + i32 |= uint32_t(uint8_t(profileData[i++])); + i32 |= uint32_t(uint8_t(profileData[i++])) << 8; + i32 |= uint32_t(uint8_t(profileData[i++])) << 16; + i32 |= uint32_t(uint8_t(profileData[i++])) << 24; return i32; }; - // TODO: Read and compare the 8-byte module hash. Just skip it for now. - readi32(); - readi32(); + // Read and compare the 8-byte module hash. + uint64_t expected = readi32(); + expected |= uint64_t(readi32()) << 32; + if (expected != hashFile(options.input)) { + Fatal() << "error: checksum in profile does not match module checksum. " + << "The split module must be the original module that was " + << "instrumented to generate the profile."; + } std::set<Name> keptFuncs; ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { |