diff options
author | Thomas Lively <7121787+tlively@users.noreply.github.com> | 2020-12-02 09:07:12 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-02 09:07:12 -0800 |
commit | 16c1e3caff045666c5807b90998857da6ed8da74 (patch) | |
tree | dbede26f800b402a5c5bd7e033b536076f5fab47 | |
parent | 3b1d58aadd37ce8758447c9685d5c3a7bec9c418 (diff) | |
download | binaryen-16c1e3caff045666c5807b90998857da6ed8da74.tar.gz binaryen-16c1e3caff045666c5807b90998857da6ed8da74.tar.bz2 binaryen-16c1e3caff045666c5807b90998857da6ed8da74.zip |
[wasm-split] Record checksums in profiles (#3412)
Calculate a checksum of the original uninstrumented module and emit it as part
of the profile data. When reading the profile, compare the checksum it contains
to the checksum of the module that is being split. Error out if the module being
split is not the same as the module that was originally instrumented.
Also fixes a bug in how the profile data was being read. When `char` is signed,
bytes read from the profile were being incorrectly sign extended. We had not
noticed this before because the profiles we have tested have contained only
small-valued counts.
-rw-r--r-- | src/tools/wasm-split.cpp | 34 | ||||
-rw-r--r-- | test/lit/wasm-split/instrument-funcs.wast | 2 | ||||
-rw-r--r-- | test/lit/wasm-split/mismatched-hashes.wast | 23 |
3 files changed, 48 insertions, 11 deletions
diff --git a/src/tools/wasm-split.cpp b/src/tools/wasm-split.cpp index a363b825b..32a726791 100644 --- a/src/tools/wasm-split.cpp +++ b/src/tools/wasm-split.cpp @@ -359,7 +359,7 @@ void Instrumenter::instrumentFuncs() { }); } -// wasm-split profile format:: +// wasm-split profile format: // // The wasm-split profile is a binary format designed to be simple to produce // and consume. It is comprised of: @@ -443,14 +443,23 @@ void Instrumenter::addProfileExport() { // TODO: export the memory if it is not already exported. } +uint64_t hashFile(const std::string& filename) { + auto contents(read_file<std::vector<char>>(filename, Flags::Binary)); + size_t digest = 0; + // Don't use `hash` or `rehash` - they aren't deterministic between executions + for (char c : contents) { + hash_combine(digest, c); + } + return uint64_t(digest); +} + void instrumentModule(Module& wasm, const WasmSplitOptions& options) { // Check that the profile export name is not already taken if (wasm.getExportOrNull(options.profileExport) != nullptr) { Fatal() << "error: Export " << options.profileExport << " already exists."; } - // TODO: calculate module hash. - uint64_t moduleHash = 0; + uint64_t moduleHash = hashFile(options.input); PassRunner runner(&wasm, options.passOptions); Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm); @@ -471,16 +480,21 @@ std::set<Name> readProfile(Module& wasm, const WasmSplitOptions& options) { Fatal() << "Unexpected end of profile data"; } uint32_t i32 = 0; - i32 |= uint32_t(profileData[i++]); - i32 |= uint32_t(profileData[i++]) << 8; - i32 |= uint32_t(profileData[i++]) << 16; - i32 |= uint32_t(profileData[i++]) << 24; + i32 |= uint32_t(uint8_t(profileData[i++])); + i32 |= uint32_t(uint8_t(profileData[i++])) << 8; + i32 |= uint32_t(uint8_t(profileData[i++])) << 16; + i32 |= uint32_t(uint8_t(profileData[i++])) << 24; return i32; }; - // TODO: Read and compare the 8-byte module hash. Just skip it for now. - readi32(); - readi32(); + // Read and compare the 8-byte module hash. + uint64_t expected = readi32(); + expected |= uint64_t(readi32()) << 32; + if (expected != hashFile(options.input)) { + Fatal() << "error: checksum in profile does not match module checksum. " + << "The split module must be the original module that was " + << "instrumented to generate the profile."; + } std::set<Name> keptFuncs; ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) { diff --git a/test/lit/wasm-split/instrument-funcs.wast b/test/lit/wasm-split/instrument-funcs.wast index bdd222d82..4b729aed2 100644 --- a/test/lit/wasm-split/instrument-funcs.wast +++ b/test/lit/wasm-split/instrument-funcs.wast @@ -59,7 +59,7 @@ ;; CHECK-NEXT: (block ;; CHECK-NEXT: (i64.store align=1 ;; CHECK-NEXT: (local.get $addr) -;; CHECK-NEXT: (i64.const 0) +;; CHECK-NEXT: (i64.const {{.*}}) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.store offset=8 align=1 ;; CHECK-NEXT: (local.get $addr) diff --git a/test/lit/wasm-split/mismatched-hashes.wast b/test/lit/wasm-split/mismatched-hashes.wast new file mode 100644 index 000000000..561135bbe --- /dev/null +++ b/test/lit/wasm-split/mismatched-hashes.wast @@ -0,0 +1,23 @@ +;; Check that using different inputs for the instrumentation and splitting steps +;; results in an error. + +;; Instrument the module +;; RUN: wasm-split --instrument %s -o %t.instrumented.wasm + +;; Generate a profile +;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.prof + +;; Attempt to split the instrumented module +;; RUN: not wasm-split %t.instrumented.wasm --profile=%t.prof -o1 %t.1.wasm -o2 %t.2.wasm \ +;; RUN: 2>&1 | filecheck %s + +;; CHECK: error: checksum in profile does not match module checksum. +;; CHECK-SAME: The split module must be the original module that was instrumented +;; CHECK-SAME: to generate the profile. + +;; Check that the matcing module succeeds +;; RUN: wasm-split %s --profile=%t.prof -o1 %t.1.wasm -o2 %t.2.wasm + +(module + (export "memory" (memory 0 0)) +) |