summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Lively <7121787+tlively@users.noreply.github.com>2020-12-02 09:07:12 -0800
committerGitHub <noreply@github.com>2020-12-02 09:07:12 -0800
commit16c1e3caff045666c5807b90998857da6ed8da74 (patch)
treedbede26f800b402a5c5bd7e033b536076f5fab47
parent3b1d58aadd37ce8758447c9685d5c3a7bec9c418 (diff)
downloadbinaryen-16c1e3caff045666c5807b90998857da6ed8da74.tar.gz
binaryen-16c1e3caff045666c5807b90998857da6ed8da74.tar.bz2
binaryen-16c1e3caff045666c5807b90998857da6ed8da74.zip
[wasm-split] Record checksums in profiles (#3412)
Calculate a checksum of the original uninstrumented module and emit it as part of the profile data. When reading the profile, compare the checksum it contains to the checksum of the module that is being split. Error out if the module being split is not the same as the module that was originally instrumented. Also fixes a bug in how the profile data was being read. When `char` is signed, bytes read from the profile were being incorrectly sign extended. We had not noticed this before because the profiles we have tested have contained only small-valued counts.
-rw-r--r--src/tools/wasm-split.cpp34
-rw-r--r--test/lit/wasm-split/instrument-funcs.wast2
-rw-r--r--test/lit/wasm-split/mismatched-hashes.wast23
3 files changed, 48 insertions, 11 deletions
diff --git a/src/tools/wasm-split.cpp b/src/tools/wasm-split.cpp
index a363b825b..32a726791 100644
--- a/src/tools/wasm-split.cpp
+++ b/src/tools/wasm-split.cpp
@@ -359,7 +359,7 @@ void Instrumenter::instrumentFuncs() {
});
}
-// wasm-split profile format::
+// wasm-split profile format:
//
// The wasm-split profile is a binary format designed to be simple to produce
// and consume. It is comprised of:
@@ -443,14 +443,23 @@ void Instrumenter::addProfileExport() {
// TODO: export the memory if it is not already exported.
}
+uint64_t hashFile(const std::string& filename) {
+ auto contents(read_file<std::vector<char>>(filename, Flags::Binary));
+ size_t digest = 0;
+ // Don't use `hash` or `rehash` - they aren't deterministic between executions
+ for (char c : contents) {
+ hash_combine(digest, c);
+ }
+ return uint64_t(digest);
+}
+
void instrumentModule(Module& wasm, const WasmSplitOptions& options) {
// Check that the profile export name is not already taken
if (wasm.getExportOrNull(options.profileExport) != nullptr) {
Fatal() << "error: Export " << options.profileExport << " already exists.";
}
- // TODO: calculate module hash.
- uint64_t moduleHash = 0;
+ uint64_t moduleHash = hashFile(options.input);
PassRunner runner(&wasm, options.passOptions);
Instrumenter(options.profileExport, moduleHash).run(&runner, &wasm);
@@ -471,16 +480,21 @@ std::set<Name> readProfile(Module& wasm, const WasmSplitOptions& options) {
Fatal() << "Unexpected end of profile data";
}
uint32_t i32 = 0;
- i32 |= uint32_t(profileData[i++]);
- i32 |= uint32_t(profileData[i++]) << 8;
- i32 |= uint32_t(profileData[i++]) << 16;
- i32 |= uint32_t(profileData[i++]) << 24;
+ i32 |= uint32_t(uint8_t(profileData[i++]));
+ i32 |= uint32_t(uint8_t(profileData[i++])) << 8;
+ i32 |= uint32_t(uint8_t(profileData[i++])) << 16;
+ i32 |= uint32_t(uint8_t(profileData[i++])) << 24;
return i32;
};
- // TODO: Read and compare the 8-byte module hash. Just skip it for now.
- readi32();
- readi32();
+ // Read and compare the 8-byte module hash.
+ uint64_t expected = readi32();
+ expected |= uint64_t(readi32()) << 32;
+ if (expected != hashFile(options.input)) {
+ Fatal() << "error: checksum in profile does not match module checksum. "
+ << "The split module must be the original module that was "
+ << "instrumented to generate the profile.";
+ }
std::set<Name> keptFuncs;
ModuleUtils::iterDefinedFunctions(wasm, [&](Function* func) {
diff --git a/test/lit/wasm-split/instrument-funcs.wast b/test/lit/wasm-split/instrument-funcs.wast
index bdd222d82..4b729aed2 100644
--- a/test/lit/wasm-split/instrument-funcs.wast
+++ b/test/lit/wasm-split/instrument-funcs.wast
@@ -59,7 +59,7 @@
;; CHECK-NEXT: (block
;; CHECK-NEXT: (i64.store align=1
;; CHECK-NEXT: (local.get $addr)
-;; CHECK-NEXT: (i64.const 0)
+;; CHECK-NEXT: (i64.const {{.*}})
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.store offset=8 align=1
;; CHECK-NEXT: (local.get $addr)
diff --git a/test/lit/wasm-split/mismatched-hashes.wast b/test/lit/wasm-split/mismatched-hashes.wast
new file mode 100644
index 000000000..561135bbe
--- /dev/null
+++ b/test/lit/wasm-split/mismatched-hashes.wast
@@ -0,0 +1,23 @@
+;; Check that using different inputs for the instrumentation and splitting steps
+;; results in an error.
+
+;; Instrument the module
+;; RUN: wasm-split --instrument %s -o %t.instrumented.wasm
+
+;; Generate a profile
+;; RUN: node %S/call_exports.mjs %t.instrumented.wasm %t.prof
+
+;; Attempt to split the instrumented module
+;; RUN: not wasm-split %t.instrumented.wasm --profile=%t.prof -o1 %t.1.wasm -o2 %t.2.wasm \
+;; RUN: 2>&1 | filecheck %s
+
+;; CHECK: error: checksum in profile does not match module checksum.
+;; CHECK-SAME: The split module must be the original module that was instrumented
+;; CHECK-SAME: to generate the profile.
+
+;; Check that the matcing module succeeds
+;; RUN: wasm-split %s --profile=%t.prof -o1 %t.1.wasm -o2 %t.2.wasm
+
+(module
+ (export "memory" (memory 0 0))
+)