summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Clegg <sbc@chromium.org>2023-11-08 10:28:10 -0800
committerGitHub <noreply@github.com>2023-11-08 18:28:10 +0000
commit784960180eac208a34eb33415267d977034971df (patch)
tree6b88bd5bc62bd4d5af5405fd52161a3a3aae1405
parent9627c8360d179c2cae168f8bca3bf1b7216c34a8 (diff)
downloadbinaryen-784960180eac208a34eb33415267d977034971df.tar.gz
binaryen-784960180eac208a34eb33415267d977034971df.tar.bz2
binaryen-784960180eac208a34eb33415267d977034971df.zip
Move --separate-data-segments into a pass so it can be run from wasm-opt (#6088)
Because we currently strip some data segments (i.e. EM_JS strings) during `--post-emscripten`, this is too late as `--separate-data-segments` always runs in `wasm-emscripten-finalize`. Once emscripten switches over to using the pass directly we can remove the support from `wasm-emscripten-finalize`.
-rw-r--r--src/passes/CMakeLists.txt1
-rw-r--r--src/passes/SeparateDataSegments.cpp78
-rw-r--r--src/passes/pass.cpp3
-rw-r--r--src/passes/passes.h1
-rw-r--r--src/tools/wasm-emscripten-finalize.cpp13
-rw-r--r--src/wasm-emscripten.h6
-rw-r--r--src/wasm/wasm-emscripten.cpp32
-rw-r--r--test/lit/help/wasm-opt.test3
-rw-r--r--test/lit/help/wasm2js.test3
-rw-r--r--test/lit/passes/separate-data-segments.wast22
10 files changed, 117 insertions, 45 deletions
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index bd1dd8598..2930898a9 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -107,6 +107,7 @@ set(passes_SOURCES
TypeMerging.cpp
TypeSSA.cpp
SafeHeap.cpp
+ SeparateDataSegments.cpp
SimplifyGlobals.cpp
SimplifyLocals.cpp
Souperify.cpp
diff --git a/src/passes/SeparateDataSegments.cpp b/src/passes/SeparateDataSegments.cpp
new file mode 100644
index 000000000..3e48d14bc
--- /dev/null
+++ b/src/passes/SeparateDataSegments.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2023 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Emits the data segments to a file. The file contains data from address base
+// onwards (we must pass in base, as we can't tell it from the wasm - the
+// first segment may start after a run of zeros, but we need those zeros in
+// the file).
+//
+
+#include <exception>
+#include <string>
+#include <vector>
+
+#include "pass.h"
+#include "support/file.h"
+#include "wasm-features.h"
+#include "wasm.h"
+
+namespace wasm {
+
+// Pass that writes the contents of all active, constant-offset data segments
+// to an external binary file and then removes the segments from the module.
+//
+// Pass arguments:
+//   separate-data-segments             - name of the output file.
+//   separate-data-segments-global-base - decimal address that corresponds to
+//                                        byte 0 of the output file.
+struct SeparateDataSegments : public Pass {
+  // Reports that this pass does not modify Binaryen IR.
+  // NOTE(review): presumably valid because only data segments and exports are
+  // changed, never function bodies - confirm against the Pass contract.
+  bool modifiesBinaryenIR() override { return false; }
+
+  // Parses the global base argument as an unsigned decimal number. Anything
+  // other than a plain run of digits (empty string, sign, whitespace, hex,
+  // trailing junk) or a value that does not fit is reported with Fatal()
+  // instead of escaping as an uncaught exception or being silently truncated.
+  static Address parseGlobalBase(const std::string& text) {
+    if (text.empty() ||
+        text.find_first_not_of("0123456789") != std::string::npos) {
+      Fatal() << "separate-data-segments-global-base must be a non-negative "
+                 "decimal number, got '"
+              << text << "'";
+    }
+    unsigned long long value = 0;
+    try {
+      value = std::stoull(text);
+    } catch (const std::exception&) {
+      Fatal() << "separate-data-segments-global-base is out of range: '"
+              << text << "'";
+    }
+    return value;
+  }
+
+  // Writes every data segment of `module` to the output file, zero-filling
+  // the gaps between segments (and between the global base and the first
+  // segment), then drops the segments and the em_asm/em_js marker exports.
+  //
+  // Fatal errors: missing pass arguments, a malformed global base, passive
+  // segments, non-constant segment offsets, a segment that starts below the
+  // global base, and segments that are out of order or overlap.
+  void run(Module* module) override {
+    std::string outfileName =
+      getPassOptions().getArgument("separate-data-segments",
+                                   "SeparateDataSegments usage: wasm-opt "
+                                   "--separate-data-segments@FILENAME");
+    std::string baseStr = getPassOptions().getArgument(
+      "separate-data-segments-global-base",
+      "SeparateDataSegments usage: wasm-opt "
+      "--pass-arg=separate-data-segments-global-base@NUMBER");
+    // Validate the base before opening the output so that bad arguments do
+    // not leave an empty file behind.
+    Address base = parseGlobalBase(baseStr);
+    Output outfile(outfileName, Flags::Binary);
+
+    // File position (relative to base) just past the last byte written.
+    size_t lastEnd = 0;
+    for (auto& seg : module->dataSegments) {
+      if (seg->isPassive) {
+        Fatal() << "separating passive segments not implemented";
+      }
+      if (!seg->offset->is<Const>()) {
+        Fatal() << "separating relocatable segments not implemented";
+      }
+      size_t offset = seg->offset->cast<Const>()->value.getInteger();
+      // The subtractions below are unsigned; without these checks a segment
+      // below the base, or out of order, would wrap around and request an
+      // enormous zero-fill buffer.
+      if (offset < base) {
+        Fatal() << "data segment at address " << offset
+                << " starts below the global base " << baseStr;
+      }
+      offset -= base;
+      if (offset < lastEnd) {
+        Fatal() << "data segments must be sorted and non-overlapping to be "
+                   "separated";
+      }
+      size_t fill = offset - lastEnd;
+      if (fill > 0) {
+        // Value-initialized, i.e. all zeros.
+        std::vector<char> buf(fill);
+        outfile.write(buf.data(), fill);
+      }
+      outfile.write(seg->data.data(), seg->data.size());
+      lastEnd = offset + seg->data.size();
+    }
+    module->dataSegments.clear();
+    // Remove the start/stop symbols that the PostEmscripten uses to remove
+    // em_asm/em_js data. Since we just removed all the data segments from the
+    // file there is nothing more for that pass to do.
+    // TODO(sbc): Fix the ordering so that the removal of the EM_ASM/EM_JS data
+    // comes before this pass.
+    module->removeExport("__start_em_asm");
+    module->removeExport("__stop_em_asm");
+    module->removeExport("__start_em_js");
+    module->removeExport("__stop_em_js");
+  }
+};
+
+// Factory for the "separate-data-segments" pass: returns a newly allocated
+// SeparateDataSegments instance.
+Pass* createSeparateDataSegmentsPass() {
+  return new SeparateDataSegments();
+}
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 35085c201..a7f85e595 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -407,6 +407,9 @@ void PassRegistry::registerPasses() {
registerPass("set-globals",
"sets specified globals to specified values",
createSetGlobalsPass);
+ registerPass("separate-data-segments",
+ "write data segments to a file and strip them from the module",
+ createSeparateDataSegmentsPass);
registerPass("signature-pruning",
"remove params from function signature types where possible",
createSignaturePruningPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 2bace5bcc..041e828a7 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -129,6 +129,7 @@ Pass* createRedundantSetEliminationPass();
Pass* createRoundTripPass();
Pass* createSafeHeapPass();
Pass* createSetGlobalsPass();
+Pass* createSeparateDataSegmentsPass();
Pass* createSignaturePruningPass();
Pass* createSignatureRefiningPass();
Pass* createSignExtLoweringPass();
diff --git a/src/tools/wasm-emscripten-finalize.cpp b/src/tools/wasm-emscripten-finalize.cpp
index 0f63e8112..5ec4d245e 100644
--- a/src/tools/wasm-emscripten-finalize.cpp
+++ b/src/tools/wasm-emscripten-finalize.cpp
@@ -294,17 +294,16 @@ int main(int argc, const char* argv[]) {
passRunner.add("strip-dwarf");
}
- passRunner.run();
-
// Finally, separate out data segments if relevant
if (!dataSegmentFile.empty()) {
- Output memInitFile(dataSegmentFile, Flags::Binary);
- if (globalBase == INVALID_BASE) {
- Fatal() << "globalBase must be set";
- }
- generator.separateDataSegments(&memInitFile, globalBase);
+ passRunner.options.arguments["separate-data-segments"] = dataSegmentFile;
+ passRunner.options.arguments["separate-data-segments-global-base"] =
+ std::to_string(globalBase);
+ passRunner.add("separate-data-segments");
}
+ passRunner.run();
+
BYN_TRACE_WITH_TYPE("emscripten-dump", "Module after:\n");
BYN_DEBUG_WITH_TYPE("emscripten-dump", std::cerr << wasm << '\n');
diff --git a/src/wasm-emscripten.h b/src/wasm-emscripten.h
index 4d20b8706..1a70973ec 100644
--- a/src/wasm-emscripten.h
+++ b/src/wasm-emscripten.h
@@ -35,12 +35,6 @@ public:
void fixInvokeFunctionNames();
- // Emits the data segments to a file. The file contains data from address base
- // onwards (we must pass in base, as we can't tell it from the wasm - the
- // first segment may start after a run of zeros, but we need those zeros in
- // the file).
- void separateDataSegments(Output* outfile, Address base);
-
bool standalone = false;
bool sideModule = false;
bool minimizeWasmChanges = false;
diff --git a/src/wasm/wasm-emscripten.cpp b/src/wasm/wasm-emscripten.cpp
index aef36d181..86efb6798 100644
--- a/src/wasm/wasm-emscripten.cpp
+++ b/src/wasm/wasm-emscripten.cpp
@@ -186,36 +186,4 @@ struct AsmConst {
std::string code;
};
-void EmscriptenGlueGenerator::separateDataSegments(Output* outfile,
- Address base) {
- size_t lastEnd = 0;
- for (auto& seg : wasm.dataSegments) {
- if (seg->isPassive) {
- Fatal() << "separating passive segments not implemented";
- }
- if (!seg->offset->is<Const>()) {
- Fatal() << "separating relocatable segments not implemented";
- }
- size_t offset = seg->offset->cast<Const>()->value.getInteger();
- offset -= base;
- size_t fill = offset - lastEnd;
- if (fill > 0) {
- std::vector<char> buf(fill);
- outfile->write(buf.data(), fill);
- }
- outfile->write(seg->data.data(), seg->data.size());
- lastEnd = offset + seg->data.size();
- }
- wasm.dataSegments.clear();
- // Remove the start/stop symbols that the PostEmscripten uses to remove
- // em_asm/em_js data. Since we just removed all the data segments from the
- // file there is nothing more for that pass to do.
- // TODO(sbc): Fix the ordering so that the removal the EM_ASM/EM_JS data comes
- // before this pass.
- wasm.removeExport("__start_em_asm");
- wasm.removeExport("__stop_em_asm");
- wasm.removeExport("__start_em_js");
- wasm.removeExport("__stop_em_js");
-}
-
} // namespace wasm
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index 4607a94b9..95d25cc61 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -395,6 +395,9 @@
;; CHECK-NEXT: --safe-heap instrument loads and stores to
;; CHECK-NEXT: check for invalid behavior
;; CHECK-NEXT:
+;; CHECK-NEXT: --separate-data-segments write data segments to a file
+;; CHECK-NEXT: and strip them from the module
+;; CHECK-NEXT:
;; CHECK-NEXT: --set-globals sets specified globals to
;; CHECK-NEXT: specified values
;; CHECK-NEXT:
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index 0b041ddd3..d3df47eb6 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -354,6 +354,9 @@
;; CHECK-NEXT: --safe-heap instrument loads and stores to
;; CHECK-NEXT: check for invalid behavior
;; CHECK-NEXT:
+;; CHECK-NEXT: --separate-data-segments write data segments to a file
+;; CHECK-NEXT: and strip them from the module
+;; CHECK-NEXT:
;; CHECK-NEXT: --set-globals sets specified globals to
;; CHECK-NEXT: specified values
;; CHECK-NEXT:
diff --git a/test/lit/passes/separate-data-segments.wast b/test/lit/passes/separate-data-segments.wast
new file mode 100644
index 000000000..ad921df79
--- /dev/null
+++ b/test/lit/passes/separate-data-segments.wast
@@ -0,0 +1,22 @@
+;; RUN: wasm-opt %s --separate-data-segments=%t.data --pass-arg=separate-data-segments-global-base@1024 -S -o - | filecheck %s
+;; RUN: cat %t.data | filecheck %s --check-prefix=CHECK-DATA
+
+(module
+ ;; CHECK: (memory $0 1 1)
+ (memory 1 1)
+
+ ;; CHECK-NOT: (data
+ (data (i32.const 1024) "hello world\n")
+
+ ;; CHECK: (func $foo
+ (func $foo
+ (call $bar)
+ )
+
+ ;; CHECK: (func $bar
+ (func $bar
+ (call $foo)
+ )
+)
+
+;; CHECK-DATA: {{^}}hello world{{$}}