diff options
author | Sam Clegg <sbc@chromium.org> | 2023-11-08 10:28:10 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-08 18:28:10 +0000 |
commit | 784960180eac208a34eb33415267d977034971df (patch) | |
tree | 6b88bd5bc62bd4d5af5405fd52161a3a3aae1405 | |
parent | 9627c8360d179c2cae168f8bca3bf1b7216c34a8 (diff) | |
download | binaryen-784960180eac208a34eb33415267d977034971df.tar.gz binaryen-784960180eac208a34eb33415267d977034971df.tar.bz2 binaryen-784960180eac208a34eb33415267d977034971df.zip |
Move --separate-data-segments into a pass so it can be run from wasm-opt (#6088)
We currently strip some data segments (e.g. EM_JS strings) during
`--post-emscripten`, but that is too late because `--separate-data-segments`
always runs in `wasm-emscripten-finalize`.
Once emscripten switches over to using the pass directly, we can remove
the support from `wasm-emscripten-finalize`.
-rw-r--r-- | src/passes/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/passes/SeparateDataSegments.cpp | 78 | ||||
-rw-r--r-- | src/passes/pass.cpp | 3 | ||||
-rw-r--r-- | src/passes/passes.h | 1 | ||||
-rw-r--r-- | src/tools/wasm-emscripten-finalize.cpp | 13 | ||||
-rw-r--r-- | src/wasm-emscripten.h | 6 | ||||
-rw-r--r-- | src/wasm/wasm-emscripten.cpp | 32 | ||||
-rw-r--r-- | test/lit/help/wasm-opt.test | 3 | ||||
-rw-r--r-- | test/lit/help/wasm2js.test | 3 | ||||
-rw-r--r-- | test/lit/passes/separate-data-segments.wast | 22 |
10 files changed, 117 insertions, 45 deletions
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index bd1dd8598..2930898a9 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -107,6 +107,7 @@ set(passes_SOURCES TypeMerging.cpp TypeSSA.cpp SafeHeap.cpp + SeparateDataSegments.cpp SimplifyGlobals.cpp SimplifyLocals.cpp Souperify.cpp diff --git a/src/passes/SeparateDataSegments.cpp b/src/passes/SeparateDataSegments.cpp new file mode 100644 index 000000000..3e48d14bc --- /dev/null +++ b/src/passes/SeparateDataSegments.cpp @@ -0,0 +1,78 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Emits the data segments to a file. The file contains data from address base +// onwards (we must pass in base, as we can't tell it from the wasm - the +// first segment may start after a run of zeros, but we need those zeros in +// the file. 
+// + +#include "pass.h" +#include "support/file.h" +#include "wasm-features.h" +#include "wasm.h" + +namespace wasm { + +struct SeparateDataSegments : public Pass { + bool modifiesBinaryenIR() override { return false; } + + void run(Module* module) override { + std::string outfileName = + getPassOptions().getArgument("separate-data-segments", + "SeparateDataSegments usage: wasm-opt " + "--separate-data-segments@FILENAME"); + Output outfile(outfileName, Flags::Binary); + std::string baseStr = getPassOptions().getArgument( + "separate-data-segments-global-base", + "SeparateDataSegments usage: wasm-opt " + "--pass-arg=separate-data-segments-global-base@NUMBER"); + Address base = std::stoi(baseStr); + size_t lastEnd = 0; + for (auto& seg : module->dataSegments) { + if (seg->isPassive) { + Fatal() << "separating passive segments not implemented"; + } + if (!seg->offset->is<Const>()) { + Fatal() << "separating relocatable segments not implemented"; + } + size_t offset = seg->offset->cast<Const>()->value.getInteger(); + offset -= base; + size_t fill = offset - lastEnd; + if (fill > 0) { + std::vector<char> buf(fill); + outfile.write(buf.data(), fill); + } + outfile.write(seg->data.data(), seg->data.size()); + lastEnd = offset + seg->data.size(); + } + module->dataSegments.clear(); + // Remove the start/stop symbols that the PostEmscripten uses to remove + // em_asm/em_js data. Since we just removed all the data segments from the + // file there is nothing more for that pass to do. + // TODO(sbc): Fix the ordering so that the removal the EM_ASM/EM_JS data + // comes before this pass. 
+ module->removeExport("__start_em_asm"); + module->removeExport("__stop_em_asm"); + module->removeExport("__start_em_js"); + module->removeExport("__stop_em_js"); + } +}; + +Pass* createSeparateDataSegmentsPass() { return new SeparateDataSegments(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 35085c201..a7f85e595 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -407,6 +407,9 @@ void PassRegistry::registerPasses() { registerPass("set-globals", "sets specified globals to specified values", createSetGlobalsPass); + registerPass("separate-data-segments", + "write data segments to a file and strip them from the module", + createSeparateDataSegmentsPass); registerPass("signature-pruning", "remove params from function signature types where possible", createSignaturePruningPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index 2bace5bcc..041e828a7 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -129,6 +129,7 @@ Pass* createRedundantSetEliminationPass(); Pass* createRoundTripPass(); Pass* createSafeHeapPass(); Pass* createSetGlobalsPass(); +Pass* createSeparateDataSegmentsPass(); Pass* createSignaturePruningPass(); Pass* createSignatureRefiningPass(); Pass* createSignExtLoweringPass(); diff --git a/src/tools/wasm-emscripten-finalize.cpp b/src/tools/wasm-emscripten-finalize.cpp index 0f63e8112..5ec4d245e 100644 --- a/src/tools/wasm-emscripten-finalize.cpp +++ b/src/tools/wasm-emscripten-finalize.cpp @@ -294,17 +294,16 @@ int main(int argc, const char* argv[]) { passRunner.add("strip-dwarf"); } - passRunner.run(); - // Finally, separate out data segments if relevant if (!dataSegmentFile.empty()) { - Output memInitFile(dataSegmentFile, Flags::Binary); - if (globalBase == INVALID_BASE) { - Fatal() << "globalBase must be set"; - } - generator.separateDataSegments(&memInitFile, globalBase); + passRunner.options.arguments["separate-data-segments"] = dataSegmentFile; + 
passRunner.options.arguments["separate-data-segments-global-base"] = + std::to_string(globalBase); + passRunner.add("separate-data-segments"); } + passRunner.run(); + BYN_TRACE_WITH_TYPE("emscripten-dump", "Module after:\n"); BYN_DEBUG_WITH_TYPE("emscripten-dump", std::cerr << wasm << '\n'); diff --git a/src/wasm-emscripten.h b/src/wasm-emscripten.h index 4d20b8706..1a70973ec 100644 --- a/src/wasm-emscripten.h +++ b/src/wasm-emscripten.h @@ -35,12 +35,6 @@ public: void fixInvokeFunctionNames(); - // Emits the data segments to a file. The file contains data from address base - // onwards (we must pass in base, as we can't tell it from the wasm - the - // first segment may start after a run of zeros, but we need those zeros in - // the file). - void separateDataSegments(Output* outfile, Address base); - bool standalone = false; bool sideModule = false; bool minimizeWasmChanges = false; diff --git a/src/wasm/wasm-emscripten.cpp b/src/wasm/wasm-emscripten.cpp index aef36d181..86efb6798 100644 --- a/src/wasm/wasm-emscripten.cpp +++ b/src/wasm/wasm-emscripten.cpp @@ -186,36 +186,4 @@ struct AsmConst { std::string code; }; -void EmscriptenGlueGenerator::separateDataSegments(Output* outfile, - Address base) { - size_t lastEnd = 0; - for (auto& seg : wasm.dataSegments) { - if (seg->isPassive) { - Fatal() << "separating passive segments not implemented"; - } - if (!seg->offset->is<Const>()) { - Fatal() << "separating relocatable segments not implemented"; - } - size_t offset = seg->offset->cast<Const>()->value.getInteger(); - offset -= base; - size_t fill = offset - lastEnd; - if (fill > 0) { - std::vector<char> buf(fill); - outfile->write(buf.data(), fill); - } - outfile->write(seg->data.data(), seg->data.size()); - lastEnd = offset + seg->data.size(); - } - wasm.dataSegments.clear(); - // Remove the start/stop symbols that the PostEmscripten uses to remove - // em_asm/em_js data. 
Since we just removed all the data segments from the - // file there is nothing more for that pass to do. - // TODO(sbc): Fix the ordering so that the removal the EM_ASM/EM_JS data comes - // before this pass. - wasm.removeExport("__start_em_asm"); - wasm.removeExport("__stop_em_asm"); - wasm.removeExport("__start_em_js"); - wasm.removeExport("__stop_em_js"); -} - } // namespace wasm diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index 4607a94b9..95d25cc61 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -395,6 +395,9 @@ ;; CHECK-NEXT: --safe-heap instrument loads and stores to ;; CHECK-NEXT: check for invalid behavior ;; CHECK-NEXT: +;; CHECK-NEXT: --separate-data-segments write data segments to a file +;; CHECK-NEXT: and strip them from the module +;; CHECK-NEXT: ;; CHECK-NEXT: --set-globals sets specified globals to ;; CHECK-NEXT: specified values ;; CHECK-NEXT: diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 0b041ddd3..d3df47eb6 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -354,6 +354,9 @@ ;; CHECK-NEXT: --safe-heap instrument loads and stores to ;; CHECK-NEXT: check for invalid behavior ;; CHECK-NEXT: +;; CHECK-NEXT: --separate-data-segments write data segments to a file +;; CHECK-NEXT: and strip them from the module +;; CHECK-NEXT: ;; CHECK-NEXT: --set-globals sets specified globals to ;; CHECK-NEXT: specified values ;; CHECK-NEXT: diff --git a/test/lit/passes/separate-data-segments.wast b/test/lit/passes/separate-data-segments.wast new file mode 100644 index 000000000..ad921df79 --- /dev/null +++ b/test/lit/passes/separate-data-segments.wast @@ -0,0 +1,22 @@ +;; RUN: wasm-opt %s --separate-data-segments=%t.data --pass-arg=separate-data-segments-global-base@1024 -S -o - | filecheck %s +;; RUN: cat %t.data | filecheck %s --check-prefix=CHECK-DATA + +(module + ;; CHECK: (memory $0 1 1) + (memory 1 1) + + ;; CHECK-NOT (data + (data (i32.const 1024) 
"hello world\n") + + ;; CHECK: (func $foo + (func $foo + (call $bar) + ) + + ;; CHECK: (func $bar + (func $bar + (call $foo) + ) +) + +;; CHECK-DATA: {{^}}hello world{{$}} |