diff options
-rw-r--r-- | src/binaryen-c.h | 4 | ||||
-rw-r--r-- | src/wasm/wasm-emscripten.cpp | 189 | ||||
-rw-r--r-- | test/lit/wasm-emscripten-finalize/em_asm.wat | 29 | ||||
-rw-r--r-- | test/lit/wasm-emscripten-finalize/em_asm_partial.wat | 24 | ||||
-rw-r--r-- | test/lit/wasm-emscripten-finalize/em_js.wat | 25 |
5 files changed, 217 insertions, 54 deletions
diff --git a/src/binaryen-c.h b/src/binaryen-c.h index ba61db885..eb0cfc6e1 100644 --- a/src/binaryen-c.h +++ b/src/binaryen-c.h @@ -1633,9 +1633,9 @@ BINARYEN_API void BinaryenMemoryInitSetSize(BinaryenExpressionRef expr, // DataDrop -// Gets the index of the segment being dropped by a `memory.drop` expression. +// Gets the index of the segment being dropped by a `data.drop` expression. BINARYEN_API uint32_t BinaryenDataDropGetSegment(BinaryenExpressionRef expr); -// Sets the index of the segment being dropped by a `memory.drop` expression. +// Sets the index of the segment being dropped by a `data.drop` expression. BINARYEN_API void BinaryenDataDropSetSegment(BinaryenExpressionRef expr, uint32_t segmentIndex); diff --git a/src/wasm/wasm-emscripten.cpp b/src/wasm/wasm-emscripten.cpp index 24fb7bcdc..7a593f599 100644 --- a/src/wasm/wasm-emscripten.cpp +++ b/src/wasm/wasm-emscripten.cpp @@ -76,8 +76,7 @@ Global* getStackPointerGlobal(Module& wasm) { const Address UNKNOWN_OFFSET(uint32_t(-1)); -std::string escape(const char* input) { - std::string code = input; +std::string escape(std::string code) { // replace newlines quotes with escaped newlines size_t curr = 0; while ((curr = code.find("\\n", curr)) != std::string::npos) { @@ -109,14 +108,21 @@ class StringConstantTracker { public: StringConstantTracker(Module& wasm) : wasm(wasm) { calcSegmentOffsets(); } - std::string codeForConstAddr(int64_t address) { - const char* str = stringAtAddr(address); - if (!str) { - Fatal() << "unable to find data for ASM/EM_JS const at: " << address; + const char* stringAtAddr(Address address) { + for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) { + Memory::Segment& segment = wasm.memory.segments[i]; + Address offset = segmentOffsets[i]; + if (offset != UNKNOWN_OFFSET && address >= offset && + address < offset + segment.data.size()) { + return &segment.data[address - offset]; + } } - return escape(str); + Fatal() << "unable to find data for ASM/EM_JS const at: " << 
address; + return nullptr; } + std::vector<Address> segmentOffsets; // segment index => address offset + private: void calcSegmentOffsets() { std::unordered_map<Index, Address> passiveOffsets; @@ -172,20 +178,12 @@ private: } } - const char* stringAtAddr(Address address) { - for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) { - Memory::Segment& segment = wasm.memory.segments[i]; - Address offset = segmentOffsets[i]; - if (offset != UNKNOWN_OFFSET && address >= offset && - address < offset + segment.data.size()) { - return &segment.data[address - offset]; - } - } - return nullptr; - } - Module& wasm; - std::vector<Address> segmentOffsets; // segment index => address offset +}; + +struct AsmConst { + Address id; + std::string code; }; struct AsmConstWalker : public LinearExecutionWalker<AsmConstWalker> { @@ -193,11 +191,6 @@ struct AsmConstWalker : public LinearExecutionWalker<AsmConstWalker> { bool minimizeWasmChanges; StringConstantTracker stringTracker; - struct AsmConst { - Address id; - std::string code; - }; - std::vector<AsmConst> asmConsts; // last sets in the current basic block, per index std::map<Index, LocalSet*> sets; @@ -292,9 +285,8 @@ void AsmConstWalker::visitCall(Call* curr) { } auto* value = arg->cast<Const>(); - int64_t address = value->value.getInteger(); - auto code = stringTracker.codeForConstAddr(address); - createAsmConst(address, code); + Address address = value->value.getInteger(); + asmConsts.push_back({address, stringTracker.stringAtAddr(address)}); } void AsmConstWalker::process() { @@ -305,24 +297,105 @@ void AsmConstWalker::process() { addImports(); } -void AsmConstWalker::createAsmConst(uint64_t id, std::string code) { - AsmConst asmConst; - asmConst.id = id; - asmConst.code = code; - asmConsts.push_back(asmConst); -} - void AsmConstWalker::addImports() { for (auto& import : queuedImports) { wasm.addFunction(import.release()); } } -static AsmConstWalker findEmAsmConstsAndReturnWalker(Module& wasm, - bool minimizeWasmChanges) 
{ - AsmConstWalker walker(wasm, minimizeWasmChanges); - walker.process(); - return walker; +struct SegmentRemover : WalkerPass<PostWalker<SegmentRemover>> { + SegmentRemover(Index segment) : segment(segment) {} + + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new SegmentRemover(segment); } + + void visitMemoryInit(MemoryInit* curr) { + if (segment == curr->segment) { + Builder builder(*getModule()); + replaceCurrent(builder.blockify(builder.makeDrop(curr->dest), + builder.makeDrop(curr->offset), + builder.makeDrop(curr->size))); + } + } + + void visitDataDrop(DataDrop* curr) { + if (segment == curr->segment) { + Builder builder(*getModule()); + replaceCurrent(builder.makeNop()); + } + } + + Index segment; +}; + +static void removeSegment(Module& wasm, Index segment) { + PassRunner runner(&wasm); + SegmentRemover(segment).run(&runner, &wasm); + // Resize the segment to zero. In theory we should completely remove it + // but that would mean re-numbering the segments that follow which is + // non-trivial. + wasm.memory.segments[segment].data.resize(0); +} + +static Address getExportedAddress(Module& wasm, Export* export_) { + Global* g = wasm.getGlobal(export_->value); + auto* addrConst = g->init->dynCast<Const>(); + return addrConst->value.getInteger(); +} + +static std::vector<AsmConst> findEmAsmConsts(Module& wasm, + bool minimizeWasmChanges) { + Export* start = wasm.getExportOrNull("__start_em_asm"); + Export* end = wasm.getExportOrNull("__stop_em_asm"); + + // Older versions of emscripten don't export these symbols. Instead + // we run AsmConstWalker in an attempt to derive the string addresses + // from the code. + if (!start || !end) { + AsmConstWalker walker(wasm, minimizeWasmChanges); + walker.process(); + return walker.asmConsts; + } + + // Newer versions of emscripten export these symbols and we + // can use them to find all the EM_ASM constants. 
Sadly __start_em_asm and + // __stop_em_asm don't always mark the start and end of a segment because in + // dynamic linking we merge all data segments into one. + std::vector<AsmConst> asmConsts; + StringConstantTracker stringTracker(wasm); + Address startAddress = getExportedAddress(wasm, start); + Address endAddress = getExportedAddress(wasm, end); + for (Index i = 0; i < wasm.memory.segments.size(); i++) { + Address segmentStart = stringTracker.segmentOffsets[i]; + size_t segmentSize = wasm.memory.segments[i].data.size(); + if (segmentStart <= startAddress && + segmentStart + segmentSize >= endAddress) { + Address address = startAddress; + while (address < endAddress) { + auto code = stringTracker.stringAtAddr(address); + asmConsts.push_back({address, code}); + address.addr += strlen(code) + 1; + } + + if (segmentStart == startAddress && + segmentStart + segmentSize == endAddress) { + removeSegment(wasm, i); + } else { + // If we can't remove the whole segment then just set the string + // data to zero. 
+ size_t segmentOffset = startAddress - segmentStart; + char* startElem = &wasm.memory.segments[i].data[segmentOffset]; + memset(startElem, 0, endAddress - startAddress); + } + break; + } + } + + assert(asmConsts.size()); + wasm.removeExport("__start_em_asm"); + wasm.removeExport("__stop_em_asm"); + return asmConsts; } struct EmJsWalker : public PostWalker<EmJsWalker> { @@ -331,6 +404,7 @@ struct EmJsWalker : public PostWalker<EmJsWalker> { std::vector<Export> toRemove; std::map<std::string, std::string> codeByName; + std::map<Address, size_t> codeAddresses; // map from address to string len EmJsWalker(Module& _wasm) : wasm(_wasm), stringTracker(_wasm) {} @@ -353,8 +427,9 @@ struct EmJsWalker : public PostWalker<EmJsWalker> { } auto* addrConst = consts.list[0]; int64_t address = addrConst->value.getInteger(); - auto code = stringTracker.codeForConstAddr(address); + auto code = stringTracker.stringAtAddr(address); codeByName[funcName] = code; + codeAddresses[address] = strlen(code) + 1; } }; @@ -366,6 +441,27 @@ EmJsWalker findEmJsFuncsAndReturnWalker(Module& wasm) { wasm.removeExport(exp.name); wasm.removeFunction(exp.value); } + + // With newer versions of emscripten/llvm we pack all EM_JS strings into + // a single segment. + // We can detect this by checking for segments that contain only JS strings. + // When we find such segments we remove them from the final binary. + for (Index i = 0; i < wasm.memory.segments.size(); i++) { + Address start = walker.stringTracker.segmentOffsets[0]; + Address cur = start; + + while (cur < start + wasm.memory.segments[i].data.size()) { + if (walker.codeAddresses.count(cur) == 0) { + break; + } + cur.addr += walker.codeAddresses[cur]; + } + + if (cur == start + wasm.memory.segments[i].data.size()) { + // Entire segment contains only JS strings. Remove it. 
+ removeSegment(wasm, i); + } + } return walker; } @@ -383,16 +479,15 @@ std::string EmscriptenGlueGenerator::generateEmscriptenMetadata() { std::stringstream meta; meta << "{\n"; - AsmConstWalker emAsmWalker = - findEmAsmConstsAndReturnWalker(wasm, minimizeWasmChanges); + std::vector<AsmConst> asmConsts = findEmAsmConsts(wasm, minimizeWasmChanges); // print commaFirst = true; - if (!emAsmWalker.asmConsts.empty()) { + if (!asmConsts.empty()) { meta << " \"asmConsts\": {"; - for (auto& asmConst : emAsmWalker.asmConsts) { + for (auto& asmConst : asmConsts) { meta << nextElement(); - meta << '"' << asmConst.id << "\": \"" << asmConst.code << "\""; + meta << '"' << asmConst.id << "\": \"" << escape(asmConst.code) << "\""; } meta << "\n },\n"; } @@ -405,7 +500,7 @@ std::string EmscriptenGlueGenerator::generateEmscriptenMetadata() { auto& name = pair.first; auto& code = pair.second; meta << nextElement(); - meta << '"' << name << "\": \"" << code << '"'; + meta << '"' << name << "\": \"" << escape(code) << '"'; } meta << "\n },\n"; } diff --git a/test/lit/wasm-emscripten-finalize/em_asm.wat b/test/lit/wasm-emscripten-finalize/em_asm.wat new file mode 100644 index 000000000..43dbfb9ae --- /dev/null +++ b/test/lit/wasm-emscripten-finalize/em_asm.wat @@ -0,0 +1,29 @@ +;; Test that em_asm string are extracted correctly when the __start_em_asm +;; and __stop_em_asm globals are exported. + +;; RUN: wasm-emscripten-finalize %s -S | filecheck %s + +;; Check that the data segment that contains only EM_ASM strings resized to +;; zero, and that the string are extracted into the metadata. 
+ +;; CHECK: (data (i32.const 100) "normal data") +;; CHECK-NEXT: (data (i32.const 512) "") +;; CHECK-NEXT: (data (i32.const 1024) "more data") + +;; CHECK: "asmConsts": { +;; CHECK-NEXT: "512": "{ console.log('JS hello'); }", +;; CHECK-NEXT: "541": "{ console.log('hello again'); }" +;; CHECK-NEXT: }, + +;; Check that the exports are removed +;; CHECK-NOT: export + +(module + (memory 1 1) + (global (export "__start_em_asm") i32 (i32.const 512)) + (global (export "__stop_em_asm") i32 (i32.const 573)) + + (data (i32.const 100) "normal data") + (data (i32.const 512) "{ console.log('JS hello'); }\00{ console.log('hello again'); }\00") + (data (i32.const 1024) "more data") +) diff --git a/test/lit/wasm-emscripten-finalize/em_asm_partial.wat b/test/lit/wasm-emscripten-finalize/em_asm_partial.wat new file mode 100644 index 000000000..6432f1689 --- /dev/null +++ b/test/lit/wasm-emscripten-finalize/em_asm_partial.wat @@ -0,0 +1,24 @@ +;; Test that em_asm strings are extracted correctly when the __start_em_asm +;; and __stop_em_asm globals are exported. + +;; RUN: wasm-emscripten-finalize %s -S | filecheck %s + +;; Check for the case when __start_em_asm and __stop_em_asm don't define an +;; entire segment. In this case we preserve the segment but zero the data. 
+ +;; CHECK: (data (i32.const 512) "xx\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00yy") + +;; CHECK: "asmConsts": { +;; CHECK-NEXT: "514": "{ console.log('JS hello'); }", +;; CHECK-NEXT: "543": "{ console.log('hello again'); }" +;; CHECK-NEXT: }, + +;; Check that the exports are removed +;; CHECK-NOT: export + +(module + (memory 1 1) + (global (export "__start_em_asm") i32 (i32.const 514)) + (global (export "__stop_em_asm") i32 (i32.const 575)) + (data (i32.const 512) "xx{ console.log('JS hello'); }\00{ console.log('hello again'); }\00yy") +) diff --git a/test/lit/wasm-emscripten-finalize/em_js.wat b/test/lit/wasm-emscripten-finalize/em_js.wat index c2dd4c17d..0cce1e3b5 100644 --- a/test/lit/wasm-emscripten-finalize/em_js.wat +++ b/test/lit/wasm-emscripten-finalize/em_js.wat @@ -3,20 +3,32 @@ ;; RUN: wasm-emscripten-finalize %s -S | filecheck %s -;; Both functions should be stripped from the binary +;; All functions should be stripped from the binary, regardless +;; of internal name ;; CHECK-NOT: (func +;; The data section that contains only em_js strings should +;; be stripped. 
+;; CHECK-NOT: (i32.const 512) "Only em_js strings here\00") + +;; Data sections that also contain other stuff should not be stripped +;; CHECK: (data (i32.const 1024) "some JS string data\00xxx") +;; CHECK: (data (i32.const 2048) "more JS string data\00yyy") + ;; CHECK: "emJsFuncs": { -;; CHECK-NEXT: "bar": "more JS string dara", -;; CHECK-NEXT: "foo": "some JS string" +;; CHECK-NEXT: "bar": "more JS string data", +;; CHECK-NEXT: "baz": "Only em_js strings here +;; CHECK-NEXT: "foo": "some JS string data" ;; CHECK-NEXT: }, (module (memory 1 1) - (data (i32.const 1024) "some JS string\00") - (data (i32.const 2048) "more JS string dara\00") + (data (i32.const 512) "Only em_js strings here\00") + (data (i32.const 1024) "some JS string data\00xxx") + (data (i32.const 2048) "more JS string data\00yyy") (export "__em_js__foo" (func $__em_js__foo)) (export "__em_js__bar" (func $bar)) + (export "__em_js__baz" (func $baz)) ;; Name matches export name (func $__em_js__foo (result i32) (i32.const 1024) @@ -25,4 +37,7 @@ (func $bar (result i32) (i32.const 2048) ) + (func $baz (result i32) + (i32.const 512) + ) ) |