summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/binaryen-c.h4
-rw-r--r--src/wasm/wasm-emscripten.cpp189
-rw-r--r--test/lit/wasm-emscripten-finalize/em_asm.wat29
-rw-r--r--test/lit/wasm-emscripten-finalize/em_asm_partial.wat24
-rw-r--r--test/lit/wasm-emscripten-finalize/em_js.wat25
5 files changed, 217 insertions, 54 deletions
diff --git a/src/binaryen-c.h b/src/binaryen-c.h
index ba61db885..eb0cfc6e1 100644
--- a/src/binaryen-c.h
+++ b/src/binaryen-c.h
@@ -1633,9 +1633,9 @@ BINARYEN_API void BinaryenMemoryInitSetSize(BinaryenExpressionRef expr,
// DataDrop
-// Gets the index of the segment being dropped by a `memory.drop` expression.
+// Gets the index of the segment being dropped by a `data.drop` expression.
BINARYEN_API uint32_t BinaryenDataDropGetSegment(BinaryenExpressionRef expr);
-// Sets the index of the segment being dropped by a `memory.drop` expression.
+// Sets the index of the segment being dropped by a `data.drop` expression.
BINARYEN_API void BinaryenDataDropSetSegment(BinaryenExpressionRef expr,
uint32_t segmentIndex);
diff --git a/src/wasm/wasm-emscripten.cpp b/src/wasm/wasm-emscripten.cpp
index 24fb7bcdc..7a593f599 100644
--- a/src/wasm/wasm-emscripten.cpp
+++ b/src/wasm/wasm-emscripten.cpp
@@ -76,8 +76,7 @@ Global* getStackPointerGlobal(Module& wasm) {
const Address UNKNOWN_OFFSET(uint32_t(-1));
-std::string escape(const char* input) {
- std::string code = input;
+std::string escape(std::string code) {
// replace newlines quotes with escaped newlines
size_t curr = 0;
while ((curr = code.find("\\n", curr)) != std::string::npos) {
@@ -109,14 +108,21 @@ class StringConstantTracker {
public:
StringConstantTracker(Module& wasm) : wasm(wasm) { calcSegmentOffsets(); }
- std::string codeForConstAddr(int64_t address) {
- const char* str = stringAtAddr(address);
- if (!str) {
- Fatal() << "unable to find data for ASM/EM_JS const at: " << address;
+ const char* stringAtAddr(Address address) {
+ for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) {
+ Memory::Segment& segment = wasm.memory.segments[i];
+ Address offset = segmentOffsets[i];
+ if (offset != UNKNOWN_OFFSET && address >= offset &&
+ address < offset + segment.data.size()) {
+ return &segment.data[address - offset];
+ }
}
- return escape(str);
+ Fatal() << "unable to find data for ASM/EM_JS const at: " << address;
+ return nullptr;
}
+ std::vector<Address> segmentOffsets; // segment index => address offset
+
private:
void calcSegmentOffsets() {
std::unordered_map<Index, Address> passiveOffsets;
@@ -172,20 +178,12 @@ private:
}
}
- const char* stringAtAddr(Address address) {
- for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) {
- Memory::Segment& segment = wasm.memory.segments[i];
- Address offset = segmentOffsets[i];
- if (offset != UNKNOWN_OFFSET && address >= offset &&
- address < offset + segment.data.size()) {
- return &segment.data[address - offset];
- }
- }
- return nullptr;
- }
-
Module& wasm;
- std::vector<Address> segmentOffsets; // segment index => address offset
+};
+
+struct AsmConst {
+ Address id;
+ std::string code;
};
struct AsmConstWalker : public LinearExecutionWalker<AsmConstWalker> {
@@ -193,11 +191,6 @@ struct AsmConstWalker : public LinearExecutionWalker<AsmConstWalker> {
bool minimizeWasmChanges;
StringConstantTracker stringTracker;
- struct AsmConst {
- Address id;
- std::string code;
- };
-
std::vector<AsmConst> asmConsts;
// last sets in the current basic block, per index
std::map<Index, LocalSet*> sets;
@@ -292,9 +285,8 @@ void AsmConstWalker::visitCall(Call* curr) {
}
auto* value = arg->cast<Const>();
- int64_t address = value->value.getInteger();
- auto code = stringTracker.codeForConstAddr(address);
- createAsmConst(address, code);
+ Address address = value->value.getInteger();
+ asmConsts.push_back({address, stringTracker.stringAtAddr(address)});
}
void AsmConstWalker::process() {
@@ -305,24 +297,105 @@ void AsmConstWalker::process() {
addImports();
}
-void AsmConstWalker::createAsmConst(uint64_t id, std::string code) {
- AsmConst asmConst;
- asmConst.id = id;
- asmConst.code = code;
- asmConsts.push_back(asmConst);
-}
-
void AsmConstWalker::addImports() {
for (auto& import : queuedImports) {
wasm.addFunction(import.release());
}
}
-static AsmConstWalker findEmAsmConstsAndReturnWalker(Module& wasm,
- bool minimizeWasmChanges) {
- AsmConstWalker walker(wasm, minimizeWasmChanges);
- walker.process();
- return walker;
+struct SegmentRemover : WalkerPass<PostWalker<SegmentRemover>> {
+ SegmentRemover(Index segment) : segment(segment) {}
+
+ bool isFunctionParallel() override { return true; }
+
+ Pass* create() override { return new SegmentRemover(segment); }
+
+ void visitMemoryInit(MemoryInit* curr) {
+ if (segment == curr->segment) {
+ Builder builder(*getModule());
+ replaceCurrent(builder.blockify(builder.makeDrop(curr->dest),
+ builder.makeDrop(curr->offset),
+ builder.makeDrop(curr->size)));
+ }
+ }
+
+ void visitDataDrop(DataDrop* curr) {
+ if (segment == curr->segment) {
+ Builder builder(*getModule());
+ replaceCurrent(builder.makeNop());
+ }
+ }
+
+ Index segment;
+};
+
+static void removeSegment(Module& wasm, Index segment) {
+ PassRunner runner(&wasm);
+ SegmentRemover(segment).run(&runner, &wasm);
+ // Resize the segment to zero. In theory we should completely remove it
+ // but that would mean re-numbering the segments that follow which is
+ // non-trivial.
+ wasm.memory.segments[segment].data.resize(0);
+}
+
+static Address getExportedAddress(Module& wasm, Export* export_) {
+ Global* g = wasm.getGlobal(export_->value);
+ auto* addrConst = g->init->dynCast<Const>();
+ return addrConst->value.getInteger();
+}
+
+static std::vector<AsmConst> findEmAsmConsts(Module& wasm,
+ bool minimizeWasmChanges) {
+ Export* start = wasm.getExportOrNull("__start_em_asm");
+ Export* end = wasm.getExportOrNull("__stop_em_asm");
+
+ // Older versions of emscripten don't export these symbols. Instead
+ // we run AsmConstWalker in an attempt to derive the string addresses
+ // from the code.
+ if (!start || !end) {
+ AsmConstWalker walker(wasm, minimizeWasmChanges);
+ walker.process();
+ return walker.asmConsts;
+ }
+
+  // Newer versions of emscripten export these symbols and we
+  // can use them to find all the EM_ASM constants. Sadly __start_em_asm and
+  // __stop_em_asm don't always mark the start and end of a segment because in
+  // dynamic linking we merge all data segments into one.
+ std::vector<AsmConst> asmConsts;
+ StringConstantTracker stringTracker(wasm);
+ Address startAddress = getExportedAddress(wasm, start);
+ Address endAddress = getExportedAddress(wasm, end);
+ for (Index i = 0; i < wasm.memory.segments.size(); i++) {
+ Address segmentStart = stringTracker.segmentOffsets[i];
+ size_t segmentSize = wasm.memory.segments[i].data.size();
+ if (segmentStart <= startAddress &&
+ segmentStart + segmentSize >= endAddress) {
+ Address address = startAddress;
+ while (address < endAddress) {
+ auto code = stringTracker.stringAtAddr(address);
+ asmConsts.push_back({address, code});
+ address.addr += strlen(code) + 1;
+ }
+
+ if (segmentStart == startAddress &&
+ segmentStart + segmentSize == endAddress) {
+ removeSegment(wasm, i);
+ } else {
+ // If we can't remove the whole segment then just set the string
+ // data to zero.
+ size_t segmentOffset = startAddress - segmentStart;
+ char* startElem = &wasm.memory.segments[i].data[segmentOffset];
+ memset(startElem, 0, endAddress - startAddress);
+ }
+ break;
+ }
+ }
+
+ assert(asmConsts.size());
+ wasm.removeExport("__start_em_asm");
+ wasm.removeExport("__stop_em_asm");
+ return asmConsts;
}
struct EmJsWalker : public PostWalker<EmJsWalker> {
@@ -331,6 +404,7 @@ struct EmJsWalker : public PostWalker<EmJsWalker> {
std::vector<Export> toRemove;
std::map<std::string, std::string> codeByName;
+ std::map<Address, size_t> codeAddresses; // map from address to string len
EmJsWalker(Module& _wasm) : wasm(_wasm), stringTracker(_wasm) {}
@@ -353,8 +427,9 @@ struct EmJsWalker : public PostWalker<EmJsWalker> {
}
auto* addrConst = consts.list[0];
int64_t address = addrConst->value.getInteger();
- auto code = stringTracker.codeForConstAddr(address);
+ auto code = stringTracker.stringAtAddr(address);
codeByName[funcName] = code;
+ codeAddresses[address] = strlen(code) + 1;
}
};
@@ -366,6 +441,27 @@ EmJsWalker findEmJsFuncsAndReturnWalker(Module& wasm) {
wasm.removeExport(exp.name);
wasm.removeFunction(exp.value);
}
+
+  // With newer versions of emscripten/llvm we pack all EM_JS strings into a
+  // single segment. We detect this by checking, for each segment in turn and
+  // starting at that segment's own offset, whether it contains only JS
+  // strings. When we find such segments we remove them from the final binary.
+  for (Index i = 0; i < wasm.memory.segments.size(); i++) {
+    Address start = walker.stringTracker.segmentOffsets[i];
+    Address cur = start;
+
+    while (cur < start + wasm.memory.segments[i].data.size()) {
+      if (walker.codeAddresses.count(cur) == 0) {
+        break;
+      }
+      cur.addr += walker.codeAddresses[cur];
+    }
+
+    if (cur == start + wasm.memory.segments[i].data.size()) {
+      // The entire segment consists of JS strings. Remove it.
+      removeSegment(wasm, i);
+    }
+  }
return walker;
}
@@ -383,16 +479,15 @@ std::string EmscriptenGlueGenerator::generateEmscriptenMetadata() {
std::stringstream meta;
meta << "{\n";
- AsmConstWalker emAsmWalker =
- findEmAsmConstsAndReturnWalker(wasm, minimizeWasmChanges);
+ std::vector<AsmConst> asmConsts = findEmAsmConsts(wasm, minimizeWasmChanges);
// print
commaFirst = true;
- if (!emAsmWalker.asmConsts.empty()) {
+ if (!asmConsts.empty()) {
meta << " \"asmConsts\": {";
- for (auto& asmConst : emAsmWalker.asmConsts) {
+ for (auto& asmConst : asmConsts) {
meta << nextElement();
- meta << '"' << asmConst.id << "\": \"" << asmConst.code << "\"";
+ meta << '"' << asmConst.id << "\": \"" << escape(asmConst.code) << "\"";
}
meta << "\n },\n";
}
@@ -405,7 +500,7 @@ std::string EmscriptenGlueGenerator::generateEmscriptenMetadata() {
auto& name = pair.first;
auto& code = pair.second;
meta << nextElement();
- meta << '"' << name << "\": \"" << code << '"';
+ meta << '"' << name << "\": \"" << escape(code) << '"';
}
meta << "\n },\n";
}
diff --git a/test/lit/wasm-emscripten-finalize/em_asm.wat b/test/lit/wasm-emscripten-finalize/em_asm.wat
new file mode 100644
index 000000000..43dbfb9ae
--- /dev/null
+++ b/test/lit/wasm-emscripten-finalize/em_asm.wat
@@ -0,0 +1,29 @@
+;; Test that em_asm strings are extracted correctly when the __start_em_asm
+;; and __stop_em_asm globals are exported.
+
+;; RUN: wasm-emscripten-finalize %s -S | filecheck %s
+
+;; Check that the data segment that contains only EM_ASM strings is resized to
+;; zero, and that the strings are extracted into the metadata.
+
+;; CHECK: (data (i32.const 100) "normal data")
+;; CHECK-NEXT: (data (i32.const 512) "")
+;; CHECK-NEXT: (data (i32.const 1024) "more data")
+
+;; CHECK: "asmConsts": {
+;; CHECK-NEXT: "512": "{ console.log('JS hello'); }",
+;; CHECK-NEXT: "541": "{ console.log('hello again'); }"
+;; CHECK-NEXT: },
+
+;; Check that the exports are removed
+;; CHECK-NOT: export
+
+(module
+ (memory 1 1)
+ (global (export "__start_em_asm") i32 (i32.const 512))
+ (global (export "__stop_em_asm") i32 (i32.const 573))
+
+ (data (i32.const 100) "normal data")
+ (data (i32.const 512) "{ console.log('JS hello'); }\00{ console.log('hello again'); }\00")
+ (data (i32.const 1024) "more data")
+)
diff --git a/test/lit/wasm-emscripten-finalize/em_asm_partial.wat b/test/lit/wasm-emscripten-finalize/em_asm_partial.wat
new file mode 100644
index 000000000..6432f1689
--- /dev/null
+++ b/test/lit/wasm-emscripten-finalize/em_asm_partial.wat
@@ -0,0 +1,24 @@
+;; Test that em_asm strings are extracted correctly when the __start_em_asm
+;; and __stop_em_asm globals are exported.
+
+;; RUN: wasm-emscripten-finalize %s -S | filecheck %s
+
+;; Check for the case when __start_em_asm and __stop_em_asm don't define an
+;; entire segment. In this case we preserve the segment but zero the data.
+
+;; CHECK: (data (i32.const 512) "xx\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00yy")
+
+;; CHECK: "asmConsts": {
+;; CHECK-NEXT: "514": "{ console.log('JS hello'); }",
+;; CHECK-NEXT: "543": "{ console.log('hello again'); }"
+;; CHECK-NEXT: },
+
+;; Check that the exports are removed
+;; CHECK-NOT: export
+
+(module
+ (memory 1 1)
+ (global (export "__start_em_asm") i32 (i32.const 514))
+ (global (export "__stop_em_asm") i32 (i32.const 575))
+ (data (i32.const 512) "xx{ console.log('JS hello'); }\00{ console.log('hello again'); }\00yy")
+)
diff --git a/test/lit/wasm-emscripten-finalize/em_js.wat b/test/lit/wasm-emscripten-finalize/em_js.wat
index c2dd4c17d..0cce1e3b5 100644
--- a/test/lit/wasm-emscripten-finalize/em_js.wat
+++ b/test/lit/wasm-emscripten-finalize/em_js.wat
@@ -3,20 +3,32 @@
;; RUN: wasm-emscripten-finalize %s -S | filecheck %s
-;; Both functions should be stripped from the binary
+;; All functions should be stripped from the binary, regardless
+;; of internal name
;; CHECK-NOT: (func
+;; The data section that contains only em_js strings should
+;; be stripped.
+;; CHECK-NOT: (i32.const 512) "Only em_js strings here\00")
+
+;; Data sections that also contain other stuff should not be stripped
+;; CHECK: (data (i32.const 1024) "some JS string data\00xxx")
+;; CHECK: (data (i32.const 2048) "more JS string data\00yyy")
+
;; CHECK: "emJsFuncs": {
-;; CHECK-NEXT: "bar": "more JS string dara",
-;; CHECK-NEXT: "foo": "some JS string"
+;; CHECK-NEXT: "bar": "more JS string data",
+;; CHECK-NEXT: "baz": "Only em_js strings here
+;; CHECK-NEXT: "foo": "some JS string data"
;; CHECK-NEXT: },
(module
(memory 1 1)
- (data (i32.const 1024) "some JS string\00")
- (data (i32.const 2048) "more JS string dara\00")
+ (data (i32.const 512) "Only em_js strings here\00")
+ (data (i32.const 1024) "some JS string data\00xxx")
+ (data (i32.const 2048) "more JS string data\00yyy")
(export "__em_js__foo" (func $__em_js__foo))
(export "__em_js__bar" (func $bar))
+ (export "__em_js__baz" (func $baz))
;; Name matches export name
(func $__em_js__foo (result i32)
(i32.const 1024)
@@ -25,4 +37,7 @@
(func $bar (result i32)
(i32.const 2048)
)
+ (func $baz (result i32)
+ (i32.const 512)
+ )
)