summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/passes/Print.cpp 13
-rw-r--r-- src/passes/StringLowering.cpp 35
-rw-r--r-- src/passes/pass.cpp 4
-rw-r--r-- src/passes/passes.h 1
-rw-r--r-- src/pretty_printing.h 18
-rw-r--r-- src/support/string.cpp 41
-rw-r--r-- src/support/string.h 5
-rw-r--r-- src/wasm/wasm-s-parser.cpp 19
-rw-r--r-- test/lit/help/wasm-opt.test 4
-rw-r--r-- test/lit/help/wasm2js.test 4
-rw-r--r-- test/lit/passes/string-lowering-imports.wast 86
-rw-r--r-- test/lit/passes/string-lowering.wast 20
-rw-r--r-- test/spec/import-after-memory.fail.wast 1
-rw-r--r-- test/spec/old_import.wast 10
14 files changed, 207 insertions, 54 deletions
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 68a2e4cb6..a90ef4669 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -2838,8 +2838,9 @@ void PrintSExpression::handleSignature(HeapType curr, Name name) {
void PrintSExpression::visitExport(Export* curr) {
o << '(';
printMedium(o, "export ");
- // TODO: Escape the string properly.
- printText(o, curr->name.str.data()) << " (";
+ std::stringstream escaped;
+ String::printEscaped(escaped, curr->name.str);
+ printText(o, escaped.str(), false) << " (";
switch (curr->kind) {
case ExternalKind::Function:
o << "func";
@@ -2865,9 +2866,11 @@ void PrintSExpression::visitExport(Export* curr) {
void PrintSExpression::emitImportHeader(Importable* curr) {
printMedium(o, "import ");
- // TODO: Escape the strings properly and use std::string_view.
- printText(o, curr->module.str.data()) << ' ';
- printText(o, curr->base.str.data()) << ' ';
+ std::stringstream escapedModule, escapedBase;
+ String::printEscaped(escapedModule, curr->module.str);
+ String::printEscaped(escapedBase, curr->base.str);
+ printText(o, escapedModule.str(), false) << ' ';
+ printText(o, escapedBase.str(), false) << ' ';
}
void PrintSExpression::visitGlobal(Global* curr) {
diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp
index df2d66860..dd7428546 100644
--- a/src/passes/StringLowering.cpp
+++ b/src/passes/StringLowering.cpp
@@ -189,6 +189,13 @@ struct StringGathering : public Pass {
};
struct StringLowering : public StringGathering {
+ // If true, then encode well-formed strings as (import "'" "string...")
+ // instead of emitting them into the JSON custom section.
+ bool useMagicImports;
+
+ StringLowering(bool useMagicImports = false)
+ : useMagicImports(useMagicImports) {}
+
void run(Module* module) override {
if (!module->features.has(FeatureSet::Strings)) {
return;
@@ -217,25 +224,30 @@ struct StringLowering : public StringGathering {
}
void makeImports(Module* module) {
- Index importIndex = 0;
+ Index jsonImportIndex = 0;
std::stringstream json;
json << '[';
bool first = true;
- std::vector<Name> importedStrings;
for (auto& global : module->globals) {
if (global->init) {
if (auto* c = global->init->dynCast<StringConst>()) {
- global->module = "string.const";
- global->base = std::to_string(importIndex);
- importIndex++;
- global->init = nullptr;
-
- if (first) {
- first = false;
+ std::stringstream utf8;
+ if (useMagicImports &&
+ String::convertUTF16ToUTF8(utf8, c->string.str)) {
+ global->module = "'";
+ global->base = Name(utf8.str());
} else {
- json << ',';
+ global->module = "string.const";
+ global->base = std::to_string(jsonImportIndex);
+ if (first) {
+ first = false;
+ } else {
+ json << ',';
+ }
+ String::printEscapedJSON(json, c->string.str);
+ jsonImportIndex++;
}
- String::printEscapedJSON(json, c->string.str);
+ global->init = nullptr;
}
}
}
@@ -516,5 +528,6 @@ struct StringLowering : public StringGathering {
Pass* createStringGatheringPass() { return new StringGathering(); }
Pass* createStringLoweringPass() { return new StringLowering(); }
+Pass* createStringLoweringMagicImportPass() { return new StringLowering(true); }
} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 0955082ac..19ddaf2d4 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -484,6 +484,10 @@ void PassRegistry::registerPasses() {
"lowers wasm strings and operations to imports",
createStringLoweringPass);
registerPass(
+ "string-lowering-magic-imports",
+ "same as string-lowering, but encodes well-formed strings as magic imports",
+ createStringLoweringMagicImportPass);
+ registerPass(
"strip", "deprecated; same as strip-debug", createStripDebugPass);
registerPass("stack-check",
"enforce limits on llvm's __stack_pointer global",
diff --git a/src/passes/passes.h b/src/passes/passes.h
index 1b1ca99c6..23a9ea70b 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -156,6 +156,7 @@ Pass* createSimplifyLocalsNoTeeNoStructurePass();
Pass* createStackCheckPass();
Pass* createStringGatheringPass();
Pass* createStringLoweringPass();
+Pass* createStringLoweringMagicImportPass();
Pass* createStripDebugPass();
Pass* createStripDWARFPass();
Pass* createStripProducersPass();
diff --git a/src/pretty_printing.h b/src/pretty_printing.h
index f693c4d51..0f1a0ed87 100644
--- a/src/pretty_printing.h
+++ b/src/pretty_printing.h
@@ -51,29 +51,35 @@ inline std::ostream& restoreNormalColor(std::ostream& o) {
return o;
}
-inline std::ostream& printText(std::ostream& o, const char* str) {
- o << '"';
+inline std::ostream&
+printText(std::ostream& o, std::string_view str, bool needQuotes = true) {
+ if (needQuotes) {
+ o << '"';
+ }
Colors::green(o);
o << str;
Colors::normal(o);
- return o << '"';
+ if (needQuotes) {
+ o << '"';
+ }
+ return o;
}
-inline std::ostream& printMajor(std::ostream& o, const char* str) {
+inline std::ostream& printMajor(std::ostream& o, std::string_view str) {
prepareMajorColor(o);
o << str;
restoreNormalColor(o);
return o;
}
-inline std::ostream& printMedium(std::ostream& o, const char* str) {
+inline std::ostream& printMedium(std::ostream& o, std::string_view str) {
prepareColor(o);
o << str;
restoreNormalColor(o);
return o;
}
-inline std::ostream& printMinor(std::ostream& o, const char* str) {
+inline std::ostream& printMinor(std::ostream& o, std::string_view str) {
prepareMinorColor(o);
o << str;
restoreNormalColor(o);
diff --git a/src/support/string.cpp b/src/support/string.cpp
index 68249f51e..31d0e9170 100644
--- a/src/support/string.cpp
+++ b/src/support/string.cpp
@@ -213,7 +213,8 @@ std::optional<uint16_t> takeWTF16CodeUnit(std::string_view& str) {
return u;
}
-std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str) {
+std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str,
+ bool allowWTF = true) {
auto u = takeWTF16CodeUnit(str);
if (!u) {
return std::nullopt;
@@ -228,7 +229,13 @@ std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str) {
uint16_t highBits = *u - 0xD800;
uint16_t lowBits = *low - 0xDC00;
return 0x10000 + ((highBits << 10) | lowBits);
+ } else if (!allowWTF) {
+ // Unpaired high surrogate.
+ return std::nullopt;
}
+ } else if (!allowWTF && 0xDC00 <= *u && *u < 0xE000) {
+ // Unpaired low surrogate.
+ return std::nullopt;
}
return *u;
@@ -242,6 +249,23 @@ void writeWTF16CodeUnit(std::ostream& os, uint16_t u) {
constexpr uint32_t replacementCharacter = 0xFFFD;
+bool doConvertWTF16ToWTF8(std::ostream& os,
+ std::string_view str,
+ bool allowWTF) {
+ bool valid = true;
+
+ while (str.size()) {
+ auto u = takeWTF16CodePoint(str, allowWTF);
+ if (!u) {
+ valid = false;
+ u = replacementCharacter;
+ }
+ writeWTF8CodePoint(os, *u);
+ }
+
+ return valid;
+}
+
} // anonymous namespace
std::ostream& writeWTF8CodePoint(std::ostream& os, uint32_t u) {
@@ -308,18 +332,11 @@ bool convertWTF8ToWTF16(std::ostream& os, std::string_view str) {
}
bool convertWTF16ToWTF8(std::ostream& os, std::string_view str) {
- bool valid = true;
-
- while (str.size()) {
- auto u = takeWTF16CodePoint(str);
- if (!u) {
- valid = false;
- u = replacementCharacter;
- }
- writeWTF8CodePoint(os, *u);
- }
+ return doConvertWTF16ToWTF8(os, str, true);
+}
- return valid;
+bool convertUTF16ToUTF8(std::ostream& os, std::string_view str) {
+ return doConvertWTF16ToWTF8(os, str, false);
}
std::ostream& printEscapedJSON(std::ostream& os, std::string_view str) {
diff --git a/src/support/string.h b/src/support/string.h
index be2c3c6a3..af120ab4e 100644
--- a/src/support/string.h
+++ b/src/support/string.h
@@ -94,6 +94,11 @@ bool convertWTF8ToWTF16(std::ostream& os, std::string_view str);
// Returns `true` iff the input was valid WTF-16.
bool convertWTF16ToWTF8(std::ostream& os, std::string_view str);
+// Writes the UTF-8 encoding of the given UTF-16LE string to `os`, inserting a
+// replacement character in place of any unpaired surrogate or incomplete code
+// unit. Returns `true` if the input was valid UTF-16.
+bool convertUTF16ToUTF8(std::ostream& os, std::string_view str);
+
} // namespace wasm::String
#endif // wasm_support_string_h
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index eb31355c8..bca94a768 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -3622,7 +3622,9 @@ void SExpressionWasmBuilder::parseInnerData(Element& s,
void SExpressionWasmBuilder::parseExport(Element& s) {
std::unique_ptr<Export> ex = std::make_unique<Export>();
- ex->name = s[1]->str();
+ std::vector<char> nameBytes;
+ stringToBinary(*s[1], s[1]->str().str, nameBytes);
+ ex->name = std::string(nameBytes.data(), nameBytes.size());
if (s[2]->isList()) {
auto& inner = *s[2];
if (elementStartsWith(inner, FUNC)) {
@@ -3703,15 +3705,20 @@ void SExpressionWasmBuilder::parseImport(Element& s) {
if (!newStyle) {
kind = ExternalKind::Function;
}
- auto module = s[i++]->str();
+ std::vector<char> moduleBytes;
+ stringToBinary(*s[i], s[i]->str().str, moduleBytes);
+ Name module = std::string(moduleBytes.data(), moduleBytes.size());
+ i++;
+
if (!s[i]->isStr()) {
throw SParseException("no name for import", s, *s[i]);
}
- auto base = s[i]->str();
- if (!module.size() || !base.size()) {
- throw SParseException("imports must have module and base", s, *s[i]);
- }
+
+ std::vector<char> baseBytes;
+ stringToBinary(*s[i], s[i]->str().str, baseBytes);
+ Name base = std::string(baseBytes.data(), baseBytes.size());
i++;
+
// parse internals
Element& inner = newStyle ? *s[3] : s;
Index j = newStyle ? newStyleInner : i;
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index 84b5049dd..ff07586f2 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -478,6 +478,10 @@
;; CHECK-NEXT: --string-lowering lowers wasm strings and
;; CHECK-NEXT: operations to imports
;; CHECK-NEXT:
+;; CHECK-NEXT: --string-lowering-magic-imports same as string-lowering, but
+;; CHECK-NEXT: encodes well-formed strings as
+;; CHECK-NEXT: magic imports
+;; CHECK-NEXT:
;; CHECK-NEXT: --strip deprecated; same as strip-debug
;; CHECK-NEXT:
;; CHECK-NEXT: --strip-debug strip debug info (including the
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index 3ab4099de..493542135 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -432,6 +432,10 @@
;; CHECK-NEXT: --string-lowering lowers wasm strings and
;; CHECK-NEXT: operations to imports
;; CHECK-NEXT:
+;; CHECK-NEXT: --string-lowering-magic-imports same as string-lowering, but
+;; CHECK-NEXT: encodes well-formed strings as
+;; CHECK-NEXT: magic imports
+;; CHECK-NEXT:
;; CHECK-NEXT: --strip deprecated; same as strip-debug
;; CHECK-NEXT:
;; CHECK-NEXT: --strip-debug strip debug info (including the
diff --git a/test/lit/passes/string-lowering-imports.wast b/test/lit/passes/string-lowering-imports.wast
new file mode 100644
index 000000000..6a908139e
--- /dev/null
+++ b/test/lit/passes/string-lowering-imports.wast
@@ -0,0 +1,86 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; RUN: wasm-opt %s -all --string-lowering-magic-imports --remove-unused-module-elements -S -o - | filecheck %s
+;; RUN: wasm-opt %s -all --string-lowering-magic-imports --remove-unused-module-elements --roundtrip -S -o - | filecheck %s --check-prefix=RTRIP
+
+(module
+ ;; CHECK: (type $0 (func))
+
+ ;; CHECK: (import "\'" "bar" (global $string.const_bar (ref extern)))
+
+ ;; CHECK: (import "\'" "foo" (global $string.const_foo (ref extern)))
+
+ ;; CHECK: (import "\'" "needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (global $"string.const_needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (ref extern)))
+
+ ;; CHECK: (import "string.const" "0" (global $"string.const_unpaired high surrogate \ed\a0\80 " (ref extern)))
+
+ ;; CHECK: (import "string.const" "1" (global $"string.const_unpaired low surrogate \ed\bd\88 " (ref extern)))
+
+ ;; CHECK: (export "consts" (func $consts))
+
+ ;; CHECK: (func $consts (type $0)
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $string.const_foo)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $string.const_bar)
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $"string.const_needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae")
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $"string.const_unpaired high surrogate \ed\a0\80 ")
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (global.get $"string.const_unpaired low surrogate \ed\bd\88 ")
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ ;; RTRIP: (type $0 (func))
+
+ ;; RTRIP: (import "\'" "bar" (global $gimport$0 (ref extern)))
+
+ ;; RTRIP: (import "\'" "foo" (global $gimport$1 (ref extern)))
+
+ ;; RTRIP: (import "\'" "needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (global $gimport$2 (ref extern)))
+
+ ;; RTRIP: (import "string.const" "0" (global $gimport$3 (ref extern)))
+
+ ;; RTRIP: (import "string.const" "1" (global $gimport$4 (ref extern)))
+
+ ;; RTRIP: (export "consts" (func $consts))
+
+ ;; RTRIP: (func $consts (type $0)
+ ;; RTRIP-NEXT: (drop
+ ;; RTRIP-NEXT: (global.get $gimport$1)
+ ;; RTRIP-NEXT: )
+ ;; RTRIP-NEXT: (drop
+ ;; RTRIP-NEXT: (global.get $gimport$0)
+ ;; RTRIP-NEXT: )
+ ;; RTRIP-NEXT: (drop
+ ;; RTRIP-NEXT: (global.get $gimport$2)
+ ;; RTRIP-NEXT: )
+ ;; RTRIP-NEXT: (drop
+ ;; RTRIP-NEXT: (global.get $gimport$3)
+ ;; RTRIP-NEXT: )
+ ;; RTRIP-NEXT: (drop
+ ;; RTRIP-NEXT: (global.get $gimport$4)
+ ;; RTRIP-NEXT: )
+ ;; RTRIP-NEXT: )
+ (func $consts (export "consts")
+ (drop
+ (string.const "foo")
+ )
+ (drop
+ (string.const "bar")
+ )
+ (drop
+ (string.const "needs\tescaping\00.'#%- .\r\n\\08\0C\0A\0D\09.ꙮ")
+ )
+ (drop
+ (string.const "unpaired high surrogate \ED\A0\80 ")
+ )
+ (drop
+ (string.const "unpaired low surrogate \ED\BD\88 ")
+ )
+ )
+)
diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast
index c060bc8bd..de684889a 100644
--- a/test/lit/passes/string-lowering.wast
+++ b/test/lit/passes/string-lowering.wast
@@ -16,6 +16,12 @@
(drop
(string.const "needs\tescaping\00.'#%\"- .\r\n\\08\0C\0A\0D\09.ꙮ")
)
+ (drop
+ (string.const "unpaired high surrogate \ED\A0\80 ")
+ )
+ (drop
+ (string.const "unpaired low surrogate \ED\BD\88 ")
+ )
)
)
@@ -24,7 +30,14 @@
;;
;; RUN: wasm-opt %s --string-lowering -all -S -o - | filecheck %s
;;
-;; CHECK: custom section "string.consts", size 69, contents: "[\"bar\",\"foo\",\"needs\\tescaping\\u0000.'#%\\\"- .\\r\\n\\\\08\\f\\n\\r\\t.\\ua66e\"]"
+;; If we use magic imports, only invalid strings should be present in the JSON.
+;;
+;; RUN: wasm-opt %s --string-lowering-magic-imports -all -S -o - \
+;; RUN: | filecheck %s --check-prefix=MAGIC
+;;
+;; CHECK: custom section "string.consts", size 136, contents: "[\"bar\",\"foo\",\"needs\\tescaping\\u0000.'#%\\\"- .\\r\\n\\\\08\\f\\n\\r\\t.\\ua66e\",\"unpaired high surrogate \\ud800 \",\"unpaired low surrogate \\udf48 \"]"
+;;
+;; MAGIC: custom section "string.consts", size 68, contents: "[\"unpaired high surrogate \\ud800 \",\"unpaired low surrogate \\udf48 \"]"
;; The custom section should parse OK using JSON.parse from node.
;; (Note we run --remove-unused-module-elements to remove externref-using
@@ -33,5 +46,6 @@
;; RUN: wasm-opt %s --string-lowering --remove-unused-module-elements -all -o %t.wasm
;; RUN: node %S/string-lowering.js %t.wasm | filecheck %s --check-prefix=CHECK-JS
;;
-;; CHECK-JS: string: ["bar","foo","needs\tescaping\x00.'#%\"- .\r\n\\08\f\n\r\t.\ua66e"]
-;; CHECK-JS: JSON: ["bar","foo","needs\tescaping\x00.'#%\"- .\r\n\\08\f\n\r\t.ꙮ"]
+;; CHECK-JS: string: ["bar","foo","needs\tescaping\x00.'#%\"- .\r\n\\08\f\n\r\t.\ua66e","unpaired high surrogate \ud800 ","unpaired low surrogate \udf48 "]
+;;
+;; CHECK-JS: JSON: ["bar","foo","needs\tescaping\x00.'#%\"- .\r\n\\08\f\n\r\t.ꙮ","unpaired high surrogate \ud800 ","unpaired low surrogate \udf48 "]
diff --git a/test/spec/import-after-memory.fail.wast b/test/spec/import-after-memory.fail.wast
deleted file mode 100644
index fbe582a93..000000000
--- a/test/spec/import-after-memory.fail.wast
+++ /dev/null
@@ -1 +0,0 @@
-(module (memory 0) (import "" "" (global i32)))
diff --git a/test/spec/old_import.wast b/test/spec/old_import.wast
index eba633388..a68df1389 100644
--- a/test/spec/old_import.wast
+++ b/test/spec/old_import.wast
@@ -126,16 +126,6 @@
(assert_trap (invoke "call" (i32.const 3)) "uninitialized element")
(assert_trap (invoke "call" (i32.const 100)) "undefined element")
-
-(assert_invalid
- (module (import "" "" (table 10 funcref)) (import "" "" (table 10 funcref)))
- "multiple tables"
-)
-(assert_invalid
- (module (import "" "" (table 10 funcref)) (table 10 funcref))
- "multiple tables"
-)
-
(assert_unlinkable
(module (import "spectest" "unknown" (table 10 funcref)))
"unknown import"