diff options
-rw-r--r-- | src/passes/Print.cpp | 13 | ||||
-rw-r--r-- | src/passes/StringLowering.cpp | 35 | ||||
-rw-r--r-- | src/passes/pass.cpp | 4 | ||||
-rw-r--r-- | src/passes/passes.h | 1 | ||||
-rw-r--r-- | src/pretty_printing.h | 18 | ||||
-rw-r--r-- | src/support/string.cpp | 41 | ||||
-rw-r--r-- | src/support/string.h | 5 | ||||
-rw-r--r-- | src/wasm/wasm-s-parser.cpp | 19 | ||||
-rw-r--r-- | test/lit/help/wasm-opt.test | 4 | ||||
-rw-r--r-- | test/lit/help/wasm2js.test | 4 | ||||
-rw-r--r-- | test/lit/passes/string-lowering-imports.wast | 86 | ||||
-rw-r--r-- | test/lit/passes/string-lowering.wast | 20 | ||||
-rw-r--r-- | test/spec/import-after-memory.fail.wast | 1 | ||||
-rw-r--r-- | test/spec/old_import.wast | 10 |
14 files changed, 207 insertions, 54 deletions
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 68a2e4cb6..a90ef4669 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2838,8 +2838,9 @@ void PrintSExpression::handleSignature(HeapType curr, Name name) { void PrintSExpression::visitExport(Export* curr) { o << '('; printMedium(o, "export "); - // TODO: Escape the string properly. - printText(o, curr->name.str.data()) << " ("; + std::stringstream escaped; + String::printEscaped(escaped, curr->name.str); + printText(o, escaped.str(), false) << " ("; switch (curr->kind) { case ExternalKind::Function: o << "func"; @@ -2865,9 +2866,11 @@ void PrintSExpression::visitExport(Export* curr) { void PrintSExpression::emitImportHeader(Importable* curr) { printMedium(o, "import "); - // TODO: Escape the strings properly and use std::string_view. - printText(o, curr->module.str.data()) << ' '; - printText(o, curr->base.str.data()) << ' '; + std::stringstream escapedModule, escapedBase; + String::printEscaped(escapedModule, curr->module.str); + String::printEscaped(escapedBase, curr->base.str); + printText(o, escapedModule.str(), false) << ' '; + printText(o, escapedBase.str(), false) << ' '; } void PrintSExpression::visitGlobal(Global* curr) { diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index df2d66860..dd7428546 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -189,6 +189,13 @@ struct StringGathering : public Pass { }; struct StringLowering : public StringGathering { + // If true, then encode well-formed strings as (import "'" "string...") + // instead of emitting them into the JSON custom section. + bool useMagicImports; + + StringLowering(bool useMagicImports = false) + : useMagicImports(useMagicImports) {} + void run(Module* module) override { if (!module->features.has(FeatureSet::Strings)) { return; @@ -217,25 +224,30 @@ struct StringLowering : public StringGathering { } void makeImports(Module* module) { - Index importIndex = 0; + Index jsonImportIndex = 0; std::stringstream json; json << '['; bool first = true; - std::vector<Name> importedStrings; for (auto& global : module->globals) { if (global->init) { if (auto* c = global->init->dynCast<StringConst>()) { - global->module = "string.const"; - global->base = std::to_string(importIndex); - importIndex++; - global->init = nullptr; - - if (first) { - first = false; + std::stringstream utf8; + if (useMagicImports && + String::convertUTF16ToUTF8(utf8, c->string.str)) { + global->module = "'"; + global->base = Name(utf8.str()); } else { - json << ','; + global->module = "string.const"; + global->base = std::to_string(jsonImportIndex); + if (first) { + first = false; + } else { + json << ','; + } + String::printEscapedJSON(json, c->string.str); + jsonImportIndex++; } - String::printEscapedJSON(json, c->string.str); + global->init = nullptr; } } } @@ -516,5 +528,6 @@ struct StringLowering : public StringGathering { Pass* createStringGatheringPass() { return new StringGathering(); } Pass* createStringLoweringPass() { return new StringLowering(); } +Pass* createStringLoweringMagicImportPass() { return new StringLowering(true); } } // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 0955082ac..19ddaf2d4 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -484,6 +484,10 @@ void PassRegistry::registerPasses() { "lowers wasm strings and operations to imports", createStringLoweringPass); registerPass( + "string-lowering-magic-imports", + "same as string-lowering, but encodes well-formed strings as magic imports", + createStringLoweringMagicImportPass); + registerPass( "strip", "deprecated; same as strip-debug", createStripDebugPass); registerPass("stack-check", "enforce limits on llvm's __stack_pointer global", diff --git a/src/passes/passes.h b/src/passes/passes.h index 1b1ca99c6..23a9ea70b 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -156,6 +156,7 @@ Pass* createSimplifyLocalsNoTeeNoStructurePass(); Pass* createStackCheckPass(); Pass* createStringGatheringPass(); Pass* createStringLoweringPass(); +Pass* createStringLoweringMagicImportPass(); Pass* createStripDebugPass(); Pass* createStripDWARFPass(); Pass* createStripProducersPass(); diff --git a/src/pretty_printing.h b/src/pretty_printing.h index f693c4d51..0f1a0ed87 100644 --- a/src/pretty_printing.h +++ b/src/pretty_printing.h @@ -51,29 +51,35 @@ inline std::ostream& restoreNormalColor(std::ostream& o) { return o; } -inline std::ostream& printText(std::ostream& o, const char* str) { - o << '"'; +inline std::ostream& +printText(std::ostream& o, std::string_view str, bool needQuotes = true) { + if (needQuotes) { + o << '"'; + } Colors::green(o); o << str; Colors::normal(o); - return o << '"'; + if (needQuotes) { + o << '"'; + } + return o; } -inline std::ostream& printMajor(std::ostream& o, const char* str) { +inline std::ostream& printMajor(std::ostream& o, std::string_view str) { prepareMajorColor(o); o << str; restoreNormalColor(o); return o; } -inline std::ostream& printMedium(std::ostream& o, const char* str) { +inline std::ostream& printMedium(std::ostream& o, std::string_view str) { prepareColor(o); o << str; restoreNormalColor(o); return o; } -inline std::ostream& printMinor(std::ostream& o, const char* str) { +inline std::ostream& printMinor(std::ostream& o, std::string_view str) { prepareMinorColor(o); o << str; restoreNormalColor(o); diff --git a/src/support/string.cpp b/src/support/string.cpp index 68249f51e..31d0e9170 100644 --- a/src/support/string.cpp +++ b/src/support/string.cpp @@ -213,7 +213,8 @@ std::optional<uint16_t> takeWTF16CodeUnit(std::string_view& str) { return u; } -std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str) { +std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str, + bool allowWTF = true) { auto u = takeWTF16CodeUnit(str); if (!u) { return std::nullopt; @@ -228,7 +229,13 @@ std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str) { uint16_t highBits = *u - 0xD800; uint16_t lowBits = *low - 0xDC00; return 0x10000 + ((highBits << 10) | lowBits); + } else if (!allowWTF) { + // Unpaired high surrogate. + return std::nullopt; } + } else if (!allowWTF && 0xDC00 <= *u && *u < 0xE000) { + // Unpaired low surrogate. + return std::nullopt; } return *u; @@ -242,6 +249,23 @@ void writeWTF16CodeUnit(std::ostream& os, uint16_t u) { constexpr uint32_t replacementCharacter = 0xFFFD; +bool doConvertWTF16ToWTF8(std::ostream& os, + std::string_view str, + bool allowWTF) { + bool valid = true; + + while (str.size()) { + auto u = takeWTF16CodePoint(str, allowWTF); + if (!u) { + valid = false; + u = replacementCharacter; + } + writeWTF8CodePoint(os, *u); + } + + return valid; +} + } // anonymous namespace std::ostream& writeWTF8CodePoint(std::ostream& os, uint32_t u) { @@ -308,18 +332,11 @@ bool convertWTF8ToWTF16(std::ostream& os, std::string_view str) { } bool convertWTF16ToWTF8(std::ostream& os, std::string_view str) { - bool valid = true; - - while (str.size()) { - auto u = takeWTF16CodePoint(str); - if (!u) { - valid = false; - u = replacementCharacter; - } - writeWTF8CodePoint(os, *u); - } + return doConvertWTF16ToWTF8(os, str, true); +} - return valid; +bool convertUTF16ToUTF8(std::ostream& os, std::string_view str) { + return doConvertWTF16ToWTF8(os, str, false); } std::ostream& printEscapedJSON(std::ostream& os, std::string_view str) { diff --git a/src/support/string.h b/src/support/string.h index be2c3c6a3..af120ab4e 100644 --- a/src/support/string.h +++ b/src/support/string.h @@ -94,6 +94,11 @@ bool convertWTF8ToWTF16(std::ostream& os, std::string_view str); // Returns `true` iff the input was valid WTF-16. bool convertWTF16ToWTF8(std::ostream& os, std::string_view str); +// Writes the UTF-8 encoding of the given UTF-16LE string to `os`, inserting a +// replacement character in place of any unpaired surrogate or incomplete code +// unit. Returns `true` if the input was valid UTF-16. +bool convertUTF16ToUTF8(std::ostream& os, std::string_view str); + } // namespace wasm::String #endif // wasm_support_string_h diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp index eb31355c8..bca94a768 100644 --- a/src/wasm/wasm-s-parser.cpp +++ b/src/wasm/wasm-s-parser.cpp @@ -3622,7 +3622,9 @@ void SExpressionWasmBuilder::parseInnerData(Element& s, void SExpressionWasmBuilder::parseExport(Element& s) { std::unique_ptr<Export> ex = std::make_unique<Export>(); - ex->name = s[1]->str(); + std::vector<char> nameBytes; + stringToBinary(*s[1], s[1]->str().str, nameBytes); + ex->name = std::string(nameBytes.data(), nameBytes.size()); if (s[2]->isList()) { auto& inner = *s[2]; if (elementStartsWith(inner, FUNC)) { @@ -3703,15 +3705,20 @@ void SExpressionWasmBuilder::parseImport(Element& s) { if (!newStyle) { kind = ExternalKind::Function; } - auto module = s[i++]->str(); + std::vector<char> moduleBytes; + stringToBinary(*s[i], s[i]->str().str, moduleBytes); + Name module = std::string(moduleBytes.data(), moduleBytes.size()); + i++; + if (!s[i]->isStr()) { throw SParseException("no name for import", s, *s[i]); } - auto base = s[i]->str(); - if (!module.size() || !base.size()) { - throw SParseException("imports must have module and base", s, *s[i]); - } + + std::vector<char> baseBytes; + stringToBinary(*s[i], s[i]->str().str, baseBytes); + Name base = std::string(baseBytes.data(), baseBytes.size()); i++; + // parse internals Element& inner = newStyle ? *s[3] : s; Index j = newStyle ? newStyleInner : i; diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index 84b5049dd..ff07586f2 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -478,6 +478,10 @@ ;; CHECK-NEXT: --string-lowering lowers wasm strings and ;; CHECK-NEXT: operations to imports ;; CHECK-NEXT: +;; CHECK-NEXT: --string-lowering-magic-imports same as string-lowering, but +;; CHECK-NEXT: encodes well-formed strings as +;; CHECK-NEXT: magic imports +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 3ab4099de..493542135 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -432,6 +432,10 @@ ;; CHECK-NEXT: --string-lowering lowers wasm strings and ;; CHECK-NEXT: operations to imports ;; CHECK-NEXT: +;; CHECK-NEXT: --string-lowering-magic-imports same as string-lowering, but +;; CHECK-NEXT: encodes well-formed strings as +;; CHECK-NEXT: magic imports +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/passes/string-lowering-imports.wast b/test/lit/passes/string-lowering-imports.wast new file mode 100644 index 000000000..6a908139e --- /dev/null +++ b/test/lit/passes/string-lowering-imports.wast @@ -0,0 +1,86 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: wasm-opt %s -all --string-lowering-magic-imports --remove-unused-module-elements -S -o - | filecheck %s +;; RUN: wasm-opt %s -all --string-lowering-magic-imports --remove-unused-module-elements --roundtrip -S -o - | filecheck %s --check-prefix=RTRIP + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (import "\'" "bar" (global $string.const_bar (ref extern))) + + ;; CHECK: (import "\'" "foo" (global $string.const_foo (ref extern))) + + ;; CHECK: (import "\'" "needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (global $"string.const_needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (ref extern))) + + ;; CHECK: (import "string.const" "0" (global $"string.const_unpaired high surrogate \ed\a0\80 " (ref extern))) + + ;; CHECK: (import "string.const" "1" (global $"string.const_unpaired low surrogate \ed\bd\88 " (ref extern))) + + ;; CHECK: (export "consts" (func $consts)) + + ;; CHECK: (func $consts (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_foo) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_bar) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $"string.const_needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $"string.const_unpaired high surrogate \ed\a0\80 ") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $"string.const_unpaired low surrogate \ed\bd\88 ") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; RTRIP: (type $0 (func)) + + ;; RTRIP: (import "\'" "bar" (global $gimport$0 (ref extern))) + + ;; RTRIP: (import "\'" "foo" (global $gimport$1 (ref extern))) + + ;; RTRIP: (import "\'" "needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (global $gimport$2 (ref extern))) + + ;; RTRIP: (import "string.const" "0" (global $gimport$3 (ref extern))) + + ;; RTRIP: (import "string.const" "1" (global $gimport$4 (ref extern))) + + ;; RTRIP: (export "consts" (func $consts)) + + ;; RTRIP: (func $consts (type $0) + ;; RTRIP-NEXT: (drop + ;; RTRIP-NEXT: (global.get $gimport$1) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: (drop + ;; RTRIP-NEXT: (global.get $gimport$0) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: (drop + ;; RTRIP-NEXT: (global.get $gimport$2) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: (drop + ;; RTRIP-NEXT: (global.get $gimport$3) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: (drop + ;; RTRIP-NEXT: (global.get $gimport$4) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + (func $consts (export "consts") + (drop + (string.const "foo") + ) + (drop + (string.const "bar") + ) + (drop + (string.const "needs\tescaping\00.'#%- .\r\n\\08\0C\0A\0D\09.ꙮ") + ) + (drop + (string.const "unpaired high surrogate \ED\A0\80 ") + ) + (drop + (string.const "unpaired low surrogate \ED\BD\88 ") + ) + ) +) diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index c060bc8bd..de684889a 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -16,6 +16,12 @@ (drop (string.const "needs\tescaping\00.'#%\"- .\r\n\\08\0C\0A\0D\09.ꙮ") ) + (drop + (string.const "unpaired high surrogate \ED\A0\80 ") + ) + (drop + (string.const "unpaired low surrogate \ED\BD\88 ") + ) ) ) @@ -24,7 +30,14 @@ ;; ;; RUN: wasm-opt %s --string-lowering -all -S -o - | filecheck %s ;; -;; CHECK: custom section "string.consts", size 69, contents: "[\"bar\",\"foo\",\"needs\\tescaping\\u0000.'#%\\\"- .\\r\\n\\\\08\\f\\n\\r\\t.\\ua66e\"]" +;; If we use magic imports, only invalid strings should be present in the JSON. +;; +;; RUN: wasm-opt %s --string-lowering-magic-imports -all -S -o - \ +;; RUN: | filecheck %s --check-prefix=MAGIC +;; +;; CHECK: custom section "string.consts", size 136, contents: "[\"bar\",\"foo\",\"needs\\tescaping\\u0000.'#%\\\"- .\\r\\n\\\\08\\f\\n\\r\\t.\\ua66e\",\"unpaired high surrogate \\ud800 \",\"unpaired low surrogate \\udf48 \"]" +;; +;; MAGIC: custom section "string.consts", size 68, contents: "[\"unpaired high surrogate \\ud800 \",\"unpaired low surrogate \\udf48 \"]" ;; The custom section should parse OK using JSON.parse from node. ;; (Note we run --remove-unused-module-elements to remove externref-using @@ -33,5 +46,6 @@ ;; RUN: wasm-opt %s --string-lowering --remove-unused-module-elements -all -o %t.wasm ;; RUN: node %S/string-lowering.js %t.wasm | filecheck %s --check-prefix=CHECK-JS ;; -;; CHECK-JS: string: ["bar","foo","needs\tescaping\x00.'#%\"- .\r\n\\08\f\n\r\t.\ua66e"] -;; CHECK-JS: JSON: ["bar","foo","needs\tescaping\x00.'#%\"- .\r\n\\08\f\n\r\t.ꙮ"] +;; CHECK-JS: string: ["bar","foo","needs\tescaping\x00.'#%\"- .\r\n\\08\f\n\r\t.\ua66e","unpaired high surrogate \ud800 ","unpaired low surrogate \udf48 "] +;; +;; CHECK-JS: JSON: ["bar","foo","needs\tescaping\x00.'#%\"- .\r\n\\08\f\n\r\t.ꙮ","unpaired high surrogate \ud800 ","unpaired low surrogate \udf48 "] diff --git a/test/spec/import-after-memory.fail.wast b/test/spec/import-after-memory.fail.wast deleted file mode 100644 index fbe582a93..000000000 --- a/test/spec/import-after-memory.fail.wast +++ /dev/null @@ -1 +0,0 @@ -(module (memory 0) (import "" "" (global i32))) diff --git a/test/spec/old_import.wast b/test/spec/old_import.wast index eba633388..a68df1389 100644 --- a/test/spec/old_import.wast +++ b/test/spec/old_import.wast @@ -126,16 +126,6 @@ (assert_trap (invoke "call" (i32.const 3)) "uninitialized element") (assert_trap (invoke "call" (i32.const 100)) "undefined element") - -(assert_invalid - (module (import "" "" (table 10 funcref)) (import "" "" (table 10 funcref))) - "multiple tables" -) -(assert_invalid - (module (import "" "" (table 10 funcref)) (table 10 funcref)) - "multiple tables" -) - (assert_unlinkable (module (import "spectest" "unknown" (table 10 funcref))) "unknown import" |