[Strings] Add a string lowering pass using magic imports (#6497)

The latest idea for efficient string constants is to encode the constants in the import names of their globals and implement fast paths in the engines for materializing those constants at instantiation time without needing to parse anything in JS. This strategy only works for valid strings (i.e. strings without unpaired surrogates) because only valid strings can be used as import names in the WebAssembly syntax. Add a new configuration of the StringLowering pass that encodes valid string contents in import names, falling back to the JSON custom section approach for invalid strings. To test this change, update the printer to escape import and export names properly and update the legacy parser to parse escapes in import and export names properly. As a drive-by, remove the incorrect check in the parser that the import module and base names are non-empty.
author: Thomas Lively <tlively@google.com> 2024-04-15 14:02:24 -0700
committer: GitHub <noreply@github.com> 2024-04-15 14:02:24 -0700
commit: b1245577ba92b77a97e266cf4c7f7cd15e6e7f28 (patch)
tree: 333e17f651e6ed9d24fa13aa86f38fcc907541cf /src/support/string.cpp
parent: 8c834e8257b03ea87b639ddac9adefec64fcad00 (diff)
download: binaryen-b1245577ba92b77a97e266cf4c7f7cd15e6e7f28.tar.gz
binaryen-b1245577ba92b77a97e266cf4c7f7cd15e6e7f28.tar.bz2
binaryen-b1245577ba92b77a97e266cf4c7f7cd15e6e7f28.zip
1 files changed, 29 insertions, 12 deletions
diff --git a/src/support/string.cpp b/src/support/string.cpp
index 68249f51e..31d0e9170 100644
--- a/src/support/string.cpp
+++ b/src/support/string.cpp
@@ -213,7 +213,8 @@ std::optional<uint16_t> takeWTF16CodeUnit(std::string_view& str) {
   return u;
 }
 
-std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str) {
+std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str,
+                                           bool allowWTF = true) {
   auto u = takeWTF16CodeUnit(str);
   if (!u) {
     return std::nullopt;
@@ -228,7 +229,13 @@ std::optional<uint32_t> takeWTF16CodePoint(std::string_view& str) {
       uint16_t highBits = *u - 0xD800;
       uint16_t lowBits = *low - 0xDC00;
       return 0x10000 + ((highBits << 10) | lowBits);
+    } else if (!allowWTF) {
+      // Unpaired high surrogate.
+      return std::nullopt;
     }
+  } else if (!allowWTF && 0xDC00 <= *u && *u < 0xE000) {
+    // Unpaired low surrogate.
+    return std::nullopt;
   }
 
   return *u;
@@ -242,6 +249,23 @@ void writeWTF16CodeUnit(std::ostream& os, uint16_t u) {
 
 constexpr uint32_t replacementCharacter = 0xFFFD;
 
+bool doConvertWTF16ToWTF8(std::ostream& os,
+                          std::string_view str,
+                          bool allowWTF) {
+  bool valid = true;
+
+  while (str.size()) {
+    auto u = takeWTF16CodePoint(str, allowWTF);
+    if (!u) {
+      valid = false;
+      u = replacementCharacter;
+    }
+    writeWTF8CodePoint(os, *u);
+  }
+
+  return valid;
+}
+
 } // anonymous namespace
 
 std::ostream& writeWTF8CodePoint(std::ostream& os, uint32_t u) {
@@ -308,18 +332,11 @@ bool convertWTF8ToWTF16(std::ostream& os, std::string_view str) {
 }
 
 bool convertWTF16ToWTF8(std::ostream& os, std::string_view str) {
-  bool valid = true;
-
-  while (str.size()) {
-    auto u = takeWTF16CodePoint(str);
-    if (!u) {
-      valid = false;
-      u = replacementCharacter;
-    }
-    writeWTF8CodePoint(os, *u);
-  }
+  return doConvertWTF16ToWTF8(os, str, true);
+}
 
-  return valid;
+bool convertUTF16ToUTF8(std::ostream& os, std::string_view str) {
+  return doConvertWTF16ToWTF8(os, str, false);
 }
 
 std::ostream& printEscapedJSON(std::ostream& os, std::string_view str) {
author	Thomas Lively <tlively@google.com>	2024-04-15 14:02:24 -0700
committer	GitHub <noreply@github.com>	2024-04-15 14:02:24 -0700
commit	b1245577ba92b77a97e266cf4c7f7cd15e6e7f28 (patch)
tree	333e17f651e6ed9d24fa13aa86f38fcc907541cf /src/support/string.cpp
parent	8c834e8257b03ea87b639ddac9adefec64fcad00 (diff)
download	binaryen-b1245577ba92b77a97e266cf4c7f7cd15e6e7f28.tar.gz binaryen-b1245577ba92b77a97e266cf4c7f7cd15e6e7f28.tar.bz2 binaryen-b1245577ba92b77a97e266cf4c7f7cd15e6e7f28.zip