Make `Name` a pointer, length pair (#5122)

With the goal of supporting null characters (i.e. zero bytes) in strings, rewrite the underlying interned `IString` to store a `std::string_view` rather than a `const char*`, reduce the number of map lookups necessary to intern a string, and present a more immutable interface. Most importantly, replace the `c_str()` method that returned a `const char*` with a `toString()` method that returns a `std::string`. This new method can correctly handle strings containing null characters. A `const char*` can still be had by calling `data()` on the `std::string_view`, although this usage should be discouraged. This change is NFC in spirit, although not in practice. It does not intend to support any particular new functionality, but it is probably now possible to use strings containing null characters in at least some cases. At least one parser bug is also incidentally fixed. Follow-on PRs will explicitly support and test strings containing nulls for particular use cases. The C API still uses `const char*` to represent strings. As strings containing nulls become better supported by the rest of Binaryen, this will no longer be sufficient. Updating the C and JS APIs to use pointer, length pairs is left as future work.
author: Thomas Lively <tlively@google.com> 2022-10-11 11:16:14 -0500
committer: GitHub <noreply@github.com> 2022-10-11 16:16:14 +0000
commit: b83450ed1fd98cec4453024f57f892b31851ea50 (patch)
tree: bf0467d96c9966d0f4699ea0afcdf25905b4098c /src/emscripten-optimizer/parser.h
parent: 6d4ac3162c290e32a98de349d49e26e904a40414 (diff)
download: binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.gz
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.bz2
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.zip
1 files changed, 42 insertions, 7 deletions
diff --git a/src/emscripten-optimizer/parser.h b/src/emscripten-optimizer/parser.h
index c4d596058..8c3f36427 100644
--- a/src/emscripten-optimizer/parser.h
+++ b/src/emscripten-optimizer/parser.h
@@ -30,11 +30,46 @@
 #include <limits>
 #include <vector>
 
-#include "istring.h"
+#include "support/istring.h"
 #include "support/safe_integer.h"
 
 namespace cashew {
 
+using IString = wasm::IString;
+
+// IStringSet: an unordered set of IString, optionally filled from a space-separated list of tokens
+
+class IStringSet : public std::unordered_set<IString> {
+  std::vector<char> data; // writable copy of the init string; separators in it get overwritten with NUL
+
+public:
+  IStringSet() = default;
+  IStringSet(const char* init) { // space-delimited list (the split below is on ' ', not ',')
+    int size = strlen(init) + 1; // +1 so the terminating NUL is copied too; NOTE(review): size_t is narrowed to int here
+    data.resize(size);
+    char* curr = &data[0]; // start of the current token inside the private copy
+    strncpy(curr, init, size);
+    while (1) {
+      char* end = strchr(curr, ' '); // next separator, or null when this is the last token
+      if (end) {
+        *end = 0; // cut the token off in place so curr is a NUL-terminated C string
+      }
+      insert(curr); // an empty token (empty init, or adjacent spaces) is inserted as an empty string
+      if (!end) {
+        break;
+      }
+      curr = end + 1; // continue right after the separator just replaced
+    }
+  }
+
+  bool has(const IString& str) { return count(str) > 0; } // membership test: true if str is in the set
+};
+
+class IOrderedStringSet : public std::set<IString> { // std::set-backed set of IString; element order comes from IString's comparison, which is not shown here
+public:
+  bool has(const IString& str) { return count(str) > 0; } // membership test: true if str is in the set
+};
+
 // common strings
 
 extern IString TOPLEVEL;
@@ -233,11 +268,11 @@ template<class NodeRef, class Builder> class Parser {
           src++;
         }
         if (*src == 0) {
-          str.set(start);
+          str = IString(start);
         } else {
           char temp = *src;
           *src = 0;
-          str.set(start, false);
+          str = IString(start, false);
           *src = temp;
         }
         type = keywords.has(str) ? KEYWORD : IDENT;
@@ -333,11 +368,11 @@ template<class NodeRef, class Builder> class Parser {
           default:
             abort();
         }
-        size = strlen(str.str);
+        size = str.size();
 #ifndef NDEBUG
         char temp = start[size];
         start[size] = 0;
-        assert(strcmp(str.str, start) == 0);
+        assert(str.str == start);
         start[size] = temp;
 #endif
         type = OPERATOR;
@@ -346,13 +381,13 @@ template<class NodeRef, class Builder> class Parser {
         type = SEPARATOR;
         char temp = src[1];
         src[1] = 0;
-        str.set(src, false);
+        str = IString(src, false);
         src[1] = temp;
         src++;
       } else if (*src == '"' || *src == '\'') {
         char* end = strchr(src + 1, *src);
         *end = 0;
-        str.set(src + 1);
+        str = IString(src + 1);
         src = end + 1;
         type = STRING;
       } else {
author	Thomas Lively <tlively@google.com>	2022-10-11 11:16:14 -0500
committer	GitHub <noreply@github.com>	2022-10-11 16:16:14 +0000
commit	b83450ed1fd98cec4453024f57f892b31851ea50 (patch)
tree	bf0467d96c9966d0f4699ea0afcdf25905b4098c /src/emscripten-optimizer/parser.h
parent	6d4ac3162c290e32a98de349d49e26e904a40414 (diff)
download	binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.gz binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.bz2 binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.zip