summaryrefslogtreecommitdiff
path: root/src/support/istring.h
diff options
context:
space:
mode:
author: Thomas Lively <tlively@google.com> 2022-10-11 11:16:14 -0500
committer: GitHub <noreply@github.com> 2022-10-11 16:16:14 +0000
commit: b83450ed1fd98cec4453024f57f892b31851ea50 (patch)
tree: bf0467d96c9966d0f4699ea0afcdf25905b4098c /src/support/istring.h
parent: 6d4ac3162c290e32a98de349d49e26e904a40414 (diff)
download: binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.gz
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.bz2
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.zip
Make `Name` a pointer, length pair (#5122)
With the goal of supporting null characters (i.e. zero bytes) in strings. Rewrite the underlying interned `IString` to store a `std::string_view` rather than a `const char*`, reduce the number of map lookups necessary to intern a string, and present a more immutable interface. Most importantly, replace the `c_str()` method that returned a `const char*` with a `toString()` method that returns a `std::string`. This new method can correctly handle strings containing null characters. A `const char*` can still be had by calling `data()` on the `std::string_view`, although this usage should be discouraged. This change is NFC in spirit, although not in practice. It does not intend to support any particular new functionality, but it is probably now possible to use strings containing null characters in at least some cases. At least one parser bug is also incidentally fixed. Follow-on PRs will explicitly support and test strings containing nulls for particular use cases. The C API still uses `const char*` to represent strings. As strings containing nulls become better supported by the rest of Binaryen, this will no longer be sufficient. Updating the C and JS APIs to use pointer, length pairs is left as future work.
Diffstat (limited to 'src/support/istring.h')
-rw-r--r--src/support/istring.h110
1 files changed, 110 insertions, 0 deletions
diff --git a/src/support/istring.h b/src/support/istring.h
new file mode 100644
index 000000000..14f991c30
--- /dev/null
+++ b/src/support/istring.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Interned String type, 100% interned on creation. Equality comparisons are
+// always just a pointer comparison; ordering comparisons look at the contents.
+
+#ifndef wasm_support_istring_h
+#define wasm_support_istring_h
+
+#include <set>
+#include <string_view>
+#include <unordered_set>
+
+#include <assert.h>
+
+#include "threads.h"
+#include "utilities.h"
+
+namespace wasm {
+
+// Handle to an interned string. The only state is `str`, a view that is either
+// null (default construction) or whatever `interned()` returned, so copies are
+// cheap and equality is a comparison of data pointers.
+struct IString {
+private:
+  // Maps `s` to its interned view. Only declared in this header.
+  // NOTE(review): `reuse == true` presumably lets the caller's buffer be kept
+  // as-is instead of copied - confirm against the definition.
+  static std::string_view interned(std::string_view s, bool reuse = true);
+
+public:
+  // The interned contents. `data()` is nullptr for a default-constructed
+  // IString.
+  const std::string_view str;
+
+  // Creates the null string.
+  IString() = default;
+
+  // TODO: This is a wildly unsafe default inherited from the previous
+  // implementation. Change it?
+  IString(std::string_view str, bool reuse = true)
+    : str(interned(str, reuse)) {}
+
+  // But other C strings generally do need to be copied.
+  IString(const char* str) : str(interned(str, false)) {}
+  IString(const std::string& str) : str(interned(str, false)) {}
+
+  IString(const IString& other) = default;
+
+  // `str` is const, so memberwise assignment is unavailable; the object is
+  // rebuilt in place from `other` instead. Self-assignment is skipped so that
+  // the copy constructor never reads from the object it is constructing.
+  IString& operator=(const IString& other) {
+    if (this == &other) {
+      return *this;
+    }
+    return *(new (this) IString(other));
+  }
+
+  bool operator==(const IString& other) const {
+    // Fast! No need to compare contents due to interning
+    return str.data() == other.str.data();
+  }
+  bool operator!=(const IString& other) const { return !(*this == other); }
+
+  // Ordering, unlike equality, compares the characters themselves.
+  bool operator<(const IString& other) const { return str < other.str; }
+  bool operator<=(const IString& other) const { return str <= other.str; }
+  bool operator>(const IString& other) const { return str > other.str; }
+  bool operator>=(const IString& other) const { return str >= other.str; }
+
+  // Unchecked character access; `x` must be a valid index into `str`.
+  char operator[](int x) const { return str[x]; }
+
+  // True when `str` has a non-null data pointer.
+  operator bool() const { return str.data() != nullptr; }
+
+  // TODO: deprecate?
+  bool is() const { return bool(*this); }
+  bool isNull() const { return !bool(*this); }
+
+  // Copies exactly `size()` bytes, so embedded null characters are kept.
+  std::string toString() const { return {str.data(), str.size()}; }
+
+  // Content comparison against an arbitrary string.
+  bool equals(std::string_view other) const { return str == other; }
+
+  bool startsWith(std::string_view prefix) const {
+    // TODO: Use C++20 `starts_with`.
+    return str.substr(0, prefix.size()) == prefix;
+  }
+  bool startsWith(IString str) const { return startsWith(str.str); }
+
+  // Disambiguate for string literals, which would otherwise convert equally
+  // well to `std::string_view` and to `IString`. This overload is const like
+  // the two above so that literals also work on a const IString.
+  template<int N> bool startsWith(const char (&str)[N]) const {
+    return startsWith(std::string_view(str));
+  }
+
+  size_t size() const { return str.size(); }
+};
+
+} // namespace wasm
+
+namespace std {
+
+// Hash support for IString. Equality on IString compares data pointers, so
+// the hash is computed from the pointer value rather than the characters.
+template<> struct hash<wasm::IString> {
+  size_t operator()(const wasm::IString& str) const {
+    const auto address = reinterpret_cast<uintptr_t>(str.str.data());
+    std::hash<size_t> hasher;
+    return hasher(address);
+  }
+};
+
+// Writes the contents of `str` (its string_view) to `os` and returns `os`.
+// NOTE(review): this overload is declared inside namespace std; the standard
+// only sanctions specializations there, so consider whether it should live
+// next to IString instead.
+inline std::ostream& operator<<(std::ostream& os, const wasm::IString& str) {
+  os << str.str;
+  return os;
+}
+
+} // namespace std
+
+#endif // wasm_support_istring_h