summaryrefslogtreecommitdiff
path: root/src/support
diff options
context:
space:
mode:
Diffstat (limited to 'src/support')
-rw-r--r-- src/support/CMakeLists.txt 1
-rw-r--r-- src/support/istring.cpp 88
-rw-r--r-- src/support/istring.h 110
-rw-r--r-- src/support/json.h 10
-rw-r--r-- src/support/name.h 30
5 files changed, 221 insertions, 18 deletions
diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index 0d9350305..a02c1f447 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -6,6 +6,7 @@ set(support_SOURCES
command-line.cpp
debug.cpp
file.cpp
+ istring.cpp
path.cpp
safe_integer.cpp
threads.cpp
diff --git a/src/support/istring.cpp b/src/support/istring.cpp
new file mode 100644
index 000000000..8a3319b5e
--- /dev/null
+++ b/src/support/istring.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "istring.h"
+
+namespace wasm {
+
+std::string_view IString::interned(std::string_view s, bool reuse) {
+ // Returns the canonical view equal to `s`, interning it first if needed.
+ // The sets below hold string_views that must be repointed in place after
+ // insertion to avoid extra lookups. Since set elements cannot normally be
+ // modified, wrap each view in a struct whose member is `mutable`.
+ struct MutStringView {
+ mutable std::string_view str;
+ MutStringView(std::string_view str) : str(str) {}
+ };
+ struct MutStringViewHash {
+ size_t operator()(const MutStringView& mut) const {
+ return std::hash<std::string_view>{}(mut.str);
+ }
+ };
+ struct MutStringViewEqual {
+ bool operator()(const MutStringView& a, const MutStringView& b) const {
+ return a.str == b.str;
+ }
+ };
+ using StringSet =
+ std::unordered_set<MutStringView, MutStringViewHash, MutStringViewEqual>;
+
+ // The authoritative global set of interned string views. See `mutex`.
+ static StringSet globalStrings;
+
+ // The global backing store for interned strings that do not otherwise have
+ // stable addresses. Each inner vector owns one NUL-terminated copy.
+ static std::vector<std::vector<char>> allocated;
+
+ // Guards access to `globalStrings` and `allocated`.
+ static std::mutex mutex;
+
+ // A per-thread cache consulted first, so repeat lookups take no lock.
+ thread_local static StringSet localStrings;
+
+ auto [localIt, localInserted] = localStrings.insert(s);
+ if (!localInserted) {
+ // This thread has already seen an equal string; reuse its cached view.
+ return localIt->str;
+ }
+
+ // Newly added to the local cache, still viewing `s`. Check the global set.
+ std::unique_lock<std::mutex> lock(mutex);
+ auto [globalIt, globalInserted] = globalStrings.insert(s);
+ if (!globalInserted) {
+ // Already interned globally. Repoint the local entry at that view.
+ localIt->str = globalIt->str;
+ return localIt->str;
+ }
+
+ if (!reuse) {
+ // We have a new string, but `s` is not known to have a stable address. Copy
+ // its characters into `allocated` and view that copy instead. A NUL is added
+ // past the end of the view so legacy uses that get a C string still work.
+ allocated.emplace_back();
+ auto& data = allocated.back();
+ data.reserve(s.size() + 1);
+ data.insert(data.end(), s.begin(), s.end());
+ data.push_back('\0');
+ s = std::string_view(allocated.back().data(), s.size());
+ }
+
+ // Intern it: repoint both set entries at the stable view (same contents).
+ localIt->str = globalIt->str = s;
+ return s;
+}
+
+} // namespace wasm
diff --git a/src/support/istring.h b/src/support/istring.h
new file mode 100644
index 000000000..14f991c30
--- /dev/null
+++ b/src/support/istring.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2022 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Interned String type, 100% interned on creation. Equality comparisons are
+// always just a pointer comparison; ordering comparisons compare contents.
+
+#ifndef wasm_support_istring_h
+#define wasm_support_istring_h
+
+#include <set>
+#include <string_view>
+#include <unordered_set>
+
+#include <assert.h>
+
+#include "threads.h"
+#include "utilities.h"
+
+namespace wasm {
+
+struct IString {
+private:
+ static std::string_view interned(std::string_view s, bool reuse = true);
+
+public:
+ const std::string_view str; // Interned view; null data() means "no string".
+
+ IString() = default; // Null: `str.data()` is nullptr.
+
+ // With `reuse` the caller's buffer may be interned as-is, so it must stay
+ // valid. TODO: This unsafe default is inherited from the old code. Change it?
+ IString(std::string_view str, bool reuse = true)
+ : str(interned(str, reuse)) {}
+
+ // But other C strings generally do need to be copied.
+ IString(const char* str) : str(interned(str, false)) {}
+ IString(const std::string& str) : str(interned(str, false)) {}
+
+ IString(const IString& other) = default;
+
+ IString& operator=(const IString& other) {
+ return *(new (this) IString(other)); // `str` is const: rebuild in place.
+ }
+
+ bool operator==(const IString& other) const {
+ // Fast! Interning makes equal contents share one address.
+ return str.data() == other.str.data();
+ }
+ bool operator!=(const IString& other) const { return !(*this == other); }
+ bool operator<(const IString& other) const { return str < other.str; }
+ bool operator<=(const IString& other) const { return str <= other.str; }
+ bool operator>(const IString& other) const { return str > other.str; }
+ bool operator>=(const IString& other) const { return str >= other.str; }
+
+ char operator[](int x) const { return str[x]; }
+
+ operator bool() const { return str.data() != nullptr; }
+
+ // TODO: deprecate?
+ bool is() const { return bool(*this); }
+ bool isNull() const { return !bool(*this); }
+
+ std::string toString() const { return {str.data(), str.size()}; }
+
+ bool equals(std::string_view other) const { return str == other; }
+
+ bool startsWith(std::string_view prefix) const {
+ // TODO: Use C++20 `starts_with`.
+ return str.substr(0, prefix.size()) == prefix;
+ }
+ bool startsWith(IString str) const { return startsWith(str.str); }
+
+ // Disambiguate string literals; const so it also works on const IStrings.
+ template<int N> bool startsWith(const char (&str)[N]) const {
+ return startsWith(std::string_view(str));
+ }
+
+ size_t size() const { return str.size(); }
+};
+
+} // namespace wasm
+
+namespace std {
+
+template<> struct hash<wasm::IString> { // Hashes the interned data pointer.
+ size_t operator()(const wasm::IString& s) const {
+ return std::hash<size_t>{}(reinterpret_cast<uintptr_t>(s.str.data()));
+ }
+};
+
+inline std::ostream& operator<<(std::ostream& out, const wasm::IString& s) {
+ return out << s.str; // Writes the characters of the view.
+}
+
+} // namespace std
+
+#endif // wasm_support_istring_h
diff --git a/src/support/json.h b/src/support/json.h
index a145e1f6b..269b6c0e9 100644
--- a/src/support/json.h
+++ b/src/support/json.h
@@ -37,12 +37,12 @@
#include <unordered_set>
#include <vector>
-#include "emscripten-optimizer/istring.h"
+#include "support/istring.h"
#include "support/safe_integer.h"
namespace json {
-using IString = cashew::IString;
+using IString = wasm::IString;
// Main value type
struct Value {
@@ -112,13 +112,13 @@ struct Value {
Value& setString(const char* s) {
free();
type = String;
- str.set(s);
+ str = s; // Implicit IString(const char*) interns a copy of `s`.
return *this;
}
Value& setString(const IString& s) {
free();
type = String;
- str.set(s);
+ str = s; // Copies only the interned view, not the characters.
return *this;
}
Value& setNumber(double n) {
@@ -173,7 +173,7 @@ struct Value {
const char* getCString() {
assert(isString());
- return str.str;
+ return str.str.data(); // NOTE(review): assumes NUL-terminated storage.
}
IString& getIString() {
assert(isString());
diff --git a/src/support/name.h b/src/support/name.h
index 615740e09..a22461d5d 100644
--- a/src/support/name.h
+++ b/src/support/name.h
@@ -17,9 +17,7 @@
#ifndef wasm_support_name_h
#define wasm_support_name_h
-#include <string>
-
-#include "emscripten-optimizer/istring.h"
+#include "support/istring.h"
namespace wasm {
@@ -33,14 +31,19 @@ namespace wasm {
// TODO: as an optimization, IString values < some threshold could be considered
// numerical indices directly.
-struct Name : public cashew::IString {
- Name() : cashew::IString() {}
- Name(const char* str) : cashew::IString(str, false) {}
- Name(cashew::IString str) : cashew::IString(str) {}
- Name(const std::string& str) : cashew::IString(str.c_str(), false) {}
+struct Name : public IString {
+ Name() : IString() {}
+ Name(std::string_view str) : IString(str, false) {}
+ Name(const char* str) : IString(str, false) {}
+ Name(IString str) : IString(str) {}
+ Name(const std::string& str) : IString(str) {}
+
+ // String literals need no copy. Unsafe for temporary char arrays: take their
+ // address first. NOTE(review): this calls IString(const char*), which copies.
+ template<size_t N> Name(const char (&str)[N]) : IString(str) {}
friend std::ostream& operator<<(std::ostream& o, Name name) {
- if (name.str) {
+ if (name) {
return o << name.str;
} else {
return o << "(null Name)";
@@ -48,11 +51,12 @@ struct Name : public cashew::IString {
}
static Name fromInt(size_t i) {
- return cashew::IString(std::to_string(i).c_str(), false);
+ return IString(std::to_string(i).c_str(), false); // Copy: temporary dies.
}
- bool hasSubstring(cashew::IString substring) {
- return strstr(c_str(), substring.c_str()) != nullptr;
+ bool hasSubstring(IString substring) const {
+ // True if `substring` occurs in this name. TODO: Use C++23 `contains`.
+ return str.find(substring.str) != std::string_view::npos;
}
};
@@ -60,7 +64,7 @@ struct Name : public cashew::IString {
namespace std {
-template<> struct hash<wasm::Name> : hash<cashew::IString> {};
+template<> struct hash<wasm::Name> : hash<wasm::IString> {};
} // namespace std