diff options
Diffstat (limited to 'src/support')
-rw-r--r-- | src/support/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/support/istring.cpp | 88 | ||||
-rw-r--r-- | src/support/istring.h | 110 | ||||
-rw-r--r-- | src/support/json.h | 10 | ||||
-rw-r--r-- | src/support/name.h | 30 |
5 files changed, 221 insertions, 18 deletions
diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 0d9350305..a02c1f447 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -6,6 +6,7 @@ set(support_SOURCES command-line.cpp debug.cpp file.cpp + istring.cpp path.cpp safe_integer.cpp threads.cpp diff --git a/src/support/istring.cpp b/src/support/istring.cpp new file mode 100644 index 000000000..8a3319b5e --- /dev/null +++ b/src/support/istring.cpp @@ -0,0 +1,88 @@ +/* + * Copyright 2022 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "istring.h" + +namespace wasm { + +std::string_view IString::interned(std::string_view s, bool reuse) { + // We need a set of string_views that can be modified in-place to minimize + // the number of lookups we do. Since set elements cannot normally be + // modified, wrap the string_views in a container that provides mutability + // even through a const reference. + struct MutStringView { + mutable std::string_view str; + MutStringView(std::string_view str) : str(str) {} + }; + struct MutStringViewHash { + size_t operator()(const MutStringView& mut) const { + return std::hash<std::string_view>{}(mut.str); + } + }; + struct MutStringViewEqual { + bool operator()(const MutStringView& a, const MutStringView& b) const { + return a.str == b.str; + } + }; + using StringSet = + std::unordered_set<MutStringView, MutStringViewHash, MutStringViewEqual>; + + // The authoritative global set of interned string views. + static StringSet globalStrings; + + // The global backing store for interned strings that do not otherwise have + // stable addresses. + static std::vector<std::vector<char>> allocated; + + // Guards access to `globalStrings` and `allocated`. + static std::mutex mutex; + + // A thread-local cache of strings to reduce contention. + thread_local static StringSet localStrings; + + auto [localIt, localInserted] = localStrings.insert(s); + if (!localInserted) { + // We already had a local copy of this string. + return localIt->str; + } + + // No copy yet in the local cache. Check the global cache. + std::unique_lock<std::mutex> lock(mutex); + auto [globalIt, globalInserted] = globalStrings.insert(s); + if (!globalInserted) { + // We already had a global copy of this string. Cache it locally. + localIt->str = globalIt->str; + return localIt->str; + } + + if (!reuse) { + // We have a new string, but it doesn't have a stable address. Create a copy + // of the data at a stable address we can use. Make sure it is null + // terminated so legacy uses that get a C string still work. + allocated.emplace_back(); + auto& data = allocated.back(); + data.reserve(s.size() + 1); + data.insert(data.end(), s.begin(), s.end()); + data.push_back('\0'); + s = std::string_view(allocated.back().data(), s.size()); + } + + // Intern our new string. + localIt->str = globalIt->str = s; + return s; +} + +} // namespace wasm diff --git a/src/support/istring.h b/src/support/istring.h new file mode 100644 index 000000000..14f991c30 --- /dev/null +++ b/src/support/istring.h @@ -0,0 +1,110 @@ +/* + * Copyright 2022 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Interned String type, 100% interned on creation. Comparisons are always just +// a pointer comparison + +#ifndef wasm_support_istring_h +#define wasm_support_istring_h + +#include <set> +#include <string_view> +#include <unordered_set> + +#include <assert.h> + +#include "threads.h" +#include "utilities.h" + +namespace wasm { + +struct IString { +private: + static std::string_view interned(std::string_view s, bool reuse = true); + +public: + const std::string_view str; + + IString() = default; + + // TODO: This is a wildly unsafe default inherited from the previous + // implementation. Change it? + IString(std::string_view str, bool reuse = true) + : str(interned(str, reuse)) {} + + // But other C strings generally do need to be copied. + IString(const char* str) : str(interned(str, false)) {} + IString(const std::string& str) : str(interned(str, false)) {} + + IString(const IString& other) = default; + + IString& operator=(const IString& other) { + return *(new (this) IString(other)); + } + + bool operator==(const IString& other) const { + // Fast! No need to compare contents due to interning + return str.data() == other.str.data(); + } + bool operator!=(const IString& other) const { return !(*this == other); } + bool operator<(const IString& other) const { return str < other.str; } + bool operator<=(const IString& other) const { return str <= other.str; } + bool operator>(const IString& other) const { return str > other.str; } + bool operator>=(const IString& other) const { return str >= other.str; } + + char operator[](int x) const { return str[x]; } + + operator bool() const { return str.data() != nullptr; } + + // TODO: deprecate? + bool is() const { return bool(*this); } + bool isNull() const { return !bool(*this); } + + std::string toString() const { return {str.data(), str.size()}; } + + bool equals(std::string_view other) const { return str == other; } + + bool startsWith(std::string_view prefix) const { + // TODO: Use C++20 `starts_with`. + return str.substr(0, prefix.size()) == prefix; + } + bool startsWith(IString str) const { return startsWith(str.str); } + + // Disambiguate for string literals. + template<int N> bool startsWith(const char (&str)[N]) { + return startsWith(std::string_view(str)); + } + + size_t size() const { return str.size(); } +}; + +} // namespace wasm + +namespace std { + +template<> struct hash<wasm::IString> { + size_t operator()(const wasm::IString& str) const { + return std::hash<size_t>{}(uintptr_t(str.str.data())); + } +}; + +inline std::ostream& operator<<(std::ostream& os, const wasm::IString& str) { + return os << str.str; +} + +} // namespace std + +#endif // wasm_support_istring_h diff --git a/src/support/json.h b/src/support/json.h index a145e1f6b..269b6c0e9 100644 --- a/src/support/json.h +++ b/src/support/json.h @@ -37,12 +37,12 @@ #include <unordered_set> #include <vector> -#include "emscripten-optimizer/istring.h" +#include "support/istring.h" #include "support/safe_integer.h" namespace json { -using IString = cashew::IString; +using IString = wasm::IString; // Main value type struct Value { @@ -112,13 +112,13 @@ struct Value { Value& setString(const char* s) { free(); type = String; - str.set(s); + str = s; return *this; } Value& setString(const IString& s) { free(); type = String; - str.set(s); + str = s; return *this; } Value& setNumber(double n) { @@ -173,7 +173,7 @@ struct Value { const char* getCString() { assert(isString()); - return str.str; + return str.str.data(); } IString& getIString() { assert(isString()); diff --git a/src/support/name.h b/src/support/name.h index 615740e09..a22461d5d 100644 --- a/src/support/name.h +++ b/src/support/name.h @@ -17,9 +17,7 @@ #ifndef wasm_support_name_h #define wasm_support_name_h -#include <string> - -#include "emscripten-optimizer/istring.h" +#include "support/istring.h" namespace wasm { @@ -33,14 +31,19 @@ namespace wasm { // TODO: as an optimization, IString values < some threshold could be considered // numerical indices directly. -struct Name : public cashew::IString { - Name() : cashew::IString() {} - Name(const char* str) : cashew::IString(str, false) {} - Name(cashew::IString str) : cashew::IString(str) {} - Name(const std::string& str) : cashew::IString(str.c_str(), false) {} +struct Name : public IString { + Name() : IString() {} + Name(std::string_view str) : IString(str, false) {} + Name(const char* str) : IString(str, false) {} + Name(IString str) : IString(str) {} + Name(const std::string& str) : IString(str) {} + + // String literals do not need to be copied. Note: Not safe to construct from + // temporary char arrays! Take their address first. + template<size_t N> Name(const char (&str)[N]) : IString(str) {} friend std::ostream& operator<<(std::ostream& o, Name name) { - if (name.str) { + if (name) { return o << name.str; } else { return o << "(null Name)"; @@ -48,11 +51,12 @@ struct Name : public cashew::IString { } static Name fromInt(size_t i) { - return cashew::IString(std::to_string(i).c_str(), false); + return IString(std::to_string(i).c_str(), false); } - bool hasSubstring(cashew::IString substring) { - return strstr(c_str(), substring.c_str()) != nullptr; + bool hasSubstring(IString substring) { + // TODO: Use C++23 `contains`. + return str.find(substring.str) != std::string_view::npos; } }; @@ -60,7 +64,7 @@ struct Name : public cashew::IString { namespace std { -template<> struct hash<wasm::Name> : hash<cashew::IString> {}; +template<> struct hash<wasm::Name> : hash<wasm::IString> {}; } // namespace std |