summaryrefslogtreecommitdiff
path: root/src/emscripten-optimizer/istring.h
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2022-10-11 11:16:14 -0500
committerGitHub <noreply@github.com>2022-10-11 16:16:14 +0000
commitb83450ed1fd98cec4453024f57f892b31851ea50 (patch)
treebf0467d96c9966d0f4699ea0afcdf25905b4098c /src/emscripten-optimizer/istring.h
parent6d4ac3162c290e32a98de349d49e26e904a40414 (diff)
downloadbinaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.gz
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.bz2
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.zip
Make `Name` a pointer, length pair (#5122)
With the goal of supporting null characters (i.e. zero bytes) in strings. Rewrite the underlying interned `IString` to store a `std::string_view` rather than a `const char*`, reduce the number of map lookups necessary to intern a string, and present a more immutable interface. Most importantly, replace the `c_str()` method that returned a `const char*` with a `toString()` method that returns a `std::string`. This new method can correctly handle strings containing null characters. A `const char*` can still be had by calling `data()` on the `std::string_view`, although this usage should be discouraged. This change is NFC in spirit, although not in practice. It does not intend to support any particular new functionality, but it is probably now possible to use strings containing null characters in at least some cases. At least one parser bug is also incidentally fixed. Follow-on PRs will explicitly support and test strings containing nulls for particular use cases. The C API still uses `const char*` to represent strings. As strings containing nulls become better supported by the rest of Binaryen, this will no longer be sufficient. Updating the C and JS APIs to use pointer, length pairs is left as future work.
Diffstat (limited to 'src/emscripten-optimizer/istring.h')
-rw-r--r--src/emscripten-optimizer/istring.h218
1 files changed, 0 insertions, 218 deletions
diff --git a/src/emscripten-optimizer/istring.h b/src/emscripten-optimizer/istring.h
deleted file mode 100644
index 69df761fd..000000000
--- a/src/emscripten-optimizer/istring.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright 2015 WebAssembly Community Group participants
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Interned String type, 100% interned on creation. Comparisons are always just
-// a pointer comparison
-
-#ifndef wasm_istring_h
-#define wasm_istring_h
-
-#include <set>
-#include <unordered_map>
-#include <unordered_set>
-
-#include <assert.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "support/threads.h"
-#include "support/utilities.h"
-
-namespace cashew {
-
-struct IString {
- const char* str = nullptr;
-
- static size_t
- hash_c(const char* str) { // see http://www.cse.yorku.ca/~oz/hash.html
- unsigned int hash = 5381;
- int c;
- while ((c = *str++)) {
- hash = ((hash << 5) + hash) ^ c;
- }
- return (size_t)hash;
- }
-
- class CStringHash {
- public:
- size_t operator()(const char* str) const { return IString::hash_c(str); }
- };
- class CStringEqual {
- public:
- bool operator()(const char* x, const char* y) const {
- return strcmp(x, y) == 0;
- }
- };
-
- IString() = default;
- // if reuse=true, then input is assumed to remain alive; not copied
- IString(const char* s, bool reuse = true) {
- assert(s);
- set(s, reuse);
- }
-
- void set(const char* s, bool reuse = true) {
- typedef std::unordered_set<const char*, CStringHash, CStringEqual>
- StringSet;
- // one global store of strings per thread, we must not access this
- // in parallel
- thread_local static StringSet strings;
-
- auto existing = strings.find(s);
-
- if (existing == strings.end()) {
- // if the string isn't already known, we must use a single global
- // storage location, guarded by a mutex, so each string is allocated
- // exactly once
- static std::mutex mutex;
- std::unique_lock<std::mutex> lock(mutex);
- // a single global set contains the actual strings, so we allocate each
- // one exactly once.
- static StringSet globalStrings;
- auto globalExisting = globalStrings.find(s);
- if (globalExisting == globalStrings.end()) {
- if (!reuse) {
- static std::vector<std::unique_ptr<std::string>> allocated;
- allocated.emplace_back(wasm::make_unique<std::string>(s));
- s = allocated.back()->c_str(); // we'll never modify it, so this is ok
- }
- // insert into global set
- globalStrings.insert(s);
- } else {
- s = *globalExisting;
- }
- // add the string to our thread-local set
- strings.insert(s);
- } else {
- s = *existing;
- }
-
- str = s;
- }
-
- void set(const IString& s) { str = s.str; }
-
- void clear() { str = nullptr; }
-
- bool operator==(const IString& other) const {
- // assert((str == other.str) == !strcmp(str, other.str));
- return str == other.str; // fast!
- }
- bool operator!=(const IString& other) const {
- // assert((str == other.str) == !strcmp(str, other.str));
- return str != other.str; // fast!
- }
- bool operator<(const IString& other) const {
- return strcmp(str ? str : "", other.str ? other.str : "") < 0;
- }
-
- char operator[](int x) const { return str[x]; }
-
- bool operator!() const { // no string, or empty string
- return !str || str[0] == 0;
- }
-
- const char* c_str() const { return str; }
- bool equals(const char* other) const { return !strcmp(str, other); }
-
- bool is() const { return str != nullptr; }
- bool isNull() const { return str == nullptr; }
-
- const char* stripPrefix(const char* prefix) const {
- const char* ptr = str;
- while (true) {
- if (*prefix == 0) {
- return ptr;
- }
- if (*ptr == 0) {
- return nullptr;
- }
- if (*ptr++ != *prefix++) {
- return nullptr;
- }
- }
- }
-
- bool startsWith(const char* prefix) const {
- return stripPrefix(prefix) != nullptr;
- }
- bool startsWith(const IString& prefix) const {
- return startsWith(prefix.str);
- }
-
- size_t size() const { return str ? strlen(str) : 0; }
-};
-
-} // namespace cashew
-
-// Utilities for creating hashmaps/sets over IStrings
-
-namespace std {
-
-template<> struct hash<cashew::IString> {
- size_t operator()(const cashew::IString& str) const {
- return std::hash<size_t>{}(size_t(str.str));
- }
-};
-
-template<> struct equal_to<cashew::IString> {
- bool operator()(const cashew::IString& x, const cashew::IString& y) const {
- return x == y;
- }
-};
-
-} // namespace std
-
-namespace cashew {
-
-// IStringSet
-
-class IStringSet : public std::unordered_set<IString> {
- std::vector<char> data;
-
-public:
- IStringSet() = default;
- IStringSet(const char* init) { // comma-delimited list
- int size = strlen(init) + 1;
- data.resize(size);
- char* curr = &data[0];
- strncpy(curr, init, size);
- while (1) {
- char* end = strchr(curr, ' ');
- if (end) {
- *end = 0;
- }
- insert(curr);
- if (!end) {
- break;
- }
- curr = end + 1;
- }
- }
-
- bool has(const IString& str) { return count(str) > 0; }
-};
-
-class IOrderedStringSet : public std::set<IString> {
-public:
- bool has(const IString& str) { return count(str) > 0; }
-};
-
-} // namespace cashew
-
-#endif // wasm_istring_h