summaryrefslogtreecommitdiff
path: root/src/emscripten-optimizer
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2022-10-11 11:16:14 -0500
committerGitHub <noreply@github.com>2022-10-11 16:16:14 +0000
commitb83450ed1fd98cec4453024f57f892b31851ea50 (patch)
treebf0467d96c9966d0f4699ea0afcdf25905b4098c /src/emscripten-optimizer
parent6d4ac3162c290e32a98de349d49e26e904a40414 (diff)
downloadbinaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.gz
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.bz2
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.zip
Make `Name` a pointer, length pair (#5122)
With the goal of supporting null characters (i.e. zero bytes) in strings. Rewrite the underlying interned `IString` to store a `std::string_view` rather than a `const char*`, reduce the number of map lookups necessary to intern a string, and present a more immutable interface. Most importantly, replace the `c_str()` method that returned a `const char*` with a `toString()` method that returns a `std::string`. This new method can correctly handle strings containing null characters. A `const char*` can still be had by calling `data()` on the `std::string_view`, although this usage should be discouraged. This change is NFC in spirit, although not in practice. It does not intend to support any particular new functionality, but it is probably now possible to use strings containing null characters in at least some cases. At least one parser bug is also incidentally fixed. Follow-on PRs will explicitly support and test strings containing nulls for particular use cases. The C API still uses `const char*` to represent strings. As strings containing nulls become better supported by the rest of Binaryen, this will no longer be sufficient. Updating the C and JS APIs to use pointer, length pairs is left as future work.
Diffstat (limited to 'src/emscripten-optimizer')
-rw-r--r--src/emscripten-optimizer/istring.h218
-rw-r--r--src/emscripten-optimizer/optimizer-shared.cpp2
-rw-r--r--src/emscripten-optimizer/optimizer.h1
-rw-r--r--src/emscripten-optimizer/parser.cpp2
-rw-r--r--src/emscripten-optimizer/parser.h49
-rw-r--r--src/emscripten-optimizer/simple_ast.cpp14
-rw-r--r--src/emscripten-optimizer/simple_ast.h19
7 files changed, 61 insertions, 244 deletions
diff --git a/src/emscripten-optimizer/istring.h b/src/emscripten-optimizer/istring.h
deleted file mode 100644
index 69df761fd..000000000
--- a/src/emscripten-optimizer/istring.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright 2015 WebAssembly Community Group participants
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Interned String type, 100% interned on creation. Comparisons are always just
-// a pointer comparison
-
-#ifndef wasm_istring_h
-#define wasm_istring_h
-
-#include <set>
-#include <unordered_map>
-#include <unordered_set>
-
-#include <assert.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "support/threads.h"
-#include "support/utilities.h"
-
-namespace cashew {
-
-struct IString {
- const char* str = nullptr;
-
- static size_t
- hash_c(const char* str) { // see http://www.cse.yorku.ca/~oz/hash.html
- unsigned int hash = 5381;
- int c;
- while ((c = *str++)) {
- hash = ((hash << 5) + hash) ^ c;
- }
- return (size_t)hash;
- }
-
- class CStringHash {
- public:
- size_t operator()(const char* str) const { return IString::hash_c(str); }
- };
- class CStringEqual {
- public:
- bool operator()(const char* x, const char* y) const {
- return strcmp(x, y) == 0;
- }
- };
-
- IString() = default;
- // if reuse=true, then input is assumed to remain alive; not copied
- IString(const char* s, bool reuse = true) {
- assert(s);
- set(s, reuse);
- }
-
- void set(const char* s, bool reuse = true) {
- typedef std::unordered_set<const char*, CStringHash, CStringEqual>
- StringSet;
- // one global store of strings per thread, we must not access this
- // in parallel
- thread_local static StringSet strings;
-
- auto existing = strings.find(s);
-
- if (existing == strings.end()) {
- // if the string isn't already known, we must use a single global
- // storage location, guarded by a mutex, so each string is allocated
- // exactly once
- static std::mutex mutex;
- std::unique_lock<std::mutex> lock(mutex);
- // a single global set contains the actual strings, so we allocate each
- // one exactly once.
- static StringSet globalStrings;
- auto globalExisting = globalStrings.find(s);
- if (globalExisting == globalStrings.end()) {
- if (!reuse) {
- static std::vector<std::unique_ptr<std::string>> allocated;
- allocated.emplace_back(wasm::make_unique<std::string>(s));
- s = allocated.back()->c_str(); // we'll never modify it, so this is ok
- }
- // insert into global set
- globalStrings.insert(s);
- } else {
- s = *globalExisting;
- }
- // add the string to our thread-local set
- strings.insert(s);
- } else {
- s = *existing;
- }
-
- str = s;
- }
-
- void set(const IString& s) { str = s.str; }
-
- void clear() { str = nullptr; }
-
- bool operator==(const IString& other) const {
- // assert((str == other.str) == !strcmp(str, other.str));
- return str == other.str; // fast!
- }
- bool operator!=(const IString& other) const {
- // assert((str == other.str) == !strcmp(str, other.str));
- return str != other.str; // fast!
- }
- bool operator<(const IString& other) const {
- return strcmp(str ? str : "", other.str ? other.str : "") < 0;
- }
-
- char operator[](int x) const { return str[x]; }
-
- bool operator!() const { // no string, or empty string
- return !str || str[0] == 0;
- }
-
- const char* c_str() const { return str; }
- bool equals(const char* other) const { return !strcmp(str, other); }
-
- bool is() const { return str != nullptr; }
- bool isNull() const { return str == nullptr; }
-
- const char* stripPrefix(const char* prefix) const {
- const char* ptr = str;
- while (true) {
- if (*prefix == 0) {
- return ptr;
- }
- if (*ptr == 0) {
- return nullptr;
- }
- if (*ptr++ != *prefix++) {
- return nullptr;
- }
- }
- }
-
- bool startsWith(const char* prefix) const {
- return stripPrefix(prefix) != nullptr;
- }
- bool startsWith(const IString& prefix) const {
- return startsWith(prefix.str);
- }
-
- size_t size() const { return str ? strlen(str) : 0; }
-};
-
-} // namespace cashew
-
-// Utilities for creating hashmaps/sets over IStrings
-
-namespace std {
-
-template<> struct hash<cashew::IString> {
- size_t operator()(const cashew::IString& str) const {
- return std::hash<size_t>{}(size_t(str.str));
- }
-};
-
-template<> struct equal_to<cashew::IString> {
- bool operator()(const cashew::IString& x, const cashew::IString& y) const {
- return x == y;
- }
-};
-
-} // namespace std
-
-namespace cashew {
-
-// IStringSet
-
-class IStringSet : public std::unordered_set<IString> {
- std::vector<char> data;
-
-public:
- IStringSet() = default;
- IStringSet(const char* init) { // comma-delimited list
- int size = strlen(init) + 1;
- data.resize(size);
- char* curr = &data[0];
- strncpy(curr, init, size);
- while (1) {
- char* end = strchr(curr, ' ');
- if (end) {
- *end = 0;
- }
- insert(curr);
- if (!end) {
- break;
- }
- curr = end + 1;
- }
- }
-
- bool has(const IString& str) { return count(str) > 0; }
-};
-
-class IOrderedStringSet : public std::set<IString> {
-public:
- bool has(const IString& str) { return count(str) > 0; }
-};
-
-} // namespace cashew
-
-#endif // wasm_istring_h
diff --git a/src/emscripten-optimizer/optimizer-shared.cpp b/src/emscripten-optimizer/optimizer-shared.cpp
index 8208f6f47..9ca66441a 100644
--- a/src/emscripten-optimizer/optimizer-shared.cpp
+++ b/src/emscripten-optimizer/optimizer-shared.cpp
@@ -19,8 +19,6 @@
#include "optimizer.h"
#include "support/safe_integer.h"
-using namespace cashew;
-
IString JS_FLOAT_ZERO;
IString SIMD_INT8X16_CHECK("SIMD_Int8x16_check");
diff --git a/src/emscripten-optimizer/optimizer.h b/src/emscripten-optimizer/optimizer.h
index 132ff6fb8..9e5c7afe8 100644
--- a/src/emscripten-optimizer/optimizer.h
+++ b/src/emscripten-optimizer/optimizer.h
@@ -20,6 +20,7 @@
#include "simple_ast.h"
using namespace cashew;
+using IString = wasm::IString;
extern IString JS_FLOAT_ZERO;
diff --git a/src/emscripten-optimizer/parser.cpp b/src/emscripten-optimizer/parser.cpp
index 016966299..533043232 100644
--- a/src/emscripten-optimizer/parser.cpp
+++ b/src/emscripten-optimizer/parser.cpp
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include <unordered_map>
+
#include "parser.h"
namespace cashew {
diff --git a/src/emscripten-optimizer/parser.h b/src/emscripten-optimizer/parser.h
index c4d596058..8c3f36427 100644
--- a/src/emscripten-optimizer/parser.h
+++ b/src/emscripten-optimizer/parser.h
@@ -30,11 +30,46 @@
#include <limits>
#include <vector>
-#include "istring.h"
+#include "support/istring.h"
#include "support/safe_integer.h"
namespace cashew {
+using IString = wasm::IString;
+
+// IStringSet
+
+class IStringSet : public std::unordered_set<IString> {
+ std::vector<char> data;
+
+public:
+ IStringSet() = default;
+ IStringSet(const char* init) { // comma-delimited list
+ int size = strlen(init) + 1;
+ data.resize(size);
+ char* curr = &data[0];
+ strncpy(curr, init, size);
+ while (1) {
+ char* end = strchr(curr, ' ');
+ if (end) {
+ *end = 0;
+ }
+ insert(curr);
+ if (!end) {
+ break;
+ }
+ curr = end + 1;
+ }
+ }
+
+ bool has(const IString& str) { return count(str) > 0; }
+};
+
+class IOrderedStringSet : public std::set<IString> {
+public:
+ bool has(const IString& str) { return count(str) > 0; }
+};
+
// common strings
extern IString TOPLEVEL;
@@ -233,11 +268,11 @@ template<class NodeRef, class Builder> class Parser {
src++;
}
if (*src == 0) {
- str.set(start);
+ str = IString(start);
} else {
char temp = *src;
*src = 0;
- str.set(start, false);
+ str = IString(start, false);
*src = temp;
}
type = keywords.has(str) ? KEYWORD : IDENT;
@@ -333,11 +368,11 @@ template<class NodeRef, class Builder> class Parser {
default:
abort();
}
- size = strlen(str.str);
+ size = str.size();
#ifndef NDEBUG
char temp = start[size];
start[size] = 0;
- assert(strcmp(str.str, start) == 0);
+ assert(str.str == start);
start[size] = temp;
#endif
type = OPERATOR;
@@ -346,13 +381,13 @@ template<class NodeRef, class Builder> class Parser {
type = SEPARATOR;
char temp = src[1];
src[1] = 0;
- str.set(src, false);
+ str = IString(src, false);
src[1] = temp;
src++;
} else if (*src == '"' || *src == '\'') {
char* end = strchr(src + 1, *src);
*end = 0;
- str.set(src + 1);
+ str = IString(src + 1);
src = end + 1;
type = STRING;
} else {
diff --git a/src/emscripten-optimizer/simple_ast.cpp b/src/emscripten-optimizer/simple_ast.cpp
index 6afa0a66e..086d504ba 100644
--- a/src/emscripten-optimizer/simple_ast.cpp
+++ b/src/emscripten-optimizer/simple_ast.cpp
@@ -24,13 +24,11 @@ Ref& Ref::operator[](unsigned x) { return (*get())[x]; }
Ref& Ref::operator[](IString x) { return (*get())[x]; }
-bool Ref::operator==(const char* str) {
- return get()->isString() && !strcmp(get()->str.str, str);
+bool Ref::operator==(std::string_view str) {
+ return get()->isString() && get()->str == str;
}
-bool Ref::operator!=(const char* str) {
- return get()->isString() ? !!strcmp(get()->str.str, str) : true;
-}
+bool Ref::operator!=(std::string_view str) { return !(*this == str); }
bool Ref::operator==(const IString& str) {
return get()->isString() && get()->str == str;
@@ -81,8 +79,8 @@ void Value::stringify(std::ostream& os, bool pretty) {
}
switch (type) {
case String: {
- if (str.str) {
- os << '"' << str.str << '"';
+ if (str) {
+ os << '"' << str << '"';
} else {
os << "\"(null)\"";
}
@@ -147,7 +145,7 @@ void Value::stringify(std::ostream& os, bool pretty) {
}
}
indentify();
- os << '"' << i.first.c_str() << "\": ";
+ os << '"' << i.first << "\": ";
i.second->stringify(os, pretty);
}
if (pretty) {
diff --git a/src/emscripten-optimizer/simple_ast.h b/src/emscripten-optimizer/simple_ast.h
index 64db04d79..60399d6d2 100644
--- a/src/emscripten-optimizer/simple_ast.h
+++ b/src/emscripten-optimizer/simple_ast.h
@@ -63,8 +63,9 @@ struct Ref {
Ref& operator[](IString x);
// special conveniences
- bool operator==(const char* str); // comparison to string, which is by value
- bool operator!=(const char* str);
+ bool
+ operator==(std::string_view str); // comparison to string, which is by value
+ bool operator!=(std::string_view str);
bool operator==(const IString& str);
bool operator!=(const IString& str);
// prevent Ref == number, which is potentially ambiguous; use ->getNumber() ==
@@ -163,13 +164,13 @@ struct Value {
Value& setString(const char* s) {
free();
type = String;
- str.set(s);
+ str = IString(s);
return *this;
}
Value& setString(const IString& s) {
free();
type = String;
- str.set(s);
+ str = s;
return *this;
}
Value& setNumber(double n) {
@@ -233,7 +234,7 @@ struct Value {
const char* getCString() {
assert(isString());
- return str.str;
+ return str.str.data();
}
IString& getIString() {
assert(isString());
@@ -817,7 +818,7 @@ struct JSPrinter {
break;
}
default: {
- errv("cannot yet print %s\n", type.str);
+ errv("cannot yet print %s\n", type.str.data());
abort();
}
}
@@ -908,7 +909,7 @@ struct JSPrinter {
void printAssignName(Ref node) {
auto* assign = node->asAssignName();
- emit(assign->target().c_str());
+ emit(assign->target().str.data());
space();
emit('=');
space();
@@ -1472,10 +1473,10 @@ struct JSPrinter {
needQuote = true;
str = args[i][0][1]->getCString();
} else if (args[i][0][0] == GETTER) {
- getterSetter = GETTER.c_str();
+ getterSetter = GETTER.str.data();
str = args[i][0][1]->getCString();
} else if (args[i][0][0] == SETTER) {
- getterSetter = SETTER.c_str();
+ getterSetter = SETTER.str.data();
str = args[i][0][1]->getCString();
setterParam = args[i][0][2]->getCString();
} else {