summary | refs | log | tree | commit | diff
path: root/src/wasm/wasm-binary.cpp
diff options
context:
space:
mode:
author Thomas Lively <tlively@google.com> 2022-10-11 11:16:14 -0500
committer GitHub <noreply@github.com> 2022-10-11 16:16:14 +0000
commit b83450ed1fd98cec4453024f57f892b31851ea50 (patch)
tree bf0467d96c9966d0f4699ea0afcdf25905b4098c /src/wasm/wasm-binary.cpp
parent 6d4ac3162c290e32a98de349d49e26e904a40414 (diff)
download binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.gz
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.bz2
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.zip
Make `Name` a pointer, length pair (#5122)
With the goal of supporting null characters (i.e. zero bytes) in strings, rewrite the underlying interned `IString` to store a `std::string_view` rather than a `const char*`, reduce the number of map lookups necessary to intern a string, and present a more immutable interface. Most importantly, replace the `c_str()` method that returned a `const char*` with a `toString()` method that returns a `std::string`. This new method can correctly handle strings containing null characters. A `const char*` can still be had by calling `data()` on the `std::string_view`, although this usage should be discouraged. This change is NFC in spirit, although not in practice. It does not intend to support any particular new functionality, but it is probably now possible to use strings containing null characters in at least some cases. At least one parser bug is also incidentally fixed. Follow-on PRs will explicitly support and test strings containing nulls for particular use cases. The C API still uses `const char*` to represent strings. As strings containing nulls become better supported by the rest of Binaryen, this will no longer be sufficient. Updating the C and JS APIs to use pointer, length pairs is left as future work.
Diffstat (limited to 'src/wasm/wasm-binary.cpp')
-rw-r--r-- src/wasm/wasm-binary.cpp 46
1 files changed, 22 insertions, 24 deletions
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index f2698bd79..b9b8558bb 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -1263,7 +1263,7 @@ void WasmBinaryWriter::writeLegacyDylinkSection() {
o << U32LEB(wasm->dylinkSection->tableAlignment);
o << U32LEB(wasm->dylinkSection->neededDynlibs.size());
for (auto& neededDynlib : wasm->dylinkSection->neededDynlibs) {
- writeInlineString(neededDynlib.c_str());
+ writeInlineString(neededDynlib.str);
}
finishSection(start);
}
@@ -1294,7 +1294,7 @@ void WasmBinaryWriter::writeDylinkSection() {
startSubsection(BinaryConsts::UserSections::Subsection::DylinkNeeded);
o << U32LEB(wasm->dylinkSection->neededDynlibs.size());
for (auto& neededDynlib : wasm->dylinkSection->neededDynlibs) {
- writeInlineString(neededDynlib.c_str());
+ writeInlineString(neededDynlib.str);
}
finishSubsection(substart);
}
@@ -1350,10 +1350,9 @@ void WasmBinaryWriter::writeData(const char* data, size_t size) {
}
}
-void WasmBinaryWriter::writeInlineString(const char* name) {
- int32_t size = strlen(name);
- o << U32LEB(size);
- writeData(name, size);
+void WasmBinaryWriter::writeInlineString(std::string_view name) {
+ o << U32LEB(name.size());
+ writeData(name.data(), name.size());
}
static bool isHexDigit(char ch) {
@@ -1365,19 +1364,17 @@ static int decodeHexNibble(char ch) {
return ch <= '9' ? ch & 15 : (ch & 15) + 9;
}
-void WasmBinaryWriter::writeEscapedName(const char* name) {
- assert(name);
- if (!strpbrk(name, "\\")) {
+void WasmBinaryWriter::writeEscapedName(std::string_view name) {
+ if (name.find('\\') == std::string_view::npos) {
writeInlineString(name);
return;
}
// decode escaped by escapeName (see below) function names
std::string unescaped;
- int32_t size = strlen(name);
- for (int32_t i = 0; i < size;) {
+ for (size_t i = 0; i < name.size();) {
char ch = name[i++];
// support only `\xx` escapes; ignore invalid or unsupported escapes
- if (ch != '\\' || i + 1 >= size || !isHexDigit(name[i]) ||
+ if (ch != '\\' || i + 1 >= name.size() || !isHexDigit(name[i]) ||
!isHexDigit(name[i + 1])) {
unescaped.push_back(ch);
continue;
@@ -1386,7 +1383,7 @@ void WasmBinaryWriter::writeEscapedName(const char* name) {
char((decodeHexNibble(name[i]) << 4) | decodeHexNibble(name[i + 1])));
i += 2;
}
- writeInlineString(unescaped.c_str());
+ writeInlineString({unescaped.data(), unescaped.size()});
}
void WasmBinaryWriter::writeInlineBuffer(const char* data, size_t size) {
@@ -2357,8 +2354,8 @@ void WasmBinaryBuilder::readImports() {
functionTypes.push_back(getTypeByIndex(index));
auto type = getTypeByIndex(index);
if (!type.isSignature()) {
- throwError(std::string("Imported function ") + module.str + '.' +
- base.str +
+ throwError(std::string("Imported function ") + module.toString() +
+ '.' + base.toString() +
"'s type must be a signature. Given: " + type.toString());
}
auto curr = builder.makeFunction(name, type, {});
@@ -3268,24 +3265,25 @@ static char formatNibble(int nibble) {
Name WasmBinaryBuilder::escape(Name name) {
bool allIdChars = true;
- for (const char* p = name.str; allIdChars && *p; p++) {
- allIdChars = isIdChar(*p);
+ for (char c : name.str) {
+ if (!(allIdChars = isIdChar(c))) {
+ break;
+ }
}
if (allIdChars) {
return name;
}
// encode name, if at least one non-idchar (per WebAssembly spec) was found
std::string escaped;
- for (const char* p = name.str; *p; p++) {
- char ch = *p;
- if (isIdChar(ch)) {
- escaped.push_back(ch);
+ for (char c : name.str) {
+ if (isIdChar(c)) {
+ escaped.push_back(c);
continue;
}
// replace non-idchar with `\xx` escape
escaped.push_back('\\');
- escaped.push_back(formatNibble(ch >> 4));
- escaped.push_back(formatNibble(ch & 15));
+ escaped.push_back(formatNibble(c >> 4));
+ escaped.push_back(formatNibble(c & 15));
}
return escaped;
}
@@ -6839,7 +6837,7 @@ void WasmBinaryBuilder::visitRethrow(Rethrow* curr) {
// This special target is valid only for delegates
if (curr->target == DELEGATE_CALLER_TARGET) {
throwError(std::string("rethrow target cannot use internal name ") +
- DELEGATE_CALLER_TARGET.str);
+ DELEGATE_CALLER_TARGET.toString());
}
curr->finalize();
}