summaryrefslogtreecommitdiff
path: root/src/wasm/wasm-s-parser.cpp
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2022-10-11 11:16:14 -0500
committerGitHub <noreply@github.com>2022-10-11 16:16:14 +0000
commitb83450ed1fd98cec4453024f57f892b31851ea50 (patch)
treebf0467d96c9966d0f4699ea0afcdf25905b4098c /src/wasm/wasm-s-parser.cpp
parent6d4ac3162c290e32a98de349d49e26e904a40414 (diff)
downloadbinaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.gz
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.tar.bz2
binaryen-b83450ed1fd98cec4453024f57f892b31851ea50.zip
Make `Name` a pointer, length pair (#5122)
With the goal of supporting null characters (i.e. zero bytes) in strings. Rewrite the underlying interned `IString` to store a `std::string_view` rather than a `const char*`, reduce the number of map lookups necessary to intern a string, and present a more immutable interface. Most importantly, replace the `c_str()` method that returned a `const char*` with a `toString()` method that returns a `std::string`. This new method can correctly handle strings containing null characters. A `const char*` can still be had by calling `data()` on the `std::string_view`, although this usage should be discouraged. This change is NFC in spirit, although not in practice. It does not intend to support any particular new functionality, but it is probably now possible to use strings containing null characters in at least some cases. At least one parser bug is also incidentally fixed. Follow-on PRs will explicitly support and test strings containing nulls for particular use cases. The C API still uses `const char*` to represent strings. As strings containing nulls become better supported by the rest of Binaryen, this will no longer be sufficient. Updating the C and JS APIs to use pointer, length pairs is left as future work.
Diffstat (limited to 'src/wasm/wasm-s-parser.cpp')
-rw-r--r--src/wasm/wasm-s-parser.cpp325
1 files changed, 156 insertions, 169 deletions
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index 172ef15b6..64d7a21a2 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -54,7 +54,9 @@ static Name STRUCT("struct"), FIELD("field"), ARRAY("array"),
ARRAY_SUBTYPE("array_subtype"), EXTENDS("extends"), REC("rec"), I8("i8"),
I16("i16"), DECLARE("declare"), ITEM("item"), OFFSET("offset");
-static Address getAddress(const Element* s) { return atoll(s->c_str()); }
+static Address getAddress(const Element* s) {
+ return std::stoll(s->toString());
+}
static void
checkAddress(Address a, const char* errorText, const Element* errorElem) {
@@ -95,11 +97,11 @@ IString Element::str() const {
return str_;
}
-const char* Element::c_str() const {
+std::string Element::toString() const {
if (!isStr()) {
throw ParseException("expected string", line, col);
}
- return str_.str;
+ return str_.toString();
}
Element* Element::setString(IString str__, bool dollared__, bool quoted__) {
@@ -356,11 +358,8 @@ SExpressionWasmBuilder::SExpressionWasmBuilder(Module& wasm,
if (i < module.size() && module[i]->isStr()) {
// these s-expressions contain a binary module, actually
std::vector<char> data;
- while (i < module.size()) {
- auto str = module[i++]->c_str();
- if (auto size = strlen(str)) {
- stringToBinary(str, size, data);
- }
+ for (; i < module.size(); ++i) {
+ stringToBinary(*module[i], module[i]->str().str, data);
}
// TODO: support applying features here
WasmBinaryBuilder binaryBuilder(wasm, FeatureSet::MVP, data);
@@ -474,12 +473,20 @@ void SExpressionWasmBuilder::parseModuleElement(Element& curr) {
throw ParseException("unknown module element", curr.line, curr.col);
}
+int SExpressionWasmBuilder::parseIndex(Element& s) {
+ try {
+ return std::stoi(s.toString());
+ } catch (...) {
+ throw ParseException("expected integer", s.line, s.col);
+ }
+}
+
Name SExpressionWasmBuilder::getFunctionName(Element& s) {
if (s.dollared()) {
return s.str();
} else {
// index
- size_t offset = atoi(s.str().c_str());
+ size_t offset = parseIndex(s);
if (offset >= functionNames.size()) {
throw ParseException(
"unknown function in getFunctionName", s.line, s.col);
@@ -493,7 +500,7 @@ Name SExpressionWasmBuilder::getTableName(Element& s) {
return s.str();
} else {
// index
- size_t offset = atoi(s.str().c_str());
+ size_t offset = parseIndex(s);
if (offset >= tableNames.size()) {
throw ParseException("unknown table in getTableName", s.line, s.col);
}
@@ -521,7 +528,7 @@ Name SExpressionWasmBuilder::getMemoryName(Element& s) {
return s.str();
} else {
// index
- size_t offset = atoi(s.str().c_str());
+ size_t offset = parseIndex(s);
return getMemoryNameAtIdx(offset);
}
}
@@ -531,7 +538,7 @@ Name SExpressionWasmBuilder::getGlobalName(Element& s) {
return s.str();
} else {
// index
- size_t offset = atoi(s.str().c_str());
+ size_t offset = parseIndex(s);
if (offset >= globalNames.size()) {
throw ParseException("unknown global in getGlobalName", s.line, s.col);
}
@@ -544,7 +551,7 @@ Name SExpressionWasmBuilder::getTagName(Element& s) {
return s.str();
} else {
// index
- size_t offset = atoi(s.str().c_str());
+ size_t offset = parseIndex(s);
if (offset >= tagNames.size()) {
throw ParseException("unknown tag in getTagName", s.line, s.col);
}
@@ -738,7 +745,7 @@ void SExpressionWasmBuilder::preParseHeapTypes(Element& module) {
size_t numTypes = 0;
forEachType([&](Element& elem, size_t) {
if (elem[1]->dollared()) {
- std::string name = elem[1]->c_str();
+ std::string name = elem[1]->toString();
if (!typeIndices.insert({name, numTypes}).second) {
throw ParseException("duplicate function type", elem.line, elem.col);
}
@@ -769,23 +776,23 @@ void SExpressionWasmBuilder::preParseHeapTypes(Element& module) {
auto nullable =
elem[1]->isStr() && *elem[1] == NULL_ ? Nullable : NonNullable;
auto& referent = nullable ? *elem[2] : *elem[1];
- const char* name = referent.c_str();
+ auto name = referent.toString();
if (referent.dollared()) {
return builder.getTempRefType(builder[typeIndices[name]], nullable);
} else if (String::isNumber(name)) {
- size_t index = atoi(name);
+ size_t index = parseIndex(referent);
if (index >= numTypes) {
throw ParseException("invalid type index", elem.line, elem.col);
}
return builder.getTempRefType(builder[index], nullable);
} else {
- return Type(stringToHeapType(name), nullable);
+ return Type(stringToHeapType(referent.str()), nullable);
}
};
auto parseValType = [&](Element& elem) {
if (elem.isStr()) {
- return stringToType(elem.c_str());
+ return stringToType(elem.str());
} else if (*elem[0] == REF) {
return parseRefType(elem);
} else {
@@ -921,7 +928,7 @@ void SExpressionWasmBuilder::preParseHeapTypes(Element& module) {
super = extends[1];
}
if (super) {
- auto it = typeIndices.find(super->c_str());
+ auto it = typeIndices.find(super->toString());
if (it == typeIndices.end()) {
throw ParseException("unknown supertype", super->line, super->col);
}
@@ -1139,135 +1146,124 @@ void SExpressionWasmBuilder::parseFunction(Element& s, bool preParseImport) {
nameMapper.clear();
}
-Type SExpressionWasmBuilder::stringToType(const char* str,
+Type SExpressionWasmBuilder::stringToType(std::string_view str,
bool allowError,
bool prefix) {
- if (str[0] == 'i') {
- if (str[1] == '3' && str[2] == '2' && (prefix || str[3] == 0)) {
- return Type::i32;
- }
- if (str[1] == '6' && str[2] == '4' && (prefix || str[3] == 0)) {
- return Type::i64;
- }
- }
- if (str[0] == 'f') {
- if (str[1] == '3' && str[2] == '2' && (prefix || str[3] == 0)) {
- return Type::f32;
+ if (str.size() >= 3) {
+ if (str[0] == 'i') {
+ if (str[1] == '3' && str[2] == '2' && (prefix || str[3] == 0)) {
+ return Type::i32;
+ }
+ if (str[1] == '6' && str[2] == '4' && (prefix || str[3] == 0)) {
+ return Type::i64;
+ }
}
- if (str[1] == '6' && str[2] == '4' && (prefix || str[3] == 0)) {
- return Type::f64;
+ if (str[0] == 'f') {
+ if (str[1] == '3' && str[2] == '2' && (prefix || str[3] == 0)) {
+ return Type::f32;
+ }
+ if (str[1] == '6' && str[2] == '4' && (prefix || str[3] == 0)) {
+ return Type::f64;
+ }
}
}
- if (str[0] == 'v') {
- if (str[1] == '1' && str[2] == '2' && str[3] == '8' &&
- (prefix || str[4] == 0)) {
- return Type::v128;
+ if (str.size() >= 4) {
+ if (str[0] == 'v') {
+ if (str[1] == '1' && str[2] == '2' && str[3] == '8' &&
+ (prefix || str[4] == 0)) {
+ return Type::v128;
+ }
}
}
- if (strncmp(str, "funcref", 7) == 0 && (prefix || str[7] == 0)) {
+ if (str.substr(0, 7) == "funcref" && (prefix || str.size() == 7)) {
return Type(HeapType::func, Nullable);
}
- if (strncmp(str, "externref", 9) == 0 && (prefix || str[9] == 0)) {
+ if (str.substr(0, 9) == "externref" && (prefix || str.size() == 9)) {
return Type(HeapType::ext, Nullable);
}
- if (strncmp(str, "anyref", 6) == 0 && (prefix || str[6] == 0)) {
+ if (str.substr(0, 6) == "anyref" && (prefix || str.size() == 6)) {
return Type(HeapType::any, Nullable);
}
- if (strncmp(str, "eqref", 5) == 0 && (prefix || str[5] == 0)) {
+ if (str.substr(0, 5) == "eqref" && (prefix || str.size() == 5)) {
return Type(HeapType::eq, Nullable);
}
- if (strncmp(str, "i31ref", 6) == 0 && (prefix || str[6] == 0)) {
+ if (str.substr(0, 6) == "i31ref" && (prefix || str.size() == 6)) {
return Type(HeapType::i31, Nullable);
}
- if (strncmp(str, "dataref", 7) == 0 && (prefix || str[7] == 0)) {
+ if (str.substr(0, 7) == "dataref" && (prefix || str.size() == 7)) {
return Type(HeapType::data, Nullable);
}
- if (strncmp(str, "stringref", 9) == 0 && (prefix || str[9] == 0)) {
+ if (str.substr(0, 9) == "stringref" && (prefix || str.size() == 9)) {
return Type(HeapType::string, Nullable);
}
- if (strncmp(str, "stringview_wtf8", 15) == 0 && (prefix || str[15] == 0)) {
+ if (str.substr(0, 15) == "stringview_wtf8" && (prefix || str.size() == 15)) {
return Type(HeapType::stringview_wtf8, Nullable);
}
- if (strncmp(str, "stringview_wtf16", 16) == 0 && (prefix || str[16] == 0)) {
+ if (str.substr(0, 16) == "stringview_wtf16" && (prefix || str.size() == 16)) {
return Type(HeapType::stringview_wtf16, Nullable);
}
- if (strncmp(str, "stringview_iter", 15) == 0 && (prefix || str[15] == 0)) {
+ if (str.substr(0, 15) == "stringview_iter" && (prefix || str.size() == 15)) {
return Type(HeapType::stringview_iter, Nullable);
}
- if (strncmp(str, "nullref", 7) == 0 && (prefix || str[7] == 0)) {
+ if (str.substr(0, 7) == "nullref" && (prefix || str.size() == 7)) {
return Type(HeapType::none, Nullable);
}
- if (strncmp(str, "nullexternref", 13) == 0 && (prefix || str[13] == 0)) {
+ if (str.substr(0, 13) == "nullexternref" && (prefix || str.size() == 13)) {
return Type(HeapType::noext, Nullable);
}
- if (strncmp(str, "nullfuncref", 11) == 0 && (prefix || str[11] == 0)) {
+ if (str.substr(0, 11) == "nullfuncref" && (prefix || str.size() == 11)) {
return Type(HeapType::nofunc, Nullable);
}
if (allowError) {
return Type::none;
}
- throw ParseException(std::string("invalid wasm type: ") + str);
+ throw ParseException(std::string("invalid wasm type: ") +
+ std::string(str.data(), str.size()));
}
-HeapType SExpressionWasmBuilder::stringToHeapType(const char* str,
+HeapType SExpressionWasmBuilder::stringToHeapType(std::string_view str,
bool prefix) {
- if (str[0] == 'f') {
- if (str[1] == 'u' && str[2] == 'n' && str[3] == 'c' &&
- (prefix || str[4] == 0)) {
- return HeapType::func;
- }
+ if (str.substr(0, 4) == "func" && (prefix || str.size() == 4)) {
+ return HeapType::func;
}
- if (str[0] == 'e') {
- if (str[1] == 'q' && (prefix || str[2] == 0)) {
- return HeapType::eq;
- }
- if (str[1] == 'x' && str[2] == 't' && str[3] == 'e' && str[4] == 'r' &&
- str[5] == 'n' && (prefix || str[6] == 0)) {
- return HeapType::ext;
- }
+ if (str.substr(0, 2) == "eq" && (prefix || str.size() == 2)) {
+ return HeapType::eq;
}
- if (str[0] == 'a') {
- if (str[1] == 'n' && str[2] == 'y' && (prefix || str[3] == 0)) {
- return HeapType::any;
- }
+ if (str.substr(0, 6) == "extern" && (prefix || str.size() == 6)) {
+ return HeapType::ext;
}
- if (str[0] == 'i') {
- if (str[1] == '3' && str[2] == '1' && (prefix || str[3] == 0)) {
- return HeapType::i31;
- }
+ if (str.substr(0, 3) == "any" && (prefix || str.size() == 3)) {
+ return HeapType::any;
}
- if (str[0] == 'd') {
- if (str[1] == 'a' && str[2] == 't' && str[3] == 'a' &&
- (prefix || str[4] == 0)) {
- return HeapType::data;
- }
+ if (str.substr(0, 3) == "i31" && (prefix || str.size() == 3)) {
+ return HeapType::i31;
}
- if (str[0] == 's') {
- if (strncmp(str, "string", 6) == 0 && (prefix || str[6] == 0)) {
- return HeapType::string;
- }
- if (strncmp(str, "stringview_wtf8", 15) == 0 && (prefix || str[15] == 0)) {
- return HeapType::stringview_wtf8;
- }
- if (strncmp(str, "stringview_wtf16", 16) == 0 && (prefix || str[16] == 0)) {
- return HeapType::stringview_wtf16;
- }
- if (strncmp(str, "stringview_iter", 15) == 0 && (prefix || str[15] == 0)) {
- return HeapType::stringview_iter;
- }
+ if (str.substr(0, 4) == "data" && (prefix || str.size() == 4)) {
+ return HeapType::data;
}
- if (str[0] == 'n') {
- if (strncmp(str, "none", 4) == 0 && (prefix || str[4] == 0)) {
- return HeapType::none;
- }
- if (strncmp(str, "noextern", 8) == 0 && (prefix || str[8] == 0)) {
- return HeapType::noext;
- }
- if (strncmp(str, "nofunc", 6) == 0 && (prefix || str[6] == 0)) {
- return HeapType::nofunc;
- }
+ if (str.substr(0, 6) == "string" && (prefix || str.size() == 6)) {
+ return HeapType::string;
+ }
+ if (str.substr(0, 15) == "stringview_wtf8" && (prefix || str.size() == 15)) {
+ return HeapType::stringview_wtf8;
+ }
+ if (str.substr(0, 16) == "stringview_wtf16" && (prefix || str.size() == 16)) {
+ return HeapType::stringview_wtf16;
}
- throw ParseException(std::string("invalid wasm heap type: ") + str);
+ if (str.substr(0, 15) == "stringview_iter" && (prefix || str.size() == 15)) {
+ return HeapType::stringview_iter;
+ }
+ if (str.substr(0, 4) == "none" && (prefix || str.size() == 4)) {
+ return HeapType::none;
+ }
+ if (str.substr(0, 8) == "noextern" && (prefix || str.size() == 8)) {
+ return HeapType::noext;
+ }
+ if (str.substr(0, 6) == "nofunc" && (prefix || str.size() == 6)) {
+ return HeapType::nofunc;
+ }
+ throw ParseException(std::string("invalid wasm heap type: ") +
+ std::string(str.data(), str.size()));
}
Type SExpressionWasmBuilder::elementToType(Element& s) {
@@ -1345,7 +1341,7 @@ SExpressionWasmBuilder::getDebugLocation(const SourceLocation& loc) {
auto iter = debugInfoFileIndices.find(file);
if (iter == debugInfoFileIndices.end()) {
Index index = debugInfoFileNames.size();
- debugInfoFileNames.push_back(file.c_str());
+ debugInfoFileNames.push_back(file.toString());
debugInfoFileIndices[file] = index;
}
uint32_t fileIndex = debugInfoFileIndices[file];
@@ -1457,7 +1453,7 @@ Index SExpressionWasmBuilder::getLocalIndex(Element& s) {
return currFunction->getLocalIndex(ret);
}
// this is a numeric index
- Index ret = atoi(s.c_str());
+ Index ret = parseIndex(s);
if (ret >= currFunction->getNumLocals()) {
throw ParseException("bad local index", s.line, s.col);
}
@@ -1620,7 +1616,7 @@ Expression* SExpressionWasmBuilder::makeThenOrElse(Element& s) {
}
static Expression* parseConst(IString s, Type type, MixedArena& allocator) {
- const char* str = s.str;
+ const char* str = s.str.data();
auto ret = allocator.alloc<Const>();
ret->type = type;
if (type.isFloat()) {
@@ -1635,7 +1631,6 @@ static Expression* parseConst(IString s, Type type, MixedArena& allocator) {
default:
return nullptr;
}
- // std::cerr << "make constant " << str << " ==> " << ret->value << '\n';
return ret;
}
if (s == NEG_INFINITY) {
@@ -1649,7 +1644,6 @@ static Expression* parseConst(IString s, Type type, MixedArena& allocator) {
default:
return nullptr;
}
- // std::cerr << "make constant " << str << " ==> " << ret->value << '\n';
return ret;
}
if (s == _NAN) {
@@ -1663,7 +1657,6 @@ static Expression* parseConst(IString s, Type type, MixedArena& allocator) {
default:
return nullptr;
}
- // std::cerr << "make constant " << str << " ==> " << ret->value << '\n';
return ret;
}
bool negative = str[0] == '-';
@@ -1813,7 +1806,6 @@ static Expression* parseConst(IString s, Type type, MixedArena& allocator) {
if (ret->value.type != type) {
throw ParseException("parsed type does not match expected type");
}
- // std::cerr << "make constant " << str << " ==> " << ret->value << '\n';
return ret;
}
@@ -1842,7 +1834,7 @@ Expression* SExpressionWasmBuilder::makeConst(Element& s, Type type) {
}
auto ret = allocator.alloc<Const>();
- Type lane_t = stringToLaneType(s[1]->str().str);
+ Type lane_t = stringToLaneType(s[1]->str().str.data());
size_t lanes = s.size() - 2;
switch (lanes) {
case 2: {
@@ -1918,7 +1910,7 @@ static size_t parseMemAttributes(size_t i,
align = fallbackAlign;
// Parse "align=X" and "offset=X" arguments, bailing out on anything else.
while (!s[i]->isList()) {
- const char* str = s[i]->c_str();
+ const char* str = s[i]->str().str.data();
if (strncmp(str, "align", 5) != 0 && strncmp(str, "offset", 6) != 0) {
return i;
}
@@ -1957,7 +1949,7 @@ static size_t parseMemAttributes(size_t i,
}
static const char* findMemExtra(const Element& s, size_t skip, bool isAtomic) {
- auto* str = s.c_str();
+ auto* str = s.str().str.data();
auto size = strlen(str);
auto* ret = strchr(str, '.');
if (!ret) {
@@ -1977,8 +1969,8 @@ bool SExpressionWasmBuilder::hasMemoryIdx(Element& s,
Index defaultSize,
Index i) {
if (s.size() > defaultSize && !s[i]->isList() &&
- strncmp(s[i]->c_str(), "align", 5) != 0 &&
- strncmp(s[i]->c_str(), "offset", 6) != 0) {
+ strncmp(s[i]->str().str.data(), "align", 5) != 0 &&
+ strncmp(s[i]->str().str.data(), "offset", 6) != 0) {
return true;
}
return false;
@@ -2190,7 +2182,7 @@ Expression* SExpressionWasmBuilder::makeAtomicFence(Element& s) {
}
static uint8_t parseLaneIndex(const Element* s, size_t lanes) {
- const char* str = s->c_str();
+ const char* str = s->str().str.data();
char* end;
auto n = static_cast<unsigned long long>(strtoll(str, &end, 10));
if (end == str || *end != '\0') {
@@ -2360,7 +2352,7 @@ Expression* SExpressionWasmBuilder::makeMemoryInit(Element& s) {
memory = getMemoryNameAtIdx(0);
}
ret->memory = memory;
- ret->segment = atoi(s[i++]->str().c_str());
+ ret->segment = parseIndex(*s[i++]);
ret->dest = parseExpression(s[i++]);
ret->offset = parseExpression(s[i++]);
ret->size = parseExpression(s[i]);
@@ -2370,7 +2362,7 @@ Expression* SExpressionWasmBuilder::makeMemoryInit(Element& s) {
Expression* SExpressionWasmBuilder::makeDataDrop(Element& s) {
auto ret = allocator.alloc<DataDrop>();
- ret->segment = atoi(s[1]->str().c_str());
+ ret->segment = parseIndex(*s[1]);
ret->finalize();
return ret;
}
@@ -2552,7 +2544,7 @@ Name SExpressionWasmBuilder::getLabel(Element& s, LabelType labelType) {
// offset, break to nth outside label
uint64_t offset;
try {
- offset = std::stoll(s.c_str(), nullptr, 0);
+ offset = std::stoll(s.toString(), nullptr, 0);
} catch (std::invalid_argument&) {
throw ParseException("invalid break offset", s.line, s.col);
} catch (std::out_of_range&) {
@@ -2836,7 +2828,7 @@ Expression* SExpressionWasmBuilder::makeTupleMake(Element& s) {
Expression* SExpressionWasmBuilder::makeTupleExtract(Element& s) {
auto ret = allocator.alloc<TupleExtract>();
- ret->index = atoi(s[1]->str().c_str());
+ ret->index = parseIndex(*s[1]);
ret->tuple = parseExpression(s[2]);
if (ret->tuple->type != Type::unreachable &&
ret->index >= ret->tuple->type.size()) {
@@ -2943,7 +2935,7 @@ Expression* SExpressionWasmBuilder::makeStructNewStatic(Element& s,
Index SExpressionWasmBuilder::getStructIndex(Element& type, Element& field) {
if (field.dollared()) {
auto name = field.str();
- auto index = typeIndices[type.str().str];
+ auto index = typeIndices[type.toString()];
auto struct_ = types[index].getStruct();
auto& fields = struct_.fields;
const auto& names = fieldNames[index];
@@ -2956,7 +2948,7 @@ Index SExpressionWasmBuilder::getStructIndex(Element& type, Element& field) {
throw ParseException("bad struct field name", field.line, field.col);
}
// this is a numeric index
- return atoi(field.c_str());
+ return parseIndex(field);
}
Expression* SExpressionWasmBuilder::makeStructGet(Element& s, bool signed_) {
@@ -3055,12 +3047,12 @@ Expression* SExpressionWasmBuilder::makeStringNew(Element& s, StringNewOp op) {
size_t i = 1;
Expression* length = nullptr;
if (op == StringNewWTF8) {
- const char* str = s[i++]->c_str();
- if (strncmp(str, "utf8", 4) == 0) {
+ std::string_view str = s[i++]->str().str;
+ if (str == "utf8") {
op = StringNewUTF8;
- } else if (strncmp(str, "wtf8", 4) == 0) {
+ } else if (str == "wtf8") {
op = StringNewWTF8;
- } else if (strncmp(str, "replace", 7) == 0) {
+ } else if (str == "replace") {
op = StringNewReplace;
} else {
throw ParseException("bad string.new op", s.line, s.col);
@@ -3071,12 +3063,12 @@ Expression* SExpressionWasmBuilder::makeStringNew(Element& s, StringNewOp op) {
length = parseExpression(s[i + 1]);
return Builder(wasm).makeStringNew(op, parseExpression(s[i]), length);
} else if (op == StringNewWTF8Array) {
- const char* str = s[i++]->c_str();
- if (strncmp(str, "utf8", 4) == 0) {
+ std::string_view str = s[i++]->str().str;
+ if (str == "utf8") {
op = StringNewUTF8Array;
- } else if (strncmp(str, "wtf8", 4) == 0) {
+ } else if (str == "wtf8") {
op = StringNewWTF8Array;
- } else if (strncmp(str, "replace", 7) == 0) {
+ } else if (str == "replace") {
op = StringNewReplaceArray;
} else {
throw ParseException("bad string.new op", s.line, s.col);
@@ -3094,16 +3086,9 @@ Expression* SExpressionWasmBuilder::makeStringNew(Element& s, StringNewOp op) {
}
Expression* SExpressionWasmBuilder::makeStringConst(Element& s) {
- Name rawStr = s[1]->str();
- size_t len = rawStr.size();
std::vector<char> data;
- stringToBinary(rawStr.c_str(), len, data);
- data.push_back('\0');
- Name str = data.empty() ? "" : &data[0];
- if (str.size() != data.size() - 1) {
- throw ParseException(
- "zero bytes not yet supported in string constants", s.line, s.col);
- }
+ stringToBinary(*s[1], s[1]->str().str, data);
+ Name str = std::string_view(data.data(), data.size());
return Builder(wasm).makeStringConst(str);
}
@@ -3111,10 +3096,10 @@ Expression* SExpressionWasmBuilder::makeStringMeasure(Element& s,
StringMeasureOp op) {
size_t i = 1;
if (op == StringMeasureWTF8) {
- const char* str = s[i++]->c_str();
- if (strncmp(str, "utf8", 4) == 0) {
+ std::string_view str = s[i++]->str().str;
+ if (str == "utf8") {
op = StringMeasureUTF8;
- } else if (strncmp(str, "wtf8", 4) == 0) {
+ } else if (str == "wtf8") {
op = StringMeasureWTF8;
} else {
throw ParseException("bad string.measure op", s.line, s.col);
@@ -3128,19 +3113,19 @@ Expression* SExpressionWasmBuilder::makeStringEncode(Element& s,
size_t i = 1;
Expression* start = nullptr;
if (op == StringEncodeWTF8) {
- const char* str = s[i++]->c_str();
- if (strncmp(str, "utf8", 4) == 0) {
+ std::string_view str = s[i++]->str().str;
+ if (str == "utf8") {
op = StringEncodeUTF8;
- } else if (strncmp(str, "wtf8", 4) == 0) {
+ } else if (str == "wtf8") {
op = StringEncodeWTF8;
} else {
throw ParseException("bad string.new op", s.line, s.col);
}
} else if (op == StringEncodeWTF8Array) {
- const char* str = s[i++]->c_str();
- if (strncmp(str, "utf8", 4) == 0) {
+ std::string_view str = s[i++]->str().str;
+ if (str == "utf8") {
op = StringEncodeUTF8Array;
- } else if (strncmp(str, "wtf8", 4) == 0) {
+ } else if (str == "wtf8") {
op = StringEncodeWTF8Array;
} else {
throw ParseException("bad string.new op", s.line, s.col);
@@ -3201,17 +3186,18 @@ Expression* SExpressionWasmBuilder::makeStringSliceIter(Element& s) {
// converts an s-expression string representing binary data into an output
// sequence of raw bytes this appends to data, which may already contain
// content.
-void SExpressionWasmBuilder::stringToBinary(const char* input,
- size_t size,
+void SExpressionWasmBuilder::stringToBinary(Element& s,
+ std::string_view str,
std::vector<char>& data) {
auto originalSize = data.size();
- data.resize(originalSize + size);
+ data.resize(originalSize + str.size());
char* write = data.data() + originalSize;
- while (1) {
- if (input[0] == 0) {
- break;
- }
+ const char* end = str.data() + str.size();
+ for (const char* input = str.data(); input < end;) {
if (input[0] == '\\') {
+ if (input + 1 >= end) {
+ throw ParseException("Unterminated escape sequence", s.line, s.col);
+ }
if (input[1] == 't') {
*write++ = '\t';
input += 2;
@@ -3237,6 +3223,9 @@ void SExpressionWasmBuilder::stringToBinary(const char* input,
input += 2;
continue;
} else {
+ if (input + 2 >= end) {
+ throw ParseException("Unterminated escape sequence", s.line, s.col);
+ }
*write++ = (char)(unhex(input[1]) * 16 + unhex(input[2]));
input += 3;
continue;
@@ -3361,7 +3350,7 @@ void SExpressionWasmBuilder::parseMemory(Element& s, bool preParseImport) {
checkAddress(offsetValue, "excessive memory offset", offsetElem);
}
}
- const char* input = curr[j]->c_str();
+ std::string_view input = curr[j]->str().str;
auto* offset = allocator.alloc<Const>();
if (memory->is64()) {
offset->type = Type::i64;
@@ -3370,9 +3359,9 @@ void SExpressionWasmBuilder::parseMemory(Element& s, bool preParseImport) {
offset->type = Type::i32;
offset->value = Literal(int32_t(offsetValue));
}
- if (auto size = strlen(input)) {
+ if (input.size()) {
std::vector<char> data;
- stringToBinary(input, size, data);
+ stringToBinary(*curr[j], input, data);
auto segment = Builder::makeDataSegment(Name::fromInt(dataCounter++),
memory->name,
false,
@@ -3437,10 +3426,8 @@ void SExpressionWasmBuilder::parseInnerData(Element& s,
std::unique_ptr<DataSegment>& seg) {
std::vector<char> data;
while (i < s.size()) {
- const char* input = s[i++]->c_str();
- if (auto size = strlen(input)) {
- stringToBinary(input, size, data);
- }
+ std::string_view input = s[i++]->str().str;
+ stringToBinary(s, input, data);
}
seg->data.resize(data.size());
std::copy_n(data.data(), data.size(), seg->data.begin());
@@ -3736,12 +3723,12 @@ void SExpressionWasmBuilder::parseTable(Element& s, bool preParseImport) {
bool hasExplicitLimit = false;
- if (s[i]->isStr() && String::isNumber(s[i]->c_str())) {
- table->initial = atoi(s[i++]->c_str());
+ if (s[i]->isStr() && String::isNumber(s[i]->toString())) {
+ table->initial = parseIndex(*s[i++]);
hasExplicitLimit = true;
}
- if (s[i]->isStr() && String::isNumber(s[i]->c_str())) {
- table->max = atoi(s[i++]->c_str());
+ if (s[i]->isStr() && String::isNumber(s[i]->toString())) {
+ table->max = parseIndex(*s[i++]);
}
table->type = elementToType(*s[i++]);
@@ -3905,7 +3892,7 @@ HeapType SExpressionWasmBuilder::parseHeapType(Element& s) {
if (s.isStr()) {
// It's a string.
if (s.dollared()) {
- auto it = typeIndices.find(s.str().str);
+ auto it = typeIndices.find(s.toString());
if (it == typeIndices.end()) {
throw ParseException("unknown dollared function type", s.line, s.col);
}
@@ -3913,15 +3900,15 @@ HeapType SExpressionWasmBuilder::parseHeapType(Element& s) {
} else {
// It may be a numerical index, or it may be a built-in type name like
// "i31".
- auto* str = s.str().c_str();
+ auto str = s.toString();
if (String::isNumber(str)) {
- size_t offset = atoi(str);
+ size_t offset = parseIndex(s);
if (offset >= types.size()) {
throw ParseException("unknown indexed function type", s.line, s.col);
}
return types[offset];
}
- return stringToHeapType(str, /* prefix = */ false);
+ return stringToHeapType(s.str(), /* prefix = */ false);
}
}
throw ParseException("invalid heap type", s.line, s.col);