summary refs log tree commit diff
diff options
context:
space:
mode:
author Alon Zakai <azakai@google.com> 2022-07-06 08:48:12 -0700
committer GitHub <noreply@github.com> 2022-07-06 08:48:12 -0700
commit 876638f8fb5bfc8b264eddc6c0c0d54ed40d0095 (patch)
tree 40138beab484617d5b3af474f8e1bd485ffc603e
parent b69d3a8fa0d6d7588811ef92067a48eed576e03f (diff)
download binaryen-876638f8fb5bfc8b264eddc6c0c0d54ed40d0095.tar.gz
binaryen-876638f8fb5bfc8b264eddc6c0c0d54ed40d0095.tar.bz2
binaryen-876638f8fb5bfc8b264eddc6c0c0d54ed40d0095.zip
[Strings] Add string.const (#4768)
This is more work than a typical instruction because it also adds a new section: all the (string.const "foo") strings are put in a new "strings" section in the binary, and the instructions refer to them by index.
-rwxr-xr-x scripts/gen-s-parser.py 1
-rw-r--r-- src/gen-s-parser.inc 20
-rw-r--r-- src/ir/ReFinalize.cpp 1
-rw-r--r-- src/ir/cost.h 1
-rw-r--r-- src/ir/effects.h 1
-rw-r--r-- src/ir/possible-contents.cpp 3
-rw-r--r-- src/ir/properties.h 3
-rw-r--r-- src/passes/Print.cpp 5
-rw-r--r-- src/wasm-binary.h 14
-rw-r--r-- src/wasm-builder.h 6
-rw-r--r-- src/wasm-delegations-fields.def 6
-rw-r--r-- src/wasm-delegations.def 1
-rw-r--r-- src/wasm-interpreter.h 3
-rw-r--r-- src/wasm-s-parser.h 1
-rw-r--r-- src/wasm.h 13
-rw-r--r-- src/wasm/wasm-binary.cpp 102
-rw-r--r-- src/wasm/wasm-s-parser.cpp 4
-rw-r--r-- src/wasm/wasm-stack.cpp 5
-rw-r--r-- src/wasm/wasm.cpp 2
-rw-r--r-- src/wasm2js.h 4
-rw-r--r-- test/lit/strings.wast 36
21 files changed, 221 insertions, 11 deletions
diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py
index 60c9fc6e7..1c7936320 100755
--- a/scripts/gen-s-parser.py
+++ b/scripts/gen-s-parser.py
@@ -616,6 +616,7 @@ instructions = [
("ref.as_i31", "makeRefAs(s, RefAsI31)"),
("string.new_wtf8", "makeStringNew(s, StringNewWTF8)"),
("string.new_wtf16", "makeStringNew(s, StringNewWTF16)"),
+ ("string.const", "makeStringConst(s)"),
]
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc
index 315916fdb..84fc66520 100644
--- a/src/gen-s-parser.inc
+++ b/src/gen-s-parser.inc
@@ -3128,13 +3128,21 @@ switch (op[0]) {
case 't': {
switch (op[3]) {
case 'i': {
- switch (op[14]) {
- case '1':
- if (strcmp(op, "string.new_wtf16") == 0) { return makeStringNew(s, StringNewWTF16); }
- goto parse_error;
- case '8':
- if (strcmp(op, "string.new_wtf8") == 0) { return makeStringNew(s, StringNewWTF8); }
+ switch (op[7]) {
+ case 'c':
+ if (strcmp(op, "string.const") == 0) { return makeStringConst(s); }
goto parse_error;
+ case 'n': {
+ switch (op[14]) {
+ case '1':
+ if (strcmp(op, "string.new_wtf16") == 0) { return makeStringNew(s, StringNewWTF16); }
+ goto parse_error;
+ case '8':
+ if (strcmp(op, "string.new_wtf8") == 0) { return makeStringNew(s, StringNewWTF8); }
+ goto parse_error;
+ default: goto parse_error;
+ }
+ }
default: goto parse_error;
}
}
diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp
index 6b0909666..6b3e8863e 100644
--- a/src/ir/ReFinalize.cpp
+++ b/src/ir/ReFinalize.cpp
@@ -173,6 +173,7 @@ void ReFinalize::visitArrayLen(ArrayLen* curr) { curr->finalize(); }
void ReFinalize::visitArrayCopy(ArrayCopy* curr) { curr->finalize(); }
void ReFinalize::visitRefAs(RefAs* curr) { curr->finalize(); }
void ReFinalize::visitStringNew(StringNew* curr) { curr->finalize(); }
+void ReFinalize::visitStringConst(StringConst* curr) { curr->finalize(); }
void ReFinalize::visitFunction(Function* curr) {
// we may have changed the body from unreachable to none, which might be bad
diff --git a/src/ir/cost.h b/src/ir/cost.h
index 1b0862bf6..2b918bf38 100644
--- a/src/ir/cost.h
+++ b/src/ir/cost.h
@@ -674,6 +674,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
CostType visitStringNew(StringNew* curr) {
return 4 + visit(curr->ptr) + visit(curr->length);
}
+ CostType visitStringConst(StringConst* curr) { return 4; }
private:
CostType nullCheckCost(Expression* ref) {
diff --git a/src/ir/effects.h b/src/ir/effects.h
index d97b01a36..f023b547a 100644
--- a/src/ir/effects.h
+++ b/src/ir/effects.h
@@ -733,6 +733,7 @@ private:
// cycle may be needed in some cases.
}
void visitStringNew(StringNew* curr) {}
+ void visitStringConst(StringConst* curr) {}
};
public:
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp
index 8b95490b6..1d1825865 100644
--- a/src/ir/possible-contents.cpp
+++ b/src/ir/possible-contents.cpp
@@ -678,6 +678,9 @@ struct InfoCollector
}
addRoot(curr, PossibleContents::exactType(curr->type));
}
+ void visitStringConst(StringConst* curr) {
+ addRoot(curr, PossibleContents::exactType(curr->type));
+ }
// TODO: Model which throws can go to which catches. For now, anything thrown
// is sent to the location of that tag, and any catch of that tag can
diff --git a/src/ir/properties.h b/src/ir/properties.h
index 07898169f..4f7fb96ca 100644
--- a/src/ir/properties.h
+++ b/src/ir/properties.h
@@ -417,7 +417,8 @@ bool isGenerative(Expression* curr, FeatureSet features);
inline bool isValidInConstantExpression(Expression* expr, FeatureSet features) {
if (isSingleConstantExpression(expr) || expr->is<GlobalGet>() ||
expr->is<RttCanon>() || expr->is<RttSub>() || expr->is<StructNew>() ||
- expr->is<ArrayNew>() || expr->is<ArrayInit>() || expr->is<I31New>()) {
+ expr->is<ArrayNew>() || expr->is<ArrayInit>() || expr->is<I31New>() ||
+ expr->is<StringConst>()) {
return true;
}
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 70e32e5d7..e766917ec 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -2237,6 +2237,11 @@ struct PrintExpressionContents
WASM_UNREACHABLE("invalid string.new*");
}
}
+ void visitStringConst(StringConst* curr) {
+ printMedium(o, "string.const \"");
+ o << curr->string.str;
+ o << '"';
+ }
};
// Prints an expression in s-expr format, including both the
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index c7ac7e7b7..c88aa3895 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -326,7 +326,8 @@ enum Section {
Code = 10,
Data = 11,
DataCount = 12,
- Tag = 13
+ Tag = 13,
+ Strings = 14,
};
// A passive segment is a segment that will not be automatically copied into a
@@ -1138,6 +1139,7 @@ enum ASTNodes {
BrOnNonI31 = 0x65,
StringNewWTF8 = 0x80,
StringNewWTF16 = 0x81,
+ StringConst = 0x82,
};
enum MemoryAccess {
@@ -1280,6 +1282,7 @@ public:
void writeFunctionSignatures();
void writeExpression(Expression* curr);
void writeFunctions();
+ void writeStrings();
void writeGlobals();
void writeExports();
void writeDataCount();
@@ -1291,6 +1294,7 @@ public:
uint32_t getGlobalIndex(Name name) const;
uint32_t getTagIndex(Name name) const;
uint32_t getTypeIndex(HeapType type) const;
+ uint32_t getStringIndex(Name string) const;
void writeTableDeclarations();
void writeElementSegments();
@@ -1381,6 +1385,9 @@ private:
// info here, and then use it when writing the names.
std::unordered_map<Name, MappedLocals> funcMappedLocals;
+ // Indexes in the string literal section of each StringConst in the wasm.
+ std::unordered_map<Name, Index> stringIndexes;
+
void prepare();
};
@@ -1534,6 +1541,10 @@ public:
std::vector<Export*> exportOrder;
void readExports();
+ // The strings in the strings section (which are referred to by StringConst).
+ std::vector<Name> strings;
+ void readStrings();
+
Expression* readExpression();
void readGlobals();
@@ -1710,6 +1721,7 @@ public:
bool maybeVisitArrayLen(Expression*& out, uint32_t code);
bool maybeVisitArrayCopy(Expression*& out, uint32_t code);
bool maybeVisitStringNew(Expression*& out, uint32_t code);
+ bool maybeVisitStringConst(Expression*& out, uint32_t code);
void visitSelect(Select* curr, uint8_t code);
void visitReturn(Return* curr);
void visitMemorySize(MemorySize* curr);
diff --git a/src/wasm-builder.h b/src/wasm-builder.h
index 7eebf3f04..238f7d738 100644
--- a/src/wasm-builder.h
+++ b/src/wasm-builder.h
@@ -998,6 +998,12 @@ public:
ret->finalize();
return ret;
}
+ StringConst* makeStringConst(Name string) {
+ auto* ret = wasm.allocator.alloc<StringConst>();
+ ret->string = string;
+ ret->finalize();
+ return ret;
+ }
// Additional helpers
diff --git a/src/wasm-delegations-fields.def b/src/wasm-delegations-fields.def
index 6f028a107..a7f39c3c7 100644
--- a/src/wasm-delegations-fields.def
+++ b/src/wasm-delegations-fields.def
@@ -721,6 +721,12 @@ switch (DELEGATE_ID) {
DELEGATE_END(StringNew);
break;
}
+ case Expression::Id::StringConstId: {
+ DELEGATE_START(StringConst);
+ DELEGATE_FIELD_NAME(StringConst, string);
+ DELEGATE_END(StringConst);
+ break;
+ }
}
#undef DELEGATE_ID
diff --git a/src/wasm-delegations.def b/src/wasm-delegations.def
index 5e9a486e3..ba47d9cf5 100644
--- a/src/wasm-delegations.def
+++ b/src/wasm-delegations.def
@@ -86,5 +86,6 @@ DELEGATE(ArrayLen);
DELEGATE(ArrayCopy);
DELEGATE(RefAs);
DELEGATE(StringNew);
+DELEGATE(StringConst);
#undef DELEGATE
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index d91f471a9..1cbbd7689 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -1958,6 +1958,9 @@ public:
Flow visitStringNew(StringNew* curr) {
WASM_UNREACHABLE("unimplemented string.new");
}
+ Flow visitStringConst(StringConst* curr) {
+ WASM_UNREACHABLE("unimplemented string.const"); // message names this instruction, not string.new
+ }
virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); }
diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h
index 051157483..0eb4680af 100644
--- a/src/wasm-s-parser.h
+++ b/src/wasm-s-parser.h
@@ -304,6 +304,7 @@ private:
Expression* makeArrayCopy(Element& s);
Expression* makeRefAs(Element& s, RefAsOp op);
Expression* makeStringNew(Element& s, StringNewOp op);
+ Expression* makeStringConst(Element& s);
// Helper functions
Type parseOptionalResultType(Element& s, Index& i);
diff --git a/src/wasm.h b/src/wasm.h
index 6d0733d34..3032b4ac1 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -686,6 +686,7 @@ public:
ArrayCopyId,
RefAsId,
StringNewId,
+ StringConstId,
NumExpressionIds
};
Id _id;
@@ -1664,6 +1665,18 @@ public:
void finalize();
};
+class StringConst : public SpecificExpression<Expression::StringConstId> {
+public:
+ StringConst(MixedArena& allocator) {}
+
+ // TODO: Use a different type to allow null bytes in the middle -
+ // ArenaVector<char> perhaps? However, Name has the benefit of being
+ // interned and immutable (which is appropriate here).
+ Name string;
+
+ void finalize();
+};
+
// Globals
struct Named {
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 263f905fb..16950f88f 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -54,6 +54,9 @@ void WasmBinaryWriter::write() {
writeTableDeclarations();
writeMemory();
writeTags();
+ if (wasm->features.hasStrings()) {
+ writeStrings();
+ }
writeGlobals();
writeExports();
writeStart();
@@ -451,6 +454,69 @@ void WasmBinaryWriter::writeFunctions() {
finishSection(sectionStart);
}
+void WasmBinaryWriter::writeStrings() {
+ assert(wasm->features.hasStrings());
+
+ // Scan the entire wasm to find the relevant strings.
+ // To find all the string literals we must scan all the code.
+ using StringSet = std::unordered_set<Name>;
+
+ struct StringWalker : public PostWalker<StringWalker> {
+ StringSet& strings;
+
+ StringWalker(StringSet& strings) : strings(strings) {}
+
+ void visitStringConst(StringConst* curr) { strings.insert(curr->string); }
+ };
+
+ ModuleUtils::ParallelFunctionAnalysis<StringSet> analysis(
+ *wasm, [&](Function* func, StringSet& strings) {
+ if (!func->imported()) {
+ StringWalker(strings).walk(func->body);
+ }
+ });
+
+ // Also walk the global module code (for simplicity, also add it to the
+ // function map, using a "function" key of nullptr).
+ auto& globalStrings = analysis.map[nullptr];
+ StringWalker(globalStrings).walkModuleCode(wasm);
+
+ // Generate the indexes from the combined set of necessary strings,
+ // which we sort for determinism.
+ StringSet allStrings;
+ for (auto& [func, strings] : analysis.map) {
+ for (auto& string : strings) {
+ allStrings.insert(string);
+ }
+ }
+ std::vector<Name> sorted;
+ for (auto& string : allStrings) {
+ sorted.push_back(string);
+ }
+ std::sort(sorted.begin(), sorted.end());
+ for (Index i = 0; i < sorted.size(); i++) {
+ stringIndexes[sorted[i]] = i;
+ }
+
+ auto num = sorted.size();
+ if (num == 0) {
+ return;
+ }
+
+ auto start = startSection(BinaryConsts::Section::Strings);
+
+ // Placeholder for future use in the spec.
+ o << U32LEB(0);
+
+ // The number of strings and then their contents.
+ o << U32LEB(num);
+ for (auto& string : sorted) {
+ writeInlineString(string.str);
+ }
+
+ finishSection(start);
+}
+
void WasmBinaryWriter::writeGlobals() {
if (importInfo->getNumDefinedGlobals() == 0) {
return;
@@ -586,6 +652,12 @@ uint32_t WasmBinaryWriter::getTypeIndex(HeapType type) const {
return it->second;
}
+uint32_t WasmBinaryWriter::getStringIndex(Name string) const {
+ auto it = stringIndexes.find(string);
+ assert(it != stringIndexes.end());
+ return it->second;
+}
+
void WasmBinaryWriter::writeTableDeclarations() {
if (importInfo->getNumDefinedTables() == 0) {
// std::cerr << std::endl << "(WasmBinaryWriter::writeTableDeclarations) No
@@ -1489,6 +1561,9 @@ void WasmBinaryBuilder::read() {
case BinaryConsts::Section::Element:
readElementSegments();
break;
+ case BinaryConsts::Section::Strings:
+ readStrings();
+ break;
case BinaryConsts::Section::Global:
readGlobals();
break;
@@ -2612,6 +2687,18 @@ Expression* WasmBinaryBuilder::readExpression() {
return ret;
}
+void WasmBinaryBuilder::readStrings() {
+ auto reserved = getU32LEB();
+ if (reserved != 0) {
+ throwError("unexpected reserved value in strings");
+ }
+ size_t num = getU32LEB();
+ for (size_t i = 0; i < num; i++) {
+ auto string = getInlineString();
+ strings.push_back(string);
+ }
+}
+
void WasmBinaryBuilder::readGlobals() {
BYN_TRACE("== readGlobals\n");
size_t num = getU32LEB();
@@ -3834,6 +3921,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) {
if (maybeVisitStringNew(curr, opcode)) {
break;
}
+ if (maybeVisitStringConst(curr, opcode)) {
+ break;
+ }
if (opcode == BinaryConsts::RefIsFunc ||
opcode == BinaryConsts::RefIsData ||
opcode == BinaryConsts::RefIsI31) {
@@ -7060,6 +7150,18 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
return true;
}
+bool WasmBinaryBuilder::maybeVisitStringConst(Expression*& out, uint32_t code) {
+ if (code != BinaryConsts::StringConst) {
+ return false;
+ }
+ auto index = getU32LEB();
+ if (index >= strings.size()) {
+ throwError("bad string index");
+ }
+ out = Builder(wasm).makeStringConst(strings[index]);
+ return true;
+}
+
void WasmBinaryBuilder::visitRefAs(RefAs* curr, uint8_t code) {
BYN_TRACE("zz node: RefAs\n");
switch (code) {
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index ac15c5e68..cd6c167c0 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -2953,6 +2953,10 @@ Expression* SExpressionWasmBuilder::makeStringNew(Element& s, StringNewOp op) {
op, parseExpression(s[i]), parseExpression(s[i + 1]));
}
+Expression* SExpressionWasmBuilder::makeStringConst(Element& s) {
+ return Builder(wasm).makeStringConst(s[1]->str());
+}
+
// converts an s-expression string representing binary data into an output
// sequence of raw bytes this appends to data, which may already contain
// content.
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index b63ea0fb0..e131ab207 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -2257,6 +2257,11 @@ void BinaryInstWriter::visitStringNew(StringNew* curr) {
}
}
+void BinaryInstWriter::visitStringConst(StringConst* curr) {
+ o << int8_t(BinaryConsts::GCPrefix) << U32LEB(BinaryConsts::StringConst)
+ << U32LEB(parent.getStringIndex(curr->string));
+}
+
void BinaryInstWriter::emitScopeEnd(Expression* curr) {
assert(!breakStack.empty());
breakStack.pop_back();
diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp
index 86072a880..86eed184f 100644
--- a/src/wasm/wasm.cpp
+++ b/src/wasm/wasm.cpp
@@ -1182,6 +1182,8 @@ void StringNew::finalize() {
}
}
+void StringConst::finalize() { type = Type(HeapType::string, NonNullable); }
+
size_t Function::getNumParams() { return getParams().size(); }
size_t Function::getNumVars() { return vars.size(); }
diff --git a/src/wasm2js.h b/src/wasm2js.h
index c2b71aa7a..ad373074d 100644
--- a/src/wasm2js.h
+++ b/src/wasm2js.h
@@ -2307,6 +2307,10 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
unimplemented(curr);
WASM_UNREACHABLE("unimp");
}
+ Ref visitStringConst(StringConst* curr) {
+ unimplemented(curr);
+ WASM_UNREACHABLE("unimp");
+ }
Ref visitRefAs(RefAs* curr) {
unimplemented(curr);
WASM_UNREACHABLE("unimp");
diff --git a/test/lit/strings.wast b/test/lit/strings.wast
index c4b6f39af..7436a04b9 100644
--- a/test/lit/strings.wast
+++ b/test/lit/strings.wast
@@ -1,11 +1,18 @@
-;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
;; Check that string types are emitted properly in the binary format.
;; RUN: foreach %s %t wasm-opt --enable-strings --enable-reference-types --roundtrip -S -o - | filecheck %s
(module
- ;; CHECK: (func $foo (param $a stringref) (param $b stringview_wtf8) (param $c stringview_wtf16) (param $d stringview_iter) (param $e stringref) (param $f stringview_wtf8) (param $g stringview_wtf16) (param $h stringview_iter) (param $i (ref string)) (param $j (ref stringview_wtf8)) (param $k (ref stringview_wtf16)) (param $l (ref stringview_iter))
+ ;; CHECK: (type $ref?|string|_ref?|stringview_wtf8|_ref?|stringview_wtf16|_ref?|stringview_iter|_ref?|string|_ref?|stringview_wtf8|_ref?|stringview_wtf16|_ref?|stringview_iter|_ref|string|_ref|stringview_wtf8|_ref|stringview_wtf16|_ref|stringview_iter|_=>_none (func (param stringref stringview_wtf8 stringview_wtf16 stringview_iter stringref stringview_wtf8 stringview_wtf16 stringview_iter (ref string) (ref stringview_wtf8) (ref stringview_wtf16) (ref stringview_iter))))
+
+ ;; CHECK: (type $none_=>_none (func))
+
+ ;; CHECK: (global $string-const stringref (string.const "string in a global"))
+ (global $string-const stringref (string.const "string in a global"))
+
+ ;; CHECK: (func $string.new (param $a stringref) (param $b stringview_wtf8) (param $c stringview_wtf16) (param $d stringview_iter) (param $e stringref) (param $f stringview_wtf8) (param $g stringview_wtf16) (param $h stringview_iter) (param $i (ref string)) (param $j (ref stringview_wtf8)) (param $k (ref stringview_wtf16)) (param $l (ref stringview_iter))
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (string.new_wtf8 utf8
;; CHECK-NEXT: (i32.const 1)
@@ -31,7 +38,7 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
- (func $foo
+ (func $string.new
(param $a stringref)
(param $b stringview_wtf8)
(param $c stringview_wtf16)
@@ -69,4 +76,27 @@
)
)
)
+
+ ;; CHECK: (func $string.const
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (string.const "foo")
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (string.const "foo")
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: (drop
+ ;; CHECK-NEXT: (string.const "bar")
+ ;; CHECK-NEXT: )
+ ;; CHECK-NEXT: )
+ (func $string.const
+ (drop
+ (string.const "foo")
+ )
+ (drop
+ (string.const "foo") ;; intentionally repeat the previous one
+ )
+ (drop
+ (string.const "bar")
+ )
+ )
)