diff options
author | Alon Zakai <azakai@google.com> | 2022-07-06 08:48:12 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-06 08:48:12 -0700 |
commit | 876638f8fb5bfc8b264eddc6c0c0d54ed40d0095 (patch) | |
tree | 40138beab484617d5b3af474f8e1bd485ffc603e | |
parent | b69d3a8fa0d6d7588811ef92067a48eed576e03f (diff) | |
download | binaryen-876638f8fb5bfc8b264eddc6c0c0d54ed40d0095.tar.gz binaryen-876638f8fb5bfc8b264eddc6c0c0d54ed40d0095.tar.bz2 binaryen-876638f8fb5bfc8b264eddc6c0c0d54ed40d0095.zip |
[Strings] Add string.const (#4768)
This is more work than a typical instruction because it also adds a new section:
all the (string.const "foo") strings are put in a new "strings" section in the binary, and
the instructions refer to them by index.
-rwxr-xr-x | scripts/gen-s-parser.py | 1 | ||||
-rw-r--r-- | src/gen-s-parser.inc | 20 | ||||
-rw-r--r-- | src/ir/ReFinalize.cpp | 1 | ||||
-rw-r--r-- | src/ir/cost.h | 1 | ||||
-rw-r--r-- | src/ir/effects.h | 1 | ||||
-rw-r--r-- | src/ir/possible-contents.cpp | 3 | ||||
-rw-r--r-- | src/ir/properties.h | 3 | ||||
-rw-r--r-- | src/passes/Print.cpp | 5 | ||||
-rw-r--r-- | src/wasm-binary.h | 14 | ||||
-rw-r--r-- | src/wasm-builder.h | 6 | ||||
-rw-r--r-- | src/wasm-delegations-fields.def | 6 | ||||
-rw-r--r-- | src/wasm-delegations.def | 1 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 3 | ||||
-rw-r--r-- | src/wasm-s-parser.h | 1 | ||||
-rw-r--r-- | src/wasm.h | 13 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 102 | ||||
-rw-r--r-- | src/wasm/wasm-s-parser.cpp | 4 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 5 | ||||
-rw-r--r-- | src/wasm/wasm.cpp | 2 | ||||
-rw-r--r-- | src/wasm2js.h | 4 | ||||
-rw-r--r-- | test/lit/strings.wast | 36 |
21 files changed, 221 insertions, 11 deletions
diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py index 60c9fc6e7..1c7936320 100755 --- a/scripts/gen-s-parser.py +++ b/scripts/gen-s-parser.py @@ -616,6 +616,7 @@ instructions = [ ("ref.as_i31", "makeRefAs(s, RefAsI31)"), ("string.new_wtf8", "makeStringNew(s, StringNewWTF8)"), ("string.new_wtf16", "makeStringNew(s, StringNewWTF16)"), + ("string.const", "makeStringConst(s)"), ] diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index 315916fdb..84fc66520 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -3128,13 +3128,21 @@ switch (op[0]) { case 't': { switch (op[3]) { case 'i': { - switch (op[14]) { - case '1': - if (strcmp(op, "string.new_wtf16") == 0) { return makeStringNew(s, StringNewWTF16); } - goto parse_error; - case '8': - if (strcmp(op, "string.new_wtf8") == 0) { return makeStringNew(s, StringNewWTF8); } + switch (op[7]) { + case 'c': + if (strcmp(op, "string.const") == 0) { return makeStringConst(s); } goto parse_error; + case 'n': { + switch (op[14]) { + case '1': + if (strcmp(op, "string.new_wtf16") == 0) { return makeStringNew(s, StringNewWTF16); } + goto parse_error; + case '8': + if (strcmp(op, "string.new_wtf8") == 0) { return makeStringNew(s, StringNewWTF8); } + goto parse_error; + default: goto parse_error; + } + } default: goto parse_error; } } diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp index 6b0909666..6b3e8863e 100644 --- a/src/ir/ReFinalize.cpp +++ b/src/ir/ReFinalize.cpp @@ -173,6 +173,7 @@ void ReFinalize::visitArrayLen(ArrayLen* curr) { curr->finalize(); } void ReFinalize::visitArrayCopy(ArrayCopy* curr) { curr->finalize(); } void ReFinalize::visitRefAs(RefAs* curr) { curr->finalize(); } void ReFinalize::visitStringNew(StringNew* curr) { curr->finalize(); } +void ReFinalize::visitStringConst(StringConst* curr) { curr->finalize(); } void ReFinalize::visitFunction(Function* curr) { // we may have changed the body from unreachable to none, which might be bad diff --git a/src/ir/cost.h 
b/src/ir/cost.h index 1b0862bf6..2b918bf38 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -674,6 +674,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { CostType visitStringNew(StringNew* curr) { return 4 + visit(curr->ptr) + visit(curr->length); } + CostType visitStringConst(StringConst* curr) { return 4; } private: CostType nullCheckCost(Expression* ref) { diff --git a/src/ir/effects.h b/src/ir/effects.h index d97b01a36..f023b547a 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -733,6 +733,7 @@ private: // cycle may be needed in some cases. } void visitStringNew(StringNew* curr) {} + void visitStringConst(StringConst* curr) {} }; public: diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp index 8b95490b6..1d1825865 100644 --- a/src/ir/possible-contents.cpp +++ b/src/ir/possible-contents.cpp @@ -678,6 +678,9 @@ struct InfoCollector } addRoot(curr, PossibleContents::exactType(curr->type)); } + void visitStringConst(StringConst* curr) { + addRoot(curr, PossibleContents::exactType(curr->type)); + } // TODO: Model which throws can go to which catches. 
For now, anything thrown // is sent to the location of that tag, and any catch of that tag can diff --git a/src/ir/properties.h b/src/ir/properties.h index 07898169f..4f7fb96ca 100644 --- a/src/ir/properties.h +++ b/src/ir/properties.h @@ -417,7 +417,8 @@ bool isGenerative(Expression* curr, FeatureSet features); inline bool isValidInConstantExpression(Expression* expr, FeatureSet features) { if (isSingleConstantExpression(expr) || expr->is<GlobalGet>() || expr->is<RttCanon>() || expr->is<RttSub>() || expr->is<StructNew>() || - expr->is<ArrayNew>() || expr->is<ArrayInit>() || expr->is<I31New>()) { + expr->is<ArrayNew>() || expr->is<ArrayInit>() || expr->is<I31New>() || + expr->is<StringConst>()) { return true; } diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 70e32e5d7..e766917ec 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2237,6 +2237,11 @@ struct PrintExpressionContents WASM_UNREACHABLE("invalid string.new*"); } } + void visitStringConst(StringConst* curr) { + printMedium(o, "string.const \""); + o << curr->string.str; + o << '"'; + } }; // Prints an expression in s-expr format, including both the diff --git a/src/wasm-binary.h b/src/wasm-binary.h index c7ac7e7b7..c88aa3895 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -326,7 +326,8 @@ enum Section { Code = 10, Data = 11, DataCount = 12, - Tag = 13 + Tag = 13, + Strings = 14, }; // A passive segment is a segment that will not be automatically copied into a @@ -1138,6 +1139,7 @@ enum ASTNodes { BrOnNonI31 = 0x65, StringNewWTF8 = 0x80, StringNewWTF16 = 0x81, + StringConst = 0x82, }; enum MemoryAccess { @@ -1280,6 +1282,7 @@ public: void writeFunctionSignatures(); void writeExpression(Expression* curr); void writeFunctions(); + void writeStrings(); void writeGlobals(); void writeExports(); void writeDataCount(); @@ -1291,6 +1294,7 @@ public: uint32_t getGlobalIndex(Name name) const; uint32_t getTagIndex(Name name) const; uint32_t getTypeIndex(HeapType type) const; + 
uint32_t getStringIndex(Name string) const; void writeTableDeclarations(); void writeElementSegments(); @@ -1381,6 +1385,9 @@ private: // info here, and then use it when writing the names. std::unordered_map<Name, MappedLocals> funcMappedLocals; + // Indexes in the string literal section of each StringConst in the wasm. + std::unordered_map<Name, Index> stringIndexes; + void prepare(); }; @@ -1534,6 +1541,10 @@ public: std::vector<Export*> exportOrder; void readExports(); + // The strings in the strings section (which are referred to by StringConst). + std::vector<Name> strings; + void readStrings(); + Expression* readExpression(); void readGlobals(); @@ -1710,6 +1721,7 @@ public: bool maybeVisitArrayLen(Expression*& out, uint32_t code); bool maybeVisitArrayCopy(Expression*& out, uint32_t code); bool maybeVisitStringNew(Expression*& out, uint32_t code); + bool maybeVisitStringConst(Expression*& out, uint32_t code); void visitSelect(Select* curr, uint8_t code); void visitReturn(Return* curr); void visitMemorySize(MemorySize* curr); diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 7eebf3f04..238f7d738 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -998,6 +998,12 @@ public: ret->finalize(); return ret; } + StringConst* makeStringConst(Name string) { + auto* ret = wasm.allocator.alloc<StringConst>(); + ret->string = string; + ret->finalize(); + return ret; + } // Additional helpers diff --git a/src/wasm-delegations-fields.def b/src/wasm-delegations-fields.def index 6f028a107..a7f39c3c7 100644 --- a/src/wasm-delegations-fields.def +++ b/src/wasm-delegations-fields.def @@ -721,6 +721,12 @@ switch (DELEGATE_ID) { DELEGATE_END(StringNew); break; } + case Expression::Id::StringConstId: { + DELEGATE_START(StringConst); + DELEGATE_FIELD_NAME(StringConst, string); + DELEGATE_END(StringConst); + break; + } } #undef DELEGATE_ID diff --git a/src/wasm-delegations.def b/src/wasm-delegations.def index 5e9a486e3..ba47d9cf5 100644 --- a/src/wasm-delegations.def 
+++ b/src/wasm-delegations.def @@ -86,5 +86,6 @@ DELEGATE(ArrayLen); DELEGATE(ArrayCopy); DELEGATE(RefAs); DELEGATE(StringNew); +DELEGATE(StringConst); #undef DELEGATE diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index d91f471a9..1cbbd7689 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1958,6 +1958,9 @@ public: Flow visitStringNew(StringNew* curr) { WASM_UNREACHABLE("unimplemented string.new"); } + Flow visitStringConst(StringConst* curr) { + WASM_UNREACHABLE("unimplemented string.const"); + } virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); } diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h index 051157483..0eb4680af 100644 --- a/src/wasm-s-parser.h +++ b/src/wasm-s-parser.h @@ -304,6 +304,7 @@ private: Expression* makeArrayCopy(Element& s); Expression* makeRefAs(Element& s, RefAsOp op); Expression* makeStringNew(Element& s, StringNewOp op); + Expression* makeStringConst(Element& s); // Helper functions Type parseOptionalResultType(Element& s, Index& i); diff --git a/src/wasm.h b/src/wasm.h index 6d0733d34..3032b4ac1 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -686,6 +686,7 @@ public: ArrayCopyId, RefAsId, StringNewId, + StringConstId, NumExpressionIds }; Id _id; @@ -1664,6 +1665,18 @@ public: void finalize(); }; +class StringConst : public SpecificExpression<Expression::StringConstId> { +public: + StringConst(MixedArena& allocator) {} + + // TODO: Use a different type to allow null bytes in the middle - + // ArenaVector<char> perhaps? However, Name has the benefit of being + // interned and immutable (which is appropriate here). 
+ Name string; + + void finalize(); +}; + // Globals struct Named { diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 263f905fb..16950f88f 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -54,6 +54,9 @@ void WasmBinaryWriter::write() { writeTableDeclarations(); writeMemory(); writeTags(); + if (wasm->features.hasStrings()) { + writeStrings(); + } writeGlobals(); writeExports(); writeStart(); @@ -451,6 +454,69 @@ void WasmBinaryWriter::writeFunctions() { finishSection(sectionStart); } +void WasmBinaryWriter::writeStrings() { + assert(wasm->features.hasStrings()); + + // Scan the entire wasm to find the relevant strings. + // To find all the string literals we must scan all the code. + using StringSet = std::unordered_set<Name>; + + struct StringWalker : public PostWalker<StringWalker> { + StringSet& strings; + + StringWalker(StringSet& strings) : strings(strings) {} + + void visitStringConst(StringConst* curr) { strings.insert(curr->string); } + }; + + ModuleUtils::ParallelFunctionAnalysis<StringSet> analysis( + *wasm, [&](Function* func, StringSet& strings) { + if (!func->imported()) { + StringWalker(strings).walk(func->body); + } + }); + + // Also walk the global module code (for simplicity, also add it to the + // function map, using a "function" key of nullptr). + auto& globalStrings = analysis.map[nullptr]; + StringWalker(globalStrings).walkModuleCode(wasm); + + // Generate the indexes from the combined set of necessary strings, + // which we sort for determinism. 
+ StringSet allStrings; + for (auto& [func, strings] : analysis.map) { + for (auto& string : strings) { + allStrings.insert(string); + } + } + std::vector<Name> sorted; + for (auto& string : allStrings) { + sorted.push_back(string); + } + std::sort(sorted.begin(), sorted.end()); + for (Index i = 0; i < sorted.size(); i++) { + stringIndexes[sorted[i]] = i; + } + + auto num = sorted.size(); + if (num == 0) { + return; + } + + auto start = startSection(BinaryConsts::Section::Strings); + + // Placeholder for future use in the spec. + o << U32LEB(0); + + // The number of strings and then their contents. + o << U32LEB(num); + for (auto& string : sorted) { + writeInlineString(string.str); + } + + finishSection(start); +} + void WasmBinaryWriter::writeGlobals() { if (importInfo->getNumDefinedGlobals() == 0) { return; @@ -586,6 +652,12 @@ uint32_t WasmBinaryWriter::getTypeIndex(HeapType type) const { return it->second; } +uint32_t WasmBinaryWriter::getStringIndex(Name string) const { + auto it = stringIndexes.find(string); + assert(it != stringIndexes.end()); + return it->second; +} + void WasmBinaryWriter::writeTableDeclarations() { if (importInfo->getNumDefinedTables() == 0) { // std::cerr << std::endl << "(WasmBinaryWriter::writeTableDeclarations) No @@ -1489,6 +1561,9 @@ void WasmBinaryBuilder::read() { case BinaryConsts::Section::Element: readElementSegments(); break; + case BinaryConsts::Section::Strings: + readStrings(); + break; case BinaryConsts::Section::Global: readGlobals(); break; @@ -2612,6 +2687,18 @@ Expression* WasmBinaryBuilder::readExpression() { return ret; } +void WasmBinaryBuilder::readStrings() { + auto reserved = getU32LEB(); + if (reserved != 0) { + throwError("unexpected reserved value in strings"); + } + size_t num = getU32LEB(); + for (size_t i = 0; i < num; i++) { + auto string = getInlineString(); + strings.push_back(string); + } +} + void WasmBinaryBuilder::readGlobals() { BYN_TRACE("== readGlobals\n"); size_t num = getU32LEB(); @@ -3834,6 
+3921,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { if (maybeVisitStringNew(curr, opcode)) { break; } + if (maybeVisitStringConst(curr, opcode)) { + break; + } if (opcode == BinaryConsts::RefIsFunc || opcode == BinaryConsts::RefIsData || opcode == BinaryConsts::RefIsI31) { @@ -7060,6 +7150,18 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) { return true; } +bool WasmBinaryBuilder::maybeVisitStringConst(Expression*& out, uint32_t code) { + if (code != BinaryConsts::StringConst) { + return false; + } + auto index = getU32LEB(); + if (index >= strings.size()) { + throwError("bad string index"); + } + out = Builder(wasm).makeStringConst(strings[index]); + return true; +} + void WasmBinaryBuilder::visitRefAs(RefAs* curr, uint8_t code) { BYN_TRACE("zz node: RefAs\n"); switch (code) { diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp index ac15c5e68..cd6c167c0 100644 --- a/src/wasm/wasm-s-parser.cpp +++ b/src/wasm/wasm-s-parser.cpp @@ -2953,6 +2953,10 @@ Expression* SExpressionWasmBuilder::makeStringNew(Element& s, StringNewOp op) { op, parseExpression(s[i]), parseExpression(s[i + 1])); } +Expression* SExpressionWasmBuilder::makeStringConst(Element& s) { + return Builder(wasm).makeStringConst(s[1]->str()); +} + // converts an s-expression string representing binary data into an output // sequence of raw bytes this appends to data, which may already contain // content. 
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index b63ea0fb0..e131ab207 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -2257,6 +2257,11 @@ void BinaryInstWriter::visitStringNew(StringNew* curr) { } } +void BinaryInstWriter::visitStringConst(StringConst* curr) { + o << int8_t(BinaryConsts::GCPrefix) << U32LEB(BinaryConsts::StringConst) + << U32LEB(parent.getStringIndex(curr->string)); +} + void BinaryInstWriter::emitScopeEnd(Expression* curr) { assert(!breakStack.empty()); breakStack.pop_back(); diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 86072a880..86eed184f 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -1182,6 +1182,8 @@ void StringNew::finalize() { } } +void StringConst::finalize() { type = Type(HeapType::string, NonNullable); } + size_t Function::getNumParams() { return getParams().size(); } size_t Function::getNumVars() { return vars.size(); } diff --git a/src/wasm2js.h b/src/wasm2js.h index c2b71aa7a..ad373074d 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -2307,6 +2307,10 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m, unimplemented(curr); WASM_UNREACHABLE("unimp"); } + Ref visitStringConst(StringConst* curr) { + unimplemented(curr); + WASM_UNREACHABLE("unimp"); + } Ref visitRefAs(RefAs* curr) { unimplemented(curr); WASM_UNREACHABLE("unimp"); diff --git a/test/lit/strings.wast b/test/lit/strings.wast index c4b6f39af..7436a04b9 100644 --- a/test/lit/strings.wast +++ b/test/lit/strings.wast @@ -1,11 +1,18 @@ -;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; Check that string types are emitted properly in the binary format. 
;; RUN: foreach %s %t wasm-opt --enable-strings --enable-reference-types --roundtrip -S -o - | filecheck %s (module - ;; CHECK: (func $foo (param $a stringref) (param $b stringview_wtf8) (param $c stringview_wtf16) (param $d stringview_iter) (param $e stringref) (param $f stringview_wtf8) (param $g stringview_wtf16) (param $h stringview_iter) (param $i (ref string)) (param $j (ref stringview_wtf8)) (param $k (ref stringview_wtf16)) (param $l (ref stringview_iter)) + ;; CHECK: (type $ref?|string|_ref?|stringview_wtf8|_ref?|stringview_wtf16|_ref?|stringview_iter|_ref?|string|_ref?|stringview_wtf8|_ref?|stringview_wtf16|_ref?|stringview_iter|_ref|string|_ref|stringview_wtf8|_ref|stringview_wtf16|_ref|stringview_iter|_=>_none (func (param stringref stringview_wtf8 stringview_wtf16 stringview_iter stringref stringview_wtf8 stringview_wtf16 stringview_iter (ref string) (ref stringview_wtf8) (ref stringview_wtf16) (ref stringview_iter)))) + + ;; CHECK: (type $none_=>_none (func)) + + ;; CHECK: (global $string-const stringref (string.const "string in a global")) + (global $string-const stringref (string.const "string in a global")) + + ;; CHECK: (func $string.new (param $a stringref) (param $b stringview_wtf8) (param $c stringview_wtf16) (param $d stringview_iter) (param $e stringref) (param $f stringview_wtf8) (param $g stringview_wtf16) (param $h stringview_iter) (param $i (ref string)) (param $j (ref stringview_wtf8)) (param $k (ref stringview_wtf16)) (param $l (ref stringview_iter)) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (string.new_wtf8 utf8 ;; CHECK-NEXT: (i32.const 1) @@ -31,7 +38,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $foo + (func $string.new (param $a stringref) (param $b stringview_wtf8) (param $c stringview_wtf16) @@ -69,4 +76,27 @@ ) ) ) + + ;; CHECK: (func $string.const + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (string.const "foo") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (string.const "foo") + ;; CHECK-NEXT: ) + ;; 
CHECK-NEXT: (drop + ;; CHECK-NEXT: (string.const "bar") + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.const + (drop + (string.const "foo") + ) + (drop + (string.const "foo") ;; intentionally repeat the previous one + ) + (drop + (string.const "bar") + ) + ) ) |