diff options
author | Alon Zakai <azakai@google.com> | 2022-06-29 16:05:10 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-29 16:05:10 -0700 |
commit | 19f4db6ef5263a578baef7e64bf9c9169bb771e6 (patch) | |
tree | d9608e8483415332b744701a714ec83d8e8009fb /src | |
parent | d252c3e9e5dee98150c5ac625b6deb0e95139ede (diff) | |
download | binaryen-19f4db6ef5263a578baef7e64bf9c9169bb771e6.tar.gz binaryen-19f4db6ef5263a578baef7e64bf9c9169bb771e6.tar.bz2 binaryen-19f4db6ef5263a578baef7e64bf9c9169bb771e6.zip |
[Strings] Add string.new* instructions (#4761)
This adds the first instructions from the Strings proposal (string.new_wtf8 and string.new_wtf16).
The change includes everything except interpreter support, which is left unimplemented for now.
Diffstat (limited to 'src')
-rw-r--r-- | src/gen-s-parser.inc | 84 | ||||
-rw-r--r-- | src/ir/ReFinalize.cpp | 1 | ||||
-rw-r--r-- | src/ir/cost.h | 3 | ||||
-rw-r--r-- | src/ir/effects.h | 1 | ||||
-rw-r--r-- | src/ir/possible-contents.cpp | 7 | ||||
-rw-r--r-- | src/passes/Print.cpp | 18 | ||||
-rw-r--r-- | src/wasm-binary.h | 11 | ||||
-rw-r--r-- | src/wasm-builder.h | 9 | ||||
-rw-r--r-- | src/wasm-delegations-fields.def | 8 | ||||
-rw-r--r-- | src/wasm-delegations.def | 1 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 3 | ||||
-rw-r--r-- | src/wasm-s-parser.h | 1 | ||||
-rw-r--r-- | src/wasm.h | 20 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 31 | ||||
-rw-r--r-- | src/wasm/wasm-s-parser.cpp | 18 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 23 | ||||
-rw-r--r-- | src/wasm/wasm.cpp | 8 | ||||
-rw-r--r-- | src/wasm2js.h | 4 |
18 files changed, 216 insertions, 35 deletions
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index baeab95ec..315916fdb 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -3126,56 +3126,72 @@ switch (op[0]) { if (strcmp(op, "select") == 0) { return makeSelect(s); } goto parse_error; case 't': { - switch (op[7]) { - case 'g': { - switch (op[10]) { - case '\0': - if (strcmp(op, "struct.get") == 0) { return makeStructGet(s); } + switch (op[3]) { + case 'i': { + switch (op[14]) { + case '1': + if (strcmp(op, "string.new_wtf16") == 0) { return makeStringNew(s, StringNewWTF16); } + goto parse_error; + case '8': + if (strcmp(op, "string.new_wtf8") == 0) { return makeStringNew(s, StringNewWTF8); } goto parse_error; - case '_': { - switch (op[11]) { - case 's': - if (strcmp(op, "struct.get_s") == 0) { return makeStructGet(s, true); } - goto parse_error; - case 'u': - if (strcmp(op, "struct.get_u") == 0) { return makeStructGet(s, false); } - goto parse_error; - default: goto parse_error; - } - } default: goto parse_error; } } - case 'n': { - switch (op[10]) { - case '\0': - if (strcmp(op, "struct.new") == 0) { return makeStructNewStatic(s, false); } - goto parse_error; - case '_': { - switch (op[11]) { - case 'd': { - switch (op[18]) { - case '\0': - if (strcmp(op, "struct.new_default") == 0) { return makeStructNewStatic(s, true); } + case 'u': { + switch (op[7]) { + case 'g': { + switch (op[10]) { + case '\0': + if (strcmp(op, "struct.get") == 0) { return makeStructGet(s); } + goto parse_error; + case '_': { + switch (op[11]) { + case 's': + if (strcmp(op, "struct.get_s") == 0) { return makeStructGet(s, true); } goto parse_error; - case '_': - if (strcmp(op, "struct.new_default_with_rtt") == 0) { return makeStructNew(s, true); } + case 'u': + if (strcmp(op, "struct.get_u") == 0) { return makeStructGet(s, false); } goto parse_error; default: goto parse_error; } } - case 'w': - if (strcmp(op, "struct.new_with_rtt") == 0) { return makeStructNew(s, false); } + default: goto parse_error; + } + } + 
case 'n': { + switch (op[10]) { + case '\0': + if (strcmp(op, "struct.new") == 0) { return makeStructNewStatic(s, false); } goto parse_error; + case '_': { + switch (op[11]) { + case 'd': { + switch (op[18]) { + case '\0': + if (strcmp(op, "struct.new_default") == 0) { return makeStructNewStatic(s, true); } + goto parse_error; + case '_': + if (strcmp(op, "struct.new_default_with_rtt") == 0) { return makeStructNew(s, true); } + goto parse_error; + default: goto parse_error; + } + } + case 'w': + if (strcmp(op, "struct.new_with_rtt") == 0) { return makeStructNew(s, false); } + goto parse_error; + default: goto parse_error; + } + } default: goto parse_error; } } + case 's': + if (strcmp(op, "struct.set") == 0) { return makeStructSet(s); } + goto parse_error; default: goto parse_error; } } - case 's': - if (strcmp(op, "struct.set") == 0) { return makeStructSet(s); } - goto parse_error; default: goto parse_error; } } diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp index ef09ddbae..6b0909666 100644 --- a/src/ir/ReFinalize.cpp +++ b/src/ir/ReFinalize.cpp @@ -172,6 +172,7 @@ void ReFinalize::visitArraySet(ArraySet* curr) { curr->finalize(); } void ReFinalize::visitArrayLen(ArrayLen* curr) { curr->finalize(); } void ReFinalize::visitArrayCopy(ArrayCopy* curr) { curr->finalize(); } void ReFinalize::visitRefAs(RefAs* curr) { curr->finalize(); } +void ReFinalize::visitStringNew(StringNew* curr) { curr->finalize(); } void ReFinalize::visitFunction(Function* curr) { // we may have changed the body from unreachable to none, which might be bad diff --git a/src/ir/cost.h b/src/ir/cost.h index 8502a7610..1b0862bf6 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -671,6 +671,9 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { visit(curr->srcRef) + visit(curr->srcIndex) + visit(curr->length); } CostType visitRefAs(RefAs* curr) { return 1 + visit(curr->value); } + CostType visitStringNew(StringNew* curr) { + return 4 + visit(curr->ptr) + 
visit(curr->length); + } private: CostType nullCheckCost(Expression* ref) { diff --git a/src/ir/effects.h b/src/ir/effects.h index 2bd209d3e..d97b01a36 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -732,6 +732,7 @@ private: // we keep the code here simpler, but it does mean another optimization // cycle may be needed in some cases. } + void visitStringNew(StringNew* curr) {} }; public: diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp index 94c22aacc..d10f2b4ac 100644 --- a/src/ir/possible-contents.cpp +++ b/src/ir/possible-contents.cpp @@ -672,6 +672,13 @@ struct InfoCollector visitArraySet(set); } + void visitStringNew(StringNew* curr) { + if (curr->type == Type::unreachable) { + return; + } + addRoot(curr, PossibleContents::exactType(curr->type)); + } + // TODO: Model which throws can go to which catches. For now, anything thrown // is sent to the location of that tag, and any catch of that tag can // read them. diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 2aacb513b..c9ef1a224 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2203,6 +2203,24 @@ struct PrintExpressionContents WASM_UNREACHABLE("invalid ref.is_*"); } } + void visitStringNew(StringNew* curr) { + switch (curr->op) { + case StringNewUTF8: + printMedium(o, "string.new_wtf8 utf8"); + break; + case StringNewWTF8: + printMedium(o, "string.new_wtf8 wtf8"); + break; + case StringNewReplace: + printMedium(o, "string.new_wtf8 replace"); + break; + case StringNewWTF16: + printMedium(o, "string.new_wtf16"); + break; + default: + WASM_UNREACHABLE("invalid string.new*"); + } + } }; // Prints an expression in s-expr format, including both the diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 3b3fa30f6..a5c6d0a46 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1135,6 +1135,8 @@ enum ASTNodes { BrOnNonFunc = 0x63, BrOnNonData = 0x64, BrOnNonI31 = 0x65, + StringNewWTF8 = 0x80, + StringNewWTF16 = 0x81, }; enum MemoryAccess { @@ -1145,6 
+1147,12 @@ enum MemoryAccess { enum MemoryFlags { HasMaximum = 1 << 0, IsShared = 1 << 1, Is64 = 1 << 2 }; +enum StringNewPolicy { + UTF8 = 0x00, + WTF8 = 0x01, + Replace = 0x02, +}; + enum FeaturePrefix { FeatureUsed = '+', FeatureRequired = '=', @@ -1700,6 +1708,7 @@ public: bool maybeVisitArraySet(Expression*& out, uint32_t code); bool maybeVisitArrayLen(Expression*& out, uint32_t code); bool maybeVisitArrayCopy(Expression*& out, uint32_t code); + bool maybeVisitStringNew(Expression*& out, uint32_t code); void visitSelect(Select* curr, uint8_t code); void visitReturn(Return* curr); void visitMemorySize(MemorySize* curr); @@ -1721,7 +1730,7 @@ public: // Let is lowered into a block. void visitLet(Block* curr); - void throwError(std::string text); + [[noreturn]] void throwError(std::string text); // Struct/Array instructions have an unnecessary heap type that is just for // validation (except for the case of unreachability, but that's not a problem diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 20babacf1..7eebf3f04 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -989,6 +989,15 @@ public: ret->finalize(); return ret; } + StringNew* + makeStringNew(StringNewOp op, Expression* ptr, Expression* length) { + auto* ret = wasm.allocator.alloc<StringNew>(); + ret->op = op; + ret->ptr = ptr; + ret->length = length; + ret->finalize(); + return ret; + } // Additional helpers diff --git a/src/wasm-delegations-fields.def b/src/wasm-delegations-fields.def index 68e23fdd0..6f028a107 100644 --- a/src/wasm-delegations-fields.def +++ b/src/wasm-delegations-fields.def @@ -713,6 +713,14 @@ switch (DELEGATE_ID) { DELEGATE_END(RefAs); break; } + case Expression::Id::StringNewId: { + DELEGATE_START(StringNew); + DELEGATE_FIELD_INT(StringNew, op); + DELEGATE_FIELD_CHILD(StringNew, length); + DELEGATE_FIELD_CHILD(StringNew, ptr); + DELEGATE_END(StringNew); + break; + } } #undef DELEGATE_ID diff --git a/src/wasm-delegations.def b/src/wasm-delegations.def index 
c3f04674a..5e9a486e3 100644 --- a/src/wasm-delegations.def +++ b/src/wasm-delegations.def @@ -85,5 +85,6 @@ DELEGATE(ArraySet); DELEGATE(ArrayLen); DELEGATE(ArrayCopy); DELEGATE(RefAs); +DELEGATE(StringNew); #undef DELEGATE diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index f682e89a2..d91f471a9 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1955,6 +1955,9 @@ public: } return value; } + Flow visitStringNew(StringNew* curr) { + WASM_UNREACHABLE("unimplemented string.new"); + } virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); } diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h index 75c4f0dec..051157483 100644 --- a/src/wasm-s-parser.h +++ b/src/wasm-s-parser.h @@ -303,6 +303,7 @@ private: Expression* makeArrayLen(Element& s); Expression* makeArrayCopy(Element& s); Expression* makeRefAs(Element& s, RefAsOp op); + Expression* makeStringNew(Element& s, StringNewOp op); // Helper functions Type parseOptionalResultType(Element& s, Index& i); diff --git a/src/wasm.h b/src/wasm.h index a466acdbe..6d0733d34 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -583,6 +583,13 @@ enum BrOnOp { BrOnNonI31, }; +enum StringNewOp { + StringNewUTF8, + StringNewWTF8, + StringNewReplace, + StringNewWTF16 +}; + // // Expressions // @@ -678,6 +685,7 @@ public: ArrayLenId, ArrayCopyId, RefAsId, + StringNewId, NumExpressionIds }; Id _id; @@ -1644,6 +1652,18 @@ public: void finalize(); }; +class StringNew : public SpecificExpression<Expression::StringNewId> { +public: + StringNew(MixedArena& allocator) {} + + StringNewOp op; + + Expression* ptr; + Expression* length; + + void finalize(); +}; + // Globals struct Named { diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 87ee00c8a..f087e3e4b 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -3827,6 +3827,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { if (maybeVisitArrayCopy(curr, opcode)) { break; } + if 
(maybeVisitStringNew(curr, opcode)) { + break; + } if (opcode == BinaryConsts::RefIsFunc || opcode == BinaryConsts::RefIsData || opcode == BinaryConsts::RefIsI31) { @@ -7025,6 +7028,34 @@ bool WasmBinaryBuilder::maybeVisitArrayCopy(Expression*& out, uint32_t code) { return true; } +bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) { + StringNewOp op; + if (code == BinaryConsts::StringNewWTF8) { + auto policy = getU32LEB(); + switch (policy) { + case BinaryConsts::StringNewPolicy::UTF8: + op = StringNewUTF8; + break; + case BinaryConsts::StringNewPolicy::WTF8: + op = StringNewWTF8; + break; + case BinaryConsts::StringNewPolicy::Replace: + op = StringNewReplace; + break; + default: + throwError("bad policy for string.new"); + } + } else if (code == BinaryConsts::StringNewWTF16) { + op = StringNewWTF16; + } else { + return false; + } + auto* length = popNonVoidExpression(); + auto* ptr = popNonVoidExpression(); + out = Builder(wasm).makeStringNew(op, ptr, length); + return true; +} + void WasmBinaryBuilder::visitRefAs(RefAs* curr, uint8_t code) { BYN_TRACE("zz node: RefAs\n"); switch (code) { diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp index 22557a06d..ac15c5e68 100644 --- a/src/wasm/wasm-s-parser.cpp +++ b/src/wasm/wasm-s-parser.cpp @@ -2935,6 +2935,24 @@ Expression* SExpressionWasmBuilder::makeRefAs(Element& s, RefAsOp op) { return Builder(wasm).makeRefAs(op, parseExpression(s[1])); } +Expression* SExpressionWasmBuilder::makeStringNew(Element& s, StringNewOp op) { + size_t i = 1; + if (op == StringNewWTF8) { + const char* str = s[i++]->c_str(); + if (strncmp(str, "utf8", 4) == 0) { + op = StringNewUTF8; + } else if (strncmp(str, "wtf8", 4) == 0) { + op = StringNewWTF8; + } else if (strncmp(str, "replace", 7) == 0) { + op = StringNewReplace; + } else { + throw ParseException("bad string.new op", s.line, s.col); + } + } + return Builder(wasm).makeStringNew( + op, parseExpression(s[i]), parseExpression(s[i + 1])); +} 
+ // converts an s-expression string representing binary data into an output // sequence of raw bytes this appends to data, which may already contain // content. diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index fb24dbb01..b63ea0fb0 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -2234,6 +2234,29 @@ void BinaryInstWriter::visitRefAs(RefAs* curr) { } } +void BinaryInstWriter::visitStringNew(StringNew* curr) { + o << int8_t(BinaryConsts::GCPrefix); + switch (curr->op) { + case StringNewUTF8: + o << U32LEB(BinaryConsts::StringNewWTF8) + << U32LEB(BinaryConsts::StringNewPolicy::UTF8); + break; + case StringNewWTF8: + o << U32LEB(BinaryConsts::StringNewWTF8) + << U32LEB(BinaryConsts::StringNewPolicy::WTF8); + break; + case StringNewReplace: + o << U32LEB(BinaryConsts::StringNewWTF8) + << U32LEB(BinaryConsts::StringNewPolicy::Replace); + break; + case StringNewWTF16: + o << U32LEB(BinaryConsts::StringNewWTF16); + break; + default: + WASM_UNREACHABLE("invalid string.new*"); + } +} + void BinaryInstWriter::emitScopeEnd(Expression* curr) { assert(!breakStack.empty()); breakStack.pop_back(); diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 8d4f1dcee..428e5093c 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -1173,6 +1173,14 @@ void RefAs::finalize() { } } +void StringNew::finalize() { + if (ptr->type == Type::unreachable || length->type == Type::unreachable) { + type = Type::unreachable; + } else { + type = Type(HeapType::string, NonNullable); + } +} + size_t Function::getNumParams() { return getParams().size(); } size_t Function::getNumVars() { return vars.size(); } diff --git a/src/wasm2js.h b/src/wasm2js.h index 5b98362ad..c2b71aa7a 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -2303,6 +2303,10 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m, unimplemented(curr); WASM_UNREACHABLE("unimp"); } + Ref visitStringNew(StringNew* curr) { + unimplemented(curr); + WASM_UNREACHABLE("unimp"); + } Ref 
visitRefAs(RefAs* curr) { unimplemented(curr); WASM_UNREACHABLE("unimp"); |