summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Alon Zakai <azakai@google.com>, 2022-06-29 16:05:10 -0700
committer: GitHub <noreply@github.com>, 2022-06-29 16:05:10 -0700
commit: 19f4db6ef5263a578baef7e64bf9c9169bb771e6 (patch)
tree: d9608e8483415332b744701a714ec83d8e8009fb /src
parent: d252c3e9e5dee98150c5ac625b6deb0e95139ede (diff)
download: binaryen-19f4db6ef5263a578baef7e64bf9c9169bb771e6.tar.gz
download: binaryen-19f4db6ef5263a578baef7e64bf9c9169bb771e6.tar.bz2
download: binaryen-19f4db6ef5263a578baef7e64bf9c9169bb771e6.zip
[Strings] Add string.new* instructions (#4761)
These are the first instructions implemented from the Strings proposal. This change includes everything except interpreter support, which is left unimplemented for now.
Diffstat (limited to 'src')
-rw-r--r-- src/gen-s-parser.inc 84
-rw-r--r-- src/ir/ReFinalize.cpp 1
-rw-r--r-- src/ir/cost.h 3
-rw-r--r-- src/ir/effects.h 1
-rw-r--r-- src/ir/possible-contents.cpp 7
-rw-r--r-- src/passes/Print.cpp 18
-rw-r--r-- src/wasm-binary.h 11
-rw-r--r-- src/wasm-builder.h 9
-rw-r--r-- src/wasm-delegations-fields.def 8
-rw-r--r-- src/wasm-delegations.def 1
-rw-r--r-- src/wasm-interpreter.h 3
-rw-r--r-- src/wasm-s-parser.h 1
-rw-r--r-- src/wasm.h 20
-rw-r--r-- src/wasm/wasm-binary.cpp 31
-rw-r--r-- src/wasm/wasm-s-parser.cpp 18
-rw-r--r-- src/wasm/wasm-stack.cpp 23
-rw-r--r-- src/wasm/wasm.cpp 8
-rw-r--r-- src/wasm2js.h 4
18 files changed, 216 insertions, 35 deletions
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc
index baeab95ec..315916fdb 100644
--- a/src/gen-s-parser.inc
+++ b/src/gen-s-parser.inc
@@ -3126,56 +3126,72 @@ switch (op[0]) {
if (strcmp(op, "select") == 0) { return makeSelect(s); }
goto parse_error;
case 't': {
- switch (op[7]) {
- case 'g': {
- switch (op[10]) {
- case '\0':
- if (strcmp(op, "struct.get") == 0) { return makeStructGet(s); }
+ switch (op[3]) {
+ case 'i': {
+ switch (op[14]) {
+ case '1':
+ if (strcmp(op, "string.new_wtf16") == 0) { return makeStringNew(s, StringNewWTF16); }
+ goto parse_error;
+ case '8':
+ if (strcmp(op, "string.new_wtf8") == 0) { return makeStringNew(s, StringNewWTF8); }
goto parse_error;
- case '_': {
- switch (op[11]) {
- case 's':
- if (strcmp(op, "struct.get_s") == 0) { return makeStructGet(s, true); }
- goto parse_error;
- case 'u':
- if (strcmp(op, "struct.get_u") == 0) { return makeStructGet(s, false); }
- goto parse_error;
- default: goto parse_error;
- }
- }
default: goto parse_error;
}
}
- case 'n': {
- switch (op[10]) {
- case '\0':
- if (strcmp(op, "struct.new") == 0) { return makeStructNewStatic(s, false); }
- goto parse_error;
- case '_': {
- switch (op[11]) {
- case 'd': {
- switch (op[18]) {
- case '\0':
- if (strcmp(op, "struct.new_default") == 0) { return makeStructNewStatic(s, true); }
+ case 'u': {
+ switch (op[7]) {
+ case 'g': {
+ switch (op[10]) {
+ case '\0':
+ if (strcmp(op, "struct.get") == 0) { return makeStructGet(s); }
+ goto parse_error;
+ case '_': {
+ switch (op[11]) {
+ case 's':
+ if (strcmp(op, "struct.get_s") == 0) { return makeStructGet(s, true); }
goto parse_error;
- case '_':
- if (strcmp(op, "struct.new_default_with_rtt") == 0) { return makeStructNew(s, true); }
+ case 'u':
+ if (strcmp(op, "struct.get_u") == 0) { return makeStructGet(s, false); }
goto parse_error;
default: goto parse_error;
}
}
- case 'w':
- if (strcmp(op, "struct.new_with_rtt") == 0) { return makeStructNew(s, false); }
+ default: goto parse_error;
+ }
+ }
+ case 'n': {
+ switch (op[10]) {
+ case '\0':
+ if (strcmp(op, "struct.new") == 0) { return makeStructNewStatic(s, false); }
goto parse_error;
+ case '_': {
+ switch (op[11]) {
+ case 'd': {
+ switch (op[18]) {
+ case '\0':
+ if (strcmp(op, "struct.new_default") == 0) { return makeStructNewStatic(s, true); }
+ goto parse_error;
+ case '_':
+ if (strcmp(op, "struct.new_default_with_rtt") == 0) { return makeStructNew(s, true); }
+ goto parse_error;
+ default: goto parse_error;
+ }
+ }
+ case 'w':
+ if (strcmp(op, "struct.new_with_rtt") == 0) { return makeStructNew(s, false); }
+ goto parse_error;
+ default: goto parse_error;
+ }
+ }
default: goto parse_error;
}
}
+ case 's':
+ if (strcmp(op, "struct.set") == 0) { return makeStructSet(s); }
+ goto parse_error;
default: goto parse_error;
}
}
- case 's':
- if (strcmp(op, "struct.set") == 0) { return makeStructSet(s); }
- goto parse_error;
default: goto parse_error;
}
}
diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp
index ef09ddbae..6b0909666 100644
--- a/src/ir/ReFinalize.cpp
+++ b/src/ir/ReFinalize.cpp
@@ -172,6 +172,7 @@ void ReFinalize::visitArraySet(ArraySet* curr) { curr->finalize(); }
void ReFinalize::visitArrayLen(ArrayLen* curr) { curr->finalize(); }
void ReFinalize::visitArrayCopy(ArrayCopy* curr) { curr->finalize(); }
void ReFinalize::visitRefAs(RefAs* curr) { curr->finalize(); }
+void ReFinalize::visitStringNew(StringNew* curr) { curr->finalize(); }
void ReFinalize::visitFunction(Function* curr) {
// we may have changed the body from unreachable to none, which might be bad
diff --git a/src/ir/cost.h b/src/ir/cost.h
index 8502a7610..1b0862bf6 100644
--- a/src/ir/cost.h
+++ b/src/ir/cost.h
@@ -671,6 +671,9 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
visit(curr->srcRef) + visit(curr->srcIndex) + visit(curr->length);
}
CostType visitRefAs(RefAs* curr) { return 1 + visit(curr->value); }
+ CostType visitStringNew(StringNew* curr) {
+ return 4 + visit(curr->ptr) + visit(curr->length);
+ }
private:
CostType nullCheckCost(Expression* ref) {
diff --git a/src/ir/effects.h b/src/ir/effects.h
index 2bd209d3e..d97b01a36 100644
--- a/src/ir/effects.h
+++ b/src/ir/effects.h
@@ -732,6 +732,7 @@ private:
// we keep the code here simpler, but it does mean another optimization
// cycle may be needed in some cases.
}
+ void visitStringNew(StringNew* curr) {}
};
public:
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp
index 94c22aacc..d10f2b4ac 100644
--- a/src/ir/possible-contents.cpp
+++ b/src/ir/possible-contents.cpp
@@ -672,6 +672,13 @@ struct InfoCollector
visitArraySet(set);
}
+ void visitStringNew(StringNew* curr) {
+ if (curr->type == Type::unreachable) {
+ return;
+ }
+ addRoot(curr, PossibleContents::exactType(curr->type));
+ }
+
// TODO: Model which throws can go to which catches. For now, anything thrown
// is sent to the location of that tag, and any catch of that tag can
// read them.
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 2aacb513b..c9ef1a224 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -2203,6 +2203,24 @@ struct PrintExpressionContents
WASM_UNREACHABLE("invalid ref.is_*");
}
}
+ void visitStringNew(StringNew* curr) {
+ switch (curr->op) {
+ case StringNewUTF8:
+ printMedium(o, "string.new_wtf8 utf8");
+ break;
+ case StringNewWTF8:
+ printMedium(o, "string.new_wtf8 wtf8");
+ break;
+ case StringNewReplace:
+ printMedium(o, "string.new_wtf8 replace");
+ break;
+ case StringNewWTF16:
+ printMedium(o, "string.new_wtf16");
+ break;
+ default:
+ WASM_UNREACHABLE("invalid string.new*");
+ }
+ }
};
// Prints an expression in s-expr format, including both the
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 3b3fa30f6..a5c6d0a46 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1135,6 +1135,8 @@ enum ASTNodes {
BrOnNonFunc = 0x63,
BrOnNonData = 0x64,
BrOnNonI31 = 0x65,
+ StringNewWTF8 = 0x80,
+ StringNewWTF16 = 0x81,
};
enum MemoryAccess {
@@ -1145,6 +1147,12 @@ enum MemoryAccess {
enum MemoryFlags { HasMaximum = 1 << 0, IsShared = 1 << 1, Is64 = 1 << 2 };
+enum StringNewPolicy {
+ UTF8 = 0x00,
+ WTF8 = 0x01,
+ Replace = 0x02,
+};
+
enum FeaturePrefix {
FeatureUsed = '+',
FeatureRequired = '=',
@@ -1700,6 +1708,7 @@ public:
bool maybeVisitArraySet(Expression*& out, uint32_t code);
bool maybeVisitArrayLen(Expression*& out, uint32_t code);
bool maybeVisitArrayCopy(Expression*& out, uint32_t code);
+ bool maybeVisitStringNew(Expression*& out, uint32_t code);
void visitSelect(Select* curr, uint8_t code);
void visitReturn(Return* curr);
void visitMemorySize(MemorySize* curr);
@@ -1721,7 +1730,7 @@ public:
// Let is lowered into a block.
void visitLet(Block* curr);
- void throwError(std::string text);
+ [[noreturn]] void throwError(std::string text);
// Struct/Array instructions have an unnecessary heap type that is just for
// validation (except for the case of unreachability, but that's not a problem
diff --git a/src/wasm-builder.h b/src/wasm-builder.h
index 20babacf1..7eebf3f04 100644
--- a/src/wasm-builder.h
+++ b/src/wasm-builder.h
@@ -989,6 +989,15 @@ public:
ret->finalize();
return ret;
}
+ StringNew*
+ makeStringNew(StringNewOp op, Expression* ptr, Expression* length) {
+ auto* ret = wasm.allocator.alloc<StringNew>();
+ ret->op = op;
+ ret->ptr = ptr;
+ ret->length = length;
+ ret->finalize();
+ return ret;
+ }
// Additional helpers
diff --git a/src/wasm-delegations-fields.def b/src/wasm-delegations-fields.def
index 68e23fdd0..6f028a107 100644
--- a/src/wasm-delegations-fields.def
+++ b/src/wasm-delegations-fields.def
@@ -713,6 +713,14 @@ switch (DELEGATE_ID) {
DELEGATE_END(RefAs);
break;
}
+ case Expression::Id::StringNewId: {
+ DELEGATE_START(StringNew);
+ DELEGATE_FIELD_INT(StringNew, op);
+ DELEGATE_FIELD_CHILD(StringNew, length);
+ DELEGATE_FIELD_CHILD(StringNew, ptr);
+ DELEGATE_END(StringNew);
+ break;
+ }
}
#undef DELEGATE_ID
diff --git a/src/wasm-delegations.def b/src/wasm-delegations.def
index c3f04674a..5e9a486e3 100644
--- a/src/wasm-delegations.def
+++ b/src/wasm-delegations.def
@@ -85,5 +85,6 @@ DELEGATE(ArraySet);
DELEGATE(ArrayLen);
DELEGATE(ArrayCopy);
DELEGATE(RefAs);
+DELEGATE(StringNew);
#undef DELEGATE
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index f682e89a2..d91f471a9 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -1955,6 +1955,9 @@ public:
}
return value;
}
+ Flow visitStringNew(StringNew* curr) {
+ WASM_UNREACHABLE("unimplemented string.new");
+ }
virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); }
diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h
index 75c4f0dec..051157483 100644
--- a/src/wasm-s-parser.h
+++ b/src/wasm-s-parser.h
@@ -303,6 +303,7 @@ private:
Expression* makeArrayLen(Element& s);
Expression* makeArrayCopy(Element& s);
Expression* makeRefAs(Element& s, RefAsOp op);
+ Expression* makeStringNew(Element& s, StringNewOp op);
// Helper functions
Type parseOptionalResultType(Element& s, Index& i);
diff --git a/src/wasm.h b/src/wasm.h
index a466acdbe..6d0733d34 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -583,6 +583,13 @@ enum BrOnOp {
BrOnNonI31,
};
+enum StringNewOp {
+ StringNewUTF8,
+ StringNewWTF8,
+ StringNewReplace,
+ StringNewWTF16
+};
+
//
// Expressions
//
@@ -678,6 +685,7 @@ public:
ArrayLenId,
ArrayCopyId,
RefAsId,
+ StringNewId,
NumExpressionIds
};
Id _id;
@@ -1644,6 +1652,18 @@ public:
void finalize();
};
+class StringNew : public SpecificExpression<Expression::StringNewId> {
+public:
+ StringNew(MixedArena& allocator) {}
+
+ StringNewOp op;
+
+ Expression* ptr;
+ Expression* length;
+
+ void finalize();
+};
+
// Globals
struct Named {
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 87ee00c8a..f087e3e4b 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -3827,6 +3827,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) {
if (maybeVisitArrayCopy(curr, opcode)) {
break;
}
+ if (maybeVisitStringNew(curr, opcode)) {
+ break;
+ }
if (opcode == BinaryConsts::RefIsFunc ||
opcode == BinaryConsts::RefIsData ||
opcode == BinaryConsts::RefIsI31) {
@@ -7025,6 +7028,34 @@ bool WasmBinaryBuilder::maybeVisitArrayCopy(Expression*& out, uint32_t code) {
return true;
}
+bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
+ StringNewOp op;
+ if (code == BinaryConsts::StringNewWTF8) {
+ auto policy = getU32LEB();
+ switch (policy) {
+ case BinaryConsts::StringNewPolicy::UTF8:
+ op = StringNewUTF8;
+ break;
+ case BinaryConsts::StringNewPolicy::WTF8:
+ op = StringNewWTF8;
+ break;
+ case BinaryConsts::StringNewPolicy::Replace:
+ op = StringNewReplace;
+ break;
+ default:
+ throwError("bad policy for string.new");
+ }
+ } else if (code == BinaryConsts::StringNewWTF16) {
+ op = StringNewWTF16;
+ } else {
+ return false;
+ }
+ auto* length = popNonVoidExpression();
+ auto* ptr = popNonVoidExpression();
+ out = Builder(wasm).makeStringNew(op, ptr, length);
+ return true;
+}
+
void WasmBinaryBuilder::visitRefAs(RefAs* curr, uint8_t code) {
BYN_TRACE("zz node: RefAs\n");
switch (code) {
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index 22557a06d..ac15c5e68 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -2935,6 +2935,24 @@ Expression* SExpressionWasmBuilder::makeRefAs(Element& s, RefAsOp op) {
return Builder(wasm).makeRefAs(op, parseExpression(s[1]));
}
+Expression* SExpressionWasmBuilder::makeStringNew(Element& s, StringNewOp op) {
+ size_t i = 1;
+ if (op == StringNewWTF8) {
+ const char* str = s[i++]->c_str();
+ if (strncmp(str, "utf8", 4) == 0) {
+ op = StringNewUTF8;
+ } else if (strncmp(str, "wtf8", 4) == 0) {
+ op = StringNewWTF8;
+ } else if (strncmp(str, "replace", 7) == 0) {
+ op = StringNewReplace;
+ } else {
+ throw ParseException("bad string.new op", s.line, s.col);
+ }
+ }
+ return Builder(wasm).makeStringNew(
+ op, parseExpression(s[i]), parseExpression(s[i + 1]));
+}
+
// converts an s-expression string representing binary data into an output
// sequence of raw bytes this appends to data, which may already contain
// content.
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index fb24dbb01..b63ea0fb0 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -2234,6 +2234,29 @@ void BinaryInstWriter::visitRefAs(RefAs* curr) {
}
}
+void BinaryInstWriter::visitStringNew(StringNew* curr) {
+ o << int8_t(BinaryConsts::GCPrefix);
+ switch (curr->op) {
+ case StringNewUTF8:
+ o << U32LEB(BinaryConsts::StringNewWTF8)
+ << U32LEB(BinaryConsts::StringNewPolicy::UTF8);
+ break;
+ case StringNewWTF8:
+ o << U32LEB(BinaryConsts::StringNewWTF8)
+ << U32LEB(BinaryConsts::StringNewPolicy::WTF8);
+ break;
+ case StringNewReplace:
+ o << U32LEB(BinaryConsts::StringNewWTF8)
+ << U32LEB(BinaryConsts::StringNewPolicy::Replace);
+ break;
+ case StringNewWTF16:
+ o << U32LEB(BinaryConsts::StringNewWTF16);
+ break;
+ default:
+ WASM_UNREACHABLE("invalid string.new*");
+ }
+}
+
void BinaryInstWriter::emitScopeEnd(Expression* curr) {
assert(!breakStack.empty());
breakStack.pop_back();
diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp
index 8d4f1dcee..428e5093c 100644
--- a/src/wasm/wasm.cpp
+++ b/src/wasm/wasm.cpp
@@ -1173,6 +1173,14 @@ void RefAs::finalize() {
}
}
+void StringNew::finalize() {
+ if (ptr->type == Type::unreachable || length->type == Type::unreachable) {
+ type = Type::unreachable;
+ } else {
+ type = Type(HeapType::string, NonNullable);
+ }
+}
+
size_t Function::getNumParams() { return getParams().size(); }
size_t Function::getNumVars() { return vars.size(); }
diff --git a/src/wasm2js.h b/src/wasm2js.h
index 5b98362ad..c2b71aa7a 100644
--- a/src/wasm2js.h
+++ b/src/wasm2js.h
@@ -2303,6 +2303,10 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
unimplemented(curr);
WASM_UNREACHABLE("unimp");
}
+ Ref visitStringNew(StringNew* curr) {
+ unimplemented(curr);
+ WASM_UNREACHABLE("unimp");
+ }
Ref visitRefAs(RefAs* curr) {
unimplemented(curr);
WASM_UNREACHABLE("unimp");