summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlon Zakai <azakai@google.com>2022-07-07 10:32:57 -0700
committerGitHub <noreply@github.com>2022-07-07 10:32:57 -0700
commitdc73f07807400eb48b8c4bc173bae37f188fc90b (patch)
treeed9672cb7847c6d9d33657151dc76fa4cb519ff3 /src
parent876638f8fb5bfc8b264eddc6c0c0d54ed40d0095 (diff)
downloadbinaryen-dc73f07807400eb48b8c4bc173bae37f188fc90b.tar.gz
binaryen-dc73f07807400eb48b8c4bc173bae37f188fc90b.tar.bz2
binaryen-dc73f07807400eb48b8c4bc173bae37f188fc90b.zip
[Strings] string.measure (#4775)
Diffstat (limited to 'src')
-rw-r--r--src/gen-s-parser.inc11
-rw-r--r--src/ir/ReFinalize.cpp1
-rw-r--r--src/ir/cost.h3
-rw-r--r--src/ir/effects.h1
-rw-r--r--src/ir/possible-contents.cpp4
-rw-r--r--src/passes/Print.cpp15
-rw-r--r--src/wasm-binary.h5
-rw-r--r--src/wasm-builder.h7
-rw-r--r--src/wasm-delegations-fields.def7
-rw-r--r--src/wasm-delegations.def1
-rw-r--r--src/wasm-interpreter.h5
-rw-r--r--src/wasm-s-parser.h1
-rw-r--r--src/wasm.h18
-rw-r--r--src/wasm/wasm-binary.cpp34
-rw-r--r--src/wasm/wasm-s-parser.cpp16
-rw-r--r--src/wasm/wasm-stack.cpp25
-rw-r--r--src/wasm/wasm.cpp8
-rw-r--r--src/wasm2js.h4
18 files changed, 158 insertions, 8 deletions
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc
index 84fc66520..3020cf977 100644
--- a/src/gen-s-parser.inc
+++ b/src/gen-s-parser.inc
@@ -3132,6 +3132,17 @@ switch (op[0]) {
case 'c':
if (strcmp(op, "string.const") == 0) { return makeStringConst(s); }
goto parse_error;
+ case 'm': {
+ switch (op[18]) {
+ case '1':
+ if (strcmp(op, "string.measure_wtf16") == 0) { return makeStringMeasure(s, StringMeasureWTF16); }
+ goto parse_error;
+ case '8':
+ if (strcmp(op, "string.measure_wtf8") == 0) { return makeStringMeasure(s, StringMeasureWTF8); }
+ goto parse_error;
+ default: goto parse_error;
+ }
+ }
case 'n': {
switch (op[14]) {
case '1':
diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp
index 6b3e8863e..59b994ec1 100644
--- a/src/ir/ReFinalize.cpp
+++ b/src/ir/ReFinalize.cpp
@@ -174,6 +174,7 @@ void ReFinalize::visitArrayCopy(ArrayCopy* curr) { curr->finalize(); }
void ReFinalize::visitRefAs(RefAs* curr) { curr->finalize(); }
void ReFinalize::visitStringNew(StringNew* curr) { curr->finalize(); }
void ReFinalize::visitStringConst(StringConst* curr) { curr->finalize(); }
+// Recomputes the type of a string.measure node by delegating to its own
+// finalize().
+void ReFinalize::visitStringMeasure(StringMeasure* curr) { curr->finalize(); }
void ReFinalize::visitFunction(Function* curr) {
// we may have changed the body from unreachable to none, which might be bad
diff --git a/src/ir/cost.h b/src/ir/cost.h
index 2b918bf38..372c597a5 100644
--- a/src/ir/cost.h
+++ b/src/ir/cost.h
@@ -675,6 +675,9 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
return 4 + visit(curr->ptr) + visit(curr->length);
}
CostType visitStringConst(StringConst* curr) { return 4; }
+  // Cost of a string.measure: a fixed charge of 6 on top of whatever it costs
+  // to evaluate the operand that produces the string reference.
+  CostType visitStringMeasure(StringMeasure* curr) {
+    auto operandCost = visit(curr->ref);
+    return 6 + operandCost;
+  }
private:
CostType nullCheckCost(Expression* ref) {
diff --git a/src/ir/effects.h b/src/ir/effects.h
index f023b547a..b8007cfba 100644
--- a/src/ir/effects.h
+++ b/src/ir/effects.h
@@ -734,6 +734,7 @@ private:
}
void visitStringNew(StringNew* curr) {}
void visitStringConst(StringConst* curr) {}
+ // Records no effects for string.measure.
+ // NOTE(review): if string.measure can trap (e.g. when given a null
+ // reference) this visitor would need to report that; confirm against the
+ // stringref proposal.
+ void visitStringMeasure(StringMeasure* curr) {}
};
public:
diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp
index 1d1825865..3b5c19f15 100644
--- a/src/ir/possible-contents.cpp
+++ b/src/ir/possible-contents.cpp
@@ -681,6 +681,10 @@ struct InfoCollector
void visitStringConst(StringConst* curr) {
addRoot(curr, PossibleContents::exactType(curr->type));
}
+ // Registers the string.measure expression as a root. No explicit contents
+ // are passed to addRoot here; presumably that selects a conservative
+ // default - confirm against addRoot's declaration.
+ void visitStringMeasure(StringMeasure* curr) {
+ // TODO: optimize when possible
+ addRoot(curr);
+ }
// TODO: Model which throws can go to which catches. For now, anything thrown
// is sent to the location of that tag, and any catch of that tag can
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index e766917ec..26140c030 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -2242,6 +2242,21 @@ struct PrintExpressionContents
o << curr->string.str;
o << '"';
}
+  // Prints the text-format mnemonic of a string.measure instruction. Both
+  // 8-bit variants are spelled "string.measure_wtf8" followed by a policy
+  // keyword, while the 16-bit variant has no policy.
+  void visitStringMeasure(StringMeasure* curr) {
+    const char* mnemonic = nullptr;
+    switch (curr->op) {
+      case StringMeasureUTF8:
+        mnemonic = "string.measure_wtf8 utf8";
+        break;
+      case StringMeasureWTF8:
+        mnemonic = "string.measure_wtf8 wtf8";
+        break;
+      case StringMeasureWTF16:
+        mnemonic = "string.measure_wtf16";
+        break;
+      default:
+        WASM_UNREACHABLE("invalid string.measure*");
+    }
+    printMedium(o, mnemonic);
+  }
};
// Prints an expression in s-expr format, including both the
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index c88aa3895..cb881301f 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1140,6 +1140,8 @@ enum ASTNodes {
StringNewWTF8 = 0x80,
StringNewWTF16 = 0x81,
StringConst = 0x82,
+ StringMeasureWTF8 = 0x84,
+ StringMeasureWTF16 = 0x85,
};
enum MemoryAccess {
@@ -1150,7 +1152,7 @@ enum MemoryAccess {
enum MemoryFlags { HasMaximum = 1 << 0, IsShared = 1 << 1, Is64 = 1 << 2 };
-enum StringNewPolicy {
+enum StringPolicy {
UTF8 = 0x00,
WTF8 = 0x01,
Replace = 0x02,
@@ -1722,6 +1724,7 @@ public:
bool maybeVisitArrayCopy(Expression*& out, uint32_t code);
bool maybeVisitStringNew(Expression*& out, uint32_t code);
bool maybeVisitStringConst(Expression*& out, uint32_t code);
+ bool maybeVisitStringMeasure(Expression*& out, uint32_t code);
void visitSelect(Select* curr, uint8_t code);
void visitReturn(Return* curr);
void visitMemorySize(MemorySize* curr);
diff --git a/src/wasm-builder.h b/src/wasm-builder.h
index 238f7d738..0829719ed 100644
--- a/src/wasm-builder.h
+++ b/src/wasm-builder.h
@@ -1004,6 +1004,13 @@ public:
ret->finalize();
return ret;
}
+  // Creates a StringMeasure node from the module's allocator, fills in its
+  // operation and operand, and finalizes it so that its type is set before it
+  // is returned.
+  StringMeasure* makeStringMeasure(StringMeasureOp op, Expression* ref) {
+    auto* measure = wasm.allocator.alloc<StringMeasure>();
+    measure->ref = ref;
+    measure->op = op;
+    measure->finalize();
+    return measure;
+  }
// Additional helpers
diff --git a/src/wasm-delegations-fields.def b/src/wasm-delegations-fields.def
index a7f39c3c7..a08a887a4 100644
--- a/src/wasm-delegations-fields.def
+++ b/src/wasm-delegations-fields.def
@@ -727,6 +727,13 @@ switch (DELEGATE_ID) {
DELEGATE_END(StringConst);
break;
}
+ case Expression::Id::StringMeasureId: {
+ DELEGATE_START(StringMeasure);
+ DELEGATE_FIELD_INT(StringMeasure, op);
+ DELEGATE_FIELD_CHILD(StringMeasure, ref);
+ DELEGATE_END(StringMeasure);
+ break;
+ }
}
#undef DELEGATE_ID
diff --git a/src/wasm-delegations.def b/src/wasm-delegations.def
index ba47d9cf5..e0a2ff13e 100644
--- a/src/wasm-delegations.def
+++ b/src/wasm-delegations.def
@@ -87,5 +87,6 @@ DELEGATE(ArrayCopy);
DELEGATE(RefAs);
DELEGATE(StringNew);
DELEGATE(StringConst);
+DELEGATE(StringMeasure);
#undef DELEGATE
diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h
index 1cbbd7689..e9f79df82 100644
--- a/src/wasm-interpreter.h
+++ b/src/wasm-interpreter.h
@@ -1959,7 +1959,10 @@ public:
WASM_UNREACHABLE("unimplemented string.new");
}
Flow visitStringConst(StringConst* curr) {
- WASM_UNREACHABLE("unimplemented string.new");
+ WASM_UNREACHABLE("unimplemented string.const");
+ }
+ // string.measure is not implemented in the interpreter; visiting one hits
+ // WASM_UNREACHABLE.
+ Flow visitStringMeasure(StringMeasure* curr) {
+ WASM_UNREACHABLE("unimplemented string.measure");
}
virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); }
diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h
index 0eb4680af..b21b86505 100644
--- a/src/wasm-s-parser.h
+++ b/src/wasm-s-parser.h
@@ -305,6 +305,7 @@ private:
Expression* makeRefAs(Element& s, RefAsOp op);
Expression* makeStringNew(Element& s, StringNewOp op);
Expression* makeStringConst(Element& s);
+ Expression* makeStringMeasure(Element& s, StringMeasureOp op);
// Helper functions
Type parseOptionalResultType(Element& s, Index& i);
diff --git a/src/wasm.h b/src/wasm.h
index 3032b4ac1..092ffb7de 100644
--- a/src/wasm.h
+++ b/src/wasm.h
@@ -590,6 +590,12 @@ enum StringNewOp {
StringNewWTF16
};
+// The variants of string.measure. In the binary format the UTF8 and WTF8
+// variants share the string.measure_wtf8 opcode and are told apart by a
+// policy immediate, while WTF16 has an opcode of its own.
+enum StringMeasureOp {
+ StringMeasureUTF8,
+ StringMeasureWTF8,
+ StringMeasureWTF16,
+};
+
//
// Expressions
//
@@ -687,6 +693,7 @@ public:
RefAsId,
StringNewId,
StringConstId,
+ StringMeasureId,
NumExpressionIds
};
Id _id;
@@ -1677,6 +1684,17 @@ public:
void finalize();
};
+// Expression node for the string.measure_* instructions. After finalize() its
+// type is i32, or unreachable when the operand is unreachable.
+class StringMeasure : public SpecificExpression<Expression::StringMeasureId> {
+public:
+ // The allocator argument is not used by this constructor.
+ StringMeasure(MixedArena& allocator) {}
+
+ // Which variant of string.measure this node represents.
+ StringMeasureOp op;
+
+ // The child expression providing the value to measure.
+ Expression* ref;
+
+ // Sets this node's type from the type of `ref`.
+ void finalize();
+};
+
// Globals
struct Named {
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 16950f88f..d81aa0ca4 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -3924,6 +3924,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) {
if (maybeVisitStringConst(curr, opcode)) {
break;
}
+ if (maybeVisitStringMeasure(curr, opcode)) {
+ break;
+ }
if (opcode == BinaryConsts::RefIsFunc ||
opcode == BinaryConsts::RefIsData ||
opcode == BinaryConsts::RefIsI31) {
@@ -7127,13 +7130,13 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
if (code == BinaryConsts::StringNewWTF8) {
auto policy = getU32LEB();
switch (policy) {
- case BinaryConsts::StringNewPolicy::UTF8:
+ case BinaryConsts::StringPolicy::UTF8:
op = StringNewUTF8;
break;
- case BinaryConsts::StringNewPolicy::WTF8:
+ case BinaryConsts::StringPolicy::WTF8:
op = StringNewWTF8;
break;
- case BinaryConsts::StringNewPolicy::Replace:
+ case BinaryConsts::StringPolicy::Replace:
op = StringNewReplace;
break;
default:
@@ -7162,6 +7165,31 @@ bool WasmBinaryBuilder::maybeVisitStringConst(Expression*& out, uint32_t code) {
return true;
}
+// Tries to decode `code` as a string.measure opcode. Returns false, without
+// reading anything further, when `code` is neither of the two string.measure
+// opcodes. Otherwise it reads the policy immediate (8-bit opcode only), pops
+// the operand, stores the new node in `out` and returns true. An unknown
+// policy value is reported through throwError.
+bool WasmBinaryBuilder::maybeVisitStringMeasure(Expression*& out,
+                                                uint32_t code) {
+  if (code != BinaryConsts::StringMeasureWTF8 &&
+      code != BinaryConsts::StringMeasureWTF16) {
+    return false;
+  }
+  // The 16-bit opcode maps directly to its op; the 8-bit opcode is refined
+  // below according to its policy immediate.
+  StringMeasureOp op = StringMeasureWTF16;
+  if (code == BinaryConsts::StringMeasureWTF8) {
+    switch (getU32LEB()) {
+      case BinaryConsts::StringPolicy::UTF8:
+        op = StringMeasureUTF8;
+        break;
+      case BinaryConsts::StringPolicy::WTF8:
+        op = StringMeasureWTF8;
+        break;
+      default:
+        throwError("bad policy for string.measure");
+    }
+  }
+  auto* operand = popNonVoidExpression();
+  out = Builder(wasm).makeStringMeasure(op, operand);
+  return true;
+}
+
void WasmBinaryBuilder::visitRefAs(RefAs* curr, uint8_t code) {
BYN_TRACE("zz node: RefAs\n");
switch (code) {
diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp
index cd6c167c0..98ed64c2d 100644
--- a/src/wasm/wasm-s-parser.cpp
+++ b/src/wasm/wasm-s-parser.cpp
@@ -2957,6 +2957,22 @@ Expression* SExpressionWasmBuilder::makeStringConst(Element& s) {
return Builder(wasm).makeStringConst(s[1]->str());
}
+// Parses a string.measure_* s-expression into a StringMeasure node.
+//
+// `s` is the whole list for the instruction and `op` is the variant chosen
+// from the mnemonic. For string.measure_wtf8 the element after the mnemonic
+// is a policy keyword ("utf8" or "wtf8") that selects the final op, and the
+// operand follows it; for other ops the operand comes right after the
+// mnemonic. Throws ParseException when the policy keyword is not recognized.
+Expression* SExpressionWasmBuilder::makeStringMeasure(Element& s,
+                                                      StringMeasureOp op) {
+  size_t i = 1;
+  if (op == StringMeasureWTF8) {
+    // Only the first four characters of the keyword are compared, so this is
+    // a prefix match.
+    const char* str = s[i++]->c_str();
+    if (strncmp(str, "utf8", 4) == 0) {
+      op = StringMeasureUTF8;
+    } else if (strncmp(str, "wtf8", 4) == 0) {
+      op = StringMeasureWTF8;
+    } else {
+      // Name the instruction actually being parsed (this used to say
+      // string.new, which misdirected anyone reading the error).
+      throw ParseException("bad string.measure op", s.line, s.col);
+    }
+  }
+  return Builder(wasm).makeStringMeasure(op, parseExpression(s[i]));
+}
+
// converts an s-expression string representing binary data into an output
// sequence of raw bytes this appends to data, which may already contain
// content.
diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp
index e131ab207..e3740ecee 100644
--- a/src/wasm/wasm-stack.cpp
+++ b/src/wasm/wasm-stack.cpp
@@ -2239,15 +2239,15 @@ void BinaryInstWriter::visitStringNew(StringNew* curr) {
switch (curr->op) {
case StringNewUTF8:
o << U32LEB(BinaryConsts::StringNewWTF8)
- << U32LEB(BinaryConsts::StringNewPolicy::UTF8);
+ << U32LEB(BinaryConsts::StringPolicy::UTF8);
break;
case StringNewWTF8:
o << U32LEB(BinaryConsts::StringNewWTF8)
- << U32LEB(BinaryConsts::StringNewPolicy::WTF8);
+ << U32LEB(BinaryConsts::StringPolicy::WTF8);
break;
case StringNewReplace:
o << U32LEB(BinaryConsts::StringNewWTF8)
- << U32LEB(BinaryConsts::StringNewPolicy::Replace);
+ << U32LEB(BinaryConsts::StringPolicy::Replace);
break;
case StringNewWTF16:
o << U32LEB(BinaryConsts::StringNewWTF16);
@@ -2262,6 +2262,25 @@ void BinaryInstWriter::visitStringConst(StringConst* curr) {
<< U32LEB(parent.getStringIndex(curr->string));
}
+// Writes the binary encoding of a string.measure instruction: the GC prefix
+// byte, then the LEB-encoded opcode, then (for the two 8-bit variants, which
+// share one opcode) a LEB-encoded policy immediate.
+void BinaryInstWriter::visitStringMeasure(StringMeasure* curr) {
+  o << int8_t(BinaryConsts::GCPrefix);
+  switch (curr->op) {
+    case StringMeasureUTF8:
+      o << U32LEB(BinaryConsts::StringMeasureWTF8)
+        << U32LEB(BinaryConsts::StringPolicy::UTF8);
+      break;
+    case StringMeasureWTF8:
+      o << U32LEB(BinaryConsts::StringMeasureWTF8)
+        << U32LEB(BinaryConsts::StringPolicy::WTF8);
+      break;
+    case StringMeasureWTF16:
+      o << U32LEB(BinaryConsts::StringMeasureWTF16);
+      break;
+    default:
+      // The message names string.measure so a failure here is not mistaken
+      // for a problem in the string.new writer.
+      WASM_UNREACHABLE("invalid string.measure*");
+  }
+}
+
void BinaryInstWriter::emitScopeEnd(Expression* curr) {
assert(!breakStack.empty());
breakStack.pop_back();
diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp
index 86eed184f..749a44339 100644
--- a/src/wasm/wasm.cpp
+++ b/src/wasm/wasm.cpp
@@ -1184,6 +1184,14 @@ void StringNew::finalize() {
void StringConst::finalize() { type = Type(HeapType::string, NonNullable); }
+// Sets the node's type: unreachable when the operand is unreachable, and i32
+// otherwise.
+void StringMeasure::finalize() {
+  const bool operandUnreachable = ref->type == Type::unreachable;
+  type = operandUnreachable ? Type::unreachable : Type::i32;
+}
+
size_t Function::getNumParams() { return getParams().size(); }
size_t Function::getNumVars() { return vars.size(); }
diff --git a/src/wasm2js.h b/src/wasm2js.h
index ad373074d..a6782ac04 100644
--- a/src/wasm2js.h
+++ b/src/wasm2js.h
@@ -2311,6 +2311,10 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
unimplemented(curr);
WASM_UNREACHABLE("unimp");
}
+ // string.measure is not supported by wasm2js: the node is reported through
+ // unimplemented() and then WASM_UNREACHABLE is hit.
+ Ref visitStringMeasure(StringMeasure* curr) {
+ unimplemented(curr);
+ WASM_UNREACHABLE("unimp");
+ }
Ref visitRefAs(RefAs* curr) {
unimplemented(curr);
WASM_UNREACHABLE("unimp");