diff options
author | Alon Zakai <azakai@google.com> | 2022-07-07 10:32:57 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-07 10:32:57 -0700 |
commit | dc73f07807400eb48b8c4bc173bae37f188fc90b (patch) | |
tree | ed9672cb7847c6d9d33657151dc76fa4cb519ff3 /src | |
parent | 876638f8fb5bfc8b264eddc6c0c0d54ed40d0095 (diff) | |
download | binaryen-dc73f07807400eb48b8c4bc173bae37f188fc90b.tar.gz binaryen-dc73f07807400eb48b8c4bc173bae37f188fc90b.tar.bz2 binaryen-dc73f07807400eb48b8c4bc173bae37f188fc90b.zip |
[Strings] string.measure (#4775)
Diffstat (limited to 'src')
-rw-r--r-- | src/gen-s-parser.inc | 11 | ||||
-rw-r--r-- | src/ir/ReFinalize.cpp | 1 | ||||
-rw-r--r-- | src/ir/cost.h | 3 | ||||
-rw-r--r-- | src/ir/effects.h | 1 | ||||
-rw-r--r-- | src/ir/possible-contents.cpp | 4 | ||||
-rw-r--r-- | src/passes/Print.cpp | 15 | ||||
-rw-r--r-- | src/wasm-binary.h | 5 | ||||
-rw-r--r-- | src/wasm-builder.h | 7 | ||||
-rw-r--r-- | src/wasm-delegations-fields.def | 7 | ||||
-rw-r--r-- | src/wasm-delegations.def | 1 | ||||
-rw-r--r-- | src/wasm-interpreter.h | 5 | ||||
-rw-r--r-- | src/wasm-s-parser.h | 1 | ||||
-rw-r--r-- | src/wasm.h | 18 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 34 | ||||
-rw-r--r-- | src/wasm/wasm-s-parser.cpp | 16 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 25 | ||||
-rw-r--r-- | src/wasm/wasm.cpp | 8 | ||||
-rw-r--r-- | src/wasm2js.h | 4 |
18 files changed, 158 insertions, 8 deletions
diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index 84fc66520..3020cf977 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -3132,6 +3132,17 @@ switch (op[0]) { case 'c': if (strcmp(op, "string.const") == 0) { return makeStringConst(s); } goto parse_error; + case 'm': { + switch (op[18]) { + case '1': + if (strcmp(op, "string.measure_wtf16") == 0) { return makeStringMeasure(s, StringMeasureWTF16); } + goto parse_error; + case '8': + if (strcmp(op, "string.measure_wtf8") == 0) { return makeStringMeasure(s, StringMeasureWTF8); } + goto parse_error; + default: goto parse_error; + } + } case 'n': { switch (op[14]) { case '1': diff --git a/src/ir/ReFinalize.cpp b/src/ir/ReFinalize.cpp index 6b3e8863e..59b994ec1 100644 --- a/src/ir/ReFinalize.cpp +++ b/src/ir/ReFinalize.cpp @@ -174,6 +174,7 @@ void ReFinalize::visitArrayCopy(ArrayCopy* curr) { curr->finalize(); } void ReFinalize::visitRefAs(RefAs* curr) { curr->finalize(); } void ReFinalize::visitStringNew(StringNew* curr) { curr->finalize(); } void ReFinalize::visitStringConst(StringConst* curr) { curr->finalize(); } +void ReFinalize::visitStringMeasure(StringMeasure* curr) { curr->finalize(); } void ReFinalize::visitFunction(Function* curr) { // we may have changed the body from unreachable to none, which might be bad diff --git a/src/ir/cost.h b/src/ir/cost.h index 2b918bf38..372c597a5 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -675,6 +675,9 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> { return 4 + visit(curr->ptr) + visit(curr->length); } CostType visitStringConst(StringConst* curr) { return 4; } + CostType visitStringMeasure(StringMeasure* curr) { + return 6 + visit(curr->ref); + } private: CostType nullCheckCost(Expression* ref) { diff --git a/src/ir/effects.h b/src/ir/effects.h index f023b547a..b8007cfba 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -734,6 +734,7 @@ private: } void visitStringNew(StringNew* curr) {} void visitStringConst(StringConst* curr) {} + void visitStringMeasure(StringMeasure* curr) {} }; public: diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp index 1d1825865..3b5c19f15 100644 --- a/src/ir/possible-contents.cpp +++ b/src/ir/possible-contents.cpp @@ -681,6 +681,10 @@ struct InfoCollector void visitStringConst(StringConst* curr) { addRoot(curr, PossibleContents::exactType(curr->type)); } + void visitStringMeasure(StringMeasure* curr) { + // TODO: optimize when possible + addRoot(curr); + } // TODO: Model which throws can go to which catches. For now, anything thrown // is sent to the location of that tag, and any catch of that tag can diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index e766917ec..26140c030 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2242,6 +2242,21 @@ struct PrintExpressionContents o << curr->string.str; o << '"'; } + void visitStringMeasure(StringMeasure* curr) { + switch (curr->op) { + case StringMeasureUTF8: + printMedium(o, "string.measure_wtf8 utf8"); + break; + case StringMeasureWTF8: + printMedium(o, "string.measure_wtf8 wtf8"); + break; + case StringMeasureWTF16: + printMedium(o, "string.measure_wtf16"); + break; + default: + WASM_UNREACHABLE("invalid string.measure*"); + } + } }; // Prints an expression in s-expr format, including both the diff --git a/src/wasm-binary.h b/src/wasm-binary.h index c88aa3895..cb881301f 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1140,6 +1140,8 @@ enum ASTNodes { StringNewWTF8 = 0x80, StringNewWTF16 = 0x81, StringConst = 0x82, + StringMeasureWTF8 = 0x84, + StringMeasureWTF16 = 0x85, }; enum MemoryAccess { @@ -1150,7 +1152,7 @@ enum MemoryAccess { enum MemoryFlags { HasMaximum = 1 << 0, IsShared = 1 << 1, Is64 = 1 << 2 }; -enum StringNewPolicy { +enum StringPolicy { UTF8 = 0x00, WTF8 = 0x01, Replace = 0x02, @@ -1722,6 +1724,7 @@ public: bool maybeVisitArrayCopy(Expression*& out, uint32_t code); bool maybeVisitStringNew(Expression*& out, uint32_t code); bool maybeVisitStringConst(Expression*& out, uint32_t code); + bool maybeVisitStringMeasure(Expression*& out, uint32_t code); void visitSelect(Select* curr, uint8_t code); void visitReturn(Return* curr); void visitMemorySize(MemorySize* curr); diff --git a/src/wasm-builder.h b/src/wasm-builder.h index 238f7d738..0829719ed 100644 --- a/src/wasm-builder.h +++ b/src/wasm-builder.h @@ -1004,6 +1004,13 @@ public: ret->finalize(); return ret; } + StringMeasure* makeStringMeasure(StringMeasureOp op, Expression* ref) { + auto* ret = wasm.allocator.alloc<StringMeasure>(); + ret->op = op; + ret->ref = ref; + ret->finalize(); + return ret; + } // Additional helpers diff --git a/src/wasm-delegations-fields.def b/src/wasm-delegations-fields.def index a7f39c3c7..a08a887a4 100644 --- a/src/wasm-delegations-fields.def +++ b/src/wasm-delegations-fields.def @@ -727,6 +727,13 @@ switch (DELEGATE_ID) { DELEGATE_END(StringConst); break; } + case Expression::Id::StringMeasureId: { + DELEGATE_START(StringMeasure); + DELEGATE_FIELD_INT(StringMeasure, op); + DELEGATE_FIELD_CHILD(StringMeasure, ref); + DELEGATE_END(StringMeasure); + break; + } } #undef DELEGATE_ID diff --git a/src/wasm-delegations.def b/src/wasm-delegations.def index ba47d9cf5..e0a2ff13e 100644 --- a/src/wasm-delegations.def +++ b/src/wasm-delegations.def @@ -87,5 +87,6 @@ DELEGATE(ArrayCopy); DELEGATE(RefAs); DELEGATE(StringNew); DELEGATE(StringConst); +DELEGATE(StringMeasure); #undef DELEGATE diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 1cbbd7689..e9f79df82 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1959,7 +1959,10 @@ public: WASM_UNREACHABLE("unimplemented string.new"); } Flow visitStringConst(StringConst* curr) { - WASM_UNREACHABLE("unimplemented string.new"); + WASM_UNREACHABLE("unimplemented string.const"); + } + Flow visitStringMeasure(StringMeasure* curr) { + WASM_UNREACHABLE("unimplemented string.measure"); } virtual void trap(const char* why) { WASM_UNREACHABLE("unimp"); } diff --git a/src/wasm-s-parser.h b/src/wasm-s-parser.h index 0eb4680af..b21b86505 100644 --- a/src/wasm-s-parser.h +++ b/src/wasm-s-parser.h @@ -305,6 +305,7 @@ private: Expression* makeRefAs(Element& s, RefAsOp op); Expression* makeStringNew(Element& s, StringNewOp op); Expression* makeStringConst(Element& s); + Expression* makeStringMeasure(Element& s, StringMeasureOp op); // Helper functions Type parseOptionalResultType(Element& s, Index& i); diff --git a/src/wasm.h b/src/wasm.h index 3032b4ac1..092ffb7de 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -590,6 +590,12 @@ enum StringNewOp { StringNewWTF16 }; +enum StringMeasureOp { + StringMeasureUTF8, + StringMeasureWTF8, + StringMeasureWTF16, +}; + // // Expressions // @@ -687,6 +693,7 @@ public: RefAsId, StringNewId, StringConstId, + StringMeasureId, NumExpressionIds }; Id _id; @@ -1677,6 +1684,17 @@ public: void finalize(); }; +class StringMeasure : public SpecificExpression<Expression::StringMeasureId> { +public: + StringMeasure(MixedArena& allocator) {} + + StringMeasureOp op; + + Expression* ref; + + void finalize(); +}; + // Globals struct Named { diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 16950f88f..d81aa0ca4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -3924,6 +3924,9 @@ BinaryConsts::ASTNodes WasmBinaryBuilder::readExpression(Expression*& curr) { if (maybeVisitStringConst(curr, opcode)) { break; } + if (maybeVisitStringMeasure(curr, opcode)) { + break; + } if (opcode == BinaryConsts::RefIsFunc || opcode == BinaryConsts::RefIsData || opcode == BinaryConsts::RefIsI31) { @@ -7127,13 +7130,13 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) { if (code == BinaryConsts::StringNewWTF8) { auto policy = getU32LEB(); switch (policy) { - case BinaryConsts::StringNewPolicy::UTF8: + case BinaryConsts::StringPolicy::UTF8: op = StringNewUTF8; break; - case BinaryConsts::StringNewPolicy::WTF8: + case BinaryConsts::StringPolicy::WTF8: op = StringNewWTF8; break; - case BinaryConsts::StringNewPolicy::Replace: + case BinaryConsts::StringPolicy::Replace: op = StringNewReplace; break; default: @@ -7162,6 +7165,31 @@ bool WasmBinaryBuilder::maybeVisitStringConst(Expression*& out, uint32_t code) { return true; } +bool WasmBinaryBuilder::maybeVisitStringMeasure(Expression*& out, + uint32_t code) { + StringMeasureOp op; + if (code == BinaryConsts::StringMeasureWTF8) { + auto policy = getU32LEB(); + switch (policy) { + case BinaryConsts::StringPolicy::UTF8: + op = StringMeasureUTF8; + break; + case BinaryConsts::StringPolicy::WTF8: + op = StringMeasureWTF8; + break; + default: + throwError("bad policy for string.measure"); + } + } else if (code == BinaryConsts::StringMeasureWTF16) { + op = StringMeasureWTF16; + } else { + return false; + } + auto* ref = popNonVoidExpression(); + out = Builder(wasm).makeStringMeasure(op, ref); + return true; +} + void WasmBinaryBuilder::visitRefAs(RefAs* curr, uint8_t code) { BYN_TRACE("zz node: RefAs\n"); switch (code) { diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp index cd6c167c0..98ed64c2d 100644 --- a/src/wasm/wasm-s-parser.cpp +++ b/src/wasm/wasm-s-parser.cpp @@ -2957,6 +2957,22 @@ Expression* SExpressionWasmBuilder::makeStringConst(Element& s) { return Builder(wasm).makeStringConst(s[1]->str()); } +Expression* SExpressionWasmBuilder::makeStringMeasure(Element& s, + StringMeasureOp op) { + size_t i = 1; + if (op == StringMeasureWTF8) { + const char* str = s[i++]->c_str(); + if (strncmp(str, "utf8", 4) == 0) { + op = StringMeasureUTF8; + } else if (strncmp(str, "wtf8", 4) == 0) { + op = StringMeasureWTF8; + } else { + throw ParseException("bad string.new op", s.line, s.col); + } + } + return Builder(wasm).makeStringMeasure(op, parseExpression(s[i])); +} + // converts an s-expression string representing binary data into an output // sequence of raw bytes this appends to data, which may already contain // content. diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index e131ab207..e3740ecee 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -2239,15 +2239,15 @@ void BinaryInstWriter::visitStringNew(StringNew* curr) { switch (curr->op) { case StringNewUTF8: o << U32LEB(BinaryConsts::StringNewWTF8) - << U32LEB(BinaryConsts::StringNewPolicy::UTF8); + << U32LEB(BinaryConsts::StringPolicy::UTF8); break; case StringNewWTF8: o << U32LEB(BinaryConsts::StringNewWTF8) - << U32LEB(BinaryConsts::StringNewPolicy::WTF8); + << U32LEB(BinaryConsts::StringPolicy::WTF8); break; case StringNewReplace: o << U32LEB(BinaryConsts::StringNewWTF8) - << U32LEB(BinaryConsts::StringNewPolicy::Replace); + << U32LEB(BinaryConsts::StringPolicy::Replace); break; case StringNewWTF16: o << U32LEB(BinaryConsts::StringNewWTF16); @@ -2262,6 +2262,25 @@ void BinaryInstWriter::visitStringConst(StringConst* curr) { << U32LEB(parent.getStringIndex(curr->string)); } +void BinaryInstWriter::visitStringMeasure(StringMeasure* curr) { + o << int8_t(BinaryConsts::GCPrefix); + switch (curr->op) { + case StringMeasureUTF8: + o << U32LEB(BinaryConsts::StringMeasureWTF8) + << U32LEB(BinaryConsts::StringPolicy::UTF8); + break; + case StringMeasureWTF8: + o << U32LEB(BinaryConsts::StringMeasureWTF8) + << U32LEB(BinaryConsts::StringPolicy::WTF8); + break; + case StringMeasureWTF16: + o << U32LEB(BinaryConsts::StringMeasureWTF16); + break; + default: + WASM_UNREACHABLE("invalid string.new*"); + } +} + void BinaryInstWriter::emitScopeEnd(Expression* curr) { assert(!breakStack.empty()); breakStack.pop_back(); diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 86eed184f..749a44339 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -1184,6 +1184,14 @@ void StringNew::finalize() { void StringConst::finalize() { type = Type(HeapType::string, NonNullable); } +void StringMeasure::finalize() { + if (ref->type == Type::unreachable) { + type = Type::unreachable; + } else { + type = Type::i32; + } +} + size_t Function::getNumParams() { return getParams().size(); } size_t Function::getNumVars() { return vars.size(); } diff --git a/src/wasm2js.h b/src/wasm2js.h index ad373074d..a6782ac04 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -2311,6 +2311,10 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m, unimplemented(curr); WASM_UNREACHABLE("unimp"); } + Ref visitStringMeasure(StringMeasure* curr) { + unimplemented(curr); + WASM_UNREACHABLE("unimp"); + } Ref visitRefAs(RefAs* curr) { unimplemented(curr); WASM_UNREACHABLE("unimp"); |