From 0fe08192d866f72d0f4e680c862bb7af48dec1ac Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 2 Oct 2023 16:09:05 -0700 Subject: [Parser] Parse labels and br (#5970) The parser previously parsed labels and could attach them to control flow structures, but did not maintain the context necessary to correctly parse branches. Support parsing labels as both names and indices in IRBuilder, handling shadowing correctly, and use that support to implement parsing of br. --- src/parser/contexts.h | 16 +++++++- src/parser/parsers.h | 17 +++++++- src/wasm-ir-builder.h | 77 ++++++++++++++++++++++++++++++------ src/wasm/wasm-ir-builder.cpp | 94 ++++++++++++++++++++++++++++++++++++++------ src/wasm/wasm-validator.cpp | 7 +++- 5 files changed, 182 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 7eedcccc2..862d9772d 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -279,6 +279,7 @@ struct NullInstrParserCtx { using GlobalIdxT = Ok; using MemoryIdxT = Ok; using DataIdxT = Ok; + using LabelIdxT = Ok; using MemargT = Ok; @@ -298,6 +299,8 @@ struct NullInstrParserCtx { MemoryIdxT getMemoryFromName(Name) { return Ok{}; } DataIdxT getDataFromIdx(uint32_t) { return Ok{}; } DataIdxT getDataFromName(Name) { return Ok{}; } + LabelIdxT getLabelFromIdx(uint32_t) { return Ok{}; } + LabelIdxT getLabelFromName(Name) { return Ok{}; } MemargT getMemarg(uint64_t, uint32_t) { return Ok{}; } @@ -370,7 +373,7 @@ struct NullInstrParserCtx { Result<> makeMemoryCopy(Index, MemoryIdxT*, MemoryIdxT*) { return Ok{}; } Result<> makeMemoryFill(Index, MemoryIdxT*) { return Ok{}; } - + Result<> makeBreak(Index, LabelIdxT) { return Ok{}; } Result<> makeReturn(Index) { return Ok{}; } template Result<> makeRefNull(Index, HeapTypeT) { return Ok{}; @@ -793,6 +796,7 @@ struct ParseDefsCtx : TypeParserCtx { using FieldIdxT = Index; using LocalIdxT = Index; + using LabelIdxT = Index; using GlobalIdxT = Name; using MemoryIdxT = Name; using DataIdxT = Name; @@ -936,6 +940,12 @@ struct ParseDefsCtx : TypeParserCtx { return name; } + Result getLabelFromIdx(uint32_t idx) { return idx; } + + Result getLabelFromName(Name name) { + return irBuilder.getLabelIndex(name); + } + Result makeTypeUse(Index pos, std::optional type, ParamsT* params, @@ -1203,6 +1213,10 @@ struct ParseDefsCtx : TypeParserCtx { return withLoc(pos, irBuilder.makeMemoryFill(*m)); } + Result<> makeBreak(Index pos, Index label) { + return withLoc(pos, irBuilder.makeBreak(label)); + } + Result<> makeReturn(Index pos) { return withLoc(pos, irBuilder.makeReturn()); } diff --git a/src/parser/parsers.h b/src/parser/parsers.h index 38005253f..32f6709df 100644 --- a/src/parser/parsers.h +++ b/src/parser/parsers.h @@ -177,6 +177,7 @@ template Result memidx(Ctx&); template MaybeResult maybeMemuse(Ctx&); template Result globalidx(Ctx&); template Result localidx(Ctx&); +template Result labelidx(Ctx&); template Result typeuse(Ctx&); MaybeResult inlineImport(ParseInput&); Result> inlineExports(ParseInput&); @@ -1195,7 +1196,9 @@ Result<> makeCallIndirect(Ctx& ctx, Index pos, bool isReturn) { } template Result<> makeBreak(Ctx& ctx, Index pos) { - return ctx.in.err("unimplemented instruction"); + auto label = labelidx(ctx); + CHECK_ERR(label); + return ctx.makeBreak(pos, *label); } template Result<> makeBreakTable(Ctx& ctx, Index pos) { @@ -1569,6 +1572,18 @@ template Result localidx(Ctx& ctx) { return ctx.in.err("expected local index or identifier"); } +// labelidx ::= x:u32 => x +// | v:id => x (if labels[x] = v) +template Result labelidx(Ctx& ctx) { + if (auto x = ctx.in.takeU32()) { + return ctx.getLabelFromIdx(*x); + } + if (auto id = ctx.in.takeID()) { + return ctx.getLabelFromName(*id); + } + return ctx.in.err("expected label index or identifier"); +} + // typeuse ::= '(' 'type' x:typeidx ')' => x, [] // (if typedefs[x] = [t1*] -> [t2*] // | '(' 'type' x:typeidx ')' ((t1,IDs):param)* (t2:result)* => x, IDs diff --git a/src/wasm-ir-builder.h b/src/wasm-ir-builder.h index 2da69fff7..b42d40892 100644 --- a/src/wasm-ir-builder.h +++ b/src/wasm-ir-builder.h @@ -19,6 +19,7 @@ #include +#include "ir/names.h" #include "support/result.h" #include "wasm-builder.h" #include "wasm-traversal.h" @@ -58,14 +59,20 @@ public: [[nodiscard]] Result<> visitLoopStart(Loop* iff); [[nodiscard]] Result<> visitEnd(); - // Alternatively, call makeXYZ to have the IRBuilder allocate the nodes. This - // is generally safer than calling `visit` because the function signatures - // ensure that there are no missing fields. + // Binaryen IR uses names to refer to branch targets, but in general there may + // be branches to constructs that do not yet have names, so in IRBuilder we + // use indices to refer to branch targets instead, just as the binary format + // does. This function converts a branch target name to the correct index. + [[nodiscard]] Result getLabelIndex(Name label); + + // Instead of calling visit, call makeXYZ to have the IRBuilder allocate the + // nodes. This is generally safer than calling `visit` because the function + // signatures ensure that there are no missing fields. [[nodiscard]] Result<> makeNop(); [[nodiscard]] Result<> makeBlock(Name label, Type type); [[nodiscard]] Result<> makeIf(Name label, Type type); [[nodiscard]] Result<> makeLoop(Name label, Type type); - // [[nodiscard]] Result<> makeBreak(); + [[nodiscard]] Result<> makeBreak(Index label); // [[nodiscard]] Result<> makeSwitch(); // [[nodiscard]] Result<> makeCall(); // [[nodiscard]] Result<> makeCallIndirect(); @@ -177,6 +184,8 @@ public: [[nodiscard]] Result<> visitReturn(Return*); [[nodiscard]] Result<> visitStructNew(StructNew*); [[nodiscard]] Result<> visitArrayNew(ArrayNew*); + [[nodiscard]] Result<> visitBreak(Break*, + std::optional label = std::nullopt); private: Module& wasm; @@ -196,11 +205,11 @@ private: }; struct IfScope { If* iff; - Name label; + Name originalLabel; }; struct ElseScope { If* iff; - Name label; + Name originalLabel; }; struct LoopScope { Loop* loop; @@ -211,6 +220,9 @@ private: // The control flow structure we are building expressions for. Scope scope; + // The branch label name for this scope. Always fresh, never shadowed. + Name label; + std::vector exprStack; // Whether we have seen an unreachable instruction and are in // stack-polymorphic unreachable mode. @@ -218,6 +230,7 @@ private: ScopeCtx() : scope(NoScope{}) {} ScopeCtx(Scope scope) : scope(scope) {} + ScopeCtx(Scope scope, Name label) : scope(scope), label(label) {} static ScopeCtx makeFunc(Function* func) { return ScopeCtx(FuncScope{func}); @@ -225,11 +238,11 @@ private: static ScopeCtx makeBlock(Block* block) { return ScopeCtx(BlockScope{block}); } - static ScopeCtx makeIf(If* iff, Name label = {}) { - return ScopeCtx(IfScope{iff, label}); + static ScopeCtx makeIf(If* iff, Name originalLabel = {}) { + return ScopeCtx(IfScope{iff, originalLabel}); } - static ScopeCtx makeElse(If* iff, Name label = {}) { - return ScopeCtx(ElseScope{iff, label}); + static ScopeCtx makeElse(If* iff, Name originalLabel, Name label) { + return ScopeCtx(ElseScope{iff, originalLabel}, label); } static ScopeCtx makeLoop(Loop* loop) { return ScopeCtx(LoopScope{loop}); } @@ -282,15 +295,18 @@ private: } WASM_UNREACHABLE("unexpected scope kind"); } - Name getLabel() { + Name getOriginalLabel() { + if (getFunction()) { + return Name{}; + } if (auto* block = getBlock()) { return block->name; } if (auto* ifScope = std::get_if(&scope)) { - return ifScope->label; + return ifScope->originalLabel; } if (auto* elseScope = std::get_if(&scope)) { - return elseScope->label; + return elseScope->originalLabel; } if (auto* loop = getLoop()) { return loop->name; @@ -302,6 +318,29 @@ private: // The stack of block contexts currently being parsed. std::vector scopeStack; + // Map label names to stacks of label depths at which they appear. The + // relative index of a label name is the current depth minus the top depth on + // its stack. + std::unordered_map> labelDepths; + + Name makeFresh(Name label) { + return Names::getValidName(label, [&](Name candidate) { + return labelDepths.insert({candidate, {}}).second; + }); + } + + void pushScope(ScopeCtx scope) { + if (auto label = scope.getOriginalLabel()) { + // Assign a fresh label to the scope, if necessary. + if (!scope.label) { + scope.label = makeFresh(label); + } + // Record the original label to handle references to it correctly. + labelDepths[label].push_back(scopeStack.size() + 1); + } + scopeStack.push_back(scope); + } + ScopeCtx& getScope() { if (scopeStack.empty()) { // We are not in a block context, so push a dummy scope. @@ -310,12 +349,24 @@ private: return scopeStack.back(); } + Result getScope(Index label) { + Index numLabels = scopeStack.size(); + if (!scopeStack.empty() && scopeStack[0].isNone()) { + --numLabels; + } + if (label >= numLabels) { + return Err{"label index out of bounds"}; + } + return &scopeStack[scopeStack.size() - 1 - label]; + } + // Collect the current scope into a single expression. If it has multiple // top-level expressions, this requires collecting them into a block. If we // are in a block context, we can collect them directly into the destination // `block`, but otherwise we will have to allocate a new block. Result finishScope(Block* block = nullptr); + [[nodiscard]] Result getLabelName(Index label); [[nodiscard]] Result addScratchLocal(Type); [[nodiscard]] Result pop(); void push(Expression*); diff --git a/src/wasm/wasm-ir-builder.cpp b/src/wasm/wasm-ir-builder.cpp index 4312fefe3..4d1c3353d 100644 --- a/src/wasm/wasm-ir-builder.cpp +++ b/src/wasm/wasm-ir-builder.cpp @@ -181,6 +181,7 @@ Result IRBuilder::build() { assert(scopeStack.back().exprStack.size() == 1); auto* expr = scopeStack.back().exprStack.back(); scopeStack.clear(); + labelDepths.clear(); return expr; } @@ -206,6 +207,10 @@ Result<> IRBuilder::visitExpression(Expression* curr) { auto field = pop(); \ CHECK_ERR(field); \ expr->field = *field; +#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field) \ + if (labelDepths.count(expr->field)) { \ + return Err{"repeated label"}; \ + } #define DELEGATE_END(id) #define DELEGATE_FIELD_OPTIONAL_CHILD(id, field) \ @@ -214,15 +219,18 @@ Result<> IRBuilder::visitExpression(Expression* curr) { #define DELEGATE_FIELD_CHILD_VECTOR(id, field) \ WASM_UNREACHABLE("should have called visit" #id " because " #id \ " has child vector " #field); +#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field) \ + WASM_UNREACHABLE("should have called visit" #id " because " #id \ + " has scope name use " #field); +#define DELEGATE_FIELD_SCOPE_NAME_USE_VECTOR(id, field) \ + WASM_UNREACHABLE("should have called visit" #id " because " #id \ + " has scope name use vector " #field); #define DELEGATE_FIELD_INT(id, field) #define DELEGATE_FIELD_INT_ARRAY(id, field) #define DELEGATE_FIELD_LITERAL(id, field) #define DELEGATE_FIELD_NAME(id, field) #define DELEGATE_FIELD_NAME_VECTOR(id, field) -#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field) -#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field) -#define DELEGATE_FIELD_SCOPE_NAME_USE_VECTOR(id, field) #define DELEGATE_FIELD_TYPE(id, field) #define DELEGATE_FIELD_HEAPTYPE(id, field) #define DELEGATE_FIELD_ADDRESS(id, field) @@ -281,6 +289,30 @@ Result<> IRBuilder::visitArrayNew(ArrayNew* curr) { return Ok{}; } +Result<> IRBuilder::visitBreak(Break* curr, std::optional label) { + if (!label) { + auto index = getLabelIndex(curr->name); + CHECK_ERR(index); + label = *index; + } + auto scope = getScope(*label); + CHECK_ERR(scope); + std::vector values((*scope)->getResultType().size()); + for (size_t i = 0, size = values.size(); i < size; ++i) { + auto val = pop(); + CHECK_ERR(val); + values[size - 1 - i] = *val; + } + if (values.size() == 0) { + curr->value = nullptr; + } else if (values.size() == 1) { + curr->value = values[0]; + } else { + curr->value = builder.makeTupleMake(values); + } + return Ok{}; +} + Result<> IRBuilder::visitFunctionStart(Function* func) { if (!scopeStack.empty()) { return Err{"unexpected start of function"}; @@ -291,7 +323,7 @@ Result<> IRBuilder::visitFunctionStart(Function* func) { } Result<> IRBuilder::visitBlockStart(Block* curr) { - scopeStack.push_back(ScopeCtx::makeBlock(curr)); + pushScope(ScopeCtx::makeBlock(curr)); return Ok{}; } @@ -299,12 +331,12 @@ Result<> IRBuilder::visitIfStart(If* iff, Name label) { auto cond = pop(); CHECK_ERR(cond); iff->condition = *cond; - scopeStack.push_back(ScopeCtx::makeIf(iff, label)); + pushScope(ScopeCtx::makeIf(iff, label)); return Ok{}; } Result<> IRBuilder::visitLoopStart(Loop* loop) { - scopeStack.push_back(ScopeCtx::makeLoop(loop)); + pushScope(ScopeCtx::makeLoop(loop)); return Ok{}; } @@ -356,6 +388,7 @@ Result IRBuilder::finishScope(Block* block) { auto hoisted = hoistLastValue(); CHECK_ERR(hoisted); } + Expression* ret = nullptr; if (scope.exprStack.size() == 0) { // No expressions for this scope, but we need something. If we were given a @@ -385,6 +418,12 @@ Result IRBuilder::finishScope(Block* block) { } ret = block; } + + // If this scope had a label, remove it from the context. + if (auto label = scope.getOriginalLabel()) { + labelDepths.at(label).pop_back(); + } + scopeStack.pop_back(); return ret; } @@ -395,11 +434,12 @@ Result<> IRBuilder::visitElse() { if (!iff) { return Err{"unexpected else"}; } - auto label = scope.getLabel(); + auto originalLabel = scope.getOriginalLabel(); + auto label = scope.label; auto expr = finishScope(); CHECK_ERR(expr); iff->ifTrue = *expr; - scopeStack.push_back(ScopeCtx::makeElse(iff, label)); + pushScope(ScopeCtx::makeElse(iff, originalLabel, label)); return Ok{}; } @@ -414,22 +454,25 @@ Result<> IRBuilder::visitEnd() { // If the scope expression cannot be directly labeled, we may need to wrap it // in a block. auto maybeWrapForLabel = [&](Expression* curr) -> Expression* { - if (auto label = scope.getLabel()) { - return builder.makeBlock(label, {curr}, curr->type); + if (scope.label) { + return builder.makeBlock(scope.label, {curr}, scope.getResultType()); } return curr; }; if (auto* func = scope.getFunction()) { - func->body = *expr; + func->body = maybeWrapForLabel(*expr); + labelDepths.clear(); } else if (auto* block = scope.getBlock()) { assert(*expr == block); + block->name = scope.label; // TODO: Track branches so we can know whether this block is a target and // finalize more efficiently. block->finalize(block->type); push(block); } else if (auto* loop = scope.getLoop()) { loop->body = *expr; + loop->name = scope.label; loop->finalize(loop->type); push(loop); } else if (auto* iff = scope.getIf()) { @@ -447,6 +490,25 @@ Result<> IRBuilder::visitEnd() { return Ok{}; } +Result IRBuilder::getLabelIndex(Name label) { + auto it = labelDepths.find(label); + if (it == labelDepths.end() || it->second.empty()) { + return Err{"unexpected label '"s + label.toString()}; + } + return scopeStack.size() - it->second.back(); +} + +Result IRBuilder::getLabelName(Index label) { + auto scope = getScope(label); + CHECK_ERR(scope); + auto& scopeLabel = (*scope)->label; + if (!scopeLabel) { + // The scope does not already have a name, so we need to create one. + scopeLabel = makeFresh("label"); + } + return scopeLabel; +} + Result<> IRBuilder::makeNop() { push(builder.makeNop()); return Ok{}; @@ -472,7 +534,15 @@ Result<> IRBuilder::makeLoop(Name label, Type type) { return visitLoopStart(loop); } -// Result<> IRBuilder::makeBreak() {} +Result<> IRBuilder::makeBreak(Index label) { + auto name = getLabelName(label); + CHECK_ERR(name); + Break curr; + curr.name = *name; + CHECK_ERR(visitBreak(&curr, label)); + push(builder.makeBreak(curr.name, curr.value)); + return Ok{}; +} // Result<> IRBuilder::makeSwitch() {} diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index a8bb4a536..337065b91 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -651,8 +651,11 @@ void FunctionValidator::visitBlock(Block* curr) { auto iter = breakTypes.find(curr->name); assert(iter != breakTypes.end()); // we set it ourselves for (Type breakType : iter->second) { - // none or unreachable means a poison value that we should ignore - if - // consumed, it will error + if (breakType == Type::none && curr->type == Type::unreachable) { + // We allow empty breaks to unreachable blocks. + continue; + } + shouldBeSubType(breakType, curr->type, curr, -- cgit v1.2.3