diff options
author | Thomas Lively <tlively@google.com> | 2023-10-02 16:09:05 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-02 16:09:05 -0700 |
commit | 0fe08192d866f72d0f4e680c862bb7af48dec1ac (patch) | |
tree | a37b814de2ab85a64612ea112b6303776a44bfa7 /src | |
parent | 77f36789aac707d1d5daed20e6e7612c9a9af51b (diff) | |
download | binaryen-0fe08192d866f72d0f4e680c862bb7af48dec1ac.tar.gz binaryen-0fe08192d866f72d0f4e680c862bb7af48dec1ac.tar.bz2 binaryen-0fe08192d866f72d0f4e680c862bb7af48dec1ac.zip |
[Parser] Parse labels and br (#5970)
The parser previously parsed labels and could attach them to control flow
structures, but did not maintain the context necessary to correctly parse
branches. Support parsing labels as both names and indices in IRBuilder,
handling shadowing correctly, and use that support to implement parsing of br.
Diffstat (limited to 'src')
-rw-r--r-- | src/parser/contexts.h | 16 | ||||
-rw-r--r-- | src/parser/parsers.h | 17 | ||||
-rw-r--r-- | src/wasm-ir-builder.h | 77 | ||||
-rw-r--r-- | src/wasm/wasm-ir-builder.cpp | 94 | ||||
-rw-r--r-- | src/wasm/wasm-validator.cpp | 7 |
5 files changed, 182 insertions, 29 deletions
diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 7eedcccc2..862d9772d 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -279,6 +279,7 @@ struct NullInstrParserCtx { using GlobalIdxT = Ok; using MemoryIdxT = Ok; using DataIdxT = Ok; + using LabelIdxT = Ok; using MemargT = Ok; @@ -298,6 +299,8 @@ struct NullInstrParserCtx { MemoryIdxT getMemoryFromName(Name) { return Ok{}; } DataIdxT getDataFromIdx(uint32_t) { return Ok{}; } DataIdxT getDataFromName(Name) { return Ok{}; } + LabelIdxT getLabelFromIdx(uint32_t) { return Ok{}; } + LabelIdxT getLabelFromName(Name) { return Ok{}; } MemargT getMemarg(uint64_t, uint32_t) { return Ok{}; } @@ -370,7 +373,7 @@ struct NullInstrParserCtx { Result<> makeMemoryCopy(Index, MemoryIdxT*, MemoryIdxT*) { return Ok{}; } Result<> makeMemoryFill(Index, MemoryIdxT*) { return Ok{}; } - + Result<> makeBreak(Index, LabelIdxT) { return Ok{}; } Result<> makeReturn(Index) { return Ok{}; } template<typename HeapTypeT> Result<> makeRefNull(Index, HeapTypeT) { return Ok{}; @@ -793,6 +796,7 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> { using FieldIdxT = Index; using LocalIdxT = Index; + using LabelIdxT = Index; using GlobalIdxT = Name; using MemoryIdxT = Name; using DataIdxT = Name; @@ -936,6 +940,12 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> { return name; } + Result<Index> getLabelFromIdx(uint32_t idx) { return idx; } + + Result<Index> getLabelFromName(Name name) { + return irBuilder.getLabelIndex(name); + } + Result<TypeUseT> makeTypeUse(Index pos, std::optional<HeapTypeT> type, ParamsT* params, @@ -1203,6 +1213,10 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> { return withLoc(pos, irBuilder.makeMemoryFill(*m)); } + Result<> makeBreak(Index pos, Index label) { + return withLoc(pos, irBuilder.makeBreak(label)); + } + Result<> makeReturn(Index pos) { return withLoc(pos, irBuilder.makeReturn()); } diff --git a/src/parser/parsers.h b/src/parser/parsers.h index 38005253f..32f6709df 100644 --- a/src/parser/parsers.h +++ b/src/parser/parsers.h @@ -177,6 +177,7 @@ template<typename Ctx> Result<typename Ctx::MemoryIdxT> memidx(Ctx&); template<typename Ctx> MaybeResult<typename Ctx::MemoryIdxT> maybeMemuse(Ctx&); template<typename Ctx> Result<typename Ctx::GlobalIdxT> globalidx(Ctx&); template<typename Ctx> Result<typename Ctx::LocalIdxT> localidx(Ctx&); +template<typename Ctx> Result<typename Ctx::LabelIdxT> labelidx(Ctx&); template<typename Ctx> Result<typename Ctx::TypeUseT> typeuse(Ctx&); MaybeResult<ImportNames> inlineImport(ParseInput&); Result<std::vector<Name>> inlineExports(ParseInput&); @@ -1195,7 +1196,9 @@ Result<> makeCallIndirect(Ctx& ctx, Index pos, bool isReturn) { } template<typename Ctx> Result<> makeBreak(Ctx& ctx, Index pos) { - return ctx.in.err("unimplemented instruction"); + auto label = labelidx(ctx); + CHECK_ERR(label); + return ctx.makeBreak(pos, *label); } template<typename Ctx> Result<> makeBreakTable(Ctx& ctx, Index pos) { @@ -1569,6 +1572,18 @@ template<typename Ctx> Result<typename Ctx::LocalIdxT> localidx(Ctx& ctx) { return ctx.in.err("expected local index or identifier"); } +// labelidx ::= x:u32 => x +// | v:id => x (if labels[x] = v) +template<typename Ctx> Result<typename Ctx::LabelIdxT> labelidx(Ctx& ctx) { + if (auto x = ctx.in.takeU32()) { + return ctx.getLabelFromIdx(*x); + } + if (auto id = ctx.in.takeID()) { + return ctx.getLabelFromName(*id); + } + return ctx.in.err("expected label index or identifier"); +} + // typeuse ::= '(' 'type' x:typeidx ')' => x, [] // (if typedefs[x] = [t1*] -> [t2*] // | '(' 'type' x:typeidx ')' ((t1,IDs):param)* (t2:result)* => x, IDs diff --git a/src/wasm-ir-builder.h b/src/wasm-ir-builder.h index 2da69fff7..b42d40892 100644 --- a/src/wasm-ir-builder.h +++ b/src/wasm-ir-builder.h @@ -19,6 +19,7 @@ #include <vector> +#include "ir/names.h" #include "support/result.h" #include "wasm-builder.h" #include "wasm-traversal.h" @@ -58,14 +59,20 @@ public: [[nodiscard]] Result<> visitLoopStart(Loop* iff); [[nodiscard]] Result<> visitEnd(); - // Alternatively, call makeXYZ to have the IRBuilder allocate the nodes. This - // is generally safer than calling `visit` because the function signatures - // ensure that there are no missing fields. + // Binaryen IR uses names to refer to branch targets, but in general there may + // be branches to constructs that do not yet have names, so in IRBuilder we + // use indices to refer to branch targets instead, just as the binary format + // does. This function converts a branch target name to the correct index. + [[nodiscard]] Result<Index> getLabelIndex(Name label); + + // Instead of calling visit, call makeXYZ to have the IRBuilder allocate the + // nodes. This is generally safer than calling `visit` because the function + // signatures ensure that there are no missing fields. [[nodiscard]] Result<> makeNop(); [[nodiscard]] Result<> makeBlock(Name label, Type type); [[nodiscard]] Result<> makeIf(Name label, Type type); [[nodiscard]] Result<> makeLoop(Name label, Type type); - // [[nodiscard]] Result<> makeBreak(); + [[nodiscard]] Result<> makeBreak(Index label); // [[nodiscard]] Result<> makeSwitch(); // [[nodiscard]] Result<> makeCall(); // [[nodiscard]] Result<> makeCallIndirect(); @@ -177,6 +184,8 @@ public: [[nodiscard]] Result<> visitReturn(Return*); [[nodiscard]] Result<> visitStructNew(StructNew*); [[nodiscard]] Result<> visitArrayNew(ArrayNew*); + [[nodiscard]] Result<> visitBreak(Break*, + std::optional<Index> label = std::nullopt); private: Module& wasm; @@ -196,11 +205,11 @@ private: }; struct IfScope { If* iff; - Name label; + Name originalLabel; }; struct ElseScope { If* iff; - Name label; + Name originalLabel; }; struct LoopScope { Loop* loop; @@ -211,6 +220,9 @@ private: // The control flow structure we are building expressions for. Scope scope; + // The branch label name for this scope. Always fresh, never shadowed. + Name label; + std::vector<Expression*> exprStack; // Whether we have seen an unreachable instruction and are in // stack-polymorphic unreachable mode. @@ -218,6 +230,7 @@ private: ScopeCtx() : scope(NoScope{}) {} ScopeCtx(Scope scope) : scope(scope) {} + ScopeCtx(Scope scope, Name label) : scope(scope), label(label) {} static ScopeCtx makeFunc(Function* func) { return ScopeCtx(FuncScope{func}); @@ -225,11 +238,11 @@ private: static ScopeCtx makeBlock(Block* block) { return ScopeCtx(BlockScope{block}); } - static ScopeCtx makeIf(If* iff, Name label = {}) { - return ScopeCtx(IfScope{iff, label}); + static ScopeCtx makeIf(If* iff, Name originalLabel = {}) { + return ScopeCtx(IfScope{iff, originalLabel}); } - static ScopeCtx makeElse(If* iff, Name label = {}) { - return ScopeCtx(ElseScope{iff, label}); + static ScopeCtx makeElse(If* iff, Name originalLabel, Name label) { + return ScopeCtx(ElseScope{iff, originalLabel}, label); } static ScopeCtx makeLoop(Loop* loop) { return ScopeCtx(LoopScope{loop}); } @@ -282,15 +295,18 @@ private: } WASM_UNREACHABLE("unexpected scope kind"); } - Name getLabel() { + Name getOriginalLabel() { + if (getFunction()) { + return Name{}; + } if (auto* block = getBlock()) { return block->name; } if (auto* ifScope = std::get_if<IfScope>(&scope)) { - return ifScope->label; + return ifScope->originalLabel; } if (auto* elseScope = std::get_if<ElseScope>(&scope)) { - return elseScope->label; + return elseScope->originalLabel; } if (auto* loop = getLoop()) { return loop->name; @@ -302,6 +318,29 @@ private: // The stack of block contexts currently being parsed. std::vector<ScopeCtx> scopeStack; + // Map label names to stacks of label depths at which they appear. The + // relative index of a label name is the current depth minus the top depth on + // its stack. + std::unordered_map<Name, std::vector<Index>> labelDepths; + + Name makeFresh(Name label) { + return Names::getValidName(label, [&](Name candidate) { + return labelDepths.insert({candidate, {}}).second; + }); + } + + void pushScope(ScopeCtx scope) { + if (auto label = scope.getOriginalLabel()) { + // Assign a fresh label to the scope, if necessary. + if (!scope.label) { + scope.label = makeFresh(label); + } + // Record the original label to handle references to it correctly. + labelDepths[label].push_back(scopeStack.size() + 1); + } + scopeStack.push_back(scope); + } + ScopeCtx& getScope() { if (scopeStack.empty()) { // We are not in a block context, so push a dummy scope. @@ -310,12 +349,24 @@ private: return scopeStack.back(); } + Result<ScopeCtx*> getScope(Index label) { + Index numLabels = scopeStack.size(); + if (!scopeStack.empty() && scopeStack[0].isNone()) { + --numLabels; + } + if (label >= numLabels) { + return Err{"label index out of bounds"}; + } + return &scopeStack[scopeStack.size() - 1 - label]; + } + // Collect the current scope into a single expression. If it has multiple // top-level expressions, this requires collecting them into a block. If we // are in a block context, we can collect them directly into the destination // `block`, but otherwise we will have to allocate a new block. Result<Expression*> finishScope(Block* block = nullptr); + [[nodiscard]] Result<Name> getLabelName(Index label); [[nodiscard]] Result<Index> addScratchLocal(Type); [[nodiscard]] Result<Expression*> pop(); void push(Expression*); diff --git a/src/wasm/wasm-ir-builder.cpp b/src/wasm/wasm-ir-builder.cpp index 4312fefe3..4d1c3353d 100644 --- a/src/wasm/wasm-ir-builder.cpp +++ b/src/wasm/wasm-ir-builder.cpp @@ -181,6 +181,7 @@ Result<Expression*> IRBuilder::build() { assert(scopeStack.back().exprStack.size() == 1); auto* expr = scopeStack.back().exprStack.back(); scopeStack.clear(); + labelDepths.clear(); return expr; } @@ -206,6 +207,10 @@ Result<> IRBuilder::visitExpression(Expression* curr) { auto field = pop(); \ CHECK_ERR(field); \ expr->field = *field; +#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field) \ + if (labelDepths.count(expr->field)) { \ + return Err{"repeated label"}; \ + } #define DELEGATE_END(id) #define DELEGATE_FIELD_OPTIONAL_CHILD(id, field) \ @@ -214,15 +219,18 @@ Result<> IRBuilder::visitExpression(Expression* curr) { #define DELEGATE_FIELD_CHILD_VECTOR(id, field) \ WASM_UNREACHABLE("should have called visit" #id " because " #id \ " has child vector " #field); +#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field) \ + WASM_UNREACHABLE("should have called visit" #id " because " #id \ + " has scope name use " #field); +#define DELEGATE_FIELD_SCOPE_NAME_USE_VECTOR(id, field) \ + WASM_UNREACHABLE("should have called visit" #id " because " #id \ + " has scope name use vector " #field); #define DELEGATE_FIELD_INT(id, field) #define DELEGATE_FIELD_INT_ARRAY(id, field) #define DELEGATE_FIELD_LITERAL(id, field) #define DELEGATE_FIELD_NAME(id, field) #define DELEGATE_FIELD_NAME_VECTOR(id, field) -#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field) -#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field) -#define DELEGATE_FIELD_SCOPE_NAME_USE_VECTOR(id, field) #define DELEGATE_FIELD_TYPE(id, field) #define DELEGATE_FIELD_HEAPTYPE(id, field) #define DELEGATE_FIELD_ADDRESS(id, field) @@ -281,6 +289,30 @@ Result<> IRBuilder::visitArrayNew(ArrayNew* curr) { return Ok{}; } +Result<> IRBuilder::visitBreak(Break* curr, std::optional<Index> label) { + if (!label) { + auto index = getLabelIndex(curr->name); + CHECK_ERR(index); + label = *index; + } + auto scope = getScope(*label); + CHECK_ERR(scope); + std::vector<Expression*> values((*scope)->getResultType().size()); + for (size_t i = 0, size = values.size(); i < size; ++i) { + auto val = pop(); + CHECK_ERR(val); + values[size - 1 - i] = *val; + } + if (values.size() == 0) { + curr->value = nullptr; + } else if (values.size() == 1) { + curr->value = values[0]; + } else { + curr->value = builder.makeTupleMake(values); + } + return Ok{}; +} + Result<> IRBuilder::visitFunctionStart(Function* func) { if (!scopeStack.empty()) { return Err{"unexpected start of function"}; @@ -291,7 +323,7 @@ Result<> IRBuilder::visitFunctionStart(Function* func) { } Result<> IRBuilder::visitBlockStart(Block* curr) { - scopeStack.push_back(ScopeCtx::makeBlock(curr)); + pushScope(ScopeCtx::makeBlock(curr)); return Ok{}; } @@ -299,12 +331,12 @@ Result<> IRBuilder::visitIfStart(If* iff, Name label) { auto cond = pop(); CHECK_ERR(cond); iff->condition = *cond; - scopeStack.push_back(ScopeCtx::makeIf(iff, label)); + pushScope(ScopeCtx::makeIf(iff, label)); return Ok{}; } Result<> IRBuilder::visitLoopStart(Loop* loop) { - scopeStack.push_back(ScopeCtx::makeLoop(loop)); + pushScope(ScopeCtx::makeLoop(loop)); return Ok{}; } @@ -356,6 +388,7 @@ Result<Expression*> IRBuilder::finishScope(Block* block) { auto hoisted = hoistLastValue(); CHECK_ERR(hoisted); } + Expression* ret = nullptr; if (scope.exprStack.size() == 0) { // No expressions for this scope, but we need something. If we were given a @@ -385,6 +418,12 @@ Result<Expression*> IRBuilder::finishScope(Block* block) { } ret = block; } + + // If this scope had a label, remove it from the context. + if (auto label = scope.getOriginalLabel()) { + labelDepths.at(label).pop_back(); + } + scopeStack.pop_back(); return ret; } @@ -395,11 +434,12 @@ Result<> IRBuilder::visitElse() { if (!iff) { return Err{"unexpected else"}; } - auto label = scope.getLabel(); + auto originalLabel = scope.getOriginalLabel(); + auto label = scope.label; auto expr = finishScope(); CHECK_ERR(expr); iff->ifTrue = *expr; - scopeStack.push_back(ScopeCtx::makeElse(iff, label)); + pushScope(ScopeCtx::makeElse(iff, originalLabel, label)); return Ok{}; } @@ -414,22 +454,25 @@ Result<> IRBuilder::visitEnd() { // If the scope expression cannot be directly labeled, we may need to wrap it // in a block. auto maybeWrapForLabel = [&](Expression* curr) -> Expression* { - if (auto label = scope.getLabel()) { - return builder.makeBlock(label, {curr}, curr->type); + if (scope.label) { + return builder.makeBlock(scope.label, {curr}, scope.getResultType()); } return curr; }; if (auto* func = scope.getFunction()) { - func->body = *expr; + func->body = maybeWrapForLabel(*expr); + labelDepths.clear(); } else if (auto* block = scope.getBlock()) { assert(*expr == block); + block->name = scope.label; // TODO: Track branches so we can know whether this block is a target and // finalize more efficiently. block->finalize(block->type); push(block); } else if (auto* loop = scope.getLoop()) { loop->body = *expr; + loop->name = scope.label; loop->finalize(loop->type); push(loop); } else if (auto* iff = scope.getIf()) { @@ -447,6 +490,25 @@ Result<> IRBuilder::visitEnd() { return Ok{}; } +Result<Index> IRBuilder::getLabelIndex(Name label) { + auto it = labelDepths.find(label); + if (it == labelDepths.end() || it->second.empty()) { + return Err{"unexpected label '"s + label.toString()}; + } + return scopeStack.size() - it->second.back(); +} + +Result<Name> IRBuilder::getLabelName(Index label) { + auto scope = getScope(label); + CHECK_ERR(scope); + auto& scopeLabel = (*scope)->label; + if (!scopeLabel) { + // The scope does not already have a name, so we need to create one. + scopeLabel = makeFresh("label"); + } + return scopeLabel; +} + Result<> IRBuilder::makeNop() { push(builder.makeNop()); return Ok{}; @@ -472,7 +534,15 @@ Result<> IRBuilder::makeLoop(Name label, Type type) { return visitLoopStart(loop); } -// Result<> IRBuilder::makeBreak() {} +Result<> IRBuilder::makeBreak(Index label) { + auto name = getLabelName(label); + CHECK_ERR(name); + Break curr; + curr.name = *name; + CHECK_ERR(visitBreak(&curr, label)); + push(builder.makeBreak(curr.name, curr.value)); + return Ok{}; +} // Result<> IRBuilder::makeSwitch() {} diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index a8bb4a536..337065b91 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -651,8 +651,11 @@ void FunctionValidator::visitBlock(Block* curr) { auto iter = breakTypes.find(curr->name); assert(iter != breakTypes.end()); // we set it ourselves for (Type breakType : iter->second) { - // none or unreachable means a poison value that we should ignore - if - // consumed, it will error + if (breakType == Type::none && curr->type == Type::unreachable) { + // We allow empty breaks to unreachable blocks. + continue; + } + shouldBeSubType(breakType, curr->type, curr, |