From 3777624e19f22a6eb80d995408329d789593e427 Mon Sep 17 00:00:00 2001 From: Thomas Lively <7121787+tlively@users.noreply.github.com> Date: Fri, 26 Aug 2022 11:06:17 -0700 Subject: [Parser] Parse functions and implicit types (#4972) Implement function parsing, including parsing of locals and type uses. Also add a new phase of parsing that iterates through type uses that do not have explicit types to create any implicitly defined types and append them to the type index space. This is important because the implicitly defined types may be referred to by index elsewhere and because the legacy parser does not handle these implicitly defined types correctly. Finally, maintain a map of implicit type use locations to corresponding types for use in later phases of parsing. --- src/wasm/wat-parser.cpp | 414 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 396 insertions(+), 18 deletions(-) (limited to 'src/wasm/wat-parser.cpp') diff --git a/src/wasm/wat-parser.cpp b/src/wasm/wat-parser.cpp index 5455c0438..843e26d08 100644 --- a/src/wasm/wat-parser.cpp +++ b/src/wasm/wat-parser.cpp @@ -38,15 +38,14 @@ // other module elements that can be referred to by name before their // definitions have been parsed. // -// The third phase, not yet implemented, further parses and constructs types -// implicitly defined by type uses in functions, blocks, and call_indirect -// instructions. These implicitly defined types may be referred to by index -// elsewhere. +// The third phase further parses and constructs types implicitly defined by +// type uses in functions, blocks, and call_indirect instructions. These +// implicitly defined types may be referred to by index elsewhere. // -// The fourth phase, not yet implemented, parses and sets the types of globals, -// functions, and other top-level module elements. These types need to be set -// before we parse instructions because they determine the types of instructions -// such as global.get and ref.func. +// The fourth phase parses and sets the types of globals, functions, and other +// top-level module elements. These types need to be set before we parse +// instructions because they determine the types of instructions such as +// global.get and ref.func. // // The fifth and final phase parses the remaining contents of all module // elements, including instructions. @@ -282,6 +281,13 @@ struct GlobalType { Type type; }; +// A signature type and parameter names (possibly empty), used for parsing +// function types. +struct TypeUse { + HeapType type; + std::vector names; +}; + struct ImportNames { Name mod; Name nm; @@ -366,6 +372,8 @@ struct NullTypeParserCtx { using StructT = Ok; using ArrayT = Ok; using GlobalTypeT = Ok; + using TypeUseT = Ok; + using LocalsT = Ok; HeapTypeT makeFunc() { return Ok{}; } HeapTypeT makeAny() { return Ok{}; } @@ -405,6 +413,9 @@ struct NullTypeParserCtx { GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; } + LocalsT makeLocals() { return Ok{}; } + void appendLocal(LocalsT&, Name, TypeT) {} + Result getTypeIndex(Name, ParseInput&) { return 1; } Result getHeapTypeFromIdx(Index, ParseInput&) { return Ok{}; } }; @@ -421,6 +432,7 @@ template struct TypeParserCtx { using FieldsT = std::pair, std::vector>; using StructT = std::pair, Struct>; using ArrayT = Array; + using LocalsT = std::vector; // Map heap type names to their indices. const IndexMap& typeIndices; @@ -492,6 +504,11 @@ template struct TypeParserCtx { return {}; } + LocalsT makeLocals() { return {}; } + void appendLocal(LocalsT& locals, Name id, TypeT type) { + locals.push_back({id, type}); + } + Result getTypeIndex(Name id, ParseInput& in) { auto it = typeIndices.find(id); if (it == typeIndices.end()) { @@ -573,10 +590,15 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { // The module element definitions we are parsing in this phase. std::vector typeDefs; std::vector subtypeDefs; + std::vector funcDefs; std::vector globalDefs; + // Positions of typeuses that might implicitly define new types. + std::vector implicitTypeDefs; + // Counters used for generating names for module elements. int globalCounter = 0; + int funcCounter = 0; // Used to verify that all imports come before all non-imports. bool hasNonImport = false; @@ -594,6 +616,57 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { void addRecGroup(Index, size_t) {} void finishDeftype(Index pos) { typeDefs.push_back({{}, pos}); } + Result makeTypeUse(Index pos, + std::optional type, + ParamsT*, + ResultsT*, + ParseInput&) { + if (!type) { + implicitTypeDefs.push_back(pos); + } + return Ok{}; + } + + Result addFuncDecl(ParseInput& in, + Name name, + std::optional importNames) { + auto f = std::make_unique(); + if (name.is()) { + if (wasm.getFunctionOrNull(name)) { + // TDOO: if the existing function is not explicitly named, fix its name + // and continue. + // TODO: Fix error location to point to name. + return in.err("repeated function name"); + } + f->setExplicitName(name); + } else { + name = (importNames ? "fimport$" : "") + std::to_string(funcCounter++); + name = Names::getValidFunctionName(wasm, name); + f->name = name; + } + applyImportNames(*f, importNames); + return wasm.addFunction(std::move(f)); + } + + Result<> addFunc(Name name, + const std::vector& exports, + ImportNames* import, + TypeUseT type, + std::optional, + std::optional, + Index pos, + ParseInput& in) { + if (import && hasNonImport) { + return in.err("import after non-import"); + } + auto imp = import ? std::make_optional(*import) : std::nullopt; + auto f = addFuncDecl(in, name, imp); + CHECK_ERR(f); + CHECK_ERR(addExports(in, wasm, *f, exports, ExternalKind::Function)); + funcDefs.push_back({name, pos}); + return Ok{}; + } + Result addGlobalDecl(ParseInput& in, Name name, std::optional importNames) { @@ -697,7 +770,65 @@ struct ParseTypeDefsCtx : TypeParserCtx { void finishDeftype(Index) {} }; -// TODO: Phase 3: ParseImplicitTypeDefsCtx +// Phase 3: Parse type uses to find implicitly defined types. +struct ParseImplicitTypeDefsCtx : TypeParserCtx { + using TypeUseT = Ok; + + // Types parsed so far. + std::vector& types; + + // Map typeuse positions without an explicit type to the correct type. + std::unordered_map& implicitTypes; + + // Map signatures to the first defined heap type they match. + std::unordered_map sigTypes; + + ParseImplicitTypeDefsCtx(std::vector& types, + std::unordered_map& implicitTypes, + const IndexMap& typeIndices) + : TypeParserCtx(typeIndices), types(types), + implicitTypes(implicitTypes) { + for (auto type : types) { + if (type.isSignature() && type.getRecGroup().size() == 1) { + sigTypes.insert({type.getSignature(), type}); + } + } + } + + Result getHeapTypeFromIdx(Index idx, ParseInput& in) { + if (idx >= types.size()) { + return in.err("type index out of bounds"); + } + return types[idx]; + } + + Result makeTypeUse(Index pos, + std::optional, + ParamsT* params, + ResultsT* results, + ParseInput& in) { + std::vector paramTypes; + if (params) { + paramTypes = getUnnamedTypes(*params); + } + + std::vector resultTypes; + if (results) { + resultTypes = *results; + } + + auto sig = Signature(Type(paramTypes), Type(resultTypes)); + auto [it, inserted] = sigTypes.insert({sig, HeapType::func}); + if (inserted) { + auto type = HeapType(sig); + it->second = type; + types.push_back(type); + } + implicitTypes.insert({pos, it->second}); + + return Ok{}; + } +}; // Phase 4: Parse and set the types of module elements. struct ParseModuleTypesCtx : TypeParserCtx, @@ -706,19 +837,22 @@ struct ParseModuleTypesCtx : TypeParserCtx, // validate them when they are used. using GlobalTypeT = GlobalType; + using TypeUseT = TypeUse; Module& wasm; const std::vector& types; + const std::unordered_map& implicitTypes; // The index of the current type. Index index = 0; ParseModuleTypesCtx(Module& wasm, const std::vector& types, + const std::unordered_map& implicitTypes, const IndexMap& typeIndices) - : TypeParserCtx(typeIndices), wasm(wasm), - types(types) {} + : TypeParserCtx(typeIndices), wasm(wasm), types(types), + implicitTypes(implicitTypes) {} Result getHeapTypeFromIdx(Index idx, ParseInput& in) { if (idx >= types.size()) { @@ -727,10 +861,57 @@ struct ParseModuleTypesCtx : TypeParserCtx, return types[idx]; } + Result makeTypeUse(Index pos, + std::optional type, + ParamsT* params, + ResultsT* results, + ParseInput& in) { + std::vector ids; + if (params) { + ids.reserve(params->size()); + for (auto& p : *params) { + ids.push_back(p.name); + } + } + + if (type) { + return TypeUse{*type, ids}; + } + + auto it = implicitTypes.find(pos); + assert(it != implicitTypes.end()); + + return TypeUse{it->second, ids}; + } + GlobalTypeT makeGlobalType(Mutability mutability, TypeT type) { return {mutability, type}; } + Result<> addFunc(Name name, + const std::vector&, + ImportNames*, + TypeUseT type, + std::optional locals, + std::optional, + Index, + ParseInput&) { + auto& f = wasm.functions[index]; + f->type = type.type; + for (Index i = 0; i < type.names.size(); ++i) { + if (type.names[i].is()) { + f->setLocalName(i, type.names[i]); + } + } + if (locals) { + for (auto& l : *locals) { + Builder::addVar(f.get(), l.name, l.type); + } + } + // TODO: local types and names. + return Ok{}; + } + Result<> addGlobal(Name, const std::vector&, ImportNames*, @@ -748,6 +929,7 @@ struct ParseModuleTypesCtx : TypeParserCtx, // Phase 5: Parse module element definitions, including instructions. struct ParseDefsCtx : InstrParserCtx { using GlobalTypeT = Ok; + using TypeUseT = HeapType; using InstrT = Expression*; using InstrsT = std::vector; @@ -759,15 +941,17 @@ struct ParseDefsCtx : InstrParserCtx { std::vector> instrStack; const std::vector& types; + const std::unordered_map& implicitTypes; // The index of the current module element. Index index = 0; ParseDefsCtx(Module& wasm, const std::vector& types, + const std::unordered_map& implicitTypes, const IndexMap& typeIndices) - : InstrParserCtx(wasm, typeIndices), wasm(wasm), - types(types) {} + : InstrParserCtx(wasm, typeIndices), wasm(wasm), types(types), + implicitTypes(implicitTypes) {} GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; } @@ -778,6 +962,66 @@ struct ParseDefsCtx : InstrParserCtx { return types[idx]; } + Result makeTypeUse(Index pos, + std::optional type, + ParamsT* params, + ResultsT* results, + ParseInput& in) { + if (type && (params || results)) { + std::vector paramTypes; + if (params) { + paramTypes = getUnnamedTypes(*params); + } + + std::vector resultTypes; + if (results) { + resultTypes = *results; + } + + auto sig = Signature(Type(paramTypes), Type(resultTypes)); + + if (!type->isSignature() || type->getSignature() != sig) { + return in.err("type does not match provided signature"); + } + } + + if (type) { + return *type; + } + + auto it = implicitTypes.find(pos); + assert(it != implicitTypes.end()); + return it->second; + } + + Result<> addFunc(Name, + const std::vector&, + ImportNames*, + TypeUseT, + std::optional, + std::optional insts, + Index, + ParseInput&) { + Expression* body; + if (insts) { + switch (insts->size()) { + case 0: + body = builder.makeNop(); + break; + case 1: + body = insts->back(); + break; + default: + body = builder.makeBlock(*insts, wasm.functions[index]->getResults()); + break; + } + } else { + body = builder.makeNop(); + } + wasm.functions[index]->body = body; + return Ok{}; + } + Result<> addGlobal(Name, const std::vector&, ImportNames*, @@ -1026,12 +1270,17 @@ template MaybeResult maybeTypeidx(Ctx& ctx, ParseInput& in); template Result typeidx(Ctx&, ParseInput&); +template +Result typeuse(Ctx&, ParseInput&); MaybeResult inlineImport(ParseInput&); Result> inlineExports(ParseInput&); template Result<> strtype(Ctx&, ParseInput&); template MaybeResult subtype(Ctx&, ParseInput&); template MaybeResult<> deftype(Ctx&, ParseInput&); +template +MaybeResult locals(Ctx&, ParseInput&); +template MaybeResult<> func(Ctx&, ParseInput&); template MaybeResult<> global(Ctx&, ParseInput&); MaybeResult<> modulefield(ParseDeclsCtx&, ParseInput&); Result<> module(ParseDeclsCtx&, ParseInput&); @@ -1920,6 +2169,38 @@ Result typeidx(Ctx& ctx, ParseInput& in) { return in.err("expected type index or identifier"); } +// typeuse ::= '(' 'type' x:typeidx ')' => x, [] +// (if typedefs[x] = [t1*] -> [t2*] +// | '(' 'type' x:typeidx ')' ((t1,IDs):param)* (t2:result)* => x, IDs +// (if typedefs[x] = [t1*] -> [t2*]) +// | ((t1,IDs):param)* (t2:result)* => x, IDs +// (if x is minimum s.t. typedefs[x] = [t1*] -> [t2*]) +template +Result typeuse(Ctx& ctx, ParseInput& in) { + auto pos = in.getPos(); + std::optional type; + if (in.takeSExprStart("type"sv)) { + auto x = typeidx(ctx, in); + CHECK_ERR(x); + + if (!in.takeRParen()) { + return in.err("expected end of type use"); + } + + type = *x; + } + + auto namedParams = params(ctx, in); + CHECK_ERR(namedParams); + + auto resultTypes = results(ctx, in); + CHECK_ERR(resultTypes); + + // TODO: Use `pos` for error reporting rather than `in`. + return ctx.makeTypeUse( + pos, type, namedParams.getPtr(), resultTypes.getPtr(), in); +} + // ('(' 'import' mod:name nm:name ')')? MaybeResult inlineImport(ParseInput& in) { if (!in.takeSExprStart("import"sv)) { @@ -2041,14 +2322,96 @@ template MaybeResult<> deftype(Ctx& ctx, ParseInput& in) { return Ok{}; } +// local ::= '(' 'local id? t:valtype ')' => [t] +// | '(' 'local t*:valtype* ')' => [t*] +// locals ::= local* +template +MaybeResult locals(Ctx& ctx, ParseInput& in) { + bool hasAny = false; + auto res = ctx.makeLocals(); + while (in.takeSExprStart("local"sv)) { + hasAny = true; + if (auto id = in.takeID()) { + // Single named local + auto type = valtype(ctx, in); + CHECK_ERR(type); + if (!in.takeRParen()) { + return in.err("expected end of local"); + } + ctx.appendLocal(res, *id, *type); + } else { + // Repeated unnamed locals + while (!in.takeRParen()) { + auto type = valtype(ctx, in); + CHECK_ERR(type); + ctx.appendLocal(res, {}, *type); + } + } + } + if (hasAny) { + return res; + } + return {}; +} + +// func ::= '(' 'func' id? ('(' 'export' name ')')* +// x,I:typeuse t*:vec(local) (in:instr)* ')' +// | '(' 'func' id? ('(' 'export' name ')')* +// '(' 'import' mode:name nm:name ')' typeuse ')' +template MaybeResult<> func(Ctx& ctx, ParseInput& in) { + auto pos = in.getPos(); + if (!in.takeSExprStart("func"sv)) { + return {}; + } + + Name name; + if (auto id = in.takeID()) { + name = *id; + } + + auto exports = inlineExports(in); + CHECK_ERR(exports); + + auto import = inlineImport(in); + CHECK_ERR(import); + + auto type = typeuse(ctx, in); + CHECK_ERR(type); + + std::optional localVars; + if (!import) { + if (auto l = locals(ctx, in)) { + CHECK_ERR(l); + localVars = *l; + } + } + + std::optional insts; + if (!import) { + auto i = instrs(ctx, in); + CHECK_ERR(i); + insts = *i; + } + + if (!in.takeRParen()) { + return in.err("expected end of function"); + } + + // TODO: Use `pos` instead of `in` for error position. + CHECK_ERR(ctx.addFunc( + name, *exports, import.getPtr(), *type, localVars, insts, pos, in)); + return Ok{}; +} + // global ::= '(' 'global' id? ('(' 'export' name ')')* gt:globaltype e:expr ')' -// | '(' 'global' id? '(' 'import' mod:name nm:name ')' -// gt:globaltype ')' +// | '(' 'global' id? ('(' 'export' name ')')* +// '(' 'import' mod:name nm:name ')' gt:globaltype ')' template MaybeResult<> global(Ctx& ctx, ParseInput& in) { auto pos = in.getPos(); if (!in.takeSExprStart("global"sv)) { return {}; } + Name name; if (auto id = in.takeID()) { name = *id; @@ -2098,6 +2461,10 @@ MaybeResult<> modulefield(ParseDeclsCtx& ctx, ParseInput& in) { CHECK_ERR(res); return Ok{}; } + if (auto res = func(ctx, in)) { + CHECK_ERR(res); + return Ok{}; + } if (auto res = global(ctx, in)) { CHECK_ERR(res); return Ok{}; @@ -2168,19 +2535,30 @@ Result<> parseModule(Module& wasm, std::string_view input) { } } - // TODO: Parse implicit type definitions. + // Parse implicit type definitions and map typeuses without explicit types to + // the correct types. + std::unordered_map implicitTypes; + { + ParseImplicitTypeDefsCtx ctx(types, implicitTypes, *typeIndices); + for (Index pos : decls.implicitTypeDefs) { + ParseInput in(input, pos); + CHECK_ERR(typeuse(ctx, in)); + } + } { // Parse module-level types. - ParseModuleTypesCtx ctx(wasm, types, *typeIndices); + ParseModuleTypesCtx ctx(wasm, types, implicitTypes, *typeIndices); CHECK_ERR(parseDefs(ctx, input, decls.globalDefs, global)); + CHECK_ERR(parseDefs(ctx, input, decls.funcDefs, func)); // TODO: Parse types of other module elements. } { // Parse definitions. // TODO: Parallelize this. - ParseDefsCtx ctx(wasm, types, *typeIndices); + ParseDefsCtx ctx(wasm, types, implicitTypes, *typeIndices); CHECK_ERR(parseDefs(ctx, input, decls.globalDefs, global)); + CHECK_ERR(parseDefs(ctx, input, decls.funcDefs, func)); } return Ok{}; -- cgit v1.2.3