diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/parser/CMakeLists.txt | 9 | ||||
-rw-r--r-- | src/parser/common.h | 31 | ||||
-rw-r--r-- | src/parser/context-decls.cpp | 194 | ||||
-rw-r--r-- | src/parser/context-defs.cpp | 98 | ||||
-rw-r--r-- | src/parser/contexts.h | 1275 | ||||
-rw-r--r-- | src/parser/input-impl.h | 273 | ||||
-rw-r--r-- | src/parser/input.h | 75 | ||||
-rw-r--r-- | src/parser/lexer.cpp (renamed from src/wasm/wat-lexer.cpp) | 4 | ||||
-rw-r--r-- | src/parser/lexer.h (renamed from src/wat-lexer.h) | 8 | ||||
-rw-r--r-- | src/parser/parsers.h (renamed from src/wasm/wat-parser.cpp) | 1923 | ||||
-rw-r--r-- | src/parser/wat-parser.cpp | 172 | ||||
-rw-r--r-- | src/parser/wat-parser.h | 32 | ||||
-rw-r--r-- | src/wasm/CMakeLists.txt | 2 |
13 files changed, 2197 insertions, 1899 deletions
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt new file mode 100644 index 000000000..bae90379e --- /dev/null +++ b/src/parser/CMakeLists.txt @@ -0,0 +1,9 @@ +FILE(GLOB parser_HEADERS *.h) +set(parser_SOURCES + context-decls.cpp + context-defs.cpp + lexer.cpp + wat-parser.cpp + ${parser_HEADERS} +) +add_library(parser OBJECT ${parser_SOURCES}) diff --git a/src/parser/common.h b/src/parser/common.h new file mode 100644 index 000000000..7adf2e5fa --- /dev/null +++ b/src/parser/common.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef parser_common_h +#define parser_common_h + +#include "support/name.h" + +namespace wasm::WATParser { + +struct ImportNames { + Name mod; + Name nm; +}; + +} // namespace wasm::WATParser + +#endif // parser_common_h diff --git a/src/parser/context-decls.cpp b/src/parser/context-decls.cpp new file mode 100644 index 000000000..f668c67ae --- /dev/null +++ b/src/parser/context-decls.cpp @@ -0,0 +1,194 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "contexts.h" + +namespace wasm::WATParser { + +namespace { + +void applyImportNames(Importable& item, ImportNames* names) { + if (names) { + item.module = names->mod; + item.base = names->nm; + } +} + +Result<> addExports(ParseInput& in, + Module& wasm, + const Named* item, + const std::vector<Name>& exports, + ExternalKind kind) { + for (auto name : exports) { + if (wasm.getExportOrNull(name)) { + // TODO: Fix error location + return in.err("repeated export name"); + } + wasm.addExport(Builder(wasm).makeExport(name, item->name, kind)); + } + return Ok{}; +} + +} // anonymous namespace + +Result<Function*> +ParseDeclsCtx::addFuncDecl(Index pos, Name name, ImportNames* importNames) { + auto f = std::make_unique<Function>(); + if (name.is()) { + if (wasm.getFunctionOrNull(name)) { + // TODO: if the existing function is not explicitly named, fix its name + // and continue. + return in.err(pos, "repeated function name"); + } + f->setExplicitName(name); + } else { + name = (importNames ? 
"fimport$" : "") + std::to_string(funcCounter++); + name = Names::getValidFunctionName(wasm, name); + f->name = name; + } + applyImportNames(*f, importNames); + return wasm.addFunction(std::move(f)); +} + +Result<> ParseDeclsCtx::addFunc(Name name, + const std::vector<Name>& exports, + ImportNames* import, + TypeUseT type, + std::optional<LocalsT>, + std::optional<InstrsT>, + Index pos) { + if (import && hasNonImport) { + return in.err(pos, "import after non-import"); + } + auto f = addFuncDecl(pos, name, import); + CHECK_ERR(f); + CHECK_ERR(addExports(in, wasm, *f, exports, ExternalKind::Function)); + funcDefs.push_back({name, pos, Index(funcDefs.size())}); + return Ok{}; +} + +Result<Memory*> ParseDeclsCtx::addMemoryDecl(Index pos, + Name name, + ImportNames* importNames, + MemType type) { + auto m = std::make_unique<Memory>(); + m->indexType = type.type; + m->initial = type.limits.initial; + m->max = type.limits.max; + m->shared = type.shared; + if (name) { + // TODO: if the existing memory is not explicitly named, fix its name + // and continue. + if (wasm.getMemoryOrNull(name)) { + return in.err(pos, "repeated memory name"); + } + m->setExplicitName(name); + } else { + name = (importNames ? 
"mimport$" : "") + std::to_string(memoryCounter++); + name = Names::getValidMemoryName(wasm, name); + m->name = name; + } + applyImportNames(*m, importNames); + return wasm.addMemory(std::move(m)); +} + +Result<> ParseDeclsCtx::addMemory(Name name, + const std::vector<Name>& exports, + ImportNames* import, + MemType type, + Index pos) { + if (import && hasNonImport) { + return in.err(pos, "import after non-import"); + } + auto m = addMemoryDecl(pos, name, import, type); + CHECK_ERR(m); + CHECK_ERR(addExports(in, wasm, *m, exports, ExternalKind::Memory)); + memoryDefs.push_back({name, pos, Index(memoryDefs.size())}); + return Ok{}; +} + +Result<> ParseDeclsCtx::addImplicitData(DataStringT&& data) { + auto& mem = *wasm.memories.back(); + auto d = std::make_unique<DataSegment>(); + d->memory = mem.name; + d->isPassive = false; + d->offset = Builder(wasm).makeConstPtr(0, mem.indexType); + d->data = std::move(data); + d->name = Names::getValidDataSegmentName(wasm, "implicit-data"); + wasm.addDataSegment(std::move(d)); + return Ok{}; +} + +Result<Global*> +ParseDeclsCtx::addGlobalDecl(Index pos, Name name, ImportNames* importNames) { + auto g = std::make_unique<Global>(); + if (name) { + if (wasm.getGlobalOrNull(name)) { + // TODO: if the existing global is not explicitly named, fix its name + // and continue. + return in.err(pos, "repeated global name"); + } + g->setExplicitName(name); + } else { + name = (importNames ? 
"gimport$" : "") + std::to_string(globalCounter++); + name = Names::getValidGlobalName(wasm, name); + g->name = name; + } + applyImportNames(*g, importNames); + return wasm.addGlobal(std::move(g)); +} + +Result<> ParseDeclsCtx::addGlobal(Name name, + const std::vector<Name>& exports, + ImportNames* import, + GlobalTypeT, + std::optional<ExprT>, + Index pos) { + if (import && hasNonImport) { + return in.err(pos, "import after non-import"); + } + auto g = addGlobalDecl(pos, name, import); + CHECK_ERR(g); + CHECK_ERR(addExports(in, wasm, *g, exports, ExternalKind::Global)); + globalDefs.push_back({name, pos, Index(globalDefs.size())}); + return Ok{}; +} + +Result<> ParseDeclsCtx::addData(Name name, + MemoryIdxT*, + std::optional<ExprT>, + std::vector<char>&& data, + Index pos) { + auto d = std::make_unique<DataSegment>(); + if (name) { + if (wasm.getDataSegmentOrNull(name)) { + // TODO: if the existing segment is not explicitly named, fix its name + // and continue. + return in.err(pos, "repeated data segment name"); + } + d->setExplicitName(name); + } else { + name = std::to_string(dataCounter++); + name = Names::getValidDataSegmentName(wasm, name); + d->name = name; + } + d->data = std::move(data); + dataDefs.push_back({name, pos, Index(wasm.dataSegments.size())}); + wasm.addDataSegment(std::move(d)); + return Ok{}; +} + +} // namespace wasm::WATParser diff --git a/src/parser/context-defs.cpp b/src/parser/context-defs.cpp new file mode 100644 index 000000000..ca8f61ec3 --- /dev/null +++ b/src/parser/context-defs.cpp @@ -0,0 +1,98 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "contexts.h" + +namespace wasm::WATParser { + +Result<typename ParseDefsCtx::TypeUseT> +ParseDefsCtx::makeTypeUse(Index pos, + std::optional<HeapTypeT> type, + ParamsT* params, + ResultsT* results) { + if (type && (params || results)) { + std::vector<Type> paramTypes; + if (params) { + paramTypes = getUnnamedTypes(*params); + } + + std::vector<Type> resultTypes; + if (results) { + resultTypes = *results; + } + + auto sig = Signature(Type(paramTypes), Type(resultTypes)); + + if (!type->isSignature() || type->getSignature() != sig) { + return in.err(pos, "type does not match provided signature"); + } + } + + if (type) { + return *type; + } + + auto it = implicitTypes.find(pos); + assert(it != implicitTypes.end()); + return it->second; +} + +Result<> ParseDefsCtx::addFunc(Name, + const std::vector<Name>&, + ImportNames*, + TypeUseT, + std::optional<LocalsT>, + std::optional<InstrsT>, + Index pos) { + CHECK_ERR(withLoc(pos, irBuilder.visitEnd())); + auto body = irBuilder.build(); + CHECK_ERR(withLoc(pos, body)); + wasm.functions[index]->body = *body; + return Ok{}; +} + +Result<> ParseDefsCtx::addGlobal(Name, + const std::vector<Name>&, + ImportNames*, + GlobalTypeT, + std::optional<ExprT> exp, + Index) { + if (exp) { + wasm.globals[index]->init = *exp; + } + return Ok{}; +} + +Result<> ParseDefsCtx::addData( + Name, Name* mem, std::optional<ExprT> offset, DataStringT, Index pos) { + auto& d = wasm.dataSegments[index]; + if (offset) { + d->isPassive = false; + d->offset = *offset; + if (mem) { + d->memory = *mem; + } else if 
(wasm.memories.size() > 0) { + d->memory = wasm.memories[0]->name; + } else { + return in.err(pos, "active segment with no memory"); + } + } else { + d->isPassive = true; + } + return Ok{}; +} + +} // namespace wasm::WATParser diff --git a/src/parser/contexts.h b/src/parser/contexts.h new file mode 100644 index 000000000..210945e8d --- /dev/null +++ b/src/parser/contexts.h @@ -0,0 +1,1275 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef parser_context_h +#define parser_context_h + +#include "common.h" +#include "input.h" +#include "ir/names.h" +#include "support/name.h" +#include "support/result.h" +#include "wasm-builder.h" +#include "wasm-ir-builder.h" +#include "wasm.h" + +namespace wasm::WATParser { + +using IndexMap = std::unordered_map<Name, Index>; + +inline std::vector<Type> getUnnamedTypes(const std::vector<NameType>& named) { + std::vector<Type> types; + types.reserve(named.size()); + for (auto& t : named) { + types.push_back(t.type); + } + return types; +} + +struct Limits { + uint64_t initial; + uint64_t max; +}; + +struct MemType { + Type type; + Limits limits; + bool shared; +}; + +struct Memarg { + uint64_t offset; + uint32_t align; +}; + +// The location, possible name, and index in the respective module index space +// of a module-level definition in the input. 
+struct DefPos { + Name name; + Index pos; + Index index; +}; + +struct GlobalType { + Mutability mutability; + Type type; +}; + +// A signature type and parameter names (possibly empty), used for parsing +// function types. +struct TypeUse { + HeapType type; + std::vector<Name> names; +}; + +struct NullTypeParserCtx { + using IndexT = Ok; + using HeapTypeT = Ok; + using TypeT = Ok; + using ParamsT = Ok; + using ResultsT = size_t; + using BlockTypeT = Ok; + using SignatureT = Ok; + using StorageT = Ok; + using FieldT = Ok; + using FieldsT = Ok; + using StructT = Ok; + using ArrayT = Ok; + using LimitsT = Ok; + using MemTypeT = Ok; + using GlobalTypeT = Ok; + using TypeUseT = Ok; + using LocalsT = Ok; + using DataStringT = Ok; + + HeapTypeT makeFunc() { return Ok{}; } + HeapTypeT makeAny() { return Ok{}; } + HeapTypeT makeExtern() { return Ok{}; } + HeapTypeT makeEq() { return Ok{}; } + HeapTypeT makeI31() { return Ok{}; } + HeapTypeT makeStructType() { return Ok{}; } + HeapTypeT makeArrayType() { return Ok{}; } + + TypeT makeI32() { return Ok{}; } + TypeT makeI64() { return Ok{}; } + TypeT makeF32() { return Ok{}; } + TypeT makeF64() { return Ok{}; } + TypeT makeV128() { return Ok{}; } + + TypeT makeRefType(HeapTypeT, Nullability) { return Ok{}; } + + ParamsT makeParams() { return Ok{}; } + void appendParam(ParamsT&, Name, TypeT) {} + + // We have to count results because whether or not a block introduces a + // typeuse that may implicitly define a type depends on how many results it + // has. 
+ size_t makeResults() { return 0; } + void appendResult(size_t& results, TypeT) { ++results; } + size_t getResultsSize(size_t results) { return results; } + + SignatureT makeFuncType(ParamsT*, ResultsT*) { return Ok{}; } + + StorageT makeI8() { return Ok{}; } + StorageT makeI16() { return Ok{}; } + StorageT makeStorageType(TypeT) { return Ok{}; } + + FieldT makeFieldType(StorageT, Mutability) { return Ok{}; } + + FieldsT makeFields() { return Ok{}; } + void appendField(FieldsT&, Name, FieldT) {} + + StructT makeStruct(FieldsT&) { return Ok{}; } + + std::optional<ArrayT> makeArray(FieldsT&) { return Ok{}; } + + GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; } + + LocalsT makeLocals() { return Ok{}; } + void appendLocal(LocalsT&, Name, TypeT) {} + + Result<Index> getTypeIndex(Name) { return 1; } + Result<HeapTypeT> getHeapTypeFromIdx(Index) { return Ok{}; } + + DataStringT makeDataString() { return Ok{}; } + void appendDataString(DataStringT&, std::string_view) {} + + MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } + + BlockTypeT getBlockTypeFromResult(size_t results) { return Ok{}; } + + Result<> getBlockTypeFromTypeUse(Index, TypeUseT) { return Ok{}; } +}; + +template<typename Ctx> struct TypeParserCtx { + using IndexT = Index; + using HeapTypeT = HeapType; + using TypeT = Type; + using ParamsT = std::vector<NameType>; + using ResultsT = std::vector<Type>; + using BlockTypeT = HeapType; + using SignatureT = Signature; + using StorageT = Field; + using FieldT = Field; + using FieldsT = std::pair<std::vector<Name>, std::vector<Field>>; + using StructT = std::pair<std::vector<Name>, Struct>; + using ArrayT = Array; + using LimitsT = Ok; + using MemTypeT = Ok; + using LocalsT = std::vector<NameType>; + using DataStringT = Ok; + + // Map heap type names to their indices. 
+ const IndexMap& typeIndices; + + TypeParserCtx(const IndexMap& typeIndices) : typeIndices(typeIndices) {} + + Ctx& self() { return *static_cast<Ctx*>(this); } + + HeapTypeT makeFunc() { return HeapType::func; } + HeapTypeT makeAny() { return HeapType::any; } + HeapTypeT makeExtern() { return HeapType::ext; } + HeapTypeT makeEq() { return HeapType::eq; } + HeapTypeT makeI31() { return HeapType::i31; } + HeapTypeT makeStructType() { return HeapType::struct_; } + HeapTypeT makeArrayType() { return HeapType::array; } + + TypeT makeI32() { return Type::i32; } + TypeT makeI64() { return Type::i64; } + TypeT makeF32() { return Type::f32; } + TypeT makeF64() { return Type::f64; } + TypeT makeV128() { return Type::v128; } + + TypeT makeRefType(HeapTypeT ht, Nullability nullability) { + return Type(ht, nullability); + } + + TypeT makeTupleType(const std::vector<Type> types) { return Tuple(types); } + + ParamsT makeParams() { return {}; } + void appendParam(ParamsT& params, Name id, TypeT type) { + params.push_back({id, type}); + } + + ResultsT makeResults() { return {}; } + void appendResult(ResultsT& results, TypeT type) { results.push_back(type); } + size_t getResultsSize(const ResultsT& results) { return results.size(); } + + SignatureT makeFuncType(ParamsT* params, ResultsT* results) { + std::vector<Type> empty; + const auto& paramTypes = params ? getUnnamedTypes(*params) : empty; + const auto& resultTypes = results ? 
*results : empty; + return Signature(self().makeTupleType(paramTypes), + self().makeTupleType(resultTypes)); + } + + StorageT makeI8() { return Field(Field::i8, Immutable); } + StorageT makeI16() { return Field(Field::i16, Immutable); } + StorageT makeStorageType(TypeT type) { return Field(type, Immutable); } + + FieldT makeFieldType(FieldT field, Mutability mutability) { + if (field.packedType == Field::not_packed) { + return Field(field.type, mutability); + } + return Field(field.packedType, mutability); + } + + FieldsT makeFields() { return {}; } + void appendField(FieldsT& fields, Name name, FieldT field) { + fields.first.push_back(name); + fields.second.push_back(field); + } + + StructT makeStruct(FieldsT& fields) { + return {std::move(fields.first), Struct(std::move(fields.second))}; + } + + std::optional<ArrayT> makeArray(FieldsT& fields) { + if (fields.second.size() == 1) { + return Array(fields.second[0]); + } + return {}; + } + + LocalsT makeLocals() { return {}; } + void appendLocal(LocalsT& locals, Name id, TypeT type) { + locals.push_back({id, type}); + } + + Result<Index> getTypeIndex(Name id) { + auto it = typeIndices.find(id); + if (it == typeIndices.end()) { + return self().in.err("unknown type identifier"); + } + return it->second; + } + + DataStringT makeDataString() { return Ok{}; } + void appendDataString(DataStringT&, std::string_view) {} + + LimitsT makeLimits(uint64_t, std::optional<uint64_t>) { return Ok{}; } + LimitsT getLimitsFromData(DataStringT) { return Ok{}; } + + MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } + + HeapType getBlockTypeFromResult(const std::vector<Type> results) { + assert(results.size() == 1); + return HeapType(Signature(Type::none, results[0])); + } +}; + +struct NullInstrParserCtx { + using InstrT = Ok; + using InstrsT = Ok; + using ExprT = Ok; + + using FieldIdxT = Ok; + using LocalIdxT = Ok; + using GlobalIdxT = Ok; + using MemoryIdxT = Ok; + using DataIdxT = Ok; + + using MemargT = Ok; + + InstrsT 
makeInstrs() { return Ok{}; } + void appendInstr(InstrsT&, InstrT) {} + InstrsT finishInstrs(InstrsT&) { return Ok{}; } + + ExprT makeExpr(InstrsT) { return Ok{}; } + Result<ExprT> instrToExpr(InstrT) { return Ok{}; } + + template<typename HeapTypeT> FieldIdxT getFieldFromIdx(HeapTypeT, uint32_t) { + return Ok{}; + } + template<typename HeapTypeT> FieldIdxT getFieldFromName(HeapTypeT, Name) { + return Ok{}; + } + LocalIdxT getLocalFromIdx(uint32_t) { return Ok{}; } + LocalIdxT getLocalFromName(Name) { return Ok{}; } + GlobalIdxT getGlobalFromIdx(uint32_t) { return Ok{}; } + GlobalIdxT getGlobalFromName(Name) { return Ok{}; } + MemoryIdxT getMemoryFromIdx(uint32_t) { return Ok{}; } + MemoryIdxT getMemoryFromName(Name) { return Ok{}; } + DataIdxT getDataFromIdx(uint32_t) { return Ok{}; } + DataIdxT getDataFromName(Name) { return Ok{}; } + + MemargT getMemarg(uint64_t, uint32_t) { return Ok{}; } + + template<typename BlockTypeT> + InstrT makeBlock(Index, std::optional<Name>, BlockTypeT) { + return Ok{}; + } + InstrT finishBlock(Index, InstrsT) { return Ok{}; } + + InstrT makeUnreachable(Index) { return Ok{}; } + InstrT makeNop(Index) { return Ok{}; } + InstrT makeBinary(Index, BinaryOp) { return Ok{}; } + InstrT makeUnary(Index, UnaryOp) { return Ok{}; } + template<typename ResultsT> InstrT makeSelect(Index, ResultsT*) { + return Ok{}; + } + InstrT makeDrop(Index) { return Ok{}; } + InstrT makeMemorySize(Index, MemoryIdxT*) { return Ok{}; } + InstrT makeMemoryGrow(Index, MemoryIdxT*) { return Ok{}; } + InstrT makeLocalGet(Index, LocalIdxT) { return Ok{}; } + InstrT makeLocalTee(Index, LocalIdxT) { return Ok{}; } + InstrT makeLocalSet(Index, LocalIdxT) { return Ok{}; } + InstrT makeGlobalGet(Index, GlobalIdxT) { return Ok{}; } + InstrT makeGlobalSet(Index, GlobalIdxT) { return Ok{}; } + + InstrT makeI32Const(Index, uint32_t) { return Ok{}; } + InstrT makeI64Const(Index, uint64_t) { return Ok{}; } + InstrT makeF32Const(Index, float) { return Ok{}; } + InstrT 
makeF64Const(Index, double) { return Ok{}; } + InstrT makeLoad(Index, Type, bool, int, bool, MemoryIdxT*, MemargT) { + return Ok{}; + } + InstrT makeStore(Index, Type, int, bool, MemoryIdxT*, MemargT) { + return Ok{}; + } + InstrT makeAtomicRMW(Index, AtomicRMWOp, Type, int, MemoryIdxT*, MemargT) { + return Ok{}; + } + InstrT makeAtomicCmpxchg(Index, Type, int, MemoryIdxT*, MemargT) { + return Ok{}; + } + InstrT makeAtomicWait(Index, Type, MemoryIdxT*, MemargT) { return Ok{}; } + InstrT makeAtomicNotify(Index, MemoryIdxT*, MemargT) { return Ok{}; } + InstrT makeAtomicFence(Index) { return Ok{}; } + InstrT makeSIMDExtract(Index, SIMDExtractOp, uint8_t) { return Ok{}; } + InstrT makeSIMDReplace(Index, SIMDReplaceOp, uint8_t) { return Ok{}; } + InstrT makeSIMDShuffle(Index, const std::array<uint8_t, 16>&) { return Ok{}; } + InstrT makeSIMDTernary(Index, SIMDTernaryOp) { return Ok{}; } + InstrT makeSIMDShift(Index, SIMDShiftOp) { return Ok{}; } + InstrT makeSIMDLoad(Index, SIMDLoadOp, MemoryIdxT*, MemargT) { return Ok{}; } + InstrT makeSIMDLoadStoreLane( + Index, SIMDLoadStoreLaneOp, MemoryIdxT*, MemargT, uint8_t) { + return Ok{}; + } + InstrT makeMemoryInit(Index, MemoryIdxT*, DataIdxT) { return Ok{}; } + InstrT makeDataDrop(Index, DataIdxT) { return Ok{}; } + + InstrT makeMemoryCopy(Index, MemoryIdxT*, MemoryIdxT*) { return Ok{}; } + InstrT makeMemoryFill(Index, MemoryIdxT*) { return Ok{}; } + + InstrT makeReturn(Index) { return Ok{}; } + template<typename HeapTypeT> InstrT makeRefNull(Index, HeapTypeT) { + return Ok{}; + } + InstrT makeRefIsNull(Index) { return Ok{}; } + + InstrT makeRefEq(Index) { return Ok{}; } + + InstrT makeRefI31(Index) { return Ok{}; } + InstrT makeI31Get(Index, bool) { return Ok{}; } + + template<typename HeapTypeT> InstrT makeStructNew(Index, HeapTypeT) { + return Ok{}; + } + template<typename HeapTypeT> InstrT makeStructNewDefault(Index, HeapTypeT) { + return Ok{}; + } + template<typename HeapTypeT> + InstrT makeStructGet(Index, HeapTypeT, 
FieldIdxT, bool) { + return Ok{}; + } + template<typename HeapTypeT> + InstrT makeStructSet(Index, HeapTypeT, FieldIdxT) { + return Ok{}; + } + template<typename HeapTypeT> InstrT makeArrayNew(Index, HeapTypeT) { + return Ok{}; + } + template<typename HeapTypeT> InstrT makeArrayNewDefault(Index, HeapTypeT) { + return Ok{}; + } + template<typename HeapTypeT> + InstrT makeArrayNewData(Index, HeapTypeT, DataIdxT) { + return Ok{}; + } + template<typename HeapTypeT> + InstrT makeArrayNewElem(Index, HeapTypeT, DataIdxT) { + return Ok{}; + } + template<typename HeapTypeT> InstrT makeArrayGet(Index, HeapTypeT, bool) { + return Ok{}; + } + template<typename HeapTypeT> InstrT makeArraySet(Index, HeapTypeT) { + return Ok{}; + } + InstrT makeArrayLen(Index) { return Ok{}; } + template<typename HeapTypeT> + InstrT makeArrayCopy(Index, HeapTypeT, HeapTypeT) { + return Ok{}; + } + template<typename HeapTypeT> InstrT makeArrayFill(Index, HeapTypeT) { + return Ok{}; + } +}; + +// Phase 1: Parse definition spans for top-level module elements and determine +// their indices and names. +struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { + using DataStringT = std::vector<char>; + using LimitsT = Limits; + using MemTypeT = MemType; + + ParseInput in; + + // At this stage we only look at types to find implicit type definitions, + // which are inserted directly into the context. We cannot materialize or + // validate any types because we don't know what types exist yet. + // + // Declared module elements are inserted into the module, but their bodies are + // not filled out until later parsing phases. + Module& wasm; + + // The module element definitions we are parsing in this phase. + std::vector<DefPos> typeDefs; + std::vector<DefPos> subtypeDefs; + std::vector<DefPos> funcDefs; + std::vector<DefPos> memoryDefs; + std::vector<DefPos> globalDefs; + std::vector<DefPos> dataDefs; + + // Positions of typeuses that might implicitly define new types. 
+ std::vector<Index> implicitTypeDefs; + + // Counters used for generating names for module elements. + int funcCounter = 0; + int memoryCounter = 0; + int globalCounter = 0; + int dataCounter = 0; + + // Used to verify that all imports come before all non-imports. + bool hasNonImport = false; + + ParseDeclsCtx(std::string_view in, Module& wasm) : in(in), wasm(wasm) {} + + void addFuncType(SignatureT) {} + void addStructType(StructT) {} + void addArrayType(ArrayT) {} + void setOpen() {} + Result<> addSubtype(Index) { return Ok{}; } + void finishSubtype(Name name, Index pos) { + subtypeDefs.push_back({name, pos, Index(subtypeDefs.size())}); + } + size_t getRecGroupStartIndex() { return 0; } + void addRecGroup(Index, size_t) {} + void finishDeftype(Index pos) { + typeDefs.push_back({{}, pos, Index(typeDefs.size())}); + } + + std::vector<char> makeDataString() { return {}; } + void appendDataString(std::vector<char>& data, std::string_view str) { + data.insert(data.end(), str.begin(), str.end()); + } + + Limits makeLimits(uint64_t n, std::optional<uint64_t> m) { + return m ? 
Limits{n, *m} : Limits{n, Memory::kUnlimitedSize}; + } + Limits getLimitsFromData(const std::vector<char>& data) { + uint64_t size = (data.size() + Memory::kPageSize - 1) / Memory::kPageSize; + return {size, size}; + } + + MemType makeMemType(Type type, Limits limits, bool shared) { + return {type, limits, shared}; + } + + Result<TypeUseT> + makeTypeUse(Index pos, std::optional<HeapTypeT> type, ParamsT*, ResultsT*) { + if (!type) { + implicitTypeDefs.push_back(pos); + } + return Ok{}; + } + + Result<Function*> addFuncDecl(Index pos, Name name, ImportNames* importNames); + Result<> addFunc(Name name, + const std::vector<Name>& exports, + ImportNames* import, + TypeUseT type, + std::optional<LocalsT>, + std::optional<InstrsT>, + Index pos); + + Result<Memory*> + addMemoryDecl(Index pos, Name name, ImportNames* importNames, MemType type); + + Result<> addMemory(Name name, + const std::vector<Name>& exports, + ImportNames* import, + MemType type, + Index pos); + + Result<> addImplicitData(DataStringT&& data); + + Result<Global*> addGlobalDecl(Index pos, Name name, ImportNames* importNames); + + Result<> addGlobal(Name name, + const std::vector<Name>& exports, + ImportNames* import, + GlobalTypeT, + std::optional<ExprT>, + Index pos); + + Result<> addData(Name name, + MemoryIdxT*, + std::optional<ExprT>, + std::vector<char>&& data, + Index pos); +}; + +// Phase 2: Parse type definitions into a TypeBuilder. +struct ParseTypeDefsCtx : TypeParserCtx<ParseTypeDefsCtx> { + ParseInput in; + + // We update slots in this builder as we parse type definitions. + TypeBuilder& builder; + + // Parse the names of types and fields as we go. + std::vector<TypeNames> names; + + // The index of the subtype definition we are parsing. 
+ Index index = 0; + + ParseTypeDefsCtx(std::string_view in, + TypeBuilder& builder, + const IndexMap& typeIndices) + : TypeParserCtx<ParseTypeDefsCtx>(typeIndices), in(in), builder(builder), + names(builder.size()) {} + + TypeT makeRefType(HeapTypeT ht, Nullability nullability) { + return builder.getTempRefType(ht, nullability); + } + + TypeT makeTupleType(const std::vector<Type> types) { + return builder.getTempTupleType(types); + } + + Result<HeapTypeT> getHeapTypeFromIdx(Index idx) { + if (idx >= builder.size()) { + return in.err("type index out of bounds"); + } + return builder[idx]; + } + + void addFuncType(SignatureT& type) { builder[index] = type; } + + void addStructType(StructT& type) { + auto& [fieldNames, str] = type; + builder[index] = str; + for (Index i = 0; i < fieldNames.size(); ++i) { + if (auto name = fieldNames[i]; name.is()) { + names[index].fieldNames[i] = name; + } + } + } + + void addArrayType(ArrayT& type) { builder[index] = type; } + + void setOpen() { builder[index].setOpen(); } + + Result<> addSubtype(Index super) { + if (super >= builder.size()) { + return in.err("supertype index out of bounds"); + } + builder[index].subTypeOf(builder[super]); + return Ok{}; + } + + void finishSubtype(Name name, Index pos) { names[index++].name = name; } + + size_t getRecGroupStartIndex() { return index; } + + void addRecGroup(Index start, size_t len) { + builder.createRecGroup(start, len); + } + + void finishDeftype(Index) {} +}; + +// Phase 3: Parse type uses to find implicitly defined types. +struct ParseImplicitTypeDefsCtx : TypeParserCtx<ParseImplicitTypeDefsCtx> { + using TypeUseT = Ok; + + ParseInput in; + + // Types parsed so far. + std::vector<HeapType>& types; + + // Map typeuse positions without an explicit type to the correct type. + std::unordered_map<Index, HeapType>& implicitTypes; + + // Map signatures to the first defined heap type they match. 
+ std::unordered_map<Signature, HeapType> sigTypes; + + ParseImplicitTypeDefsCtx(std::string_view in, + std::vector<HeapType>& types, + std::unordered_map<Index, HeapType>& implicitTypes, + const IndexMap& typeIndices) + : TypeParserCtx<ParseImplicitTypeDefsCtx>(typeIndices), in(in), + types(types), implicitTypes(implicitTypes) { + for (auto type : types) { + if (type.isSignature() && type.getRecGroup().size() == 1) { + sigTypes.insert({type.getSignature(), type}); + } + } + } + + Result<HeapTypeT> getHeapTypeFromIdx(Index idx) { + if (idx >= types.size()) { + return in.err("type index out of bounds"); + } + return types[idx]; + } + + Result<TypeUseT> makeTypeUse(Index pos, + std::optional<HeapTypeT>, + ParamsT* params, + ResultsT* results) { + std::vector<Type> paramTypes; + if (params) { + paramTypes = getUnnamedTypes(*params); + } + + std::vector<Type> resultTypes; + if (results) { + resultTypes = *results; + } + + auto sig = Signature(Type(paramTypes), Type(resultTypes)); + auto [it, inserted] = sigTypes.insert({sig, HeapType::func}); + if (inserted) { + auto type = HeapType(sig); + it->second = type; + types.push_back(type); + } + implicitTypes.insert({pos, it->second}); + + return Ok{}; + } +}; + +// Phase 4: Parse and set the types of module elements. +struct ParseModuleTypesCtx : TypeParserCtx<ParseModuleTypesCtx>, + NullInstrParserCtx { + // In this phase we have constructed all the types, so we can materialize and + // validate them when they are used. + + using GlobalTypeT = GlobalType; + using TypeUseT = TypeUse; + + ParseInput in; + + Module& wasm; + + const std::vector<HeapType>& types; + const std::unordered_map<Index, HeapType>& implicitTypes; + + // The index of the current type. 
+ Index index = 0; + + ParseModuleTypesCtx(std::string_view in, + Module& wasm, + const std::vector<HeapType>& types, + const std::unordered_map<Index, HeapType>& implicitTypes, + const IndexMap& typeIndices) + : TypeParserCtx<ParseModuleTypesCtx>(typeIndices), in(in), wasm(wasm), + types(types), implicitTypes(implicitTypes) {} + + Result<HeapTypeT> getHeapTypeFromIdx(Index idx) { + if (idx >= types.size()) { + return in.err("type index out of bounds"); + } + return types[idx]; + } + + Result<TypeUseT> makeTypeUse(Index pos, + std::optional<HeapTypeT> type, + ParamsT* params, + ResultsT* results) { + std::vector<Name> ids; + if (params) { + ids.reserve(params->size()); + for (auto& p : *params) { + ids.push_back(p.name); + } + } + + if (type) { + return TypeUse{*type, ids}; + } + + auto it = implicitTypes.find(pos); + assert(it != implicitTypes.end()); + + return TypeUse{it->second, ids}; + } + + Result<HeapType> getBlockTypeFromTypeUse(Index pos, TypeUse use) { + assert(use.type.isSignature()); + if (use.type.getSignature().params != Type::none) { + return in.err(pos, "block parameters not yet supported"); + } + // TODO: Once we support block parameters, return an error here if any of + // them are named. 
+ return use.type; + } + + GlobalTypeT makeGlobalType(Mutability mutability, TypeT type) { + return {mutability, type}; + } + + Result<> addFunc(Name name, + const std::vector<Name>&, + ImportNames*, + TypeUse type, + std::optional<LocalsT> locals, + std::optional<InstrsT>, + Index pos) { + auto& f = wasm.functions[index]; + if (!type.type.isSignature()) { + return in.err(pos, "expected signature type"); + } + f->type = type.type; + for (Index i = 0; i < type.names.size(); ++i) { + if (type.names[i].is()) { + f->setLocalName(i, type.names[i]); + } + } + if (locals) { + for (auto& l : *locals) { + Builder::addVar(f.get(), l.name, l.type); + } + } + return Ok{}; + } + + Result<> + addMemory(Name, const std::vector<Name>&, ImportNames*, MemTypeT, Index) { + return Ok{}; + } + + Result<> addImplicitData(DataStringT&& data) { return Ok{}; } + + Result<> addGlobal(Name, + const std::vector<Name>&, + ImportNames*, + GlobalType type, + std::optional<ExprT>, + Index) { + auto& g = wasm.globals[index]; + g->mutable_ = type.mutability; + g->type = type.type; + return Ok{}; + } +}; + +// Phase 5: Parse module element definitions, including instructions. +struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> { + using GlobalTypeT = Ok; + using TypeUseT = HeapType; + + // Keep track of instructions internally rather than letting the general + // parser collect them. + using InstrT = Ok; + using InstrsT = Ok; + using ExprT = Expression*; + + using FieldIdxT = Index; + using LocalIdxT = Index; + using GlobalIdxT = Name; + using MemoryIdxT = Name; + using DataIdxT = Name; + + using MemargT = Memarg; + + ParseInput in; + + Module& wasm; + Builder builder; + + const std::vector<HeapType>& types; + const std::unordered_map<Index, HeapType>& implicitTypes; + + // The index of the current module element. + Index index = 0; + + // The current function being parsed, used to create scratch locals, type + // local.get, etc. 
+ Function* func = nullptr; + + IRBuilder irBuilder; + + void setFunction(Function* func) { + this->func = func; + irBuilder.setFunction(func); + } + + ParseDefsCtx(std::string_view in, + Module& wasm, + const std::vector<HeapType>& types, + const std::unordered_map<Index, HeapType>& implicitTypes, + const IndexMap& typeIndices) + : TypeParserCtx(typeIndices), in(in), wasm(wasm), builder(wasm), + types(types), implicitTypes(implicitTypes), irBuilder(wasm) {} + + template<typename T> Result<T> withLoc(Index pos, Result<T> res) { + if (auto err = res.getErr()) { + return in.err(pos, err->msg); + } + return res; + } + + template<typename T> Result<T> withLoc(Result<T> res) { + return withLoc(in.getPos(), res); + } + + HeapType getBlockTypeFromResult(const std::vector<Type> results) { + assert(results.size() == 1); + return HeapType(Signature(Type::none, results[0])); + } + + Result<HeapType> getBlockTypeFromTypeUse(Index pos, HeapType type) { + return type; + } + + Ok makeInstrs() { return Ok{}; } + + void appendInstr(Ok&, InstrT instr) {} + + Result<InstrsT> finishInstrs(Ok&) { return Ok{}; } + + Result<Expression*> instrToExpr(Ok&) { return irBuilder.build(); } + + GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; } + + Result<HeapTypeT> getHeapTypeFromIdx(Index idx) { + if (idx >= types.size()) { + return in.err("type index out of bounds"); + } + return types[idx]; + } + + Result<Index> getFieldFromIdx(HeapType type, uint32_t idx) { + if (!type.isStruct()) { + return in.err("expected struct type"); + } + if (idx >= type.getStruct().fields.size()) { + return in.err("struct index out of bounds"); + } + return idx; + } + + Result<Index> getFieldFromName(HeapType type, Name name) { + // TODO: Field names + return in.err("symbolic field names note yet supported"); + } + + Result<Index> getLocalFromIdx(uint32_t idx) { + if (!func) { + return in.err("cannot access locals outside of a function"); + } + if (idx >= func->getNumLocals()) { + return in.err("local 
index out of bounds"); + } + return idx; + } + + Result<Index> getLocalFromName(Name name) { + if (!func) { + return in.err("cannot access locals outside of a function"); + } + if (!func->hasLocalIndex(name)) { + return in.err("local $" + name.toString() + " does not exist"); + } + return func->getLocalIndex(name); + } + + Result<Name> getGlobalFromIdx(uint32_t idx) { + if (idx >= wasm.globals.size()) { + return in.err("global index out of bounds"); + } + return wasm.globals[idx]->name; + } + + Result<Name> getGlobalFromName(Name name) { + if (!wasm.getGlobalOrNull(name)) { + return in.err("global $" + name.toString() + " does not exist"); + } + return name; + } + + Result<Name> getMemoryFromIdx(uint32_t idx) { + if (idx >= wasm.memories.size()) { + return in.err("memory index out of bounds"); + } + return wasm.memories[idx]->name; + } + + Result<Name> getMemoryFromName(Name name) { + if (!wasm.getMemoryOrNull(name)) { + return in.err("memory $" + name.toString() + " does not exist"); + } + return name; + } + + Result<Name> getDataFromIdx(uint32_t idx) { + if (idx >= wasm.dataSegments.size()) { + return in.err("data index out of bounds"); + } + return wasm.dataSegments[idx]->name; + } + + Result<Name> getDataFromName(Name name) { + if (!wasm.getDataSegmentOrNull(name)) { + return in.err("data $" + name.toString() + " does not exist"); + } + return name; + } + + Result<TypeUseT> makeTypeUse(Index pos, + std::optional<HeapTypeT> type, + ParamsT* params, + ResultsT* results); + Result<> addFunc(Name, + const std::vector<Name>&, + ImportNames*, + TypeUseT, + std::optional<LocalsT>, + std::optional<InstrsT>, + Index pos); + + Result<> addGlobal(Name, + const std::vector<Name>&, + ImportNames*, + GlobalTypeT, + std::optional<ExprT> exp, + Index); + Result<> + addData(Name, Name* mem, std::optional<ExprT> offset, DataStringT, Index pos); + Result<Index> addScratchLocal(Index pos, Type type) { + if (!func) { + return in.err(pos, + "scratch local required, but there is no 
function context"); + } + Name name = Names::getValidLocalName(*func, "scratch"); + return Builder::addVar(func, name, type); + } + + Result<Expression*> makeExpr(InstrsT& instrs) { return irBuilder.build(); } + + Memarg getMemarg(uint64_t offset, uint32_t align) { return {offset, align}; } + + Result<Name> getMemory(Index pos, Name* mem) { + if (mem) { + return *mem; + } + if (wasm.memories.empty()) { + return in.err(pos, "memory required, but there is no memory"); + } + return wasm.memories[0]->name; + } + + Result<> makeBlock(Index pos, std::optional<Name> label, HeapType type) { + // TODO: validate labels? + // TODO: Move error on input types to here? + return withLoc(pos, + irBuilder.makeBlock(label ? *label : Name{}, + type.getSignature().results)); + } + + Result<> finishBlock(Index pos, InstrsT) { + return withLoc(pos, irBuilder.visitEnd()); + } + + Result<> makeUnreachable(Index pos) { + return withLoc(pos, irBuilder.makeUnreachable()); + } + + Result<> makeNop(Index pos) { return withLoc(pos, irBuilder.makeNop()); } + + Result<> makeBinary(Index pos, BinaryOp op) { + return withLoc(pos, irBuilder.makeBinary(op)); + } + + Result<> makeUnary(Index pos, UnaryOp op) { + return withLoc(pos, irBuilder.makeUnary(op)); + } + + Result<> makeSelect(Index pos, std::vector<Type>* res) { + if (res && res->size()) { + if (res->size() > 1) { + return in.err(pos, "select may not have more than one result type"); + } + return withLoc(pos, irBuilder.makeSelect((*res)[0])); + } + return withLoc(pos, irBuilder.makeSelect()); + } + + Result<> makeDrop(Index pos) { return withLoc(pos, irBuilder.makeDrop()); } + + Result<> makeMemorySize(Index pos, Name* mem) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, irBuilder.makeMemorySize(*m)); + } + + Result<> makeMemoryGrow(Index pos, Name* mem) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, irBuilder.makeMemoryGrow(*m)); + } + + Result<> makeLocalGet(Index pos, Index local) { + return 
withLoc(pos, irBuilder.makeLocalGet(local)); + } + + Result<> makeLocalTee(Index pos, Index local) { + return withLoc(pos, irBuilder.makeLocalTee(local)); + } + + Result<> makeLocalSet(Index pos, Index local) { + return withLoc(pos, irBuilder.makeLocalSet(local)); + } + + Result<> makeGlobalGet(Index pos, Name global) { + return withLoc(pos, irBuilder.makeGlobalGet(global)); + } + + Result<> makeGlobalSet(Index pos, Name global) { + assert(wasm.getGlobalOrNull(global)); + return withLoc(pos, irBuilder.makeGlobalSet(global)); + } + + Result<> makeI32Const(Index pos, uint32_t c) { + return withLoc(pos, irBuilder.makeConst(Literal(c))); + } + + Result<> makeI64Const(Index pos, uint64_t c) { + return withLoc(pos, irBuilder.makeConst(Literal(c))); + } + + Result<> makeF32Const(Index pos, float c) { + return withLoc(pos, irBuilder.makeConst(Literal(c))); + } + + Result<> makeF64Const(Index pos, double c) { + return withLoc(pos, irBuilder.makeConst(Literal(c))); + } + + Result<> makeLoad(Index pos, + Type type, + bool signed_, + int bytes, + bool isAtomic, + Name* mem, + Memarg memarg) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + if (isAtomic) { + return withLoc(pos, + irBuilder.makeAtomicLoad(bytes, memarg.offset, type, *m)); + } + return withLoc(pos, + irBuilder.makeLoad( + bytes, signed_, memarg.offset, memarg.align, type, *m)); + } + + Result<> makeStore( + Index pos, Type type, int bytes, bool isAtomic, Name* mem, Memarg memarg) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + if (isAtomic) { + return withLoc(pos, + irBuilder.makeAtomicStore(bytes, memarg.offset, type, *m)); + } + return withLoc( + pos, irBuilder.makeStore(bytes, memarg.offset, memarg.align, type, *m)); + } + + Result<> makeAtomicRMW( + Index pos, AtomicRMWOp op, Type type, int bytes, Name* mem, Memarg memarg) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, + irBuilder.makeAtomicRMW(op, bytes, memarg.offset, type, *m)); + } + + Result<> + makeAtomicCmpxchg(Index pos, 
Type type, int bytes, Name* mem, Memarg memarg) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, + irBuilder.makeAtomicCmpxchg(bytes, memarg.offset, type, *m)); + } + + Result<> makeAtomicWait(Index pos, Type type, Name* mem, Memarg memarg) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, irBuilder.makeAtomicWait(type, memarg.offset, *m)); + } + + Result<> makeAtomicNotify(Index pos, Name* mem, Memarg memarg) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, irBuilder.makeAtomicNotify(memarg.offset, *m)); + } + + Result<> makeAtomicFence(Index pos) { + return withLoc(pos, irBuilder.makeAtomicFence()); + } + + Result<> makeSIMDExtract(Index pos, SIMDExtractOp op, uint8_t lane) { + return withLoc(pos, irBuilder.makeSIMDExtract(op, lane)); + } + + Result<> makeSIMDReplace(Index pos, SIMDReplaceOp op, uint8_t lane) { + return withLoc(pos, irBuilder.makeSIMDReplace(op, lane)); + } + + Result<> makeSIMDShuffle(Index pos, const std::array<uint8_t, 16>& lanes) { + return withLoc(pos, irBuilder.makeSIMDShuffle(lanes)); + } + + Result<> makeSIMDTernary(Index pos, SIMDTernaryOp op) { + return withLoc(pos, irBuilder.makeSIMDTernary(op)); + } + + Result<> makeSIMDShift(Index pos, SIMDShiftOp op) { + return withLoc(pos, irBuilder.makeSIMDShift(op)); + } + + Result<> makeSIMDLoad(Index pos, SIMDLoadOp op, Name* mem, Memarg memarg) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, + irBuilder.makeSIMDLoad(op, memarg.offset, memarg.align, *m)); + } + + Result<> makeSIMDLoadStoreLane( + Index pos, SIMDLoadStoreLaneOp op, Name* mem, Memarg memarg, uint8_t lane) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, + irBuilder.makeSIMDLoadStoreLane( + op, memarg.offset, memarg.align, lane, *m)); + } + + Result<> makeMemoryInit(Index pos, Name* mem, Name data) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, irBuilder.makeMemoryInit(data, *m)); + } + + 
Result<> makeDataDrop(Index pos, Name data) { + return withLoc(pos, irBuilder.makeDataDrop(data)); + } + + Result<> makeMemoryCopy(Index pos, Name* destMem, Name* srcMem) { + auto destMemory = getMemory(pos, destMem); + CHECK_ERR(destMemory); + auto srcMemory = getMemory(pos, srcMem); + CHECK_ERR(srcMemory); + return withLoc(pos, irBuilder.makeMemoryCopy(*destMemory, *srcMemory)); + } + + Result<> makeMemoryFill(Index pos, Name* mem) { + auto m = getMemory(pos, mem); + CHECK_ERR(m); + return withLoc(pos, irBuilder.makeMemoryFill(*m)); + } + + Result<> makeReturn(Index pos) { + return withLoc(pos, irBuilder.makeReturn()); + } + + Result<> makeRefNull(Index pos, HeapType type) { + return withLoc(pos, irBuilder.makeRefNull(type)); + } + + Result<> makeRefIsNull(Index pos) { + return withLoc(pos, irBuilder.makeRefIsNull()); + } + + Result<> makeRefEq(Index pos) { return withLoc(pos, irBuilder.makeRefEq()); } + + Result<> makeRefI31(Index pos) { + return withLoc(pos, irBuilder.makeRefI31()); + } + + Result<> makeI31Get(Index pos, bool signed_) { + return withLoc(pos, irBuilder.makeI31Get(signed_)); + } + + Result<> makeStructNew(Index pos, HeapType type) { + return withLoc(pos, irBuilder.makeStructNew(type)); + } + + Result<> makeStructNewDefault(Index pos, HeapType type) { + return withLoc(pos, irBuilder.makeStructNewDefault(type)); + } + + Result<> makeStructGet(Index pos, HeapType type, Index field, bool signed_) { + return withLoc(pos, irBuilder.makeStructGet(type, field, signed_)); + } + + Result<> makeStructSet(Index pos, HeapType type, Index field) { + return withLoc(pos, irBuilder.makeStructSet(type, field)); + } + + Result<> makeArrayNew(Index pos, HeapType type) { + return withLoc(pos, irBuilder.makeArrayNew(type)); + } + + Result<> makeArrayNewDefault(Index pos, HeapType type) { + return withLoc(pos, irBuilder.makeArrayNewDefault(type)); + } + + Result<> makeArrayNewData(Index pos, HeapType type, Name data) { + return withLoc(pos, 
irBuilder.makeArrayNewData(type, data)); + } + + Result<> makeArrayNewElem(Index pos, HeapType type, Name elem) { + return withLoc(pos, irBuilder.makeArrayNewElem(type, elem)); + } + + Result<> makeArrayGet(Index pos, HeapType type, bool signed_) { + return withLoc(pos, irBuilder.makeArrayGet(type, signed_)); + } + + Result<> makeArraySet(Index pos, HeapType type) { + return withLoc(pos, irBuilder.makeArraySet(type)); + } + + Result<> makeArrayLen(Index pos) { + return withLoc(pos, irBuilder.makeArrayLen()); + } + + Result<> makeArrayCopy(Index pos, HeapType destType, HeapType srcType) { + return withLoc(pos, irBuilder.makeArrayCopy(destType, srcType)); + } + + Result<> makeArrayFill(Index pos, HeapType type) { + return withLoc(pos, irBuilder.makeArrayFill(type)); + } +}; + +} // namespace wasm::WATParser + +#endif // parser_context_h diff --git a/src/parser/input-impl.h b/src/parser/input-impl.h new file mode 100644 index 000000000..35a39b2f3 --- /dev/null +++ b/src/parser/input-impl.h @@ -0,0 +1,273 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "input.h" + +#ifndef parser_input_impl_h +#define parser_input_impl_h + +inline std::optional<Token> ParseInput::peek() { + if (!empty()) { + return *lexer; + } + return {}; +} + +inline bool ParseInput::takeLParen() { + auto t = peek(); + if (!t || !t->isLParen()) { + return false; + } + ++lexer; + return true; +} + +inline bool ParseInput::takeRParen() { + auto t = peek(); + if (!t || !t->isRParen()) { + return false; + } + ++lexer; + return true; +} + +inline bool ParseInput::takeUntilParen() { + while (true) { + auto t = peek(); + if (!t) { + return false; + } + if (t->isLParen() || t->isRParen()) { + return true; + } + ++lexer; + } +} + +inline std::optional<Name> ParseInput::takeID() { + if (auto t = peek()) { + if (auto id = t->getID()) { + ++lexer; + // See comment on takeName. + return Name(std::string(*id)); + } + } + return {}; +} + +inline std::optional<std::string_view> ParseInput::takeKeyword() { + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + ++lexer; + return *keyword; + } + } + return {}; +} + +inline bool ParseInput::takeKeyword(std::string_view expected) { + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (*keyword == expected) { + ++lexer; + return true; + } + } + } + return false; +} + +inline std::optional<uint64_t> ParseInput::takeOffset() { + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (keyword->substr(0, 7) != "offset="sv) { + return {}; + } + Lexer subLexer(keyword->substr(7)); + if (subLexer == subLexer.end()) { + return {}; + } + if (auto o = subLexer->getU64()) { + ++subLexer; + if (subLexer == subLexer.end()) { + ++lexer; + return o; + } + } + } + } + return std::nullopt; +} + +inline std::optional<uint32_t> ParseInput::takeAlign() { + if (auto t = peek()) { + if (auto keyword = t->getKeyword()) { + if (keyword->substr(0, 6) != "align="sv) { + return {}; + } + Lexer subLexer(keyword->substr(6)); + if (subLexer == subLexer.end()) { + return {}; + } + if 
(auto a = subLexer->getU32()) { + ++subLexer; + if (subLexer == subLexer.end()) { + ++lexer; + return a; + } + } + } + } + return {}; +} + +inline std::optional<uint64_t> ParseInput::takeU64() { + if (auto t = peek()) { + if (auto n = t->getU64()) { + ++lexer; + return n; + } + } + return std::nullopt; +} + +inline std::optional<int64_t> ParseInput::takeS64() { + if (auto t = peek()) { + if (auto n = t->getS64()) { + ++lexer; + return n; + } + } + return {}; +} + +inline std::optional<int64_t> ParseInput::takeI64() { + if (auto t = peek()) { + if (auto n = t->getI64()) { + ++lexer; + return n; + } + } + return {}; +} + +inline std::optional<uint32_t> ParseInput::takeU32() { + if (auto t = peek()) { + if (auto n = t->getU32()) { + ++lexer; + return n; + } + } + return std::nullopt; +} + +inline std::optional<int32_t> ParseInput::takeS32() { + if (auto t = peek()) { + if (auto n = t->getS32()) { + ++lexer; + return n; + } + } + return {}; +} + +inline std::optional<int32_t> ParseInput::takeI32() { + if (auto t = peek()) { + if (auto n = t->getI32()) { + ++lexer; + return n; + } + } + return {}; +} + +inline std::optional<uint8_t> ParseInput::takeU8() { + if (auto t = peek()) { + if (auto n = t->getU32()) { + if (n <= std::numeric_limits<uint8_t>::max()) { + ++lexer; + return uint8_t(*n); + } + } + } + return {}; +} + +inline std::optional<double> ParseInput::takeF64() { + if (auto t = peek()) { + if (auto d = t->getF64()) { + ++lexer; + return d; + } + } + return std::nullopt; +} + +inline std::optional<float> ParseInput::takeF32() { + if (auto t = peek()) { + if (auto f = t->getF32()) { + ++lexer; + return f; + } + } + return std::nullopt; +} + +inline std::optional<std::string_view> ParseInput::takeString() { + if (auto t = peek()) { + if (auto s = t->getString()) { + ++lexer; + return s; + } + } + return {}; +} + +inline std::optional<Name> ParseInput::takeName() { + // TODO: Move this to lexer and validate UTF. 
+ if (auto str = takeString()) { + // Copy to a std::string to make sure we have a null terminator, otherwise + // the `Name` constructor won't work correctly. + // TODO: Update `Name` to use string_view instead of char* and/or to take + // rvalue strings to avoid this extra copy. + return Name(std::string(*str)); + } + return {}; +} + +inline bool ParseInput::takeSExprStart(std::string_view expected) { + auto original = lexer; + if (takeLParen() && takeKeyword(expected)) { + return true; + } + lexer = original; + return false; +} + +inline Index ParseInput::getPos() { + if (auto t = peek()) { + return lexer.getIndex() - t->span.size(); + } + return lexer.getIndex(); +} + +inline Err ParseInput::err(Index pos, std::string reason) { + std::stringstream msg; + msg << lexer.position(pos) << ": error: " << reason; + return Err{msg.str()}; +} + +#endif // parser_input_impl_h diff --git a/src/parser/input.h b/src/parser/input.h new file mode 100644 index 000000000..5c7c57d20 --- /dev/null +++ b/src/parser/input.h @@ -0,0 +1,75 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef parser_input_h +#define parser_input_h + +#include "lexer.h" +#include "support/result.h" +#include "wasm.h" + +namespace wasm::WATParser { + +using namespace std::string_view_literals; + +// Wraps a lexer and provides utilities for consuming tokens. 
+struct ParseInput { + Lexer lexer; + + explicit ParseInput(std::string_view in) : lexer(in) {} + + ParseInput(std::string_view in, size_t index) : lexer(in) { + lexer.setIndex(index); + } + + ParseInput(const ParseInput& other, size_t index) : lexer(other.lexer) { + lexer.setIndex(index); + } + + bool empty() { return lexer.empty(); } + + std::optional<Token> peek(); + bool takeLParen(); + bool takeRParen(); + bool takeUntilParen(); + std::optional<Name> takeID(); + std::optional<std::string_view> takeKeyword(); + bool takeKeyword(std::string_view expected); + std::optional<uint64_t> takeOffset(); + std::optional<uint32_t> takeAlign(); + std::optional<uint64_t> takeU64(); + std::optional<int64_t> takeS64(); + std::optional<int64_t> takeI64(); + std::optional<uint32_t> takeU32(); + std::optional<int32_t> takeS32(); + std::optional<int32_t> takeI32(); + std::optional<uint8_t> takeU8(); + std::optional<double> takeF64(); + std::optional<float> takeF32(); + std::optional<std::string_view> takeString(); + std::optional<Name> takeName(); + bool takeSExprStart(std::string_view expected); + + Index getPos(); + [[nodiscard]] Err err(Index pos, std::string reason); + [[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); } +}; + +#include "input-impl.h" + +} // namespace wasm::WATParser + +#endif // parser_input_h diff --git a/src/wasm/wat-lexer.cpp b/src/parser/lexer.cpp index 264ffd40c..0796013fe 100644 --- a/src/wasm/wat-lexer.cpp +++ b/src/parser/lexer.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2022 WebAssembly Community Group participants + * Copyright 2023 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,7 +22,7 @@ #include <sstream> #include <variant> -#include "wat-lexer.h" +#include "lexer.h" using namespace std::string_view_literals; diff --git a/src/wat-lexer.h b/src/parser/lexer.h index 7b6c93552..67d29b002 100644 --- a/src/wat-lexer.h +++ b/src/parser/lexer.h @@ -1,5 +1,5 @@ /* - * Copyright 2022 WebAssembly Community Group participants + * Copyright 2023 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,8 +23,8 @@ #include <string_view> #include <variant> -#ifndef wasm_wat_lexer_h -#define wasm_wat_lexer_h +#ifndef parser_lexer_h +#define parser_lexer_h namespace wasm::WATParser { @@ -224,4 +224,4 @@ private: } // namespace wasm::WATParser -#endif // wasm_wat_lexer_h +#endif // parser_lexer_h diff --git a/src/wasm/wat-parser.cpp b/src/parser/parsers.h index b31019811..5f9f23a2a 100644 --- a/src/wasm/wat-parser.cpp +++ b/src/parser/parsers.h @@ -1,5 +1,5 @@ /* - * Copyright 2022 WebAssembly Community Group participants + * Copyright 2023 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,1818 +14,15 @@ * limitations under the License. */ -#include "wat-parser.h" -#include "ir/names.h" -#include "support/name.h" -#include "wasm-builder.h" -#include "wasm-ir-builder.h" -#include "wasm-type.h" -#include "wasm.h" -#include "wat-lexer.h" - -// The WebAssembly text format is recursive in the sense that elements may be -// referred to before they are declared. Furthermore, elements may be referred -// to by index or by name. As a result, we need to parse text modules in -// multiple phases. -// -// In the first phase, we find all of the module element declarations and -// record, but do not interpret, the input spans of their corresponding -// definitions. 
This phase establishes the indices and names of each module -// element so that subsequent phases can look them up. -// -// The second phase parses type definitions to construct the types used in the -// module. This has to be its own phase because we have no way to refer to a -// type before it has been built along with all the other types, unlike for -// other module elements that can be referred to by name before their -// definitions have been parsed. -// -// The third phase further parses and constructs types implicitly defined by -// type uses in functions, blocks, and call_indirect instructions. These -// implicitly defined types may be referred to by index elsewhere. -// -// The fourth phase parses and sets the types of globals, functions, and other -// top-level module elements. These types need to be set before we parse -// instructions because they determine the types of instructions such as -// global.get and ref.func. -// -// The fifth and final phase parses the remaining contents of all module -// elements, including instructions. -// -// Each phase of parsing gets its own context type that is passed to the -// individual parsing functions. There is a parsing function for each element of -// the grammar given in the spec. Parsing functions are templatized so that they -// may be passed the appropriate context type and return the correct result type -// for each phase. +#ifndef parser_parsers_h +#define parser_parsers_h -using namespace std::string_view_literals; +#include "common.h" +#include "input.h" namespace wasm::WATParser { -namespace { - -// ============ -// Parser Input -// ============ - -// Wraps a lexer and provides utilities for consuming tokens. 
-struct ParseInput { - Lexer lexer; - - explicit ParseInput(std::string_view in) : lexer(in) {} - - ParseInput(std::string_view in, size_t index) : lexer(in) { - lexer.setIndex(index); - } - - ParseInput(const ParseInput& other, size_t index) : lexer(other.lexer) { - lexer.setIndex(index); - } - - bool empty() { return lexer.empty(); } - - std::optional<Token> peek() { - if (!empty()) { - return *lexer; - } - return {}; - } - - bool takeLParen() { - auto t = peek(); - if (!t || !t->isLParen()) { - return false; - } - ++lexer; - return true; - } - - bool takeRParen() { - auto t = peek(); - if (!t || !t->isRParen()) { - return false; - } - ++lexer; - return true; - } - - bool takeUntilParen() { - while (true) { - auto t = peek(); - if (!t) { - return false; - } - if (t->isLParen() || t->isRParen()) { - return true; - } - ++lexer; - } - } - - std::optional<Name> takeID() { - if (auto t = peek()) { - if (auto id = t->getID()) { - ++lexer; - // See comment on takeName. - return Name(std::string(*id)); - } - } - return {}; - } - - std::optional<std::string_view> takeKeyword() { - if (auto t = peek()) { - if (auto keyword = t->getKeyword()) { - ++lexer; - return *keyword; - } - } - return {}; - } - - bool takeKeyword(std::string_view expected) { - if (auto t = peek()) { - if (auto keyword = t->getKeyword()) { - if (*keyword == expected) { - ++lexer; - return true; - } - } - } - return false; - } - - std::optional<uint64_t> takeOffset() { - if (auto t = peek()) { - if (auto keyword = t->getKeyword()) { - if (keyword->substr(0, 7) != "offset="sv) { - return {}; - } - Lexer subLexer(keyword->substr(7)); - if (subLexer == subLexer.end()) { - return {}; - } - if (auto o = subLexer->getU64()) { - ++subLexer; - if (subLexer == subLexer.end()) { - ++lexer; - return o; - } - } - } - } - return std::nullopt; - } - - std::optional<uint32_t> takeAlign() { - if (auto t = peek()) { - if (auto keyword = t->getKeyword()) { - if (keyword->substr(0, 6) != "align="sv) { - return {}; - } - 
Lexer subLexer(keyword->substr(6)); - if (subLexer == subLexer.end()) { - return {}; - } - if (auto a = subLexer->getU32()) { - ++subLexer; - if (subLexer == subLexer.end()) { - ++lexer; - return a; - } - } - } - } - return {}; - } - - std::optional<uint64_t> takeU64() { - if (auto t = peek()) { - if (auto n = t->getU64()) { - ++lexer; - return n; - } - } - return std::nullopt; - } - - std::optional<int64_t> takeS64() { - if (auto t = peek()) { - if (auto n = t->getS64()) { - ++lexer; - return n; - } - } - return {}; - } - - std::optional<int64_t> takeI64() { - if (auto t = peek()) { - if (auto n = t->getI64()) { - ++lexer; - return n; - } - } - return {}; - } - - std::optional<uint32_t> takeU32() { - if (auto t = peek()) { - if (auto n = t->getU32()) { - ++lexer; - return n; - } - } - return std::nullopt; - } - - std::optional<int32_t> takeS32() { - if (auto t = peek()) { - if (auto n = t->getS32()) { - ++lexer; - return n; - } - } - return {}; - } - - std::optional<int32_t> takeI32() { - if (auto t = peek()) { - if (auto n = t->getI32()) { - ++lexer; - return n; - } - } - return {}; - } - - std::optional<uint8_t> takeU8() { - if (auto t = peek()) { - if (auto n = t->getU32()) { - if (n <= std::numeric_limits<uint8_t>::max()) { - ++lexer; - return uint8_t(*n); - } - } - } - return {}; - } - - std::optional<double> takeF64() { - if (auto t = peek()) { - if (auto d = t->getF64()) { - ++lexer; - return d; - } - } - return std::nullopt; - } - - std::optional<float> takeF32() { - if (auto t = peek()) { - if (auto f = t->getF32()) { - ++lexer; - return f; - } - } - return std::nullopt; - } - - std::optional<std::string_view> takeString() { - if (auto t = peek()) { - if (auto s = t->getString()) { - ++lexer; - return s; - } - } - return {}; - } - - std::optional<Name> takeName() { - // TODO: Move this to lexer and validate UTF. 
- if (auto str = takeString()) { - // Copy to a std::string to make sure we have a null terminator, otherwise - // the `Name` constructor won't work correctly. - // TODO: Update `Name` to use string_view instead of char* and/or to take - // rvalue strings to avoid this extra copy. - return Name(std::string(*str)); - } - return {}; - } - - bool takeSExprStart(std::string_view expected) { - auto original = lexer; - if (takeLParen() && takeKeyword(expected)) { - return true; - } - lexer = original; - return false; - } - - Index getPos() { - if (auto t = peek()) { - return lexer.getIndex() - t->span.size(); - } - return lexer.getIndex(); - } - - [[nodiscard]] Err err(Index pos, std::string reason) { - std::stringstream msg; - msg << lexer.position(pos) << ": error: " << reason; - return Err{msg.str()}; - } - - [[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); } -}; - -// ========= -// Utilities -// ========= - -// The location, possible name, and index in the respective module index space -// of a module-level definition in the input. -struct DefPos { - Name name; - Index pos; - Index index; -}; - -struct GlobalType { - Mutability mutability; - Type type; -}; - -// A signature type and parameter names (possibly empty), used for parsing -// function types. -struct TypeUse { - HeapType type; - std::vector<Name> names; -}; - -struct ImportNames { - Name mod; - Name nm; -}; - -struct Limits { - uint64_t initial; - uint64_t max; -}; - -struct MemType { - Type type; - Limits limits; - bool shared; -}; - -struct Memarg { - uint64_t offset; - uint32_t align; -}; - -// RAII utility for temporarily changing the parsing position of a parsing -// context. -template<typename Ctx> struct WithPosition { - Ctx& ctx; - Index original; - - WithPosition(Ctx& ctx, Index pos) : ctx(ctx), original(ctx.in.getPos()) { - ctx.in.lexer.setIndex(pos); - } - - ~WithPosition() { ctx.in.lexer.setIndex(original); } -}; - -// Deduction guide to satisfy -Wctad-maybe-unsupported. 
-template<typename Ctx> WithPosition(Ctx& ctx, Index) -> WithPosition<Ctx>; - -using IndexMap = std::unordered_map<Name, Index>; - -void applyImportNames(Importable& item, ImportNames* names) { - if (names) { - item.module = names->mod; - item.base = names->nm; - } -} - -Result<> addExports(ParseInput& in, - Module& wasm, - const Named* item, - const std::vector<Name>& exports, - ExternalKind kind) { - for (auto name : exports) { - if (wasm.getExportOrNull(name)) { - // TODO: Fix error location - return in.err("repeated export name"); - } - wasm.addExport(Builder(wasm).makeExport(name, item->name, kind)); - } - return Ok{}; -} - -Result<IndexMap> createIndexMap(ParseInput& in, - const std::vector<DefPos>& defs) { - IndexMap indices; - for (auto& def : defs) { - if (def.name.is()) { - if (!indices.insert({def.name, def.index}).second) { - return in.err(def.pos, "duplicate element name"); - } - } - } - return indices; -} - -std::vector<Type> getUnnamedTypes(const std::vector<NameType>& named) { - std::vector<Type> types; - types.reserve(named.size()); - for (auto& t : named) { - types.push_back(t.type); - } - return types; -} - -template<typename Ctx> -Result<> parseDefs(Ctx& ctx, - const std::vector<DefPos>& defs, - MaybeResult<> (*parser)(Ctx&)) { - for (auto& def : defs) { - ctx.index = def.index; - WithPosition with(ctx, def.pos); - auto parsed = parser(ctx); - CHECK_ERR(parsed); - assert(parsed); - } - return Ok{}; -} - -// =============== -// Parser Contexts -// =============== - -struct NullTypeParserCtx { - using IndexT = Ok; - using HeapTypeT = Ok; - using TypeT = Ok; - using ParamsT = Ok; - using ResultsT = size_t; - using BlockTypeT = Ok; - using SignatureT = Ok; - using StorageT = Ok; - using FieldT = Ok; - using FieldsT = Ok; - using StructT = Ok; - using ArrayT = Ok; - using LimitsT = Ok; - using MemTypeT = Ok; - using GlobalTypeT = Ok; - using TypeUseT = Ok; - using LocalsT = Ok; - using DataStringT = Ok; - - HeapTypeT makeFunc() { return Ok{}; } - 
HeapTypeT makeAny() { return Ok{}; } - HeapTypeT makeExtern() { return Ok{}; } - HeapTypeT makeEq() { return Ok{}; } - HeapTypeT makeI31() { return Ok{}; } - HeapTypeT makeStructType() { return Ok{}; } - HeapTypeT makeArrayType() { return Ok{}; } - - TypeT makeI32() { return Ok{}; } - TypeT makeI64() { return Ok{}; } - TypeT makeF32() { return Ok{}; } - TypeT makeF64() { return Ok{}; } - TypeT makeV128() { return Ok{}; } - - TypeT makeRefType(HeapTypeT, Nullability) { return Ok{}; } - - ParamsT makeParams() { return Ok{}; } - void appendParam(ParamsT&, Name, TypeT) {} - - // We have to count results because whether or not a block introduces a - // typeuse that may implicitly define a type depends on how many results it - // has. - size_t makeResults() { return 0; } - void appendResult(size_t& results, TypeT) { ++results; } - size_t getResultsSize(size_t results) { return results; } - - SignatureT makeFuncType(ParamsT*, ResultsT*) { return Ok{}; } - - StorageT makeI8() { return Ok{}; } - StorageT makeI16() { return Ok{}; } - StorageT makeStorageType(TypeT) { return Ok{}; } - - FieldT makeFieldType(StorageT, Mutability) { return Ok{}; } - - FieldsT makeFields() { return Ok{}; } - void appendField(FieldsT&, Name, FieldT) {} - - StructT makeStruct(FieldsT&) { return Ok{}; } - - std::optional<ArrayT> makeArray(FieldsT&) { return Ok{}; } - - GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; } - - LocalsT makeLocals() { return Ok{}; } - void appendLocal(LocalsT&, Name, TypeT) {} - - Result<Index> getTypeIndex(Name) { return 1; } - Result<HeapTypeT> getHeapTypeFromIdx(Index) { return Ok{}; } - - DataStringT makeDataString() { return Ok{}; } - void appendDataString(DataStringT&, std::string_view) {} - - MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } - - BlockTypeT getBlockTypeFromResult(size_t results) { return Ok{}; } - - Result<> getBlockTypeFromTypeUse(Index, TypeUseT) { return Ok{}; } -}; - -template<typename Ctx> struct TypeParserCtx { - using 
IndexT = Index; - using HeapTypeT = HeapType; - using TypeT = Type; - using ParamsT = std::vector<NameType>; - using ResultsT = std::vector<Type>; - using BlockTypeT = HeapType; - using SignatureT = Signature; - using StorageT = Field; - using FieldT = Field; - using FieldsT = std::pair<std::vector<Name>, std::vector<Field>>; - using StructT = std::pair<std::vector<Name>, Struct>; - using ArrayT = Array; - using LimitsT = Ok; - using MemTypeT = Ok; - using LocalsT = std::vector<NameType>; - using DataStringT = Ok; - - // Map heap type names to their indices. - const IndexMap& typeIndices; - - TypeParserCtx(const IndexMap& typeIndices) : typeIndices(typeIndices) {} - - Ctx& self() { return *static_cast<Ctx*>(this); } - - HeapTypeT makeFunc() { return HeapType::func; } - HeapTypeT makeAny() { return HeapType::any; } - HeapTypeT makeExtern() { return HeapType::ext; } - HeapTypeT makeEq() { return HeapType::eq; } - HeapTypeT makeI31() { return HeapType::i31; } - HeapTypeT makeStructType() { return HeapType::struct_; } - HeapTypeT makeArrayType() { return HeapType::array; } - - TypeT makeI32() { return Type::i32; } - TypeT makeI64() { return Type::i64; } - TypeT makeF32() { return Type::f32; } - TypeT makeF64() { return Type::f64; } - TypeT makeV128() { return Type::v128; } - - TypeT makeRefType(HeapTypeT ht, Nullability nullability) { - return Type(ht, nullability); - } - - TypeT makeTupleType(const std::vector<Type> types) { return Tuple(types); } - - ParamsT makeParams() { return {}; } - void appendParam(ParamsT& params, Name id, TypeT type) { - params.push_back({id, type}); - } - - ResultsT makeResults() { return {}; } - void appendResult(ResultsT& results, TypeT type) { results.push_back(type); } - size_t getResultsSize(const ResultsT& results) { return results.size(); } - - SignatureT makeFuncType(ParamsT* params, ResultsT* results) { - std::vector<Type> empty; - const auto& paramTypes = params ? 
getUnnamedTypes(*params) : empty; - const auto& resultTypes = results ? *results : empty; - return Signature(self().makeTupleType(paramTypes), - self().makeTupleType(resultTypes)); - } - - StorageT makeI8() { return Field(Field::i8, Immutable); } - StorageT makeI16() { return Field(Field::i16, Immutable); } - StorageT makeStorageType(TypeT type) { return Field(type, Immutable); } - - FieldT makeFieldType(FieldT field, Mutability mutability) { - if (field.packedType == Field::not_packed) { - return Field(field.type, mutability); - } - return Field(field.packedType, mutability); - } - - FieldsT makeFields() { return {}; } - void appendField(FieldsT& fields, Name name, FieldT field) { - fields.first.push_back(name); - fields.second.push_back(field); - } - - StructT makeStruct(FieldsT& fields) { - return {std::move(fields.first), Struct(std::move(fields.second))}; - } - - std::optional<ArrayT> makeArray(FieldsT& fields) { - if (fields.second.size() == 1) { - return Array(fields.second[0]); - } - return {}; - } - - LocalsT makeLocals() { return {}; } - void appendLocal(LocalsT& locals, Name id, TypeT type) { - locals.push_back({id, type}); - } - - Result<Index> getTypeIndex(Name id) { - auto it = typeIndices.find(id); - if (it == typeIndices.end()) { - return self().in.err("unknown type identifier"); - } - return it->second; - } - - DataStringT makeDataString() { return Ok{}; } - void appendDataString(DataStringT&, std::string_view) {} - - LimitsT makeLimits(uint64_t, std::optional<uint64_t>) { return Ok{}; } - LimitsT getLimitsFromData(DataStringT) { return Ok{}; } - - MemTypeT makeMemType(Type, LimitsT, bool) { return Ok{}; } - - HeapType getBlockTypeFromResult(const std::vector<Type> results) { - assert(results.size() == 1); - return HeapType(Signature(Type::none, results[0])); - } -}; - -struct NullInstrParserCtx { - using InstrT = Ok; - using InstrsT = Ok; - using ExprT = Ok; - - using FieldIdxT = Ok; - using LocalIdxT = Ok; - using GlobalIdxT = Ok; - using 
MemoryIdxT = Ok; - using DataIdxT = Ok; - - using MemargT = Ok; - - InstrsT makeInstrs() { return Ok{}; } - void appendInstr(InstrsT&, InstrT) {} - InstrsT finishInstrs(InstrsT&) { return Ok{}; } - - ExprT makeExpr(InstrsT) { return Ok{}; } - Result<ExprT> instrToExpr(InstrT) { return Ok{}; } - - template<typename HeapTypeT> FieldIdxT getFieldFromIdx(HeapTypeT, uint32_t) { - return Ok{}; - } - template<typename HeapTypeT> FieldIdxT getFieldFromName(HeapTypeT, Name) { - return Ok{}; - } - LocalIdxT getLocalFromIdx(uint32_t) { return Ok{}; } - LocalIdxT getLocalFromName(Name) { return Ok{}; } - GlobalIdxT getGlobalFromIdx(uint32_t) { return Ok{}; } - GlobalIdxT getGlobalFromName(Name) { return Ok{}; } - MemoryIdxT getMemoryFromIdx(uint32_t) { return Ok{}; } - MemoryIdxT getMemoryFromName(Name) { return Ok{}; } - DataIdxT getDataFromIdx(uint32_t) { return Ok{}; } - DataIdxT getDataFromName(Name) { return Ok{}; } - - MemargT getMemarg(uint64_t, uint32_t) { return Ok{}; } - - template<typename BlockTypeT> - InstrT makeBlock(Index, std::optional<Name>, BlockTypeT) { - return Ok{}; - } - InstrT finishBlock(Index, InstrsT) { return Ok{}; } - - InstrT makeUnreachable(Index) { return Ok{}; } - InstrT makeNop(Index) { return Ok{}; } - InstrT makeBinary(Index, BinaryOp) { return Ok{}; } - InstrT makeUnary(Index, UnaryOp) { return Ok{}; } - template<typename ResultsT> InstrT makeSelect(Index, ResultsT*) { - return Ok{}; - } - InstrT makeDrop(Index) { return Ok{}; } - InstrT makeMemorySize(Index, MemoryIdxT*) { return Ok{}; } - InstrT makeMemoryGrow(Index, MemoryIdxT*) { return Ok{}; } - InstrT makeLocalGet(Index, LocalIdxT) { return Ok{}; } - InstrT makeLocalTee(Index, LocalIdxT) { return Ok{}; } - InstrT makeLocalSet(Index, LocalIdxT) { return Ok{}; } - InstrT makeGlobalGet(Index, GlobalIdxT) { return Ok{}; } - InstrT makeGlobalSet(Index, GlobalIdxT) { return Ok{}; } - - InstrT makeI32Const(Index, uint32_t) { return Ok{}; } - InstrT makeI64Const(Index, uint64_t) { return Ok{}; 
} - InstrT makeF32Const(Index, float) { return Ok{}; } - InstrT makeF64Const(Index, double) { return Ok{}; } - InstrT makeLoad(Index, Type, bool, int, bool, MemoryIdxT*, MemargT) { - return Ok{}; - } - InstrT makeStore(Index, Type, int, bool, MemoryIdxT*, MemargT) { - return Ok{}; - } - InstrT makeAtomicRMW(Index, AtomicRMWOp, Type, int, MemoryIdxT*, MemargT) { - return Ok{}; - } - InstrT makeAtomicCmpxchg(Index, Type, int, MemoryIdxT*, MemargT) { - return Ok{}; - } - InstrT makeAtomicWait(Index, Type, MemoryIdxT*, MemargT) { return Ok{}; } - InstrT makeAtomicNotify(Index, MemoryIdxT*, MemargT) { return Ok{}; } - InstrT makeAtomicFence(Index) { return Ok{}; } - InstrT makeSIMDExtract(Index, SIMDExtractOp, uint8_t) { return Ok{}; } - InstrT makeSIMDReplace(Index, SIMDReplaceOp, uint8_t) { return Ok{}; } - InstrT makeSIMDShuffle(Index, const std::array<uint8_t, 16>&) { return Ok{}; } - InstrT makeSIMDTernary(Index, SIMDTernaryOp) { return Ok{}; } - InstrT makeSIMDShift(Index, SIMDShiftOp) { return Ok{}; } - InstrT makeSIMDLoad(Index, SIMDLoadOp, MemoryIdxT*, MemargT) { return Ok{}; } - InstrT makeSIMDLoadStoreLane( - Index, SIMDLoadStoreLaneOp, MemoryIdxT*, MemargT, uint8_t) { - return Ok{}; - } - InstrT makeMemoryInit(Index, MemoryIdxT*, DataIdxT) { return Ok{}; } - InstrT makeDataDrop(Index, DataIdxT) { return Ok{}; } - - InstrT makeMemoryCopy(Index, MemoryIdxT*, MemoryIdxT*) { return Ok{}; } - InstrT makeMemoryFill(Index, MemoryIdxT*) { return Ok{}; } - - InstrT makeReturn(Index) { return Ok{}; } - template<typename HeapTypeT> InstrT makeRefNull(Index, HeapTypeT) { - return Ok{}; - } - InstrT makeRefIsNull(Index) { return Ok{}; } - - InstrT makeRefEq(Index) { return Ok{}; } - - InstrT makeRefI31(Index) { return Ok{}; } - InstrT makeI31Get(Index, bool) { return Ok{}; } - - template<typename HeapTypeT> InstrT makeStructNew(Index, HeapTypeT) { - return Ok{}; - } - template<typename HeapTypeT> InstrT makeStructNewDefault(Index, HeapTypeT) { - return Ok{}; - } - 
template<typename HeapTypeT> - InstrT makeStructGet(Index, HeapTypeT, FieldIdxT, bool) { - return Ok{}; - } - template<typename HeapTypeT> - InstrT makeStructSet(Index, HeapTypeT, FieldIdxT) { - return Ok{}; - } - template<typename HeapTypeT> InstrT makeArrayNew(Index, HeapTypeT) { - return Ok{}; - } - template<typename HeapTypeT> InstrT makeArrayNewDefault(Index, HeapTypeT) { - return Ok{}; - } - template<typename HeapTypeT> - InstrT makeArrayNewData(Index, HeapTypeT, DataIdxT) { - return Ok{}; - } - template<typename HeapTypeT> - InstrT makeArrayNewElem(Index, HeapTypeT, DataIdxT) { - return Ok{}; - } - template<typename HeapTypeT> InstrT makeArrayGet(Index, HeapTypeT, bool) { - return Ok{}; - } - template<typename HeapTypeT> InstrT makeArraySet(Index, HeapTypeT) { - return Ok{}; - } - InstrT makeArrayLen(Index) { return Ok{}; } - template<typename HeapTypeT> - InstrT makeArrayCopy(Index, HeapTypeT, HeapTypeT) { - return Ok{}; - } - template<typename HeapTypeT> InstrT makeArrayFill(Index, HeapTypeT) { - return Ok{}; - } -}; - -// Phase 1: Parse definition spans for top-level module elements and determine -// their indices and names. -struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { - using DataStringT = std::vector<char>; - using LimitsT = Limits; - using MemTypeT = MemType; - - ParseInput in; - - // At this stage we only look at types to find implicit type definitions, - // which are inserted directly into the context. We cannot materialize or - // validate any types because we don't know what types exist yet. - // - // Declared module elements are inserted into the module, but their bodies are - // not filled out until later parsing phases. - Module& wasm; - - // The module element definitions we are parsing in this phase. 
- std::vector<DefPos> typeDefs; - std::vector<DefPos> subtypeDefs; - std::vector<DefPos> funcDefs; - std::vector<DefPos> memoryDefs; - std::vector<DefPos> globalDefs; - std::vector<DefPos> dataDefs; - - // Positions of typeuses that might implicitly define new types. - std::vector<Index> implicitTypeDefs; - - // Counters used for generating names for module elements. - int funcCounter = 0; - int memoryCounter = 0; - int globalCounter = 0; - int dataCounter = 0; - - // Used to verify that all imports come before all non-imports. - bool hasNonImport = false; - - ParseDeclsCtx(std::string_view in, Module& wasm) : in(in), wasm(wasm) {} - - void addFuncType(SignatureT) {} - void addStructType(StructT) {} - void addArrayType(ArrayT) {} - void setOpen() {} - Result<> addSubtype(Index) { return Ok{}; } - void finishSubtype(Name name, Index pos) { - subtypeDefs.push_back({name, pos, Index(subtypeDefs.size())}); - } - size_t getRecGroupStartIndex() { return 0; } - void addRecGroup(Index, size_t) {} - void finishDeftype(Index pos) { - typeDefs.push_back({{}, pos, Index(typeDefs.size())}); - } - - std::vector<char> makeDataString() { return {}; } - void appendDataString(std::vector<char>& data, std::string_view str) { - data.insert(data.end(), str.begin(), str.end()); - } - - Limits makeLimits(uint64_t n, std::optional<uint64_t> m) { - return m ? 
Limits{n, *m} : Limits{n, Memory::kUnlimitedSize}; - } - Limits getLimitsFromData(const std::vector<char>& data) { - uint64_t size = (data.size() + Memory::kPageSize - 1) / Memory::kPageSize; - return {size, size}; - } - - MemType makeMemType(Type type, Limits limits, bool shared) { - return {type, limits, shared}; - } - - Result<TypeUseT> - makeTypeUse(Index pos, std::optional<HeapTypeT> type, ParamsT*, ResultsT*) { - if (!type) { - implicitTypeDefs.push_back(pos); - } - return Ok{}; - } - - Result<Function*> - addFuncDecl(Index pos, Name name, ImportNames* importNames) { - auto f = std::make_unique<Function>(); - if (name.is()) { - if (wasm.getFunctionOrNull(name)) { - // TDOO: if the existing function is not explicitly named, fix its name - // and continue. - return in.err(pos, "repeated function name"); - } - f->setExplicitName(name); - } else { - name = (importNames ? "fimport$" : "") + std::to_string(funcCounter++); - name = Names::getValidFunctionName(wasm, name); - f->name = name; - } - applyImportNames(*f, importNames); - return wasm.addFunction(std::move(f)); - } - - Result<> addFunc(Name name, - const std::vector<Name>& exports, - ImportNames* import, - TypeUseT type, - std::optional<LocalsT>, - std::optional<InstrsT>, - Index pos) { - if (import && hasNonImport) { - return in.err(pos, "import after non-import"); - } - auto f = addFuncDecl(pos, name, import); - CHECK_ERR(f); - CHECK_ERR(addExports(in, wasm, *f, exports, ExternalKind::Function)); - funcDefs.push_back({name, pos, Index(funcDefs.size())}); - return Ok{}; - } - - Result<Memory*> - addMemoryDecl(Index pos, Name name, ImportNames* importNames, MemType type) { - auto m = std::make_unique<Memory>(); - m->indexType = type.type; - m->initial = type.limits.initial; - m->max = type.limits.max; - m->shared = type.shared; - if (name) { - // TODO: if the existing memory is not explicitly named, fix its name - // and continue. 
- if (wasm.getMemoryOrNull(name)) { - return in.err(pos, "repeated memory name"); - } - m->setExplicitName(name); - } else { - name = (importNames ? "mimport$" : "") + std::to_string(memoryCounter++); - name = Names::getValidMemoryName(wasm, name); - m->name = name; - } - applyImportNames(*m, importNames); - return wasm.addMemory(std::move(m)); - } - - Result<> addMemory(Name name, - const std::vector<Name>& exports, - ImportNames* import, - MemType type, - Index pos) { - if (import && hasNonImport) { - return in.err(pos, "import after non-import"); - } - auto m = addMemoryDecl(pos, name, import, type); - CHECK_ERR(m); - CHECK_ERR(addExports(in, wasm, *m, exports, ExternalKind::Memory)); - memoryDefs.push_back({name, pos, Index(memoryDefs.size())}); - return Ok{}; - } - - Result<> addImplicitData(DataStringT&& data) { - auto& mem = *wasm.memories.back(); - auto d = std::make_unique<DataSegment>(); - d->memory = mem.name; - d->isPassive = false; - d->offset = Builder(wasm).makeConstPtr(0, mem.indexType); - d->data = std::move(data); - d->name = Names::getValidDataSegmentName(wasm, "implicit-data"); - wasm.addDataSegment(std::move(d)); - return Ok{}; - } - - Result<Global*> - addGlobalDecl(Index pos, Name name, ImportNames* importNames) { - auto g = std::make_unique<Global>(); - if (name) { - if (wasm.getGlobalOrNull(name)) { - // TODO: if the existing global is not explicitly named, fix its name - // and continue. - return in.err(pos, "repeated global name"); - } - g->setExplicitName(name); - } else { - name = (importNames ? 
"gimport$" : "") + std::to_string(globalCounter++); - name = Names::getValidGlobalName(wasm, name); - g->name = name; - } - applyImportNames(*g, importNames); - return wasm.addGlobal(std::move(g)); - } - - Result<> addGlobal(Name name, - const std::vector<Name>& exports, - ImportNames* import, - GlobalTypeT, - std::optional<ExprT>, - Index pos) { - if (import && hasNonImport) { - return in.err(pos, "import after non-import"); - } - auto g = addGlobalDecl(pos, name, import); - CHECK_ERR(g); - CHECK_ERR(addExports(in, wasm, *g, exports, ExternalKind::Global)); - globalDefs.push_back({name, pos, Index(globalDefs.size())}); - return Ok{}; - } - - Result<> addData(Name name, - MemoryIdxT*, - std::optional<ExprT>, - std::vector<char>&& data, - Index pos) { - auto d = std::make_unique<DataSegment>(); - if (name) { - if (wasm.getDataSegmentOrNull(name)) { - // TODO: if the existing segment is not explicitly named, fix its name - // and continue. - return in.err(pos, "repeated data segment name"); - } - d->setExplicitName(name); - } else { - name = std::to_string(dataCounter++); - name = Names::getValidDataSegmentName(wasm, name); - d->name = name; - } - d->data = std::move(data); - dataDefs.push_back({name, pos, Index(wasm.dataSegments.size())}); - wasm.addDataSegment(std::move(d)); - return Ok{}; - } -}; - -// Phase 2: Parse type definitions into a TypeBuilder. -struct ParseTypeDefsCtx : TypeParserCtx<ParseTypeDefsCtx> { - ParseInput in; - - // We update slots in this builder as we parse type definitions. - TypeBuilder& builder; - - // Parse the names of types and fields as we go. - std::vector<TypeNames> names; - - // The index of the subtype definition we are parsing. 
- Index index = 0; - - ParseTypeDefsCtx(std::string_view in, - TypeBuilder& builder, - const IndexMap& typeIndices) - : TypeParserCtx<ParseTypeDefsCtx>(typeIndices), in(in), builder(builder), - names(builder.size()) {} - - TypeT makeRefType(HeapTypeT ht, Nullability nullability) { - return builder.getTempRefType(ht, nullability); - } - - TypeT makeTupleType(const std::vector<Type> types) { - return builder.getTempTupleType(types); - } - - Result<HeapTypeT> getHeapTypeFromIdx(Index idx) { - if (idx >= builder.size()) { - return in.err("type index out of bounds"); - } - return builder[idx]; - } - - void addFuncType(SignatureT& type) { builder[index] = type; } - - void addStructType(StructT& type) { - auto& [fieldNames, str] = type; - builder[index] = str; - for (Index i = 0; i < fieldNames.size(); ++i) { - if (auto name = fieldNames[i]; name.is()) { - names[index].fieldNames[i] = name; - } - } - } - - void addArrayType(ArrayT& type) { builder[index] = type; } - - void setOpen() { builder[index].setOpen(); } - - Result<> addSubtype(Index super) { - if (super >= builder.size()) { - return in.err("supertype index out of bounds"); - } - builder[index].subTypeOf(builder[super]); - return Ok{}; - } - - void finishSubtype(Name name, Index pos) { names[index++].name = name; } - - size_t getRecGroupStartIndex() { return index; } - - void addRecGroup(Index start, size_t len) { - builder.createRecGroup(start, len); - } - - void finishDeftype(Index) {} -}; - -// Phase 3: Parse type uses to find implicitly defined types. -struct ParseImplicitTypeDefsCtx : TypeParserCtx<ParseImplicitTypeDefsCtx> { - using TypeUseT = Ok; - - ParseInput in; - - // Types parsed so far. - std::vector<HeapType>& types; - - // Map typeuse positions without an explicit type to the correct type. - std::unordered_map<Index, HeapType>& implicitTypes; - - // Map signatures to the first defined heap type they match. 
- std::unordered_map<Signature, HeapType> sigTypes; - - ParseImplicitTypeDefsCtx(std::string_view in, - std::vector<HeapType>& types, - std::unordered_map<Index, HeapType>& implicitTypes, - const IndexMap& typeIndices) - : TypeParserCtx<ParseImplicitTypeDefsCtx>(typeIndices), in(in), - types(types), implicitTypes(implicitTypes) { - for (auto type : types) { - if (type.isSignature() && type.getRecGroup().size() == 1) { - sigTypes.insert({type.getSignature(), type}); - } - } - } - - Result<HeapTypeT> getHeapTypeFromIdx(Index idx) { - if (idx >= types.size()) { - return in.err("type index out of bounds"); - } - return types[idx]; - } - - Result<TypeUseT> makeTypeUse(Index pos, - std::optional<HeapTypeT>, - ParamsT* params, - ResultsT* results) { - std::vector<Type> paramTypes; - if (params) { - paramTypes = getUnnamedTypes(*params); - } - - std::vector<Type> resultTypes; - if (results) { - resultTypes = *results; - } - - auto sig = Signature(Type(paramTypes), Type(resultTypes)); - auto [it, inserted] = sigTypes.insert({sig, HeapType::func}); - if (inserted) { - auto type = HeapType(sig); - it->second = type; - types.push_back(type); - } - implicitTypes.insert({pos, it->second}); - - return Ok{}; - } -}; - -// Phase 4: Parse and set the types of module elements. -struct ParseModuleTypesCtx : TypeParserCtx<ParseModuleTypesCtx>, - NullInstrParserCtx { - // In this phase we have constructed all the types, so we can materialize and - // validate them when they are used. - - using GlobalTypeT = GlobalType; - using TypeUseT = TypeUse; - - ParseInput in; - - Module& wasm; - - const std::vector<HeapType>& types; - const std::unordered_map<Index, HeapType>& implicitTypes; - - // The index of the current type. 
- Index index = 0; - - ParseModuleTypesCtx(std::string_view in, - Module& wasm, - const std::vector<HeapType>& types, - const std::unordered_map<Index, HeapType>& implicitTypes, - const IndexMap& typeIndices) - : TypeParserCtx<ParseModuleTypesCtx>(typeIndices), in(in), wasm(wasm), - types(types), implicitTypes(implicitTypes) {} - - Result<HeapTypeT> getHeapTypeFromIdx(Index idx) { - if (idx >= types.size()) { - return in.err("type index out of bounds"); - } - return types[idx]; - } - - Result<TypeUseT> makeTypeUse(Index pos, - std::optional<HeapTypeT> type, - ParamsT* params, - ResultsT* results) { - std::vector<Name> ids; - if (params) { - ids.reserve(params->size()); - for (auto& p : *params) { - ids.push_back(p.name); - } - } - - if (type) { - return TypeUse{*type, ids}; - } - - auto it = implicitTypes.find(pos); - assert(it != implicitTypes.end()); - - return TypeUse{it->second, ids}; - } - - Result<HeapType> getBlockTypeFromTypeUse(Index pos, TypeUse use) { - assert(use.type.isSignature()); - if (use.type.getSignature().params != Type::none) { - return in.err(pos, "block parameters not yet supported"); - } - // TODO: Once we support block parameters, return an error here if any of - // them are named. 
- return use.type; - } - - GlobalTypeT makeGlobalType(Mutability mutability, TypeT type) { - return {mutability, type}; - } - - Result<> addFunc(Name name, - const std::vector<Name>&, - ImportNames*, - TypeUse type, - std::optional<LocalsT> locals, - std::optional<InstrsT>, - Index pos) { - auto& f = wasm.functions[index]; - if (!type.type.isSignature()) { - return in.err(pos, "expected signature type"); - } - f->type = type.type; - for (Index i = 0; i < type.names.size(); ++i) { - if (type.names[i].is()) { - f->setLocalName(i, type.names[i]); - } - } - if (locals) { - for (auto& l : *locals) { - Builder::addVar(f.get(), l.name, l.type); - } - } - return Ok{}; - } - - Result<> - addMemory(Name, const std::vector<Name>&, ImportNames*, MemTypeT, Index) { - return Ok{}; - } - - Result<> addImplicitData(DataStringT&& data) { return Ok{}; } - - Result<> addGlobal(Name, - const std::vector<Name>&, - ImportNames*, - GlobalType type, - std::optional<ExprT>, - Index) { - auto& g = wasm.globals[index]; - g->mutable_ = type.mutability; - g->type = type.type; - return Ok{}; - } -}; - -// Phase 5: Parse module element definitions, including instructions. -struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> { - using GlobalTypeT = Ok; - using TypeUseT = HeapType; - - // Keep track of instructions internally rather than letting the general - // parser collect them. - using InstrT = Ok; - using InstrsT = Ok; - using ExprT = Expression*; - - using FieldIdxT = Index; - using LocalIdxT = Index; - using GlobalIdxT = Name; - using MemoryIdxT = Name; - using DataIdxT = Name; - - using MemargT = Memarg; - - ParseInput in; - - Module& wasm; - Builder builder; - - const std::vector<HeapType>& types; - const std::unordered_map<Index, HeapType>& implicitTypes; - - // The index of the current module element. - Index index = 0; - - // The current function being parsed, used to create scratch locals, type - // local.get, etc. 
- Function* func = nullptr; - - IRBuilder irBuilder; - - void setFunction(Function* func) { - this->func = func; - irBuilder.setFunction(func); - } - - ParseDefsCtx(std::string_view in, - Module& wasm, - const std::vector<HeapType>& types, - const std::unordered_map<Index, HeapType>& implicitTypes, - const IndexMap& typeIndices) - : TypeParserCtx(typeIndices), in(in), wasm(wasm), builder(wasm), - types(types), implicitTypes(implicitTypes), irBuilder(wasm) {} - - template<typename T> Result<T> withLoc(Index pos, Result<T> res) { - if (auto err = res.getErr()) { - return in.err(pos, err->msg); - } - return res; - } - - template<typename T> Result<T> withLoc(Result<T> res) { - return withLoc(in.getPos(), res); - } - - HeapType getBlockTypeFromResult(const std::vector<Type> results) { - assert(results.size() == 1); - return HeapType(Signature(Type::none, results[0])); - } - - Result<HeapType> getBlockTypeFromTypeUse(Index pos, HeapType type) { - return type; - } - - Ok makeInstrs() { return Ok{}; } - - void appendInstr(Ok&, InstrT instr) {} - - Result<InstrsT> finishInstrs(Ok&) { return Ok{}; } - - Result<Expression*> instrToExpr(Ok&) { return irBuilder.build(); } - - GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; } - - Result<HeapTypeT> getHeapTypeFromIdx(Index idx) { - if (idx >= types.size()) { - return in.err("type index out of bounds"); - } - return types[idx]; - } - - Result<Index> getFieldFromIdx(HeapType type, uint32_t idx) { - if (!type.isStruct()) { - return in.err("expected struct type"); - } - if (idx >= type.getStruct().fields.size()) { - return in.err("struct index out of bounds"); - } - return idx; - } - - Result<Index> getFieldFromName(HeapType type, Name name) { - // TODO: Field names - return in.err("symbolic field names note yet supported"); - } - - Result<Index> getLocalFromIdx(uint32_t idx) { - if (!func) { - return in.err("cannot access locals outside of a function"); - } - if (idx >= func->getNumLocals()) { - return in.err("local 
index out of bounds"); - } - return idx; - } - - Result<Index> getLocalFromName(Name name) { - if (!func) { - return in.err("cannot access locals outside of a function"); - } - if (!func->hasLocalIndex(name)) { - return in.err("local $" + name.toString() + " does not exist"); - } - return func->getLocalIndex(name); - } - - Result<Name> getGlobalFromIdx(uint32_t idx) { - if (idx >= wasm.globals.size()) { - return in.err("global index out of bounds"); - } - return wasm.globals[idx]->name; - } - - Result<Name> getGlobalFromName(Name name) { - if (!wasm.getGlobalOrNull(name)) { - return in.err("global $" + name.toString() + " does not exist"); - } - return name; - } - - Result<Name> getMemoryFromIdx(uint32_t idx) { - if (idx >= wasm.memories.size()) { - return in.err("memory index out of bounds"); - } - return wasm.memories[idx]->name; - } - - Result<Name> getMemoryFromName(Name name) { - if (!wasm.getMemoryOrNull(name)) { - return in.err("memory $" + name.toString() + " does not exist"); - } - return name; - } - - Result<Name> getDataFromIdx(uint32_t idx) { - if (idx >= wasm.dataSegments.size()) { - return in.err("data index out of bounds"); - } - return wasm.dataSegments[idx]->name; - } - - Result<Name> getDataFromName(Name name) { - if (!wasm.getDataSegmentOrNull(name)) { - return in.err("data $" + name.toString() + " does not exist"); - } - return name; - } - - Result<TypeUseT> makeTypeUse(Index pos, - std::optional<HeapTypeT> type, - ParamsT* params, - ResultsT* results) { - if (type && (params || results)) { - std::vector<Type> paramTypes; - if (params) { - paramTypes = getUnnamedTypes(*params); - } - - std::vector<Type> resultTypes; - if (results) { - resultTypes = *results; - } - - auto sig = Signature(Type(paramTypes), Type(resultTypes)); - - if (!type->isSignature() || type->getSignature() != sig) { - return in.err(pos, "type does not match provided signature"); - } - } - - if (type) { - return *type; - } - - auto it = implicitTypes.find(pos); - assert(it != 
implicitTypes.end()); - return it->second; - } - - Result<> addFunc(Name, - const std::vector<Name>&, - ImportNames*, - TypeUseT, - std::optional<LocalsT>, - std::optional<InstrsT>, - Index pos) { - CHECK_ERR(withLoc(pos, irBuilder.visitEnd())); - auto body = irBuilder.build(); - CHECK_ERR(withLoc(pos, body)); - wasm.functions[index]->body = *body; - return Ok{}; - } - - Result<> addGlobal(Name, - const std::vector<Name>&, - ImportNames*, - GlobalTypeT, - std::optional<ExprT> exp, - Index) { - if (exp) { - wasm.globals[index]->init = *exp; - } - return Ok{}; - } - - Result<> addData( - Name, Name* mem, std::optional<ExprT> offset, DataStringT, Index pos) { - auto& d = wasm.dataSegments[index]; - if (offset) { - d->isPassive = false; - d->offset = *offset; - if (mem) { - d->memory = *mem; - } else if (wasm.memories.size() > 0) { - d->memory = wasm.memories[0]->name; - } else { - return in.err(pos, "active segment with no memory"); - } - } else { - d->isPassive = true; - } - return Ok{}; - } - - Result<Index> addScratchLocal(Index pos, Type type) { - if (!func) { - return in.err(pos, - "scratch local required, but there is no function context"); - } - Name name = Names::getValidLocalName(*func, "scratch"); - return Builder::addVar(func, name, type); - } - - Result<Expression*> makeExpr(InstrsT& instrs) { return irBuilder.build(); } - - Memarg getMemarg(uint64_t offset, uint32_t align) { return {offset, align}; } - - Result<Name> getMemory(Index pos, Name* mem) { - if (mem) { - return *mem; - } - if (wasm.memories.empty()) { - return in.err(pos, "memory required, but there is no memory"); - } - return wasm.memories[0]->name; - } - - Result<> makeBlock(Index pos, std::optional<Name> label, HeapType type) { - // TODO: validate labels? - // TODO: Move error on input types to here? - return withLoc(pos, - irBuilder.makeBlock(label ? 
*label : Name{}, - type.getSignature().results)); - } - - Result<> finishBlock(Index pos, InstrsT) { - return withLoc(pos, irBuilder.visitEnd()); - } - - Result<> makeUnreachable(Index pos) { - return withLoc(pos, irBuilder.makeUnreachable()); - } - - Result<> makeNop(Index pos) { return withLoc(pos, irBuilder.makeNop()); } - - Result<> makeBinary(Index pos, BinaryOp op) { - return withLoc(pos, irBuilder.makeBinary(op)); - } - - Result<> makeUnary(Index pos, UnaryOp op) { - return withLoc(pos, irBuilder.makeUnary(op)); - } - - Result<> makeSelect(Index pos, std::vector<Type>* res) { - if (res && res->size()) { - if (res->size() > 1) { - return in.err(pos, "select may not have more than one result type"); - } - return withLoc(pos, irBuilder.makeSelect((*res)[0])); - } - return withLoc(pos, irBuilder.makeSelect()); - } - - Result<> makeDrop(Index pos) { return withLoc(pos, irBuilder.makeDrop()); } - - Result<> makeMemorySize(Index pos, Name* mem) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, irBuilder.makeMemorySize(*m)); - } - - Result<> makeMemoryGrow(Index pos, Name* mem) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, irBuilder.makeMemoryGrow(*m)); - } - - Result<> makeLocalGet(Index pos, Index local) { - return withLoc(pos, irBuilder.makeLocalGet(local)); - } - - Result<> makeLocalTee(Index pos, Index local) { - return withLoc(pos, irBuilder.makeLocalTee(local)); - } - - Result<> makeLocalSet(Index pos, Index local) { - return withLoc(pos, irBuilder.makeLocalSet(local)); - } - - Result<> makeGlobalGet(Index pos, Name global) { - return withLoc(pos, irBuilder.makeGlobalGet(global)); - } - - Result<> makeGlobalSet(Index pos, Name global) { - assert(wasm.getGlobalOrNull(global)); - return withLoc(pos, irBuilder.makeGlobalSet(global)); - } - - Result<> makeI32Const(Index pos, uint32_t c) { - return withLoc(pos, irBuilder.makeConst(Literal(c))); - } - - Result<> makeI64Const(Index pos, uint64_t c) { - return 
withLoc(pos, irBuilder.makeConst(Literal(c))); - } - - Result<> makeF32Const(Index pos, float c) { - return withLoc(pos, irBuilder.makeConst(Literal(c))); - } - - Result<> makeF64Const(Index pos, double c) { - return withLoc(pos, irBuilder.makeConst(Literal(c))); - } - - Result<> makeLoad(Index pos, - Type type, - bool signed_, - int bytes, - bool isAtomic, - Name* mem, - Memarg memarg) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - if (isAtomic) { - return withLoc(pos, - irBuilder.makeAtomicLoad(bytes, memarg.offset, type, *m)); - } - return withLoc(pos, - irBuilder.makeLoad( - bytes, signed_, memarg.offset, memarg.align, type, *m)); - } - - Result<> makeStore( - Index pos, Type type, int bytes, bool isAtomic, Name* mem, Memarg memarg) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - if (isAtomic) { - return withLoc(pos, - irBuilder.makeAtomicStore(bytes, memarg.offset, type, *m)); - } - return withLoc( - pos, irBuilder.makeStore(bytes, memarg.offset, memarg.align, type, *m)); - } - - Result<> makeAtomicRMW( - Index pos, AtomicRMWOp op, Type type, int bytes, Name* mem, Memarg memarg) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, - irBuilder.makeAtomicRMW(op, bytes, memarg.offset, type, *m)); - } - - Result<> - makeAtomicCmpxchg(Index pos, Type type, int bytes, Name* mem, Memarg memarg) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, - irBuilder.makeAtomicCmpxchg(bytes, memarg.offset, type, *m)); - } - - Result<> makeAtomicWait(Index pos, Type type, Name* mem, Memarg memarg) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, irBuilder.makeAtomicWait(type, memarg.offset, *m)); - } - - Result<> makeAtomicNotify(Index pos, Name* mem, Memarg memarg) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, irBuilder.makeAtomicNotify(memarg.offset, *m)); - } - - Result<> makeAtomicFence(Index pos) { - return withLoc(pos, irBuilder.makeAtomicFence()); - } - - Result<> 
makeSIMDExtract(Index pos, SIMDExtractOp op, uint8_t lane) { - return withLoc(pos, irBuilder.makeSIMDExtract(op, lane)); - } - - Result<> makeSIMDReplace(Index pos, SIMDReplaceOp op, uint8_t lane) { - return withLoc(pos, irBuilder.makeSIMDReplace(op, lane)); - } - - Result<> makeSIMDShuffle(Index pos, const std::array<uint8_t, 16>& lanes) { - return withLoc(pos, irBuilder.makeSIMDShuffle(lanes)); - } - - Result<> makeSIMDTernary(Index pos, SIMDTernaryOp op) { - return withLoc(pos, irBuilder.makeSIMDTernary(op)); - } - - Result<> makeSIMDShift(Index pos, SIMDShiftOp op) { - return withLoc(pos, irBuilder.makeSIMDShift(op)); - } - - Result<> makeSIMDLoad(Index pos, SIMDLoadOp op, Name* mem, Memarg memarg) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, - irBuilder.makeSIMDLoad(op, memarg.offset, memarg.align, *m)); - } - - Result<> makeSIMDLoadStoreLane( - Index pos, SIMDLoadStoreLaneOp op, Name* mem, Memarg memarg, uint8_t lane) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, - irBuilder.makeSIMDLoadStoreLane( - op, memarg.offset, memarg.align, lane, *m)); - } - - Result<> makeMemoryInit(Index pos, Name* mem, Name data) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, irBuilder.makeMemoryInit(data, *m)); - } - - Result<> makeDataDrop(Index pos, Name data) { - return withLoc(pos, irBuilder.makeDataDrop(data)); - } - - Result<> makeMemoryCopy(Index pos, Name* destMem, Name* srcMem) { - auto destMemory = getMemory(pos, destMem); - CHECK_ERR(destMemory); - auto srcMemory = getMemory(pos, srcMem); - CHECK_ERR(srcMemory); - return withLoc(pos, irBuilder.makeMemoryCopy(*destMemory, *srcMemory)); - } - - Result<> makeMemoryFill(Index pos, Name* mem) { - auto m = getMemory(pos, mem); - CHECK_ERR(m); - return withLoc(pos, irBuilder.makeMemoryFill(*m)); - } - - Result<> makeReturn(Index pos) { - return withLoc(pos, irBuilder.makeReturn()); - } - - Result<> makeRefNull(Index pos, HeapType type) { - return 
withLoc(pos, irBuilder.makeRefNull(type)); - } - - Result<> makeRefIsNull(Index pos) { - return withLoc(pos, irBuilder.makeRefIsNull()); - } - - Result<> makeRefEq(Index pos) { return withLoc(pos, irBuilder.makeRefEq()); } - - Result<> makeRefI31(Index pos) { - return withLoc(pos, irBuilder.makeRefI31()); - } - - Result<> makeI31Get(Index pos, bool signed_) { - return withLoc(pos, irBuilder.makeI31Get(signed_)); - } - - Result<> makeStructNew(Index pos, HeapType type) { - return withLoc(pos, irBuilder.makeStructNew(type)); - } - - Result<> makeStructNewDefault(Index pos, HeapType type) { - return withLoc(pos, irBuilder.makeStructNewDefault(type)); - } - - Result<> makeStructGet(Index pos, HeapType type, Index field, bool signed_) { - return withLoc(pos, irBuilder.makeStructGet(type, field, signed_)); - } - - Result<> makeStructSet(Index pos, HeapType type, Index field) { - return withLoc(pos, irBuilder.makeStructSet(type, field)); - } - - Result<> makeArrayNew(Index pos, HeapType type) { - return withLoc(pos, irBuilder.makeArrayNew(type)); - } - - Result<> makeArrayNewDefault(Index pos, HeapType type) { - return withLoc(pos, irBuilder.makeArrayNewDefault(type)); - } - - Result<> makeArrayNewData(Index pos, HeapType type, Name data) { - return withLoc(pos, irBuilder.makeArrayNewData(type, data)); - } - - Result<> makeArrayNewElem(Index pos, HeapType type, Name elem) { - return withLoc(pos, irBuilder.makeArrayNewElem(type, elem)); - } - - Result<> makeArrayGet(Index pos, HeapType type, bool signed_) { - return withLoc(pos, irBuilder.makeArrayGet(type, signed_)); - } - - Result<> makeArraySet(Index pos, HeapType type) { - return withLoc(pos, irBuilder.makeArraySet(type)); - } - - Result<> makeArrayLen(Index pos) { - return withLoc(pos, irBuilder.makeArrayLen()); - } - - Result<> makeArrayCopy(Index pos, HeapType destType, HeapType srcType) { - return withLoc(pos, irBuilder.makeArrayCopy(destType, srcType)); - } - - Result<> makeArrayFill(Index pos, HeapType type) { - 
return withLoc(pos, irBuilder.makeArrayFill(type)); - } -}; - -// ================ -// Parser Functions -// ================ +using namespace std::string_view_literals; // Types template<typename Ctx> Result<typename Ctx::HeapTypeT> heaptype(Ctx&); @@ -2034,8 +231,28 @@ template<typename Ctx> MaybeResult<> memory(Ctx&); template<typename Ctx> MaybeResult<> global(Ctx&); template<typename Ctx> Result<typename Ctx::DataStringT> datastring(Ctx&); template<typename Ctx> MaybeResult<> data(Ctx&); -MaybeResult<> modulefield(ParseDeclsCtx&); -Result<> module(ParseDeclsCtx&); +template<typename Ctx> MaybeResult<> modulefield(Ctx&); +template<typename Ctx> Result<> module(Ctx&); + +// ========= +// Utilities +// ========= + +// RAII utility for temporarily changing the parsing position of a parsing +// context. +template<typename Ctx> struct WithPosition { + Ctx& ctx; + Index original; + + WithPosition(Ctx& ctx, Index pos) : ctx(ctx), original(ctx.in.getPos()) { + ctx.in.lexer.setIndex(pos); + } + + ~WithPosition() { ctx.in.lexer.setIndex(original); } +}; + +// Deduction guide to satisfy -Wctad-maybe-unsupported. +template<typename Ctx> WithPosition(Ctx& ctx, Index) -> WithPosition<Ctx>; // ===== // Types @@ -3765,7 +1982,7 @@ template<typename Ctx> MaybeResult<> data(Ctx& ctx) { // | start // | elem // | data -MaybeResult<> modulefield(ParseDeclsCtx& ctx) { +template<typename Ctx> MaybeResult<> modulefield(Ctx& ctx) { if (auto t = ctx.in.peek(); !t || t->isRParen()) { return {}; } @@ -3794,7 +2011,7 @@ MaybeResult<> modulefield(ParseDeclsCtx& ctx) { // module ::= '(' 'module' id? 
(m:modulefield)* ')' // | (m:modulefield)* eof -Result<> module(ParseDeclsCtx& ctx) { +template<typename Ctx> Result<> module(Ctx& ctx) { bool outer = ctx.in.takeSExprStart("module"sv); if (outer) { @@ -3814,82 +2031,6 @@ Result<> module(ParseDeclsCtx& ctx) { return Ok{}; } -} // anonymous namespace - -Result<> parseModule(Module& wasm, std::string_view input) { - // Parse module-level declarations. - ParseDeclsCtx decls(input, wasm); - CHECK_ERR(module(decls)); - if (!decls.in.empty()) { - return decls.in.err("Unexpected tokens after module"); - } - - auto typeIndices = createIndexMap(decls.in, decls.subtypeDefs); - CHECK_ERR(typeIndices); - - // Parse type definitions. - std::vector<HeapType> types; - { - TypeBuilder builder(decls.subtypeDefs.size()); - ParseTypeDefsCtx ctx(input, builder, *typeIndices); - for (auto& typeDef : decls.typeDefs) { - WithPosition with(ctx, typeDef.pos); - CHECK_ERR(deftype(ctx)); - } - auto built = builder.build(); - if (auto* err = built.getError()) { - std::stringstream msg; - msg << "invalid type: " << err->reason; - return ctx.in.err(decls.typeDefs[err->index].pos, msg.str()); - } - types = *built; - // Record type names on the module. - for (size_t i = 0; i < types.size(); ++i) { - auto& names = ctx.names[i]; - if (names.name.is() || names.fieldNames.size()) { - wasm.typeNames.insert({types[i], names}); - } - } - } - - // Parse implicit type definitions and map typeuses without explicit types to - // the correct types. - std::unordered_map<Index, HeapType> implicitTypes; - { - ParseImplicitTypeDefsCtx ctx(input, types, implicitTypes, *typeIndices); - for (Index pos : decls.implicitTypeDefs) { - WithPosition with(ctx, pos); - CHECK_ERR(typeuse(ctx)); - } - } - - { - // Parse module-level types. 
- ParseModuleTypesCtx ctx(input, wasm, types, implicitTypes, *typeIndices); - CHECK_ERR(parseDefs(ctx, decls.funcDefs, func)); - CHECK_ERR(parseDefs(ctx, decls.memoryDefs, memory)); - CHECK_ERR(parseDefs(ctx, decls.globalDefs, global)); - // TODO: Parse types of other module elements. - } - { - // Parse definitions. - // TODO: Parallelize this. - ParseDefsCtx ctx(input, wasm, types, implicitTypes, *typeIndices); - CHECK_ERR(parseDefs(ctx, decls.globalDefs, global)); - CHECK_ERR(parseDefs(ctx, decls.dataDefs, data)); - - for (Index i = 0; i < decls.funcDefs.size(); ++i) { - ctx.index = i; - ctx.setFunction(wasm.functions[i].get()); - CHECK_ERR(ctx.irBuilder.makeBlock(Name{}, ctx.func->getResults())); - WithPosition with(ctx, decls.funcDefs[i].pos); - auto parsed = func(ctx); - CHECK_ERR(parsed); - assert(parsed); - } - } - - return Ok{}; -} - } // namespace wasm::WATParser + +#endif // parser_parsers_h diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp new file mode 100644 index 000000000..7b58be4d5 --- /dev/null +++ b/src/parser/wat-parser.cpp @@ -0,0 +1,172 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "wat-parser.h" +#include "contexts.h" +#include "ir/names.h" +#include "lexer.h" +#include "parsers.h" +#include "wasm-type.h" +#include "wasm.h" + +// The WebAssembly text format is recursive in the sense that elements may be +// referred to before they are declared. Furthermore, elements may be referred +// to by index or by name. As a result, we need to parse text modules in +// multiple phases. +// +// In the first phase, we find all of the module element declarations and +// record, but do not interpret, the input spans of their corresponding +// definitions. This phase establishes the indices and names of each module +// element so that subsequent phases can look them up. +// +// The second phase parses type definitions to construct the types used in the +// module. This has to be its own phase because we have no way to refer to a +// type before it has been built along with all the other types, unlike for +// other module elements that can be referred to by name before their +// definitions have been parsed. +// +// The third phase further parses and constructs types implicitly defined by +// type uses in functions, blocks, and call_indirect instructions. These +// implicitly defined types may be referred to by index elsewhere. +// +// The fourth phase parses and sets the types of globals, functions, and other +// top-level module elements. These types need to be set before we parse +// instructions because they determine the types of instructions such as +// global.get and ref.func. +// +// The fifth and final phase parses the remaining contents of all module +// elements, including instructions. +// +// Each phase of parsing gets its own context type that is passed to the +// individual parsing functions. There is a parsing function for each element of +// the grammar given in the spec. Parsing functions are templatized so that they +// may be passed the appropriate context type and return the correct result type +// for each phase. 
+ +namespace wasm::WATParser { + +namespace { + +Result<IndexMap> createIndexMap(ParseInput& in, + const std::vector<DefPos>& defs) { + IndexMap indices; + for (auto& def : defs) { + if (def.name.is()) { + if (!indices.insert({def.name, def.index}).second) { + return in.err(def.pos, "duplicate element name"); + } + } + } + return indices; +} + +template<typename Ctx> +Result<> parseDefs(Ctx& ctx, + const std::vector<DefPos>& defs, + MaybeResult<> (*parser)(Ctx&)) { + for (auto& def : defs) { + ctx.index = def.index; + WithPosition with(ctx, def.pos); + auto parsed = parser(ctx); + CHECK_ERR(parsed); + assert(parsed); + } + return Ok{}; +} + +// ================ +// Parser Functions +// ================ + +} // anonymous namespace + +Result<> parseModule(Module& wasm, std::string_view input) { + // Parse module-level declarations. + ParseDeclsCtx decls(input, wasm); + CHECK_ERR(module(decls)); + if (!decls.in.empty()) { + return decls.in.err("Unexpected tokens after module"); + } + + auto typeIndices = createIndexMap(decls.in, decls.subtypeDefs); + CHECK_ERR(typeIndices); + + // Parse type definitions. + std::vector<HeapType> types; + { + TypeBuilder builder(decls.subtypeDefs.size()); + ParseTypeDefsCtx ctx(input, builder, *typeIndices); + for (auto& typeDef : decls.typeDefs) { + WithPosition with(ctx, typeDef.pos); + CHECK_ERR(deftype(ctx)); + } + auto built = builder.build(); + if (auto* err = built.getError()) { + std::stringstream msg; + msg << "invalid type: " << err->reason; + return ctx.in.err(decls.typeDefs[err->index].pos, msg.str()); + } + types = *built; + // Record type names on the module. + for (size_t i = 0; i < types.size(); ++i) { + auto& names = ctx.names[i]; + if (names.name.is() || names.fieldNames.size()) { + wasm.typeNames.insert({types[i], names}); + } + } + } + + // Parse implicit type definitions and map typeuses without explicit types to + // the correct types. 
+ std::unordered_map<Index, HeapType> implicitTypes; + { + ParseImplicitTypeDefsCtx ctx(input, types, implicitTypes, *typeIndices); + for (Index pos : decls.implicitTypeDefs) { + WithPosition with(ctx, pos); + CHECK_ERR(typeuse(ctx)); + } + } + + { + // Parse module-level types. + ParseModuleTypesCtx ctx(input, wasm, types, implicitTypes, *typeIndices); + CHECK_ERR(parseDefs(ctx, decls.funcDefs, func)); + CHECK_ERR(parseDefs(ctx, decls.memoryDefs, memory)); + CHECK_ERR(parseDefs(ctx, decls.globalDefs, global)); + // TODO: Parse types of other module elements. + } + { + // Parse definitions. + // TODO: Parallelize this. + ParseDefsCtx ctx(input, wasm, types, implicitTypes, *typeIndices); + CHECK_ERR(parseDefs(ctx, decls.globalDefs, global)); + CHECK_ERR(parseDefs(ctx, decls.dataDefs, data)); + + for (Index i = 0; i < decls.funcDefs.size(); ++i) { + ctx.index = i; + ctx.setFunction(wasm.functions[i].get()); + CHECK_ERR(ctx.irBuilder.makeBlock(Name{}, ctx.func->getResults())); + WithPosition with(ctx, decls.funcDefs[i].pos); + auto parsed = func(ctx); + CHECK_ERR(parsed); + assert(parsed); + } + } + + return Ok{}; +} + +} // namespace wasm::WATParser diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h new file mode 100644 index 000000000..d3ad8d7f3 --- /dev/null +++ b/src/parser/wat-parser.h @@ -0,0 +1,32 @@ +/* + * Copyright 2023 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef parser_wat_parser_h +#define parser_wat_parser_h + +#include <string_view> + +#include "support/result.h" +#include "wasm.h" + +namespace wasm::WATParser { + +// Parse a single WAT module. +Result<> parseModule(Module& wasm, std::string_view in); + +} // namespace wasm::WATParser + +#endif // parser_wat_parser_h diff --git a/src/wasm/CMakeLists.txt b/src/wasm/CMakeLists.txt index 30f52f9ee..d5b4f6747 100644 --- a/src/wasm/CMakeLists.txt +++ b/src/wasm/CMakeLists.txt @@ -13,8 +13,6 @@ set(wasm_SOURCES wasm-stack.cpp wasm-type.cpp wasm-validator.cpp - wat-lexer.cpp - wat-parser.cpp ${wasm_HEADERS} ) # wasm-debug.cpp includes LLVM header using std::iterator (deprecated in C++17) |